Merge "Upstream: Integrate TCG changes for remaining arches"
diff --git a/Makefile.android b/Makefile.android
index 58b37f1..b681764 100644
--- a/Makefile.android
+++ b/Makefile.android
@@ -799,9 +799,6 @@
 UI_AND_CORE_SOURCES += $(LIBPNG_SOURCES)
 UI_AND_CORE_CFLAGS  += $(LIBPNG_CFLAGS) -I$(LOCAL_PATH)/$(LIBPNG_DIR)
 
-# temp files used to collect UI->Core exchange protocol.
-UI_AND_CORE_SOURCES += android/ui-core-protocol.c android/core-ui-protocol.c
-
 # The common libraries
 #
 QEMU_SYSTEM_LDLIBS := -lm
@@ -879,6 +876,8 @@
               user-events-qemu.c \
               android/cmdline-option.c \
               android/looper-qemu.c \
+              android/protocol/ui-commands-qemu.c \
+              android/protocol/core-commands-qemu.c \
               android/main.c \
 
 # Add common system libraries
@@ -1058,8 +1057,9 @@
               android/looper-generic.c \
               android/display-core.c \
               android/framebuffer-core.c \
-              android/user-events-core.c \
-              android/ui-ctl-core.c \
+              android/protocol/user-events-impl.c \
+              android/protocol/ui-commands-proxy.c \
+              android/protocol/core-commands-impl.c \
 
 # Add common system libraries
 #
@@ -1204,7 +1204,6 @@
 # include other sources
 #
 VL_SOURCES := framebuffer.c \
-              user-events-ui.c \
               android/cmdline-option.c \
               android/config.c \
               android/display.c \
@@ -1217,7 +1216,9 @@
               console-ui.c \
               iolooper-select.c \
               android/framebuffer-ui.c \
-              android/ui-ctl-ui.c \
+              android/protocol/ui-commands-impl.c \
+              android/protocol/core-commands-proxy.c \
+              android/protocol/user-events-proxy.c \
 
 # Add common system libraries
 #
diff --git a/android/console.c b/android/console.c
index 81c2efe..59610eb 100644
--- a/android/console.c
+++ b/android/console.c
@@ -50,11 +50,12 @@
 #include "user-events.h"
 #include "android/keycode-array.h"
 #include "android/charmap.h"
-#include "android/core-ui-protocol.h"
 #include "android/display-core.h"
 #include "android/framebuffer-core.h"
-#include "android/user-events-core.h"
-#include "android/ui-ctl-core.h"
+#include "android/protocol/user-events-impl.h"
+#include "android/protocol/ui-commands-api.h"
+#include "android/protocol/core-commands-impl.h"
+#include "android/protocol/ui-commands-proxy.h"
 
 #if defined(CONFIG_SLIRP)
 #include "libslirp.h"
@@ -125,9 +126,6 @@
 /* User events service client. */
 ControlClient user_events_client = NULL;
 
-/* User events service. */
-CoreUserEvents* core_ue = NULL;
-
 /* UI control service client (UI -> Core). */
 ControlClient ui_core_ctl_client = NULL;
 
@@ -258,17 +256,17 @@
     }
 
     if (client == user_events_client) {
-        coreue_destroy(core_ue);
+        userEventsImpl_destroy();
         user_events_client = NULL;
     }
 
     if (client == ui_core_ctl_client) {
-        uicorectl_destroy();
+        coreCmdImpl_destroy();
         ui_core_ctl_client = NULL;
     }
 
     if (client == core_ui_ctl_client) {
-        coreuictl_destroy();
+        uiCmdProxy_destroy();
         core_ui_ctl_client = NULL;
     }
 #endif  // CONFIG_STANDALONE_CORE
@@ -2443,7 +2441,7 @@
         }
     }
 
-    android_ui_set_window_scale( scale, is_dpi );
+    uicmd_set_window_scale( scale, is_dpi );
     return 0;
 }
 
@@ -2591,8 +2589,7 @@
         return -1;
     }
 
-    core_ue = coreue_create(client->sock);
-    if (core_ue != NULL) {
+    if (!userEventsImpl_create(client->sock)) {
         char reply_buf[4096];
         user_events_client = client;
         snprintf(reply_buf, sizeof(reply_buf), "OK\r\n");
@@ -2607,7 +2604,7 @@
 }
 
 void
-destroy_control_ue_client(void)
+destroy_user_events_client(void)
 {
     if (user_events_client != NULL) {
         control_client_destroy(user_events_client);
@@ -2624,7 +2621,7 @@
         return -1;
     }
 
-    if (!uicorectl_create(client->sock)) {
+    if (!coreCmdImpl_create(client->sock)) {
         char reply_buf[4096];
         ui_core_ctl_client = client;
         snprintf(reply_buf, sizeof(reply_buf), "OK\r\n");
@@ -2646,6 +2643,14 @@
     }
 }
 
+void
+destroy_corecmd_client(void)
+{
+    /* No-op unless the UI -> Core control service client is attached. */
+    if (ui_core_ctl_client == NULL) return;
+    control_client_destroy(ui_core_ctl_client);
+}
+
 static int
 do_create_core_ui_ctl_service( ControlClient client, char* args )
 {
@@ -2656,7 +2661,7 @@
         return -1;
     }
 
-    if (!coreuictl_create(client->sock)) {
+    if (!uiCmdProxy_create(client->sock)) {
         char reply_buf[4096];
         core_ui_ctl_client = client;
         snprintf(reply_buf, sizeof(reply_buf), "OK\r\n");
@@ -2677,6 +2682,15 @@
         control_client_destroy(core_ui_ctl_client);
     }
 }
+
+void
+destroy_uicmd_client(void)
+{
+    /* No-op unless a core_ui_ctl_client console client is attached. */
+    if (core_ui_ctl_client == NULL) return;
+    control_client_destroy(core_ui_ctl_client);
+}
+
 #endif  // CONFIG_STANDALONE_CORE
 
 static const CommandDefRec  qemu_commands[] =
diff --git a/android/core-connection.c b/android/core-connection.c
index 93a3efe..1099291 100644
--- a/android/core-connection.c
+++ b/android/core-connection.c
@@ -281,10 +281,15 @@
         syncsocket_read_line_absolute(desc->ssocket, buf, sizeof(buf), deadline);
     _zero_terminate(buf, sizeof(buf), handshake_len);
     // Replace terminating "\r\n" with 0
-    if (handshake_len >= 2 && buf[handshake_len - 2] == '\r') {
-        buf[handshake_len - 2] = '\0';
+    if (handshake_len >= 1) {
+        if (buf[handshake_len - 1] == '\r' || buf[handshake_len - 1] == '\n') {
+            buf[handshake_len - 1] = '\0';
+            if (handshake_len >= 2 && (buf[handshake_len - 2] == '\r' ||
+                                       buf[handshake_len - 2] == '\n')) {
+                buf[handshake_len - 2] = '\0';
+            }
+        }
     }
-    printf("Handshake: %s\n", buf);
     // Lets see what kind of response we've got here.
     if (_is_reply_ok(buf, handshake_len)) {
         *handshake = strdup(buf + 3);
@@ -316,6 +321,35 @@
     }
 }
 
+CoreConnection*
+core_connection_create_and_switch(SockAddress* console_socket,
+                                  const char* stream_name,
+                                  char** handshake)
+{
+    CoreConnection* connection;
+
+    // Define *handshake up front: early failures never reach the switch.
+    if (handshake != NULL) {
+        *handshake = NULL;
+    }
+    // Connect to the console service.
+    connection = core_connection_create(console_socket);
+    if (connection == NULL) {
+        return NULL;
+    }
+    if (core_connection_open(connection)) {
+        core_connection_free(connection);
+        return NULL;
+    }
+    // Switch streams; the name is passed as is, with no truncating copy.
+    if (core_connection_switch_stream(connection, stream_name, handshake)) {
+        core_connection_close(connection);
+        core_connection_free(connection);
+        return NULL;
+    }
+    return connection;
+}
+
 void
 core_connection_detach(CoreConnection* desc)
 {
diff --git a/android/core-connection.h b/android/core-connection.h
index 19e91a1..5701f8c 100644
--- a/android/core-connection.h
+++ b/android/core-connection.h
@@ -29,8 +29,8 @@
 // Maximum number of core porocesses running simultaneously on a machine.
 #define MAX_CORE_PROCS          16
 
-// Socket timeout in millisec (set to half a second)
-#define CORE_PORT_TIMEOUT_MS    500
+// Socket timeout in millisec (set to 5 seconds)
+#define CORE_PORT_TIMEOUT_MS    5000
 
 /* Opens core console socket.
  * Param:
@@ -122,6 +122,20 @@
                                   const char* stream_name,
                                   char** handshake);
 
+/* Creates a console client, and switches it to a given stream.
+ *  console_socket Socket address for the console.
+ *  stream_name Name of the stream to switch to.
+ *  handshake Address of a string to allocate for a handshake message on
+ *      success, or an error message on failure. If upon return from this
+ *      routine that string is not NULL, its buffer must be freed with 'free'.
+ * Return:
+ *  Allocated and initialized descriptor for the switched client on success, or
+ *  NULL on failure.
+ */
+CoreConnection* core_connection_create_and_switch(SockAddress* console_socket,
+                                                  const char* stream_name,
+                                                  char** handshake);
+
 /* Detaches opened console client from the console.
  * By console protocol, writing "\r\n" string to the console will destroy the
  * console client.
@@ -138,4 +152,18 @@
  */
 int core_connection_get_socket(CoreConnection* desc);
 
+/* Estimates how long transferring the given number of bytes via core
+ * connection may take.
+ * Return:
+ *  Timeout in milliseconds: a fixed 2000 ms floor plus 10 ms for every byte of
+ *  data_size.
+ */
+static inline int
+core_connection_get_timeout(size_t data_size)
+{
+    // TODO: Come up with a better arithmetics here.
+    const size_t per_byte_ms = data_size * 10;
+    return 2000 + per_byte_ms;
+}
+
 #endif  // QEMU_ANDROID_CORE_CONNECTION_H
diff --git a/android/core-ui-protocol.c b/android/core-ui-protocol.c
deleted file mode 100644
index a85ae7c..0000000
--- a/android/core-ui-protocol.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-/*
- * This file contains helper routines that are used to establish communication
- * between Core and UI components of the emulator. This is a temporary file
- * where we will collect functional dependencies between Core and UI in the
- * process of separating UI and Core in the emulator build.
- */
-
-#include "android/globals.h"
-#include "android/android.h"
-#include "android/core-ui-protocol.h"
-#if defined(CONFIG_STANDALONE_CORE)
-#include "android/ui-ctl-core.h"
-#endif  // defined(CONFIG_STANDALONE_CORE)
-
-#if !defined(CONFIG_STANDALONE_CORE)
-/* in android/qemulator.c */
-extern void  android_emulator_set_window_scale( double, int );
-#endif
-
-void
-android_ui_set_window_scale(double scale, int is_dpi)
-{
-#if !defined(CONFIG_STANDALONE_CORE)
-    android_emulator_set_window_scale(scale, is_dpi);
-#else
-    coreuictl_set_window_scale(scale, is_dpi);
-#endif
-}
-
diff --git a/android/core-ui-protocol.h b/android/core-ui-protocol.h
deleted file mode 100644
index 5fc2372..0000000
--- a/android/core-ui-protocol.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-/*
- * This file contains declarations of helper routines that are used to
- * establish communication between Core and UI components of the emulator.
- * This is a temporary file where we will collect functional dependencies
- * between Core and UI in the process of separating UI and Core in the
- * emulator build.
- */
-
-#ifndef QEMU_ANDROID_CORE_UI_PROTOCOL_H
-#define QEMU_ANDROID_CORE_UI_PROTOCOL_H
-
-/* Changes the scale of the emulator window at runtime. */
-void android_ui_set_window_scale(double scale, int is_dpi);
-
-#endif  // QEMU_ANDROID_CORE_UI_PROTOCOL_H
diff --git a/android/help.c b/android/help.c
index efdcd06..7df6704 100644
--- a/android/help.c
+++ b/android/help.c
@@ -10,7 +10,7 @@
 #include "audio/audio.h"
 #include <string.h>
 #include <stdlib.h>
-#include "android/ui-core-protocol.h"
+#include "android/protocol/core-commands-api.h"
 
 /* XXX: TODO: put most of the help stuff in auto-generated files */
 
@@ -778,7 +778,7 @@
 
     "  the format of -netspeed is one of the following (numbers are kbits/s):\n\n" );
 
-    for (n = 0; !android_core_get_android_netspeed(n, &android_netspeed); n++) {
+    for (n = 0; !corecmd_get_netspeed(n, &android_netspeed); n++) {
         PRINTF( "    -netspeed %-12s %-15s  (up: %.1f, down: %.1f)\n",
                         android_netspeed->name,
                         android_netspeed->display,
@@ -791,7 +791,7 @@
     PRINTF( "    -netspeed %-12s %s", "<up>:<down>", "select individual up and down speed\n");
 
     PRINTF( "\n  The format of -netdelay is one of the following (numbers are msec):\n\n" );
-    for (n = 0; !android_core_get_android_netdelay(n, &android_netdelay); n++) {
+    for (n = 0; !corecmd_get_netdelay(n, &android_netdelay); n++) {
         PRINTF( "    -netdelay %-10s   %-15s  (min %d, max %d)\n",
                         android_netdelay->name, android_netdelay->display,
                         android_netdelay->min_ms, android_netdelay->max_ms );
diff --git a/android/main-ui.c b/android/main-ui.c
index d150be5..4b86ad1 100644
--- a/android/main-ui.c
+++ b/android/main-ui.c
@@ -61,7 +61,9 @@
 #include "android/snapshot.h"
 #include "android/core-connection.h"
 #include "android/framebuffer-ui.h"
-#include "android/ui-ctl-ui.h"
+#include "android/protocol/user-events-proxy.h"
+#include "android/protocol/core-commands-proxy.h"
+#include "android/protocol/ui-commands-impl.h"
 
 #include "framebuffer.h"
 #include "iolooper.h"
@@ -760,8 +762,8 @@
 // Maximum number of core porocesses running simultaneously on a machine.
 #define MAX_CORE_PROCS          16
 
-// Socket timeout in millisec (set to half a second)
-#define CORE_PORT_TIMEOUT_MS    500
+// Socket timeout in millisec (set to 5 seconds)
+#define CORE_PORT_TIMEOUT_MS    5000
 
 #include "android/async-console.h"
 
@@ -856,9 +858,6 @@
     }
 }
 
-/* Implemented in user-events-ui.c */
-extern int clientue_create(SockAddress* console_socket);
-
 /* Attaches starting UI to a running core process.
  * This routine is called from main() when -attach-core parameter is set,
  * indicating that this UI instance should attach to a running core, rather than
@@ -968,7 +967,7 @@
     }
 
     // Connect to the core's user events service.
-    if (clientue_create(&console_socket)) {
+    if (userEventsProxy_create(&console_socket)) {
         return -1;
     }
 
@@ -976,7 +975,10 @@
     // implementation there are two UI control services: "ui-core-control" that
     // handle UI controls initiated in the UI, and "core-ui-control" that handle
     // UI controls initiated in the core.
-    if (clientuictl_create(&console_socket)) {
+    if (coreCmdProxy_create(&console_socket)) {
+        return -1;
+    }
+    if (uiCmdImpl_create(&console_socket)) {
         return -1;
     }
 
diff --git a/android/protocol/core-commands-api.h b/android/protocol/core-commands-api.h
new file mode 100644
index 0000000..93a569c
--- /dev/null
+++ b/android/protocol/core-commands-api.h
@@ -0,0 +1,95 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _ANDROID_PROTOCOL_CORE_COMMANDS_API_H
+#define _ANDROID_PROTOCOL_CORE_COMMANDS_API_H
+
+/*
+ * Contains the API for calling into the Core with UI control commands.
+ */
+
+#include "android/android.h"
+#include "android/hw-sensors.h"
+
+/* Instructs the Core to change the coarse orientation.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int corecmd_set_coarse_orientation(AndroidCoarseOrientation orient);
+
+/* Toggles the network in the Core. Takes no arguments.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int corecmd_toggle_network(void);
+
+/* Starts or stops tracing in the Core.
+ * Param:
+ *  start - Starts (> 0), or stops (== 0) tracing.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int corecmd_trace_control(int start);
+
+/* Checks if network is disabled in the Core. Takes no arguments.
+ * Return:
+ *  0 if network is enabled, 1 if it is disabled, or < 0 on failure.
+ */
+extern int corecmd_is_network_disabled(void);
+
+/* Requests a NetworkSpeed instance from the Core.
+ * Param:
+ *  index - Index of an entry in the NetworkSpeed array.
+ *  netspeed - Upon success contains allocated and initialized NetworkSpeed
+ *      instance for the given index. Note that strings addressed by "name" and
+ *      "display" fields in the returned NetworkSpeed instance are contained
+ *      inside the buffer allocated for the returned NetworkSpeed instance.
+ *      Caller of this routine must eventually free the buffer returned in this
+ *      parameter.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int corecmd_get_netspeed(int index, NetworkSpeed** netspeed);
+
+/* Requests a NetworkLatency instance from the Core.
+ * Param:
+ *  index - Index of an entry in the NetworkLatency array.
+ *  netdelay - Upon success contains allocated and initialized NetworkLatency
+ *      instance for the given index. Note that strings addressed by "name" and
+ *      "display" fields in the returned NetworkLatency instance are contained
+ *      inside the buffer allocated for the returned NetworkLatency instance.
+ *      Caller of this routine must eventually free the buffer returned in this
+ *      parameter.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int corecmd_get_netdelay(int index, NetworkLatency** netdelay);
+
+/* Requests a QEMU file path from the Core.
+ * Param:
+ *  type, filename - Request parameters that define the file for which path is
+ *  requested.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int corecmd_get_qemu_path(int type,
+                                 const char* filename,
+                                 char* path,
+                                 size_t path_buf_size);
+
+/* Gets LCD density property from the core properties.
+ * Return:
+ *  LCD density on success, or < 0 on failure.
+ */
+extern int corecmd_get_hw_lcd_density(void);
+
+#endif /* _ANDROID_PROTOCOL_CORE_COMMANDS_API_H */
diff --git a/android/protocol/core-commands-impl.c b/android/protocol/core-commands-impl.c
new file mode 100644
index 0000000..7fa2a0b
--- /dev/null
+++ b/android/protocol/core-commands-impl.c
@@ -0,0 +1,440 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+/*
+ * Contains the Core-side implementation of the "ui-core-control" service that is
+ * part of the UI control protocol. Here we handle UI control commands sent by
+ * the UI to the Core.
+ */
+
+#include "android/android.h"
+#include "android/globals.h"
+#include "telephony/modem_driver.h"
+#include "trace.h"
+#include "android/looper.h"
+#include "android/async-utils.h"
+#include "android/sync-utils.h"
+#include "android/utils/debug.h"
+#include "android/protocol/core-commands.h"
+#include "android/protocol/core-commands-impl.h"
+
+/* States of the command reader in the CoreCmdImpl descriptor. The state tells
+ * the I/O callback what the read that has just completed contains. */
+typedef enum CoreCmdImplState {
+    /* The reader is waiting on a command header (UICmdHeader). */
+    EXPECTS_HEADER,
+
+    /* The reader is waiting on the parameters announced by the header. */
+    EXPECTS_PARAMETERS,
+} CoreCmdImplState;
+
+/* Descriptor for the Core-side implementation of the "ui-core-control" service.
+ */
+typedef struct CoreCmdImpl {
+    /* Asynchronous reader for command headers and parameters sent by the UI. */
+    AsyncReader         async_reader;
+
+    /* I/O associated with this descriptor. */
+    LoopIo              io;
+
+    /* Looper used to communicate with the UI. */
+    Looper*             looper;
+
+    /* Synchronous writer used to send responses to the UI commands. */
+    SyncSocket*         sync_writer;
+
+    /* Socket descriptor for this service. */
+    int                 sock;
+
+    /* Command reader state: tells what the pending read is expected to be. */
+    CoreCmdImplState    cmd_state;
+
+    /* Incoming command header. */
+    UICmdHeader         cmd_header;
+
+    /* A small preallocated buffer for command parameters. */
+    uint8_t             cmd_param[256];
+
+    /* Buffer to use for reading command parameters. Depending on expected size
+     * of the parameters this buffer can point to cmd_param field of this
+     * structure (for small commands), or can be allocated for large commands. */
+    void*               cmd_param_buf;
+} CoreCmdImpl;
+
+/* One and only one CoreCmdImpl instance (set up by coreCmdImpl_create). */
+static CoreCmdImpl    _coreCmdImpl;
+
+/* Implemented in android/console.c; called here when the UI disconnects. */
+extern void destroy_corecmd_client(void);
+/* Implemented in vl-android.c; its result is released with qemu_free here. */
+extern char* qemu_find_file(int type, const char* filename);
+
+/* Points cmd_param_buf at storage for 'size' bytes of command parameters:
+ * the embedded cmd_param array when 'size' is smaller than that array, or a
+ * buffer obtained from qemu_malloc otherwise. Returns the selected buffer.
+ */
+static uint8_t*
+_alloc_cmd_param_buf(CoreCmdImpl* corecmd, uint32_t size)
+{
+    const int fits_embedded = size < sizeof(corecmd->cmd_param);
+
+    // Small requests reuse the preallocated array; larger ones are allocated.
+    corecmd->cmd_param_buf = fits_embedded ? (void*)&corecmd->cmd_param[0]
+                                           : qemu_malloc(size);
+
+    return corecmd->cmd_param_buf;
+}
+
+/* Frees a heap-allocated cmd_param_buf and resets it to the embedded array. */
+static void
+_free_cmd_param_buf(CoreCmdImpl* corecmd)
+{
+    void* const embedded = &corecmd->cmd_param[0];
+    if (corecmd->cmd_param_buf != embedded) {
+        qemu_free(corecmd->cmd_param_buf);
+        corecmd->cmd_param_buf = embedded;
+    }
+}
+
+/* Estimates how long a socket transfer of the given number of bytes may take.
+ * Return:
+ *  Timeout in milliseconds for this service: a fixed 2000 ms floor plus 10 ms
+ *  for every byte of data_size.
+ */
+static int
+_coreCmdImpl_get_timeout(size_t data_size)
+{
+    // TODO: Come up with a better arithmetics here.
+    const size_t per_byte_ms = data_size * 10;
+    return 2000 + per_byte_ms;
+}
+
+/* Sends command response back to the UI.
+ * Param:
+ *  corecmd - CoreCmdImpl instance whose sync_writer is used for the response.
+ *  resp - Response header; its resp_data_size gives the size of resp_data.
+ *  resp_data - Response data, or NULL to send the header alone.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+static int
+_coreCmdImpl_respond(CoreCmdImpl* corecmd, UICmdRespHeader* resp, void* resp_data)
+{
+    SyncSocket* const writer = corecmd->sync_writer;
+    const int has_payload = resp_data != NULL && resp->resp_data_size != 0;
+    int rc = syncsocket_start_write(writer);
+
+    if (rc == 0) {
+        // The header goes first; data follows only if the header was written.
+        rc = syncsocket_write(writer, resp, sizeof(UICmdRespHeader),
+                              _coreCmdImpl_get_timeout(sizeof(UICmdRespHeader)));
+        if (rc > 0 && has_payload) {
+            rc = syncsocket_write(writer, resp_data, resp->resp_data_size,
+                                  _coreCmdImpl_get_timeout(resp->resp_data_size));
+        }
+        rc = syncsocket_result(rc);
+        syncsocket_stop_write(writer);
+    }
+    if (rc < 0) {
+        derror("Core is unable to respond with %u bytes to the UI control command: %s\n",
+               resp->resp_data_size, errno_str);
+    }
+    return rc;
+}
+
+/* Handles UI control command received from the UI.
+ * Commands that report a result are answered through _coreCmdImpl_respond.
+ * Param:
+ *  corecmd - CoreCmdImpl instance that received the command.
+ *  cmd_header - Command header.
+ *  cmd_param - Command data, or NULL if the header announced no parameters.
+ */
+static void
+_coreCmdImpl_handle_command(CoreCmdImpl* corecmd,
+                            const UICmdHeader* cmd_header,
+                            const uint8_t* cmd_param)
+{
+    switch (cmd_header->cmd_type) {
+        case AUICMD_SET_COARSE_ORIENTATION:
+        {
+            const UICmdSetCoarseOrientation* cmd =
+                (const UICmdSetCoarseOrientation*)cmd_param;
+            // Without parameters there is no orientation to apply.
+            if (cmd != NULL) {
+                android_sensors_set_coarse_orientation(cmd->orient);
+            }
+            break;
+        }
+
+        case AUICMD_TOGGLE_NETWORK:
+            qemu_net_disable = !qemu_net_disable;
+            if (android_modem) {
+                amodem_set_data_registration(
+                        android_modem,
+                        qemu_net_disable ? A_REGISTRATION_UNREGISTERED
+                                         : A_REGISTRATION_HOME);
+            }
+            break;
+
+        case AUICMD_TRACE_CONTROL:
+        {
+            const UICmdTraceControl* cmd = (const UICmdTraceControl*)cmd_param;
+            if (cmd == NULL) {
+                break;
+            }
+            if (cmd->start) {
+                start_tracing();
+            } else {
+                stop_tracing();
+            }
+            break;
+        }
+
+        case AUICMD_CHK_NETWORK_DISABLED:
+        {
+            UICmdRespHeader resp;
+            resp.resp_data_size = 0;
+            resp.result = qemu_net_disable;
+            _coreCmdImpl_respond(corecmd, &resp, NULL);
+            break;
+        }
+
+        case AUICMD_GET_NETSPEED:
+        {
+            UICmdRespHeader resp;
+            UICmdGetNetSpeedResp* resp_data = NULL;
+            const UICmdGetNetSpeed* cmd = (const UICmdGetNetSpeed*)cmd_param;
+
+            resp.resp_data_size = 0;
+            resp.result = 0;
+
+            // A request without parameters is answered as a failure.
+            if (cmd == NULL || cmd->index >= android_netspeeds_count ||
+                android_netspeeds[cmd->index].name == NULL) {
+                resp.result = -1;
+            } else {
+                const NetworkSpeed* netspeed = &android_netspeeds[cmd->index];
+                // A missing display string is sent as an empty string.
+                const char* display =
+                    netspeed->display != NULL ? netspeed->display : "";
+                // Calculate size of the response data: fixed header +
+                // zero-terminated name + zero-terminated display.
+                resp.resp_data_size = sizeof(UICmdGetNetSpeedResp) +
+                                      strlen(netspeed->name) + 1 +
+                                      strlen(display) + 1;
+                // Allocate and initialize response data buffer.
+                resp_data =
+                    (UICmdGetNetSpeedResp*)qemu_malloc(resp.resp_data_size);
+                resp_data->upload = netspeed->upload;
+                resp_data->download = netspeed->download;
+                strcpy(resp_data->name, netspeed->name);
+                strcpy(resp_data->name + strlen(resp_data->name) + 1, display);
+            }
+            _coreCmdImpl_respond(corecmd, &resp, resp_data);
+            if (resp_data != NULL) {
+                qemu_free(resp_data);
+            }
+            break;
+        }
+
+        case AUICMD_GET_NETDELAY:
+        {
+            UICmdRespHeader resp;
+            UICmdGetNetDelayResp* resp_data = NULL;
+            const UICmdGetNetDelay* cmd = (const UICmdGetNetDelay*)cmd_param;
+
+            resp.resp_data_size = 0;
+            resp.result = 0;
+
+            // A request without parameters is answered as a failure.
+            if (cmd == NULL || cmd->index >= android_netdelays_count ||
+                android_netdelays[cmd->index].name == NULL) {
+                resp.result = -1;
+            } else {
+                const NetworkLatency* netdelay = &android_netdelays[cmd->index];
+                // A missing display string is sent as an empty string.
+                const char* display =
+                    netdelay->display != NULL ? netdelay->display : "";
+                // Calculate size of the response data: fixed header +
+                // zero-terminated name + zero-terminated display.
+                resp.resp_data_size = sizeof(UICmdGetNetDelayResp) +
+                                      strlen(netdelay->name) + 1 +
+                                      strlen(display) + 1;
+                // Allocate and initialize response data buffer.
+                resp_data =
+                    (UICmdGetNetDelayResp*)qemu_malloc(resp.resp_data_size);
+                resp_data->min_ms = netdelay->min_ms;
+                resp_data->max_ms = netdelay->max_ms;
+                strcpy(resp_data->name, netdelay->name);
+                strcpy(resp_data->name + strlen(resp_data->name) + 1, display);
+            }
+            _coreCmdImpl_respond(corecmd, &resp, resp_data);
+            if (resp_data != NULL) {
+                qemu_free(resp_data);
+            }
+            break;
+        }
+
+        case AUICMD_GET_QEMU_PATH:
+        {
+            UICmdRespHeader resp;
+            const UICmdGetQemuPath* cmd = (const UICmdGetQemuPath*)cmd_param;
+            char* filepath = NULL;
+
+            resp.resp_data_size = 0;
+            resp.result = -1;
+            if (cmd != NULL) {
+                filepath = qemu_find_file(cmd->type, cmd->filename);
+            }
+            if (filepath != NULL) {
+                // The file was found: report success and send the path,
+                // including its terminating zero.
+                resp.result = 0;
+                resp.resp_data_size = strlen(filepath) + 1;
+            }
+            _coreCmdImpl_respond(corecmd, &resp, filepath);
+            if (filepath != NULL) {
+                qemu_free(filepath);
+            }
+            break;
+        }
+
+        case AUICMD_GET_LCD_DENSITY:
+        {
+            UICmdRespHeader resp;
+            resp.resp_data_size = 0;
+            resp.result = android_hw->hw_lcd_density;
+            _coreCmdImpl_respond(corecmd, &resp, NULL);
+            break;
+        }
+
+        default:
+            derror("Unknown UI control command %d is received by the Core.\n",
+                   cmd_header->cmd_type);
+            break;
+    }
+}
+
+/* Asynchronous I/O callback reading UI control commands.
+ * Param:
+ *  opaque - CoreCmdImpl instance.
+ *  events - Lists I/O event (read or write) this callback is called for.
+ */
+static void
+_coreCmdImpl_io_func(void* opaque, int fd, unsigned events)
+{
+    CoreCmdImpl* const corecmd = (CoreCmdImpl*)opaque;
+    AsyncStatus status;
+
+    if (events & LOOP_IO_WRITE) {
+        // We don't use async writer here, so we don't expect
+        // any write callbacks.
+        derror("Unexpected LOOP_IO_WRITE in _coreCmdImpl_io_func\n");
+        return;
+    }
+
+    // Read whatever is expected from the socket.
+    status = asyncReader_read(&corecmd->async_reader, &corecmd->io);
+    if (status == ASYNC_NEED_MORE) {
+        // Transfer will eventually come back into this routine.
+        return;
+    }
+
+    if (status == ASYNC_ERROR) {
+        // Reading failed: stop asking for read events on this socket.
+        loopIo_dontWantRead(&corecmd->io);
+        if (errno == ECONNRESET) {
+            // UI has exited. We need to destroy the service.
+            destroy_corecmd_client();
+        }
+        return;
+    }
+
+    if (status != ASYNC_COMPLETE) {
+        // No other status carries data to process.
+        return;
+    }
+
+    // The pending read has completed: depending on the reader state it was
+    // either a command header or the parameters of a command.
+    if (corecmd->cmd_state == EXPECTS_HEADER) {
+        if (corecmd->cmd_header.cmd_param_size != 0) {
+            // We just read the command header. Now we expect the param, so
+            // set up the reader to read the expected amount of data.
+            corecmd->cmd_state = EXPECTS_PARAMETERS;
+            _alloc_cmd_param_buf(corecmd,
+                                 corecmd->cmd_header.cmd_param_size);
+            asyncReader_init(&corecmd->async_reader,
+                             corecmd->cmd_param_buf,
+                             corecmd->cmd_header.cmd_param_size,
+                             &corecmd->io);
+            return;
+        }
+
+        // Command doesn't have param. Go ahead and handle it.
+        _coreCmdImpl_handle_command(corecmd, &corecmd->cmd_header,
+                                    NULL);
+    } else if (corecmd->cmd_state == EXPECTS_PARAMETERS) {
+        // Entire command is received. Handle it, then release its parameters.
+        _coreCmdImpl_handle_command(corecmd, &corecmd->cmd_header,
+                                    corecmd->cmd_param_buf);
+        _free_cmd_param_buf(corecmd);
+    } else {
+        // Not a state handled by this reader; nothing to do.
+        return;
+    }
+
+    // The command has been handled: go back to waiting for the next
+    // command header.
+    corecmd->cmd_state = EXPECTS_HEADER;
+    asyncReader_init(&corecmd->async_reader, &corecmd->cmd_header,
+                     sizeof(corecmd->cmd_header), &corecmd->io);
+}
+
+int
+coreCmdImpl_create(int fd)
+{
+    CoreCmdImpl* const svc = &_coreCmdImpl;
+    // Hook the socket into a core looper and arm the reader for a header.
+    svc->sock = fd;
+    svc->looper = looper_newCore();
+    loopIo_init(&svc->io, svc->looper, svc->sock, _coreCmdImpl_io_func, svc);
+    svc->cmd_state = EXPECTS_HEADER;
+    svc->cmd_param_buf = &svc->cmd_param[0];
+    asyncReader_init(&svc->async_reader, &svc->cmd_header,
+                     sizeof(svc->cmd_header), &svc->io);
+    svc->sync_writer = syncsocket_init(fd);
+    if (svc->sync_writer != NULL) {
+        return 0;
+    }
+    derror("Unable to create writer for CoreCmdImpl instance: %s\n", errno_str);
+    coreCmdImpl_destroy();
+    return -1;
+}
+
+void
+coreCmdImpl_destroy(void)
+{
+    // Destroy the writer, and forget it so that a repeated destroy is harmless.
+    if (_coreCmdImpl.sync_writer != NULL) {
+        syncsocket_close(_coreCmdImpl.sync_writer);
+        syncsocket_free(_coreCmdImpl.sync_writer);
+        _coreCmdImpl.sync_writer = NULL;
+    }
+    if (_coreCmdImpl.looper != NULL) {
+        // Stop all I/O that may still be going on.
+        loopIo_done(&_coreCmdImpl.io);
+        looper_free(_coreCmdImpl.looper);
+        _coreCmdImpl.looper = NULL;
+    }
+    _free_cmd_param_buf(&_coreCmdImpl);     // Free allocated memory.
+}
diff --git a/android/protocol/core-commands-impl.h b/android/protocol/core-commands-impl.h
new file mode 100644
index 0000000..8690613
--- /dev/null
+++ b/android/protocol/core-commands-impl.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _ANDROID_PROTOCOL_CORE_COMMANDS_IMPL_H
+#define _ANDROID_PROTOCOL_CORE_COMMANDS_IMPL_H
+
+/*
+ * Contains the Core-side implementation of the "ui-core-control" service that is
+ * part of the UI control protocol. Here we handle UI control commands sent by
+ * the UI to the Core.
+ */
+
+/* Creates and initializes descriptor for the Core-side of the "ui-core-control"
+ * service. Note that there can be only one instance of this service in the core.
+ * Param:
+ *  fd - Socket descriptor for the service.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int coreCmdImpl_create(int fd);
+
+/* Destroys the descriptor for the Core-side of the "ui-core-control" service. */
+extern void coreCmdImpl_destroy();
+
+#endif /* _ANDROID_PROTOCOL_CORE_COMMANDS_IMPL_H */
diff --git a/android/protocol/core-commands-proxy.c b/android/protocol/core-commands-proxy.c
new file mode 100644
index 0000000..1bd0937
--- /dev/null
+++ b/android/protocol/core-commands-proxy.c
@@ -0,0 +1,376 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+/*
+ * Contains the UI-side implementation of the "ui-core-control" service that is
+ * part of the UI control protocol. Here we send UI control commands to the Core.
+ */
+
+#include "console.h"
+#include "android/looper.h"
+#include "android/core-connection.h"
+#include "android/async-utils.h"
+#include "android/sync-utils.h"
+#include "android/utils/debug.h"
+#include "android/utils/panic.h"
+#include "android/protocol/core-commands.h"
+#include "android/protocol/core-commands-proxy.h"
+#include "android/protocol/core-commands-api.h"
+
+/* Descriptor for the UI-side "ui-core-control" service. */
+typedef struct CoreCmdProxy {
+    /* Core connection established for this service. */
+    CoreConnection*     core_connection;
+
+    /* Socket descriptor for the UI service. */
+    int                 sock;
+
+    /* Socket wrapper for sync writes (commands sent to the Core). */
+    SyncSocket*         sync_writer;
+
+    /* Socket wrapper for sync reads (command responses from the Core). */
+    SyncSocket*         sync_reader;
+} CoreCmdProxy;
+
+/* One and only one CoreCmdProxy instance. */
+static CoreCmdProxy  _coreCmdProxy = { 0 };
+
+/* Sends a UI control command (header, then optional parameters) to the core.
+ * Param:
+ *  cmd_type, cmd_param, cmd_param_size - Define the command.
+ * Return:
+ *  0 on success, or < 0 on failure (the failure is also logged).
+ */
+static int
+_coreCmdProxy_send_command(uint8_t cmd_type,
+                           void* cmd_param,
+                           uint32_t cmd_param_size)
+{
+    int status;
+    UICmdHeader header;
+
+    // Prepare the command header.
+    header.cmd_type = cmd_type;
+    header.cmd_param_size = cmd_param_size;
+    status = syncsocket_start_write(_coreCmdProxy.sync_writer);
+    if (!status) {
+        // Send the header.
+        status = syncsocket_write(_coreCmdProxy.sync_writer, &header,
+                                  sizeof(header),
+                                  core_connection_get_timeout(sizeof(header)));
+        // If the header went out and there are parameters, send them too.
+        if (status > 0 && cmd_param != NULL && cmd_param_size > 0) {
+            status = syncsocket_write(_coreCmdProxy.sync_writer, cmd_param,
+                                      cmd_param_size,
+                                      core_connection_get_timeout(cmd_param_size));
+        }
+        status = syncsocket_result(status);
+        syncsocket_stop_write(_coreCmdProxy.sync_writer);
+    }
+    if (status < 0) {
+        derror("Unable to send UI control command %d (size %u): %s\n",
+                cmd_type, cmd_param_size, errno_str);
+    }
+    return status;
+}
+
+/* Reads UI control command response from the core.
+ * Param:
+ *  resp - Upon success contains command response header.
+ *  resp_data - Receives malloc'ed response data that the caller must free, or
+ *      NULL when the response carries no data or when the read has failed.
+ * Return: 0 on success, or < 0 on failure.
+ */
+static int
+_coreCmdProxy_get_response(UICmdRespHeader* resp, void** resp_data)
+{
+    int status = syncsocket_start_read(_coreCmdProxy.sync_reader);
+    *resp_data = NULL;
+    if (!status) {  // Read the header first, then the response data (if any).
+        status = syncsocket_read(_coreCmdProxy.sync_reader, resp,
+                                 sizeof(UICmdRespHeader),
+                                 core_connection_get_timeout(sizeof(UICmdRespHeader)));
+        if (status > 0 && resp->resp_data_size) {
+            *resp_data = malloc(resp->resp_data_size);
+            if (*resp_data == NULL) {
+                APANIC("_coreCmdProxy_get_response is unable to allocate response data buffer.\n");
+            }
+            status = syncsocket_read(_coreCmdProxy.sync_reader, *resp_data,
+                                     resp->resp_data_size,
+                                     core_connection_get_timeout(resp->resp_data_size));
+        }
+        status = syncsocket_result(status);
+        syncsocket_stop_read(_coreCmdProxy.sync_reader);
+    }
+    if (status < 0) {
+        derror("Unable to get UI command response from the Core: %s\n",
+               errno_str);
+        free(*resp_data);  // Never hand out a partially read buffer.
+        *resp_data = NULL;
+    }
+    return status;
+}
+
+/* Tears down _coreCmdProxy: frees writer, reader and the core connection. */
+static void
+_coreCmdProxy_destroy(void)
+{
+    if (_coreCmdProxy.sync_writer != NULL) {
+        syncsocket_close(_coreCmdProxy.sync_writer);
+        syncsocket_free(_coreCmdProxy.sync_writer);
+        _coreCmdProxy.sync_writer = NULL;
+    }
+    if (_coreCmdProxy.sync_reader != NULL) {
+        syncsocket_close(_coreCmdProxy.sync_reader);
+        syncsocket_free(_coreCmdProxy.sync_reader);
+        _coreCmdProxy.sync_reader = NULL;
+    }
+    if (_coreCmdProxy.core_connection != NULL) {
+        core_connection_close(_coreCmdProxy.core_connection);
+        core_connection_free(_coreCmdProxy.core_connection);
+        _coreCmdProxy.core_connection = NULL;
+    }
+}
+
+/* Sends AUICMD_SET_COARSE_ORIENTATION with the given orientation. */
+int corecmd_set_coarse_orientation(AndroidCoarseOrientation orient)
+{
+    UICmdSetCoarseOrientation request;
+    request.orient = orient;
+    return _coreCmdProxy_send_command(AUICMD_SET_COARSE_ORIENTATION,
+                                      &request, sizeof(request));
+}
+
+/* Sends AUICMD_TOGGLE_NETWORK to the Core (a command without parameters). */
+int corecmd_toggle_network()
+{
+    return _coreCmdProxy_send_command(AUICMD_TOGGLE_NETWORK, NULL, 0);
+}
+
+/* Sends AUICMD_TRACE_CONTROL; 'start' is forwarded to the Core as is. */
+int corecmd_trace_control(int start)
+{
+    UICmdTraceControl request;
+    request.start = start;
+    return _coreCmdProxy_send_command(AUICMD_TRACE_CONTROL,
+                                      &request, sizeof(request));
+}
+
+/* Returns the Core's answer (resp.result), or < 0 if the exchange failed. */
+int
+corecmd_is_network_disabled()
+{
+    UICmdRespHeader resp;
+    void* tmp = NULL;
+    int status;
+
+    status = _coreCmdProxy_send_command(AUICMD_CHK_NETWORK_DISABLED, NULL, 0);
+    if (status < 0) {
+        return status;
+    }
+    status = _coreCmdProxy_get_response(&resp, &tmp);
+    // No response data is expected, but never leak it if the Core sends some.
+    free(tmp);
+    return status < 0 ? status : resp.result;
+}
+
+int
+corecmd_get_netspeed(int index, NetworkSpeed** netspeed)
+{
+    UICmdGetNetSpeed req;
+    UICmdRespHeader resp;
+    UICmdGetNetSpeedResp* resp_data = NULL;
+    int status;
+
+    // Send the query, then obtain the response from the core.
+    req.index = index;
+    status = _coreCmdProxy_send_command(AUICMD_GET_NETSPEED, &req, sizeof(req));
+    if (status < 0) {
+        return status;
+    }
+    status = _coreCmdProxy_get_response(&resp, (void**)&resp_data);
+    if (status < 0) {
+        free(resp_data);
+        return status;
+    }
+    if (!resp.result) {
+        // A valid payload is the fixed part followed by two zero-terminated
+        // strings: "name", then "display". Don't trust the sizes blindly.
+        size_t str_size = resp.resp_data_size - sizeof(*resp_data);
+        NetworkSpeed* ret;
+        if (resp_data == NULL || resp.resp_data_size < sizeof(*resp_data) + 2 ||
+            resp_data->name[str_size - 1] != '\0' ||
+            strlen(resp_data->name) + 1 >= str_size) {
+            derror("Malformed AUICMD_GET_NETSPEED response from the Core\n");
+            free(resp_data);
+            return -1;
+        }
+        *netspeed = ret = malloc(sizeof(NetworkSpeed) + str_size);
+        if (ret == NULL) {
+            APANIC("corecmd_get_netspeed is unable to allocate NetworkSpeed\n");
+        }
+        ret->upload = resp_data->upload;
+        ret->download = resp_data->download;
+        ret->name = (char*)ret + sizeof(NetworkSpeed);
+        memcpy((char*)ret->name, resp_data->name, str_size);  // Both strings.
+        ret->display = ret->name + strlen(ret->name) + 1;
+    }
+    free(resp_data);
+    return resp.result;
+}
+
+int
+corecmd_get_netdelay(int index, NetworkLatency** netdelay)
+{
+    UICmdGetNetDelay req;
+    UICmdRespHeader resp;
+    UICmdGetNetDelayResp* resp_data = NULL;
+    int status;
+
+    // Send the query, then obtain the response from the core.
+    req.index = index;
+    status = _coreCmdProxy_send_command(AUICMD_GET_NETDELAY, &req, sizeof(req));
+    if (status < 0) {
+        return status;
+    }
+    status = _coreCmdProxy_get_response(&resp, (void**)&resp_data);
+    if (status < 0) {
+        free(resp_data);
+        return status;
+    }
+    if (!resp.result) {
+        // A valid payload is the fixed part followed by two zero-terminated
+        // strings: "name", then "display". Don't trust the sizes blindly.
+        size_t str_size = resp.resp_data_size - sizeof(*resp_data);
+        NetworkLatency* ret;
+        if (resp_data == NULL || resp.resp_data_size < sizeof(*resp_data) + 2 ||
+            resp_data->name[str_size - 1] != '\0' ||
+            strlen(resp_data->name) + 1 >= str_size) {
+            derror("Malformed AUICMD_GET_NETDELAY response from the Core\n");
+            free(resp_data);
+            return -1;
+        }
+        *netdelay = ret = malloc(sizeof(NetworkLatency) + str_size);
+        if (ret == NULL) {
+            APANIC("corecmd_get_netdelay is unable to allocate NetworkLatency\n");
+        }
+        ret->min_ms = resp_data->min_ms;
+        ret->max_ms = resp_data->max_ms;
+        ret->name = (char*)ret + sizeof(NetworkLatency);
+        memcpy((char*)ret->name, resp_data->name, str_size);  // Both strings.
+        ret->display = ret->name + strlen(ret->name) + 1;
+    }
+    free(resp_data);
+    return resp.result;
+}
+
+int
+corecmd_get_qemu_path(int type,
+                      const char* filename,
+                      char* path,
+                      size_t path_buf_size)
+{
+    UICmdRespHeader resp;
+    char* resp_data = NULL;
+    int status;
+
+    // Initialize and send the query.
+    uint32_t cmd_data_size = sizeof(UICmdGetQemuPath) + strlen(filename) + 1;
+    UICmdGetQemuPath* req = (UICmdGetQemuPath*)malloc(cmd_data_size);
+    if (req == NULL) {
+        APANIC("corecmd_get_qemu_path is unable to allocate %u bytes\n",
+               cmd_data_size);
+    }
+    req->type = type;
+    strcpy(req->filename, filename);
+    status = _coreCmdProxy_send_command(AUICMD_GET_QEMU_PATH, req,
+                                        cmd_data_size);
+    free(req);  // The request buffer is only needed for the send.
+    if (status < 0) {
+        return status;
+    }
+    // Obtain the response from the core.
+    status = _coreCmdProxy_get_response(&resp, (void**)&resp_data);
+    if (status >= 0 && !resp.result && resp_data != NULL && path_buf_size > 0) {
+        // The data may lack a zero-terminator: bound the copy by its size too.
+        size_t len = resp.resp_data_size;
+        if (len > path_buf_size - 1) {
+            len = path_buf_size - 1;
+        }
+        strncpy(path, resp_data, len);
+        path[len] = '\0';
+    }
+    free(resp_data);
+    return status < 0 ? status : resp.result;
+}
+
+/* Returns the Core's answer (resp.result), or < 0 if the exchange failed. */
+int
+corecmd_get_hw_lcd_density(void)
+{
+    UICmdRespHeader resp;
+    void* tmp = NULL;
+    int status;
+
+    status = _coreCmdProxy_send_command(AUICMD_GET_LCD_DENSITY, NULL, 0);
+    if (status < 0) {
+        return status;
+    }
+    status = _coreCmdProxy_get_response(&resp, &tmp);
+    // No response data is expected, but never leak it if the Core sends some.
+    free(tmp);
+    return status < 0 ? status : resp.result;
+}
+
+int
+coreCmdProxy_create(SockAddress* console_socket)
+{
+    char* handshake = NULL;
+
+    // Connect to the ui-core-control service.
+    _coreCmdProxy.core_connection =
+        core_connection_create_and_switch(console_socket, "ui-core-control",
+                                          &handshake);
+    if (_coreCmdProxy.core_connection == NULL) {
+        derror("Unable to connect to the ui-core-control service: %s\n",
+               errno_str);
+        return -1;
+    }
+
+    // Initialize command writer and response reader.
+    _coreCmdProxy.sock = core_connection_get_socket(_coreCmdProxy.core_connection);
+    _coreCmdProxy.sync_writer = syncsocket_init(_coreCmdProxy.sock);
+    if (_coreCmdProxy.sync_writer == NULL) {
+        derror("Unable to initialize CoreCmdProxy writer: %s\n", errno_str);
+        _coreCmdProxy_destroy();
+        free(handshake);  // We own the handshake string once connected.
+        return -1;
+    }
+    _coreCmdProxy.sync_reader = syncsocket_init(_coreCmdProxy.sock);
+    if (_coreCmdProxy.sync_reader == NULL) {
+        derror("Unable to initialize CoreCmdProxy reader: %s\n", errno_str);
+        _coreCmdProxy_destroy();
+        free(handshake);
+        return -1;
+    }
+
+    fprintf(stdout, "ui-core-control is now connected to the core at %s.",
+            sock_address_to_string(console_socket));
+    if (handshake != NULL) {
+        if (handshake[0] != '\0') {
+            fprintf(stdout, " Handshake: %s", handshake);
+        }
+        free(handshake);
+    }
+    fprintf(stdout, "\n");
+    return 0;
+}
diff --git a/android/protocol/core-commands-proxy.h b/android/protocol/core-commands-proxy.h
new file mode 100644
index 0000000..8303ed4
--- /dev/null
+++ b/android/protocol/core-commands-proxy.h
@@ -0,0 +1,32 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _ANDROID_PROTOCOL_CORE_COMMANDS_PROXY_H
+#define _ANDROID_PROTOCOL_CORE_COMMANDS_PROXY_H
+
+#include "sockets.h"
+
+/*
+ * Contains the UI-side implementation of the "ui-core-control" service that is
+ * part of the UI control protocol. Here we send UI control commands to the Core.
+ */
+
+/* Creates and initializes descriptor for the UI-side of the "ui-core-control"
+ * service. Note that there can be only one instance of this service in the UI.
+ * Param:
+ *  console_socket - Addresses Core's console.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int coreCmdProxy_create(SockAddress* console_socket);
+
+#endif /* _ANDROID_PROTOCOL_CORE_COMMANDS_PROXY_H */
diff --git a/android/protocol/core-commands-qemu.c b/android/protocol/core-commands-qemu.c
new file mode 100644
index 0000000..03fef64
--- /dev/null
+++ b/android/protocol/core-commands-qemu.c
@@ -0,0 +1,108 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+/*
+ * Contains implementation of the API for calling into the Core with the UI
+ * control commands for standalone (monolithic) emulator.
+ */
+
+#include "android/android.h"
+#include "android/globals.h"
+#include "android/hw-sensors.h"
+#include "telephony/modem_driver.h"
+#include "trace.h"
+#include "audio/audio.h"
+#include "android/protocol/core-commands-api.h"
+
+/* Implemented in vl-android.c */
+extern char* qemu_find_file(int type, const char* filename);
+
+/* Monolithic build: applies the orientation directly; always returns 0. */
+int corecmd_set_coarse_orientation(AndroidCoarseOrientation orient)
+{
+    android_sensors_set_coarse_orientation(orient);
+    return 0;
+}
+
+/* Flips qemu_net_disable and mirrors the new state into the modem, if any. */
+int corecmd_toggle_network()
+{
+    qemu_net_disable = !qemu_net_disable;
+    if (!android_modem) {
+        return 0;
+    }
+    amodem_set_data_registration(android_modem,
+                                 qemu_net_disable ? A_REGISTRATION_UNREGISTERED
+                                                  : A_REGISTRATION_HOME);
+    return 0;
+}
+
+int corecmd_trace_control(int start)
+{
+    if (!start) {   // Zero means "stop"; any other value means "start".
+        stop_tracing();
+        return 0;
+    }
+    start_tracing();
+    return 0;
+}
+
+int corecmd_is_network_disabled()
+{
+    return qemu_net_disable;
+}
+
+int
+corecmd_get_netspeed(int index, NetworkSpeed** netspeed)
+{
+    if (index < 0 || index >= android_netspeeds_count ||
+        android_netspeeds[index].name == NULL ||
+        (*netspeed = malloc(sizeof(NetworkSpeed))) == NULL) {
+        return -1;  // Bad index, entry without a name, or out of memory.
+    }
+    memcpy(*netspeed, &android_netspeeds[index], sizeof(NetworkSpeed));
+    return 0;
+}
+
+int
+corecmd_get_netdelay(int index, NetworkLatency** netdelay)
+{
+    if (index < 0 || index >= android_netdelays_count ||
+        android_netdelays[index].name == NULL ||
+        (*netdelay = malloc(sizeof(NetworkLatency))) == NULL) {
+        return -1;  // Bad index, entry without a name, or out of memory.
+    }
+    memcpy(*netdelay, &android_netdelays[index], sizeof(NetworkLatency));
+    return 0;
+}
+
+int
+corecmd_get_qemu_path(int type, const char* filename,
+                      char* path, size_t path_buf_size)
+{
+    char* filepath = qemu_find_file(type, filename);
+    if (filepath == NULL) {
+        return -1;
+    }
+    if (path_buf_size > 0) {  // Avoid writing path[-1] for an empty buffer.
+        strncpy(path, filepath, path_buf_size - 1);
+        path[path_buf_size - 1] = '\0';
+    }
+    qemu_free(filepath);
+    return 0;
+}
+
+/* Monolithic build: reads the density straight from android_hw. */
+int corecmd_get_hw_lcd_density(void)
+{
+    return android_hw->hw_lcd_density;
+}
diff --git a/android/protocol/core-commands.h b/android/protocol/core-commands.h
new file mode 100644
index 0000000..3ac0ca5
--- /dev/null
+++ b/android/protocol/core-commands.h
@@ -0,0 +1,104 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _ANDROID_PROTOCOL_CORE_COMMANDS_H
+#define _ANDROID_PROTOCOL_CORE_COMMANDS_H
+
+/*
+ * Contains declarations related to the UI control commands sent by the UI and
+ * handled by the Core.
+ */
+
+#include "android/hw-sensors.h"
+#include "android/protocol/ui-common.h"
+
+/* Sets coarse orientation. */
+#define AUICMD_SET_COARSE_ORIENTATION       1
+
+/* Toggles the network. */
+#define AUICMD_TOGGLE_NETWORK               2
+
+/* Starts / stops the tracing. */
+#define AUICMD_TRACE_CONTROL                3
+
+/* Checks if network is disabled. */
+#define AUICMD_CHK_NETWORK_DISABLED         4
+
+/* Gets network speed. */
+#define AUICMD_GET_NETSPEED                 5
+
+/* Gets network delays */
+#define AUICMD_GET_NETDELAY                 6
+
+/* Gets path to a QEMU file on local host. */
+#define AUICMD_GET_QEMU_PATH                7
+
+/* Gets LCD density. */
+#define AUICMD_GET_LCD_DENSITY              8
+
+/* Formats AUICMD_SET_COARSE_ORIENTATION UI control command parameters. */
+typedef struct UICmdSetCoarseOrientation {
+    AndroidCoarseOrientation    orient;
+} UICmdSetCoarseOrientation;
+
+/* Formats AUICMD_TRACE_CONTROL UI control command parameters. */
+typedef struct UICmdTraceControl {
+    int start;
+} UICmdTraceControl;
+
+/* Formats AUICMD_GET_NETSPEED UI control command parameters. */
+typedef struct UICmdGetNetSpeed {
+    int index;
+} UICmdGetNetSpeed;
+
+/* Formats AUICMD_GET_NETSPEED UI control command response.
+ * Instances of this structure contain the content of the NetworkSpeed
+ * structure, including the actual "name" and "display" strings. */
+typedef struct UICmdGetNetSpeedResp {
+    int     upload;
+    int     download;
+    /* Zero-terminated NetworkSpeed's "name" string starts here. The "display"
+     * string begins inside this structure, right after the "name"'s
+     * zero-terminator. */
+    char    name[0];
+} UICmdGetNetSpeedResp;
+
+/* Formats AUICMD_GET_NETDELAY UI control command parameters. */
+typedef struct UICmdGetNetDelay {
+    int index;
+} UICmdGetNetDelay;
+
+/* Formats AUICMD_GET_NETDELAY UI control command response.
+ * Instances of this structure contain the content of the NetworkLatency
+ * structure, including the actual "name" and "display" strings. */
+typedef struct UICmdGetNetDelayResp {
+    int     min_ms;
+    int     max_ms;
+    /* Zero-terminated NetworkLatency's "name" string starts here. The "display"
+     * string begins inside this structure, right after the "name"'s
+     * zero-terminator. */
+    char    name[0];
+} UICmdGetNetDelayResp;
+
+/* Formats AUICMD_GET_QEMU_PATH UI control command parameters. */
+typedef struct UICmdGetQemuPath {
+    int     type;
+    char    filename[0];
+} UICmdGetQemuPath;
+
+/* Formats AUICMD_GET_QEMU_PATH UI control command response. */
+typedef struct UICmdGetQemuPathResp {
+    /* Queried qemu path begins here. */
+    char    path[0];
+} UICmdGetQemuPathResp;
+
+#endif /* _ANDROID_PROTOCOL_CORE_COMMANDS_H */
diff --git a/android/protocol/ui-commands-api.h b/android/protocol/ui-commands-api.h
new file mode 100644
index 0000000..d9fe6b0
--- /dev/null
+++ b/android/protocol/ui-commands-api.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _ANDROID_PROTOCOL_UI_COMMANDS_API_H
+#define _ANDROID_PROTOCOL_UI_COMMANDS_API_H
+
+/*
+ * Contains the API for calling into the UI with the Core control commands.
+ */
+
+/* Changes the scale of the emulator window at runtime.
+ * Param:
+ *  scale, is_dpi - New window scale parameters
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int uicmd_set_window_scale(double scale, int is_dpi);
+
+/* This is a temporary redeclaration of AndroidHwLightBrightnessFunc declared
+ * in android/hw-control.h. We redeclare it here in order to keep type
+ * consistency between uicmd_set_brightness_change_callback and the
+ * light_brightness field of the AndroidHwControlFuncs structure.
+ */
+typedef void  (*AndroidHwLightBrightnessCallback)(void* opaque,
+                                                  const char* light,
+                                                  int  brightness);
+
+/* Registers a UI callback to be called when brightness is changed by the core. */
+extern int uicmd_set_brightness_change_callback(AndroidHwLightBrightnessCallback callback,
+                                                void* opaque);
+
+#endif /* _ANDROID_PROTOCOL_UI_COMMANDS_API_H */
diff --git a/android/protocol/ui-commands-impl.c b/android/protocol/ui-commands-impl.c
new file mode 100644
index 0000000..456c61e
--- /dev/null
+++ b/android/protocol/ui-commands-impl.c
@@ -0,0 +1,257 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+/*
+ * Contains the UI-side implementation of the "core-ui-control" service that is
+ * part of the UI control protocol. Here we handle UI control commands received
+ * from the Core.
+ */
+
+#include "console.h"
+//#include "android/hw-control.h"
+#include "android/looper.h"
+#include "android/core-connection.h"
+#include "android/async-utils.h"
+#include "android/sync-utils.h"
+#include "android/utils/system.h"
+#include "android/utils/debug.h"
+#include "android/utils/panic.h"
+#include "android/protocol/ui-commands-impl.h"
+#include "android/protocol/ui-commands-api.h"
+
+/* Enumerates states for the command reader in UICmdImpl instance. */
+typedef enum UICmdImplState {
+    /* The reader is waiting on command header. */
+    EXPECTS_HEADER,
+
+    /* The reader is waiting on command parameters. */
+    EXPECTS_PARAMETERS,
+} UICmdImplState;
+
+/* Descriptor for the UI-side of the "core-ui-control" service. */
+typedef struct UICmdImpl {
+    /* Core connection established for this service. */
+    CoreConnection* core_connection;
+
+    /* Socket descriptor for the UI service. */
+    int             sock;
+
+    /* Command reader state. */
+    UICmdImplState  reader_state;
+
+    /* Incoming command header. */
+    UICmdHeader     cmd_header;
+
+    /* Reader's buffer. This field can point to the cmd_header field of this
+     * structure (when we expect a command header), or to a buffer allocated for
+     * the command parameters (when we expect command parameters). */
+    uint8_t*        reader_buffer;
+
+    /* Offset in the reader's buffer where to read next chunk of data. */
+    size_t          reader_offset;
+
+    /* Total number of bytes the reader expects to read. */
+    size_t          reader_bytes;
+} UICmdImpl;
+
+/* Implemented in android/qemulator.c */
+extern void android_emulator_set_window_scale(double scale, int is_dpi);
+
+/* One and only one UICmdImpl instance. */
+static UICmdImpl  _uiCmdImpl;
+
+/* Display brightness change callback. */
+static AndroidHwLightBrightnessCallback _brightness_change_callback = NULL;
+static void* _brightness_change_callback_param = NULL;
+
+/* Destroys UICmdImpl instance: drops the connection and the reader buffer. */
+static void
+_uiCmdImpl_destroy()
+{
+    if (_uiCmdImpl.core_connection != NULL) {
+        // Disable I/O callbacks before the socket goes away.
+        qemu_set_fd_handler(_uiCmdImpl.sock, NULL, NULL, NULL);
+        core_connection_close(_uiCmdImpl.core_connection);
+        core_connection_free(_uiCmdImpl.core_connection);
+        _uiCmdImpl.core_connection = NULL;
+    }
+    // Free the reader buffer only when it is not the embedded cmd_header.
+    if (_uiCmdImpl.reader_buffer != NULL &&
+        _uiCmdImpl.reader_buffer != (uint8_t*)&_uiCmdImpl.cmd_header) {
+        free(_uiCmdImpl.reader_buffer);
+        _uiCmdImpl.reader_buffer = (uint8_t*)&_uiCmdImpl.cmd_header;
+    }
+}
+
+/* Handles UI control command received from the core. A command is ignored if
+ * 'data' is NULL or shorter than the parameters structure of its type.
+ * Param:
+ *  uicmd - UICmdImpl instance that received the command.
+ *  header, data - UI control command header and its parameters (if any).
+ */
+static void
+_uiCmdImpl_handle_command(UICmdImpl* uicmd, const UICmdHeader* header,
+                          const uint8_t* data)
+{
+    switch (header->cmd_type) {
+        case AUICMD_SET_WINDOWS_SCALE:
+        {
+            UICmdSetWindowsScale* cmd = (UICmdSetWindowsScale*)data;
+            if (cmd != NULL && header->cmd_param_size >= sizeof(*cmd)) {
+                android_emulator_set_window_scale(cmd->scale, cmd->is_dpi);
+            }
+            break;
+        }
+        case AUICMD_CHANGE_DISP_BRIGHTNESS:
+        {
+            UICmdChangeDispBrightness* cmd = (UICmdChangeDispBrightness*)data;
+            if (_brightness_change_callback != NULL && cmd != NULL &&
+                header->cmd_param_size >= sizeof(*cmd)) {
+                _brightness_change_callback(_brightness_change_callback_param,
+                                            cmd->light, cmd->brightness);
+            }
+            break;
+        }
+        default:
+            derror("Unknown command %d is received from the Core\n",
+                   header->cmd_type);
+            break;
+    }
+}
+
+/* Asynchronous I/O callback reading UI control commands.
+ * Param:
+ *  opaque - UICmdImpl instance.
+ */
+static void
+_uiCmdImpl_io_read(void* opaque)
+{
+    UICmdImpl* uicmd = opaque;
+    int status;
+
+    // Read requests while they are immediately available.
+    for (;;) {
+        // Read next chunk of data.
+        status = read(uicmd->sock, uicmd->reader_buffer + uicmd->reader_offset,
+                      uicmd->reader_bytes - uicmd->reader_offset);
+        if (status == 0) {
+            /* Disconnection, meaning that the core process got terminated. */
+            fprintf(stderr, "core-ui-control service got disconnected\n");
+            _uiCmdImpl_destroy();
+            return;
+        }
+        if (status < 0) {
+            if (errno == EINTR) {
+                /* loop on EINTR */
+                continue;
+            } else if (errno == EWOULDBLOCK || errno == EAGAIN) {
+                // Chunk is not available at this point. Come back later.
+                return;
+            }
+            // Any other error is fatal for the service. Bail out here: adding
+            // a negative status to reader_offset would corrupt the reader.
+            derror("core-ui-control service read failed: %s\n", errno_str);
+            _uiCmdImpl_destroy();
+            return;
+        }
+
+        uicmd->reader_offset += status;
+        if (uicmd->reader_offset != uicmd->reader_bytes) {
+            // There are still some data left in the pipe.
+            continue;
+        }
+
+        // All expected data has been read. Time to change the state.
+        if (uicmd->reader_state == EXPECTS_HEADER &&
+            uicmd->cmd_header.cmd_param_size) {
+            // Header has been read. Prepare for the command parameters.
+            uicmd->reader_state = EXPECTS_PARAMETERS;
+            uicmd->reader_offset = 0;
+            uicmd->reader_bytes = uicmd->cmd_header.cmd_param_size;
+            uicmd->reader_buffer = malloc(uicmd->reader_bytes);
+            if (uicmd->reader_buffer == NULL) {
+                APANIC("Unable to allocate memory for UI command parameters.\n");
+            }
+            continue;
+        }
+        if (uicmd->reader_state == EXPECTS_HEADER) {
+            // This command doesn't have any parameters. Handle it now.
+            _uiCmdImpl_handle_command(uicmd, &uicmd->cmd_header, NULL);
+        } else {
+            // All command data is in. Handle it, then drop the parameters.
+            _uiCmdImpl_handle_command(uicmd, &uicmd->cmd_header,
+                                      uicmd->reader_buffer);
+            free(uicmd->reader_buffer);
+        }
+        // Prepare for the next command header.
+        uicmd->reader_state = EXPECTS_HEADER;
+        uicmd->reader_offset = 0;
+        uicmd->reader_bytes = sizeof(uicmd->cmd_header);
+        uicmd->reader_buffer = (uint8_t*)&uicmd->cmd_header;
+    }
+}
+
+int
+uiCmdImpl_create(SockAddress* console_socket)
+{
+    char* handshake = NULL;
+
+    // Set up the command reader: the first thing expected is a header.
+    _uiCmdImpl.reader_buffer = (uint8_t*)&_uiCmdImpl.cmd_header;
+    _uiCmdImpl.reader_state = EXPECTS_HEADER;
+    _uiCmdImpl.reader_offset = 0;
+    _uiCmdImpl.reader_bytes = sizeof(UICmdHeader);
+
+    // Connect to the core-ui-control service.
+    _uiCmdImpl.core_connection =
+        core_connection_create_and_switch(console_socket, "core-ui-control",
+                                          &handshake);
+    if (_uiCmdImpl.core_connection == NULL) {
+        derror("Unable to connect to the core-ui-control service: %s\n",
+               errno_str);
+        return -1;
+    }
+
+    // Initialize UI command reader: register the socket's read callback.
+    _uiCmdImpl.sock = core_connection_get_socket(_uiCmdImpl.core_connection);
+    if (qemu_set_fd_handler(_uiCmdImpl.sock, _uiCmdImpl_io_read, NULL,
+                            &_uiCmdImpl)) {
+        derror("Unable to set up UI _uiCmdImpl_io_read callback: %s\n",
+               errno_str);
+        _uiCmdImpl_destroy();
+        if (handshake != NULL) {
+            free(handshake);
+        }
+        return -1;
+    }
+
+    fprintf(stdout, "core-ui-control is now connected to the core at %s.",
+            sock_address_to_string(console_socket));
+    if (handshake != NULL) {
+        if (handshake[0] != '\0') {
+            fprintf(stdout, " Handshake: %s", handshake);
+        }
+        free(handshake);
+    }
+    fprintf(stdout, "\n");
+
+    return 0;
+}
+
+/* Remembers the UI brightness callback and its parameter; returns 0. */
+int uicmd_set_brightness_change_callback(
+        AndroidHwLightBrightnessCallback callback, void* opaque)
+{
+    _brightness_change_callback_param = opaque;
+    _brightness_change_callback = callback;
+    return 0;
+}
diff --git a/android/protocol/ui-commands-impl.h b/android/protocol/ui-commands-impl.h
new file mode 100644
index 0000000..0e5b52f
--- /dev/null
+++ b/android/protocol/ui-commands-impl.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _ANDROID_PROTOCOL_UI_COMMANDS_IMPL_H
+#define _ANDROID_PROTOCOL_UI_COMMANDS_IMPL_H
+
+#include "sockets.h"
+#include "android/protocol/ui-commands.h"
+
+/*
+ * Contains the UI-side implementation of the "core-ui-control" service that is
+ * part of the UI control protocol. Here we handle UI control commands sent by
+ * the Core to the UI.
+ */
+
+/* Creates and initializes descriptor for the UI-side of the "core-ui-control"
+ * service. Note that there can be only one instance of this service in the UI.
+ * Param:
+ *  console_socket - Addresses Core's console.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int uiCmdImpl_create(SockAddress* console_socket);
+
+#endif /* _ANDROID_PROTOCOL_UI_COMMANDS_IMPL_H */
diff --git a/android/protocol/ui-commands-proxy.c b/android/protocol/ui-commands-proxy.c
new file mode 100644
index 0000000..76bf883
--- /dev/null
+++ b/android/protocol/ui-commands-proxy.c
@@ -0,0 +1,209 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+/*
+ * Contains the Core-side implementation of the "core-ui-control" service that is
+ * part of the UI control protocol. Here we send UI control commands to the UI.
+ */
+
+#include "android/android.h"
+#include "android/hw-control.h"
+#include "android/looper.h"
+#include "android/async-utils.h"
+#include "android/sync-utils.h"
+#include "android/utils/debug.h"
+#include "android/protocol/ui-commands.h"
+#include "android/protocol/ui-commands-proxy.h"
+#include "android/protocol/ui-commands-api.h"
+
+/* Descriptor for the UI commands proxy. */
+typedef struct UICmdProxy {
+    /* I/O associated with this descriptor. */
+    LoopIo          io;
+
+    /* Looper associated with this descriptor. */
+    Looper*         looper;
+
+    /* Writer to send UI commands. */
+    SyncSocket*     sync_writer;
+
+    /* Socket descriptor for this service. */
+    int             sock;
+} UICmdProxy;
+
+/* One and only one UICmdProxy instance. */
+static UICmdProxy    _uiCmdProxy;
+
+/* Implemented in android/console.c */
+extern void destroy_uicmd_client(void);
+
+/* Calculates timeout for transferring the given number of bytes via socket.
+ * Return:
+ *  Number of milliseconds during which the entire number of bytes is expected
+ *  to be transferred via socket.
+ */
+static int
+_uiCmdProxy_get_timeout(size_t data_size)
+{
+    // Min 2 seconds + 10 millisec for each transferring byte.
+    // TODO: Come up with better arithmetic here.
+    return 2000 + data_size * 10;
+}
+
+/* Sends request to the UI client of this service.
+ * Param:
+ *  cmd_type, cmd_param, cmd_param_size - Define the command to send.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+static int
+_uiCmdProxy_send_command(uint8_t cmd_type,
+                         void* cmd_param,
+                         uint32_t cmd_param_size)
+{
+    UICmdHeader header;
+    int status = syncsocket_start_write(_uiCmdProxy.sync_writer);
+    if (!status) {
+        // Initialize and send the header.
+        header.cmd_type = cmd_type;
+        header.cmd_param_size = cmd_param_size;
+        status = syncsocket_write(_uiCmdProxy.sync_writer, &header, sizeof(header),
+                                  _uiCmdProxy_get_timeout(sizeof(header)));
+        // If there are command parameters, send them too.
+        if (status > 0 && cmd_param != NULL && cmd_param_size > 0) {
+            status = syncsocket_write(_uiCmdProxy.sync_writer, cmd_param,
+                                      cmd_param_size,
+                                      _uiCmdProxy_get_timeout(cmd_param_size));
+        }
+        status = syncsocket_result(status);
+        syncsocket_stop_write(_uiCmdProxy.sync_writer);
+    }
+    if (status < 0) {
+        derror("Send UI command %d (%u bytes) has failed: %s\n",
+               cmd_type, cmd_param_size, errno_str);
+    }
+    return status;
+}
+
+/* Asynchronous I/O callback for UICmdProxy instance.
+ * We expect this callback to be called only on UI detachment condition. In this
+ * case the event should be LOOP_IO_READ, and read should fail with errno set
+ * to ECONNRESET.
+ * Param:
+ *  opaque - UICmdProxy instance.
+ */
+static void
+_uiCmdProxy_io_func(void* opaque, int fd, unsigned events)
+{
+    UICmdProxy* uicmd = (UICmdProxy*)opaque;
+    AsyncReader reader;
+    AsyncStatus status;
+    uint8_t read_buf[1];
+
+    if (events & LOOP_IO_WRITE) {
+        derror("Unexpected LOOP_IO_WRITE in _uiCmdProxy_io_func.\n");
+        return;
+    }
+
+    // Try to read
+    asyncReader_init(&reader, read_buf, sizeof(read_buf), &uicmd->io);
+    status = asyncReader_read(&reader, &uicmd->io);
+    // We expect only error status here.
+    if (status != ASYNC_ERROR) {
+        derror("Unexpected read status %d in _uiCmdProxy_io_func\n", status);
+        return;
+    }
+    // We expect only socket disconnection error here.
+    if (errno != ECONNRESET) {
+        derror("Unexpected read error %d (%s) in _uiCmdProxy_io_func.\n",
+               errno, errno_str);
+        return;
+    }
+
+    // Client got disconnected.
+    destroy_uicmd_client();
+}
+/* a callback function called when the system wants to change the brightness
+ * of a given light. 'light' is a string which can be one of:
+ * 'lcd_backlight', 'button_backlight' or 'Keyboard_backlight'
+ *
+ * brightness is an integer (acceptable range are 0..255), however the
+ * default is around 105, and we probably don't want to dim the emulator's
+ * output at that level.
+ */
+static void
+_uiCmdProxy_brightness_change_callback(void* opaque,
+                                       const char* light,
+                                       int brightness)
+{
+    // Calculate size of the command parameters.
+    const size_t cmd_size = sizeof(UICmdChangeDispBrightness) + strlen(light) + 1;
+    // Allocate and initialize parameters.
+    UICmdChangeDispBrightness* cmd =
+        (UICmdChangeDispBrightness*)qemu_malloc(cmd_size);
+    cmd->brightness = brightness;
+    strcpy(cmd->light, light);
+    // Send the command.
+    _uiCmdProxy_send_command(AUICMD_CHANGE_DISP_BRIGHTNESS, cmd, cmd_size);
+    qemu_free(cmd);
+}
+
+int
+uiCmdProxy_create(int fd)
+{
+    // Initialize the only UICmdProxy instance.
+    _uiCmdProxy.sock = fd;
+    _uiCmdProxy.looper = looper_newCore();
+    loopIo_init(&_uiCmdProxy.io, _uiCmdProxy.looper, _uiCmdProxy.sock,
+                _uiCmdProxy_io_func, &_uiCmdProxy);
+    loopIo_wantRead(&_uiCmdProxy.io);
+    _uiCmdProxy.sync_writer = syncsocket_init(fd);
+    if (_uiCmdProxy.sync_writer == NULL) {
+        derror("Unable to initialize UICmdProxy writer: %s\n", errno_str);
+        uiCmdProxy_destroy();
+        return -1;
+    }
+    {
+        // Set brightness change callback, so we can notify
+        // the UI about the event.
+        AndroidHwControlFuncs  funcs;
+        funcs.light_brightness = _uiCmdProxy_brightness_change_callback;
+        android_hw_control_init(&_uiCmdProxy, &funcs);
+    }
+    return 0;
+}
+
+void
+uiCmdProxy_destroy(void)
+{
+    // Destroy the sync writer and forget it, so a repeated call is harmless.
+    if (_uiCmdProxy.sync_writer != NULL) {
+        syncsocket_close(_uiCmdProxy.sync_writer);
+        syncsocket_free(_uiCmdProxy.sync_writer);
+        _uiCmdProxy.sync_writer = NULL;
+    }
+    if (_uiCmdProxy.looper != NULL) {
+        loopIo_done(&_uiCmdProxy.io);  // Stop all I/O that may still be going on.
+        looper_free(_uiCmdProxy.looper);
+        _uiCmdProxy.looper = NULL;
+    }
+    _uiCmdProxy.sock = -1;
+}
+
+int
+uicmd_set_window_scale(double scale, int is_dpi)
+{
+    UICmdSetWindowsScale cmd;
+    cmd.scale = scale;
+    cmd.is_dpi = is_dpi;
+    return _uiCmdProxy_send_command(AUICMD_SET_WINDOWS_SCALE, &cmd, sizeof(cmd));
+}
diff --git a/android/protocol/ui-commands-proxy.h b/android/protocol/ui-commands-proxy.h
new file mode 100644
index 0000000..8627537
--- /dev/null
+++ b/android/protocol/ui-commands-proxy.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _ANDROID_PROTOCOL_UI_COMMANDS_PROXY_H
+#define _ANDROID_PROTOCOL_UI_COMMANDS_PROXY_H
+
+/*
+ * Contains the Core-side implementation of the "core-ui-control" service that is
+ * part of the UI control protocol. Here we send UI control commands to the UI.
+ */
+
+/* Creates and initializes descriptor for the Core-side of the "core-ui-control"
+ * service. Note that there can be only one instance of this service in the core.
+ * Param:
+ *  fd - Socket descriptor for the proxy.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int uiCmdProxy_create(int fd);
+
+/* Destroys the descriptor for the Core-side of the "core-ui-control" service. */
+extern void uiCmdProxy_destroy(void);
+
+/* Changes the scale of the emulator window at runtime.
+ * Param:
+ *  scale, is_dpi - New window scale parameters
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int uicmd_set_window_scale(double scale, int is_dpi);
+
+#endif /* _ANDROID_PROTOCOL_UI_COMMANDS_PROXY_H */
diff --git a/android/protocol/ui-commands-qemu.c b/android/protocol/ui-commands-qemu.c
new file mode 100644
index 0000000..3dbed31
--- /dev/null
+++ b/android/protocol/ui-commands-qemu.c
@@ -0,0 +1,40 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+/*
+ * Contains implementation of the API for calling into the UI with the Core
+ * control commands for standalone (monolithic) emulator.
+ */
+
+#include "android/android.h"
+#include "android/hw-control.h"
+#include "android/protocol/ui-commands-api.h"
+
+/* Implemented in android/qemulator.c */
+extern void android_emulator_set_window_scale(double scale, int is_dpi);
+
+int
+uicmd_set_window_scale(double scale, int is_dpi)
+{
+    android_emulator_set_window_scale(scale, is_dpi);
+    return 0;
+}
+
+int
+uicmd_set_brightness_change_callback(AndroidHwLightBrightnessCallback callback,
+                                     void* opaque)
+{
+    AndroidHwControlFuncs  funcs;
+    funcs.light_brightness = callback;
+    android_hw_control_init(opaque, &funcs);
+    return 0;
+}
diff --git a/android/protocol/ui-commands.h b/android/protocol/ui-commands.h
new file mode 100644
index 0000000..4e47b83
--- /dev/null
+++ b/android/protocol/ui-commands.h
@@ -0,0 +1,44 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _ANDROID_PROTOCOL_UI_COMMANDS_H
+#define _ANDROID_PROTOCOL_UI_COMMANDS_H
+
+/*
+ * Contains declarations related to the UI control commands sent by the Core and
+ * handled by the UI.
+ */
+
+#include "android/protocol/ui-common.h"
+
+/* Sets window scale. */
+#define AUICMD_SET_WINDOWS_SCALE        1
+
+/* Changes display brightness. */
+#define AUICMD_CHANGE_DISP_BRIGHTNESS   2
+
+/* Formats AUICMD_SET_WINDOWS_SCALE UI control command parameters.
+ * Contains parameters required by android_emulator_set_window_scale routine.
+ */
+typedef struct UICmdSetWindowsScale {
+    double  scale;
+    int     is_dpi;
+} UICmdSetWindowsScale;
+
+/* Formats AUICMD_CHANGE_DISP_BRIGHTNESS UI control command parameters.
+ * 'light' is a variable-length, zero-terminated light name. */
+typedef struct UICmdChangeDispBrightness {
+    int     brightness;
+    char    light[];
+} UICmdChangeDispBrightness;
+
+#endif /* _ANDROID_PROTOCOL_UI_COMMANDS_H */
diff --git a/android/protocol/ui-common.h b/android/protocol/ui-common.h
new file mode 100644
index 0000000..003ed6d
--- /dev/null
+++ b/android/protocol/ui-common.h
@@ -0,0 +1,53 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _ANDROID_PROTOCOL_UI_COMMON_H
+#define _ANDROID_PROTOCOL_UI_COMMON_H
+
+/*
+ * Contains declarations for UI control protocol used by both the Core,
+ * and the UI.
+ */
+
+/* UI control command header.
+ * Every UI control command sent by the Core, or by the UI begins with this
+ * header, immediately followed by the command parameters (if there are any).
+ * Command type is defined by cmd_type field of this header. If command doesn't
+ * have any command-specific parameters, cmd_param_size field of this header
+ * must be 0.
+ */
+typedef struct UICmdHeader {
+    /* Command type. */
+    uint8_t     cmd_type;
+
+    /* Byte size of the buffer containing parameters for the command defined by
+     * the cmd_type field. The buffer containing parameters must immediately
+     * follow this header. If command doesn't have any parameters, this field
+     * must be 0 */
+    uint32_t    cmd_param_size;
+} UICmdHeader;
+
+/* UI control command response header.
+ * If UI control command assumes a response from the remote end, the response
+ * must start with this header, immediately followed by the response data buffer.
+ */
+typedef struct UICmdRespHeader {
+    /* Result of the command handling. */
+    int         result;
+
+    /* Byte size of the buffer containing response data immediately following
+     * this header. If there are no response data for the command, this field
+     * must be 0. */
+    uint32_t    resp_data_size;
+} UICmdRespHeader;
+
+#endif /* _ANDROID_PROTOCOL_UI_COMMON_H */
diff --git a/android/protocol/user-events-impl.c b/android/protocol/user-events-impl.c
new file mode 100644
index 0000000..5c9525e
--- /dev/null
+++ b/android/protocol/user-events-impl.c
@@ -0,0 +1,206 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+/*
+ * Contains the Core-side of the "user events" service. Here we receive and
+ * handle user events sent from the UI.
+ */
+
+#include "user-events.h"
+#include "android/globals.h"
+#include "android/android.h"
+#include "android/looper.h"
+#include "android/async-utils.h"
+#include "android/sync-utils.h"
+#include "android/utils/system.h"
+#include "android/utils/debug.h"
+#include "android/protocol/user-events-protocol.h"
+#include "android/protocol/user-events-impl.h"
+
+/* Enumerates state values for the event reader in the UserEventsImpl descriptor.
+ */
+typedef enum UserEventsImplState {
+    /* The reader is waiting on event header. */
+    EXPECTS_HEADER,
+
+    /* The reader is waiting on event parameters. */
+    EXPECTS_PARAMETERS,
+} UserEventsImplState;
+
+
+/* Core user events service descriptor. */
+typedef struct UserEventsImpl {
+    /* Reader to receive user events. */
+    AsyncReader         user_events_reader;
+
+    /* I/O associated with this descriptor. */
+    LoopIo              io;
+
+    /* Looper used to communicate user events. */
+    Looper*             looper;
+
+    /* Socket for this service. */
+    int                 sock;
+
+    /* State of the event reader (see UserEventsImplState for possible values). */
+    UserEventsImplState state;
+
+    /* Current event header. */
+    UserEventHeader     event_header;
+
+    /* Current event parameters. */
+    union {
+        UserEventGeneric    generic_event;
+        UserEventMouse      mouse_event;
+        UserEventKeycode    keycode_event;
+    };
+} UserEventsImpl;
+
+/* Implemented in android/console.c */
+extern void destroy_user_events_client(void);
+
+/* One and only one UserEventsImpl instance. */
+static UserEventsImpl   _UserEventsImpl;
+
+/* Asynchronous I/O callback reading user events.
+ * Param:
+ *  opaque - UserEventsImpl instance.
+ */
+static void
+_userEventsImpl_io_func(void* opaque, int fd, unsigned events)
+{
+    UserEventsImpl* ueimpl;
+    AsyncStatus status;
+
+    if (events & LOOP_IO_WRITE) {
+        // We don't use async writer here, so we don't expect
+        // any write callbacks.
+        derror("Unexpected LOOP_IO_WRITE in _userEventsImpl_io_func\n");
+        return;
+    }
+
+    ueimpl = (UserEventsImpl*)opaque;
+    // Read whatever is expected from the socket.
+    status = asyncReader_read(&ueimpl->user_events_reader, &ueimpl->io);
+
+
+    switch (status) {
+        case ASYNC_COMPLETE:
+            switch (ueimpl->state) {
+                case EXPECTS_HEADER:
+                    // We just read event header. Now we expect event parameters.
+                    ueimpl->state = EXPECTS_PARAMETERS;
+                    // Setup the reader depending on the event type.
+                    switch (ueimpl->event_header.event_type) {
+                        case AUSER_EVENT_MOUSE:
+                            asyncReader_init(&ueimpl->user_events_reader,
+                                             &ueimpl->mouse_event,
+                                             sizeof(ueimpl->mouse_event),
+                                             &ueimpl->io);
+                            break;
+
+                        case AUSER_EVENT_KEYCODE:
+                            asyncReader_init(&ueimpl->user_events_reader,
+                                             &ueimpl->keycode_event,
+                                             sizeof(ueimpl->keycode_event),
+                                             &ueimpl->io);
+                            break;
+
+                        case AUSER_EVENT_GENERIC:
+                            asyncReader_init(&ueimpl->user_events_reader,
+                                             &ueimpl->generic_event,
+                                             sizeof(ueimpl->generic_event),
+                                             &ueimpl->io);
+                            break;
+
+                        default:
+                            derror("Unexpected user event type %d\n",
+                                   ueimpl->event_header.event_type);
+                            break;
+                    }
+                    break;
+
+                case EXPECTS_PARAMETERS:
+                    // We just read event parameters. Lets fire the event.
+                    switch (ueimpl->event_header.event_type) {
+                        case AUSER_EVENT_MOUSE:
+                            user_event_mouse(ueimpl->mouse_event.dx,
+                                             ueimpl->mouse_event.dy,
+                                             ueimpl->mouse_event.dz,
+                                             ueimpl->mouse_event.buttons_state);
+                            break;
+
+                        case AUSER_EVENT_KEYCODE:
+                            user_event_keycode(ueimpl->keycode_event.keycode);
+                            break;
+
+                        case AUSER_EVENT_GENERIC:
+                            user_event_generic(ueimpl->generic_event.type,
+                                               ueimpl->generic_event.code,
+                                               ueimpl->generic_event.value);
+                            break;
+
+                        default:
+                            derror("Unexpected user event type %d\n",
+                                   ueimpl->event_header.event_type);
+                            break;
+                    }
+                    // Prepare to receive the next event header.
+                    ueimpl->event_header.event_type = -1;
+                    ueimpl->state = EXPECTS_HEADER;
+                    asyncReader_init(&ueimpl->user_events_reader,
+                                     &ueimpl->event_header,
+                                     sizeof(ueimpl->event_header), &ueimpl->io);
+                    break;
+            }
+            break;
+        case ASYNC_ERROR:
+            loopIo_dontWantRead(&ueimpl->io);
+            if (errno == ECONNRESET) {
+                // UI has exited. We need to destroy user event service.
+                destroy_user_events_client();
+            } else {
+                derror("User event read error %d -> %s\n", errno, errno_str);
+            }
+            break;
+
+        case ASYNC_NEED_MORE:
+            // Transfer will eventually come back into this routine.
+            return;
+    }
+}
+
+int
+userEventsImpl_create(int fd)
+{
+    _UserEventsImpl.sock = fd;
+    _UserEventsImpl.event_header.event_type = -1;
+    _UserEventsImpl.state = EXPECTS_HEADER;
+    _UserEventsImpl.looper = looper_newCore();
+    loopIo_init(&_UserEventsImpl.io, _UserEventsImpl.looper, _UserEventsImpl.sock,
+                _userEventsImpl_io_func, &_UserEventsImpl);
+    asyncReader_init(&_UserEventsImpl.user_events_reader,
+                     &_UserEventsImpl.event_header,
+                     sizeof(_UserEventsImpl.event_header), &_UserEventsImpl.io);
+    return 0;
+}
+
+void
+userEventsImpl_destroy(void)
+{
+    if (_UserEventsImpl.looper != NULL) {
+        // Stop all I/O that may still be going on.
+        loopIo_done(&_UserEventsImpl.io);
+        looper_free(_UserEventsImpl.looper);
+        _UserEventsImpl.looper = NULL;
+    }
+}
diff --git a/android/protocol/user-events-impl.h b/android/protocol/user-events-impl.h
new file mode 100644
index 0000000..af5d5a4
--- /dev/null
+++ b/android/protocol/user-events-impl.h
@@ -0,0 +1,33 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+/*
+ * Contains the Core-side of the "user events" service. Here we receive and
+ * handle user events sent from the UI.
+ */
+
+#ifndef _ANDROID_PROTOCOL_USER_EVENTS_IMPL_H
+#define _ANDROID_PROTOCOL_USER_EVENTS_IMPL_H
+
+/* Creates and initializes descriptor for the Core-side of the "user-events"
+ * service. Note that there can be only one instance of this service in the core.
+ * Param:
+ *  fd - Socket descriptor for the service.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int userEventsImpl_create(int fd);
+
+/* Destroys the descriptor for the Core-side of the "user-events" service. */
+extern void userEventsImpl_destroy(void);
+
+#endif /* _ANDROID_PROTOCOL_USER_EVENTS_IMPL_H */
diff --git a/android/user-events-common.h b/android/protocol/user-events-protocol.h
similarity index 65%
rename from android/user-events-common.h
rename to android/protocol/user-events-protocol.h
index 4d02c09..c1e64e2 100644
--- a/android/user-events-common.h
+++ b/android/protocol/user-events-protocol.h
@@ -10,10 +10,14 @@
 ** GNU General Public License for more details.
 */
 
-#ifndef _ANDROID_USER_EVENTS_COMMON_H
-#define _ANDROID_USER_EVENTS_COMMON_H
+#ifndef _ANDROID_PROTOCOL_USER_EVENTS_H
+#define _ANDROID_PROTOCOL_USER_EVENTS_H
 
-#include "globals.h"
+/*
+ * Contains declarations related to the UI events handled by the Core.
+ */
+
+#include "android/globals.h"
 
 /* Mouse event. */
 #define AUSER_EVENT_MOUSE     0
@@ -22,15 +26,16 @@
 /* Generic event. */
 #define AUSER_EVENT_GENERIC   2
 
-/* Header for user event message sent from UI to the core. */
+/* Header for user event message sent from the UI to the Core.
+ * Every user event sent by the UI begins with this header, immediately followed
+ * by the event parameters (if there are any).
+ */
 typedef struct UserEventHeader {
     /* Event type. See AUSER_EVENT_XXX for possible values. */
     uint8_t event_type;
 } UserEventHeader;
 
-/* Formats mouse event message (AUSER_EVENT_MOUSE) sent from
- * UI to the core.
- */
+/* Formats mouse event message (AUSER_EVENT_MOUSE) */
 typedef struct UserEventMouse {
     int         dx;
     int         dy;
@@ -38,20 +43,16 @@
     unsigned    buttons_state;
 } UserEventMouse;
 
-/* Formats keycode event message (AUSER_EVENT_KEYCODE) sent from
- * UI to the core.
- */
+/* Formats keycode event message (AUSER_EVENT_KEYCODE) */
 typedef struct UserEventKeycode {
     int         keycode;
 } UserEventKeycode;
 
-/* Formats generic event message (AUSER_EVENT_GENERIC) sent from
- * UI to the core.
- */
+/* Formats generic event message (AUSER_EVENT_GENERIC) */
 typedef struct UserEventGeneric {
     int         type;
     int         code;
     int         value;
 } UserEventGeneric;
 
-#endif /* _ANDROID_USER_EVENTS_COMMON_H */
+#endif /* _ANDROID_PROTOCOL_USER_EVENTS_H */
diff --git a/android/protocol/user-events-proxy.c b/android/protocol/user-events-proxy.c
new file mode 100644
index 0000000..d35012f
--- /dev/null
+++ b/android/protocol/user-events-proxy.c
@@ -0,0 +1,180 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#include "user-events.h"
+#include "console.h"
+#include "android/looper.h"
+#include "android/async-utils.h"
+#include "android/core-connection.h"
+#include "android/utils/debug.h"
+#include "android/protocol/user-events-protocol.h"
+#include "android/protocol/user-events-proxy.h"
+
+/* Descriptor for the user events client. */
+typedef struct UserEventsProxy {
+    /* Core connection instance for the user events client. */
+    CoreConnection* core_connection;
+
+    /* Socket for the client. */
+    int             sock;
+
+    /* Writes user events to the socket. */
+    SyncSocket*     sync_writer;
+} UserEventsProxy;
+
+/* One and only one user events client instance. */
+static UserEventsProxy _userEventsProxy = { 0 };
+
+/* Destroys UserEventsProxy instance. */
+static void
+_userEventsProxy_destroy(void)
+{
+    if (_userEventsProxy.sync_writer != NULL) {
+        syncsocket_close(_userEventsProxy.sync_writer);
+        syncsocket_free(_userEventsProxy.sync_writer);
+        _userEventsProxy.sync_writer = NULL;
+    }
+    if (_userEventsProxy.core_connection != NULL) {
+        core_connection_close(_userEventsProxy.core_connection);
+        core_connection_free(_userEventsProxy.core_connection);
+        _userEventsProxy.core_connection = NULL;
+    }
+}
+
+/* Sends an event to the core: a UserEventHeader followed by the parameters.
+ * Parameters:
+ *  event - Event type. Must be one of the AUSER_EVENT_XXX.
+ *  event_param - Event parameters.
+ *  size - Byte size of the event parameters buffer.
+ * Return:
+ *  0 on success, or < 0 on failure (the failure is also logged via derror).
+ */
+static int
+_userEventsProxy_send(uint8_t event, const void* event_param, size_t size)
+{
+    int res;
+    UserEventHeader header;
+
+    header.event_type = event;
+    res = syncsocket_start_write(_userEventsProxy.sync_writer);
+    if (!res) {
+        // Send event type first (event header)
+        res = syncsocket_write(_userEventsProxy.sync_writer, &header,
+                               sizeof(header),
+                               core_connection_get_timeout(sizeof(header)));
+        if (res > 0) {
+            // Send event param next; the timeout is based on the payload size.
+            res = syncsocket_write(_userEventsProxy.sync_writer, event_param,
+                                   size,
+                                   core_connection_get_timeout(size));
+        }
+        res = syncsocket_result(res);
+        syncsocket_stop_write(_userEventsProxy.sync_writer);
+    }
+    if (res < 0) {
+        derror("Unable to send user event: %s\n", errno_str);
+    }
+    return res;
+}
+
+int
+userEventsProxy_create(SockAddress* console_socket)
+{
+    char* handshake = NULL;
+
+    // Connect to the user-events service.
+    _userEventsProxy.core_connection =
+        core_connection_create_and_switch(console_socket, "user-events",
+                                          &handshake);
+    if (_userEventsProxy.core_connection == NULL) {
+        derror("Unable to connect to the user-events service: %s\n",
+               errno_str);
+        return -1;
+    }
+
+    // Initialize event writer.
+    _userEventsProxy.sock =
+        core_connection_get_socket(_userEventsProxy.core_connection);
+    _userEventsProxy.sync_writer = syncsocket_init(_userEventsProxy.sock);
+    if (_userEventsProxy.sync_writer == NULL) {
+        derror("Unable to initialize UserEventsProxy writer: %s\n", errno_str);
+        _userEventsProxy_destroy();
+        free(handshake);    // Don't leak the handshake on this error path.
+        return -1;
+    }
+
+    fprintf(stdout, "user-events is now connected to the core at %s.",
+            sock_address_to_string(console_socket));
+    // The handshake text is optional; release it either way (free(NULL) is ok).
+    if (handshake != NULL && handshake[0] != '\0') {
+        fprintf(stdout, " Handshake: %s", handshake);
+    }
+    free(handshake);
+    fprintf(stdout, "\n");
+
+    return 0;
+}
+
+void
+user_event_keycodes(int *kcodes, int count)
+{
+    int nn;
+    for (nn = 0; nn < count; nn++)
+        user_event_keycode(kcodes[nn]);
+}
+
+void
+user_event_keycode(int  kcode)
+{
+    UserEventKeycode    message;
+    message.keycode = kcode;
+    _userEventsProxy_send(AUSER_EVENT_KEYCODE, &message, sizeof(message));
+}
+
+void
+user_event_key(unsigned code, unsigned down)
+{
+    if(code == 0) {
+        return;
+    }
+    if (VERBOSE_CHECK(keys))
+        printf(">> KEY [0x%03x,%s]\n", (code & 0x1ff), down ? "down" : " up " );
+
+    user_event_keycode((code & 0x1ff) | (down ? 0x200 : 0));
+}
+
+
+void
+user_event_mouse(int dx, int dy, int dz, unsigned buttons_state)
+{
+    UserEventMouse    message;
+    message.dx = dx;
+    message.dy = dy;
+    message.dz = dz;
+    message.buttons_state = buttons_state;
+    _userEventsProxy_send(AUSER_EVENT_MOUSE, &message, sizeof(message));
+}
+
+void
+user_event_register_generic(void* opaque, QEMUPutGenericEvent *callback)
+{
+}
+
+void
+user_event_generic(int type, int code, int value)
+{
+    UserEventGeneric    message;
+    message.type = type;
+    message.code = code;
+    message.value = value;
+    _userEventsProxy_send(AUSER_EVENT_GENERIC, &message, sizeof(message));
+}
diff --git a/android/protocol/user-events-proxy.h b/android/protocol/user-events-proxy.h
new file mode 100644
index 0000000..95f6614
--- /dev/null
+++ b/android/protocol/user-events-proxy.h
@@ -0,0 +1,30 @@
+/* Copyright (C) 2010 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+/*
+ * Contains the UI-side of the "user events" service. Here we send user events
+ * to the Core.
+ */
+
+#ifndef _ANDROID_PROTOCOL_USER_EVENTS_PROXY_H
+#define _ANDROID_PROTOCOL_USER_EVENTS_PROXY_H
+
+/* Creates and initializes the descriptor for the UI side of the "user-events"
+ * service. Note that there can be only one instance of this service in the UI.
+ * Param:
+ *  console_socket - Address of the Core's console.
+ * Return:
+ *  0 on success, or < 0 on failure.
+ */
+extern int userEventsProxy_create(SockAddress* console_socket);
+
+#endif /* _ANDROID_PROTOCOL_USER_EVENTS_PROXY_H */
diff --git a/android/qemu-setup.c b/android/qemu-setup.c
index f0157cd..f1a12ee 100644
--- a/android/qemu-setup.c
+++ b/android/qemu-setup.c
@@ -23,7 +23,6 @@
 #include "android/utils/path.h"
 #include "android/utils/system.h"
 #include "android/utils/bufprint.h"
-#include "android/core-ui-protocol.h"
 
 #define  D(...)  do {  if (VERBOSE_CHECK(init)) dprint(__VA_ARGS__); } while (0)
 
diff --git a/android/qemulator.c b/android/qemulator.c
index a5f0dc0..f1b2dbc 100644
--- a/android/qemulator.c
+++ b/android/qemulator.c
@@ -14,7 +14,8 @@
 #include "android/utils/bufprint.h"
 #include "android/globals.h"
 #include "android/qemulator.h"
-#include "android/ui-core-protocol.h"
+#include "android/protocol/core-commands-api.h"
+#include "android/protocol/ui-commands-api.h"
 #include "user-events.h"
 
 #define  D(...)  do {  if (VERBOSE_CHECK(init)) dprint(__VA_ARGS__); } while (0)
@@ -87,8 +88,8 @@
     }
 
     /* initialize hardware control support */
-    android_core_set_brightness_change_callback(qemulator_light_brightness,
-                                                emulator);
+    uicmd_set_brightness_change_callback(qemulator_light_brightness,
+                                         emulator);
 }
 
 static void
@@ -272,7 +273,7 @@
 int
 get_device_dpi( AndroidOptions*  opts )
 {
-    int    dpi_device  = android_core_get_hw_lcd_density();
+    int    dpi_device  = corecmd_get_hw_lcd_density();
 
     if (opts->dpi_device != NULL) {
         char*  end;
@@ -405,8 +406,8 @@
     {
     case SKIN_KEY_COMMAND_TOGGLE_NETWORK:
         {
-            android_core_toggle_network();
-            D( "network is now %s", android_core_is_network_disabled() ?
+            corecmd_toggle_network();
+            D( "network is now %s", corecmd_is_network_disabled() ?
                                     "disconnected" : "connected" );
         }
         break;
@@ -421,10 +422,7 @@
         {
 #ifdef CONFIG_TRACE
             tracing = !tracing;
-            if (tracing)
-                android_core_tracing_start();
-            else
-                android_core_tracing_stop();
+            corecmd_trace_control(tracing);
 #endif
         }
         break;
diff --git a/android/skin/window.c b/android/skin/window.c
index 431412b..c2d0bf4 100644
--- a/android/skin/window.c
+++ b/android/skin/window.c
@@ -16,7 +16,7 @@
 #include "android/utils/debug.h"
 #include "android/utils/system.h"
 #include "android/utils/duff.h"
-#include "android/ui-core-protocol.h"
+#include "android/protocol/core-commands-api.h"
 #include <SDL_syswm.h>
 #include "user-events.h"
 #include <math.h>
@@ -1340,9 +1340,9 @@
         user_event_generic( slayout->event_type, slayout->event_code, slayout->event_value );
         /* XXX: hack, replace by better code here */
         if (slayout->event_value != 0)
-            android_core_sensors_set_coarse_orientation( ANDROID_COARSE_PORTRAIT );
+            corecmd_set_coarse_orientation( ANDROID_COARSE_PORTRAIT );
         else
-            android_core_sensors_set_coarse_orientation( ANDROID_COARSE_LANDSCAPE );
+            corecmd_set_coarse_orientation( ANDROID_COARSE_LANDSCAPE );
     }
 
     return 0;
diff --git a/android/ui-core-protocol.c b/android/ui-core-protocol.c
deleted file mode 100644
index 8dd14a7..0000000
--- a/android/ui-core-protocol.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-/*
- * This file contains helper routines that are used to establish communication
- * between UI and Core components of the emulator. This is a temporary file
- * where we will collect functional dependencies between UI and Core in the
- * process of separating UI and Core in the emulator build. Ideally at the
- * end this will be replaced with a message protocol over sockets, or other
- * means of interprocess communication.
- */
-
-#include "android/android.h"
-#include "android/globals.h"
-#include "android/hw-control.h"
-#include "android/ui-core-protocol.h"
-#include "android/ui-ctl-ui.h"
-#if !defined(CONFIG_STANDALONE_UI)
-#include "telephony/modem_driver.h"
-#include "trace.h"
-#include "audio/audio.h"
-/* Implemented in vl-android.c */
-extern char* qemu_find_file(int type, const char* filename);
-#endif  // CONFIG_STANDALONE_UI
-
-int
-android_core_get_hw_lcd_density(void)
-{
-    return android_hw->hw_lcd_density;
-}
-
-void
-android_core_set_brightness_change_callback(AndroidHwLightBrightnessCallback callback,
-                                            void* opaque)
-{
-    AndroidHwControlFuncs  funcs;
-
-    funcs.light_brightness = callback;
-#if !defined(CONFIG_STANDALONE_UI)
-    android_hw_control_init( opaque, &funcs );
-#endif  // CONFIG_STANDALONE_UI
-}
-
-void
-android_core_sensors_set_coarse_orientation( AndroidCoarseOrientation  orient )
-{
-#if !defined(CONFIG_STANDALONE_UI)
-    android_sensors_set_coarse_orientation(orient);
-#else
-    clientuictl_set_coarse_orientation(orient);
-#endif  // CONFIG_STANDALONE_UI
-}
-
-void
-android_core_toggle_network(void)
-{
-    /* Temporary implementation for the monolitic (core + ui) builds. */
-#if !defined(CONFIG_STANDALONE_UI)
-    qemu_net_disable = !qemu_net_disable;
-    if (android_modem) {
-        amodem_set_data_registration(
-                android_modem,
-        qemu_net_disable ? A_REGISTRATION_UNREGISTERED
-            : A_REGISTRATION_HOME);
-    }
-#else
-    clientuictl_toggle_network();
-#endif  // CONFIG_STANDALONE_UI
-}
-
-int
-android_core_is_network_disabled(void)
-{
-    /* Temporary implementation for the monolitic (core + ui) builds. */
-#if !defined(CONFIG_STANDALONE_UI)
-    return qemu_net_disable;
-#else
-    return clientuictl_check_network_disabled();
-#endif  // CONFIG_STANDALONE_UI
-}
-
-void android_core_tracing_start(void)
-{
-#if !defined(CONFIG_STANDALONE_UI)
-    start_tracing();
-#else
-    clientuictl_trace_control(1);
-#endif  // CONFIG_STANDALONE_UI
-}
-
-void android_core_tracing_stop(void)
-{
-#if !defined(CONFIG_STANDALONE_UI)
-    stop_tracing();
-#else
-    clientuictl_trace_control(0);
-#endif  // CONFIG_STANDALONE_UI
-}
-
-int
-android_core_get_android_netspeed(int index, NetworkSpeed** netspeed) {
-    /* This is a temporary code used to support current behavior of the
-     *monolitic (core + ui in one executable) emulator executed with
-     * -help-netspeed option. In the future, when ui and core get separated,
-     * behavior of help may change, and this code should be reviewed. */
-#if !defined(CONFIG_STANDALONE_UI)
-    if (index >= android_netspeeds_count ||
-        android_netspeeds[index].name == NULL) {
-        return -1;
-    }
-    *netspeed = (NetworkSpeed*)malloc(sizeof(NetworkSpeed));
-    memcpy(*netspeed, &android_netspeeds[index], sizeof(NetworkSpeed));
-    return 0;
-#else
-    return clientuictl_get_netspeed(index, netspeed);
-#endif  // !CONFIG_STANDALONE_UI
-}
-
-int
-android_core_get_android_netdelay(int index, NetworkLatency** delay) {
-    /* This is a temporary code used to support current behavior of the
-     * monolitic (core + ui in one executable) emulator executed with
-     * -help-netdelays option. In the future, when ui and core get separated,
-     * behavior of help may change, and this code should be reviewed. */
-#if !defined(CONFIG_STANDALONE_UI)
-    if (index >= android_netdelays_count ||
-        android_netdelays[index].name == NULL) {
-        return -1;
-    }
-    *delay = (NetworkLatency*)malloc(sizeof(NetworkLatency));
-    memcpy(*delay, &android_netdelays[index], sizeof(NetworkLatency));
-    return 0;
-#else
-    return clientuictl_get_netdelay(index, delay);
-#endif  // !CONFIG_STANDALONE_UI
-}
-
-int
-android_core_qemu_find_file(int type, const char *filename,
-                            char* path, size_t path_buf_size)
-{
-    /* Temporary implementation for the monolitic (core + ui) builds. */
-#if !defined(CONFIG_STANDALONE_UI)
-    char* filepath = qemu_find_file(type, filename);
-    if (filepath == NULL) {
-        return -1;
-    }
-    strncpy(path, filepath, path_buf_size);
-    path[path_buf_size - 1] = '\0';
-    qemu_free(filepath);
-    return 0;
-#else
-    char* ret_path = NULL;
-    int status = clientuictl_get_qemu_path(type, filename, &ret_path);
-    if (!status && ret_path != NULL) {
-        strncpy(path, ret_path, path_buf_size);
-        path[path_buf_size - 1] = '\0';
-    }
-    if (ret_path != NULL) {
-        free(ret_path);
-    }
-    return status;
-#endif  // !CONFIG_STANDALONE_UI
-}
diff --git a/android/ui-core-protocol.h b/android/ui-core-protocol.h
deleted file mode 100644
index 2a4cee6..0000000
--- a/android/ui-core-protocol.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-/*
- * This file contains declarations of helper routines that are used to
- * establish communication between UI and Core components of the emulator.
- * This is a temporary file where we will collect functional dependencies
- * between UI and Core in the process of separating UI and Core in the
- * emulator build.
- */
-
-#ifndef QEMU_ANDROID_UI_CORE_PROTOCOL_H
-#define QEMU_ANDROID_UI_CORE_PROTOCOL_H
-
-#include "android/hw-sensors.h"
-
-/* Gets LCD density property from the core properties. */
-int android_core_get_hw_lcd_density(void);
-
-/* This is temporary redeclaration for AndroidHwLightBrightnessFunc declared
- * in android/hw-control.h We redeclare it here in order to keep type
- * consistency between android_core_set_brightness_change_callback and
- * light_brightness field of AndroidHwControlFuncs structure.
- */
-typedef void  (*AndroidHwLightBrightnessCallback)( void*       opaque,
-                                               const char* light,
-                                               int         brightness );
-
-/* Registers a UI callback to be called when brightness is changed by the core. */
-void android_core_set_brightness_change_callback(AndroidHwLightBrightnessCallback callback,
-                                                 void* opaque);
-
-/* change the coarse orientation value */
-void  android_core_sensors_set_coarse_orientation( AndroidCoarseOrientation  orient );
-
-/* Toggles the network state */
-void android_core_toggle_network(void);
-
-/* Gets the network state */
-int android_core_is_network_disabled(void);
-
-/* Start/stop tracing in the guest system */
-void android_core_tracing_start(void);
-void android_core_tracing_stop(void);
-
-/* Gets an entry in android_netspeeds array defined in net-android.c
- * Parameters:
- *  index - Index of the entry to get from the array.
- *  netspeed - Upon successful return contains copy of the requested entry.
- * Return:
- *  0 on success, or -1 if requested entry index is too large.
- */
-int android_core_get_android_netspeed(int index, NetworkSpeed** netspeed);
-
-/* Gets an entry in android_netdelays array defined in net-android.c
- * Parameters:
- *  index - Index of the entry to get from the array.
- *  netspeed - Upon successful return contains copy of the requested entry.
- * Return:
- *  0 on success, or -1 if requested entry index is too large.
- */
-int android_core_get_android_netdelay(int index, NetworkLatency** delay);
-
-/* Builds a path to a file of the given type in the emulator's data directory.
- * Param:
- *  type - Type of the file to find. Only QEMU_FILE_TYPE_BIOS, and
- *      QEMU_FILE_TYPE_KEYMAP are allowed for this value.
- *  filename - Name of the file to build path for.
- *  path - Upon success contains path to the requested file inside the
- *      emulator's data directory.
- *  path_buf_size Character size of the buffer addressed by the path parameter.
- * Return:
- *  0 on success, or -1 on an error.
- */
-int
-android_core_qemu_find_file(int type, const char *filename,
-                            char* path, size_t path_buf_size);
-
-#endif  // QEMU_ANDROID_UI_CORE_PROTOCOL_H
diff --git a/android/ui-ctl-common.h b/android/ui-ctl-common.h
deleted file mode 100644
index bc5960c..0000000
--- a/android/ui-ctl-common.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-#ifndef _ANDROID_UI_CONTROL_COMMON_H
-#define _ANDROID_UI_CONTROL_COMMON_H
-
-#include "android/hw-sensors.h"
-
-/*
- * UI control requests sent by the core to the UI.
- */
-
-/* Sets window scale. */
-#define ACORE_UICTL_SET_WINDOWS_SCALE       1
-
-/*
- * UI control requests sent by the UI to the core.
- */
-
-/* Sets coarse orientation. */
-#define AUI_UICTL_SET_COARSE_ORIENTATION    2
-
-/* Toggles the network (no parameters). */
-#define AUI_UICTL_TOGGLE_NETWORK            3
-
-/* Starts / stops the tracing. */
-#define AUI_UICTL_TRACE_CONTROL             4
-
-/* Checks if network is disabled (no params) */
-#define AUI_UICTL_CHK_NETWORK_DISABLED      5
-
-/* Gets net speed */
-#define AUI_UICTL_GET_NETSPEED              6
-
-/* Gets net delays */
-#define AUI_UICTL_GET_NETDELAY              7
-
-/* Gets path to a QEMU file on local host. */
-#define AUI_UICTL_GET_QEMU_PATH             8
-
-/* UI control message header. */
-typedef struct UICtlHeader {
-    /* Message type. */
-    uint8_t     msg_type;
-
-    /* Size of the message data following this header. */
-    uint32_t    msg_data_size;
-} UICtlHeader;
-
-/* UI control response header. */
-typedef struct UICtlRespHeader {
-    /* Result of the request handling. */
-    int result;
-
-    /* Size of the response data following this header. */
-    uint32_t    resp_data_size;
-} UICtlRespHeader;
-
-/* Formats ACORE_UICTL_SET_WINDOWS_SCALE UI control request.
- */
-typedef struct UICtlSetWindowsScale {
-    double  scale;
-    int     is_dpi;
-} UICtlSetWindowsScale;
-
-/* Formats AUI_UICTL_SET_COARSE_ORIENTATION UI control request.
- */
-typedef struct UICtlSetCoarseOrientation {
-    AndroidCoarseOrientation    orient;
-} UICtlSetCoarseOrientation;
-
-/* Formats AUI_UICTL_TRACE_CONTROL UI control request.
- */
-typedef struct UICtlTraceControl {
-    int start;
-} UICtlTraceControl;
-
-/* Formats AUI_UICTL_GET_NETSPEED UI control request.
- */
-typedef struct UICtlGetNetSpeed {
-    int index;
-} UICtlGetNetSpeed;
-
-/* Formats AUI_UICTL_GET_NETSPEED UI control request response.
- */
-typedef struct UICtlGetNetSpeedResp {
-    /* Size of the entire response structure including name and display strings. */
-    int     upload;
-    int     download;
-    /* display field of NetworkSpeed structure is immediately following
-     * this field. */
-    char    name[0];
-} UICtlGetNetSpeedResp;
-
-/* Formats AUI_UICTL_GET_NETDELAY UI control request.
- */
-typedef struct UICtlGetNetDelay {
-    int index;
-} UICtlGetNetDelay;
-
-/* Formats AUI_UICTL_GET_NETDELAY UI control request response.
- */
-typedef struct UICtlGetNetDelayResp {
-    /* Size of the entire response structure including name and display strings. */
-    int     min_ms;
-    int     max_ms;
-    /* display field of NetworkLatency structure is immediately following
-     * this field. */
-    char    name[0];
-} UICtlGetNetDelayResp;
-
-/* Formats AUI_UICTL_GET_QEMU_PATH UI control request.
- */
-typedef struct UICtlGetQemuPath {
-    int     type;
-    char    filename[0];
-} UICtlGetQemuPath;
-
-/* Formats AUI_UICTL_GET_QEMU_PATH UI control request response.
- */
-typedef struct UICtlGetQemuPathResp {
-    /* Size of the entire response structure. */
-    char    path[0];
-} UICtlGetQemuPathResp;
-
-#if 0
-android_core_get_android_netspeed(int index, NetworkSpeed* netspeed) {
-android_core_get_android_netdelay(int index, NetworkLatency* delay) {
-int
-android_core_qemu_find_file(int type, const char *filename,
-                            char* path, size_t path_buf_size)
-#endif
-
-#endif /* _ANDROID_UI_CONTROL_COMMON_H */
-
diff --git a/android/ui-ctl-core.c b/android/ui-ctl-core.c
deleted file mode 100644
index 4552bb8..0000000
--- a/android/ui-ctl-core.c
+++ /dev/null
@@ -1,568 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-#include "qemu-common.h"
-#include "android/globals.h"
-#include "android/android.h"
-#include "android/looper.h"
-#include "android/async-utils.h"
-#include "android/sync-utils.h"
-#include "android/utils/system.h"
-#include "android/utils/debug.h"
-#include "android/ui-ctl-common.h"
-#include "android/ui-ctl-core.h"
-#include "android/hw-sensors.h"
-#include "telephony/modem_driver.h"
-#include "trace.h"
-#include "audio/audio.h"
-
-/* Enumerates state values for UICoreCtl descriptor. */
-typedef enum UICoreCtlState {
-    /* UI message header is expected in the pipe. */
-    UI_STATE_EXPECT_HEADER,
-    /* UI message data are expected in the pipe. */
-    UI_STATE_EXPECT_DATA
-} UICoreCtlState;
-
-/* Core UI control service descriptor used for UI->Core communication. */
-typedef struct UICoreCtl {
-    /* Reader to detect UI disconnection. */
-    AsyncReader     async_reader;
-
-    /* I/O associated with this descriptor. */
-    LoopIo          io;
-
-    /* Looper used to communicate user events. */
-    Looper*         looper;
-
-    /* Writer to send responses to UI requests. */
-    SyncSocket*     sync_writer;
-
-    /* Socket descriptor for this service. */
-    int             sock;
-
-    /* State of incoming requests. */
-    UICoreCtlState  in_req_state;
-
-    /* Incoming request header. */
-    UICtlHeader     req_header;
-
-    /* A buffer for small incoming requests. */
-    uint8_t         req_data[256];
-
-    /* Buffer to use for reading incoming request data. Depending on expected
-     * incoming request size this buffer can point to req_data field of this
-     * structure (for small requests), or can be allocated for large requests. */
-    void*           req_data_buffer;
-} UICoreCtl;
-
-/* Core UI control service descriptor used for Core->UI communication. */
-typedef struct CoreUICtl {
-    /* I/O associated with this descriptor. */
-    LoopIo          io;
-
-    /* Looper associated with this descriptor. */
-    Looper*         looper;
-
-    /* Writer to send UI commands. */
-    SyncSocket*     sync_writer;
-
-    /* Socket descriptor for this service. */
-    int             sock;
-} CoreUICtl;
-
-/* One and only one CoreUICtl instance. */
-static CoreUICtl    _core_ui_ctl;
-
-/* One and only one UICoreCtl instance. */
-static UICoreCtl    _ui_core_ctl;
-
-/* Calculates timeout for transferring the given number of bytes via UI control
- * socket.
- * Return:
- *  Number of milliseconds during which the entire number of bytes is expected
- *  to be transferred.
- */
-static int
-_get_transfer_timeout(size_t data_size)
-{
-    // Min 200 millisec + one millisec for each transferring byte.
-    // TODO: Come up with a better arithmetics here.
-    return 200 + data_size;
-}
-
-/*
- * Core -> UI control implementation
- */
-
-/* Implemented in android/console.c */
-extern void destroy_core_ui_ctl_client(void);
-
-/* Sends request to the UI client.
- * Param:
- *  msg_type, msg_data, msg_data_size - Define core request to send.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-static int
-_coreuictl_send_request(uint8_t msg_type,
-                        void* msg_data,
-                        uint32_t msg_data_size)
-{
-    UICtlHeader header;
-    int status = syncsocket_start_write(_core_ui_ctl.sync_writer);
-    if (!status) {
-
-        // Initialize and send the header.
-        header.msg_type = msg_type;
-        header.msg_data_size = msg_data_size;
-        status = syncsocket_write(_core_ui_ctl.sync_writer, &header, sizeof(header),
-                                  _get_transfer_timeout(sizeof(header)));
-        // If there is request data, send it too.
-        if (status > 0 && msg_data != NULL && msg_data_size > 0) {
-            status = syncsocket_write(_core_ui_ctl.sync_writer, msg_data,
-                                      msg_data_size,
-                                      _get_transfer_timeout(msg_data_size));
-        }
-        status = syncsocket_result(status);
-        syncsocket_stop_write(_core_ui_ctl.sync_writer);
-    }
-    if (status < 0) {
-        derror("Unable to send core UI control request: %s\n", errno_str);
-    }
-    return status;
-}
-
-/*
- * Asynchronous I/O callback for CoreUICtl instance.
- * We expect this callback to be called only on UI detachment condition. In this
- * case the event should be LOOP_IO_READ, and read should fail with errno set
- * to ECONNRESET.
- * Param:
- *  opaque - CoreUICtl instance.
- */
-static void
-_coreuictl_io_func(void* opaque, int fd, unsigned events)
-{
-    CoreUICtl* uictl = (CoreUICtl*)opaque;
-    AsyncReader reader;
-    AsyncStatus status;
-    uint8_t read_buf[1];
-
-    if (events & LOOP_IO_WRITE) {
-        derror("Unexpected LOOP_IO_WRITE in coreuictl_io_func\n");
-        return;
-    }
-
-    // Try to read
-    asyncReader_init(&reader, read_buf, sizeof(read_buf), &uictl->io);
-    status = asyncReader_read(&reader, &uictl->io);
-    // We expect only error status here.
-    if (status != ASYNC_ERROR) {
-        derror("Unexpected read status %d in coreuictl_io_func\n", status);
-        return;
-    }
-    // We expect only socket disconnection here.
-    if (errno != ECONNRESET) {
-        derror("Unexpected read error %d (%s) in coreuictl_io_func\n",
-               errno, errno_str);
-        return;
-    }
-
-    // Client got disconnectted.
-    destroy_core_ui_ctl_client();
-}
-
-int
-coreuictl_create(int fd)
-{
-    // Initialize _core_ui_ctl instance.
-    _core_ui_ctl.sock = fd;
-    _core_ui_ctl.looper = looper_newCore();
-    loopIo_init(&_core_ui_ctl.io, _core_ui_ctl.looper, _core_ui_ctl.sock,
-                _coreuictl_io_func, &_core_ui_ctl);
-    loopIo_wantRead(&_core_ui_ctl.io);
-    _core_ui_ctl.sync_writer = syncsocket_init(fd);
-    if (_core_ui_ctl.sync_writer == NULL) {
-        derror("Unable to initialize CoreUICtl writer: %s\n", errno_str);
-        return -1;
-    }
-    return 0;
-}
-
-void
-coreuictl_destroy()
-{
-    if (_core_ui_ctl.looper != NULL) {
-        // Stop all I/O that may still be going on.
-        loopIo_done(&_core_ui_ctl.io);
-        looper_free(_core_ui_ctl.looper);
-        _core_ui_ctl.looper = NULL;
-    }
-    if (_core_ui_ctl.sync_writer != NULL) {
-        syncsocket_close(_core_ui_ctl.sync_writer);
-        syncsocket_free(_core_ui_ctl.sync_writer);
-    }
-    _core_ui_ctl.sock = -1;
-}
-
-int
-coreuictl_set_window_scale(double scale, int is_dpi)
-{
-    UICtlSetWindowsScale msg;
-    msg.scale = scale;
-    msg.is_dpi = is_dpi;
-    return _coreuictl_send_request(ACORE_UICTL_SET_WINDOWS_SCALE, &msg,
-                                   sizeof(msg));
-}
-
-/*
- * UI -> Core control implementation
- */
-
-/* Implemented in android/console.c */
-extern void destroy_ui_core_ctl_client(void);
-/* Implemented in vl-android.c */
-extern char* qemu_find_file(int type, const char* filename);
-
-/* Properly initializes req_data_buffer field in UICoreCtl instance to receive
- * the expected incoming request data buffer.
- */
-static uint8_t*
-_alloc_req_data_buffer(UICoreCtl* uictl, uint32_t size)
-{
-    if (size < sizeof(uictl->req_data)) {
-        // req_data can contain all request data.
-        uictl->req_data_buffer = &uictl->req_data[0];
-    } else {
-        // Expected request us too large to fit into preallocated buffer.
-        uictl->req_data_buffer = qemu_malloc(size);
-    }
-    return uictl->req_data_buffer;
-}
-
-/* Properly frees req_data_buffer field in UICoreCtl instance.
- */
-static void
-_free_req_data_buffer(UICoreCtl* uictl)
-{
-    if (uictl->req_data_buffer != &uictl->req_data[0]) {
-        qemu_free(uictl->req_data_buffer);
-        uictl->req_data_buffer = &uictl->req_data[0];
-    }
-}
-
-/* Sends response back to the UI
- * Param:
- *  uictl - UICoreCtl instance to use for the response sending.
- *  resp - Response header.
- *  resp_data - Response data. Data size is defined by the header.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-static int
-_uicorectl_send_response(UICoreCtl* uictl, UICtlRespHeader* resp, void* resp_data)
-{
-    int status = syncsocket_start_write(uictl->sync_writer);
-    if (!status) {
-        // Write the header
-        status = syncsocket_write(uictl->sync_writer, resp,
-                                  sizeof(UICtlRespHeader),
-                                  _get_transfer_timeout(sizeof(UICtlRespHeader)));
-        // Write response data (if any).
-        if (status > 0 && resp_data != NULL && resp->resp_data_size != 0) {
-            status = syncsocket_write(uictl->sync_writer, resp_data,
-                                      resp->resp_data_size,
-                                      _get_transfer_timeout(resp->resp_data_size));
-        }
-        status = syncsocket_result(status);
-        syncsocket_stop_write(uictl->sync_writer);
-    }
-    if (status < 0) {
-        derror("Unable to send UI control response: %s\n", errno_str);
-    }
-    return status;
-}
-
-/* Handles UI control request from the UI.
- * Param:
- *  uictl - UICoreCtl instance that received the request.
- *  req_header - Request header.
- *  req_data - Request data.
- */
-static void
-_handle_uictl_request(UICoreCtl* uictl,
-                      const UICtlHeader* req_header,
-                      const uint8_t* req_data)
-{
-    switch (req_header->msg_type) {
-        case AUI_UICTL_SET_COARSE_ORIENTATION:
-        {
-            UICtlSetCoarseOrientation* req = (UICtlSetCoarseOrientation*)req_data;
-            android_sensors_set_coarse_orientation(req->orient);
-            break;
-        }
-
-        case AUI_UICTL_TOGGLE_NETWORK:
-            qemu_net_disable = !qemu_net_disable;
-            if (android_modem) {
-                amodem_set_data_registration(
-                        android_modem,
-                qemu_net_disable ? A_REGISTRATION_UNREGISTERED
-                    : A_REGISTRATION_HOME);
-            }
-            break;
-
-        case AUI_UICTL_TRACE_CONTROL:
-        {
-            UICtlTraceControl* req = (UICtlTraceControl*)req_data;
-            if (req->start) {
-                start_tracing();
-            } else {
-                stop_tracing();
-            }
-            break;
-        }
-
-        case AUI_UICTL_CHK_NETWORK_DISABLED:
-        {
-            UICtlRespHeader resp;
-            resp.resp_data_size = 0;
-            resp.result = qemu_net_disable;
-            _uicorectl_send_response(uictl, &resp, NULL);
-            break;
-        }
-
-        case AUI_UICTL_GET_NETSPEED:
-        {
-            UICtlRespHeader resp;
-            UICtlGetNetSpeedResp* resp_data = NULL;
-            UICtlGetNetSpeed* req = (UICtlGetNetSpeed*)req_data;
-
-            resp.resp_data_size = 0;
-            resp.result = 0;
-
-            if (req->index >= android_netspeeds_count ||
-                android_netspeeds[req->index].name == NULL) {
-                resp.result = -1;
-            } else {
-                const NetworkSpeed* netspeed = &android_netspeeds[req->index];
-                // Calculate size of the response data:
-                // fixed header + zero-terminated netspeed name.
-                resp.resp_data_size = sizeof(UICtlGetNetSpeedResp) +
-                                      strlen(netspeed->name) + 1;
-                // Count in zero-terminated netspeed display.
-                if (netspeed->display != NULL) {
-                    resp.resp_data_size += strlen(netspeed->display) + 1;
-                } else {
-                    resp.resp_data_size++;
-                }
-                // Allocate and initialize response data buffer.
-                resp_data =
-                    (UICtlGetNetSpeedResp*)qemu_malloc(resp.resp_data_size);
-                resp_data->upload = netspeed->upload;
-                resp_data->download = netspeed->download;
-                strcpy(resp_data->name, netspeed->name);
-                if (netspeed->display != NULL) {
-                    strcpy(resp_data->name + strlen(resp_data->name) + 1,
-                           netspeed->display);
-                } else {
-                    strcpy(resp_data->name + strlen(resp_data->name) + 1, "");
-                }
-            }
-            _uicorectl_send_response(uictl, &resp, resp_data);
-            if (resp_data != NULL) {
-                qemu_free(resp_data);
-            }
-            break;
-        }
-
-        case AUI_UICTL_GET_NETDELAY:
-        {
-            UICtlRespHeader resp;
-            UICtlGetNetDelayResp* resp_data = NULL;
-            UICtlGetNetDelay* req = (UICtlGetNetDelay*)req_data;
-
-            resp.resp_data_size = 0;
-            resp.result = 0;
-
-            if (req->index >= android_netdelays_count ||
-                android_netdelays[req->index].name == NULL) {
-                resp.result = -1;
-            } else {
-                const NetworkLatency* netdelay = &android_netdelays[req->index];
-                // Calculate size of the response data:
-                // fixed header + zero-terminated netdelay name.
-                resp.resp_data_size = sizeof(UICtlGetNetDelayResp) +
-                                      strlen(netdelay->name) + 1;
-                // Count in zero-terminated netdelay display.
-                if (netdelay->display != NULL) {
-                    resp.resp_data_size += strlen(netdelay->display) + 1;
-                } else {
-                    resp.resp_data_size++;
-                }
-                // Allocate and initialize response data buffer.
-                resp_data =
-                    (UICtlGetNetDelayResp*)qemu_malloc(resp.resp_data_size);
-                resp_data->min_ms = netdelay->min_ms;
-                resp_data->max_ms = netdelay->max_ms;
-                strcpy(resp_data->name, netdelay->name);
-                if (netdelay->display != NULL) {
-                    strcpy(resp_data->name + strlen(resp_data->name) + 1,
-                           netdelay->display);
-                } else {
-                    strcpy(resp_data->name + strlen(resp_data->name) + 1, "");
-                }
-            }
-            _uicorectl_send_response(uictl, &resp, resp_data);
-            if (resp_data != NULL) {
-                qemu_free(resp_data);
-            }
-            break;
-        }
-
-        case AUI_UICTL_GET_QEMU_PATH:
-        {
-            UICtlRespHeader resp;
-            UICtlGetQemuPath* req = (UICtlGetQemuPath*)req_data;
-            char* filepath = NULL;
-
-            resp.resp_data_size = 0;
-            resp.result = -1;
-            filepath = qemu_find_file(req->type, req->filename);
-            if (filepath != NULL) {
-                resp.resp_data_size = strlen(filepath) + 1;
-            }
-            _uicorectl_send_response(uictl, &resp, filepath);
-            if (filepath != NULL) {
-                qemu_free(filepath);
-            }
-            break;
-        }
-
-        default:
-            derror("Unknown UI control request %d\n", req_header->msg_type);
-            break;
-    }
-}
-
-/* Asynchronous read I/O callback launched when reading UI control requests.
- */
-static void
-_uicorectl_io_read(UICoreCtl* uictl)
-{
-    // Read whatever is expected from the socket.
-    const AsyncStatus status =
-        asyncReader_read(&uictl->async_reader, &uictl->io);
-
-    switch (status) {
-        case ASYNC_COMPLETE:
-            switch (uictl->in_req_state) {
-                case UI_STATE_EXPECT_HEADER:
-                    // We just read the request header. Now we expect the data.
-                    if (uictl->req_header.msg_data_size != 0) {
-                        uictl->in_req_state = UI_STATE_EXPECT_DATA;
-                        // Setup the reader to read expected amount of the data.
-                        _alloc_req_data_buffer(uictl,
-                                               uictl->req_header.msg_data_size);
-                        asyncReader_init(&uictl->async_reader,
-                                         uictl->req_data_buffer,
-                                         uictl->req_header.msg_data_size,
-                                         &uictl->io);
-                    } else {
-                        // Request doesn't contain data. Go ahead and handle it.
-                        _handle_uictl_request(uictl, &uictl->req_header,
-                                              uictl->req_data_buffer);
-                        // Prepare for the next header.
-                        asyncReader_init(&uictl->async_reader,
-                                         &uictl->req_header,
-                                         sizeof(uictl->req_header), &uictl->io);
-                    }
-                    break;
-
-                case UI_STATE_EXPECT_DATA:
-                    // Request header and data are received. Handle the request.
-                    _handle_uictl_request(uictl, &uictl->req_header,
-                                          uictl->req_data_buffer);
-                    _free_req_data_buffer(uictl);
-                    // Prepare for the next request.
-                    uictl->in_req_state = UI_STATE_EXPECT_HEADER;
-                    asyncReader_init(&uictl->async_reader, &uictl->req_header,
-                                     sizeof(uictl->req_header), &uictl->io);
-                    break;
-            }
-            break;
-        case ASYNC_ERROR:
-            loopIo_dontWantRead(&uictl->io);
-            if (errno == ECONNRESET) {
-                // UI has exited. We need to destroy the service.
-                destroy_ui_core_ctl_client();
-            }
-            break;
-
-        case ASYNC_NEED_MORE:
-            // Transfer will eventually come back into this routine.
-            return;
-    }
-}
-
-/*
- * Asynchronous I/O callback launched when UI control is received from the UI.
- * Param:
- *  opaque - UICoreCtl instance.
- */
-static void
-_uicorectl_io_func(void* opaque, int fd, unsigned events)
-{
-    if (events & LOOP_IO_READ) {
-        _uicorectl_io_read((UICoreCtl*)opaque);
-    } else if (events & LOOP_IO_WRITE) {
-        // We don't use async writer here, so we don't expect
-        // any write callbacks.
-        derror("Unexpected LOOP_IO_WRITE in _uicorectl_io_func\n");
-    }
-}
-
-int
-uicorectl_create(int fd)
-{
-    _ui_core_ctl.sock = fd;
-    _ui_core_ctl.looper = looper_newCore();
-    loopIo_init(&_ui_core_ctl.io, _ui_core_ctl.looper, _ui_core_ctl.sock,
-                _uicorectl_io_func, &_ui_core_ctl);
-    _ui_core_ctl.in_req_state = UI_STATE_EXPECT_HEADER;
-    _ui_core_ctl.req_data_buffer = &_ui_core_ctl.req_data[0];
-    asyncReader_init(&_ui_core_ctl.async_reader, &_ui_core_ctl.req_header,
-                     sizeof(_ui_core_ctl.req_header), &_ui_core_ctl.io);
-    _ui_core_ctl.sync_writer = syncsocket_init(fd);
-    if (_ui_core_ctl.sync_writer == NULL) {
-        derror("Unable to create writer for UICoreCtl instance: %s\n", errno_str);
-        return -1;
-    }
-    return 0;
-}
-
-void
-uicorectl_destroy()
-{
-    if (_ui_core_ctl.looper != NULL) {
-        // Stop all I/O that may still be going on.
-        loopIo_done(&_ui_core_ctl.io);
-        looper_free(_ui_core_ctl.looper);
-        _ui_core_ctl.looper = NULL;
-    }
-    if (_ui_core_ctl.sync_writer != NULL) {
-        syncsocket_close(_ui_core_ctl.sync_writer);
-        syncsocket_free(_ui_core_ctl.sync_writer);
-    }
-    _free_req_data_buffer(&_ui_core_ctl);
-}
diff --git a/android/ui-ctl-core.h b/android/ui-ctl-core.h
deleted file mode 100644
index f7d7ecf..0000000
--- a/android/ui-ctl-core.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-#ifndef _ANDROID_UI_CONTROL_CORE_H
-#define _ANDROID_UI_CONTROL_CORE_H
-
-/*
- * Contains core-side of UI control protocols. For the simplicity of the
- * implementation there are two UI control services: "ui-core-control" that
- * handle UI controls initiated in the UI, and "core-ui-control" that handle UI
- * controls initiated in the core. The reason for hawing two services is that
- * some of the UI controls expect the core to respond with some data. The
- * simplest way to differentiate core commands from core responses to the UI
- * commands, is to have two separate services: one sends commands only, and
- * another sends only responses.
- */
-
-/*
- * Creates and initializes Core->UI UI control service.
- * Param:
- *  fd - Socket descriptor for the service.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-extern int coreuictl_create(int fd);
-
-/*
- * Destroys Core->UI UI control service.
- */
-extern void coreuictl_destroy();
-
-/* Changes the scale of the emulator window at runtime.
- * Param:
- *  scale, is_dpi - New window scale parameters
- * Return:
- *  0 on success, or < 0 on failure.
- */
-extern int coreuictl_set_window_scale(double scale, int is_dpi);
-
-/*
- * Creates and initializes UI->Core UI control instance.
- * Param:
- *  fd - Socket descriptor for the service.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-extern int uicorectl_create(int fd);
-
-/*
- * Destroys UI->Core UI control service.
- */
-extern void uicorectl_destroy();
-
-#endif /* _ANDROID_UI_CONTROL_CORE_H */
diff --git a/android/ui-ctl-ui.c b/android/ui-ctl-ui.c
deleted file mode 100644
index 47d0603..0000000
--- a/android/ui-ctl-ui.c
+++ /dev/null
@@ -1,568 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-/*
- * Contains UI-side of UI control protocol.
- */
-
-#include "console.h"
-#include "android/android.h"
-#include "android/globals.h"
-#include "android/looper.h"
-#include "android/core-connection.h"
-#include "android/async-utils.h"
-#include "android/utils/system.h"
-#include "android/utils/debug.h"
-#include "android/sync-utils.h"
-#include "android/ui-ctl-common.h"
-#include "android/ui-ctl-ui.h"
-
-#define  PANIC(...) do { fprintf(stderr, __VA_ARGS__);  \
-                         exit(1);                       \
-                    } while (0)
-
-
-/*
- * Enumerates states for the request reader in CoreUICtlClient instance.
- */
-typedef enum CoreUICtlClientState {
-    /* The reader is waiting on request header. */
-    WAIT_HEADER,
-
-    /* The reader is waiting on request data. */
-    WAIT_DATA,
-} CoreUICtlClientState;
-
-/* Common descriptor for UI control clients. */
-typedef struct UICtlCommon {
-    /* Core connection instance for the UI control client. */
-    CoreConnection*     core_connection;
-
-    /* Socket wrapper for sync writes. */
-    SyncSocket*         sync_writer;
-
-    /* Socket descriptor for the UI control client. */
-    int                 sock;
-} UICtlCommon;
-
-/* Descriptor for the Core->UI control client. */
-typedef struct CoreUICtlClient {
-    /* Common UI control client descriptor. */
-    UICtlCommon             common;
-
-    /* Current reader state. */
-    CoreUICtlClientState    reader_state;
-
-    /* Incoming request header. */
-    UICtlHeader             req_header;
-
-    /* Reader's buffer. */
-    uint8_t*                reader_buffer;
-
-    /* Offset in the reader's buffer where to read next chunk of data. */
-    size_t                  reader_offset;
-
-    /* Total number of bytes the reader expects to read. */
-    size_t                  reader_bytes;
-} CoreUICtlClient;
-
-/* Descriptor for the UI->Core control client. */
-typedef struct UICoreCtlClient {
-    /* Common UI control client descriptor. */
-    UICtlCommon         common;
-
-    /* Socket wrapper for sync reads. */
-    SyncSocket*         sync_reader;
-} UICoreCtlClient;
-
-/* One and only one Core->UI control client instance. */
-static CoreUICtlClient  _core_ui_client;
-
-/* One and only one UI->Core control client instance. */
-static UICoreCtlClient  _ui_core_client;
-
-/* Calculates timeout for transferring the given number of bytes via UI control
- * socket.
- * Return:
- *  Number of milliseconds during which the entire number of bytes is expected
- *  to be transferred.
- */
-static int
-_get_transfer_timeout(size_t data_size)
-{
-    // Min 200 millisec + one millisec for each transferring byte.
-    // TODO: Come up with a better arithmetics here.
-    return 200 + data_size;
-}
-
-/* Initializes UICtlCommon instance.
- * Param:
- *  console_socket - Addresses core's console service.
- *  name - Name of the core's service to attach to ("ui-core client",
- *  or "core-ui client").
- *  uictl_common - UICtlCommon instance to initialize.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-static int
-_clientuictl_create_client(SockAddress* console_socket,
-                           char* name,
-                           UICtlCommon* uictl_common)
-{
-    char* connect_message = NULL;
-    char switch_cmd[256];
-
-    // Connect to the console service.
-    uictl_common->core_connection = core_connection_create(console_socket);
-    if (uictl_common->core_connection == NULL) {
-        derror("UI control client %s is unable to connect to the console: %s\n",
-               name, errno_str);
-        return -1;
-    }
-    if (core_connection_open(uictl_common->core_connection)) {
-        core_connection_free(uictl_common->core_connection);
-        uictl_common->core_connection = NULL;
-        derror("UI control client %s is unable to open the console: %s\n",
-               name, errno_str);
-        return -1;
-    }
-    snprintf(switch_cmd, sizeof(switch_cmd), "%s", name);
-    if (core_connection_switch_stream(uictl_common->core_connection, switch_cmd,
-                                      &connect_message)) {
-        derror("Unable to connect to the UI control service %s: %s\n",
-               name, connect_message ? connect_message : "");
-        if (connect_message != NULL) {
-            free(connect_message);
-        }
-        core_connection_close(uictl_common->core_connection);
-        core_connection_free(uictl_common->core_connection);
-        uictl_common->core_connection = NULL;
-        return -1;
-    }
-    if (connect_message != NULL) {
-        free(connect_message);
-    }
-
-    // Initialize UICtlCommon instance.
-    uictl_common->sock = core_connection_get_socket(uictl_common->core_connection);
-    uictl_common->sync_writer = syncsocket_init(uictl_common->sock);
-    if (uictl_common->sync_writer == NULL) {
-        derror("Unable to initialize sync writer for %s UI control client: %s\n",
-               name, errno_str);
-        return -1;
-    }
-    return 0;
-}
-
-/* Destroys UICtlCommon instance. */
-static void
-_uictlcommon_destroy(UICtlCommon* desc)
-{
-    if (desc->core_connection != NULL) {
-        // Disable I/O callbacks.
-        qemu_set_fd_handler(desc->sock, NULL, NULL, NULL);
-        syncsocket_close(desc->sync_writer);
-        syncsocket_free(desc->sync_writer);
-        core_connection_close(desc->core_connection);
-        core_connection_free(desc->core_connection);
-        desc->core_connection = NULL;
-    }
-}
-
-/*
- * Core->UI control client implementation.
- */
-
-/* Implemented in android/qemulator.c */
-extern void android_emulator_set_window_scale( double  scale, int  is_dpi );
-
-/* Destroys CoreUICtlClient instance. */
-static void
-_core_ui_client_destroy()
-{
-    _uictlcommon_destroy(&_core_ui_client.common);
-    if (_core_ui_client.reader_buffer != NULL &&
-        _core_ui_client.reader_buffer != (uint8_t*)&_core_ui_client.req_header) {
-        free(_core_ui_client.reader_buffer);
-    }
-}
-
-/*
- * Handles UI control request received from the core.
- * Param:
- *  uictl - CoreUICtlClient instance that received the request.
- *  header - UI control request header.
- *  data - Request data formatted accordingly to the request type.
- */
-static void
-_core_ui_ctl_handle_request(CoreUICtlClient* uictl,
-                            UICtlHeader* header,
-                            uint8_t* data)
-{
-    switch (header->msg_type) {
-        case ACORE_UICTL_SET_WINDOWS_SCALE:
-        {
-            UICtlSetWindowsScale* req = (UICtlSetWindowsScale*)data;
-            android_emulator_set_window_scale(req->scale, req->is_dpi);
-            break;
-        }
-        default:
-            derror("Unknown Core UI control %d\n", header->msg_type);
-            break;
-    }
-}
-
-/*
- * Asynchronous I/O callback launched when UI control requests received from the
- * core are ready to be read.
- * Param:
- *  opaque - CoreUICtlClient instance.
- */
-static void
-_core_ui_client_read_cb(void* opaque)
-{
-    CoreUICtlClient* uictl = opaque;
-    int  ret;
-
-    // Read requests while they are immediately available.
-    for (;;) {
-        // Read next chunk of data.
-        ret = read(uictl->common.sock,
-                   uictl->reader_buffer + uictl->reader_offset,
-                   uictl->reader_bytes - uictl->reader_offset);
-        if (ret == 0) {
-            /* disconnection ! */
-            _core_ui_client_destroy();
-            return;
-        }
-        if (ret < 0) {
-            if (errno == EINTR) {
-                /* loop on EINTR */
-                continue;
-            } else if (errno == EWOULDBLOCK || errno == EAGAIN) {
-                // Chunk is not avalable at this point. Come back later.
-                return;
-            }
-        }
-
-        uictl->reader_offset += ret;
-        if (uictl->reader_offset != uictl->reader_bytes) {
-            // There are still some data left in the pipe.
-            continue;
-        }
-
-        // All expected data has been read. Time to change the state.
-        if (uictl->reader_state == WAIT_HEADER) {
-            // Header has been read. Prepare for the data.
-            uictl->reader_state = WAIT_DATA;
-            uictl->reader_offset = 0;
-            uictl->reader_bytes = uictl->req_header.msg_data_size;
-            uictl->reader_buffer = malloc(uictl->reader_bytes);
-            if (uictl->reader_buffer == NULL) {
-                PANIC("Unable to allocate memory for UI control request.\n");
-            }
-        } else {
-            _core_ui_ctl_handle_request(uictl, &uictl->req_header,
-                                        uictl->reader_buffer);
-            free(uictl->reader_buffer);
-            uictl->reader_state = WAIT_HEADER;
-            uictl->reader_offset = 0;
-            uictl->reader_bytes = sizeof(uictl->req_header);
-            uictl->reader_buffer = (uint8_t*)&uictl->req_header;
-        }
-    }
-}
-
-/*
- * UI->Core control client implementation.
- */
-
-/* Sends UI request to the core.
- * Param:
- *  msg_type, msg_data, msg_data_size - Define the request.
- * Return:
- *  0 On success, or < 0 on failure.
- */
-static int
-_ui_core_ctl_send_request(uint8_t msg_type,
-                          void* msg_data,
-                          uint32_t msg_data_size)
-{
-    int status;
-    UICtlHeader header;
-
-    // Prepare and send the header.
-    header.msg_type = msg_type;
-    header.msg_data_size = msg_data_size;
-    status = syncsocket_start_write(_ui_core_client.common.sync_writer);
-    if (!status) {
-        // Send the header.
-        status = syncsocket_write(_ui_core_client.common.sync_writer, &header,
-                                  sizeof(header),
-                                  _get_transfer_timeout(sizeof(header)));
-        // If there is request data, send it too.
-        if (status > 0 && msg_data != NULL && msg_data_size > 0) {
-            status = syncsocket_write(_ui_core_client.common.sync_writer, msg_data,
-                                      msg_data_size,
-                                      _get_transfer_timeout(msg_data_size));
-        }
-        status = syncsocket_result(status);
-        syncsocket_stop_write(_ui_core_client.common.sync_writer);
-    }
-    if (status < 0) {
-        derror("Unable to send UI control request: %s\n", errno_str);
-    }
-    return status;
-}
-
-/* Reads response to a UI control request from the core.
- * Param:
- *  resp - Upon success contains response header.
- *  resp_data - Upon success contains allocated reponse data (if any). The caller
- *      is responsible for deallocating of the memory returned in this parameter.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-static int
-_ui_core_ctl_get_response(UICtlRespHeader* resp, void** resp_data)
-{
-    int status =  syncsocket_start_read(_ui_core_client.sync_reader);
-    if (!status) {
-        // Read the header.
-        status = syncsocket_read(_ui_core_client.sync_reader, resp,
-                                 sizeof(UICtlRespHeader),
-                                 _get_transfer_timeout(sizeof(UICtlRespHeader)));
-        // Read response data (if any).
-        if (status > 0 && resp->resp_data_size) {
-            *resp_data = malloc(resp->resp_data_size);
-            if (*resp_data == NULL) {
-                PANIC("Unable to allocate response data buffer\n");
-            }
-            status = syncsocket_read(_ui_core_client.sync_reader, *resp_data,
-                                     resp->resp_data_size,
-                                     _get_transfer_timeout(resp->resp_data_size));
-        }
-        status = syncsocket_result(status);
-        syncsocket_stop_read(_ui_core_client.sync_reader);
-    }
-    if (status < 0) {
-        derror("Unable to get UI control response: %s\n", errno_str);
-    }
-    return status;
-}
-
-int
-clientuictl_set_coarse_orientation(AndroidCoarseOrientation orient)
-{
-    UICtlSetCoarseOrientation msg;
-    msg.orient = orient;
-    return _ui_core_ctl_send_request(AUI_UICTL_SET_COARSE_ORIENTATION,
-                                     &msg, sizeof(msg));
-}
-
-int
-clientuictl_toggle_network()
-{
-    return _ui_core_ctl_send_request(AUI_UICTL_TOGGLE_NETWORK, NULL, 0);
-}
-
-int
-clientuictl_trace_control(int start)
-{
-    UICtlTraceControl msg;
-    msg.start = start;
-    return _ui_core_ctl_send_request(AUI_UICTL_TRACE_CONTROL,
-                                     &msg, sizeof(msg));
-}
-
-int
-clientuictl_check_network_disabled()
-{
-    UICtlRespHeader resp;
-    void* tmp = NULL;
-    int status;
-
-    status = _ui_core_ctl_send_request(AUI_UICTL_CHK_NETWORK_DISABLED, NULL, 0);
-    if (status < 0) {
-        return status;
-    }
-    status = _ui_core_ctl_get_response(&resp, &tmp);
-    if (status < 0) {
-        return status;
-    }
-    return resp.result;
-}
-
-int
-clientuictl_get_netspeed(int index, NetworkSpeed** netspeed)
-{
-    UICtlGetNetSpeed req;
-    UICtlRespHeader resp;
-    UICtlGetNetSpeedResp* resp_data = NULL;
-    int status;
-
-    // Initialize and send the query.
-    req.index = index;
-    status = _ui_core_ctl_send_request(AUI_UICTL_GET_NETSPEED, &req, sizeof(req));
-    if (status < 0) {
-        return status;
-    }
-
-    // Obtain the response from the core.
-    status = _ui_core_ctl_get_response(&resp, (void**)&resp_data);
-    if (status < 0) {
-        return status;
-    }
-    if (!resp.result) {
-        NetworkSpeed* ret;
-        // Allocate memory for the returning NetworkSpeed instance.
-        // It includes: NetworkSpeed structure +
-        // size of zero-terminated "name" and "display" strings saved in
-        // resp_data.
-        *netspeed = malloc(sizeof(NetworkSpeed) + 1 +
-                           resp.resp_data_size - sizeof(UICtlGetNetSpeedResp));
-        ret = *netspeed;
-
-        // Copy data obtained from the core to the returning NetworkSpeed
-        // instance.
-        ret->upload = resp_data->upload;
-        ret->download = resp_data->download;
-        ret->name = (char*)ret + sizeof(NetworkSpeed);
-        strcpy((char*)ret->name, resp_data->name);
-        ret->display = ret->name + strlen(ret->name) + 1;
-        strcpy((char*)ret->display, resp_data->name + strlen(resp_data->name) + 1);
-    }
-    if (resp_data != NULL) {
-        free(resp_data);
-    }
-    return resp.result;
-}
-
-int
-clientuictl_get_netdelay(int index, NetworkLatency** netdelay)
-{
-    UICtlGetNetDelay req;
-    UICtlRespHeader resp;
-    UICtlGetNetDelayResp* resp_data = NULL;
-    int status;
-
-    // Initialize and send the query.
-    req.index = index;
-    status = _ui_core_ctl_send_request(AUI_UICTL_GET_NETDELAY, &req, sizeof(req));
-    if (status < 0) {
-        return status;
-    }
-
-    // Obtain the response from the core.
-    status = _ui_core_ctl_get_response(&resp, (void**)&resp_data);
-    if (status < 0) {
-        return status;
-    }
-    if (!resp.result) {
-        NetworkLatency* ret;
-        // Allocate memory for the returning NetworkLatency instance.
-        // It includes: NetworkLatency structure +
-        // size of zero-terminated "name" and "display" strings saved in
-        // resp_data.
-        *netdelay = malloc(sizeof(NetworkLatency) + 1 +
-                           resp.resp_data_size - sizeof(UICtlGetNetDelayResp));
-        ret = *netdelay;
-
-        // Copy data obtained from the core to the returning NetworkLatency
-        // instance.
-        ret->min_ms = resp_data->min_ms;
-        ret->max_ms = resp_data->max_ms;
-        ret->name = (char*)ret + sizeof(NetworkLatency);
-        strcpy((char*)ret->name, resp_data->name);
-        ret->display = ret->name + strlen(ret->name) + 1;
-        strcpy((char*)ret->display, resp_data->name + strlen(resp_data->name) + 1);
-    }
-    if (resp_data != NULL) {
-        free(resp_data);
-    }
-    return resp.result;
-}
-
-int
-clientuictl_get_qemu_path(int type, const char* filename, char** path)
-{
-    UICtlRespHeader resp;
-    char* resp_data = NULL;
-    int status;
-
-    // Initialize and send the query.
-    uint32_t req_data_size = sizeof(UICtlGetQemuPath) + strlen(filename) + 1;
-    UICtlGetQemuPath* req = (UICtlGetQemuPath*)malloc(req_data_size);
-    if (req == NULL) {
-        PANIC("Unable to allocate query qemu path request\n");
-    }
-    req->type = type;
-    strcpy(req->filename, filename);
-    status = _ui_core_ctl_send_request(AUI_UICTL_GET_QEMU_PATH, req,
-                                       req_data_size);
-    if (status < 0) {
-        return status;
-    }
-
-    // Obtain the response from the core.
-    status = _ui_core_ctl_get_response(&resp, (void**)&resp_data);
-    if (status < 0) {
-        return status;
-    }
-    if (!resp.result && resp_data != NULL) {
-        *path = strdup(resp_data);
-    }
-    if (resp_data != NULL) {
-        free(resp_data);
-    }
-    return resp.result;
-}
-
-int
-clientuictl_create(SockAddress* console_socket)
-{
-    // Connect to Core->UI service
-    if (_clientuictl_create_client(console_socket, "core-ui-control",
-                                   &_core_ui_client.common)) {
-        return -1;
-    }
-    _core_ui_client.reader_state = WAIT_HEADER;
-    if (qemu_set_fd_handler(_core_ui_client.common.sock, _core_ui_client_read_cb,
-                            NULL, &_core_ui_client)) {
-        derror("Unable to set up UI control read callback\n");
-        core_connection_close(_core_ui_client.common.core_connection);
-        core_connection_free(_core_ui_client.common.core_connection);
-        _core_ui_client.common.core_connection = NULL;
-        return -1;
-    }
-    fprintf(stdout, "Core->UI client is now attached to the core %s\n",
-            sock_address_to_string(console_socket));
-
-    // Connect to UI->Core service
-    if (_clientuictl_create_client(console_socket, "ui-core-control",
-                                   &_ui_core_client.common)) {
-        _core_ui_client_destroy();
-        return -1;
-    }
-    _ui_core_client.sync_reader = syncsocket_init(_ui_core_client.common.sock);
-    if (_ui_core_client.sync_reader == NULL) {
-        derror("Unable to create reader for CoreUICtlClient instance: %s\n",
-               errno_str);
-        _core_ui_client_destroy();
-        return -1;
-    }
-
-    fprintf(stdout, "UI->Core client is now attached to the core %s\n",
-            sock_address_to_string(console_socket));
-
-    return 0;
-}
diff --git a/android/ui-ctl-ui.h b/android/ui-ctl-ui.h
deleted file mode 100644
index 4359ba1..0000000
--- a/android/ui-ctl-ui.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-#ifndef _ANDROID_UI_CONTROL_UI_H
-#define _ANDROID_UI_CONTROL_UI_H
-
-/*
- * Contains UI-side of UI control protocols. For the simplicity of implementation
- * there are two UI control services: "ui-core-control" that handle UI controls
- * initiated in the UI, and "core-ui-control" that handle UI controls initiated
- * in the core. The reason for hawing two services is that some of the UI
- * controls expect the core to respond with some data. The simplest way to
- * differentiate core commands from core responses to the UI commands, is to have
- * two separate services: one sends commands only, and another sends only
- * responses.
- */
-
-#include "sockets.h"
-#include "android/ui-ctl-common.h"
-
-/* Establishes connection with UI control services in the core.
- * Param:
- *  console_socket Core's console socket.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-int clientuictl_create(SockAddress* console_socket);
-
-/*
- * UI->Core API
- */
-
-/* Sends AUI_UICTL_SET_COARSE_ORIENTATION message to the core.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-int clientuictl_set_coarse_orientation(AndroidCoarseOrientation orient);
-
-/* Sends AUI_UICTL_TOGGLE_NETWORK message to the core.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-int clientuictl_toggle_network();
-
-/* Sends AUI_UICTL_TRACE_CONTROL message to the core.
- * Param:
- *  start - Starts (> 0), or stops (== 0) tracing.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-int clientuictl_trace_control(int start);
-
-/* Sends AUI_UICTL_CHK_NETWORK_DISABLED message to the core.
- * Return:
- *  0 if network is enabled, 1 if it is disabled, or < 0 on failure.
- */
-int clientuictl_check_network_disabled();
-
-/* Sends AUI_UICTL_GET_NETSPEED message to the core.
- * Param:
- *  index - Index of an entry in the NetworkSpeed array.
- *  netspeed - Upon success contains allocated and initialized NetworkSpeed
- *  instance for the given index. Note that strings addressed by "name" and
- *  "display" fileds in the returned NetworkSpeed instance are containd inside
- *  the buffer allocated for the returned NetworkSpeed instance. Caller of this
- *  routine must eventually free the buffer returned in this parameter.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-int clientuictl_get_netspeed(int index, NetworkSpeed** netspeed);
-
-/* Sends AUI_UICTL_GET_NETDELAY message to the core.
- * Param:
- *  index - Index of an entry in the NetworkLatency array.
- *  netdelay - Upon success contains allocated and initialized NetworkLatency
- *  instance for the given index. Note that strings addressed by "name" and
- *  "display" fileds in the returned NetworkLatency instance are containd inside
- *  the buffer allocated for the returned NetworkLatency instance. Caller of this
- *  routine must eventually free the buffer returned in this parameter.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-int clientuictl_get_netdelay(int index, NetworkLatency** netdelay);
-
-/* Sends AUI_UICTL_GET_QEMU_PATH message to the core.
- * Param:
- *  type, filename - Query parameters
- *  netdelay - Upon success contains allocated and initialized NetworkLatency
- *  instance for the given index. Note that strings addressed by "name" and
- *  "display" fileds in the returned NetworkLatency instance are containd inside
- *  the buffer allocated for the returned NetworkLatency instance. Caller of this
- *  routine must eventually free the buffer returned in this parameter.
- * Return:
- *  0 on success, or < 0 on failure.
- */
-int clientuictl_get_qemu_path(int type, const char* filename, char** path);
-
-#endif /* _ANDROID_UI_CONTROL_UI_H */
-
diff --git a/android/user-events-core.c b/android/user-events-core.c
deleted file mode 100644
index 3b97c00..0000000
--- a/android/user-events-core.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-#include "user-events.h"
-#include "android/globals.h"
-#include "android/android.h"
-#include "android/looper.h"
-#include "android/async-utils.h"
-#include "android/utils/system.h"
-#include "android/utils/debug.h"
-#include "android/user-events-common.h"
-#include "android/user-events-core.h"
-#include "android/sync-utils.h"
-
-/* States of the core user events service.
- */
-
-/* Event header is expected in the pipe. */
-#define UE_STATE_EVENT_HEADER  0
-/* Event parameters are expected in the pipe. */
-#define UE_STATE_EVENT_PARAM   1
-
-/* Core user events service descriptor. */
-struct CoreUserEvents {
-    /* Reader to receive user events. */
-    AsyncReader     user_events_reader;
-
-    /* I/O associated with this descriptor. */
-    LoopIo          io;
-
-    /* Looper used to communicate user events. */
-    Looper*         looper;
-
-    /* Socket for this service. */
-    int             sock;
-
-    /* State of the service (see UE_STATE_XXX for possible values). */
-    int             state;
-
-    /* Current event header. */
-    UserEventHeader event_header;
-
-    /* Current event parameters. */
-    union {
-        UserEventGeneric    generic_event;
-        UserEventMouse      mouse_event;
-        UserEventKeycode    keycode_event;
-    };
-};
-
-/* Implemented in android/console.c */
-extern void destroy_control_ue_client(void);
-
-/*
- * Asynchronous I/O callback launched when reading user events from the socket.
- * Param:
- *  opaque - CoreUserEvents instance.
- */
-static void
-coreue_io_func(void* opaque, int fd, unsigned events)
-{
-    CoreUserEvents* ue = opaque;
-    // Read whatever is expected from the socket.
-    const AsyncStatus status = asyncReader_read(&ue->user_events_reader, &ue->io);
-
-    switch (status) {
-        case ASYNC_COMPLETE:
-            switch (ue->state) {
-                case UE_STATE_EVENT_HEADER:
-                    // We just read event header. Now we expect event parameters.
-                    ue->state = UE_STATE_EVENT_PARAM;
-                    // Setup the reader depending on the event type.
-                    switch (ue->event_header.event_type) {
-                        case AUSER_EVENT_MOUSE:
-                            asyncReader_init(&ue->user_events_reader,
-                                             &ue->mouse_event,
-                                             sizeof(ue->mouse_event),
-                                             &ue->io);
-                            break;
-                        case AUSER_EVENT_KEYCODE:
-                            asyncReader_init(&ue->user_events_reader,
-                                             &ue->keycode_event,
-                                             sizeof(ue->keycode_event),
-                                             &ue->io);
-                            break;
-                        case AUSER_EVENT_GENERIC:
-                            asyncReader_init(&ue->user_events_reader,
-                                             &ue->generic_event,
-                                             sizeof(ue->generic_event),
-                                             &ue->io);
-                            break;
-                        default:
-                            derror("Unexpected event type %d\n",
-                                   ue->event_header.event_type);
-                            break;
-                    }
-                    break;
-
-                case UE_STATE_EVENT_PARAM:
-                    // We just read event parameters. Lets fire the event.
-                    switch (ue->event_header.event_type) {
-                        case AUSER_EVENT_MOUSE:
-                            user_event_mouse(ue->mouse_event.dx,
-                                             ue->mouse_event.dy,
-                                             ue->mouse_event.dz,
-                                             ue->mouse_event.buttons_state);
-                            break;
-                        case AUSER_EVENT_KEYCODE:
-                            user_event_keycode(ue->keycode_event.keycode);
-                            break;
-                        case AUSER_EVENT_GENERIC:
-                            user_event_generic(ue->generic_event.type,
-                                               ue->generic_event.code,
-                                               ue->generic_event.value);
-                            break;
-                        default:
-                            derror("Unexpected event type %d\n",
-                                   ue->event_header.event_type);
-                            break;
-                    }
-                    // Now we expect event header.
-                    ue->event_header.event_type = -1;
-                    ue->state = UE_STATE_EVENT_HEADER;
-                    asyncReader_init(&ue->user_events_reader, &ue->event_header,
-                                     sizeof(ue->event_header), &ue->io);
-                    break;
-            }
-            break;
-        case ASYNC_ERROR:
-            loopIo_dontWantRead(&ue->io);
-            if (errno == ECONNRESET) {
-                // UI has exited. We need to destroy user event service.
-                destroy_control_ue_client();
-            }
-            break;
-
-        case ASYNC_NEED_MORE:
-            // Transfer will eventually come back into this routine.
-            return;
-    }
-}
-
-CoreUserEvents*
-coreue_create(int fd)
-{
-    CoreUserEvents* ue;
-    ANEW0(ue);
-    ue->sock = fd;
-    ue->state = UE_STATE_EVENT_HEADER;
-    ue->looper = looper_newCore();
-    loopIo_init(&ue->io, ue->looper, ue->sock, coreue_io_func, ue);
-    asyncReader_init(&ue->user_events_reader, &ue->event_header,
-                     sizeof(ue->event_header), &ue->io);
-    return ue;
-}
-
-void
-coreue_destroy(CoreUserEvents* ue)
-{
-    if (ue != NULL) {
-        if (ue->looper != NULL) {
-            // Stop all I/O that may still be going on.
-            loopIo_done(&ue->io);
-            looper_free(ue->looper);
-            ue->looper = NULL;
-        }
-        free(ue);
-    }
-}
diff --git a/android/user-events-core.h b/android/user-events-core.h
deleted file mode 100644
index 04bab6c..0000000
--- a/android/user-events-core.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-/*
- * Contains recepient of user events sent from the UI.
- */
-
-#ifndef _ANDROID_USER_EVENTS_CORE_H
-#define _ANDROID_USER_EVENTS_CORE_H
-
-/* Descriptor for a core user events instance */
-typedef struct CoreUserEvents CoreUserEvents;
-
-/*
- * Creates and initializes core user events instance.
- * Param:
- *  fd - Socket descriptor for the service.
- */
-extern CoreUserEvents* coreue_create(int fd);
-
-/*
- * Destroys core user events service.
- * Param:
- *  ue - User event service descriptor to destroy.
- */
-extern void coreue_destroy(CoreUserEvents* ue);
-
-#endif /* _ANDROID_USER_EVENTS_CORE_H */
diff --git a/hw/apic.c b/hw/apic.c
new file mode 100644
index 0000000..b059185
--- /dev/null
+++ b/hw/apic.c
@@ -0,0 +1,967 @@
+/*
+ *  APIC support
+ *
+ *  Copyright (c) 2004-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+#include "hw.h"
+#include "pc.h"
+#include "qemu-timer.h"
+#include "host-utils.h"
+
+//#define DEBUG_APIC
+
+/* APIC Local Vector Table */
+#define APIC_LVT_TIMER   0
+#define APIC_LVT_THERMAL 1
+#define APIC_LVT_PERFORM 2
+#define APIC_LVT_LINT0   3
+#define APIC_LVT_LINT1   4
+#define APIC_LVT_ERROR   5
+#define APIC_LVT_NB      6
+
+/* APIC delivery modes */
+#define APIC_DM_FIXED	0
+#define APIC_DM_LOWPRI	1
+#define APIC_DM_SMI	2
+#define APIC_DM_NMI	4
+#define APIC_DM_INIT	5
+#define APIC_DM_SIPI	6
+#define APIC_DM_EXTINT	7
+
+/* APIC destination mode */
+#define APIC_DESTMODE_FLAT	0xf
+#define APIC_DESTMODE_CLUSTER	1
+
+#define APIC_TRIGGER_EDGE  0
+#define APIC_TRIGGER_LEVEL 1
+
+#define	APIC_LVT_TIMER_PERIODIC		(1<<17)
+#define	APIC_LVT_MASKED			(1<<16)
+#define	APIC_LVT_LEVEL_TRIGGER		(1<<15)
+#define	APIC_LVT_REMOTE_IRR		(1<<14)
+#define	APIC_INPUT_POLARITY		(1<<13)
+#define	APIC_SEND_PENDING		(1<<12)
+
+#define ESR_ILLEGAL_ADDRESS (1 << 7)
+
+#define APIC_SV_ENABLE (1 << 8)
+
+#define MAX_APICS 255
+#define MAX_APIC_WORDS 8
+
+typedef struct APICState {
+    CPUState *cpu_env;
+    uint32_t apicbase;
+    uint8_t id;
+    uint8_t arb_id;
+    uint8_t tpr;
+    uint32_t spurious_vec;
+    uint8_t log_dest;
+    uint8_t dest_mode;
+    uint32_t isr[8];  /* in service register */
+    uint32_t tmr[8];  /* trigger mode register */
+    uint32_t irr[8]; /* interrupt request register */
+    uint32_t lvt[APIC_LVT_NB];
+    uint32_t esr; /* error register */
+    uint32_t icr[2];
+
+    uint32_t divide_conf;
+    int count_shift;
+    uint32_t initial_count;
+    int64_t initial_count_load_time, next_time;
+    uint32_t idx;
+    QEMUTimer *timer;
+    int sipi_vector;
+    int wait_for_sipi;
+} APICState;
+
+static int apic_io_memory;
+static APICState *local_apics[MAX_APICS + 1];
+static int last_apic_idx = 0;
+static int apic_irq_delivered;
+
+
+static void apic_set_irq(APICState *s, int vector_num, int trigger_mode);
+static void apic_update_irq(APICState *s);
+static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
+                                      uint8_t dest, uint8_t dest_mode);
+
+/* Find first bit starting from msb */
+static int fls_bit(uint32_t value)
+{
+    return 31 - clz32(value);
+}
+
+/* Find first bit starting from lsb */
+static int ffs_bit(uint32_t value)
+{
+    return ctz32(value);
+}
+
+/* Set bit 'index' in the bitmap 'tab' (32 bits per array word). */
+static inline void set_bit(uint32_t *tab, int index)
+{
+    /* Unsigned constant: 1 << 31 on a signed int is undefined behaviour. */
+    uint32_t mask = 1U << (index & 0x1f);
+    tab[index >> 5] |= mask;
+}
+
+/* Clear bit 'index' in the bitmap 'tab' (32 bits per array word). */
+static inline void reset_bit(uint32_t *tab, int index)
+{
+    /* Unsigned constant: 1 << 31 on a signed int is undefined behaviour. */
+    uint32_t mask = 1U << (index & 0x1f);
+    tab[index >> 5] &= ~mask;
+}
+
+/* Return 1 if bit 'index' is set in the bitmap 'tab', 0 otherwise. */
+static inline int get_bit(uint32_t *tab, int index)
+{
+    /* Unsigned constant: 1 << 31 on a signed int is undefined behaviour. */
+    uint32_t mask = 1U << (index & 0x1f);
+    return (tab[index >> 5] & mask) != 0;
+}
+
+static void apic_local_deliver(CPUState *env, int vector)
+{
+    APICState *s = env->apic_state;
+    uint32_t lvt = s->lvt[vector];
+    int trigger_mode;
+
+    if (lvt & APIC_LVT_MASKED)
+        return;
+
+    switch ((lvt >> 8) & 7) {
+    case APIC_DM_SMI:
+        cpu_interrupt(env, CPU_INTERRUPT_SMI);
+        break;
+
+    case APIC_DM_NMI:
+        cpu_interrupt(env, CPU_INTERRUPT_NMI);
+        break;
+
+    case APIC_DM_EXTINT:
+        cpu_interrupt(env, CPU_INTERRUPT_HARD);
+        break;
+
+    case APIC_DM_FIXED:
+        trigger_mode = APIC_TRIGGER_EDGE;
+        if ((vector == APIC_LVT_LINT0 || vector == APIC_LVT_LINT1) &&
+            (lvt & APIC_LVT_LEVEL_TRIGGER))
+            trigger_mode = APIC_TRIGGER_LEVEL;
+        apic_set_irq(s, lvt & 0xff, trigger_mode);
+    }
+}
+
+void apic_deliver_pic_intr(CPUState *env, int level)
+{
+    if (level)
+        apic_local_deliver(env, APIC_LVT_LINT0);
+    else {
+        APICState *s = env->apic_state;
+        uint32_t lvt = s->lvt[APIC_LVT_LINT0];
+
+        switch ((lvt >> 8) & 7) {
+        case APIC_DM_FIXED:
+            if (!(lvt & APIC_LVT_LEVEL_TRIGGER))
+                break;
+            reset_bit(s->irr, lvt & 0xff);
+            /* fall through */
+        case APIC_DM_EXTINT:
+            cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
+            break;
+        }
+    }
+}
+
+#define foreach_apic(apic, deliver_bitmask, code) \
+{ /* run 'code' for each non-NULL local_apics[] entry selected by the mask */\
+    int __i, __j; uint32_t __mask;\
+    for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+        __mask = deliver_bitmask[__i];\
+        if (__mask) {\
+            for(__j = 0; __j < 32; __j++) {\
+                if (__mask & (1U << __j)) { /* 1U: 1 << 31 overflows int */\
+                    apic = local_apics[__i * 32 + __j];\
+                    if (apic) {\
+                        code;\
+                    }\
+                }\
+            }\
+        }\
+    }\
+}
+
+static void apic_bus_deliver(const uint32_t *deliver_bitmask,
+                             uint8_t delivery_mode,
+                             uint8_t vector_num, uint8_t polarity,
+                             uint8_t trigger_mode)
+{
+    APICState *apic_iter;
+
+    switch (delivery_mode) {
+        case APIC_DM_LOWPRI:
+            /* XXX: search for focus processor, arbitration */
+            {
+                int i, d;
+                d = -1;
+                for(i = 0; i < MAX_APIC_WORDS; i++) {
+                    if (deliver_bitmask[i]) {
+                        d = i * 32 + ffs_bit(deliver_bitmask[i]);
+                        break;
+                    }
+                }
+                if (d >= 0) {
+                    apic_iter = local_apics[d];
+                    if (apic_iter) {
+                        apic_set_irq(apic_iter, vector_num, trigger_mode);
+                    }
+                }
+            }
+            return;
+
+        case APIC_DM_FIXED:
+            break;
+
+        case APIC_DM_SMI:
+            foreach_apic(apic_iter, deliver_bitmask,
+                cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_SMI) );
+            return;
+
+        case APIC_DM_NMI:
+            foreach_apic(apic_iter, deliver_bitmask,
+                cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_NMI) );
+            return;
+
+        case APIC_DM_INIT:
+            /* normal INIT IPI sent to processors */
+            foreach_apic(apic_iter, deliver_bitmask,
+                         cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_INIT) );
+            return;
+
+        case APIC_DM_EXTINT:
+            /* handled in I/O APIC code */
+            break;
+
+        default:
+            return;
+    }
+
+    foreach_apic(apic_iter, deliver_bitmask,
+                 apic_set_irq(apic_iter, vector_num, trigger_mode) );
+}
+
+void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
+                      uint8_t delivery_mode, uint8_t vector_num,
+                      uint8_t polarity, uint8_t trigger_mode)
+{
+    uint32_t deliver_bitmask[MAX_APIC_WORDS];
+
+    apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode);
+    apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, polarity,
+                     trigger_mode);
+}
+
+void cpu_set_apic_base(CPUState *env, uint64_t val)
+{
+    APICState *s = env->apic_state;
+#ifdef DEBUG_APIC
+    printf("cpu_set_apic_base: %016" PRIx64 "\n", val);
+#endif
+    if (!s)
+        return;
+    s->apicbase = (val & 0xfffff000) |
+        (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
+    /* if disabled, cannot be enabled again */
+    if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+        s->apicbase &= ~MSR_IA32_APICBASE_ENABLE;
+        env->cpuid_features &= ~CPUID_APIC;
+        s->spurious_vec &= ~APIC_SV_ENABLE;
+    }
+}
+
+uint64_t cpu_get_apic_base(CPUState *env)
+{
+    APICState *s = env->apic_state;
+#ifdef DEBUG_APIC
+    printf("cpu_get_apic_base: %016" PRIx64 "\n",
+           s ? (uint64_t)s->apicbase: 0);
+#endif
+    return s ? s->apicbase : 0;
+}
+
+void cpu_set_apic_tpr(CPUX86State *env, uint8_t val)
+{
+    APICState *s = env->apic_state;
+    if (!s)
+        return;
+    s->tpr = (val & 0x0f) << 4;
+    apic_update_irq(s);
+}
+
+uint8_t cpu_get_apic_tpr(CPUX86State *env)
+{
+    APICState *s = env->apic_state;
+    return s ? s->tpr >> 4 : 0;
+}
+
+/* return -1 if no bit is set */
+static int get_highest_priority_int(uint32_t *tab)
+{
+    int i;
+    for(i = 7; i >= 0; i--) {
+        if (tab[i] != 0) {
+            return i * 32 + fls_bit(tab[i]);
+        }
+    }
+    return -1;
+}
+
+static int apic_get_ppr(APICState *s)
+{
+    int tpr, isrv, ppr;
+
+    tpr = (s->tpr >> 4);
+    isrv = get_highest_priority_int(s->isr);
+    if (isrv < 0)
+        isrv = 0;
+    isrv >>= 4;
+    if (tpr >= isrv)
+        ppr = s->tpr;
+    else
+        ppr = isrv << 4;
+    return ppr;
+}
+
+static int apic_get_arb_pri(APICState *s)
+{
+    /* XXX: arbitration */
+    return 0;
+}
+
+/* signal the CPU if an irq is pending */
+static void apic_update_irq(APICState *s)
+{
+    int irrv, ppr;
+    if (!(s->spurious_vec & APIC_SV_ENABLE))
+        return;
+    irrv = get_highest_priority_int(s->irr);
+    if (irrv < 0)
+        return;
+    ppr = apic_get_ppr(s);
+    if (ppr && (irrv & 0xf0) <= (ppr & 0xf0))
+        return;
+    cpu_interrupt(s->cpu_env, CPU_INTERRUPT_HARD);
+}
+
+void apic_reset_irq_delivered(void)
+{
+    apic_irq_delivered = 0;
+}
+
+int apic_get_irq_delivered(void)
+{
+    return apic_irq_delivered;
+}
+
+static void apic_set_irq(APICState *s, int vector_num, int trigger_mode)
+{
+    apic_irq_delivered += !get_bit(s->irr, vector_num);
+
+    set_bit(s->irr, vector_num);
+    if (trigger_mode)
+        set_bit(s->tmr, vector_num);
+    else
+        reset_bit(s->tmr, vector_num);
+    apic_update_irq(s);
+}
+
+static void apic_eoi(APICState *s)
+{
+    int isrv;
+    isrv = get_highest_priority_int(s->isr);
+    if (isrv < 0)
+        return;
+    reset_bit(s->isr, isrv);
+    /* XXX: send the EOI packet to the APIC bus to allow the I/O APIC to
+            set the remote IRR bit for level triggered interrupts. */
+    apic_update_irq(s);
+}
+
+static int apic_find_dest(uint8_t dest)
+{
+    APICState *apic = local_apics[dest];
+    int i;
+
+    if (apic && apic->id == dest)
+        return dest;  /* shortcut in case apic->id == apic->idx */
+
+    for (i = 0; i < MAX_APICS; i++) {
+        apic = local_apics[i];
+	if (apic && apic->id == dest)
+            return i;
+    }
+
+    return -1;
+}
+
+static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
+                                      uint8_t dest, uint8_t dest_mode)
+{
+    APICState *apic_iter;
+    int i;
+
+    if (dest_mode == 0) {
+        if (dest == 0xff) {
+            memset(deliver_bitmask, 0xff, MAX_APIC_WORDS * sizeof(uint32_t));
+        } else {
+            int idx = apic_find_dest(dest);
+            memset(deliver_bitmask, 0x00, MAX_APIC_WORDS * sizeof(uint32_t));
+            if (idx >= 0)
+                set_bit(deliver_bitmask, idx);
+        }
+    } else {
+        /* XXX: cluster mode */
+        memset(deliver_bitmask, 0x00, MAX_APIC_WORDS * sizeof(uint32_t));
+        for(i = 0; i < MAX_APICS; i++) {
+            apic_iter = local_apics[i];
+            if (apic_iter) {
+                if (apic_iter->dest_mode == 0xf) {
+                    if (dest & apic_iter->log_dest)
+                        set_bit(deliver_bitmask, i);
+                } else if (apic_iter->dest_mode == 0x0) {
+                    if ((dest & 0xf0) == (apic_iter->log_dest & 0xf0) &&
+                        (dest & apic_iter->log_dest & 0x0f)) {
+                        set_bit(deliver_bitmask, i);
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+void apic_init_reset(CPUState *env)
+{
+    APICState *s = env->apic_state;
+    int i;
+
+    if (!s)
+        return;
+
+    s->tpr = 0;
+    s->spurious_vec = 0xff;
+    s->log_dest = 0;
+    s->dest_mode = 0xf;
+    memset(s->isr, 0, sizeof(s->isr));
+    memset(s->tmr, 0, sizeof(s->tmr));
+    memset(s->irr, 0, sizeof(s->irr));
+    for(i = 0; i < APIC_LVT_NB; i++)
+        s->lvt[i] = 1 << 16; /* mask LVT */
+    s->esr = 0;
+    memset(s->icr, 0, sizeof(s->icr));
+    s->divide_conf = 0;
+    s->count_shift = 0;
+    s->initial_count = 0;
+    s->initial_count_load_time = 0;
+    s->next_time = 0;
+    s->wait_for_sipi = 1;
+
+    env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
+}
+
+static void apic_startup(APICState *s, int vector_num)
+{
+    s->sipi_vector = vector_num;
+    cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
+}
+
+void apic_sipi(CPUState *env)
+{
+    APICState *s = env->apic_state;
+
+    cpu_reset_interrupt(env, CPU_INTERRUPT_SIPI);
+
+    if (!s->wait_for_sipi)
+        return;
+
+    env->eip = 0;
+    cpu_x86_load_seg_cache(env, R_CS, s->sipi_vector << 8, s->sipi_vector << 12,
+                           0xffff, 0);
+    env->halted = 0;
+    s->wait_for_sipi = 0;
+}
+
+static void apic_deliver(APICState *s, uint8_t dest, uint8_t dest_mode,
+                         uint8_t delivery_mode, uint8_t vector_num,
+                         uint8_t polarity, uint8_t trigger_mode)
+{
+    uint32_t deliver_bitmask[MAX_APIC_WORDS];
+    int dest_shorthand = (s->icr[0] >> 18) & 3;
+    APICState *apic_iter;
+
+    switch (dest_shorthand) {
+    case 0:
+        apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode);
+        break;
+    case 1:
+        memset(deliver_bitmask, 0x00, sizeof(deliver_bitmask));
+        set_bit(deliver_bitmask, s->idx);
+        break;
+    case 2:
+        memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask));
+        break;
+    case 3:
+        memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask));
+        reset_bit(deliver_bitmask, s->idx);
+        break;
+    }
+
+    switch (delivery_mode) {
+        case APIC_DM_INIT:
+            {
+                int trig_mode = (s->icr[0] >> 15) & 1;
+                int level = (s->icr[0] >> 14) & 1;
+                if (level == 0 && trig_mode == 1) {
+                    foreach_apic(apic_iter, deliver_bitmask,
+                                 apic_iter->arb_id = apic_iter->id );
+                    return;
+                }
+            }
+            break;
+
+        case APIC_DM_SIPI:
+            foreach_apic(apic_iter, deliver_bitmask,
+                         apic_startup(apic_iter, vector_num) );
+            return;
+    }
+
+    apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, polarity,
+                     trigger_mode);
+}
+
+int apic_get_interrupt(CPUState *env)
+{
+    APICState *s = env->apic_state;
+    int intno;
+
+    /* if the APIC is not installed or not enabled, we let the 8259 handle
+       the IRQs */
+    if (!s)
+        return -1;
+    if (!(s->spurious_vec & APIC_SV_ENABLE))
+        return -1;
+
+    /* XXX: spurious IRQ handling */
+    intno = get_highest_priority_int(s->irr);
+    if (intno < 0)
+        return -1;
+    if (s->tpr && intno <= s->tpr)
+        return s->spurious_vec & 0xff;
+    reset_bit(s->irr, intno);
+    set_bit(s->isr, intno);
+    apic_update_irq(s);
+    return intno;
+}
+
+int apic_accept_pic_intr(CPUState *env)
+{
+    APICState *s = env->apic_state;
+    uint32_t lvt0;
+
+    if (!s)
+        return -1;
+
+    lvt0 = s->lvt[APIC_LVT_LINT0];
+
+    if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) == 0 ||
+        (lvt0 & APIC_LVT_MASKED) == 0)
+        return 1;
+
+    return 0;
+}
+
+static uint32_t apic_get_current_count(APICState *s)
+{
+    int64_t d;
+    uint32_t val;
+    d = (qemu_get_clock(vm_clock) - s->initial_count_load_time) >>
+        s->count_shift;
+    if (s->lvt[APIC_LVT_TIMER] & APIC_LVT_TIMER_PERIODIC) {
+        /* periodic */
+        val = s->initial_count - (d % ((uint64_t)s->initial_count + 1));
+    } else {
+        if (d >= s->initial_count)
+            val = 0;
+        else
+            val = s->initial_count - d;
+    }
+    return val;
+}
+
+static void apic_timer_update(APICState *s, int64_t current_time)
+{
+    int64_t next_time, d;
+
+    if (!(s->lvt[APIC_LVT_TIMER] & APIC_LVT_MASKED)) {
+        d = (current_time - s->initial_count_load_time) >>
+            s->count_shift;
+        if (s->lvt[APIC_LVT_TIMER] & APIC_LVT_TIMER_PERIODIC) {
+            if (!s->initial_count)
+                goto no_timer;
+            d = ((d / ((uint64_t)s->initial_count + 1)) + 1) * ((uint64_t)s->initial_count + 1);
+        } else {
+            if (d >= s->initial_count)
+                goto no_timer;
+            d = (uint64_t)s->initial_count + 1;
+        }
+        next_time = s->initial_count_load_time + (d << s->count_shift);
+        qemu_mod_timer(s->timer, next_time);
+        s->next_time = next_time;
+    } else {
+    no_timer:
+        qemu_del_timer(s->timer);
+    }
+}
+
+static void apic_timer(void *opaque)
+{
+    APICState *s = opaque;
+
+    apic_local_deliver(s->cpu_env, APIC_LVT_TIMER);
+    apic_timer_update(s, s->next_time);
+}
+
+static uint32_t apic_mem_readb(void *opaque, target_phys_addr_t addr)
+{
+    return 0;
+}
+
+static uint32_t apic_mem_readw(void *opaque, target_phys_addr_t addr)
+{
+    return 0;
+}
+
+static void apic_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+}
+
+static void apic_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+}
+
+/* 32-bit read of an APIC register; uses the APIC of cpu_single_env. */
+static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
+{
+    CPUState *env = cpu_single_env;
+    APICState *s;
+    uint32_t val;
+    int index;
+
+    if (!env)
+        return 0;
+    s = env->apic_state;
+
+    index = (addr >> 4) & 0xff; /* registers are spaced 16 bytes apart */
+    switch(index) {
+    case 0x02: /* id */
+        val = (uint32_t)s->id << 24; /* cast: uint8_t promotes to signed int */
+        break;
+    case 0x03: /* version */
+        val = 0x11 | ((APIC_LVT_NB - 1) << 16); /* version 0x11 */
+        break;
+    case 0x08: /* tpr */
+        val = s->tpr;
+        break;
+    case 0x09: /* arbitration priority */
+        val = apic_get_arb_pri(s);
+        break;
+    case 0x0a:
+        /* ppr */
+        val = apic_get_ppr(s);
+        break;
+    case 0x0b:
+        val = 0;
+        break;
+    case 0x0d: /* log_dest */
+        val = (uint32_t)s->log_dest << 24;
+        break;
+    case 0x0e: /* dest_mode; reset value 0xf would overflow int if not cast */
+        val = (uint32_t)s->dest_mode << 28;
+        break;
+    case 0x0f: /* spurious vector */
+        val = s->spurious_vec;
+        break;
+    case 0x10 ... 0x17: /* in service register */
+        val = s->isr[index & 7];
+        break;
+    case 0x18 ... 0x1f: /* trigger mode register */
+        val = s->tmr[index & 7];
+        break;
+    case 0x20 ... 0x27: /* interrupt request register */
+        val = s->irr[index & 7];
+        break;
+    case 0x28: /* error register */
+        val = s->esr;
+        break;
+    case 0x30:
+    case 0x31:
+        val = s->icr[index & 1];
+        break;
+    case 0x32 ... 0x37: /* local vector table entries */
+        val = s->lvt[index - 0x32];
+        break;
+    case 0x38:
+        val = s->initial_count;
+        break;
+    case 0x39:
+        val = apic_get_current_count(s);
+        break;
+    case 0x3e:
+        val = s->divide_conf;
+        break;
+    default: /* unknown register: flag it in the error register */
+        s->esr |= ESR_ILLEGAL_ADDRESS;
+        val = 0;
+        break;
+    }
+#ifdef DEBUG_APIC
+    printf("APIC read: %08x = %08x\n", (uint32_t)addr, val);
+#endif
+    return val;
+}
+
+static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+    CPUState *env;
+    APICState *s;
+    int index;
+
+    env = cpu_single_env;
+    if (!env)
+        return;
+    s = env->apic_state;
+
+#ifdef DEBUG_APIC
+    printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
+#endif
+
+    index = (addr >> 4) & 0xff;
+    switch(index) {
+    case 0x02:
+        s->id = (val >> 24);
+        break;
+    case 0x03:
+        break;
+    case 0x08:
+        s->tpr = val;
+        apic_update_irq(s);
+        break;
+    case 0x09:
+    case 0x0a:
+        break;
+    case 0x0b: /* EOI */
+        apic_eoi(s);
+        break;
+    case 0x0d:
+        s->log_dest = val >> 24;
+        break;
+    case 0x0e:
+        s->dest_mode = val >> 28;
+        break;
+    case 0x0f:
+        s->spurious_vec = val & 0x1ff;
+        apic_update_irq(s);
+        break;
+    case 0x10 ... 0x17:
+    case 0x18 ... 0x1f:
+    case 0x20 ... 0x27:
+    case 0x28:
+        break;
+    case 0x30:
+        s->icr[0] = val;
+        apic_deliver(s, (s->icr[1] >> 24) & 0xff, (s->icr[0] >> 11) & 1,
+                     (s->icr[0] >> 8) & 7, (s->icr[0] & 0xff),
+                     (s->icr[0] >> 14) & 1, (s->icr[0] >> 15) & 1);
+        break;
+    case 0x31:
+        s->icr[1] = val;
+        break;
+    case 0x32 ... 0x37:
+        {
+            int n = index - 0x32;
+            s->lvt[n] = val;
+            if (n == APIC_LVT_TIMER)
+                apic_timer_update(s, qemu_get_clock(vm_clock));
+        }
+        break;
+    case 0x38:
+        s->initial_count = val;
+        s->initial_count_load_time = qemu_get_clock(vm_clock);
+        apic_timer_update(s, s->initial_count_load_time);
+        break;
+    case 0x39:
+        break;
+    case 0x3e:
+        {
+            int v;
+            s->divide_conf = val & 0xb;
+            v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
+            s->count_shift = (v + 1) & 7;
+        }
+        break;
+    default:
+        s->esr |= ESR_ILLEGAL_ADDRESS;
+        break;
+    }
+}
+
+static void apic_save(QEMUFile *f, void *opaque)
+{
+    APICState *s = opaque;
+    int i;
+
+    qemu_put_be32s(f, &s->apicbase);
+    qemu_put_8s(f, &s->id);
+    qemu_put_8s(f, &s->arb_id);
+    qemu_put_8s(f, &s->tpr);
+    qemu_put_be32s(f, &s->spurious_vec);
+    qemu_put_8s(f, &s->log_dest);
+    qemu_put_8s(f, &s->dest_mode);
+    for (i = 0; i < 8; i++) {
+        qemu_put_be32s(f, &s->isr[i]);
+        qemu_put_be32s(f, &s->tmr[i]);
+        qemu_put_be32s(f, &s->irr[i]);
+    }
+    for (i = 0; i < APIC_LVT_NB; i++) {
+        qemu_put_be32s(f, &s->lvt[i]);
+    }
+    qemu_put_be32s(f, &s->esr);
+    qemu_put_be32s(f, &s->icr[0]);
+    qemu_put_be32s(f, &s->icr[1]);
+    qemu_put_be32s(f, &s->divide_conf);
+    qemu_put_be32(f, s->count_shift);
+    qemu_put_be32s(f, &s->initial_count);
+    qemu_put_be64(f, s->initial_count_load_time);
+    qemu_put_be64(f, s->next_time);
+
+    qemu_put_timer(f, s->timer);
+}
+
+static int apic_load(QEMUFile *f, void *opaque, int version_id)
+{
+    APICState *s = opaque;
+    int i;
+
+    if (version_id > 2)
+        return -EINVAL;
+
+    /* XXX: what if the base changes? (registered memory regions) */
+    qemu_get_be32s(f, &s->apicbase);
+    qemu_get_8s(f, &s->id);
+    qemu_get_8s(f, &s->arb_id);
+    qemu_get_8s(f, &s->tpr);
+    qemu_get_be32s(f, &s->spurious_vec);
+    qemu_get_8s(f, &s->log_dest);
+    qemu_get_8s(f, &s->dest_mode);
+    for (i = 0; i < 8; i++) {
+        qemu_get_be32s(f, &s->isr[i]);
+        qemu_get_be32s(f, &s->tmr[i]);
+        qemu_get_be32s(f, &s->irr[i]);
+    }
+    for (i = 0; i < APIC_LVT_NB; i++) {
+        qemu_get_be32s(f, &s->lvt[i]);
+    }
+    qemu_get_be32s(f, &s->esr);
+    qemu_get_be32s(f, &s->icr[0]);
+    qemu_get_be32s(f, &s->icr[1]);
+    qemu_get_be32s(f, &s->divide_conf);
+    s->count_shift=qemu_get_be32(f);
+    qemu_get_be32s(f, &s->initial_count);
+    s->initial_count_load_time=qemu_get_be64(f);
+    s->next_time=qemu_get_be64(f);
+
+    if (version_id >= 2)
+        qemu_get_timer(f, s->timer);
+    return 0;
+}
+
+static void apic_reset(void *opaque)
+{
+    APICState *s = opaque;
+    int bsp = cpu_is_bsp(s->cpu_env);
+
+    s->apicbase = 0xfee00000 |
+        (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
+
+    cpu_reset(s->cpu_env);
+    apic_init_reset(s->cpu_env);
+
+    if (bsp) {
+        /*
+         * LINT0 delivery mode on CPU #0 is set to ExtInt at initialization
+         * time typically by BIOS, so PIC interrupt can be delivered to the
+         * processor when local APIC is enabled.
+         */
+        s->lvt[APIC_LVT_LINT0] = 0x700;
+    }
+}
+
+static CPUReadMemoryFunc *apic_mem_read[3] = {
+    apic_mem_readb,
+    apic_mem_readw,
+    apic_mem_readl,
+};
+
+static CPUWriteMemoryFunc *apic_mem_write[3] = {
+    apic_mem_writeb,
+    apic_mem_writew,
+    apic_mem_writel,
+};
+
+int apic_init(CPUState *env)
+{
+    APICState *s;
+
+    if (last_apic_idx >= MAX_APICS)
+        return -1;
+    s = qemu_mallocz(sizeof(APICState));
+    env->apic_state = s;
+    s->idx = last_apic_idx++;
+    s->id = env->cpuid_apic_id;
+    s->cpu_env = env;
+
+    apic_reset(s);
+
+    /* XXX: mapping more APICs at the same memory location */
+    if (apic_io_memory == 0) {
+        /* NOTE: the APIC is directly connected to the CPU - it is not
+           on the global memory bus. */
+        apic_io_memory = cpu_register_io_memory(apic_mem_read,
+                                                apic_mem_write, NULL);
+        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
+                                     apic_io_memory);
+    }
+    s->timer = qemu_new_timer(vm_clock, apic_timer, s);
+
+    register_savevm("apic", s->idx, 2, apic_save, apic_load, s);
+    qemu_register_reset(apic_reset, 0, s);
+
+    local_apics[s->idx] = s;
+    return 0;
+}
+
diff --git a/hw/fdc.h b/hw/fdc.h
new file mode 100644
index 0000000..7b6a9de
--- /dev/null
+++ b/hw/fdc.h
@@ -0,0 +1,11 @@
+/* Public interface of the controller implemented in fdc.c */
+#define MAX_FD 2 /* NOTE(review): presumably the number of drives per controller -- confirm in fdc.c */
+
+typedef struct fdctrl_t fdctrl_t; /* opaque here: the structure is not defined in this header */
+
+fdctrl_t *fdctrl_init (qemu_irq irq, int dma_chann, int mem_mapped,
+                       target_phys_addr_t io_base,
+                       BlockDriverState **fds);
+fdctrl_t *sun4m_fdctrl_init (qemu_irq irq, target_phys_addr_t io_base,
+                             BlockDriverState **fds, qemu_irq *fdc_tc);
+int fdctrl_get_drive_type(fdctrl_t *fdctrl, int drive_num);
diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
new file mode 100644
index 0000000..276c396
--- /dev/null
+++ b/hw/fw_cfg.c
@@ -0,0 +1,288 @@
+/*
+ * QEMU Firmware configuration device emulation
+ *
+ * Copyright (c) 2008 Gleb Natapov
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw.h"
+#include "sysemu.h"
+#include "isa.h"
+#include "fw_cfg.h"
+
+/* Uncomment DEBUG_FW_CFG to make FW_CFG_DPRINTF() print traces via printf() */
+//#define DEBUG_FW_CFG
+
+#ifdef DEBUG_FW_CFG
+#define FW_CFG_DPRINTF(fmt, ...)                        \
+    do { printf("FW_CFG: " fmt , ## __VA_ARGS__); } while (0)
+#else
+#define FW_CFG_DPRINTF(fmt, ...) /* expands to nothing when tracing is off */
+#endif
+
+#define FW_CFG_SIZE 2 /* region length passed to cpu_register_physical_memory() in fw_cfg_init() */
+
+typedef struct _FWCfgEntry {
+    uint16_t len;           /* number of bytes available at data */
+    uint8_t *data;          /* caller-supplied buffer; stored by pointer, not copied */
+    void *callback_opaque;  /* first argument handed to callback */
+    FWCfgCallback callback; /* called by fw_cfg_write() once cur_offset reaches len */
+} FWCfgEntry;
+
+typedef struct _FWCfgState {
+    FWCfgEntry entries[2][FW_CFG_MAX_ENTRY]; /* row 1 for keys with FW_CFG_ARCH_LOCAL set, row 0 otherwise */
+    uint16_t cur_entry;  /* key stored by fw_cfg_select(), or FW_CFG_INVALID */
+    uint16_t cur_offset; /* position of the next byte read or written in the selected entry */
+} FWCfgState;
+
+/* Guest data write: accepted only for an in-range write-channel key whose entry has a callback. */
+static void fw_cfg_write(FWCfgState *s, uint8_t value)
+{
+    int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
+    uint16_t idx = s->cur_entry & FW_CFG_ENTRY_MASK; /* 0x3fff for FW_CFG_INVALID, i.e. out of range */
+    FWCfgEntry *e = idx < FW_CFG_MAX_ENTRY ? &s->entries[arch][idx] : NULL;
+    FW_CFG_DPRINTF("write %d\n", value);
+    if (e && e->callback && (s->cur_entry & FW_CFG_WRITE_CHANNEL) && s->cur_offset < e->len) {
+        e->data[s->cur_offset++] = value;
+        if (s->cur_offset == e->len) { /* buffer complete: notify the owner and rewind */
+            e->callback(e->callback_opaque, e->data);
+            s->cur_offset = 0;
+        }
+    }
+}
+
+/*
+ * Make 'key' the current entry and rewind the data offset to 0.
+ * A key whose index part (key & FW_CFG_ENTRY_MASK) is not below
+ * FW_CFG_MAX_ENTRY selects FW_CFG_INVALID instead.
+ * Returns 1 if the key was accepted, 0 if it was rejected.
+ */
+static int fw_cfg_select(FWCfgState *s, uint16_t key)
+{
+    int found = (key & FW_CFG_ENTRY_MASK) < FW_CFG_MAX_ENTRY;
+
+    s->cur_offset = 0;
+    s->cur_entry = found ? key : FW_CFG_INVALID;
+
+    FW_CFG_DPRINTF("select key %d (%sfound)\n", key, found ? "" : "not ");
+
+    return found;
+}
+
+/* Return the next byte of the selected entry; 0 if the key is out of range, has no data, or is exhausted. */
+static uint8_t fw_cfg_read(FWCfgState *s)
+{
+    int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
+    uint16_t idx = s->cur_entry & FW_CFG_ENTRY_MASK; /* 0x3fff for FW_CFG_INVALID, i.e. out of range */
+    FWCfgEntry *e = idx < FW_CFG_MAX_ENTRY ? &s->entries[arch][idx] : NULL;
+    uint8_t ret = 0;
+
+    if (e && e->data && s->cur_offset < e->len)
+        ret = e->data[s->cur_offset++];
+
+    FW_CFG_DPRINTF("read %d\n", ret);
+
+    return ret;
+}
+
+static uint32_t fw_cfg_io_readb(void *opaque, uint32_t addr)
+{
+    return fw_cfg_read(opaque); /* port read of the data register; addr is unused */
+}
+
+static void fw_cfg_io_writeb(void *opaque, uint32_t addr, uint32_t value)
+{
+    fw_cfg_write(opaque, (uint8_t)value); /* port write of the data register; only the low 8 bits are used */
+}
+
+static void fw_cfg_io_writew(void *opaque, uint32_t addr, uint32_t value)
+{
+    fw_cfg_select(opaque, (uint16_t)value); /* port write of the control register selects a key; result is ignored */
+}
+
+static uint32_t fw_cfg_mem_readb(void *opaque, target_phys_addr_t addr)
+{
+    return fw_cfg_read(opaque); /* memory-mapped counterpart of fw_cfg_io_readb(); addr is unused */
+}
+
+static void fw_cfg_mem_writeb(void *opaque, target_phys_addr_t addr,
+                              uint32_t value)
+{
+    fw_cfg_write(opaque, (uint8_t)value); /* memory-mapped data write; only the low 8 bits are used */
+}
+
+static void fw_cfg_mem_writew(void *opaque, target_phys_addr_t addr,
+                              uint32_t value)
+{
+    fw_cfg_select(opaque, (uint16_t)value); /* memory-mapped control write selects a key; result is ignored */
+}
+
+static CPUReadMemoryFunc *fw_cfg_ctl_mem_read[3] = { /* control register: no read handler in any slot */
+    NULL,
+    NULL,
+    NULL,
+};
+
+static CPUWriteMemoryFunc *fw_cfg_ctl_mem_write[3] = { /* control register: only the middle slot has a handler */
+    NULL,
+    fw_cfg_mem_writew,
+    NULL,
+};
+
+static CPUReadMemoryFunc *fw_cfg_data_mem_read[3] = { /* data register: only the first slot has a handler */
+    fw_cfg_mem_readb,
+    NULL,
+    NULL,
+};
+
+static CPUWriteMemoryFunc *fw_cfg_data_mem_write[3] = { /* data register: only the first slot has a handler */
+    fw_cfg_mem_writeb,
+    NULL,
+    NULL,
+};
+
+static void fw_cfg_reset(void *opaque)
+{
+    FWCfgState *s = opaque;
+    /* Reset handler: select key 0, which also rewinds cur_offset. */
+    fw_cfg_select(s, 0);
+}
+
+static void fw_cfg_save(QEMUFile *f, void *opaque)
+{
+    FWCfgState *s = opaque;
+    /* Only the selector state is saved; entry contents are not written. Order must match fw_cfg_load(). */
+    qemu_put_be16s(f, &s->cur_entry);
+    qemu_put_be16s(f, &s->cur_offset);
+}
+
+/* Restore the selector state; returns 0, or -EINVAL for a version newer than 1. */
+static int fw_cfg_load(QEMUFile *f, void *opaque, int version_id)
+{
+    FWCfgState *s = opaque;
+    if (version_id > 1)
+        return -EINVAL;
+    qemu_get_be16s(f, &s->cur_entry);
+    qemu_get_be16s(f, &s->cur_offset);
+    if ((s->cur_entry & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY)
+        s->cur_entry = FW_CFG_INVALID; /* same range rule as fw_cfg_select(): never keep an out-of-table index */
+    return 0;
+}
+
+int fw_cfg_add_bytes(void *opaque, uint16_t key, uint8_t *data, uint16_t len)
+{
+    FWCfgState *s = opaque;
+    int arch = !!(key & FW_CFG_ARCH_LOCAL);
+    /* Publish 'len' bytes at 'data' under 'key'.  Returns 1, or 0 if the index is out of range. */
+    key &= FW_CFG_ENTRY_MASK;
+
+    if (key >= FW_CFG_MAX_ENTRY)
+        return 0;
+
+    s->entries[arch][key].data = data; /* pointer is stored as-is: the buffer must outlive the device */
+    s->entries[arch][key].len = len;
+
+    return 1;
+}
+
+int fw_cfg_add_i16(void *opaque, uint16_t key, uint16_t value)
+{
+    uint16_t *copy;
+    /* Allocate a copy converted with cpu_to_le16() and register it; the copy is not freed here. */
+    copy = qemu_malloc(sizeof(value));
+    *copy = cpu_to_le16(value);
+    return fw_cfg_add_bytes(opaque, key, (uint8_t *)copy, sizeof(value));
+}
+
+int fw_cfg_add_i32(void *opaque, uint16_t key, uint32_t value)
+{
+    uint32_t *copy;
+    /* Allocate a copy converted with cpu_to_le32() and register it; the copy is not freed here. */
+    copy = qemu_malloc(sizeof(value));
+    *copy = cpu_to_le32(value);
+    return fw_cfg_add_bytes(opaque, key, (uint8_t *)copy, sizeof(value));
+}
+
+int fw_cfg_add_i64(void *opaque, uint16_t key, uint64_t value)
+{
+    uint64_t *copy;
+    /* Allocate a copy converted with cpu_to_le64() and register it; the copy is not freed here. */
+    copy = qemu_malloc(sizeof(value));
+    *copy = cpu_to_le64(value);
+    return fw_cfg_add_bytes(opaque, key, (uint8_t *)copy, sizeof(value));
+}
+
+int fw_cfg_add_callback(void *opaque, uint16_t key, FWCfgCallback callback,
+                        void *callback_opaque, uint8_t *data, size_t len)
+{
+    FWCfgState *s = opaque;
+    int arch = !!(key & FW_CFG_ARCH_LOCAL);
+    /* Register a guest-writable entry.  Returns 1 on success, 0 if the key lacks FW_CFG_WRITE_CHANNEL or a limit is exceeded. */
+    if (!(key & FW_CFG_WRITE_CHANNEL))
+        return 0;
+
+    key &= FW_CFG_ENTRY_MASK;
+
+    if (key >= FW_CFG_MAX_ENTRY || len > 65535) /* len is stored in a uint16_t field */
+        return 0;
+
+    s->entries[arch][key].data = data;
+    s->entries[arch][key].len = len;
+    s->entries[arch][key].callback_opaque = callback_opaque;
+    s->entries[arch][key].callback = callback;
+
+    return 1;
+}
+
+void *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
+		target_phys_addr_t ctl_addr, target_phys_addr_t data_addr)
+{
+    FWCfgState *s;
+    int io_ctl_memory, io_data_memory;
+    /* Create the device and return its state.  Each of the four locations is optional: 0 skips that registration. */
+    s = qemu_mallocz(sizeof(FWCfgState));
+
+    if (ctl_port) {
+        register_ioport_write(ctl_port, 2, 2, fw_cfg_io_writew, s);
+    }
+    if (data_port) {
+        register_ioport_read(data_port, 1, 1, fw_cfg_io_readb, s);
+        register_ioport_write(data_port, 1, 1, fw_cfg_io_writeb, s);
+    }
+    if (ctl_addr) {
+        io_ctl_memory = cpu_register_io_memory(fw_cfg_ctl_mem_read,
+                                           fw_cfg_ctl_mem_write, s);
+        cpu_register_physical_memory(ctl_addr, FW_CFG_SIZE, io_ctl_memory);
+    }
+    if (data_addr) {
+        io_data_memory = cpu_register_io_memory(fw_cfg_data_mem_read,
+                                           fw_cfg_data_mem_write, s);
+        cpu_register_physical_memory(data_addr, FW_CFG_SIZE, io_data_memory);
+    }
+    fw_cfg_add_bytes(s, FW_CFG_SIGNATURE, (uint8_t *)"QEMU", 4); /* 4 bytes, no terminating NUL; points at a string literal */
+    fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16);
+    fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type == DT_NOGRAPHIC)); /* stores 1 or 0 */
+    fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
+
+    register_savevm("fw_cfg", -1, 1, fw_cfg_save, fw_cfg_load, s);
+    qemu_register_reset(fw_cfg_reset, 0, s);
+    fw_cfg_reset(s); /* start with key 0 selected */
+
+    return s;
+}
diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
new file mode 100644
index 0000000..f616ed2
--- /dev/null
+++ b/hw/fw_cfg.h
@@ -0,0 +1,40 @@
+#ifndef FW_CFG_H
+#define FW_CFG_H
+/* Entry indices: the part of a key left after masking with FW_CFG_ENTRY_MASK; must be below FW_CFG_MAX_ENTRY. */
+#define FW_CFG_SIGNATURE        0x00
+#define FW_CFG_ID               0x01
+#define FW_CFG_UUID             0x02
+#define FW_CFG_RAM_SIZE         0x03
+#define FW_CFG_NOGRAPHIC        0x04
+#define FW_CFG_NB_CPUS          0x05
+#define FW_CFG_MACHINE_ID       0x06
+#define FW_CFG_KERNEL_ADDR      0x07
+#define FW_CFG_KERNEL_SIZE      0x08
+#define FW_CFG_KERNEL_CMDLINE   0x09
+#define FW_CFG_INITRD_ADDR      0x0a
+#define FW_CFG_INITRD_SIZE      0x0b
+#define FW_CFG_BOOT_DEVICE      0x0c
+#define FW_CFG_NUMA             0x0d
+#define FW_CFG_MAX_ENTRY        0x10
+/* Flag bits carried in a key next to the entry index. */
+#define FW_CFG_WRITE_CHANNEL    0x4000
+#define FW_CFG_ARCH_LOCAL       0x8000
+#define FW_CFG_ENTRY_MASK       (~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))
+/* Selector value meaning that no entry is selected. */
+#define FW_CFG_INVALID          0xffff
+
+#ifndef NO_QEMU_PROTOS
+typedef void (*FWCfgCallback)(void *opaque, uint8_t *data);
+/* 'opaque' is the pointer returned by fw_cfg_init(); the add functions return 1 on success, 0 on a rejected key. */
+int fw_cfg_add_bytes(void *opaque, uint16_t key, uint8_t *data, uint16_t len);
+int fw_cfg_add_i16(void *opaque, uint16_t key, uint16_t value);
+int fw_cfg_add_i32(void *opaque, uint16_t key, uint32_t value);
+int fw_cfg_add_i64(void *opaque, uint16_t key, uint64_t value);
+int fw_cfg_add_callback(void *opaque, uint16_t key, FWCfgCallback callback,
+                        void *callback_opaque, uint8_t *data, size_t len);
+void *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
+		target_phys_addr_t ctl_addr, target_phys_addr_t data_addr);
+
+#endif /* NO_QEMU_PROTOS */
+
+#endif /* FW_CFG_H */
diff --git a/hw/i8254.c b/hw/i8254.c
new file mode 100644
index 0000000..c202c9c
--- /dev/null
+++ b/hw/i8254.c
@@ -0,0 +1,507 @@
+/*
+ * QEMU 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw.h"
+#include "pc.h"
+#include "isa.h"
+#include "qemu-timer.h"
+
+//#define DEBUG_PIT
+
+#define RW_STATE_LSB 1 /* access transfers the low byte only */
+#define RW_STATE_MSB 2 /* access transfers the high byte only */
+#define RW_STATE_WORD0 3 /* two-byte access: low byte is next */
+#define RW_STATE_WORD1 4 /* two-byte access: high byte is next */
+
+typedef struct PITChannelState {
+    int count; /* can be 65536 */
+    uint16_t latched_count; /* value captured by pit_latch_count() */
+    uint8_t count_latched; /* 0 when nothing is latched, otherwise the RW_STATE_* still to be read */
+    uint8_t status_latched; /* set by the read-back command, cleared by the next port read */
+    uint8_t status; /* status byte assembled by the read-back command */
+    uint8_t read_state; /* RW_STATE_* used for unlatched counter reads */
+    uint8_t write_state; /* RW_STATE_* used for counter writes */
+    uint8_t write_latch; /* low byte kept between the two writes of a 16-bit load */
+    uint8_t rw_mode; /* access field of the last control word */
+    uint8_t mode; /* mode field of the last control word */
+    uint8_t bcd; /* not supported */
+    uint8_t gate; /* timer start */
+    int64_t count_load_time; /* vm_clock value when counting was last (re)started */
+    /* irq handling */
+    int64_t next_transition_time; /* result of pit_get_next_transition_time(), -1 if none */
+    QEMUTimer *irq_timer; /* created in pit_init() for channel 0 only */
+    qemu_irq irq;
+} PITChannelState;
+
+struct PITState {
+    PITChannelState channels[3]; /* indexed by (port address & 3) for counter accesses */
+};
+
+static PITState pit_state; /* the single instance handed out by pit_init() */
+
+static void pit_irq_timer_update(PITChannelState *s, int64_t current_time); /* defined after the port handlers */
+
+static int pit_get_count(PITChannelState *s)
+{
+    uint64_t d;
+    int counter;
+    /* d: vm_clock time since count_load_time, rescaled with PIT_FREQ / get_ticks_per_sec() (assuming muldiv64(a, b, c) is a * b / c) */
+    d = muldiv64(qemu_get_clock(vm_clock) - s->count_load_time, PIT_FREQ, get_ticks_per_sec());
+    switch(s->mode) {
+    case 0:
+    case 1:
+    case 4:
+    case 5:
+        counter = (s->count - d) & 0xffff; /* counts down once and wraps within 16 bits */
+        break;
+    case 3:
+        /* XXX: may be incorrect for odd counts */
+        counter = s->count - ((2 * d) % s->count); /* decrements by two per tick, repeating */
+        break;
+    default:
+        counter = s->count - (d % s->count); /* repeating countdown from count */
+        break;
+    }
+    return counter;
+}
+
+/* Compute the channel's output bit (0 or 1) at current_time according to its mode. */
+static int pit_get_out1(PITChannelState *s, int64_t current_time)
+{
+    uint64_t d;
+    int out;
+    /* d: time since count_load_time, rescaled with PIT_FREQ / get_ticks_per_sec() (assuming muldiv64(a, b, c) is a * b / c) */
+    d = muldiv64(current_time - s->count_load_time, PIT_FREQ, get_ticks_per_sec());
+    switch(s->mode) {
+    default:
+    case 0:
+        out = (d >= s->count); /* 0 until count ticks have elapsed, then 1 */
+        break;
+    case 1:
+        out = (d < s->count); /* 1 while fewer than count ticks have elapsed */
+        break;
+    case 2:
+        if ((d % s->count) == 0 && d != 0) /* 1 only on exact multiples of count, excluding the start */
+            out = 1;
+        else
+            out = 0;
+        break;
+    case 3:
+        out = (d % s->count) < ((s->count + 1) >> 1); /* 1 during the first (rounded-up) half of each period */
+        break;
+    case 4:
+    case 5:
+        out = (d == s->count); /* 1 only on the tick where d equals count */
+        break;
+    }
+    return out;
+}
+
+/* Output bit of the given channel at current_time (wrapper over pit_get_out1). */
+int pit_get_out(PITState *pit, int channel, int64_t current_time)
+{
+    return pit_get_out1(&pit->channels[channel], current_time);
+}
+
+/* Return the time (in current_time's units) of the next output change, or -1 if no transition will occur. */
+static int64_t pit_get_next_transition_time(PITChannelState *s,
+                                            int64_t current_time)
+{
+    uint64_t d, next_time, base;
+    int period2;
+    /* d and next_time are in PIT ticks counted from count_load_time until the conversion at the end */
+    d = muldiv64(current_time - s->count_load_time, PIT_FREQ, get_ticks_per_sec());
+    switch(s->mode) {
+    default:
+    case 0:
+    case 1:
+        if (d < s->count)
+            next_time = s->count;
+        else
+            return -1;
+        break;
+    case 2:
+        base = (d / s->count) * s->count; /* start of the current period */
+        if ((d - base) == 0 && d != 0)
+            next_time = base + s->count;
+        else
+            next_time = base + s->count + 1;
+        break;
+    case 3:
+        base = (d / s->count) * s->count; /* start of the current period */
+        period2 = ((s->count + 1) >> 1); /* half period, rounded up */
+        if ((d - base) < period2)
+            next_time = base + period2;
+        else
+            next_time = base + s->count;
+        break;
+    case 4:
+    case 5:
+        if (d < s->count)
+            next_time = s->count;
+        else if (d == s->count)
+            next_time = s->count + 1;
+        else
+            return -1;
+        break;
+    }
+    /* convert to timer units */
+    next_time = s->count_load_time + muldiv64(next_time, get_ticks_per_sec(), PIT_FREQ);
+    /* fix potential rounding problems */
+    /* XXX: better solution: use a clock at PIT_FREQ Hz */
+    if (next_time <= current_time)
+        next_time = current_time + 1; /* never return a time that is not after current_time */
+    return next_time;
+}
+
+/* Store a new gate level for a channel; val must be 0 or 1 */
+void pit_set_gate(PITState *pit, int channel, int val)
+{
+    PITChannelState *s = &pit->channels[channel];
+    /* the stored gate is updated after the switch so the old level is still available for edge detection */
+    switch(s->mode) {
+    default:
+    case 0:
+    case 4:
+        /* XXX: just disable/enable counting */
+        break;
+    case 1:
+    case 5:
+        if (s->gate < val) {
+            /* restart counting on rising edge */
+            s->count_load_time = qemu_get_clock(vm_clock);
+            pit_irq_timer_update(s, s->count_load_time);
+        }
+        break;
+    case 2:
+    case 3:
+        if (s->gate < val) {
+            /* restart counting on rising edge */
+            s->count_load_time = qemu_get_clock(vm_clock);
+            pit_irq_timer_update(s, s->count_load_time);
+        }
+        /* XXX: disable/enable counting */
+        break;
+    }
+    s->gate = val;
+}
+
+/* Report the gate level currently stored for the given channel. */
+int pit_get_gate(PITState *pit, int channel)
+{
+    return pit->channels[channel].gate;
+}
+
+/* Report the count most recently loaded into the given channel. */
+int pit_get_initial_count(PITState *pit, int channel)
+{
+    return pit->channels[channel].count;
+}
+
+/* Report the mode currently stored for the given channel. */
+int pit_get_mode(PITState *pit, int channel)
+{
+    return pit->channels[channel].mode;
+}
+
+static inline void pit_load_count(PITChannelState *s, int val)
+{
+    if (val == 0)
+        val = 0x10000; /* a programmed 0 stands for 65536, so count is never stored as 0 here */
+    s->count_load_time = qemu_get_clock(vm_clock); /* counting restarts from now */
+    s->count = val;
+    pit_irq_timer_update(s, s->count_load_time); /* re-evaluate output and re-arm the IRQ timer, if any */
+}
+
+/* Capture the current count for later reads; if already latched, do not latch again */
+static void pit_latch_count(PITChannelState *s)
+{
+    if (!s->count_latched) {
+        s->latched_count = pit_get_count(s); /* stored in a uint16_t field */
+        s->count_latched = s->rw_mode; /* non-zero marks the latch as pending and records how it is read out */
+    }
+}
+
+static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    PITState *pit = opaque;
+    int channel, access;
+    PITChannelState *s;
+    /* Port write handler: offset 3 takes control words, offsets 0-2 load the matching channel's counter. */
+    addr &= 3;
+    if (addr == 3) {
+        channel = val >> 6; /* top two bits of the control word select the channel */
+        if (channel == 3) {
+            /* read back command */
+            for(channel = 0; channel < 3; channel++) {
+                s = &pit->channels[channel];
+                if (val & (2 << channel)) { /* bits 1..3 pick channels 0..2 */
+                    if (!(val & 0x20)) { /* bit 5 clear: latch the count */
+                        pit_latch_count(s);
+                    }
+                    if (!(val & 0x10) && !s->status_latched) { /* bit 4 clear: latch status unless one is pending */
+                        /* status latch */
+                        /* XXX: add BCD and null count */
+                        s->status =  (pit_get_out1(s, qemu_get_clock(vm_clock)) << 7) |
+                            (s->rw_mode << 4) |
+                            (s->mode << 1) |
+                            s->bcd;
+                        s->status_latched = 1;
+                    }
+                }
+            }
+        } else {
+            s = &pit->channels[channel];
+            access = (val >> 4) & 3;
+            if (access == 0) { /* access field 0 is the counter latch command */
+                pit_latch_count(s);
+            } else {
+                s->rw_mode = access;
+                s->read_state = access;
+                s->write_state = access;
+
+                s->mode = (val >> 1) & 7;
+                s->bcd = val & 1;
+                /* XXX: update irq timer ? */
+            }
+        }
+    } else {
+        s = &pit->channels[addr];
+        switch(s->write_state) {
+        default:
+        case RW_STATE_LSB:
+            pit_load_count(s, val);
+            break;
+        case RW_STATE_MSB:
+            pit_load_count(s, val << 8);
+            break;
+        case RW_STATE_WORD0:
+            s->write_latch = val; /* keep the low byte until the high byte arrives */
+            s->write_state = RW_STATE_WORD1;
+            break;
+        case RW_STATE_WORD1:
+            pit_load_count(s, s->write_latch | (val << 8));
+            s->write_state = RW_STATE_WORD0;
+            break;
+        }
+    }
+}
+
+static uint32_t pit_ioport_read(void *opaque, uint32_t addr)
+{
+    PITState *pit = opaque;
+    int ret, count;
+    PITChannelState *s;
+    /* Port read handler.  Priority: latched status, then latched count, then the live count. */
+    addr &= 3;
+    s = &pit->channels[addr]; /* NOTE(review): relies on pit_init() registering only 3 read ports so addr stays 0..2 -- confirm base alignment */
+    if (s->status_latched) {
+        s->status_latched = 0;
+        ret = s->status;
+    } else if (s->count_latched) {
+        switch(s->count_latched) {
+        default:
+        case RW_STATE_LSB:
+            ret = s->latched_count & 0xff;
+            s->count_latched = 0;
+            break;
+        case RW_STATE_MSB:
+            ret = s->latched_count >> 8;
+            s->count_latched = 0;
+            break;
+        case RW_STATE_WORD0:
+            ret = s->latched_count & 0xff;
+            s->count_latched = RW_STATE_MSB; /* the high byte is returned by the next read */
+            break;
+        }
+    } else {
+        switch(s->read_state) {
+        default:
+        case RW_STATE_LSB:
+            count = pit_get_count(s);
+            ret = count & 0xff;
+            break;
+        case RW_STATE_MSB:
+            count = pit_get_count(s);
+            ret = (count >> 8) & 0xff;
+            break;
+        case RW_STATE_WORD0:
+            count = pit_get_count(s);
+            ret = count & 0xff;
+            s->read_state = RW_STATE_WORD1;
+            break;
+        case RW_STATE_WORD1:
+            count = pit_get_count(s); /* sampled again, so the two bytes come from separate samples */
+            ret = (count >> 8) & 0xff;
+            s->read_state = RW_STATE_WORD0;
+            break;
+        }
+    }
+    return ret;
+}
+
+static void pit_irq_timer_update(PITChannelState *s, int64_t current_time)
+{
+    int64_t expire_time;
+    int irq_level;
+    /* Drive the IRQ line from the channel output and arm the timer for the next transition. */
+    if (!s->irq_timer) /* channels without a timer do nothing here */
+        return;
+    expire_time = pit_get_next_transition_time(s, current_time);
+    irq_level = pit_get_out1(s, current_time);
+    qemu_set_irq(s->irq, irq_level);
+#ifdef DEBUG_PIT
+    printf("irq_level=%d next_delay=%f\n",
+           irq_level,
+           (double)(expire_time - current_time) / get_ticks_per_sec());
+#endif
+    s->next_transition_time = expire_time;
+    if (expire_time != -1)
+        qemu_mod_timer(s->irq_timer, expire_time);
+    else
+        qemu_del_timer(s->irq_timer); /* no further transition: cancel the timer */
+}
+
+/* Timer callback: re-evaluate the channel at the transition time it was armed for. */
+static void pit_irq_timer(void *opaque)
+{
+    PITChannelState *chan = opaque;
+    pit_irq_timer_update(chan, chan->next_transition_time);
+}
+
+static void pit_save(QEMUFile *f, void *opaque)
+{
+    PITState *pit = opaque;
+    PITChannelState *s;
+    int i;
+    /* Write all three channels; field order and widths must match pit_load(). */
+    for(i = 0; i < 3; i++) {
+        s = &pit->channels[i];
+        qemu_put_be32(f, s->count);
+        qemu_put_be16s(f, &s->latched_count);
+        qemu_put_8s(f, &s->count_latched);
+        qemu_put_8s(f, &s->status_latched);
+        qemu_put_8s(f, &s->status);
+        qemu_put_8s(f, &s->read_state);
+        qemu_put_8s(f, &s->write_state);
+        qemu_put_8s(f, &s->write_latch);
+        qemu_put_8s(f, &s->rw_mode);
+        qemu_put_8s(f, &s->mode);
+        qemu_put_8s(f, &s->bcd);
+        qemu_put_8s(f, &s->gate);
+        qemu_put_be64(f, s->count_load_time);
+        if (s->irq_timer) { /* timer fields are written only for channels that own a timer */
+            qemu_put_be64(f, s->next_transition_time);
+            qemu_put_timer(f, s->irq_timer);
+        }
+    }
+}
+
+static int pit_load(QEMUFile *f, void *opaque, int version_id)
+{
+    PITState *pit = opaque;
+    PITChannelState *s;
+    int i;
+    /* Restore all three channels in pit_save() order.  Returns 0, or -EINVAL unless version_id is 1. */
+    if (version_id != 1)
+        return -EINVAL;
+
+    for(i = 0; i < 3; i++) {
+        s = &pit->channels[i];
+        s->count=qemu_get_be32(f); /* NOTE(review): not validated; a 0 here would reach the '% s->count' expressions */
+        qemu_get_be16s(f, &s->latched_count);
+        qemu_get_8s(f, &s->count_latched);
+        qemu_get_8s(f, &s->status_latched);
+        qemu_get_8s(f, &s->status);
+        qemu_get_8s(f, &s->read_state);
+        qemu_get_8s(f, &s->write_state);
+        qemu_get_8s(f, &s->write_latch);
+        qemu_get_8s(f, &s->rw_mode);
+        qemu_get_8s(f, &s->mode);
+        qemu_get_8s(f, &s->bcd);
+        qemu_get_8s(f, &s->gate);
+        s->count_load_time=qemu_get_be64(f);
+        if (s->irq_timer) { /* timer fields are present only for channels that own a timer */
+            s->next_transition_time=qemu_get_be64(f);
+            qemu_get_timer(f, s->irq_timer);
+        }
+    }
+    return 0;
+}
+
+/* Reset handler: every channel goes to mode 3 and is reloaded with count 0;
+   the gate is set to 1 on channels 0 and 1 and to 0 on channel 2. */
+static void pit_reset(void *opaque)
+{
+    PITState *pit = opaque;
+    int ch;
+    for (ch = 0; ch < 3; ch++) {
+        PITChannelState *chan = &pit->channels[ch];
+        chan->mode = 3;
+        chan->gate = (ch != 2);
+        pit_load_count(chan, 0);
+    }
+}
+
+/* While the HPET operates in legacy mode, the IRQ timer of i8254 channel 0 is cancelled. */
+void hpet_pit_disable(void)
+{
+    QEMUTimer *t = pit_state.channels[0].irq_timer;
+    if (t)
+        qemu_del_timer(t);
+}
+
+/*
+ * Called when the HPET is reset or leaves legacy mode: i8254 channel 0 is
+ * put back into mode 3 with its gate set and reloaded with a count of 0
+ * (which pit_load_count() turns into 0x10000).
+ */
+void hpet_pit_enable(void)
+{
+    PITChannelState *timer0 = &pit_state.channels[0];
+
+    timer0->mode = 3;
+    timer0->gate = 1;
+    pit_load_count(timer0, 0);
+}
+
+PITState *pit_init(int base, qemu_irq irq)
+{
+    PITState *pit = &pit_state; /* always the one static instance */
+    PITChannelState *s;
+    /* Set up the PIT at I/O port 'base' with channel 0 wired to 'irq'; returns the device state. */
+    s = &pit->channels[0];
+    /* the timer 0 is connected to an IRQ */
+    s->irq_timer = qemu_new_timer(vm_clock, pit_irq_timer, s);
+    s->irq = irq;
+
+    register_savevm("i8254", base, 1, pit_save, pit_load, pit);
+
+    qemu_register_reset(pit_reset, 0, pit);
+    register_ioport_write(base, 4, 1, pit_ioport_write, pit); /* 4 ports: three counters plus the control word */
+    register_ioport_read(base, 3, 1, pit_ioport_read, pit); /* 3 ports: the counters only */
+
+    pit_reset(pit); /* start from the reset state immediately */
+
+    return pit;
+}
diff --git a/hw/i8259.c b/hw/i8259.c
new file mode 100644
index 0000000..091ba7a
--- /dev/null
+++ b/hw/i8259.c
@@ -0,0 +1,570 @@
+/*
+ * QEMU 8259 interrupt controller emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw.h"
+#include "pc.h"
+#include "isa.h"
+#include "monitor.h"
+
+/* debug PIC */
+//#define DEBUG_PIC
+
+//#define DEBUG_IRQ_LATENCY
+//#define DEBUG_IRQ_COUNT
+
+typedef struct PicState {
+    uint8_t last_irr; /* edge detection */
+    uint8_t irr; /* interrupt request register */
+    uint8_t imr; /* interrupt mask register */
+    uint8_t isr; /* interrupt service register */
+    uint8_t priority_add; /* highest irq priority */
+    uint8_t irq_base; /* added to the irq number to form the value returned by pic_read_irq() */
+    uint8_t read_reg_select;
+    uint8_t poll;
+    uint8_t special_mask; /* when set, masked lines are ignored in the in-service priority */
+    uint8_t init_state;
+    uint8_t auto_eoi; /* when set, acknowledging does not mark the irq in isr */
+    uint8_t rotate_on_auto_eoi; /* with auto_eoi: priority_add moves past the acknowledged irq */
+    uint8_t special_fully_nested_mode;
+    uint8_t init4; /* true if 4 byte init */
+    uint8_t single_mode; /* true if slave pic is not initialized */
+    uint8_t elcr; /* PIIX edge/trigger selection*/
+    uint8_t elcr_mask;
+    PicState2 *pics_state; /* the PicState2 that contains this controller */
+} PicState;
+
+struct PicState2 {
+    /* 0 is master pic, 1 is slave pic */
+    /* XXX: better separation between the two pics */
+    PicState pics[2]; /* irq numbers 0-7 map to pics[0], 8-15 to pics[1] (irq >> 3) */
+    qemu_irq parent_irq; /* raised by pic_update_irq() when the master has an irq to deliver */
+    void *irq_request_opaque;
+    /* IOAPIC callback support */
+    SetIRQFunc *alt_irq_func; /* if non-NULL, also called for every i8259_set_irq() */
+    void *alt_irq_opaque; /* first argument passed to alt_irq_func */
+};
+
+#if defined(DEBUG_PIC) || defined (DEBUG_IRQ_COUNT)
+static int irq_level[16]; /* debug only: last level seen per irq line */
+#endif
+#ifdef DEBUG_IRQ_COUNT
+static uint64_t irq_count[16]; /* debug only: incremented when a line's level changes to 1 */
+#endif
+
+/* set irq level on one controller (irq is 0-7). If an edge is detected, then the IRR is set to 1 */
+static inline void pic_set_irq1(PicState *s, int irq, int level)
+{
+    int mask;
+    mask = 1 << irq;
+    if (s->elcr & mask) { /* the line's ELCR bit chooses level- or edge-triggered handling */
+        /* level triggered */
+        if (level) {
+            s->irr |= mask;
+            s->last_irr |= mask;
+        } else {
+            s->irr &= ~mask; /* the request follows the line: dropping the level withdraws it */
+            s->last_irr &= ~mask;
+        }
+    } else {
+        /* edge triggered */
+        if (level) {
+            if ((s->last_irr & mask) == 0) /* request only on a 0 -> 1 change */
+                s->irr |= mask;
+            s->last_irr |= mask;
+        } else {
+            s->last_irr &= ~mask; /* irr is left alone: a latched edge stays pending */
+        }
+    }
+}
+
+/* Walk the eight lines starting at priority_add and return how many steps it
+   takes to reach one set in mask (0 = highest priority); 8 when mask is 0. */
+static inline int get_priority(PicState *s, int mask)
+{
+    int level = 0;
+
+    if (!mask)
+        return 8;
+    while (!(mask & (1 << ((level + s->priority_add) & 7))))
+        level++;
+    return level;
+}
+
+/* return the pic wanted interrupt (0-7) for one controller. return -1 if none */
+static int pic_get_irq(PicState *s)
+{
+    int mask, cur_priority, priority;
+
+    mask = s->irr & ~s->imr; /* requested and not masked */
+    priority = get_priority(s, mask);
+    if (priority == 8)
+        return -1;
+    /* compute current priority. If special fully nested mode on the
+       master, the IRQ coming from the slave is not taken into account
+       for the priority computation. */
+    mask = s->isr;
+    if (s->special_mask)
+        mask &= ~s->imr;
+    if (s->special_fully_nested_mode && s == &s->pics_state->pics[0])
+        mask &= ~(1 << 2); /* bit 2 is the line pic_update_irq() pulses for the slave */
+    cur_priority = get_priority(s, mask);
+    if (priority < cur_priority) {
+        /* higher priority found: an irq should be generated */
+        return (priority + s->priority_add) & 7; /* convert the priority rank back to a line number */
+    } else {
+        return -1;
+    }
+}
+
+/* raise irq to CPU if necessary. must be called every time the active
+   irq may change */
+/* XXX: should not export it, but it is needed for an APIC kludge */
+void pic_update_irq(PicState2 *s)
+{
+    int irq2, irq;
+
+    /* first look at slave pic */
+    irq2 = pic_get_irq(&s->pics[1]);
+    if (irq2 >= 0) {
+        /* if irq request by slave pic, signal master PIC */
+        pic_set_irq1(&s->pics[0], 2, 1); /* pulse line 2 high then low */
+        pic_set_irq1(&s->pics[0], 2, 0);
+    }
+    /* look at requested irq */
+    irq = pic_get_irq(&s->pics[0]);
+    if (irq >= 0) {
+#if defined(DEBUG_PIC)
+        {
+            int i;
+            for(i = 0; i < 2; i++) {
+                printf("pic%d: imr=%x irr=%x padd=%d\n",
+                       i, s->pics[i].imr, s->pics[i].irr,
+                       s->pics[i].priority_add);
+
+            }
+        }
+        printf("pic: cpu_interrupt\n");
+#endif
+        qemu_irq_raise(s->parent_irq);
+    }
+
+/* all targets should do this rather than acking the IRQ in the cpu */
+#if defined(TARGET_MIPS) || defined(TARGET_PPC) || defined(TARGET_ALPHA)
+    else { /* pairs with the 'if (irq >= 0)' above; compiled only for these targets */
+        qemu_irq_lower(s->parent_irq);
+    }
+#endif
+}
+
+#ifdef DEBUG_IRQ_LATENCY
+int64_t irq_time[16]; /* debug only: vm_clock value recorded when each irq line was last raised */
+#endif
+
+static void i8259_set_irq(void *opaque, int irq, int level)
+{
+    PicState2 *s = opaque;
+    /* Set the level of one input line; irq is used unchecked as an index, so it is expected to be 0-15. */
+#if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT)
+    if (level != irq_level[irq]) {
+#if defined(DEBUG_PIC)
+        printf("i8259_set_irq: irq=%d level=%d\n", irq, level);
+#endif
+        irq_level[irq] = level;
+#ifdef DEBUG_IRQ_COUNT
+	if (level == 1)
+	    irq_count[irq]++;
+#endif
+    }
+#endif
+#ifdef DEBUG_IRQ_LATENCY
+    if (level) {
+        irq_time[irq] = qemu_get_clock(vm_clock);
+    }
+#endif
+    pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); /* irq >> 3 picks the controller, irq & 7 its line */
+    /* used for IOAPIC irqs */
+    if (s->alt_irq_func)
+        s->alt_irq_func(s->alt_irq_opaque, irq, level);
+    pic_update_irq(s);
+}
+
+/* acknowledge interrupt 'irq' (0-7) on one controller */
+static inline void pic_intack(PicState *s, int irq)
+{
+    if (s->auto_eoi) {
+        if (s->rotate_on_auto_eoi)
+            s->priority_add = (irq + 1) & 7; /* the line after the acknowledged one becomes highest priority */
+    } else {
+        s->isr |= (1 << irq); /* marked in service; not done in auto-EOI mode */
+    }
+    /* We don't clear a level sensitive interrupt here */
+    if (!(s->elcr & (1 << irq)))
+        s->irr &= ~(1 << irq);
+}
+
+int pic_read_irq(PicState2 *s)
+{
+    int irq, irq2, intno;
+    /* Acknowledge the pending interrupt and return its number (irq_base + line); line 7 is reported if nothing is pending. */
+    irq = pic_get_irq(&s->pics[0]);
+    if (irq >= 0) {
+        pic_intack(&s->pics[0], irq);
+        if (irq == 2) { /* master line 2 carries the slave's request */
+            irq2 = pic_get_irq(&s->pics[1]);
+            if (irq2 >= 0) {
+                pic_intack(&s->pics[1], irq2);
+            } else {
+                /* spurious IRQ on slave controller */
+                irq2 = 7;
+            }
+            intno = s->pics[1].irq_base + irq2;
+            irq = irq2 + 8; /* overall irq number, used only by the debug output below */
+        } else {
+            intno = s->pics[0].irq_base + irq;
+        }
+    } else {
+        /* spurious IRQ on host controller */
+        irq = 7;
+        intno = s->pics[0].irq_base + irq;
+    }
+    pic_update_irq(s);
+
+#ifdef DEBUG_IRQ_LATENCY
+    printf("IRQ%d latency=%0.3fus\n",
+           irq,
+           (double)(qemu_get_clock(vm_clock) - irq_time[irq]) * 1000000.0 / get_ticks_per_sec());
+#endif
+#if defined(DEBUG_PIC)
+    printf("pic_interrupt: irq=%d\n", irq);
+#endif
+    return intno;
+}
+
+static void pic_reset(void *opaque)
+{
+    PicState *pic = opaque;
+    /* Zero the per-PIC register state listed below. */
+    pic->last_irr = 0;
+    pic->irr = 0;
+    pic->imr = 0;
+    pic->isr = 0;
+    pic->priority_add = 0;
+    pic->irq_base = 0;
+    pic->read_reg_select = 0;
+    pic->poll = 0;
+    pic->special_mask = 0;
+    pic->init_state = 0;
+    pic->auto_eoi = 0;
+    pic->rotate_on_auto_eoi = 0;
+    pic->special_fully_nested_mode = 0;
+    pic->init4 = 0;
+    pic->single_mode = 0;
+    /* Note: ELCR is not reset */
+}
+
+static void pic_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    PicState *s = opaque;
+    int priority, cmd, irq;
+
+#ifdef DEBUG_PIC
+    printf("pic_write: addr=0x%02x val=0x%02x\n", addr, val);
+#endif
+    addr &= 1; /* only bit 0 distinguishes the two ports of one PIC */
+    if (addr == 0) {
+        if (val & 0x10) {
+            /* init: first byte of the initialisation sequence */
+            pic_reset(s);
+            /* deassert a pending interrupt */
+            qemu_irq_lower(s->pics_state->parent_irq);
+            s->init_state = 1; /* next odd-port write is init byte 2 */
+            s->init4 = val & 1; /* non-zero: a fourth init byte will follow */
+            s->single_mode = val & 2; /* non-zero: init byte 3 is skipped */
+            if (val & 0x08)
+                hw_error("level sensitive irq not supported");
+        } else if (val & 0x08) { /* poll / register-select / special-mask command */
+            if (val & 0x04)
+                s->poll = 1; /* the next read returns a poll result */
+            if (val & 0x02)
+                s->read_reg_select = val & 1; /* 1: reads return ISR, 0: IRR */
+            if (val & 0x40)
+                s->special_mask = (val >> 5) & 1;
+        } else {
+            cmd = val >> 5; /* top three bits select the EOI/rotate command */
+            switch(cmd) {
+            case 0: /* clear rotate-on-auto-EOI */
+            case 4: /* set rotate-on-auto-EOI */
+                s->rotate_on_auto_eoi = cmd >> 2;
+                break;
+            case 1: /* end of interrupt */
+            case 5: /* end of interrupt, then rotate priorities */
+                priority = get_priority(s, s->isr);
+                if (priority != 8) { /* 8 is treated as "nothing in service" */
+                    irq = (priority + s->priority_add) & 7;
+                    s->isr &= ~(1 << irq);
+                    if (cmd == 5)
+                        s->priority_add = (irq + 1) & 7;
+                    pic_update_irq(s->pics_state);
+                }
+                break;
+            case 3: /* end of interrupt for the pin named in val bits 0-2 */
+                irq = val & 7;
+                s->isr &= ~(1 << irq);
+                pic_update_irq(s->pics_state);
+                break;
+            case 6: /* set the priority rotation directly */
+                s->priority_add = (val + 1) & 7;
+                pic_update_irq(s->pics_state);
+                break;
+            case 7: /* end of interrupt for a named pin, then rotate */
+                irq = val & 7;
+                s->isr &= ~(1 << irq);
+                s->priority_add = (irq + 1) & 7;
+                pic_update_irq(s->pics_state);
+                break;
+            default:
+                /* no operation */
+                break;
+            }
+        }
+    } else {
+        switch(s->init_state) {
+        case 0:
+            /* normal mode: the odd port is the interrupt mask register */
+            s->imr = val;
+            pic_update_irq(s->pics_state);
+            break;
+        case 1: /* init byte 2: vector base, low three bits ignored */
+            s->irq_base = val & 0xf8;
+            s->init_state = s->single_mode ? (s->init4 ? 3 : 0) : 2;
+            break;
+        case 2: /* init byte 3: the value itself is not stored */
+            if (s->init4) {
+                s->init_state = 3;
+            } else {
+                s->init_state = 0;
+            }
+            break;
+        case 3: /* init byte 4: mode bits, then back to normal mode */
+            s->special_fully_nested_mode = (val >> 4) & 1;
+            s->auto_eoi = (val >> 1) & 1;
+            s->init_state = 0;
+            break;
+        }
+    }
+}
+
+static uint32_t pic_poll_read (PicState *s, uint32_t addr1)
+{
+    int ret; /* pin number returned to the caller; 7 when nothing is pending */
+
+    ret = pic_get_irq(s);
+    if (ret >= 0) {
+        if (addr1 >> 7) { /* any bit at or above bit 7 set, e.g. 0x80 from pic_intack_read() */
+            s->pics_state->pics[0].isr &= ~(1 << 2); /* also retire pin 2 on pics[0] */
+            s->pics_state->pics[0].irr &= ~(1 << 2);
+        }
+        s->irr &= ~(1 << ret); /* polling consumes the request ... */
+        s->isr &= ~(1 << ret); /* ... and leaves nothing in service */
+        if (addr1 >> 7 || ret != 2) /* skipped only for pin 2 read via a low address */
+            pic_update_irq(s->pics_state);
+    } else {
+        ret = 0x07;
+        pic_update_irq(s->pics_state);
+    }
+
+    return ret;
+}
+
+static uint32_t pic_ioport_read(void *opaque, uint32_t addr1)
+{
+    PicState *pic = opaque;
+    const unsigned int reg = addr1 & 1;
+    int value;
+
+    /* A pending poll command takes over the next read, whichever
+     * of the two ports it arrives on. */
+    if (pic->poll) {
+        value = pic_poll_read(pic, addr1);
+        pic->poll = 0;
+    } else if (reg != 0) {
+        /* odd port: interrupt mask register */
+        value = pic->imr;
+    } else if (pic->read_reg_select) {
+        /* even port, ISR selected */
+        value = pic->isr;
+    } else {
+        /* even port, IRR selected */
+        value = pic->irr;
+    }
+#ifdef DEBUG_PIC
+    printf("pic_read: addr=0x%02x val=0x%02x\n", addr1, value);
+#endif
+    return value;
+}
+
+/* memory mapped interrupt status */
+/* XXX: may be the same as pic_read_irq() */
+uint32_t pic_intack_read(PicState2 *s)
+{
+    int ret;
+
+    ret = pic_poll_read(&s->pics[0], 0x00); /* poll pics[0] first */
+    if (ret == 2) /* pin 2: take the result from pics[1], numbered from 8 */
+        ret = pic_poll_read(&s->pics[1], 0x80) + 8;
+    /* Prepare for ISR read */
+    s->pics[0].read_reg_select = 1;
+
+    return ret;
+}
+
+static void elcr_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    PicState *pic = opaque;
+    pic->elcr = pic->elcr_mask & val; /* only bits allowed by elcr_mask stick */
+}
+
+static uint32_t elcr_ioport_read(void *opaque, uint32_t addr1)
+{
+    const PicState *pic = opaque; /* addr1 is not used */
+    return pic->elcr;
+}
+
+static void pic_save(QEMUFile *f, void *opaque)
+{
+    PicState *pic = opaque;
+    /* One byte per field; pic_load() reads them back in this order. */
+    qemu_put_8s(f, &pic->last_irr);
+    qemu_put_8s(f, &pic->irr);
+    qemu_put_8s(f, &pic->imr);
+    qemu_put_8s(f, &pic->isr);
+    qemu_put_8s(f, &pic->priority_add);
+    qemu_put_8s(f, &pic->irq_base);
+    qemu_put_8s(f, &pic->read_reg_select);
+    qemu_put_8s(f, &pic->poll);
+    qemu_put_8s(f, &pic->special_mask);
+    qemu_put_8s(f, &pic->init_state);
+    qemu_put_8s(f, &pic->auto_eoi);
+    qemu_put_8s(f, &pic->rotate_on_auto_eoi);
+    qemu_put_8s(f, &pic->special_fully_nested_mode);
+    qemu_put_8s(f, &pic->init4);
+    qemu_put_8s(f, &pic->single_mode);
+    qemu_put_8s(f, &pic->elcr);
+}
+
+static int pic_load(QEMUFile *f, void *opaque, int version_id)
+{
+    PicState *pic = opaque;
+
+    if (version_id != 1)
+        return -EINVAL;
+    /* One byte per field, in the order pic_save() wrote them. */
+    qemu_get_8s(f, &pic->last_irr);
+    qemu_get_8s(f, &pic->irr);
+    qemu_get_8s(f, &pic->imr);
+    qemu_get_8s(f, &pic->isr);
+    qemu_get_8s(f, &pic->priority_add);
+    qemu_get_8s(f, &pic->irq_base);
+    qemu_get_8s(f, &pic->read_reg_select);
+    qemu_get_8s(f, &pic->poll);
+    qemu_get_8s(f, &pic->special_mask);
+    qemu_get_8s(f, &pic->init_state);
+    qemu_get_8s(f, &pic->auto_eoi);
+    qemu_get_8s(f, &pic->rotate_on_auto_eoi);
+    qemu_get_8s(f, &pic->special_fully_nested_mode);
+    qemu_get_8s(f, &pic->init4);
+    qemu_get_8s(f, &pic->single_mode);
+    qemu_get_8s(f, &pic->elcr);
+    return 0;
+}
+
+/* XXX: add generic master/slave system */
+static void pic_init1(int io_addr, int elcr_addr, PicState *s)
+{
+    register_ioport_write(io_addr, 2, 1, pic_ioport_write, s); /* args presumably base, count, size - confirm */
+    register_ioport_read(io_addr, 2, 1, pic_ioport_read, s);
+    if (elcr_addr >= 0) { /* a negative elcr_addr means no ELCR port is registered */
+        register_ioport_write(elcr_addr, 1, 1, elcr_ioport_write, s);
+        register_ioport_read(elcr_addr, 1, 1, elcr_ioport_read, s);
+    }
+    register_savevm("i8259", io_addr, 1, pic_save, pic_load, s); /* 1 is the only version pic_load() accepts */
+    qemu_register_reset(pic_reset, 0, s);
+}
+
+void pic_info(Monitor *mon)
+{
+    const PicState *pic;
+    int n;
+
+    if (isa_pic == NULL)
+        return;
+    /* one status line per PIC */
+    for (n = 0; n < 2; n++) {
+        pic = &isa_pic->pics[n];
+        monitor_printf(mon, "pic%d: irr=%02x imr=%02x isr=%02x hprio=%d "
+                       "irq_base=%02x rr_sel=%d elcr=%02x fnm=%d\n",
+                       n, pic->irr, pic->imr, pic->isr, pic->priority_add,
+                       pic->irq_base, pic->read_reg_select, pic->elcr,
+                       pic->special_fully_nested_mode);
+    }
+}
+
+void irq_info(Monitor *mon)
+{
+#ifdef DEBUG_IRQ_COUNT
+    int line;
+    int64_t hits;
+
+    monitor_printf(mon, "IRQ statistics:\n");
+    for (line = 0; line < 16; line++) {
+        hits = irq_count[line];
+        if (hits > 0)
+            monitor_printf(mon, "%2d: %" PRId64 "\n", line, hits);
+    }
+#else
+    monitor_printf(mon, "irq statistic code not compiled.\n");
+#endif
+}
+
+qemu_irq *i8259_init(qemu_irq parent_irq)
+{
+    PicState2 *chip;
+    /* Build the two-PIC pair, publish it as isa_pic, return its 16 input lines. */
+    chip = qemu_mallocz(sizeof(PicState2));
+    pic_init1(0x20, 0x4d0, &chip->pics[0]);
+    pic_init1(0xa0, 0x4d1, &chip->pics[1]);
+    chip->pics[0].elcr_mask = 0xf8;
+    chip->pics[1].elcr_mask = 0xde;
+    chip->parent_irq = parent_irq;
+    chip->pics[0].pics_state = chip;
+    chip->pics[1].pics_state = chip;
+    isa_pic = chip;
+    return qemu_allocate_irqs(i8259_set_irq, chip, 16);
+}
+
+void pic_set_alt_irq_func(PicState2 *s, SetIRQFunc *alt_irq_func,
+                          void *alt_irq_opaque)
+{
+    s->alt_irq_opaque = alt_irq_opaque; /* passed back as the hook's first argument */
+    s->alt_irq_func = alt_irq_func;     /* called from i8259_set_irq() when non-NULL */
+}
diff --git a/hw/ioapic.c b/hw/ioapic.c
new file mode 100644
index 0000000..b179e6e
--- /dev/null
+++ b/hw/ioapic.c
@@ -0,0 +1,261 @@
+/*
+ *  ioapic.c IOAPIC emulation logic
+ *
+ *  Copyright (c) 2004-2005 Fabrice Bellard
+ *
+ *  Split the ioapic logic from apic.c
+ *  Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+
+#include "hw.h"
+#include "pc.h"
+#include "qemu-timer.h"
+#include "host-utils.h"
+
+//#define DEBUG_IOAPIC
+
+#define IOAPIC_NUM_PINS			0x18
+#define IOAPIC_LVT_MASKED 		(1<<16)
+
+#define IOAPIC_TRIGGER_EDGE		0
+#define IOAPIC_TRIGGER_LEVEL		1
+
+/* io{apic,sapic} delivery mode: 3-bit field at bits 8-10 of a redirection entry */
+#define IOAPIC_DM_FIXED			0x0
+#define IOAPIC_DM_LOWEST_PRIORITY	0x1
+#define IOAPIC_DM_PMI			0x2
+#define IOAPIC_DM_NMI			0x4
+#define IOAPIC_DM_INIT			0x5
+#define IOAPIC_DM_SIPI			0x6
+#define IOAPIC_DM_EXTINT		0x7
+
+struct IOAPICState {
+    uint8_t id; /* read and written through register 0x00, bits 24-31 */
+    uint8_t ioregsel; /* register index latched by a write to offset 0x00 */
+
+    uint32_t irr; /* one pending-request bit per input pin */
+    uint64_t ioredtbl[IOAPIC_NUM_PINS]; /* one 64-bit redirection entry per pin */
+};
+
+static void ioapic_service(IOAPICState *s)
+{
+    uint8_t pin;
+    uint8_t trig_mode;
+    uint8_t vector;
+    uint8_t delivery_mode;
+    uint32_t pin_bit;
+    uint64_t entry;
+    uint8_t dest;
+    uint8_t dest_mode;
+    uint8_t polarity;
+
+    for (pin = 0; pin < IOAPIC_NUM_PINS; pin++) {
+        pin_bit = 1 << pin;
+        if (!(s->irr & pin_bit))
+            continue;               /* nothing pending on this pin */
+        entry = s->ioredtbl[pin];
+        if (entry & IOAPIC_LVT_MASKED)
+            continue;               /* pending but masked: leave it latched */
+        delivery_mode = (entry >> 8) & 7;
+        dest_mode = (entry >> 11) & 1;
+        polarity = (entry >> 13) & 1;
+        trig_mode = ((entry >> 15) & 1);
+        dest = entry >> 56;
+        /* an edge request is consumed by delivery; a level one stays set */
+        if (trig_mode == IOAPIC_TRIGGER_EDGE)
+            s->irr &= ~pin_bit;
+        if (delivery_mode == IOAPIC_DM_EXTINT)
+            vector = pic_read_irq(isa_pic);
+        else
+            vector = entry & 0xff;
+        apic_deliver_irq(dest, dest_mode, delivery_mode,
+                         vector, polarity, trig_mode);
+    }
+}
+
+void ioapic_set_irq(void *opaque, int vector, int level)
+{
+    IOAPICState *s = opaque;
+    uint32_t pin_bit;
+    int level_triggered;
+
+    /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps
+     * to GSI 2.  GSI maps to ioapic 1-1.  This is not
+     * the cleanest way of doing it but it should work. */
+
+    if (vector == 0)
+        vector = 2;
+
+    /* lines without a matching IOAPIC pin are ignored */
+    if (vector < 0 || vector >= IOAPIC_NUM_PINS)
+        return;
+
+    pin_bit = 1 << vector;
+    level_triggered = (s->ioredtbl[vector] >> 15) & 1;
+
+    if (level) {
+        /* assertion latches the request in either trigger mode
+         * and attempts delivery right away */
+        s->irr |= pin_bit;
+        ioapic_service(s);
+    } else if (level_triggered) {
+        /* de-assertion withdraws a level triggered request */
+        s->irr &= ~pin_bit;
+    }
+    /* de-assertion of an edge triggered pin changes nothing:
+     * a request that was latched earlier stays latched */
+}
+
+static uint32_t ioapic_mem_readl(void *opaque, target_phys_addr_t addr)
+{
+    IOAPICState *s = opaque;
+    int index;
+    uint32_t val = 0; /* returned for every offset or register not handled below */
+
+    addr &= 0xff;
+    if (addr == 0x00) { /* offset 0x00: the register-select latch */
+        val = s->ioregsel;
+    } else if (addr == 0x10) { /* offset 0x10: data for the selected register */
+        switch (s->ioregsel) {
+            case 0x00: /* id, in bits 24-31 */
+                val = s->id << 24;
+                break;
+            case 0x01: /* version, plus highest pin index in bits 16+ */
+                val = 0x11 | ((IOAPIC_NUM_PINS - 1) << 16); /* version 0x11 */
+                break;
+            case 0x02: /* always reads as zero */
+                val = 0;
+                break;
+            default: /* redirection table: two 32-bit registers per pin from 0x10 */
+                index = (s->ioregsel - 0x10) >> 1;
+                if (index >= 0 && index < IOAPIC_NUM_PINS) {
+                    if (s->ioregsel & 1) /* odd register: upper 32 bits */
+                        val = s->ioredtbl[index] >> 32;
+                    else
+                        val = s->ioredtbl[index] & 0xffffffff;
+                }
+        }
+#ifdef DEBUG_IOAPIC
+        printf("I/O APIC read: %08x = %08x\n", s->ioregsel, val);
+#endif
+    }
+    return val;
+}
+
+static void ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+    IOAPICState *s = opaque;
+    int index;
+
+    addr &= 0xff; /* writes to offsets other than 0x00 and 0x10 are ignored */
+    if (addr == 0x00)  {
+        s->ioregsel = val; /* truncated to the 8-bit select latch */
+        return;
+    } else if (addr == 0x10) {
+#ifdef DEBUG_IOAPIC
+        printf("I/O APIC write: %08x = %08x\n", s->ioregsel, val);
+#endif
+        switch (s->ioregsel) {
+            case 0x00: /* id lives in bits 24-31 of the written value */
+                s->id = (val >> 24) & 0xff;
+                return;
+            case 0x01: /* registers 0x01 and 0x02 ignore writes */
+            case 0x02:
+                return;
+            default: /* redirection table: two 32-bit registers per pin from 0x10 */
+                index = (s->ioregsel - 0x10) >> 1;
+                if (index >= 0 && index < IOAPIC_NUM_PINS) {
+                    if (s->ioregsel & 1) { /* odd register: replace the upper half */
+                        s->ioredtbl[index] &= 0xffffffff;
+                        s->ioredtbl[index] |= (uint64_t)val << 32;
+                    } else { /* even register: replace the lower half */
+                        s->ioredtbl[index] &= ~0xffffffffULL;
+                        s->ioredtbl[index] |= val;
+                    }
+                    ioapic_service(s); /* a pending pin may just have been unmasked */
+                }
+        }
+    }
+}
+
+static void ioapic_save(QEMUFile *f, void *opaque)
+{
+    IOAPICState *s = opaque;
+    int pin;
+    /* id, register select, then every redirection entry; s->irr is not written */
+    qemu_put_8s(f, &s->id);
+    qemu_put_8s(f, &s->ioregsel);
+    for (pin = 0; pin < IOAPIC_NUM_PINS; pin++) {
+        qemu_put_be64s(f, &s->ioredtbl[pin]);
+    }
+}
+
+static int ioapic_load(QEMUFile *f, void *opaque, int version_id)
+{
+    IOAPICState *s = opaque;
+    int pin;
+
+    if (version_id != 1)
+        return -EINVAL;
+    /* same layout as ioapic_save(); s->irr is not part of the stream */
+    qemu_get_8s(f, &s->id);
+    qemu_get_8s(f, &s->ioregsel);
+    for (pin = 0; pin < IOAPIC_NUM_PINS; pin++) {
+        qemu_get_be64s(f, &s->ioredtbl[pin]);
+    }
+    return 0;
+}
+
+static void ioapic_reset(void *opaque)
+{
+    IOAPICState *s = opaque;
+    int pin;
+
+    memset(s, 0, sizeof(*s));
+    for (pin = 0; pin < IOAPIC_NUM_PINS; pin++)
+        s->ioredtbl[pin] = IOAPIC_LVT_MASKED; /* mask LVT */
+}
+
+static CPUReadMemoryFunc *ioapic_mem_read[3] = { /* same handler in all three slots */
+    ioapic_mem_readl, /* slots presumably map to 8/16/32-bit accesses - confirm */
+    ioapic_mem_readl,
+    ioapic_mem_readl,
+};
+
+static CPUWriteMemoryFunc *ioapic_mem_write[3] = { /* same handler in all three slots */
+    ioapic_mem_writel,
+    ioapic_mem_writel,
+    ioapic_mem_writel,
+};
+
+IOAPICState *ioapic_init(void)
+{
+    IOAPICState *s;
+    int io_memory;
+
+    s = qemu_mallocz(sizeof(IOAPICState));
+    ioapic_reset(s); /* start with every redirection entry masked */
+
+    io_memory = cpu_register_io_memory(ioapic_mem_read,
+                                       ioapic_mem_write, s);
+    cpu_register_physical_memory(0xfec00000, 0x1000, io_memory); /* register window at 0xfec00000 */
+
+    register_savevm("ioapic", 0, 1, ioapic_save, ioapic_load, s); /* 1 is the only version ioapic_load() accepts */
+    qemu_register_reset(ioapic_reset, 0, s);
+
+    return s;
+}
diff --git a/hw/mc146818rtc.c b/hw/mc146818rtc.c
new file mode 100644
index 0000000..f93a3cb
--- /dev/null
+++ b/hw/mc146818rtc.c
@@ -0,0 +1,754 @@
+/*
+ * QEMU MC146818 RTC emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw.h"
+#include "qemu-timer.h"
+#include "sysemu.h"
+#include "pc.h"
+#include "isa.h"
+//#include "hpet_emul.h"
+
+//#define DEBUG_CMOS
+
+#define RTC_SECONDS             0
+#define RTC_SECONDS_ALARM       1
+#define RTC_MINUTES             2
+#define RTC_MINUTES_ALARM       3
+#define RTC_HOURS               4
+#define RTC_HOURS_ALARM         5
+#define RTC_ALARM_DONT_CARE    0xC0
+
+#define RTC_DAY_OF_WEEK         6
+#define RTC_DAY_OF_MONTH        7
+#define RTC_MONTH               8
+#define RTC_YEAR                9
+
+#define RTC_REG_A               10
+#define RTC_REG_B               11
+#define RTC_REG_C               12
+#define RTC_REG_D               13
+
+#define REG_A_UIP 0x80
+
+#define REG_B_SET  0x80
+#define REG_B_PIE  0x40
+#define REG_B_AIE  0x20
+#define REG_B_UIE  0x10
+#define REG_B_SQWE 0x08
+#define REG_B_DM   0x04
+
+#define REG_C_UF   0x10
+#define REG_C_IRQF 0x80
+#define REG_C_PF   0x40
+#define REG_C_AF   0x20
+
+struct RTCState {
+    uint8_t cmos_data[128]; /* register file, addressed through cmos_index */
+    uint8_t cmos_index; /* 7-bit index latched by an even-port write */
+    struct tm current_tm; /* emulated time, advanced by rtc_next_second() */
+    int base_year; /* added to the RTC_YEAR register in rtc_set_time() */
+    qemu_irq irq; /* raised for periodic, alarm and update-ended events */
+    qemu_irq sqw_irq; /* raised from the periodic timer when REG_B_SQWE is set */
+    int it_shift;
+    /* periodic timer */
+    QEMUTimer *periodic_timer;
+    int64_t next_periodic_time; /* expiry last programmed by rtc_timer_update() */
+    /* second update */
+    int64_t next_second_time; /* advanced by get_ticks_per_sec() every second */
+#ifdef TARGET_I386
+    uint32_t irq_coalesced; /* periodic ticks still owed to the guest */
+    uint32_t period; /* current periodic interval in 32 kHz cycles */
+    QEMUTimer *coalesced_timer; /* armed while irq_coalesced is non-zero */
+#endif
+    QEMUTimer *second_timer; /* armed for next_second_time */
+    QEMUTimer *second_timer2; /* armed by rtc_update_second() a short delay later */
+};
+
+static void rtc_irq_raise(qemu_irq irq) {
+    /* When the HPET is operating in legacy mode, RTC interrupts are disabled.
+     * We block qemu_irq_raise, but not qemu_irq_lower, in case legacy
+     * mode is established while an interrupt is raised: we want it to
+     * be lowered in any case.  CONFIG_ANDROID builds skip the HPET check.
+     */
+#ifndef CONFIG_ANDROID
+#if defined TARGET_I386 || defined TARGET_X86_64
+    if (!hpet_in_legacy_mode())
+#endif
+#endif
+        qemu_irq_raise(irq);
+}
+
+static void rtc_set_time(RTCState *s);
+static void rtc_copy_date(RTCState *s);
+
+#ifdef TARGET_I386
+static void rtc_coalesced_timer_update(RTCState *s)
+{
+    if (s->irq_coalesced != 0) {
+        /* divide each RTC interval to 2 - 8 smaller intervals */
+        int slices = MIN(s->irq_coalesced, 7) + 1;
+        int64_t interval = muldiv64(s->period / slices,
+                                    get_ticks_per_sec(), 32768);
+        qemu_mod_timer(s->coalesced_timer, qemu_get_clock(vm_clock) + interval);
+    } else {
+        qemu_del_timer(s->coalesced_timer);
+    }
+}
+
+static void rtc_coalesced_timer(void *opaque)
+{
+    RTCState *rtc = opaque;
+
+    if (rtc->irq_coalesced != 0) {
+        /* re-inject one owed tick; it only counts if the APIC reports delivery */
+        apic_reset_irq_delivered();
+        rtc->cmos_data[RTC_REG_C] |= REG_C_IRQF | REG_C_PF;
+        rtc_irq_raise(rtc->irq);
+        if (apic_get_irq_delivered())
+            rtc->irq_coalesced--;
+    }
+
+    rtc_coalesced_timer_update(rtc);
+}
+#endif
+
+static void rtc_timer_update(RTCState *s, int64_t current_time)
+{
+    int period_code, period;
+    int64_t cur_clock, next_irq_clock;
+    int enable_pie;
+    /* Re-arm or cancel the periodic timer from the REG_A rate and REG_B enables. */
+    period_code = s->cmos_data[RTC_REG_A] & 0x0f;
+#if !defined(CONFIG_ANDROID) && \
+    (defined TARGET_I386 || defined TARGET_X86_64)
+    /* disable periodic timer if hpet is in legacy mode, since interrupts are
+     * disabled anyway.
+     */
+    enable_pie = !hpet_in_legacy_mode();
+#else
+    /* CONFIG_ANDROID builds and non-x86 targets never consult the HPET:
+     * the periodic interrupt is always allowed. */
+    enable_pie = 1;
+#endif
+
+    if (period_code != 0
+        && (((s->cmos_data[RTC_REG_B] & REG_B_PIE) && enable_pie)
+            || ((s->cmos_data[RTC_REG_B] & REG_B_SQWE) && s->sqw_irq))) {
+        if (period_code <= 2)
+            period_code += 7;
+        /* period in 32 Khz cycles */
+        period = 1 << (period_code - 1);
+#ifdef TARGET_I386
+        if(period != s->period) /* rescale the owed ticks to the new period */
+            s->irq_coalesced = (s->irq_coalesced * s->period) / period;
+        s->period = period;
+#endif
+        /* compute 32 khz clock */
+        cur_clock = muldiv64(current_time, 32768, get_ticks_per_sec());
+        next_irq_clock = (cur_clock & ~(period - 1)) + period; /* next multiple of period */
+        s->next_periodic_time = muldiv64(next_irq_clock, get_ticks_per_sec(), 32768) + 1;
+        qemu_mod_timer(s->periodic_timer, s->next_periodic_time);
+    } else {
+#ifdef TARGET_I386
+        s->irq_coalesced = 0;
+#endif
+        qemu_del_timer(s->periodic_timer);
+    }
+}
+
+static void rtc_periodic_timer(void *opaque)
+{
+    RTCState *s = opaque;
+
+    rtc_timer_update(s, s->next_periodic_time); /* re-arm for the following period first */
+    if (s->cmos_data[RTC_REG_B] & REG_B_PIE) {
+        s->cmos_data[RTC_REG_C] |= 0xc0; /* REG_C_IRQF | REG_C_PF */
+#ifdef TARGET_I386
+        if(rtc_td_hack) {
+            apic_reset_irq_delivered();
+            rtc_irq_raise(s->irq);
+            if (!apic_get_irq_delivered()) { /* not delivered: remember the tick for re-injection */
+                s->irq_coalesced++;
+                rtc_coalesced_timer_update(s);
+            }
+        } else
+#endif
+        rtc_irq_raise(s->irq); /* on TARGET_I386 builds this is the body of the 'else' above */
+    }
+    if (s->cmos_data[RTC_REG_B] & REG_B_SQWE) {
+        /* Not square wave at all but we don't want 2048Hz interrupts!
+           Must be seen as a pulse.  */
+        qemu_irq_raise(s->sqw_irq);
+    }
+}
+
+static void cmos_ioport_write(void *opaque, uint32_t addr, uint32_t data)
+{
+    RTCState *s = opaque;
+
+    if ((addr & 1) == 0) { /* even port: latch the register index */
+        s->cmos_index = data & 0x7f;
+    } else { /* odd port: write the register selected by cmos_index */
+#ifdef DEBUG_CMOS
+        printf("cmos: write index=0x%02x val=0x%02x\n",
+               s->cmos_index, data);
+#endif
+        switch(s->cmos_index) {
+        case RTC_SECONDS_ALARM:
+        case RTC_MINUTES_ALARM:
+        case RTC_HOURS_ALARM:
+            /* XXX: not supported */
+            s->cmos_data[s->cmos_index] = data;
+            break;
+        case RTC_SECONDS:
+        case RTC_MINUTES:
+        case RTC_HOURS:
+        case RTC_DAY_OF_WEEK:
+        case RTC_DAY_OF_MONTH:
+        case RTC_MONTH:
+        case RTC_YEAR:
+            s->cmos_data[s->cmos_index] = data;
+            /* if in set mode, do not update the time */
+            if (!(s->cmos_data[RTC_REG_B] & REG_B_SET)) {
+                rtc_set_time(s); /* re-derive current_tm from the registers */
+            }
+            break;
+        case RTC_REG_A:
+            /* UIP bit is read only */
+            s->cmos_data[RTC_REG_A] = (data & ~REG_A_UIP) |
+                (s->cmos_data[RTC_REG_A] & REG_A_UIP);
+            rtc_timer_update(s, qemu_get_clock(vm_clock)); /* the rate bits may have changed */
+            break;
+        case RTC_REG_B:
+            if (data & REG_B_SET) {
+                /* set mode: reset UIP mode */
+                s->cmos_data[RTC_REG_A] &= ~REG_A_UIP;
+                data &= ~REG_B_UIE; /* UIE is dropped from the stored value in set mode */
+            } else {
+                /* if disabling set mode, update the time */
+                if (s->cmos_data[RTC_REG_B] & REG_B_SET) {
+                    rtc_set_time(s);
+                }
+            }
+            s->cmos_data[RTC_REG_B] = data;
+            rtc_timer_update(s, qemu_get_clock(vm_clock)); /* PIE/SQWE may have changed */
+            break;
+        case RTC_REG_C:
+        case RTC_REG_D:
+            /* cannot write to them */
+            break;
+        default: /* every other index is plain storage */
+            s->cmos_data[s->cmos_index] = data;
+            break;
+        }
+    }
+}
+
+static inline int rtc_to_bcd(RTCState *s, int a)
+{
+    /* REG_B_DM set: registers hold plain binary, pass the value through */
+    if (s->cmos_data[RTC_REG_B] & REG_B_DM)
+        return a;
+    /* otherwise pack tens into the high nibble, units into the low one */
+    return ((a / 10) << 4) | (a % 10);
+}
+
+static inline int rtc_from_bcd(RTCState *s, int a)
+{
+    /* REG_B_DM set: the register already holds plain binary */
+    if (s->cmos_data[RTC_REG_B] & REG_B_DM)
+        return a;
+    /* otherwise the high nibble counts tens and the low nibble units */
+    return ((a >> 4) * 10) + (a & 0x0f);
+}
+
+static void rtc_set_time(RTCState *s)
+{
+    struct tm *tm = &s->current_tm;
+    const uint8_t *reg = s->cmos_data;
+    /* decode each CMOS time register into the matching struct tm field */
+    tm->tm_sec = rtc_from_bcd(s, reg[RTC_SECONDS]);
+    tm->tm_min = rtc_from_bcd(s, reg[RTC_MINUTES]);
+    tm->tm_hour = rtc_from_bcd(s, reg[RTC_HOURS] & 0x7f);
+    /* 12 hour format (REG_B bit 1 clear): bit 7 of the hour register adds 12 */
+    if (!(reg[RTC_REG_B] & 0x02) && (reg[RTC_HOURS] & 0x80))
+        tm->tm_hour += 12;
+    tm->tm_wday = rtc_from_bcd(s, reg[RTC_DAY_OF_WEEK]) - 1;
+    tm->tm_mday = rtc_from_bcd(s, reg[RTC_DAY_OF_MONTH]);
+    tm->tm_mon = rtc_from_bcd(s, reg[RTC_MONTH]) - 1;
+    tm->tm_year = rtc_from_bcd(s, reg[RTC_YEAR]) + s->base_year - 1900;
+}
+
+static void rtc_copy_date(RTCState *s)
+{
+    const struct tm *tm = &s->current_tm;
+    int year, hours;
+    s->cmos_data[RTC_SECONDS] = rtc_to_bcd(s, tm->tm_sec);
+    s->cmos_data[RTC_MINUTES] = rtc_to_bcd(s, tm->tm_min);
+    if (s->cmos_data[RTC_REG_B] & 0x02) {
+        /* 24 hour format */
+        hours = rtc_to_bcd(s, tm->tm_hour);
+    } else {
+        /* 12 hour format */
+        hours = rtc_to_bcd(s, tm->tm_hour % 12);
+        if (tm->tm_hour >= 12)
+            hours |= 0x80;
+    }
+    s->cmos_data[RTC_HOURS] = hours;
+    s->cmos_data[RTC_DAY_OF_WEEK] = rtc_to_bcd(s, tm->tm_wday + 1);
+    s->cmos_data[RTC_DAY_OF_MONTH] = rtc_to_bcd(s, tm->tm_mday);
+    s->cmos_data[RTC_MONTH] = rtc_to_bcd(s, tm->tm_mon + 1);
+    year = (tm->tm_year - s->base_year) % 100;
+    if (year < 0)
+        year += 100;
+    s->cmos_data[RTC_YEAR] = rtc_to_bcd(s, year);
+}
+
+/* month is between 0 and 11. */
+static int get_days_in_month(int month, int year)
+{
+    static const int days_tab[12] = {
+        31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+    };
+    int leap;
+    /* out-of-range months (including negative ones) count as 31 days */
+    if (month < 0 || month >= 12)
+        return 31;
+    /* leap year: divisible by 4, except centuries not divisible by 400 */
+    leap = (year % 4) == 0 && ((year % 100) != 0 || (year % 400) == 0);
+    if (month == 1 && leap)
+        return 29;
+    return days_tab[month];
+}
+
+/* update 'tm' to the next second, carrying into minute, hour, day, month and year */
+static void rtc_next_second(struct tm *tm)
+{
+    int days_in_month;
+    tm->tm_sec++;
+    if ((unsigned)tm->tm_sec < 60)
+        return;
+    tm->tm_sec = 0;
+    tm->tm_min++;
+    if ((unsigned)tm->tm_min < 60)
+        return;
+    tm->tm_min = 0;
+    tm->tm_hour++;
+    if ((unsigned)tm->tm_hour < 24)
+        return;
+    tm->tm_hour = 0;
+    /* next day */
+    tm->tm_wday++;
+    if ((unsigned)tm->tm_wday >= 7)
+        tm->tm_wday = 0;
+    days_in_month = get_days_in_month(tm->tm_mon, tm->tm_year + 1900);
+    tm->tm_mday++;
+    if (tm->tm_mday < 1) {
+        tm->tm_mday = 1;
+        return;
+    }
+    if (tm->tm_mday <= days_in_month)
+        return;
+    tm->tm_mday = 1;
+    tm->tm_mon++;
+    if (tm->tm_mon >= 12) {
+        tm->tm_mon = 0;
+        tm->tm_year++;
+    }
+}
+
+
+static void rtc_update_second(void *opaque)
+{
+    RTCState *s = opaque;
+    int64_t delay;
+
+    /* if the oscillator is not in normal operation, we do not update */
+    if ((s->cmos_data[RTC_REG_A] & 0x70) != 0x20) {
+        s->next_second_time += get_ticks_per_sec();
+        qemu_mod_timer(s->second_timer, s->next_second_time);
+        return;
+    }
+
+    rtc_next_second(&s->current_tm);
+    /* update in progress bit */
+    if (!(s->cmos_data[RTC_REG_B] & REG_B_SET))
+        s->cmos_data[RTC_REG_A] |= REG_A_UIP;
+
+    /* should be 244 us = 8 / 32768 seconds, but currently the
+       timers do not have the necessary resolution. */
+    delay = get_ticks_per_sec() / 100;
+    if (delay < 1)
+        delay = 1;
+    qemu_mod_timer(s->second_timer2,
+                   s->next_second_time + delay);
+}
+
+static void rtc_update_second2(void *opaque)
+{
+    RTCState *s = opaque;
+
+    if (!(s->cmos_data[RTC_REG_B] & REG_B_SET)) {
+        rtc_copy_date(s);
+    }
+    /* check alarm: each alarm register matches when its top two bits are
+     * both set (RTC_ALARM_DONT_CARE) or when it equals the time field */
+    if (s->cmos_data[RTC_REG_B] & REG_B_AIE) {
+        if (((s->cmos_data[RTC_SECONDS_ALARM] & 0xc0) == 0xc0 ||
+             s->cmos_data[RTC_SECONDS_ALARM] == s->current_tm.tm_sec) &&
+            ((s->cmos_data[RTC_MINUTES_ALARM] & 0xc0) == 0xc0 ||
+             s->cmos_data[RTC_MINUTES_ALARM] == s->current_tm.tm_min) &&
+            ((s->cmos_data[RTC_HOURS_ALARM] & 0xc0) == 0xc0 ||
+             s->cmos_data[RTC_HOURS_ALARM] == s->current_tm.tm_hour)) {
+            /* NOTE(review): registers are compared raw, without BCD decoding - confirm */
+            s->cmos_data[RTC_REG_C] |= REG_C_IRQF | REG_C_AF;
+            rtc_irq_raise(s->irq);
+        }
+    }
+
+    /* update ended interrupt */
+    if (s->cmos_data[RTC_REG_B] & REG_B_UIE) {
+        s->cmos_data[RTC_REG_C] |= REG_C_IRQF | REG_C_UF;
+        rtc_irq_raise(s->irq);
+    }
+
+    /* clear update in progress bit */
+    s->cmos_data[RTC_REG_A] &= ~REG_A_UIP;
+
+    s->next_second_time += get_ticks_per_sec(); /* schedule the next one-second tick */
+    qemu_mod_timer(s->second_timer, s->next_second_time);
+}
+
+static uint32_t cmos_ioport_read(void *opaque, uint32_t addr)
+{
+    RTCState *s = opaque;
+    int ret;
+    if ((addr & 1) == 0) { /* the even (index) port always reads as 0xff */
+        return 0xff;
+    } else { /* odd port: read the register selected by cmos_index */
+        switch(s->cmos_index) {
+        case RTC_SECONDS:
+        case RTC_MINUTES:
+        case RTC_HOURS:
+        case RTC_DAY_OF_WEEK:
+        case RTC_DAY_OF_MONTH:
+        case RTC_MONTH:
+        case RTC_YEAR:
+            ret = s->cmos_data[s->cmos_index];
+            break;
+        case RTC_REG_A:
+            ret = s->cmos_data[s->cmos_index];
+            break;
+        case RTC_REG_C: /* the only read with side effects */
+            ret = s->cmos_data[s->cmos_index];
+            qemu_irq_lower(s->irq); /* reading acknowledges the interrupt ... */
+            s->cmos_data[RTC_REG_C] = 0x00; /* ... and clears every flag */
+            break;
+        default:
+            ret = s->cmos_data[s->cmos_index];
+            break;
+        }
+#ifdef DEBUG_CMOS
+        printf("cmos: read index=0x%02x val=0x%02x\n",
+               s->cmos_index, ret);
+#endif
+        return ret;
+    }
+}
+
+void rtc_set_memory(RTCState *s, int addr, int val)
+{
+    if ((unsigned)addr < sizeof(s->cmos_data)) /* out-of-range cells are ignored */
+        s->cmos_data[addr] = val;
+}
+
+void rtc_set_date(RTCState *s, const struct tm *tm)
+{
+    s->current_tm = *tm; /* adopt the caller's broken-down time ... */
+    rtc_copy_date(s);    /* ... and mirror it into the CMOS time registers */
+}
+
+/* PC cmos mappings */
+#define REG_IBM_CENTURY_BYTE        0x32
+#define REG_IBM_PS2_CENTURY_BYTE    0x37
+
+static void rtc_set_date_from_host(RTCState *s)
+{
+    struct tm now;
+    int century;
+
+    /* set the CMOS date */
+    qemu_get_timedate(&now, 0);
+    rtc_set_date(s, &now);
+    /* tm_year counts from 1900, so /100 + 19 gives the two century digits */
+    century = rtc_to_bcd(s, (now.tm_year / 100) + 19);
+    rtc_set_memory(s, REG_IBM_CENTURY_BYTE, century);
+    rtc_set_memory(s, REG_IBM_PS2_CENTURY_BYTE, century);
+}
+
+static void rtc_save(QEMUFile *f, void *opaque)
+{
+    RTCState *rtc = opaque;
+
+    qemu_put_buffer(f, rtc->cmos_data, 128);
+    qemu_put_8s(f, &rtc->cmos_index);
+    /* broken-down time, one 32-bit word per field */
+    qemu_put_be32(f, rtc->current_tm.tm_sec);
+    qemu_put_be32(f, rtc->current_tm.tm_min);
+    qemu_put_be32(f, rtc->current_tm.tm_hour);
+    qemu_put_be32(f, rtc->current_tm.tm_wday);
+    qemu_put_be32(f, rtc->current_tm.tm_mday);
+    qemu_put_be32(f, rtc->current_tm.tm_mon);
+    qemu_put_be32(f, rtc->current_tm.tm_year);
+    /* periodic timer, then its programmed expiry */
+    qemu_put_timer(f, rtc->periodic_timer);
+    qemu_put_be64(f, rtc->next_periodic_time);
+    /* second-update state */
+    qemu_put_be64(f, rtc->next_second_time);
+    qemu_put_timer(f, rtc->second_timer);
+    qemu_put_timer(f, rtc->second_timer2);
+}
+
+static int rtc_load(QEMUFile *f, void *opaque, int version_id)
+{ /* loadvm handler: restore the fields written by rtc_save(). Returns 0, or -EINVAL. */
+    RTCState *s = opaque;
+
+    if (version_id != 1) /* only stream version 1 is understood */
+        return -EINVAL;
+
+    qemu_get_buffer(f, s->cmos_data, 128);
+    qemu_get_8s(f, &s->cmos_index);
+
+    s->current_tm.tm_sec=qemu_get_be32(f); /* same field order as rtc_save() */
+    s->current_tm.tm_min=qemu_get_be32(f);
+    s->current_tm.tm_hour=qemu_get_be32(f);
+    s->current_tm.tm_wday=qemu_get_be32(f);
+    s->current_tm.tm_mday=qemu_get_be32(f);
+    s->current_tm.tm_mon=qemu_get_be32(f);
+    s->current_tm.tm_year=qemu_get_be32(f);
+
+    qemu_get_timer(f, s->periodic_timer);
+    s->next_periodic_time=qemu_get_be64(f);
+
+    s->next_second_time=qemu_get_be64(f);
+    qemu_get_timer(f, s->second_timer);
+    qemu_get_timer(f, s->second_timer2);
+    return 0;
+}
+
+#ifdef TARGET_I386
+static void rtc_save_td(QEMUFile *f, void *opaque)
+{ /* savevm handler for the "mc146818rtc-td" section: irq_coalesced, then period. */
+    RTCState *s = opaque;
+
+    qemu_put_be32(f, s->irq_coalesced);
+    qemu_put_be32(f, s->period);
+}
+
+static int rtc_load_td(QEMUFile *f, void *opaque, int version_id)
+{ /* loadvm counterpart of rtc_save_td(). Returns 0, or -EINVAL for an unknown version. */
+    RTCState *s = opaque;
+
+    if (version_id != 1) /* only stream version 1 is understood */
+        return -EINVAL;
+
+    s->irq_coalesced = qemu_get_be32(f);
+    s->period = qemu_get_be32(f);
+    rtc_coalesced_timer_update(s); /* called once the restored values are in place */
+    return 0;
+}
+#endif
+
+static void rtc_reset(void *opaque)
+{ /* Reset handler: clear interrupt enables and pending flags, then lower the IRQ. */
+    RTCState *s = opaque;
+
+    s->cmos_data[RTC_REG_B] &= ~(REG_B_PIE | REG_B_AIE | REG_B_SQWE); /* other register B bits are kept */
+    s->cmos_data[RTC_REG_C] &= ~(REG_C_UF | REG_C_IRQF | REG_C_PF | REG_C_AF);
+
+    qemu_irq_lower(s->irq);
+
+#ifdef TARGET_I386
+    if (rtc_td_hack)
+	    s->irq_coalesced = 0; /* reset the coalesced-interrupt count */
+#endif
+}
+
+RTCState *rtc_init_sqw(int base, qemu_irq irq, qemu_irq sqw_irq, int base_year)
+{ /* Create an RTC on I/O ports base..base+1; returns the newly allocated state. */
+    RTCState *s;
+
+    s = qemu_mallocz(sizeof(RTCState));
+
+    s->irq = irq;
+    s->sqw_irq = sqw_irq;
+    s->cmos_data[RTC_REG_A] = 0x26; /* initial values of control registers A-D */
+    s->cmos_data[RTC_REG_B] = 0x02;
+    s->cmos_data[RTC_REG_C] = 0x00;
+    s->cmos_data[RTC_REG_D] = 0x80;
+
+    s->base_year = base_year;
+    rtc_set_date_from_host(s);
+
+    s->periodic_timer = qemu_new_timer(vm_clock,
+                                       rtc_periodic_timer, s);
+#ifdef TARGET_I386
+    if (rtc_td_hack) /* coalesced timer exists only when rtc_td_hack is set */
+        s->coalesced_timer = qemu_new_timer(vm_clock, rtc_coalesced_timer, s);
+#endif
+    s->second_timer = qemu_new_timer(vm_clock,
+                                     rtc_update_second, s);
+    s->second_timer2 = qemu_new_timer(vm_clock,
+                                      rtc_update_second2, s);
+
+    s->next_second_time = qemu_get_clock(vm_clock) + (get_ticks_per_sec() * 99) / 100; /* 99% of a second from now */
+    qemu_mod_timer(s->second_timer2, s->next_second_time);
+
+    register_ioport_write(base, 2, 1, cmos_ioport_write, s);
+    register_ioport_read(base, 2, 1, cmos_ioport_read, s);
+
+    register_savevm("mc146818rtc", base, 1, rtc_save, rtc_load, s);
+#ifdef TARGET_I386
+    if (rtc_td_hack) /* the extra savevm section exists only with rtc_td_hack */
+        register_savevm("mc146818rtc-td", base, 1, rtc_save_td, rtc_load_td, s);
+#endif
+    qemu_register_reset(rtc_reset, 0, s);
+
+    return s;
+}
+
+RTCState *rtc_init(int base, qemu_irq irq, int base_year)
+{ /* Convenience wrapper: rtc_init_sqw() with a NULL sqw_irq. */
+    return rtc_init_sqw(base, irq, NULL, base_year);
+}
+
+/* Memory mapped interface */
+static uint32_t cmos_mm_readb (void *opaque, target_phys_addr_t addr)
+{ /* Memory-mapped byte read: forwards to cmos_ioport_read(). */
+    RTCState *s = opaque;
+
+    return cmos_ioport_read(s, addr >> s->it_shift) & 0xFF; /* it_shift scales the memory offset down to a port number */
+}
+
+static void cmos_mm_writeb (void *opaque,
+                            target_phys_addr_t addr, uint32_t value)
+{ /* Memory-mapped byte write: forwards the low 8 bits to cmos_ioport_write(). */
+    RTCState *s = opaque;
+
+    cmos_ioport_write(s, addr >> s->it_shift, value & 0xFF);
+}
+
+static uint32_t cmos_mm_readw (void *opaque, target_phys_addr_t addr)
+{ /* Memory-mapped 16-bit read: forwards to cmos_ioport_read(). */
+    RTCState *s = opaque;
+    uint32_t val;
+
+    val = cmos_ioport_read(s, addr >> s->it_shift) & 0xFFFF;
+#ifdef TARGET_WORDS_BIGENDIAN
+    val = bswap16(val); /* byte-swapped for big-endian targets */
+#endif
+    return val;
+}
+
+static void cmos_mm_writew (void *opaque,
+                            target_phys_addr_t addr, uint32_t value)
+{ /* Memory-mapped 16-bit write: forwards the low 16 bits to cmos_ioport_write(). */
+    RTCState *s = opaque;
+#ifdef TARGET_WORDS_BIGENDIAN
+    value = bswap16(value); /* byte-swapped for big-endian targets */
+#endif
+    cmos_ioport_write(s, addr >> s->it_shift, value & 0xFFFF);
+}
+
+static uint32_t cmos_mm_readl (void *opaque, target_phys_addr_t addr)
+{ /* Memory-mapped 32-bit read: forwards to cmos_ioport_read(). */
+    RTCState *s = opaque;
+    uint32_t val;
+
+    val = cmos_ioport_read(s, addr >> s->it_shift);
+#ifdef TARGET_WORDS_BIGENDIAN
+    val = bswap32(val); /* byte-swapped for big-endian targets */
+#endif
+    return val;
+}
+
+static void cmos_mm_writel (void *opaque,
+                            target_phys_addr_t addr, uint32_t value)
+{ /* Memory-mapped 32-bit write: forwards the value to cmos_ioport_write(). */
+    RTCState *s = opaque;
+#ifdef TARGET_WORDS_BIGENDIAN
+    value = bswap32(value); /* byte-swapped for big-endian targets */
+#endif
+    cmos_ioport_write(s, addr >> s->it_shift, value);
+}
+
+static CPUReadMemoryFunc *rtc_mm_read[] = { /* read handlers passed to cpu_register_io_memory() */
+    &cmos_mm_readb, /* byte */
+    &cmos_mm_readw, /* 16-bit */
+    &cmos_mm_readl, /* 32-bit */
+};
+
+static CPUWriteMemoryFunc *rtc_mm_write[] = { /* write handlers passed to cpu_register_io_memory() */
+    &cmos_mm_writeb, /* byte */
+    &cmos_mm_writew, /* 16-bit */
+    &cmos_mm_writel, /* 32-bit */
+};
+
+RTCState *rtc_mm_init(target_phys_addr_t base, int it_shift, qemu_irq irq,
+                      int base_year)
+{ /* Memory-mapped variant of rtc_init_sqw(); returns the newly allocated state. */
+    RTCState *s;
+    int io_memory;
+
+    s = qemu_mallocz(sizeof(RTCState)); /* NOTE(review): s->it_shift is never assigned from the it_shift argument - confirm */
+
+    s->irq = irq;
+    s->cmos_data[RTC_REG_A] = 0x26; /* same initial register values as rtc_init_sqw() */
+    s->cmos_data[RTC_REG_B] = 0x02;
+    s->cmos_data[RTC_REG_C] = 0x00;
+    s->cmos_data[RTC_REG_D] = 0x80;
+
+    s->base_year = base_year;
+    rtc_set_date_from_host(s);
+
+    s->periodic_timer = qemu_new_timer(vm_clock,
+                                       rtc_periodic_timer, s);
+    s->second_timer = qemu_new_timer(vm_clock,
+                                     rtc_update_second, s);
+    s->second_timer2 = qemu_new_timer(vm_clock,
+                                      rtc_update_second2, s);
+
+    s->next_second_time = qemu_get_clock(vm_clock) + (get_ticks_per_sec() * 99) / 100; /* 99% of a second from now */
+    qemu_mod_timer(s->second_timer2, s->next_second_time);
+
+    io_memory = cpu_register_io_memory(rtc_mm_read, rtc_mm_write, s);
+    cpu_register_physical_memory(base, 2 << it_shift, io_memory); /* two registers, each spaced 1 << it_shift apart */
+
+    register_savevm("mc146818rtc", base, 1, rtc_save, rtc_load, s);
+#ifdef TARGET_I386
+    if (rtc_td_hack) /* NOTE(review): rtc_load_td is registered but no coalesced_timer is created here, unlike rtc_init_sqw() - confirm */
+        register_savevm("mc146818rtc-td", base, 1, rtc_save_td, rtc_load_td, s);
+#endif
+    qemu_register_reset(rtc_reset, 0, s);
+    return s;
+}
diff --git a/hw/ne2000.c b/hw/ne2000.c
new file mode 100644
index 0000000..66ae9ab
--- /dev/null
+++ b/hw/ne2000.c
@@ -0,0 +1,840 @@
+/*
+ * QEMU NE2000 emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw.h"
+#include "pci.h"
+#include "pc.h"
+#include "net.h"
+
+/* debug NE2000 card */
+//#define DEBUG_NE2000
+
+#define MAX_ETH_FRAME_SIZE 1514
+
+#define E8390_CMD	0x00  /* The command register (for all pages) */
+/* Page 0 register offsets. */
+#define EN0_CLDALO	0x01	/* Low byte of current local dma addr  RD */
+#define EN0_STARTPG	0x01	/* Starting page of ring bfr WR */
+#define EN0_CLDAHI	0x02	/* High byte of current local dma addr  RD */
+#define EN0_STOPPG	0x02	/* Ending page +1 of ring bfr WR */
+#define EN0_BOUNDARY	0x03	/* Boundary page of ring bfr RD WR */
+#define EN0_TSR		0x04	/* Transmit status reg RD */
+#define EN0_TPSR	0x04	/* Transmit starting page WR */
+#define EN0_NCR		0x05	/* Number of collision reg RD */
+#define EN0_TCNTLO	0x05	/* Low  byte of tx byte count WR */
+#define EN0_FIFO	0x06	/* FIFO RD */
+#define EN0_TCNTHI	0x06	/* High byte of tx byte count WR */
+#define EN0_ISR		0x07	/* Interrupt status reg RD WR */
+#define EN0_CRDALO	0x08	/* low byte of current remote dma address RD */
+#define EN0_RSARLO	0x08	/* Remote start address reg 0 */
+#define EN0_CRDAHI	0x09	/* high byte, current remote dma address RD */
+#define EN0_RSARHI	0x09	/* Remote start address reg 1 */
+#define EN0_RCNTLO	0x0a	/* Remote byte count reg WR */
+#define EN0_RTL8029ID0	0x0a	/* Realtek ID byte #1 RD */
+#define EN0_RCNTHI	0x0b	/* Remote byte count reg WR */
+#define EN0_RTL8029ID1	0x0b	/* Realtek ID byte #2 RD */
+#define EN0_RSR		0x0c	/* rx status reg RD */
+#define EN0_RXCR	0x0c	/* RX configuration reg WR */
+#define EN0_TXCR	0x0d	/* TX configuration reg WR */
+#define EN0_COUNTER0	0x0d	/* Rcv alignment error counter RD */
+#define EN0_DCFG	0x0e	/* Data configuration reg WR */
+#define EN0_COUNTER1	0x0e	/* Rcv CRC error counter RD */
+#define EN0_IMR		0x0f	/* Interrupt mask reg WR */
+#define EN0_COUNTER2	0x0f	/* Rcv missed frame error counter RD */
+
+#define EN1_PHYS        0x11	/* page 1: first of 6 station address bytes */
+#define EN1_CURPAG      0x17	/* page 1: current receive page */
+#define EN1_MULT        0x18	/* page 1: first of 8 multicast mask bytes */
+
+#define EN2_STARTPG	0x21	/* Starting page of ring bfr RD */
+#define EN2_STOPPG	0x22	/* Ending page +1 of ring bfr RD */
+
+#define EN3_CONFIG0	0x33	/* page 3 registers; only CONFIG0/2/3 are read back below */
+#define EN3_CONFIG1	0x34
+#define EN3_CONFIG2	0x35
+#define EN3_CONFIG3	0x36
+
+/*  Register accessed at EN_CMD, the 8390 base addr.  */
+#define E8390_STOP	0x01	/* Stop and reset the chip */
+#define E8390_START	0x02	/* Start the chip, clear reset */
+#define E8390_TRANS	0x04	/* Transmit a frame */
+#define E8390_RREAD	0x08	/* Remote read */
+#define E8390_RWRITE	0x10	/* Remote write  */
+#define E8390_NODMA	0x20	/* Remote DMA */
+#define E8390_PAGE0	0x00	/* Select page chip registers */
+#define E8390_PAGE1	0x40	/* using the two high-order bits */
+#define E8390_PAGE2	0x80	/* Page 3 is invalid. */
+
+/* Bits in EN0_ISR - Interrupt status register */
+#define ENISR_RX	0x01	/* Receiver, no error */
+#define ENISR_TX	0x02	/* Transmitter, no error */
+#define ENISR_RX_ERR	0x04	/* Receiver, with error */
+#define ENISR_TX_ERR	0x08	/* Transmitter, with error */
+#define ENISR_OVER	0x10	/* Receiver overwrote the ring */
+#define ENISR_COUNTERS	0x20	/* Counters need emptying */
+#define ENISR_RDC	0x40	/* remote dma complete */
+#define ENISR_RESET	0x80	/* Reset completed */
+#define ENISR_ALL	0x3f	/* Interrupts we will enable */
+
+/* Bits in received packet status byte and EN0_RSR*/
+#define ENRSR_RXOK	0x01	/* Received a good packet */
+#define ENRSR_CRC	0x02	/* CRC error */
+#define ENRSR_FAE	0x04	/* frame alignment error */
+#define ENRSR_FO	0x08	/* FIFO overrun */
+#define ENRSR_MPA	0x10	/* missed pkt */
+#define ENRSR_PHY	0x20	/* physical/multicast address */
+#define ENRSR_DIS	0x40	/* receiver disable. set in monitor mode */
+#define ENRSR_DEF	0x80	/* deferring */
+
+/* Transmitted packet status, EN0_TSR. */
+#define ENTSR_PTX 0x01	/* Packet transmitted without error */
+#define ENTSR_ND  0x02	/* The transmit wasn't deferred. */
+#define ENTSR_COL 0x04	/* The transmit collided at least once. */
+#define ENTSR_ABT 0x08  /* The transmit collided 16 times, and was deferred. */
+#define ENTSR_CRS 0x10	/* The carrier sense was lost. */
+#define ENTSR_FU  0x20  /* A "FIFO underrun" occurred during transmit. */
+#define ENTSR_CDH 0x40	/* The collision detect "heartbeat" signal was lost. */
+#define ENTSR_OWC 0x80  /* There was an out-of-window collision. */
+
+#define NE2000_PMEM_SIZE    (32*1024)	/* bytes of packet memory */
+#define NE2000_PMEM_START   (16*1024)	/* offset of packet memory inside mem[] */
+#define NE2000_PMEM_END     (NE2000_PMEM_SIZE+NE2000_PMEM_START)	/* one past the last packet byte */
+#define NE2000_MEM_SIZE     NE2000_PMEM_END	/* size of NE2000State.mem[] */
+
+typedef struct NE2000State { /* register and memory state of one emulated NE2000 card */
+    uint8_t cmd; /* command register; bits 6-7 select the register page */
+    uint32_t start; /* receive ring start, as a byte offset (page << 8) */
+    uint32_t stop; /* receive ring end, as a byte offset (page << 8) */
+    uint8_t boundary; /* boundary page of the receive ring */
+    uint8_t tsr; /* transmit status */
+    uint8_t tpsr; /* transmit start page */
+    uint16_t tcnt; /* transmit byte count */
+    uint16_t rcnt; /* remote DMA byte count */
+    uint32_t rsar; /* remote DMA address into mem[] */
+    uint8_t rsr; /* receive status */
+    uint8_t rxcr; /* receive configuration */
+    uint8_t isr; /* interrupt status */
+    uint8_t dcfg; /* data configuration; bit 0 selects 16-bit remote DMA */
+    uint8_t imr; /* interrupt mask */
+    uint8_t phys[6]; /* mac address */
+    uint8_t curpag; /* page where the next received frame is stored */
+    uint8_t mult[8]; /* multicast mask array */
+    qemu_irq irq;
+    int isa_io_base; /* I/O base of the ISA variant, used on cleanup */
+    PCIDevice *pci_dev; /* set only for the PCI variant */
+    VLANClientState *vc;
+    uint8_t macaddr[6]; /* MAC address copied into the PROM area on reset */
+    uint8_t mem[NE2000_MEM_SIZE]; /* PROM image at the start, packet memory from NE2000_PMEM_START */
+} NE2000State;
+
+static void ne2000_reset(NE2000State *s)
+{ /* Flag reset in the ISR and rebuild the 32-byte PROM image at mem[0..31]. */
+    int i;
+
+    s->isr = ENISR_RESET;
+    memcpy(s->mem, s->macaddr, 6); /* PROM starts with the MAC address */
+    s->mem[14] = 0x57;
+    s->mem[15] = 0x57;
+
+    /* duplicate prom data */
+    for(i = 15;i >= 0; i--) { /* top-down so a source byte is read before it is overwritten */
+        s->mem[2 * i] = s->mem[i];
+        s->mem[2 * i + 1] = s->mem[i];
+    }
+}
+
+static void ne2000_update_irq(NE2000State *s)
+{ /* Drive the IRQ line high when any unmasked interrupt status bit is set. */
+    int isr;
+    isr = (s->isr & s->imr) & 0x7f; /* bit 7 (ENISR_RESET) never raises the IRQ */
+#if defined(DEBUG_NE2000)
+    printf("NE2000: Set IRQ to %d (%02x %02x)\n",
+	   isr ? 1 : 0, s->isr, s->imr);
+#endif
+    qemu_set_irq(s->irq, (isr != 0));
+}
+
+#define POLYNOMIAL 0x04c11db6 /* 0x04c11db7 without bit 0; "| carry" below sets that bit */
+
+/* From FreeBSD */
+/* XXX: optimize */
+static int compute_mcast_idx(const uint8_t *ep)
+{ /* CRC the 6 address bytes at ep and return the top 6 bits (0..63). */
+    uint32_t crc;
+    int carry, i, j;
+    uint8_t b;
+
+    crc = 0xffffffff;
+    for (i = 0; i < 6; i++) {
+        b = *ep++;
+        for (j = 0; j < 8; j++) { /* bits are consumed least significant first */
+            carry = ((crc & 0x80000000L) ? 1 : 0) ^ (b & 0x01);
+            crc <<= 1;
+            b >>= 1;
+            if (carry)
+                crc = ((crc ^ POLYNOMIAL) | carry);
+        }
+    }
+    return (crc >> 26); /* the caller uses this as a bit index into mult[8] */
+}
+
+static int ne2000_buffer_full(NE2000State *s)
+{ /* Returns 1 when the ring cannot hold a maximum-size frame plus header, else 0. */
+    int avail, index, boundary;
+
+    if (s->stop <= s->start) /* empty or inverted ring: the unsigned subtraction below would wrap */
+        return 1;
+    index = s->curpag << 8; /* page numbers are in units of 256 bytes */
+    boundary = s->boundary << 8;
+    if (index < boundary)
+        avail = boundary - index;
+    else
+        avail = (s->stop - s->start) - (index - boundary);
+    return avail < (MAX_ETH_FRAME_SIZE + 4); /* 4 extra bytes for the per-packet header */
+}
+
+static int ne2000_can_receive(VLANClientState *vc)
+{ /* Returns non-zero when a frame may be handed to ne2000_receive(). */
+    NE2000State *s = vc->opaque;
+
+    if (s->cmd & E8390_STOP) /* stopped card still reports ready; ne2000_receive() then returns -1 */
+        return 1;
+    return !ne2000_buffer_full(s);
+}
+
+#define MIN_BUF_SIZE 60
+
+static ssize_t ne2000_receive(VLANClientState *vc, const uint8_t *buf, size_t size_)
+{ /* Filter one incoming frame and copy it into the receive ring; returns bytes consumed or -1. */
+    NE2000State *s = vc->opaque;
+    int size = size_;
+    uint8_t *p;
+    unsigned int total_len, next, avail, len, index, mcast_idx;
+    uint8_t buf1[60];
+    static const uint8_t broadcast_macaddr[6] =
+        { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+#if defined(DEBUG_NE2000)
+    printf("NE2000: received len=%d\n", size);
+#endif
+
+    if (s->cmd & E8390_STOP || ne2000_buffer_full(s))
+        return -1;
+
+    /* XXX: check this */
+    if (s->rxcr & 0x10) {
+        /* promiscuous: receive all */
+    } else {
+        if (!memcmp(buf,  broadcast_macaddr, 6)) {
+            /* broadcast address */
+            if (!(s->rxcr & 0x04))
+                return size; /* filtered out: dropped but reported as consumed */
+        } else if (buf[0] & 0x01) {
+            /* multicast */
+            if (!(s->rxcr & 0x08))
+                return size;
+            mcast_idx = compute_mcast_idx(buf);
+            if (!(s->mult[mcast_idx >> 3] & (1 << (mcast_idx & 7))))
+                return size;
+        } else if (s->mem[0] == buf[0] &&
+                   s->mem[2] == buf[1] &&
+                   s->mem[4] == buf[2] &&
+                   s->mem[6] == buf[3] &&
+                   s->mem[8] == buf[4] &&
+                   s->mem[10] == buf[5]) {
+            /* match */
+        } else {
+            return size;
+        }
+    }
+
+
+    /* if too small buffer, then expand it */
+    if (size < MIN_BUF_SIZE) {
+        memcpy(buf1, buf, size);
+        memset(buf1 + size, 0, MIN_BUF_SIZE - size);
+        buf = buf1;
+        size = MIN_BUF_SIZE;
+    }
+
+    index = s->curpag << 8;
+    /* 4 bytes for header */
+    total_len = size + 4;
+    /* address for next packet (4 bytes for CRC) */
+    next = index + ((total_len + 4 + 255) & ~0xff);
+    if (next >= s->stop)
+        next -= (s->stop - s->start);
+    /* prepare packet header */
+    p = s->mem + index;
+    s->rsr = ENRSR_RXOK; /* receive status */
+    /* XXX: check this */
+    if (buf[0] & 0x01)
+        s->rsr |= ENRSR_PHY;
+    p[0] = s->rsr;
+    p[1] = next >> 8;
+    p[2] = total_len;
+    p[3] = total_len >> 8;
+    index += 4;
+
+    /* write packet data; NOTE(review): start/stop/curpag are guest-set and not checked against NE2000_MEM_SIZE here */
+    while (size > 0) {
+        if (index < s->stop)
+            avail = s->stop - index;
+        else
+            break; /* index at or past stop: avail would be 0 and the loop would never end */
+        len = size;
+        if (len > avail)
+            len = avail;
+        memcpy(s->mem + index, buf, len);
+        buf += len;
+        index += len;
+        if (index == s->stop)
+            index = s->start; /* wrap to the start of the ring */
+        size -= len;
+    }
+    s->curpag = next >> 8;
+
+    /* now we can signal we have received something */
+    s->isr |= ENISR_RX;
+    ne2000_update_irq(s);
+
+    return size_;
+}
+
+static void ne2000_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{ /* Register write handler; ring pointers outside the packet memory are ignored. */
+    NE2000State *s = opaque;
+    int offset, page, index;
+
+    addr &= 0xf;
+#ifdef DEBUG_NE2000
+    printf("NE2000: write addr=0x%x val=0x%02x\n", addr, val);
+#endif
+    if (addr == E8390_CMD) {
+        /* control register */
+        s->cmd = val;
+        if (!(val & E8390_STOP)) { /* START bit makes no sense on RTL8029... */
+            s->isr &= ~ENISR_RESET;
+            /* test specific case: zero length transfer */
+            if ((val & (E8390_RREAD | E8390_RWRITE)) &&
+                s->rcnt == 0) {
+                s->isr |= ENISR_RDC;
+                ne2000_update_irq(s);
+            }
+            if (val & E8390_TRANS) {
+                index = (s->tpsr << 8);
+                /* XXX: next 2 lines are a hack to make netware 3.11 work */
+                if (index >= NE2000_PMEM_END)
+                    index -= NE2000_PMEM_SIZE;
+                /* fail safe: check range on the transmitted length  */
+                if (index + s->tcnt <= NE2000_PMEM_END) {
+                    qemu_send_packet(s->vc, s->mem + index, s->tcnt);
+                }
+                /* signal end of transfer */
+                s->tsr = ENTSR_PTX;
+                s->isr |= ENISR_TX;
+                s->cmd &= ~E8390_TRANS;
+                ne2000_update_irq(s);
+            }
+        }
+    } else {
+        page = s->cmd >> 6; /* register page comes from the two top command bits */
+        offset = addr | (page << 4);
+        switch(offset) {
+        case EN0_STARTPG:
+            if (val << 8 <= NE2000_PMEM_END) s->start = val << 8; /* ring must stay inside mem[] */
+            break;
+        case EN0_STOPPG:
+            if (val << 8 <= NE2000_PMEM_END) s->stop = val << 8; /* ring must stay inside mem[] */
+            break;
+        case EN0_BOUNDARY:
+            if (val << 8 < NE2000_PMEM_END) s->boundary = val; /* page must lie inside mem[] */
+            break;
+        case EN0_IMR:
+            s->imr = val;
+            ne2000_update_irq(s);
+            break;
+        case EN0_TPSR:
+            s->tpsr = val;
+            break;
+        case EN0_TCNTLO:
+            s->tcnt = (s->tcnt & 0xff00) | val;
+            break;
+        case EN0_TCNTHI:
+            s->tcnt = (s->tcnt & 0x00ff) | (val << 8);
+            break;
+        case EN0_RSARLO:
+            s->rsar = (s->rsar & 0xff00) | val;
+            break;
+        case EN0_RSARHI:
+            s->rsar = (s->rsar & 0x00ff) | (val << 8);
+            break;
+        case EN0_RCNTLO:
+            s->rcnt = (s->rcnt & 0xff00) | val;
+            break;
+        case EN0_RCNTHI:
+            s->rcnt = (s->rcnt & 0x00ff) | (val << 8);
+            break;
+        case EN0_RXCR:
+            s->rxcr = val;
+            break;
+        case EN0_DCFG:
+            s->dcfg = val;
+            break;
+        case EN0_ISR:
+            s->isr &= ~(val & 0x7f); /* writing 1 clears a status bit; bit 7 cannot be cleared this way */
+            ne2000_update_irq(s);
+            break;
+        case EN1_PHYS ... EN1_PHYS + 5:
+            s->phys[offset - EN1_PHYS] = val;
+            break;
+        case EN1_CURPAG:
+            if (val << 8 < NE2000_PMEM_END) s->curpag = val; /* receive writes a header at curpag << 8 */
+            break;
+        case EN1_MULT ... EN1_MULT + 7:
+            s->mult[offset - EN1_MULT] = val;
+            break;
+        }
+    }
+}
+
+static uint32_t ne2000_ioport_read(void *opaque, uint32_t addr)
+{ /* Register read handler; registers not listed below read as 0. */
+    NE2000State *s = opaque;
+    int offset, page, ret;
+
+    addr &= 0xf;
+    if (addr == E8390_CMD) {
+        ret = s->cmd;
+    } else {
+        page = s->cmd >> 6; /* register page comes from the two top command bits */
+        offset = addr | (page << 4);
+        switch(offset) {
+        case EN0_TSR:
+            ret = s->tsr;
+            break;
+        case EN0_BOUNDARY:
+            ret = s->boundary;
+            break;
+        case EN0_ISR:
+            ret = s->isr;
+            break;
+	case EN0_RSARLO:
+	    ret = s->rsar & 0x00ff;
+	    break;
+	case EN0_RSARHI:
+	    ret = s->rsar >> 8;
+	    break;
+        case EN1_PHYS ... EN1_PHYS + 5:
+            ret = s->phys[offset - EN1_PHYS];
+            break;
+        case EN1_CURPAG:
+            ret = s->curpag;
+            break;
+        case EN1_MULT ... EN1_MULT + 7:
+            ret = s->mult[offset - EN1_MULT];
+            break;
+        case EN0_RSR:
+            ret = s->rsr;
+            break;
+        case EN2_STARTPG:
+            ret = s->start >> 8; /* start/stop are kept as byte offsets, reported as pages */
+            break;
+        case EN2_STOPPG:
+            ret = s->stop >> 8;
+            break;
+	case EN0_RTL8029ID0:
+	    ret = 0x50;
+	    break;
+	case EN0_RTL8029ID1:
+	    ret = 0x43;
+	    break;
+	case EN3_CONFIG0:
+	    ret = 0;		/* 10baseT media */
+	    break;
+	case EN3_CONFIG2:
+	    ret = 0x40;		/* 10baseT active */
+	    break;
+	case EN3_CONFIG3:
+	    ret = 0x40;		/* Full duplex */
+	    break;
+        default:
+            ret = 0x00;
+            break;
+        }
+    }
+#ifdef DEBUG_NE2000
+    printf("NE2000: read addr=0x%x val=%02x\n", addr, ret);
+#endif
+    return ret;
+}
+
+static inline void ne2000_mem_writeb(NE2000State *s, uint32_t addr,
+                                     uint32_t val)
+{ /* Store one byte in card memory; addresses outside the two valid windows are ignored. */
+    if (addr < 32 || /* first window: the 32-byte PROM area */
+        (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) { /* second window: packet memory */
+        s->mem[addr] = val;
+    }
+}
+
+static inline void ne2000_mem_writew(NE2000State *s, uint32_t addr,
+                                     uint32_t val)
+{ /* Store a 16-bit little-endian value in card memory; invalid addresses are ignored. */
+    addr &= ~1; /* XXX: check exact behaviour if not even */
+    if (addr < 32 ||
+        (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) { /* addr is even, so both bytes fit */
+        *(uint16_t *)(s->mem + addr) = cpu_to_le16(val);
+    }
+}
+
+static inline void ne2000_mem_writel(NE2000State *s, uint32_t addr,
+                                     uint32_t val)
+{ /* Store a 32-bit little-endian value in card memory; invalid addresses are ignored. */
+    addr &= ~1; /* XXX: check exact behaviour if not even */
+    if (addr < 32 ||
+        (addr >= NE2000_PMEM_START && addr <= NE2000_MEM_SIZE - 4)) { /* all 4 bytes must fit in mem[] */
+        cpu_to_le32wu((uint32_t *)(s->mem + addr), val);
+    }
+}
+
+static inline uint32_t ne2000_mem_readb(NE2000State *s, uint32_t addr)
+{ /* Read one byte of card memory; addresses outside the valid windows read as 0xff. */
+    if (addr < 32 ||
+        (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) {
+        return s->mem[addr];
+    } else {
+        return 0xff;
+    }
+}
+
+static inline uint32_t ne2000_mem_readw(NE2000State *s, uint32_t addr)
+{ /* Read a 16-bit little-endian value; invalid addresses read as 0xffff. */
+    addr &= ~1; /* XXX: check exact behaviour if not even */
+    if (addr < 32 ||
+        (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) { /* addr is even, so both bytes fit */
+        return le16_to_cpu(*(uint16_t *)(s->mem + addr));
+    } else {
+        return 0xffff;
+    }
+}
+
+static inline uint32_t ne2000_mem_readl(NE2000State *s, uint32_t addr)
+{ /* Read a 32-bit little-endian value; invalid addresses read as 0xffffffff. */
+    addr &= ~1; /* XXX: check exact behaviour if not even */
+    if (addr < 32 ||
+        (addr >= NE2000_PMEM_START && addr <= NE2000_MEM_SIZE - 4)) { /* all 4 bytes must lie in mem[] */
+        return le32_to_cpupu((uint32_t *)(s->mem + addr));
+    } else {
+        return 0xffffffff;
+    }
+}
+
+static inline void ne2000_dma_update(NE2000State *s, int len)
+{ /* Advance the remote DMA address by len bytes and count the transfer down. */
+    s->rsar += len;
+    /* wrap */
+    /* XXX: check what to do if rsar > stop */
+    if (s->rsar == s->stop) /* wraps only on an exact match with stop */
+        s->rsar = s->start;
+
+    if (s->rcnt <= len) {
+        s->rcnt = 0;
+        /* signal end of transfer */
+        s->isr |= ENISR_RDC;
+        ne2000_update_irq(s);
+    } else {
+        s->rcnt -= len;
+    }
+}
+
+static void ne2000_asic_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{ /* Data port write: store val at the remote DMA address, then advance it. */
+    NE2000State *s = opaque;
+
+#ifdef DEBUG_NE2000
+    printf("NE2000: asic write val=0x%04x\n", val);
+#endif
+    if (s->rcnt == 0) /* no transfer in progress: the write is dropped */
+        return;
+    if (s->dcfg & 0x01) {
+        /* 16 bit access */
+        ne2000_mem_writew(s, s->rsar, val);
+        ne2000_dma_update(s, 2);
+    } else {
+        /* 8 bit access */
+        ne2000_mem_writeb(s, s->rsar, val);
+        ne2000_dma_update(s, 1);
+    }
+}
+
+static uint32_t ne2000_asic_ioport_read(void *opaque, uint32_t addr)
+{ /* Data port read: fetch from the remote DMA address, then advance it. */
+    NE2000State *s = opaque;
+    int ret;
+
+    if (s->dcfg & 0x01) { /* unlike the write path, rcnt == 0 is not checked first */
+        /* 16 bit access */
+        ret = ne2000_mem_readw(s, s->rsar);
+        ne2000_dma_update(s, 2);
+    } else {
+        /* 8 bit access */
+        ret = ne2000_mem_readb(s, s->rsar);
+        ne2000_dma_update(s, 1);
+    }
+#ifdef DEBUG_NE2000
+    printf("NE2000: asic read val=0x%04x\n", ret);
+#endif
+    return ret;
+}
+
+static void ne2000_asic_ioport_writel(void *opaque, uint32_t addr, uint32_t val)
+{ /* 32-bit data port write: store val at the remote DMA address, then advance by 4. */
+    NE2000State *s = opaque;
+
+#ifdef DEBUG_NE2000
+    printf("NE2000: asic writel val=0x%04x\n", val);
+#endif
+    if (s->rcnt == 0) /* no transfer in progress: the write is dropped */
+        return;
+    /* 32 bit access */
+    ne2000_mem_writel(s, s->rsar, val);
+    ne2000_dma_update(s, 4);
+}
+
+static uint32_t ne2000_asic_ioport_readl(void *opaque, uint32_t addr)
+{ /* 32-bit data port read: fetch from the remote DMA address, then advance by 4. */
+    NE2000State *s = opaque;
+    int ret;
+
+    /* 32 bit access */
+    ret = ne2000_mem_readl(s, s->rsar);
+    ne2000_dma_update(s, 4);
+#ifdef DEBUG_NE2000
+    printf("NE2000: asic readl val=0x%04x\n", ret);
+#endif
+    return ret;
+}
+
+static void ne2000_reset_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{ /* Reset port write handler: deliberately does nothing. */
+    /* nothing to do (end of reset pulse) */
+}
+
+static uint32_t ne2000_reset_ioport_read(void *opaque, uint32_t addr)
+{ /* Reset port read handler: reading the port resets the card; always returns 0. */
+    NE2000State *s = opaque;
+    ne2000_reset(s);
+    return 0;
+}
+
+static void ne2000_save(QEMUFile* f,void* opaque)
+{ /* savevm handler: write the card state to f; ne2000_load() reads the same field order. */
+	NE2000State* s=(NE2000State*)opaque;
+        uint32_t tmp;
+
+        if (s->pci_dev) /* PCI variant only: PCI device state comes first */
+            pci_device_save(s->pci_dev, f);
+
+        qemu_put_8s(f, &s->rxcr); /* ne2000_load() reads this only for version_id >= 2 */
+
+	qemu_put_8s(f, &s->cmd);
+	qemu_put_be32s(f, &s->start);
+	qemu_put_be32s(f, &s->stop);
+	qemu_put_8s(f, &s->boundary);
+	qemu_put_8s(f, &s->tsr);
+	qemu_put_8s(f, &s->tpsr);
+	qemu_put_be16s(f, &s->tcnt);
+	qemu_put_be16s(f, &s->rcnt);
+	qemu_put_be32s(f, &s->rsar);
+	qemu_put_8s(f, &s->rsr);
+	qemu_put_8s(f, &s->isr);
+	qemu_put_8s(f, &s->dcfg);
+	qemu_put_8s(f, &s->imr);
+	qemu_put_buffer(f, s->phys, 6);
+	qemu_put_8s(f, &s->curpag);
+	qemu_put_buffer(f, s->mult, 8);
+        tmp = 0;
+	qemu_put_be32s(f, &tmp); /* ignored, was irq */
+	qemu_put_buffer(f, s->mem, NE2000_MEM_SIZE); /* the whole card memory, PROM image included */
+}
+
+static int ne2000_load(QEMUFile* f,void* opaque,int version_id)
+{ /* loadvm handler: restore state written by ne2000_save(). Returns 0 or a negative error. */
+	NE2000State* s=(NE2000State*)opaque;
+        int ret;
+        uint32_t tmp;
+
+        if (version_id > 3) /* versions newer than 3 are not understood */
+            return -EINVAL;
+
+        if (s->pci_dev && version_id >= 3) { /* PCI state is present from version 3 */
+            ret = pci_device_load(s->pci_dev, f);
+            if (ret < 0)
+                return ret;
+        }
+
+        if (version_id >= 2) { /* rxcr is present from version 2 */
+            qemu_get_8s(f, &s->rxcr);
+        } else {
+            s->rxcr = 0x0c; /* default used for older streams */
+        }
+
+	qemu_get_8s(f, &s->cmd);
+	qemu_get_be32s(f, &s->start); /* NOTE(review): start/stop/curpag are loaded without range checks */
+	qemu_get_be32s(f, &s->stop);
+	qemu_get_8s(f, &s->boundary);
+	qemu_get_8s(f, &s->tsr);
+	qemu_get_8s(f, &s->tpsr);
+	qemu_get_be16s(f, &s->tcnt);
+	qemu_get_be16s(f, &s->rcnt);
+	qemu_get_be32s(f, &s->rsar);
+	qemu_get_8s(f, &s->rsr);
+	qemu_get_8s(f, &s->isr);
+	qemu_get_8s(f, &s->dcfg);
+	qemu_get_8s(f, &s->imr);
+	qemu_get_buffer(f, s->phys, 6);
+	qemu_get_8s(f, &s->curpag);
+	qemu_get_buffer(f, s->mult, 8);
+	qemu_get_be32s(f, &tmp); /* ignored */
+	qemu_get_buffer(f, s->mem, NE2000_MEM_SIZE);
+
+	return 0;
+}
+
+static void isa_ne2000_cleanup(VLANClientState *vc)
+{ /* Cleanup callback for the ISA card: undo isa_ne2000_init() and free the state. */
+    NE2000State *s = vc->opaque;
+
+    unregister_savevm("ne2000", s);
+
+    isa_unassign_ioport(s->isa_io_base, 16); /* the same three port ranges isa_ne2000_init() registers */
+    isa_unassign_ioport(s->isa_io_base + 0x10, 2);
+    isa_unassign_ioport(s->isa_io_base + 0x1f, 1);
+
+    qemu_free(s); /* the ISA state was allocated by isa_ne2000_init() */
+}
+
+void isa_ne2000_init(int base, qemu_irq irq, NICInfo *nd)
+{ /* Create an ISA NE2000 at I/O port base and attach it to the VLAN described by nd. */
+    NE2000State *s;
+
+    qemu_check_nic_model(nd, "ne2k_isa");
+
+    s = qemu_mallocz(sizeof(NE2000State));
+
+    register_ioport_write(base, 16, 1, ne2000_ioport_write, s); /* register ports base .. base+0x0f */
+    register_ioport_read(base, 16, 1, ne2000_ioport_read, s);
+
+    register_ioport_write(base + 0x10, 1, 1, ne2000_asic_ioport_write, s); /* data port; no 32-bit handlers here */
+    register_ioport_read(base + 0x10, 1, 1, ne2000_asic_ioport_read, s);
+    register_ioport_write(base + 0x10, 2, 2, ne2000_asic_ioport_write, s);
+    register_ioport_read(base + 0x10, 2, 2, ne2000_asic_ioport_read, s);
+
+    register_ioport_write(base + 0x1f, 1, 1, ne2000_reset_ioport_write, s); /* reset port */
+    register_ioport_read(base + 0x1f, 1, 1, ne2000_reset_ioport_read, s);
+    s->isa_io_base = base; /* kept for isa_ne2000_cleanup() */
+    s->irq = irq;
+    memcpy(s->macaddr, nd->macaddr, 6);
+
+    ne2000_reset(s);
+
+    s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
+                                 ne2000_can_receive, ne2000_receive, NULL,
+                                 isa_ne2000_cleanup, s);
+
+    qemu_format_nic_info_str(s->vc, s->macaddr);
+
+    register_savevm("ne2000", -1, 2, ne2000_save, ne2000_load, s); /* ISA state is registered as version 2 */
+}
+
+/***********************************************************/
+/* PCI NE2000 definitions */
+
+typedef struct PCINE2000State { /* PCI wrapper around the common NE2000 state */
+    PCIDevice dev; /* first member: code casts PCIDevice * to PCINE2000State * */
+    NE2000State ne2000;
+} PCINE2000State;
+
+static void ne2000_map(PCIDevice *pci_dev, int region_num,
+                       uint32_t addr, uint32_t size, int type)
+{ /* BAR map callback: register the card's I/O port handlers at addr. */
+    PCINE2000State *d = (PCINE2000State *)pci_dev;
+    NE2000State *s = &d->ne2000;
+
+    register_ioport_write(addr, 16, 1, ne2000_ioport_write, s); /* register ports addr .. addr+0x0f */
+    register_ioport_read(addr, 16, 1, ne2000_ioport_read, s);
+
+    register_ioport_write(addr + 0x10, 1, 1, ne2000_asic_ioport_write, s); /* data port */
+    register_ioport_read(addr + 0x10, 1, 1, ne2000_asic_ioport_read, s);
+    register_ioport_write(addr + 0x10, 2, 2, ne2000_asic_ioport_write, s);
+    register_ioport_read(addr + 0x10, 2, 2, ne2000_asic_ioport_read, s);
+    register_ioport_write(addr + 0x10, 4, 4, ne2000_asic_ioport_writel, s); /* PCI also gets the 32-bit handlers */
+    register_ioport_read(addr + 0x10, 4, 4, ne2000_asic_ioport_readl, s);
+
+    register_ioport_write(addr + 0x1f, 1, 1, ne2000_reset_ioport_write, s); /* reset port */
+    register_ioport_read(addr + 0x1f, 1, 1, ne2000_reset_ioport_read, s);
+}
+
+static void ne2000_cleanup(VLANClientState *vc)
+{ /* Cleanup callback for the PCI card: only unregisters the savevm handler. */
+    NE2000State *s = vc->opaque; /* embedded in PCINE2000State, so it is not freed here */
+
+    unregister_savevm("ne2000", s);
+}
+
+static void pci_ne2000_init(PCIDevice *pci_dev)
+{ /* Device init: fill in the PCI config space and set up the embedded NE2000 state. */
+    PCINE2000State *d = (PCINE2000State *)pci_dev;
+    NE2000State *s;
+    uint8_t *pci_conf;
+
+    pci_conf = d->dev.config;
+    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REALTEK);
+    pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REALTEK_8029);
+    pci_config_set_class(pci_conf, PCI_CLASS_NETWORK_ETHERNET);
+    pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; // header_type
+    pci_conf[0x3d] = 1; // interrupt pin register: value 1 selects INTA#
+
+    pci_register_bar(&d->dev, 0, 0x100,
+                           PCI_ADDRESS_SPACE_IO, ne2000_map);
+    s = &d->ne2000;
+    s->irq = d->dev.irq[0];
+    s->pci_dev = (PCIDevice *)d; /* non-NULL pci_dev makes save/load include the PCI state */
+    qdev_get_macaddr(&d->dev.qdev, s->macaddr);
+    ne2000_reset(s);
+    s->vc = qdev_get_vlan_client(&d->dev.qdev,
+                                 ne2000_can_receive, ne2000_receive, NULL,
+                                 ne2000_cleanup, s);
+
+    qemu_format_nic_info_str(s->vc, s->macaddr);
+
+    register_savevm("ne2000", -1, 3, ne2000_save, ne2000_load, s); /* PCI state is registered as version 3 */
+}
+
+static void ne2000_register_devices(void)
+{ /* Register the PCI card under the name "ne2k_pci". */
+    pci_qdev_register("ne2k_pci", sizeof(PCINE2000State), pci_ne2000_init);
+}
+
+device_init(ne2000_register_devices)
diff --git a/hw/pc.c b/hw/pc.c
new file mode 100644
index 0000000..7c32211
--- /dev/null
+++ b/hw/pc.c
@@ -0,0 +1,1310 @@
+/*
+ * QEMU PC System Emulator
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw.h"
+#include "pc.h"
+#include "fdc.h"
+#include "pci.h"
+#include "block.h"
+#include "sysemu.h"
+#include "blockdev.h"
+#include "audio/audio.h"
+#include "net.h"
+//#include "smbus.h"
+#include "boards.h"
+#include "android/globals.h"
+#include "monitor.h"
+#include "fw_cfg.h"
+//#include "hpet_emul.h"
+#include "watchdog.h"
+#include "smbios.h"
+#include "console.h"
+
+#include "goldfish_device.h"
+
+char* audio_input_source = NULL;
+/* output Bochs bios info messages */
+//#define DEBUG_BIOS
+
+#define BIOS_FILENAME "bios.bin"
+#define VGABIOS_FILENAME "vgabios.bin"
+#define VGABIOS_CIRRUS_FILENAME "vgabios-cirrus.bin"
+
+#define PC_MAX_BIOS_SIZE (4 * 1024 * 1024)
+
+/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables.  */
+#define ACPI_DATA_SIZE       0x10000
+#define BIOS_CFG_IOPORT 0x510
+#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
+#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
+
+#define MAX_IDE_BUS 2
+#ifndef CONFIG_ANDROID
+static fdctrl_t *floppy_controller;
+#endif
+static RTCState *rtc_state;
+static PITState *pit;
+static IOAPICState *ioapic;
+static PCIDevice *i440fx_state;
+
+typedef struct rom_reset_data {
+    uint8_t *data;
+    target_phys_addr_t addr;
+    unsigned size;
+} RomResetData;
+
+static void option_rom_reset(void *_rrd)
+{
+    RomResetData *rrd = _rrd;
+
+    cpu_physical_memory_write_rom(rrd->addr, rrd->data, rrd->size);
+}
+
+static void option_rom_setup_reset(target_phys_addr_t addr, unsigned size)
+{
+    RomResetData *rrd = qemu_malloc(sizeof *rrd);
+    /* Snapshot [addr, addr+size) now; option_rom_reset() writes it back later. */
+    rrd->data = qemu_malloc(size);
+    cpu_physical_memory_read(addr, rrd->data, size);
+    rrd->addr = addr;
+    rrd->size = size;
+    qemu_register_reset(option_rom_reset, 0, rrd); /* rrd is never freed here */
+}
+
+static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
+{
+}
+
+/* MSDOS compatibility mode FPU exception support */
+static qemu_irq ferr_irq;
+/* XXX: add IGNNE support */
+void cpu_set_ferr(CPUX86State *s)
+{
+    qemu_irq_raise(ferr_irq);
+}
+
+static void ioportF0_write(void *opaque, uint32_t addr, uint32_t data)
+{
+    qemu_irq_lower(ferr_irq);
+}
+
+/* TSC handling */
+uint64_t cpu_get_tsc(CPUX86State *env)
+{
+    /* Note: when using kqemu, it is more logical to return the host TSC
+       because kqemu does not trap the RDTSC instruction for
+       performance reasons */
+#ifdef CONFIG_KQEMU
+    if (env->kqemu_enabled) {
+        return cpu_get_real_ticks();
+    } else
+#endif
+    {
+        return cpu_get_ticks();
+    }
+}
+
+/* SMM support */
+void cpu_smm_update(CPUState *env)
+{
+    if (i440fx_state && env == first_cpu)
+        i440fx_set_smm(i440fx_state, (env->hflags >> HF_SMM_SHIFT) & 1);
+}
+
+
+/* IRQ handling */
+int cpu_get_pic_interrupt(CPUState *env)
+{
+    int intno;
+
+    intno = apic_get_interrupt(env);
+    if (intno >= 0) {
+        /* set irq request if a PIC irq is still pending */
+        /* XXX: improve that */
+        pic_update_irq(isa_pic);
+        return intno;
+    }
+    /* read the irq from the PIC */
+    if (!apic_accept_pic_intr(env))
+        return -1;
+
+    intno = pic_read_irq(isa_pic);
+    return intno;
+}
+
+static void pic_irq_request(void *opaque, int irq, int level)
+{
+    CPUState *env = first_cpu; /* opaque and irq are unused */
+
+    if (env->apic_state) { /* first CPU has APIC state: walk every CPU */
+        while (env) {
+            if (apic_accept_pic_intr(env))
+                apic_deliver_pic_intr(env, level);
+            env = env->next_cpu;
+        }
+    } else { /* no APIC state: drive CPU_INTERRUPT_HARD on the first CPU */
+        if (level)
+            cpu_interrupt(env, CPU_INTERRUPT_HARD);
+        else
+            cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
+    }
+}
+
+/* PC cmos mappings */
+
+#define REG_EQUIPMENT_BYTE          0x14
+
+#ifndef CONFIG_ANDROID
+static int cmos_get_fd_drive_type(int fd0)
+{
+    int val;
+
+    switch (fd0) {
+    case 0:
+        /* 1.44 Mb 3"5 drive */
+        val = 4;
+        break;
+    case 1:
+        /* 2.88 Mb 3"5 drive */
+        val = 5;
+        break;
+    case 2:
+        /* 1.2 Mb 5"5 drive */
+        val = 2;
+        break;
+    default:
+        val = 0;
+        break;
+    }
+    return val;
+}
+#endif
+
+static void cmos_init_hd(int type_ofs, int info_ofs, BlockDriverState *hd)
+{
+    RTCState *s = rtc_state;
+    int cylinders, heads, sectors;
+    bdrv_get_geometry_hint(hd, &cylinders, &heads, &sectors);
+    rtc_set_memory(s, type_ofs, 47);
+    rtc_set_memory(s, info_ofs, cylinders);
+    rtc_set_memory(s, info_ofs + 1, cylinders >> 8);
+    rtc_set_memory(s, info_ofs + 2, heads);
+    rtc_set_memory(s, info_ofs + 3, 0xff);
+    rtc_set_memory(s, info_ofs + 4, 0xff);
+    rtc_set_memory(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3));
+    rtc_set_memory(s, info_ofs + 6, cylinders);
+    rtc_set_memory(s, info_ofs + 7, cylinders >> 8);
+    rtc_set_memory(s, info_ofs + 8, sectors);
+}
+
+/* convert boot_device letter to something recognizable by the bios */
+static int boot_device2nibble(char boot_device)
+{
+    switch(boot_device) {
+    case 'a':
+    case 'b':
+        return 0x01; /* floppy boot */
+    case 'c':
+        return 0x02; /* hard drive boot */
+    case 'd':
+        return 0x03; /* CD-ROM boot */
+    case 'n':
+        return 0x04; /* Network boot */
+    }
+    return 0;
+}
+
+/* copy/pasted from cmos_init, should be made a general function
+ and used there as well */
+static int pc_boot_set(void *opaque, const char *boot_device)
+{
+    Monitor *mon = cur_mon;
+#define PC_MAX_BOOT_DEVICES 3
+    RTCState *s = (RTCState *)opaque;
+    int nbds, bds[3] = { 0, };
+    int i;
+
+    nbds = strlen(boot_device);
+    if (nbds > PC_MAX_BOOT_DEVICES) {
+        monitor_printf(mon, "Too many boot devices for PC\n");
+        return(1);
+    }
+    for (i = 0; i < nbds; i++) {
+        bds[i] = boot_device2nibble(boot_device[i]);
+        if (bds[i] == 0) {
+            monitor_printf(mon, "Invalid boot device for PC: '%c'\n",
+                           boot_device[i]);
+            return(1);
+        }
+    }
+    rtc_set_memory(s, 0x3d, (bds[1] << 4) | bds[0]);
+    rtc_set_memory(s, 0x38, (bds[2] << 4));
+    return(0);
+}
+
+/* hd_table must contain 4 block drivers */
+static void cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
+                      const char *boot_device, BlockDriverState **hd_table)
+{
+    RTCState *s = rtc_state;
+    int nbds, bds[3] = { 0, };
+    int val;
+#ifndef CONFIG_ANDROID
+    int fd0, fd1, nb;
+#endif
+    int i;
+
+    /* various important CMOS locations needed by PC/Bochs bios */
+
+    /* memory size */
+    val = 640; /* base memory in K */
+    rtc_set_memory(s, 0x15, val);
+    rtc_set_memory(s, 0x16, val >> 8);
+
+    val = (ram_size / 1024) - 1024;
+    if (val > 65535)
+        val = 65535;
+    rtc_set_memory(s, 0x17, val);
+    rtc_set_memory(s, 0x18, val >> 8);
+    rtc_set_memory(s, 0x30, val);
+    rtc_set_memory(s, 0x31, val >> 8);
+
+    if (above_4g_mem_size) {
+        rtc_set_memory(s, 0x5b, (unsigned int)above_4g_mem_size >> 16);
+        rtc_set_memory(s, 0x5c, (unsigned int)above_4g_mem_size >> 24);
+        rtc_set_memory(s, 0x5d, (uint64_t)above_4g_mem_size >> 32);
+    }
+
+    if (ram_size > (16 * 1024 * 1024))
+        val = (ram_size / 65536) - ((16 * 1024 * 1024) / 65536);
+    else
+        val = 0;
+    if (val > 65535)
+        val = 65535;
+    rtc_set_memory(s, 0x34, val);
+    rtc_set_memory(s, 0x35, val >> 8);
+
+    /* set the number of CPU */
+    rtc_set_memory(s, 0x5f, smp_cpus - 1);
+
+    /* set boot devices, and disable floppy signature check if requested */
+#define PC_MAX_BOOT_DEVICES 3
+    nbds = strlen(boot_device);
+    if (nbds > PC_MAX_BOOT_DEVICES) {
+        fprintf(stderr, "Too many boot devices for PC\n");
+        exit(1);
+    }
+    for (i = 0; i < nbds; i++) {
+        bds[i] = boot_device2nibble(boot_device[i]);
+        if (bds[i] == 0) {
+            fprintf(stderr, "Invalid boot device for PC: '%c'\n",
+                    boot_device[i]);
+            exit(1);
+        }
+    }
+    rtc_set_memory(s, 0x3d, (bds[1] << 4) | bds[0]);
+    rtc_set_memory(s, 0x38, (bds[2] << 4) | (fd_bootchk ?  0x0 : 0x1));
+
+    /* floppy type */
+
+#ifndef CONFIG_ANDROID
+    fd0 = fdctrl_get_drive_type(floppy_controller, 0);
+    fd1 = fdctrl_get_drive_type(floppy_controller, 1);
+
+    val = (cmos_get_fd_drive_type(fd0) << 4) | cmos_get_fd_drive_type(fd1);
+    rtc_set_memory(s, 0x10, val);
+
+    val = 0;
+    nb = 0;
+    if (fd0 < 3)
+        nb++;
+    if (fd1 < 3)
+        nb++;
+    switch (nb) {
+    case 0:
+        break;
+    case 1:
+        val |= 0x01; /* 1 drive, ready for boot */
+        break;
+    case 2:
+        val |= 0x41; /* 2 drives, ready for boot */
+        break;
+    }
+    val |= 0x02; /* FPU is there */
+    val |= 0x04; /* PS/2 mouse installed */
+    rtc_set_memory(s, REG_EQUIPMENT_BYTE, val);
+#endif
+
+    /* hard drives */
+
+    rtc_set_memory(s, 0x12, (hd_table[0] ? 0xf0 : 0) | (hd_table[1] ? 0x0f : 0));
+    if (hd_table[0])
+        cmos_init_hd(0x19, 0x1b, hd_table[0]);
+    if (hd_table[1])
+        cmos_init_hd(0x1a, 0x24, hd_table[1]);
+
+    val = 0;
+    for (i = 0; i < 4; i++) {
+        if (hd_table[i]) {
+            int cylinders, heads, sectors, translation;
+            /* NOTE: bdrv_get_geometry_hint() returns the physical
+                geometry.  It is always such that: 1 <= sects <= 63, 1
+                <= heads <= 16, 1 <= cylinders <= 16383. The BIOS
+                geometry can be different if a translation is done. */
+            translation = bdrv_get_translation_hint(hd_table[i]);
+            if (translation == BIOS_ATA_TRANSLATION_AUTO) {
+                bdrv_get_geometry_hint(hd_table[i], &cylinders, &heads, &sectors);
+                if (cylinders <= 1024 && heads <= 16 && sectors <= 63) {
+                    /* No translation. */
+                    translation = 0;
+                } else {
+                    /* LBA translation. */
+                    translation = 1;
+                }
+            } else {
+                translation--;
+            }
+            val |= translation << (i * 2);
+        }
+    }
+    rtc_set_memory(s, 0x39, val);
+}
+
+void ioport_set_a20(int enable)
+{
+    /* XXX: send to all CPUs ? */
+    cpu_x86_set_a20(first_cpu, enable);
+}
+
+int ioport_get_a20(void)
+{
+    return ((first_cpu->a20_mask >> 20) & 1);
+}
+
+static void ioport92_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    ioport_set_a20((val >> 1) & 1);
+    /* XXX: bit 0 is fast reset */
+}
+
+static uint32_t ioport92_read(void *opaque, uint32_t addr)
+{
+    return ioport_get_a20() << 1;
+}
+
+/***********************************************************/
+/* Bochs BIOS debug ports */
+
+static void bochs_bios_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    static const char shutdown_str[8] = "Shutdown";
+    static int shutdown_index = 0;
+
+    switch(addr) {
+        /* Bochs BIOS messages */
+    case 0x400:
+    case 0x401:
+        fprintf(stderr, "BIOS panic at rombios.c, line %d\n", val);
+        exit(1);
+    case 0x402:
+    case 0x403:
+#ifdef DEBUG_BIOS
+        fprintf(stderr, "%c", val);
+#endif
+        break;
+    case 0x8900:
+        /* same as Bochs power off */
+        if (val == shutdown_str[shutdown_index]) {
+            shutdown_index++;
+            if (shutdown_index == 8) {
+                shutdown_index = 0;
+                qemu_system_shutdown_request();
+            }
+        } else {
+            shutdown_index = 0;
+        }
+        break;
+
+        /* LGPL'ed VGA BIOS messages */
+    case 0x501:
+    case 0x502:
+        fprintf(stderr, "VGA BIOS panic, line %d\n", val);
+        exit(1);
+    case 0x500:
+    case 0x503:
+#ifdef DEBUG_BIOS
+        fprintf(stderr, "%c", val);
+#endif
+        break;
+    }
+}
+
+extern uint64_t node_cpumask[MAX_NODES];
+
+static void bochs_bios_init(void)
+{
+    void *fw_cfg;
+    uint8_t *smbios_table;
+    size_t smbios_len;
+    uint64_t *numa_fw_cfg;
+    int i, j;
+    /* Register the BIOS debug write ports, then publish data through fw_cfg. */
+    register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
+    register_ioport_write(0x401, 1, 2, bochs_bios_write, NULL);
+    register_ioport_write(0x402, 1, 1, bochs_bios_write, NULL);
+    register_ioport_write(0x403, 1, 1, bochs_bios_write, NULL);
+    register_ioport_write(0x8900, 1, 1, bochs_bios_write, NULL);
+
+    register_ioport_write(0x501, 1, 2, bochs_bios_write, NULL);
+    register_ioport_write(0x502, 1, 2, bochs_bios_write, NULL);
+    register_ioport_write(0x500, 1, 1, bochs_bios_write, NULL);
+    register_ioport_write(0x503, 1, 1, bochs_bios_write, NULL);
+
+    fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
+    fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
+#ifndef CONFIG_ANDROID
+    fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, (uint8_t *)acpi_tables,
+                     acpi_tables_len);
+#endif
+    smbios_table = smbios_get_table(&smbios_len);
+    if (smbios_table)
+        fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES,
+                         smbios_table, smbios_len);
+
+    /* allocate memory for the NUMA channel: one (64bit) word for the number
+     * of nodes, one word for each VCPU->node and one word for each node to
+     * hold the amount of memory.
+     */
+    numa_fw_cfg = qemu_mallocz((1 + smp_cpus + nb_numa_nodes) * 8);
+    numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
+    for (i = 0; i < smp_cpus; i++) {
+        for (j = 0; j < nb_numa_nodes; j++) {
+            if (node_cpumask[j] & (1ULL << i)) { /* 64-bit mask needs a 64-bit shift */
+                numa_fw_cfg[i + 1] = cpu_to_le64(j);
+                break;
+            }
+        }
+    }
+    for (i = 0; i < nb_numa_nodes; i++) {
+        numa_fw_cfg[smp_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
+    }
+    fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
+                     (1 + smp_cpus + nb_numa_nodes) * 8);
+}
+
+/* Build a 512-byte option ROM at option_rom that hooks int 0x19, loads the
+   given segment and general registers, and far-jumps to segs[1]:ip */
+static void generate_bootsect(target_phys_addr_t option_rom,
+                              uint32_t gpr[8], uint16_t segs[6], uint16_t ip)
+{
+    uint8_t rom[512], *p, *reloc;
+    uint8_t sum;
+    int i;
+
+    memset(rom, 0, sizeof(rom));
+
+    p = rom;
+    /* Make sure we have an option rom signature */
+    *p++ = 0x55;
+    *p++ = 0xaa;
+
+    /* ROM size in sectors */
+    *p++ = 1;
+
+    /* Hook int19: store handler offset at 0:0x64 and CS at 0:0x66 */
+
+    *p++ = 0x50;		/* push %ax */
+    *p++ = 0x1e;		/* push %ds */
+    *p++ = 0x31; *p++ = 0xc0;	/* xor %ax, %ax */
+    *p++ = 0x8e; *p++ = 0xd8;	/* mov %ax, %ds  (DS = 0) */
+
+    *p++ = 0xc7; *p++ = 0x06;   /* movw $_start, 0x64 */
+    *p++ = 0x64; *p++ = 0x00;
+    reloc = p;
+    *p++ = 0x00; *p++ = 0x00;
+
+    *p++ = 0x8c; *p++ = 0x0e;   /* mov %cs, 0x66 */
+    *p++ = 0x66; *p++ = 0x00;
+
+    *p++ = 0x1f;		/* pop %ds */
+    *p++ = 0x58;		/* pop %ax */
+    *p++ = 0xcb;		/* lret */
+    
+    /* Actual code: patch the low byte of the _start immediate above */
+    *reloc = (p - rom);
+
+    *p++ = 0xfa;		/* CLI */
+    *p++ = 0xfc;		/* CLD */
+
+    for (i = 0; i < 6; i++) {
+	if (i == 1)		/* Skip CS */
+	    continue;
+
+	*p++ = 0xb8;		/* MOV AX,imm16 */
+	*p++ = segs[i];
+	*p++ = segs[i] >> 8;
+	*p++ = 0x8e;		/* MOV <seg>,AX */
+	*p++ = 0xc0 + (i << 3);
+    }
+
+    for (i = 0; i < 8; i++) {
+	*p++ = 0x66;		/* 32-bit operand size */
+	*p++ = 0xb8 + i;	/* MOV <reg>,imm32 */
+	*p++ = gpr[i];
+	*p++ = gpr[i] >> 8;
+	*p++ = gpr[i] >> 16;
+	*p++ = gpr[i] >> 24;
+    }
+
+    *p++ = 0xea;		/* JMP FAR */
+    *p++ = ip;			/* IP */
+    *p++ = ip >> 8;
+    *p++ = segs[1];		/* CS */
+    *p++ = segs[1] >> 8;
+
+    /* sign rom: last byte makes the 8-bit sum of all 512 bytes zero */
+    sum = 0;
+    for (i = 0; i < (sizeof(rom) - 1); i++)
+        sum += rom[i];
+    rom[sizeof(rom) - 1] = -sum;
+
+    cpu_physical_memory_write_rom(option_rom, rom, sizeof(rom));
+    option_rom_setup_reset(option_rom, sizeof (rom));
+}
+
+static long get_file_size(FILE *f)
+{
+    long where, size;
+
+    /* XXX: on Unix systems, using fstat() probably makes more sense */
+    /* Seek to the end to measure the file, then restore the caller's offset. */
+    where = ftell(f);
+    fseek(f, 0, SEEK_END);
+    size = ftell(f); /* ftell()/fseek() results are not checked */
+    fseek(f, where, SEEK_SET);
+
+    return size;
+}
+
+static void load_linux(target_phys_addr_t option_rom,
+                       const char *kernel_filename,
+		       const char *initrd_filename,
+		       const char *kernel_cmdline,
+               target_phys_addr_t max_ram_size)
+{
+    uint16_t protocol;
+    uint32_t gpr[8];
+    uint16_t seg[6];
+    uint16_t real_seg;
+    int setup_size, kernel_size, initrd_size = 0, cmdline_size;
+    uint32_t initrd_max;
+    uint8_t header[1024];
+    target_phys_addr_t real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
+    FILE *f, *fi;
+    /* Loads kernel, command line and optional initrd, then emits a boot ROM. */
+    /* Align to 16 bytes as a paranoia measure */
+    cmdline_size = (strlen(kernel_cmdline)+16) & ~15;
+
+    /* load the kernel header */
+    f = fopen(kernel_filename, "rb");
+    if (!f || !(kernel_size = get_file_size(f)) ||
+	fread(header, 1, 1024, f) != 1024) {
+	fprintf(stderr, "qemu: could not load kernel '%s'\n",
+		kernel_filename);
+	exit(1);
+    }
+
+    /* kernel protocol version */
+#if 0
+    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
+#endif
+    if (ldl_p(header+0x202) == 0x53726448)
+	protocol = lduw_p(header+0x206);
+    else
+	protocol = 0;
+
+    if (protocol < 0x200 || !(header[0x211] & 0x01)) {
+	/* Low kernel */
+	real_addr    = 0x90000;
+	cmdline_addr = 0x9a000 - cmdline_size;
+	prot_addr    = 0x10000;
+    } else if (protocol < 0x202) {
+	/* High but ancient kernel */
+	real_addr    = 0x90000;
+	cmdline_addr = 0x9a000 - cmdline_size;
+	prot_addr    = 0x100000;
+    } else {
+	/* High and recent kernel */
+	real_addr    = 0x10000;
+	cmdline_addr = 0x20000;
+	prot_addr    = 0x100000;
+    }
+
+#if 0
+    fprintf(stderr,
+	    "qemu: real_addr     = 0x" TARGET_FMT_plx "\n"
+	    "qemu: cmdline_addr  = 0x" TARGET_FMT_plx "\n"
+	    "qemu: prot_addr     = 0x" TARGET_FMT_plx "\n",
+	    real_addr,
+	    cmdline_addr,
+	    prot_addr);
+#endif
+
+    /* highest address for loading the initrd */
+    if (protocol >= 0x203)
+	initrd_max = ldl_p(header+0x22c);
+    else
+	initrd_max = 0x37ffffff;
+
+    if (initrd_max >= max_ram_size-ACPI_DATA_SIZE)
+    	initrd_max = max_ram_size-ACPI_DATA_SIZE-1;
+
+    /* kernel command line */
+    pstrcpy_targphys(cmdline_addr, 4096, kernel_cmdline);
+
+    if (protocol >= 0x202) {
+	stl_p(header+0x228, cmdline_addr);
+    } else {
+	stw_p(header+0x20, 0xA33F);
+	stw_p(header+0x22, cmdline_addr-real_addr);
+    }
+
+    /* loader type */
+    /* High nybble = B reserved for Qemu; low nybble is revision number.
+       If this code is substantially changed, you may want to consider
+       incrementing the revision. */
+    if (protocol >= 0x200)
+	header[0x210] = 0xB0;
+
+    /* heap */
+    if (protocol >= 0x201) {
+	header[0x211] |= 0x80;	/* CAN_USE_HEAP */
+	stw_p(header+0x224, cmdline_addr-real_addr-0x200);
+    }
+
+    /* load initrd */
+    if (initrd_filename) {
+	if (protocol < 0x200) {
+	    fprintf(stderr, "qemu: linux kernel too old to load a ram disk %s, %s, %s\n",
+			kernel_filename, initrd_filename, kernel_cmdline);
+	    exit(1);
+	}
+
+	fi = fopen(initrd_filename, "rb");
+	if (!fi) {
+	    fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
+		    initrd_filename);
+	    exit(1);
+	}
+
+	initrd_size = get_file_size(fi);
+	initrd_addr = (initrd_max-initrd_size) & ~4095;
+
+	if (!fread_targphys_ok(initrd_addr, initrd_size, fi)) {
+	    fprintf(stderr, "qemu: read error on initial ram disk '%s'\n",
+		    initrd_filename);
+	    exit(1);
+	}
+	fclose(fi);
+
+	stl_p(header+0x218, initrd_addr);
+	stl_p(header+0x21c, initrd_size);
+    }
+
+    /* store the finalized header and load the rest of the kernel */
+    cpu_physical_memory_write(real_addr, header, 1024);
+
+    setup_size = header[0x1f1];
+    if (setup_size == 0)
+	setup_size = 4;
+
+    setup_size = (setup_size+1)*512;
+    kernel_size -= setup_size;	/* Size of protected-mode code */
+
+    if (!fread_targphys_ok(real_addr+1024, setup_size-1024, f) ||
+	!fread_targphys_ok(prot_addr, kernel_size, f)) {
+	fprintf(stderr, "qemu: read error on kernel '%s'\n",
+		kernel_filename);
+	exit(1);
+    }
+    fclose(f);
+
+    /* generate bootsector to set up the initial register state */
+    real_seg = real_addr >> 4;
+    seg[0] = seg[2] = seg[3] = seg[4] = seg[5] = real_seg; /* all but CS */
+    seg[1] = real_seg+0x20;	/* CS */
+    memset(gpr, 0, sizeof gpr);
+    gpr[4] = cmdline_addr-real_addr-16;	/* SP (-16 is paranoia) */
+
+    option_rom_setup_reset(real_addr, setup_size);
+    option_rom_setup_reset(prot_addr, kernel_size);
+    option_rom_setup_reset(cmdline_addr, cmdline_size);
+    if (initrd_filename)
+        option_rom_setup_reset(initrd_addr, initrd_size);
+
+    generate_bootsect(option_rom, gpr, seg, 0);
+}
+
+static void main_cpu_reset(void *opaque)
+{
+    CPUState *env = opaque;
+    cpu_reset(env);
+}
+
+static const int ide_iobase[2] = { 0x1f0, 0x170 };
+static const int ide_iobase2[2] = { 0x3f6, 0x376 };
+static const int ide_irq[2] = { 14, 15 };
+
+#define NE2000_NB_MAX 6
+
+static int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, 0x280, 0x380 };
+static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
+
+/* static int serial_io[MAX_SERIAL_PORTS] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
+static int serial_irq[MAX_SERIAL_PORTS] = { 4, 3, 4, 3 };
+
+static int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc };
+static int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 }; */
+
+#ifdef HAS_AUDIO
+static void audio_init (PCIBus *pci_bus, qemu_irq *pic)
+{
+    struct soundhw *c;
+
+    for (c = soundhw; c->name; ++c) {
+        if (c->enabled) {
+            if (c->isa) {
+                c->init.init_isa(pic);
+            } else {
+                if (pci_bus) {
+                    c->init.init_pci(pci_bus);
+                }
+            }
+        }
+    }
+}
+#endif
+
+static void pc_init_ne2k_isa(NICInfo *nd, qemu_irq *pic)
+{
+    static int nb_ne2k = 0;
+
+    if (nb_ne2k == NE2000_NB_MAX)
+        return;
+    isa_ne2000_init(ne2000_io[nb_ne2k], pic[ne2000_irq[nb_ne2k]], nd);
+    nb_ne2k++;
+}
+
+static int load_option_rom(const char *oprom, target_phys_addr_t start,
+                           target_phys_addr_t end)
+{
+        int size;
+        char *filename;
+
+        filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, oprom);
+        if (filename) {
+            size = get_image_size(filename);
+            if (size > 0 && start + size > end) {
+                fprintf(stderr, "Not enough space to load option rom '%s'\n",
+                        oprom);
+                exit(1);
+            }
+            size = load_image_targphys(filename, start, end - start);
+            qemu_free(filename);
+        } else {
+            size = -1;
+        }
+        if (size < 0) {
+            fprintf(stderr, "Could not load option rom '%s'\n", oprom);
+            exit(1);
+        }
+        /* Round up option ROM size to the next 2k boundary */
+        size = (size + 2047) & ~2047;
+        option_rom_setup_reset(start, size);
+        return size;
+}
+
+int cpu_is_bsp(CPUState *env)
+{
+	return env->cpuid_apic_id == 0;
+}
+
+static struct goldfish_device event0_device = {
+    .name = "goldfish_events",
+    .id = 0,
+    .size = 0x1000,
+    .irq_count = 1
+};
+
+static struct goldfish_device nand_device = {
+    .name = "goldfish_nand",
+    .id = 0,
+    .size = 0x1000
+};
+
+void goldfish_memlog_init(uint32_t base);
+
+/* PC hardware initialisation */
+static void pc_init1(ram_addr_t ram_size,
+                     const char *boot_device,
+                     const char *kernel_filename, const char *kernel_cmdline,
+                     const char *initrd_filename,
+                     int pci_enabled, const char *cpu_model)
+{
+    char *filename;
+    int ret, linux_boot, i;
+    ram_addr_t ram_addr, bios_offset, option_rom_offset;
+    ram_addr_t below_4g_mem_size, above_4g_mem_size = 0;
+    int bios_size, isa_bios_size, oprom_area_size;
+    PCIBus *pci_bus;
+    int piix3_devfn = -1;
+    CPUState *env;
+    qemu_irq *cpu_irq;
+    qemu_irq *i8259;
+#ifndef CONFIG_ANDROID
+    int index;
+#endif
+    BlockDriverState *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
+#ifndef CONFIG_ANDROID
+    BlockDriverState *fd[MAX_FD];
+#endif
+    int using_vga = cirrus_vga_enabled || std_vga_enabled || vmsvga_enabled;
+
+    if (ram_size >= 0xe0000000 ) {
+        above_4g_mem_size = ram_size - 0xe0000000;
+        below_4g_mem_size = 0xe0000000;
+    } else {
+        below_4g_mem_size = ram_size;
+    }
+
+    linux_boot = (kernel_filename != NULL);
+
+    /* init CPUs */
+    if (cpu_model == NULL) {
+#ifdef TARGET_X86_64
+        cpu_model = "qemu64";
+#else
+        cpu_model = "qemu32";
+#endif
+    }
+    
+    for(i = 0; i < smp_cpus; i++) {
+        env = cpu_init(cpu_model);
+        if (!env) {
+            fprintf(stderr, "Unable to find x86 CPU definition\n");
+            exit(1);
+        }
+        if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
+            env->cpuid_apic_id = env->cpu_index;
+            apic_init(env);
+        }
+        qemu_register_reset(main_cpu_reset, 0, env);
+    }
+#ifndef CONFIG_ANDROID
+    vmport_init();
+#endif
+    /* allocate RAM */
+    ram_addr = qemu_ram_alloc(0xa0000);
+    cpu_register_physical_memory(0, 0xa0000, ram_addr);
+
+    /* Allocate, even though we won't register, so we don't break the
+     * phys_ram_base + PA assumption. This range includes vga (0xa0000 - 0xc0000),
+     * and some bios areas, which will be registered later
+     */
+    ram_addr = qemu_ram_alloc(0x100000 - 0xa0000);
+    ram_addr = qemu_ram_alloc(below_4g_mem_size - 0x100000);
+    cpu_register_physical_memory(0x100000,
+                 below_4g_mem_size - 0x100000,
+                 ram_addr);
+
+    /* above 4giga memory allocation */
+    if (above_4g_mem_size > 0) {
+#if TARGET_PHYS_ADDR_BITS == 32
+        hw_error("To much RAM for 32-bit physical address");
+#else
+        ram_addr = qemu_ram_alloc(above_4g_mem_size);
+        cpu_register_physical_memory(0x100000000ULL,
+                                     above_4g_mem_size,
+                                     ram_addr);
+#endif
+    }
+
+
+    /* BIOS load */
+    if (bios_name == NULL)
+        bios_name = BIOS_FILENAME;
+    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+    if (filename) {
+        bios_size = get_image_size(filename);
+    } else {
+        bios_size = -1;
+    }
+    if (bios_size <= 0 ||
+        (bios_size % 65536) != 0) {
+        goto bios_error;
+    }
+    bios_offset = qemu_ram_alloc(bios_size);
+    ret = load_image(filename, qemu_get_ram_ptr(bios_offset));
+    if (ret != bios_size) {
+    bios_error:
+        fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name);
+        exit(1);
+    }
+    if (filename) {
+        qemu_free(filename);
+    }
+    /* map the last 128KB of the BIOS in ISA space */
+    isa_bios_size = bios_size;
+    if (isa_bios_size > (128 * 1024))
+        isa_bios_size = 128 * 1024;
+    cpu_register_physical_memory(0x100000 - isa_bios_size,
+                                 isa_bios_size,
+                                 (bios_offset + bios_size - isa_bios_size) | IO_MEM_ROM);
+
+
+
+    option_rom_offset = qemu_ram_alloc(0x20000);
+    oprom_area_size = 0;
+    cpu_register_physical_memory(0xc0000, 0x20000, option_rom_offset);
+
+    if (using_vga) {
+        const char *vgabios_filename;
+        /* VGA BIOS load */
+        if (cirrus_vga_enabled) {
+            vgabios_filename = VGABIOS_CIRRUS_FILENAME;
+        } else {
+            vgabios_filename = VGABIOS_FILENAME;
+        }
+        oprom_area_size = load_option_rom(vgabios_filename, 0xc0000, 0xe0000);
+    }
+    /* Although video roms can grow larger than 0x8000, the area between
+     * 0xc0000 - 0xc8000 is reserved for them. It means we won't be looking
+     * for any other kind of option rom inside this area */
+    if (oprom_area_size < 0x8000)
+        oprom_area_size = 0x8000;
+
+    if (linux_boot) {
+        load_linux(0xc0000 + oprom_area_size,
+                   kernel_filename, initrd_filename, kernel_cmdline, below_4g_mem_size);
+        oprom_area_size += 2048;
+    }
+
+    for (i = 0; i < nb_option_roms; i++) {
+        oprom_area_size += load_option_rom(option_rom[i],
+                                           0xc0000 + oprom_area_size, 0xe0000);
+    }
+
+    /* map all the bios at the top of memory */
+    cpu_register_physical_memory((uint32_t)(-bios_size),
+                                 bios_size, bios_offset | IO_MEM_ROM);
+
+    bochs_bios_init();
+
+    cpu_irq = qemu_allocate_irqs(pic_irq_request, NULL, 1);
+    i8259 = i8259_init(cpu_irq[0]);
+    ferr_irq = i8259[13];
+
+#define IRQ_PDEV_BUS 4
+    goldfish_device_init(i8259, 0xff010000, 0x7f0000, 5, 5);
+    goldfish_device_bus_init(0xff001000, IRQ_PDEV_BUS);
+
+    if (android_hw->hw_battery)
+        goldfish_battery_init();
+
+    goldfish_memlog_init(0);
+
+#ifdef CONFIG_NAND
+    goldfish_add_device_no_io(&nand_device);
+    nand_dev_init(nand_device.base);
+#endif
+
+    {
+        DriveInfo* info = drive_get( IF_IDE, 0, 0 );
+        if (info != NULL) {
+            goldfish_mmc_init(0xff005000, 0, info->bdrv);
+        }
+    }
+
+    if (pci_enabled) {
+        pci_bus = i440fx_init(&i440fx_state, i8259);
+        piix3_devfn = piix3_init(pci_bus, -1);
+    } else {
+        pci_bus = NULL;
+    }
+
+    /* init basic PC hardware */
+    register_ioport_write(0x80, 1, 1, ioport80_write, NULL);
+
+    register_ioport_write(0xf0, 1, 1, ioportF0_write, NULL);
+
+#ifndef CONFIG_ANDROID
+    if (cirrus_vga_enabled) {
+        if (pci_enabled) {
+            pci_cirrus_vga_init(pci_bus);
+        } else {
+            isa_cirrus_vga_init();
+        }
+    } else if (vmsvga_enabled) {
+        if (pci_enabled)
+            pci_vmsvga_init(pci_bus);
+        else
+            fprintf(stderr, "%s: vmware_vga: no PCI bus\n", __FUNCTION__);
+    } else if (std_vga_enabled) {
+        if (pci_enabled) {
+            pci_vga_init(pci_bus, 0, 0);
+        } else {
+            isa_vga_init();
+        }
+    }
+#endif
+
+    rtc_state = rtc_init(0x70, i8259[8], 2000);
+
+    qemu_register_boot_set(pc_boot_set, rtc_state);
+
+    register_ioport_read(0x92, 1, 1, ioport92_read, NULL);
+    register_ioport_write(0x92, 1, 1, ioport92_write, NULL);
+
+    if (pci_enabled) {
+        ioapic = ioapic_init();
+    }
+    pit = pit_init(0x40, i8259[0]);
+
+#ifndef CONFIG_ANDROID
+    pcspk_init(pit);
+
+    if (!no_hpet) {
+        hpet_init(i8259);
+    }
+#endif
+
+    if (pci_enabled) {
+        pic_set_alt_irq_func(isa_pic, ioapic_set_irq, ioapic);
+    }
+
+    goldfish_tty_add(serial_hds[0], 0, 0, 0);
+    for(i = 1; i < MAX_SERIAL_PORTS; i++) {
+        if(serial_hds[i]) {
+            goldfish_tty_add(serial_hds[i], i, 0, 0);
+        }
+    }
+
+#ifndef CONFIG_ANDROID
+    for(i = 0; i < MAX_SERIAL_PORTS; i++) {
+        if (serial_hds[i]) {
+            serial_init(serial_io[i], i8259[serial_irq[i]], 115200,
+                        serial_hds[i]);
+        }
+    }
+
+    for(i = 0; i < MAX_PARALLEL_PORTS; i++) {
+        if (parallel_hds[i]) {
+            parallel_init(parallel_io[i], i8259[parallel_irq[i]],
+                          parallel_hds[i]);
+        }
+    }
+#endif
+
+    watchdog_pc_init(pci_bus);
+
+    for(i = 0; i < nb_nics; i++) {
+        NICInfo *nd = &nd_table[i];
+
+        if (!pci_enabled || (nd->model && strcmp(nd->model, "ne2k_isa") == 0))
+            pc_init_ne2k_isa(nd, i8259);
+        else
+            pci_nic_init(pci_bus, nd, -1, "ne2k_pci");
+    }
+
+#ifdef CONFIG_ANDROID
+    for(i = 0; i < MAX_IDE_BUS * MAX_IDE_DEVS; i++)
+       hd[i] = NULL;
+#else
+    qemu_system_hot_add_init();
+
+    if (drive_get_max_bus(IF_IDE) >= MAX_IDE_BUS) {
+        fprintf(stderr, "qemu: too many IDE bus\n");
+        exit(1);
+    }
+
+    for(i = 0; i < MAX_IDE_BUS * MAX_IDE_DEVS; i++) {
+        index = drive_get_index(IF_IDE, i / MAX_IDE_DEVS, i % MAX_IDE_DEVS);
+	if (index != -1)
+	    hd[i] = drives_table[index].bdrv;
+	else
+	    hd[i] = NULL;
+    }
+
+    if (pci_enabled) {
+        pci_piix3_ide_init(pci_bus, hd, piix3_devfn + 1, i8259);
+    } else {
+        for(i = 0; i < MAX_IDE_BUS; i++) {
+            isa_ide_init(ide_iobase[i], ide_iobase2[i], i8259[ide_irq[i]],
+	                 hd[MAX_IDE_DEVS * i], hd[MAX_IDE_DEVS * i + 1]);
+        }
+    }
+#endif
+
+    i8042_init(i8259[1], i8259[12], 0x60);
+    DMA_init(0);
+
+    goldfish_fb_init(0);
+
+    goldfish_add_device_no_io(&event0_device);
+    events_dev_init(event0_device.base, i8259[event0_device.irq]);
+
+#ifdef HAS_AUDIO
+    audio_init(pci_enabled ? pci_bus : NULL, i8259);
+#endif
+
+#ifndef CONFIG_ANDROID
+    for(i = 0; i < MAX_FD; i++) {
+        index = drive_get_index(IF_FLOPPY, 0, i);
+	if (index != -1)
+	    fd[i] = drives_table[index].bdrv;
+	else
+	    fd[i] = NULL;
+    }
+
+    floppy_controller = fdctrl_init(i8259[6], 2, 0, 0x3f0, fd);
+#endif
+
+    cmos_init(below_4g_mem_size, above_4g_mem_size, boot_device, hd);
+
+#ifndef CONFIG_ANDROID
+    if (pci_enabled && usb_enabled) {
+        usb_uhci_piix3_init(pci_bus, piix3_devfn + 2);
+    }
+
+    if (pci_enabled && acpi_enabled) {
+        uint8_t *eeprom_buf = qemu_mallocz(8 * 256); /* XXX: make this persistent */
+        i2c_bus *smbus;
+
+        /* TODO: Populate SPD eeprom data.  */
+        smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, i8259[9]);
+        for (i = 0; i < 8; i++) {
+            DeviceState *eeprom;
+            eeprom = qdev_create((BusState *)smbus, "smbus-eeprom");
+            qdev_set_prop_int(eeprom, "address", 0x50 + i);
+            qdev_set_prop_ptr(eeprom, "data", eeprom_buf + (i * 256));
+            qdev_init(eeprom);
+        }
+    }
+#endif
+
+    if (i440fx_state) {
+        i440fx_init_memory_mappings(i440fx_state);
+    }
+
+    if (pci_enabled) {
+	int max_bus;
+        int bus;
+
+        max_bus = drive_get_max_bus(IF_SCSI);
+	for (bus = 0; bus <= max_bus; bus++) {
+            pci_create_simple(pci_bus, -1, "lsi53c895a");
+        }
+    }
+#ifndef CONFIG_ANDROID
+    /* Add virtio block devices */
+    if (pci_enabled) {
+        int index;
+        int unit_id = 0;
+
+        while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
+            pci_create_simple(pci_bus, -1, "virtio-blk-pci");
+            unit_id++;
+        }
+    }
+
+    /* Add virtio balloon device */
+    if (pci_enabled && !no_virtio_balloon) {
+        pci_create_simple(pci_bus, -1, "virtio-balloon-pci");
+    }
+
+    /* Add virtio console devices */
+    if (pci_enabled) {
+        for(i = 0; i < MAX_VIRTIO_CONSOLES; i++) {
+            if (virtcon_hds[i]) {
+                pci_create_simple(pci_bus, -1, "virtio-console-pci");
+            }
+        }
+    }
+#endif
+}
+
+/* .init hook of the "pc" machine: forwards every argument to pc_init1()
+   with its sixth argument set to 1 (presumably pci_enabled - confirm). */
+static void pc_init_pci(ram_addr_t ram_size, const char *boot_device,
+                        const char *kernel_filename,
+                        const char *kernel_cmdline,
+                        const char *initrd_filename, const char *cpu_model)
+{
+    const int pci_flag = 1;
+    pc_init1(ram_size, boot_device, kernel_filename, kernel_cmdline,
+             initrd_filename, pci_flag, cpu_model);
+}
+
+/* .init hook of the "isapc" machine: forwards every argument to pc_init1()
+   with its sixth argument set to 0 (presumably pci_enabled - confirm). */
+static void pc_init_isa(ram_addr_t ram_size, const char *boot_device,
+                        const char *kernel_filename,
+                        const char *kernel_cmdline,
+                        const char *initrd_filename, const char *cpu_model)
+{
+    const int pci_flag = 0;
+    pc_init1(ram_size, boot_device, kernel_filename, kernel_cmdline,
+             initrd_filename, pci_flag, cpu_model);
+}
+
+/* Set CMOS shutdown status (index 0xF) to 0xFE: BIOS POST starts S3 resume. */
+void cmos_set_s3_resume(void)
+{
+    if (!rtc_state)
+        return; /* no RTC state: nothing to do */
+    rtc_set_memory(rtc_state, 0xF, 0xFE);
+}
+
+static QEMUMachine pc_machine = {
+    .init = pc_init_pci,        /* pc_init1() called with the flag set to 1 */
+    .name = "pc",
+    .desc = "Standard PC",
+    .is_default = 1,
+    .max_cpus = 255,
+};
+
+static QEMUMachine isapc_machine = {
+    .init = pc_init_isa,        /* pc_init1() called with the flag set to 0 */
+    .name = "isapc",
+    .desc = "ISA-only PC",
+    .max_cpus = 1,
+};
+
+static void pc_machine_init(void)
+{
+    qemu_register_machine(&pc_machine);    /* "pc", the one marked is_default */
+    qemu_register_machine(&isapc_machine); /* "isapc", limited to one CPU */
+}
+
+machine_init(pc_machine_init);
diff --git a/hw/pckbd.c b/hw/pckbd.c
new file mode 100644
index 0000000..e1c6d40
--- /dev/null
+++ b/hw/pckbd.c
@@ -0,0 +1,446 @@
+/*
+ * QEMU PC keyboard emulation
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw.h"
+#include "isa.h"
+#include "pc.h"
+#include "ps2.h"
+#include "sysemu.h"
+
+/* debug PC keyboard */
+//#define DEBUG_KBD
+
+/*	Keyboard Controller Commands */
+#define KBD_CCMD_READ_MODE	0x20	/* Read mode bits */
+#define KBD_CCMD_WRITE_MODE	0x60	/* Write mode bits */
+#define KBD_CCMD_GET_VERSION	0xA1	/* Get controller version */
+#define KBD_CCMD_MOUSE_DISABLE	0xA7	/* Disable mouse interface */
+#define KBD_CCMD_MOUSE_ENABLE	0xA8	/* Enable mouse interface */
+#define KBD_CCMD_TEST_MOUSE	0xA9	/* Mouse interface test */
+#define KBD_CCMD_SELF_TEST	0xAA	/* Controller self test */
+#define KBD_CCMD_KBD_TEST	0xAB	/* Keyboard interface test */
+#define KBD_CCMD_KBD_DISABLE	0xAD	/* Keyboard interface disable */
+#define KBD_CCMD_KBD_ENABLE	0xAE	/* Keyboard interface enable */
+#define KBD_CCMD_READ_INPORT    0xC0    /* read input port */
+#define KBD_CCMD_READ_OUTPORT	0xD0    /* read output port */
+#define KBD_CCMD_WRITE_OUTPORT	0xD1    /* write output port */
+#define KBD_CCMD_WRITE_OBUF	0xD2    /* next data byte is queued as keyboard output */
+#define KBD_CCMD_WRITE_AUX_OBUF	0xD3    /* Write to output buffer as if
+					   initiated by the auxiliary device */
+#define KBD_CCMD_WRITE_MOUSE	0xD4	/* Write the following byte to the mouse */
+#define KBD_CCMD_DISABLE_A20    0xDD    /* HP vectra only ? */
+#define KBD_CCMD_ENABLE_A20     0xDF    /* HP vectra only ? */
+#define KBD_CCMD_RESET	        0xFE    /* requests a system reset */
+
+/* Keyboard Commands */
+#define KBD_CMD_SET_LEDS	0xED	/* Set keyboard leds */
+#define KBD_CMD_ECHO     	0xEE
+#define KBD_CMD_GET_ID 	        0xF2	/* get keyboard ID */
+#define KBD_CMD_SET_RATE	0xF3	/* Set typematic rate */
+#define KBD_CMD_ENABLE		0xF4	/* Enable scanning */
+#define KBD_CMD_RESET_DISABLE	0xF5	/* reset and disable scanning */
+#define KBD_CMD_RESET_ENABLE   	0xF6    /* reset and enable scanning */
+#define KBD_CMD_RESET		0xFF	/* Reset */
+
+/* Keyboard Replies */
+#define KBD_REPLY_POR		0xAA	/* Power on reset */
+#define KBD_REPLY_ACK		0xFA	/* Command ACK */
+#define KBD_REPLY_RESEND	0xFE	/* Command NACK, send the cmd again */
+
+/* Status Register Bits */
+#define KBD_STAT_OBF 		0x01	/* Keyboard output buffer full */
+#define KBD_STAT_IBF 		0x02	/* Keyboard input buffer full */
+#define KBD_STAT_SELFTEST	0x04	/* Self test successful */
+#define KBD_STAT_CMD		0x08	/* Last write was a command write (0=data) */
+#define KBD_STAT_UNLOCKED	0x10	/* Zero if keyboard locked */
+#define KBD_STAT_MOUSE_OBF	0x20	/* Mouse output buffer full */
+#define KBD_STAT_GTO 		0x40	/* General receive/xmit timeout */
+#define KBD_STAT_PERR 		0x80	/* Parity error */
+
+/* Controller Mode Register Bits */
+#define KBD_MODE_KBD_INT	0x01	/* Keyboard data generate IRQ1 */
+#define KBD_MODE_MOUSE_INT	0x02	/* Mouse data generate IRQ12 */
+#define KBD_MODE_SYS 		0x04	/* The system flag (?) */
+#define KBD_MODE_NO_KEYLOCK	0x08	/* The keylock doesn't affect the keyboard if set */
+#define KBD_MODE_DISABLE_KBD	0x10	/* Disable keyboard interface */
+#define KBD_MODE_DISABLE_MOUSE	0x20	/* Disable mouse interface */
+#define KBD_MODE_KCC 		0x40	/* Scan code conversion to PC format */
+#define KBD_MODE_RFU		0x80
+
+/* Mouse Commands */
+#define AUX_SET_SCALE11		0xE6	/* Set 1:1 scaling */
+#define AUX_SET_SCALE21		0xE7	/* Set 2:1 scaling */
+#define AUX_SET_RES		0xE8	/* Set resolution */
+#define AUX_GET_SCALE		0xE9	/* Get scaling factor */
+#define AUX_SET_STREAM		0xEA	/* Set stream mode */
+#define AUX_POLL		0xEB	/* Poll */
+#define AUX_RESET_WRAP		0xEC	/* Reset wrap mode */
+#define AUX_SET_WRAP		0xEE	/* Set wrap mode */
+#define AUX_SET_REMOTE		0xF0	/* Set remote mode */
+#define AUX_GET_TYPE		0xF2	/* Get type */
+#define AUX_SET_SAMPLE		0xF3	/* Set sample rate */
+#define AUX_ENABLE_DEV		0xF4	/* Enable aux device */
+#define AUX_DISABLE_DEV		0xF5	/* Disable aux device */
+#define AUX_SET_DEFAULT		0xF6
+#define AUX_RESET		0xFF	/* Reset aux device */
+#define AUX_ACK			0xFA	/* Command byte ACK. */
+
+#define MOUSE_STATUS_REMOTE     0x40
+#define MOUSE_STATUS_ENABLED    0x20
+#define MOUSE_STATUS_SCALE21    0x10
+
+#define KBD_PENDING_KBD         1    /* KBDState.pending bit: keyboard has data */
+#define KBD_PENDING_AUX         2    /* KBDState.pending bit: mouse (aux) has data */
+
+typedef struct KBDState {
+    uint8_t write_cmd; /* controller command waiting for its data byte on port 60, or 0 */
+    uint8_t status;    /* status register value (KBD_STAT_* bits) */
+    uint8_t mode;      /* controller mode register (KBD_MODE_* bits) */
+    /* Bitmask of devices with data available (KBD_PENDING_* bits).  */
+    uint8_t pending;
+    void *kbd;         /* handle returned by ps2_kbd_init() */
+    void *mouse;       /* handle returned by ps2_mouse_init() */
+
+    qemu_irq irq_kbd;   /* raised by kbd_update_irq() for keyboard data */
+    qemu_irq irq_mouse; /* raised by kbd_update_irq() for mouse (aux) data */
+    target_phys_addr_t mask; /* MMIO only: address bits selecting status/command */
+} KBDState;
+
+static KBDState kbd_state; /* single instance used by i8042_init() and i8042_mm_init() */
+
+/* Recompute KBD_STAT_OBF / KBD_STAT_MOUSE_OBF from s->pending and set the
+   keyboard and mouse irq lines accordingly. */
+/* XXX: not generating the irqs if KBD_MODE_DISABLE_KBD is set may be
+   incorrect, but it avoids having to simulate exact delays */
+static void kbd_update_irq(KBDState *s)
+{
+    int raise_kbd = 0;
+    int raise_aux = 0;
+    uint8_t flags = s->status & ~(KBD_STAT_OBF | KBD_STAT_MOUSE_OBF);
+
+    /* kbd data takes priority over aux data: the aux branch is taken only
+       when the mouse is the sole device with data pending.  */
+    if (s->pending == KBD_PENDING_AUX) {
+        flags |= KBD_STAT_OBF | KBD_STAT_MOUSE_OBF;
+        raise_aux = (s->mode & KBD_MODE_MOUSE_INT) != 0;
+    } else if (s->pending != 0) {
+        flags |= KBD_STAT_OBF;
+        raise_kbd = (s->mode & KBD_MODE_KBD_INT) != 0 &&
+                    (s->mode & KBD_MODE_DISABLE_KBD) == 0;
+    }
+
+    s->status = flags;
+
+    qemu_set_irq(s->irq_kbd, raise_kbd);
+    qemu_set_irq(s->irq_mouse, raise_aux);
+}
+
+/* Callback given to ps2_kbd_init(): latch whether the keyboard has data. */
+static void kbd_update_kbd_irq(void *opaque, int level)
+{
+    KBDState *kbd = opaque;
+    uint8_t bits = kbd->pending & ~KBD_PENDING_KBD;
+    if (level)
+        bits |= KBD_PENDING_KBD;
+    kbd->pending = bits;
+    kbd_update_irq(kbd);
+}
+
+/* Callback given to ps2_mouse_init(): latch whether the mouse has data. */
+static void kbd_update_aux_irq(void *opaque, int level)
+{
+    KBDState *kbd = opaque;
+    uint8_t bits = kbd->pending & ~KBD_PENDING_AUX;
+    if (level)
+        bits |= KBD_PENDING_AUX;
+    kbd->pending = bits;
+    kbd_update_irq(kbd);
+}
+
+/* Read handler for the status port: returns s->status; addr is unused. */
+static uint32_t kbd_read_status(void *opaque, uint32_t addr)
+{
+    const KBDState *kbd = opaque;
+    int status = kbd->status;
+#ifdef DEBUG_KBD
+    printf("kbd: read status=0x%02x\n", status);
+#endif
+    return status;
+}
+
+/* Queue byte b on the mouse PS/2 device if aux is non-zero, else the keyboard. */
+static void kbd_queue(KBDState *s, int b, int aux)
+{
+    void *target = aux ? s->mouse : s->kbd;
+
+    ps2_queue(target, b);
+}
+
+/* Command port write handler.  Commands that take a data byte are latched in
+   s->write_cmd for kbd_write_data(); replies go to the keyboard queue. */
+static void kbd_write_command(void *opaque, uint32_t addr, uint32_t val)
+{
+    KBDState *s = opaque;
+#ifdef DEBUG_KBD
+    printf("kbd: write cmd=0x%02x\n", val);
+#endif
+    switch(val) {
+    /* commands completed by the next data-port write */
+    case KBD_CCMD_WRITE_MODE:
+    case KBD_CCMD_WRITE_OBUF:
+    case KBD_CCMD_WRITE_AUX_OBUF:
+    case KBD_CCMD_WRITE_MOUSE:
+    case KBD_CCMD_WRITE_OUTPORT:
+        s->write_cmd = val;
+        break;
+    /* commands answered with one queued reply byte */
+    case KBD_CCMD_READ_MODE:
+        kbd_queue(s, s->mode, 0);
+        break;
+    case KBD_CCMD_SELF_TEST:
+        s->status |= KBD_STAT_SELFTEST;
+        kbd_queue(s, 0x55, 0);
+        break;
+    case KBD_CCMD_TEST_MOUSE:
+    case KBD_CCMD_KBD_TEST:
+    case KBD_CCMD_READ_INPORT:
+        kbd_queue(s, 0x00, 0);
+        break;
+    case KBD_CCMD_READ_OUTPORT: {
+        /* XXX: check that */
+        uint32_t outport = 0x01;
+#ifdef TARGET_I386
+        outport |= ioport_get_a20() << 1;
+#endif
+        if (s->status & KBD_STAT_OBF)
+            outport |= 0x10;
+        if (s->status & KBD_STAT_MOUSE_OBF)
+            outport |= 0x20;
+        kbd_queue(s, outport, 0);
+        break;
+    }
+    /* interface enable/disable flags kept in the mode register */
+    case KBD_CCMD_MOUSE_DISABLE:
+        s->mode |= KBD_MODE_DISABLE_MOUSE;
+        break;
+    case KBD_CCMD_MOUSE_ENABLE:
+        s->mode &= ~KBD_MODE_DISABLE_MOUSE;
+        break;
+    case KBD_CCMD_KBD_DISABLE:
+        s->mode |= KBD_MODE_DISABLE_KBD;
+        kbd_update_irq(s);
+        break;
+    case KBD_CCMD_KBD_ENABLE:
+        s->mode &= ~KBD_MODE_DISABLE_KBD;
+        kbd_update_irq(s);
+        break;
+#ifdef TARGET_I386
+    case KBD_CCMD_ENABLE_A20:
+        ioport_set_a20(1);
+        break;
+    case KBD_CCMD_DISABLE_A20:
+        ioport_set_a20(0);
+        break;
+#endif
+    case KBD_CCMD_RESET:
+        qemu_system_reset_request();
+        break;
+    case 0xff:
+        /* ignore that - I don't know what is its use */
+        break;
+    default:
+        fprintf(stderr, "qemu: unsupported keyboard cmd=0x%02x\n", val);
+        break;
+    }
+}
+
+/* Data port read handler: reads one byte via ps2_read_data() from the mouse
+   when only aux data is pending, otherwise from the keyboard.  addr is
+   unused. */
+static uint32_t kbd_read_data(void *opaque, uint32_t addr)
+{
+    KBDState *kbd = opaque;
+    int aux_only = (kbd->pending == KBD_PENDING_AUX);
+    void *source = aux_only ? kbd->mouse : kbd->kbd;
+    uint32_t byte = ps2_read_data(source);
+
+#ifdef DEBUG_KBD
+    printf("kbd: read data=0x%02x\n", byte);
+#endif
+    return byte;
+}
+
+/* Data port write handler.  The byte is interpreted according to the command
+   latched in s->write_cmd (0: plain write to the keyboard); the latch is
+   cleared afterwards in every case. */
+static void kbd_write_data(void *opaque, uint32_t addr, uint32_t val)
+{
+    KBDState *s = opaque;
+    const uint8_t cmd = s->write_cmd;
+
+#ifdef DEBUG_KBD
+    printf("kbd: write data=0x%02x\n", val);
+#endif
+
+    if (cmd == 0) {
+        /* no command latched: the byte goes to the keyboard */
+        ps2_write_keyboard(s->kbd, val);
+    } else if (cmd == KBD_CCMD_WRITE_MOUSE) {
+        ps2_write_mouse(s->mouse, val);
+    } else if (cmd == KBD_CCMD_WRITE_OBUF) {
+        /* byte becomes keyboard output */
+        kbd_queue(s, val, 0);
+    } else if (cmd == KBD_CCMD_WRITE_AUX_OBUF) {
+        /* byte becomes mouse (aux) output */
+        kbd_queue(s, val, 1);
+    } else if (cmd == KBD_CCMD_WRITE_MODE) {
+        s->mode = val;
+        ps2_keyboard_set_translation(s->kbd, (s->mode & KBD_MODE_KCC) != 0);
+        /* ??? */
+        kbd_update_irq(s);
+    } else if (cmd == KBD_CCMD_WRITE_OUTPORT) {
+        /* bit 1 drives A20 (i386 only); a clear bit 0 requests a reset */
+#ifdef TARGET_I386
+        ioport_set_a20((val >> 1) & 1);
+#endif
+        if (!(val & 1)) {
+            qemu_system_reset_request();
+        }
+    }
+    /* any other latched command: the byte is dropped */
+    s->write_cmd = 0;
+}
+
+/* Reset handler: load the initial status and mode register values. */
+static void kbd_reset(void *opaque)
+{
+    KBDState *kbd = opaque;
+    kbd->status = KBD_STAT_UNLOCKED | KBD_STAT_CMD;
+    kbd->mode = KBD_MODE_MOUSE_INT | KBD_MODE_KBD_INT;
+}
+
+/* savevm handler; the field order is the stream layout kbd_load() reads. */
+static void kbd_save(QEMUFile* f, void* opaque)
+{
+    KBDState *kbd = opaque;
+    qemu_put_8s(f, &kbd->write_cmd);
+    qemu_put_8s(f, &kbd->status);
+    qemu_put_8s(f, &kbd->mode);
+    qemu_put_8s(f, &kbd->pending);
+}
+
+/* loadvm handler: accepts version 3 only; reads fields in kbd_save() order. */
+static int kbd_load(QEMUFile* f, void* opaque, int version_id)
+{
+    KBDState *kbd = opaque;
+    if (version_id != 3)
+        return -EINVAL;
+    qemu_get_8s(f, &kbd->write_cmd);
+    qemu_get_8s(f, &kbd->status);
+    qemu_get_8s(f, &kbd->mode);
+    qemu_get_8s(f, &kbd->pending);
+    return 0;
+}
+
+/* Set up the i8042 on I/O ports: data at io_base, status/command at
+   io_base + 4, using the given keyboard and mouse interrupt lines. */
+void i8042_init(qemu_irq kbd_irq, qemu_irq mouse_irq, uint32_t io_base)
+{
+    KBDState *kbd = &kbd_state;
+
+    kbd->irq_kbd = kbd_irq;
+    kbd->irq_mouse = mouse_irq;
+
+    kbd_reset(kbd);
+    register_savevm("pckbd", 0, 3, kbd_save, kbd_load, kbd);
+    register_ioport_read(io_base, 1, 1, kbd_read_data, kbd);
+    register_ioport_write(io_base, 1, 1, kbd_write_data, kbd);
+    register_ioport_read(io_base + 4, 1, 1, kbd_read_status, kbd);
+    register_ioport_write(io_base + 4, 1, 1, kbd_write_command, kbd);
+
+    kbd->kbd = ps2_kbd_init(kbd_update_kbd_irq, kbd);
+    kbd->mouse = ps2_mouse_init(kbd_update_aux_irq, kbd);
+#if !defined(CONFIG_ANDROID) && defined(TARGET_I386)
+    vmmouse_init(kbd->mouse);
+#endif
+    qemu_register_reset(kbd_reset, 0, kbd);
+}
+
+/* Memory mapped interface */
+/* MMIO read: an address with any s->mask bit set reads status, else data. */
+static uint32_t kbd_mm_readb (void *opaque, target_phys_addr_t addr)
+{
+    KBDState *kbd = opaque;
+    uint32_t byte = (addr & kbd->mask) ? kbd_read_status(kbd, 0)
+                                       : kbd_read_data(kbd, 0);
+
+    return byte & 0xff;
+}
+
+/* MMIO write: an address with any s->mask bit set is a command, else data. */
+static void kbd_mm_writeb (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+    KBDState *kbd = opaque;
+    if (!(addr & kbd->mask))
+        kbd_write_data(kbd, 0, value & 0xff);
+    else
+        kbd_write_command(kbd, 0, value & 0xff);
+}
+
+static CPUReadMemoryFunc *kbd_mm_read[] = {
+    kbd_mm_readb,   /* all three slots share the byte-wide read handler */
+    kbd_mm_readb,
+    kbd_mm_readb,
+};
+
+static CPUWriteMemoryFunc *kbd_mm_write[] = {
+    kbd_mm_writeb,  /* all three slots share the byte-wide write handler */
+    kbd_mm_writeb,
+    kbd_mm_writeb,
+};
+
+/* Memory-mapped variant of i8042_init(): maps the controller at base/size
+   and stores mask, which the MMIO handlers test against the address. */
+void i8042_mm_init(qemu_irq kbd_irq, qemu_irq mouse_irq,
+                   target_phys_addr_t base, ram_addr_t size,
+                   target_phys_addr_t mask)
+{
+    KBDState *kbd = &kbd_state;
+    int io_index;
+
+    kbd->irq_kbd = kbd_irq;
+    kbd->irq_mouse = mouse_irq;
+    kbd->mask = mask;
+
+    kbd_reset(kbd);
+    register_savevm("pckbd", 0, 3, kbd_save, kbd_load, kbd);
+    io_index = cpu_register_io_memory(kbd_mm_read, kbd_mm_write, kbd);
+    cpu_register_physical_memory(base, size, io_index);
+
+    kbd->kbd = ps2_kbd_init(kbd_update_kbd_irq, kbd);
+    kbd->mouse = ps2_mouse_init(kbd_update_aux_irq, kbd);
+#if !defined(CONFIG_ANDROID) && defined(TARGET_I386)
+    vmmouse_init(kbd->mouse);
+#endif
+    qemu_register_reset(kbd_reset, 0, kbd);
+}
diff --git a/hw/piix_pci.c b/hw/piix_pci.c
new file mode 100644
index 0000000..67e6309
--- /dev/null
+++ b/hw/piix_pci.c
@@ -0,0 +1,374 @@
+/*
+ * QEMU i440FX/PIIX3 PCI Bridge Emulation
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "hw.h"
+#include "pc.h"
+#include "pci.h"
+
+typedef uint32_t pci_addr_t;
+#include "pci_host.h"
+
+typedef PCIHostState I440FXState;
+
+/* Write handler registered on port 0xcf8: latch val in config_reg. */
+static void i440fx_addr_writel(void* opaque, uint32_t addr, uint32_t val)
+{
+    ((I440FXState *)opaque)->config_reg = val;
+}
+
+/* Read handler registered on port 0xcf8: return the latched config_reg. */
+static uint32_t i440fx_addr_readl(void* opaque, uint32_t addr)
+{
+    return ((I440FXState *)opaque)->config_reg;
+}
+
+static void piix3_set_irq(qemu_irq *pic, int irq_num, int level);
+
+/* Return the global irq number (0-3) for a device irq pin: the pin number is
+   rotated by (slot - 1), where slot is devfn >> 3.  The bus number could
+   also be used to get a more precise mapping. */
+static int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
+{
+    const int slot = pci_dev->devfn >> 3;
+
+    return (irq_num + (slot - 1)) & 3;
+}
+
+static target_phys_addr_t isa_page_descs[384 / 4]; /* saved descriptor per 4K page, indexed from 0xa0000 */
+static uint8_t smm_enabled;   /* 0/1, changed through i440fx_set_smm() */
+static int pci_irq_levels[4]; /* last level passed to piix3_set_irq() per PCI irq */
+
+/* Apply PAM attribute r to the range [start, end): 3 maps it as RAM, 1 as
+   ROM, 0 and 2 restore the page descriptors saved in isa_page_descs (which
+   is indexed from 0xa0000).  Any other r leaves the mapping untouched.
+   d is unused. */
+static void update_pam(PCIDevice *d, uint32_t start, uint32_t end, int r)
+{
+    const uint32_t len = end - start;
+    uint32_t page;
+
+    //    printf("ISA mapping %08x-0x%08x: %d\n", start, end, r);
+    if (r == 3) {
+        /* RAM */
+        cpu_register_physical_memory(start, len,
+                                     start);
+    } else if (r == 1) {
+        /* ROM (XXX: not quite correct) */
+        cpu_register_physical_memory(start, len,
+                                     start | IO_MEM_ROM);
+    } else if (r == 0 || r == 2) {
+        /* XXX: should distinguish read/write cases */
+        for (page = start; page < end; page += 4096) {
+            cpu_register_physical_memory(page, 4096,
+                                         isa_page_descs[(page - 0xa0000) >> 12]);
+        }
+    }
+}
+
+/* Re-apply the PAM registers (config 0x59-0x5f) and the SMRAM register
+   (config 0x72) to the 0xa0000-0xfffff mappings. */
+static void i440fx_update_memory_mappings(PCIDevice *d)
+{
+    uint32_t smram, base, page;
+    int seg, attr;
+    update_pam(d, 0xf0000, 0x100000, (d->config[0x59] >> 4) & 3);
+    for (seg = 0, base = 0xc0000; seg < 12; seg++, base += 0x4000) {
+        attr = (d->config[0x5a + (seg >> 1)] >> ((seg & 1) * 4)) & 3;
+        update_pam(d, base, base + 0x4000, attr);
+    }
+    smram = d->config[0x72];
+    if ((smm_enabled && (smram & 0x08)) || (smram & 0x40)) {
+        cpu_register_physical_memory(0xa0000, 0x20000, 0xa0000);
+        return;
+    }
+    for (page = 0xa0000; page < 0xc0000; page += 4096)
+        cpu_register_physical_memory(page, 4096,
+                                     isa_page_descs[(page - 0xa0000) >> 12]);
+}
+
+/* Record the SMM state (normalised to 0/1); remap only when it changes. */
+void i440fx_set_smm(PCIDevice *d, int val)
+{
+    if (smm_enabled == !!val)
+        return;
+    smm_enabled = !!val;
+    i440fx_update_memory_mappings(d);
+}
+
+
+/* Save the page descriptor of each of the 96 4K pages from 0xa0000 upwards
+   into isa_page_descs.  XXX: suppress when better memory API. We make the
+   assumption that no device (in particular the VGA) changes the memory
+   mappings in the 0xa0000-0x100000 range */
+void i440fx_init_memory_mappings(PCIDevice *d)
+{
+    int page;
+    for (page = 0; page < 96; page++)
+        isa_page_descs[page] = cpu_get_physical_page_desc(0xa0000 + page * 0x1000);
+}
+
+/* Config write hook: remap if the write overlaps PAM 0x59-0x5f or SMRAM 0x72. */
+static void i440fx_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len)
+{
+    pci_default_write_config(d, address, val, len); /* XXX: implement SMRAM.D_LOCK */
+    if ((address <= 0x5f && address + len > 0x59) ||
+        (address <= 0x72 && address + len > 0x72))  /* multi-byte writes included */
+        i440fx_update_memory_mappings(d);
+}
+
+/* savevm handler: PCI device state, smm_enabled, then the four
+   pci_irq_levels entries written with qemu_put_be32(). */
+static void i440fx_save(QEMUFile* f, void *opaque)
+{
+    PCIDevice *bridge = opaque;
+    int pin = 0;
+    pci_device_save(bridge, f);
+    qemu_put_8s(f, &smm_enabled);
+    while (pin < 4)
+        qemu_put_be32(f, pci_irq_levels[pin++]);
+}
+
+/* loadvm handler; rejects versions above 2.  The memory mappings are rebuilt
+   only after smm_enabled has been read, because they depend on it. */
+static int i440fx_load(QEMUFile* f, void *opaque, int version_id)
+{
+    PCIDevice *d = opaque;
+    int ret, i;
+
+    if (version_id > 2)
+        return -EINVAL;
+    ret = pci_device_load(d, f);
+    if (ret < 0)
+        return ret;
+    qemu_get_8s(f, &smm_enabled);
+    i440fx_update_memory_mappings(d); /* uses the config and SMM flag just loaded */
+    if (version_id >= 2) /* irq levels are only present from version 2 on */
+        for (i = 0; i < 4; i++)
+            pci_irq_levels[i] = qemu_get_be32(f);
+    return 0;
+}
+
+/* Create the i440FX host bridge: registers the PCI bus, the 0xcf8 address
+   port and 0xcfc data port handlers, and the bridge device (devfn argument
+   0).  The device is stored in *pi440fx_state; the bus is returned. */
+PCIBus *i440fx_init(PCIDevice **pi440fx_state, qemu_irq *pic)
+{
+    I440FXState *host = qemu_mallocz(sizeof(*host));
+    PCIDevice *bridge;
+    uint8_t *conf;
+
+    host->bus = pci_register_bus(NULL, "pci",
+                                 piix3_set_irq, pci_slot_get_pirq, pic, 0, 4);
+
+    register_ioport_write(0xcf8, 4, 4, i440fx_addr_writel, host);
+    register_ioport_read(0xcf8, 4, 4, i440fx_addr_readl, host);
+
+    register_ioport_write(0xcfc, 4, 1, pci_host_data_writeb, host);
+    register_ioport_write(0xcfc, 4, 2, pci_host_data_writew, host);
+    register_ioport_write(0xcfc, 4, 4, pci_host_data_writel, host);
+    register_ioport_read(0xcfc, 4, 1, pci_host_data_readb, host);
+    register_ioport_read(0xcfc, 4, 2, pci_host_data_readw, host);
+    register_ioport_read(0xcfc, 4, 4, pci_host_data_readl, host);
+
+    bridge = pci_register_device(host->bus, "i440FX", sizeof(PCIDevice), 0,
+                                 NULL, i440fx_write_config);
+    conf = bridge->config;
+    pci_config_set_vendor_id(conf, PCI_VENDOR_ID_INTEL);
+    pci_config_set_device_id(conf, PCI_DEVICE_ID_INTEL_82441);
+    conf[0x08] = 0x02; // revision
+    pci_config_set_class(conf, PCI_CLASS_BRIDGE_HOST);
+    conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; // header_type
+    conf[0x72] = 0x02; /* SMRAM */
+
+    register_savevm("I440FX", 0, 2, i440fx_save, i440fx_load, bridge);
+    *pi440fx_state = bridge;
+    return host->bus;
+}
+
+/* PIIX3 PCI to ISA bridge */
+
+static PCIDevice *piix3_dev; /* set by piix3_init(); its config is read by piix3_set_irq() */
+PCIDevice *piix4_dev;        /* set by piix4_init(); not static, so it has external linkage */
+
+/* PCI bus irq callback: remember the new level of PCI irq irq_num, then
+   update the PIC input selected by PIIX3 config register 0x60 + irq_num. */
+static void piix3_set_irq(qemu_irq *pic, int irq_num, int level)
+{
+    const uint8_t *route = &piix3_dev->config[0x60];
+    int pin, target, merged = 0;
+
+    pci_irq_levels[irq_num] = level;
+    /* now we change the pic irq level according to the piix irq mappings */
+    /* XXX: optimize */
+    target = route[irq_num];
+    if (target >= 16)
+        return;
+    /* The pic level is the logical OR of all the PCI irqs mapped to it */
+    for (pin = 0; pin < 4; pin++) {
+        if (route[pin] == target)
+            merged |= pci_irq_levels[pin];
+    }
+    qemu_set_irq(pic[target], merged);
+}
+
+static void piix3_reset(void *opaque)
+{
+    PCIDevice *d = opaque; /* piix3_init() passes the PIIX3 device */
+    uint8_t *pci_conf = d->config;
+
+    pci_conf[0x04] = 0x07; // master, memory and I/O
+    pci_conf[0x05] = 0x00;
+    pci_conf[0x06] = 0x00;
+    pci_conf[0x07] = 0x02; // PCI_status_devsel_medium
+    pci_conf[0x4c] = 0x4d;
+    pci_conf[0x4e] = 0x03;
+    pci_conf[0x4f] = 0x00;
+    pci_conf[0x60] = 0x80; /* 0x60-0x63 are the routing registers read by   */
+    pci_conf[0x61] = 0x80; /* piix3_set_irq(); 0x80 is >= 16, so no PCI irq */
+    pci_conf[0x62] = 0x80; /* is forwarded to the PIC until they are        */
+    pci_conf[0x63] = 0x80; /* rewritten                                     */
+    pci_conf[0x69] = 0x02;
+    pci_conf[0x70] = 0x80;
+    pci_conf[0x76] = 0x0c;
+    pci_conf[0x77] = 0x0c;
+    pci_conf[0x78] = 0x02;
+    pci_conf[0x79] = 0x00;
+    pci_conf[0x80] = 0x00;
+    pci_conf[0x82] = 0x00;
+    pci_conf[0xa0] = 0x08;
+    pci_conf[0xa2] = 0x00;
+    pci_conf[0xa3] = 0x00;
+    pci_conf[0xa4] = 0x00;
+    pci_conf[0xa5] = 0x00;
+    pci_conf[0xa6] = 0x00;
+    pci_conf[0xa7] = 0x00;
+    pci_conf[0xa8] = 0x0f;
+    pci_conf[0xaa] = 0x00;
+    pci_conf[0xab] = 0x00;
+    pci_conf[0xac] = 0x00;
+    pci_conf[0xae] = 0x00;
+
+    memset(pci_irq_levels, 0, sizeof(pci_irq_levels)); /* forget all latched PCI irq levels */
+}
+
+static void piix4_reset(void *opaque)
+{
+    PCIDevice *d = opaque; /* piix4_init() passes the PIIX4 device */
+    uint8_t *pci_conf = d->config;
+
+    pci_conf[0x04] = 0x07; // master, memory and I/O
+    pci_conf[0x05] = 0x00;
+    pci_conf[0x06] = 0x00;
+    pci_conf[0x07] = 0x02; // PCI_status_devsel_medium
+    pci_conf[0x4c] = 0x4d;
+    pci_conf[0x4e] = 0x03;
+    pci_conf[0x4f] = 0x00;
+    pci_conf[0x60] = 0x0a; // PCI A -> IRQ 10
+    pci_conf[0x61] = 0x0a; // PCI B -> IRQ 10
+    pci_conf[0x62] = 0x0b; // PCI C -> IRQ 11
+    pci_conf[0x63] = 0x0b; // PCI D -> IRQ 11
+    pci_conf[0x69] = 0x02;
+    pci_conf[0x70] = 0x80;
+    pci_conf[0x76] = 0x0c;
+    pci_conf[0x77] = 0x0c;
+    pci_conf[0x78] = 0x02;
+    pci_conf[0x79] = 0x00;
+    pci_conf[0x80] = 0x00;
+    pci_conf[0x82] = 0x00;
+    pci_conf[0xa0] = 0x08;
+    pci_conf[0xa2] = 0x00;
+    pci_conf[0xa3] = 0x00;
+    pci_conf[0xa4] = 0x00;
+    pci_conf[0xa5] = 0x00;
+    pci_conf[0xa6] = 0x00;
+    pci_conf[0xa7] = 0x00;
+    pci_conf[0xa8] = 0x0f;
+    pci_conf[0xaa] = 0x00;
+    pci_conf[0xab] = 0x00;
+    pci_conf[0xac] = 0x00;
+    pci_conf[0xae] = 0x00;
+
+    memset(pci_irq_levels, 0, sizeof(pci_irq_levels)); /* same array the PIIX3 code uses */
+}
+
+/* savevm handler shared by PIIX3 and PIIX4: saves the PCI device state. */
+static void piix_save(QEMUFile* f, void *opaque)
+{
+    pci_device_save((PCIDevice *)opaque, f);
+}
+
+/* loadvm handler shared by PIIX3 and PIIX4; only version 2 is accepted. */
+static int piix_load(QEMUFile* f, void *opaque, int version_id)
+{
+    if (version_id == 2)
+        return pci_device_load((PCIDevice *)opaque, f);
+    return -EINVAL;
+}
+
+/* Register the PIIX3 PCI-to-ISA bridge on bus at devfn, apply its reset
+   values and return the devfn stored in the registered device. */
+int piix3_init(PCIBus *bus, int devfn)
+{
+    PCIDevice *isa_bridge = pci_register_device(bus, "PIIX3", sizeof(PCIDevice),
+                                                devfn, NULL, NULL);
+    uint8_t *conf = isa_bridge->config;
+
+    register_savevm("PIIX3", 0, 2, piix_save, piix_load, isa_bridge);
+    piix3_dev = isa_bridge;
+
+    pci_config_set_vendor_id(conf, PCI_VENDOR_ID_INTEL);
+    /* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */
+    pci_config_set_device_id(conf, PCI_DEVICE_ID_INTEL_82371SB_0);
+    pci_config_set_class(conf, PCI_CLASS_BRIDGE_ISA);
+    /* header_type = PCI_multifunction, generic */
+    conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL | PCI_HEADER_TYPE_MULTI_FUNCTION;
+
+    piix3_reset(isa_bridge);
+    qemu_register_reset(piix3_reset, 0, isa_bridge);
+    return isa_bridge->devfn;
+}
+
+/* Register the PIIX4 PCI-to-ISA bridge on bus at devfn, apply its reset
+   values and return the devfn stored in the registered device. */
+int piix4_init(PCIBus *bus, int devfn)
+{
+    PCIDevice *isa_bridge = pci_register_device(bus, "PIIX4", sizeof(PCIDevice),
+                                                devfn, NULL, NULL);
+    uint8_t *conf = isa_bridge->config;
+
+    register_savevm("PIIX4", 0, 2, piix_save, piix_load, isa_bridge);
+    piix4_dev = isa_bridge;
+
+    pci_config_set_vendor_id(conf, PCI_VENDOR_ID_INTEL);
+    /* 82371AB/EB/MB PIIX4 PCI-to-ISA bridge */
+    pci_config_set_device_id(conf, PCI_DEVICE_ID_INTEL_82371AB_0);
+    pci_config_set_class(conf, PCI_CLASS_BRIDGE_ISA);
+    /* header_type = PCI_multifunction, generic */
+    conf[PCI_HEADER_TYPE] =
+        PCI_HEADER_TYPE_NORMAL | PCI_HEADER_TYPE_MULTI_FUNCTION;
+
+    piix4_reset(isa_bridge);
+    qemu_register_reset(piix4_reset, 0, isa_bridge);
+    return isa_bridge->devfn;
+}
diff --git a/hw/ps2.c b/hw/ps2.c
new file mode 100644
index 0000000..9149598
--- /dev/null
+++ b/hw/ps2.c
@@ -0,0 +1,611 @@
+/*
+ * QEMU PS/2 keyboard/mouse emulation
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw.h"
+#include "ps2.h"
+#include "console.h"
+
+/* debug PC keyboard */
+//#define DEBUG_KBD
+
+/* debug PC keyboard : only mouse */
+//#define DEBUG_MOUSE
+
+/* Keyboard Commands */
+#define KBD_CMD_SET_LEDS	0xED	/* Set keyboard leds */
+#define KBD_CMD_ECHO     	0xEE
+#define KBD_CMD_SCANCODE	0xF0	/* Get/set scancode set */
+#define KBD_CMD_GET_ID 	        0xF2	/* get keyboard ID */
+#define KBD_CMD_SET_RATE	0xF3	/* Set typematic rate */
+#define KBD_CMD_ENABLE		0xF4	/* Enable scanning */
+#define KBD_CMD_RESET_DISABLE	0xF5	/* reset and disable scanning */
+#define KBD_CMD_RESET_ENABLE   	0xF6    /* reset and enable scanning */
+#define KBD_CMD_RESET		0xFF	/* Reset */
+
+/* Keyboard Replies */
+#define KBD_REPLY_POR		0xAA	/* Power on reset */
+#define KBD_REPLY_ID		0xAB	/* Keyboard ID */
+#define KBD_REPLY_ACK		0xFA	/* Command ACK */
+#define KBD_REPLY_RESEND	0xFE	/* Command NACK, send the cmd again */
+
+/* Mouse Commands */
+#define AUX_SET_SCALE11		0xE6	/* Set 1:1 scaling */
+#define AUX_SET_SCALE21		0xE7	/* Set 2:1 scaling */
+#define AUX_SET_RES		0xE8	/* Set resolution */
+#define AUX_GET_SCALE		0xE9	/* Get scaling factor */
+#define AUX_SET_STREAM		0xEA	/* Set stream mode */
+#define AUX_POLL		0xEB	/* Poll */
+#define AUX_RESET_WRAP		0xEC	/* Reset wrap mode */
+#define AUX_SET_WRAP		0xEE	/* Set wrap mode */
+#define AUX_SET_REMOTE		0xF0	/* Set remote mode */
+#define AUX_GET_TYPE		0xF2	/* Get type */
+#define AUX_SET_SAMPLE		0xF3	/* Set sample rate */
+#define AUX_ENABLE_DEV		0xF4	/* Enable aux device */
+#define AUX_DISABLE_DEV		0xF5	/* Disable aux device */
+#define AUX_SET_DEFAULT		0xF6
+#define AUX_RESET		0xFF	/* Reset aux device */
+#define AUX_ACK			0xFA	/* Command byte ACK. */
+
+#define MOUSE_STATUS_REMOTE     0x40
+#define MOUSE_STATUS_ENABLED    0x20
+#define MOUSE_STATUS_SCALE21    0x10
+
+#define PS2_QUEUE_SIZE 256
+
+typedef struct {
+    uint8_t data[PS2_QUEUE_SIZE]; /* circular buffer of queued bytes */
+    int rptr, wptr, count;        /* read index, write index, bytes queued */
+} PS2Queue;
+
+typedef struct {
+    PS2Queue queue;
+    int32_t write_cmd; /* command still waiting for its argument byte, -1 if none */
+    void (*update_irq)(void *, int); /* invoked as update_irq(update_arg, level) */
+    void *update_arg;
+} PS2State;
+
+typedef struct {
+    PS2State common; /* presumably must stay first: ps2_queue/ps2_read_data cast the handle to PS2State */
+    int scan_enabled; /* set/cleared by the enable and reset commands */
+    /* Qemu uses translated PC scancodes internally.  To avoid multiple
+       conversions we do the translation (if any) in the PS/2 emulation
+       not the keyboard controller.  */
+    int translate;
+    int scancode_set; /* 1=XT, 2=AT, 3=PS/2 */
+} PS2KbdState;
+
+typedef struct {
+    PS2State common;
+    uint8_t mouse_status; /* MOUSE_STATUS_* bits */
+    uint8_t mouse_resolution; /* last AUX_SET_RES argument */
+    uint8_t mouse_sample_rate; /* last AUX_SET_SAMPLE argument */
+    uint8_t mouse_wrap; /* non-zero while wrap mode is active */
+    uint8_t mouse_type; /* 0 = PS2, 3 = IMPS/2, 4 = IMEX */
+    uint8_t mouse_detect_state; /* progress of the sample-rate sequence that selects IMPS/2 or IMEX */
+    int mouse_dx; /* current values, needed for 'poll' mode */
+    int mouse_dy;
+    int mouse_dz;
+    uint8_t mouse_buttons; /* button state from the last recorded event */
+} PS2MouseState;
+
+/* Table to convert from PC scancodes to raw scancodes.  */
+static const unsigned char ps2_raw_keycode[128] = {
+          0,118, 22, 30, 38, 37, 46, 54, 61, 62, 70, 69, 78, 85,102, 13,
+         21, 29, 36, 45, 44, 53, 60, 67, 68, 77, 84, 91, 90, 20, 28, 27,
+         35, 43, 52, 51, 59, 66, 75, 76, 82, 14, 18, 93, 26, 34, 33, 42,
+         50, 49, 58, 65, 73, 74, 89,124, 17, 41, 88,  5,  6,  4, 12,  3,
+         11,  2, 10,  1,  9,119,126,108,117,125,123,107,115,116,121,105,
+        114,122,112,113,127, 96, 97,120,  7, 15, 23, 31, 39, 47, 55, 63,
+         71, 79, 86, 94,  8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 87,111,
+         19, 25, 57, 81, 83, 92, 95, 98, 99,100,101,103,104,106,109,110
+};
+
+/* Append byte b to the device queue and raise its IRQ; dropped when full. */
+void ps2_queue(void *opaque, int b)
+{
+    PS2State *dev = opaque;
+    PS2Queue *fifo = &dev->queue;
+
+    if (fifo->count < PS2_QUEUE_SIZE) {
+        fifo->data[fifo->wptr] = b;
+        fifo->wptr = (fifo->wptr + 1 == PS2_QUEUE_SIZE) ? 0 : fifo->wptr + 1;
+        fifo->count++;
+        dev->update_irq(dev->update_arg, 1);
+    }
+}
+
+/*
+   keycode is expressed as follows:
+   bit 7    - 0 = key pressed, 1 = key released
+   bits 6-0 - translated scancode set 2
+ */
+static void ps2_put_keycode(void *opaque, int keycode)
+{
+    PS2KbdState *kbd = opaque;
+    const int raw_mode = !kbd->translate && kbd->scancode_set == 2;
+
+    /* XXX: add support for scancode sets 1 and 3 */
+    if (raw_mode && keycode < 0xe0) {
+        if (keycode & 0x80)
+            ps2_queue(&kbd->common, 0xf0); /* release prefix */
+        keycode = ps2_raw_keycode[keycode & 0x7f];
+    }
+    ps2_queue(&kbd->common, keycode);
+}
+
+/* Return the next queued byte and update the IRQ line to match the queue. */
+uint32_t ps2_read_data(void *opaque)
+{
+    PS2State *dev = opaque;
+    PS2Queue *fifo = &dev->queue;
+    int byte, prev;
+
+    if (fifo->count != 0) {
+        byte = fifo->data[fifo->rptr];
+        if (++fifo->rptr == PS2_QUEUE_SIZE)
+            fifo->rptr = 0;
+        fifo->count--;
+        /* reading deasserts IRQ */
+        dev->update_irq(dev->update_arg, 0);
+        /* reassert IRQs if data left */
+        dev->update_irq(dev->update_arg, fifo->count != 0);
+        return byte;
+    }
+
+    /* NOTE: if no data left, we return the last keyboard one
+       (needed for EMM386) */
+    /* XXX: need a timer to do things correctly */
+    prev = fifo->rptr - 1;
+    if (prev < 0)
+        prev = PS2_QUEUE_SIZE - 1;
+    return fifo->data[prev];
+}
+
+static void ps2_reset_keyboard(PS2KbdState *s)
+{
+    s->scancode_set = 2; /* same set ps2_kbd_init starts with (2 = AT) */
+    s->scan_enabled = 1; /* scanning is on after a reset */
+}
+
+void ps2_write_keyboard(void *opaque, int val)
+{ /* Process one byte written to the keyboard; replies are queued with ps2_queue(). */
+    PS2KbdState *s = (PS2KbdState *)opaque;
+
+    switch(s->common.write_cmd) { /* -1: val is a command; otherwise val is that command's argument */
+    default:
+    case -1:
+        switch(val) {
+        case 0x00:
+            ps2_queue(&s->common, KBD_REPLY_ACK);
+            break;
+        case 0x05:
+            ps2_queue(&s->common, KBD_REPLY_RESEND);
+            break;
+        case KBD_CMD_GET_ID:
+            ps2_queue(&s->common, KBD_REPLY_ACK);
+            /* We emulate a MF2 AT keyboard here */
+            ps2_queue(&s->common, KBD_REPLY_ID);
+            if (s->translate)
+                ps2_queue(&s->common, 0x41);
+            else
+                ps2_queue(&s->common, 0x83);
+            break;
+        case KBD_CMD_ECHO:
+            ps2_queue(&s->common, KBD_CMD_ECHO); /* echoed back, no ACK */
+            break;
+        case KBD_CMD_ENABLE:
+            s->scan_enabled = 1;
+            ps2_queue(&s->common, KBD_REPLY_ACK);
+            break;
+        case KBD_CMD_SCANCODE:
+        case KBD_CMD_SET_LEDS:
+        case KBD_CMD_SET_RATE:
+            s->common.write_cmd = val; /* the next write carries the argument */
+            ps2_queue(&s->common, KBD_REPLY_ACK);
+            break;
+        case KBD_CMD_RESET_DISABLE:
+            ps2_reset_keyboard(s);
+            s->scan_enabled = 0; /* overrides the value set by the reset helper */
+            ps2_queue(&s->common, KBD_REPLY_ACK);
+            break;
+        case KBD_CMD_RESET_ENABLE:
+            ps2_reset_keyboard(s);
+            s->scan_enabled = 1;
+            ps2_queue(&s->common, KBD_REPLY_ACK);
+            break;
+        case KBD_CMD_RESET:
+            ps2_reset_keyboard(s);
+            ps2_queue(&s->common, KBD_REPLY_ACK);
+            ps2_queue(&s->common, KBD_REPLY_POR);
+            break;
+        default: /* unknown commands are simply acknowledged */
+            ps2_queue(&s->common, KBD_REPLY_ACK);
+            break;
+        }
+        break;
+    case KBD_CMD_SCANCODE: /* argument 0 reports the current set, 1..3 selects one */
+        if (val == 0) {
+            if (s->scancode_set == 1)
+                ps2_put_keycode(s, 0x43);
+            else if (s->scancode_set == 2)
+                ps2_put_keycode(s, 0x41);
+            else if (s->scancode_set == 3)
+                ps2_put_keycode(s, 0x3f);
+        } else {
+            if (val >= 1 && val <= 3)
+                s->scancode_set = val;
+            ps2_queue(&s->common, KBD_REPLY_ACK); /* ACKed even when val is out of range */
+        }
+        s->common.write_cmd = -1;
+        break;
+    case KBD_CMD_SET_LEDS: /* argument is acknowledged but not stored */
+        ps2_queue(&s->common, KBD_REPLY_ACK);
+        s->common.write_cmd = -1;
+        break;
+    case KBD_CMD_SET_RATE: /* argument is acknowledged but not stored */
+        ps2_queue(&s->common, KBD_REPLY_ACK);
+        s->common.write_cmd = -1;
+        break;
+    }
+}
+
+/* Set the scancode translation mode of the keyboard `opaque`.
+   0 = raw scancodes.
+   1 = translated scancodes (used by qemu internally).
+   The flag is read when keycodes are queued and when answering GET_ID.  */
+void ps2_keyboard_set_translation(void *opaque, int mode)
+{
+    PS2KbdState *kbd = opaque;
+    kbd->translate = mode;
+}
+
+/* Clamp v to the symmetric range [-limit, limit]. */
+static int ps2_clamp_delta(int v, int limit)
+{
+    if (v > limit)
+        return limit;
+    if (v < -limit)
+        return -limit;
+    return v;
+}
+
+/* Queue one movement packet built from the pending deltas, then subtract
+   what was reported so that any excess is left for a later packet. */
+static void ps2_mouse_send_packet(PS2MouseState *s)
+{
+    /* XXX: increase range to 8 bits ? */
+    int dx = ps2_clamp_delta(s->mouse_dx, 127);
+    int dy = ps2_clamp_delta(s->mouse_dy, 127);
+    int dz = s->mouse_dz;
+    unsigned int hdr;
+
+    /* byte 0: bit 3 set, bit 4 = dx negative, bit 5 = dy negative,
+       bits 0-2 = low three button bits */
+    hdr = 0x08 | (s->mouse_buttons & 0x07);
+    if (dx < 0)
+        hdr |= 1 << 4;
+    if (dy < 0)
+        hdr |= 1 << 5;
+    ps2_queue(&s->common, hdr);
+    ps2_queue(&s->common, dx & 0xff);
+    ps2_queue(&s->common, dy & 0xff);
+
+    /* extra byte for IMPS/2 or IMEX */
+    if (s->mouse_type == 3) {
+        dz = ps2_clamp_delta(dz, 127);
+        ps2_queue(&s->common, dz & 0xff);
+    } else if (s->mouse_type == 4) {
+        dz = ps2_clamp_delta(dz, 7);
+        /* low nibble = dz, bits 4-5 = button bits 3-4 */
+        hdr = (dz & 0x0f) | ((s->mouse_buttons & 0x18) << 1);
+        ps2_queue(&s->common, hdr);
+    }
+
+    /* update deltas */
+    s->mouse_dx -= dx;
+    s->mouse_dy -= dy;
+    s->mouse_dz -= dz;
+}
+
+/* Accumulate a pointer event (dy is subtracted) and, unless the mouse is in
+   remote mode or the queue is nearly full, send packets for the deltas. */
+static void ps2_mouse_event(void *opaque,
+                            int dx, int dy, int dz, int buttons_state)
+{
+    PS2MouseState *m = opaque;
+    int moved;
+
+    /* check if deltas are recorded when disabled */
+    if ((m->mouse_status & MOUSE_STATUS_ENABLED) == 0)
+        return;
+
+    m->mouse_dx += dx;
+    m->mouse_dy -= dy;
+    m->mouse_dz += dz;
+    /* XXX: SDL sometimes generates nul events: we delete them */
+    moved = m->mouse_dx != 0 || m->mouse_dy != 0 || m->mouse_dz != 0;
+    if (!moved && m->mouse_buttons == buttons_state)
+        return;
+    m->mouse_buttons = buttons_state;
+
+    /* if not remote, send event. Multiple events are sent if too big deltas */
+    if ((m->mouse_status & MOUSE_STATUS_REMOTE) != 0 ||
+        m->common.queue.count >= PS2_QUEUE_SIZE - 16)
+        return;
+    do {
+        ps2_mouse_send_packet(m);
+    } while (m->mouse_dx != 0 || m->mouse_dy != 0 || m->mouse_dz != 0);
+}
+
+void ps2_mouse_fake_event(void *opaque)
+{
+    ps2_mouse_event(opaque, 1, 0, 0, 0); /* dx = 1, no other motion, no buttons */
+}
+
+void ps2_write_mouse(void *opaque, int val)
+{ /* Process one byte written to the mouse; replies are queued with ps2_queue(). */
+    PS2MouseState *s = (PS2MouseState *)opaque;
+#ifdef DEBUG_MOUSE
+    printf("kbd: write mouse 0x%02x\n", val);
+#endif
+    switch(s->common.write_cmd) { /* -1: val is a command; otherwise val is that command's argument */
+    default:
+    case -1:
+        /* mouse command */
+        if (s->mouse_wrap) { /* wrap mode: echo everything except the two reset commands */
+            if (val == AUX_RESET_WRAP) {
+                s->mouse_wrap = 0;
+                ps2_queue(&s->common, AUX_ACK);
+                return;
+            } else if (val != AUX_RESET) {
+                ps2_queue(&s->common, val);
+                return;
+            }
+        }
+        switch(val) {
+        case AUX_SET_SCALE11:
+            s->mouse_status &= ~MOUSE_STATUS_SCALE21;
+            ps2_queue(&s->common, AUX_ACK);
+            break;
+        case AUX_SET_SCALE21:
+            s->mouse_status |= MOUSE_STATUS_SCALE21;
+            ps2_queue(&s->common, AUX_ACK);
+            break;
+        case AUX_SET_STREAM:
+            s->mouse_status &= ~MOUSE_STATUS_REMOTE;
+            ps2_queue(&s->common, AUX_ACK);
+            break;
+        case AUX_SET_WRAP:
+            s->mouse_wrap = 1;
+            ps2_queue(&s->common, AUX_ACK);
+            break;
+        case AUX_SET_REMOTE:
+            s->mouse_status |= MOUSE_STATUS_REMOTE;
+            ps2_queue(&s->common, AUX_ACK);
+            break;
+        case AUX_GET_TYPE:
+            ps2_queue(&s->common, AUX_ACK);
+            ps2_queue(&s->common, s->mouse_type);
+            break;
+        case AUX_SET_RES:
+        case AUX_SET_SAMPLE:
+            s->common.write_cmd = val; /* the next write carries the argument */
+            ps2_queue(&s->common, AUX_ACK);
+            break;
+        case AUX_GET_SCALE:
+            ps2_queue(&s->common, AUX_ACK);
+            ps2_queue(&s->common, s->mouse_status);
+            ps2_queue(&s->common, s->mouse_resolution);
+            ps2_queue(&s->common, s->mouse_sample_rate);
+            break;
+        case AUX_POLL:
+            ps2_queue(&s->common, AUX_ACK);
+            ps2_mouse_send_packet(s);
+            break;
+        case AUX_ENABLE_DEV:
+            s->mouse_status |= MOUSE_STATUS_ENABLED;
+            ps2_queue(&s->common, AUX_ACK);
+            break;
+        case AUX_DISABLE_DEV:
+            s->mouse_status &= ~MOUSE_STATUS_ENABLED;
+            ps2_queue(&s->common, AUX_ACK);
+            break;
+        case AUX_SET_DEFAULT:
+            s->mouse_sample_rate = 100;
+            s->mouse_resolution = 2;
+            s->mouse_status = 0;
+            ps2_queue(&s->common, AUX_ACK);
+            break;
+        case AUX_RESET: /* like AUX_SET_DEFAULT, and the type goes back to 0 */
+            s->mouse_sample_rate = 100;
+            s->mouse_resolution = 2;
+            s->mouse_status = 0;
+            s->mouse_type = 0;
+            ps2_queue(&s->common, AUX_ACK);
+            ps2_queue(&s->common, 0xaa);
+            ps2_queue(&s->common, s->mouse_type);
+            break;
+        default: /* unknown commands get no reply at all */
+            break;
+        }
+        break;
+    case AUX_SET_SAMPLE:
+        s->mouse_sample_rate = val;
+        /* detect IMPS/2 or IMEX */
+        switch(s->mouse_detect_state) { /* rates 200,100,80 select IMPS/2; 200,200,80 select IMEX */
+        default:
+        case 0:
+            if (val == 200)
+                s->mouse_detect_state = 1;
+            break;
+        case 1:
+            if (val == 100)
+                s->mouse_detect_state = 2;
+            else if (val == 200)
+                s->mouse_detect_state = 3;
+            else
+                s->mouse_detect_state = 0;
+            break;
+        case 2:
+            if (val == 80)
+                s->mouse_type = 3; /* IMPS/2 */
+            s->mouse_detect_state = 0;
+            break;
+        case 3:
+            if (val == 80)
+                s->mouse_type = 4; /* IMEX */
+            s->mouse_detect_state = 0;
+            break;
+        }
+        ps2_queue(&s->common, AUX_ACK);
+        s->common.write_cmd = -1;
+        break;
+    case AUX_SET_RES:
+        s->mouse_resolution = val;
+        ps2_queue(&s->common, AUX_ACK);
+        s->common.write_cmd = -1;
+        break;
+    }
+}
+
+/* Empty the queue, forget any pending command and lower the IRQ. */
+static void ps2_reset(void *opaque)
+{
+    PS2State *dev = opaque;
+    PS2Queue *fifo = &dev->queue;
+
+    dev->write_cmd = -1;
+    fifo->rptr = fifo->wptr = 0;
+    fifo->count = 0;
+    dev->update_irq(dev->update_arg, 0);
+}
+
+static void ps2_common_save (QEMUFile *f, PS2State *s)
+{ /* Field order is the snapshot layout that ps2_common_load reads back. */
+    qemu_put_be32 (f, s->write_cmd);
+    qemu_put_be32 (f, s->queue.rptr);
+    qemu_put_be32 (f, s->queue.wptr);
+    qemu_put_be32 (f, s->queue.count);
+    qemu_put_buffer (f, s->queue.data, sizeof (s->queue.data)); /* whole buffer, not just the used part */
+}
+
+static void ps2_common_load (QEMUFile *f, PS2State *s)
+{ /* Reads what ps2_common_save wrote; the snapshot is untrusted input. */
+    s->write_cmd=qemu_get_be32 (f);
+    s->queue.rptr = (uint32_t)qemu_get_be32 (f) % PS2_QUEUE_SIZE; /* indices must stay */
+    s->queue.wptr = (uint32_t)qemu_get_be32 (f) % PS2_QUEUE_SIZE; /* inside data[]     */
+    s->queue.count = (uint32_t)qemu_get_be32 (f) % (PS2_QUEUE_SIZE + 1); /* 0..SIZE */
+    qemu_get_buffer (f, s->queue.data, sizeof (s->queue.data));
+}
+
+static void ps2_kbd_save(QEMUFile* f, void* opaque)
+{
+    PS2KbdState *s = (PS2KbdState*)opaque;
+
+    ps2_common_save (f, &s->common); /* shared queue/command state comes first */
+    qemu_put_be32(f, s->scan_enabled);
+    qemu_put_be32(f, s->translate);
+    qemu_put_be32(f, s->scancode_set); /* ps2_kbd_load reads this only for version 3 */
+}
+
+static void ps2_mouse_save(QEMUFile* f, void* opaque)
+{
+    PS2MouseState *s = (PS2MouseState*)opaque;
+
+    ps2_common_save (f, &s->common); /* shared queue/command state comes first */
+    qemu_put_8s(f, &s->mouse_status); /* field order below must match ps2_mouse_load */
+    qemu_put_8s(f, &s->mouse_resolution);
+    qemu_put_8s(f, &s->mouse_sample_rate);
+    qemu_put_8s(f, &s->mouse_wrap);
+    qemu_put_8s(f, &s->mouse_type);
+    qemu_put_8s(f, &s->mouse_detect_state);
+    qemu_put_be32(f, s->mouse_dx);
+    qemu_put_be32(f, s->mouse_dy);
+    qemu_put_be32(f, s->mouse_dz);
+    qemu_put_8s(f, &s->mouse_buttons);
+}
+
+/* loadvm handler for snapshot versions 2 and 3; any other version yields
+   -EINVAL.  Version 2 images carry no scancode_set value, so set 2 is
+   assumed for them. */
+static int ps2_kbd_load(QEMUFile* f, void* opaque, int version_id)
+{
+    PS2KbdState *kbd = opaque;
+
+    if (version_id < 2 || version_id > 3)
+        return -EINVAL;
+
+    ps2_common_load (f, &kbd->common);
+    kbd->scan_enabled = qemu_get_be32(f);
+    kbd->translate = qemu_get_be32(f);
+    kbd->scancode_set = (version_id == 3) ? qemu_get_be32(f) : 2;
+    return 0;
+}
+
+static int ps2_mouse_load(QEMUFile* f, void* opaque, int version_id)
+{
+    PS2MouseState *s = (PS2MouseState*)opaque;
+
+    if (version_id != 2) /* only the version registered by ps2_mouse_init is accepted */
+        return -EINVAL;
+
+    ps2_common_load (f, &s->common);
+    qemu_get_8s(f, &s->mouse_status); /* same field order as ps2_mouse_save */
+    qemu_get_8s(f, &s->mouse_resolution);
+    qemu_get_8s(f, &s->mouse_sample_rate);
+    qemu_get_8s(f, &s->mouse_wrap);
+    qemu_get_8s(f, &s->mouse_type);
+    qemu_get_8s(f, &s->mouse_detect_state);
+    s->mouse_dx=qemu_get_be32(f);
+    s->mouse_dy=qemu_get_be32(f);
+    s->mouse_dz=qemu_get_be32(f);
+    qemu_get_8s(f, &s->mouse_buttons);
+    return 0;
+}
+
+/* Create a PS/2 keyboard; update_irq(update_arg, level) reports its IRQ. */
+void *ps2_kbd_init(void (*update_irq)(void *, int), void *update_arg)
+{
+    PS2KbdState *kbd = qemu_mallocz(sizeof(*kbd));
+    kbd->scancode_set = 2;
+    kbd->common.update_irq = update_irq;
+    kbd->common.update_arg = update_arg;
+    ps2_reset(&kbd->common);
+    register_savevm("ps2kbd", 0, 3, ps2_kbd_save, ps2_kbd_load, kbd);
+    //qemu_add_kbd_event_handler(ps2_put_keycode, kbd);
+    qemu_register_reset(ps2_reset, 0, &kbd->common);
+    return kbd;
+}
+
+/* Create a PS/2 mouse; update_irq(update_arg, level) reports its IRQ. */
+void *ps2_mouse_init(void (*update_irq)(void *, int), void *update_arg)
+{
+    PS2MouseState *mouse = qemu_mallocz(sizeof(*mouse));
+    mouse->common.update_irq = update_irq;
+    mouse->common.update_arg = update_arg;
+    ps2_reset(&mouse->common);
+    register_savevm("ps2mouse", 0, 2, ps2_mouse_save, ps2_mouse_load, mouse);
+    //qemu_add_mouse_event_handler(ps2_mouse_event, mouse, 0, "QEMU PS/2 Mouse");
+    qemu_register_reset(ps2_reset, 0, &mouse->common);
+    return mouse;
+}
diff --git a/hw/ps2.h b/hw/ps2.h
new file mode 100644
index 0000000..32a4231
--- /dev/null
+++ b/hw/ps2.h
@@ -0,0 +1,9 @@
+/* ps2.c */
+void *ps2_kbd_init(void (*update_irq)(void *, int), void *update_arg); /* returns the keyboard handle */
+void *ps2_mouse_init(void (*update_irq)(void *, int), void *update_arg); /* returns the mouse handle */
+void ps2_write_mouse(void *, int val); /* byte written to the mouse */
+void ps2_write_keyboard(void *, int val); /* byte written to the keyboard */
+uint32_t ps2_read_data(void *); /* next queued byte of the device */
+void ps2_queue(void *, int b); /* append byte b to the device queue */
+void ps2_keyboard_set_translation(void *opaque, int mode); /* 0 = raw, 1 = translated */
+void ps2_mouse_fake_event(void *opaque); /* inject a one-unit dx movement */
diff --git a/hw/smbios.c b/hw/smbios.c
new file mode 100644
index 0000000..ced90ce
--- /dev/null
+++ b/hw/smbios.c
@@ -0,0 +1,224 @@
+/*
+ * SMBIOS Support
+ *
+ * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors:
+ *  Alex Williamson <alex.williamson@hp.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "sysemu.h"
+#include "smbios.h"
+
+/*
+ * Structures shared with the BIOS
+ */
+struct smbios_header {
+    uint16_t length; /* size of the whole entry including this header, little-endian */
+    uint8_t type;    /* SMBIOS_FIELD_ENTRY or SMBIOS_TABLE_ENTRY */
+} __attribute__((__packed__));
+
+struct smbios_field {
+    struct smbios_header header;
+    uint8_t type;    /* SMBIOS structure type the field belongs to */
+    uint16_t offset; /* byte offset of the field within that structure, little-endian */
+    uint8_t data[];  /* the field bytes copied from the caller */
+} __attribute__((__packed__));
+
+struct smbios_table {
+    struct smbios_header header;
+    uint8_t data[]; /* raw contents of the file given with file= */
+} __attribute__((__packed__));
+
+#define SMBIOS_FIELD_ENTRY 0
+#define SMBIOS_TABLE_ENTRY 1
+
+
+static uint8_t *smbios_entries;
+static size_t smbios_entries_len;
+
+uint8_t *smbios_get_table(size_t *length)
+{
+    *length = smbios_entries_len; /* size of the blob in bytes; 0 while nothing was added */
+    return smbios_entries;        /* NULL while nothing was added */
+}
+
+/*
+ * To avoid unresolvable overlaps in data, don't allow both tables and fields
+ * for the same smbios type: exit(1) if the other kind already exists.
+ */
+static void smbios_check_collision(int type, int entry)
+{
+    uint16_t *num_entries = (uint16_t *)smbios_entries;
+    struct smbios_header *header;
+    char *p;
+    int i, count;
+
+    if (!num_entries)
+        return;
+
+    count = le16_to_cpu(*num_entries); /* counter is stored little-endian */
+    p = (char *)(num_entries + 1);
+    for (i = 0; i < count; i++) {
+        header = (struct smbios_header *)p;
+        if (entry == SMBIOS_TABLE_ENTRY && header->type == SMBIOS_FIELD_ENTRY) {
+            struct smbios_field *field = (void *)header;
+            if (type == field->type) {
+                fprintf(stderr, "SMBIOS type %d field already defined, "
+                                "cannot add table\n", type);
+                exit(1);
+            }
+        } else if (entry == SMBIOS_FIELD_ENTRY &&
+                   header->type == SMBIOS_TABLE_ENTRY) {
+            struct smbios_structure_header *table = (void *)(header + 1);
+            if (type == table->type) {
+                fprintf(stderr, "SMBIOS type %d table already defined, "
+                                "cannot add field\n", type);
+                exit(1);
+            }
+        }
+        p += le16_to_cpu(header->length);
+    }
+}
+
+/* Append a field entry (type, offset, len bytes of data) to the entry blob. */
+void smbios_add_field(int type, int offset, int len, void *data)
+{
+    const size_t entry_len = sizeof(struct smbios_field) + len;
+    struct smbios_field *field;
+
+    smbios_check_collision(type, SMBIOS_FIELD_ENTRY);
+    if (smbios_entries == NULL) {
+        smbios_entries_len = sizeof(uint16_t);
+        smbios_entries = qemu_mallocz(smbios_entries_len);
+    }
+    smbios_entries = qemu_realloc(smbios_entries,
+                                  smbios_entries_len + entry_len);
+    field = (struct smbios_field *)(smbios_entries + smbios_entries_len);
+    field->header.type = SMBIOS_FIELD_ENTRY;
+    field->header.length = cpu_to_le16(entry_len);
+    field->type = type;
+    field->offset = cpu_to_le16(offset);
+    memcpy(field->data, data, len);
+
+    smbios_entries_len += entry_len;
+    (*(uint16_t *)smbios_entries) =
+            cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1);
+}
+
+/* Add type 0 field entries for vendor=, version=, date= and release=M.m. */
+static void smbios_build_type_0_fields(const char *t)
+{
+    char buf[1024];
+
+    if (get_param_value(buf, sizeof(buf), "vendor", t))
+        smbios_add_field(0, offsetof(struct smbios_type_0, vendor_str),
+                         strlen(buf) + 1, buf);
+    if (get_param_value(buf, sizeof(buf), "version", t))
+        smbios_add_field(0, offsetof(struct smbios_type_0, bios_version_str),
+                         strlen(buf) + 1, buf);
+    if (get_param_value(buf, sizeof(buf), "date", t))
+        smbios_add_field(0, offsetof(struct smbios_type_0, bios_release_date_str),
+                         strlen(buf) + 1, buf);
+    if (get_param_value(buf, sizeof(buf), "release", t)) {
+        uint8_t major = 0, minor = 0; /* 1-byte fields; stay 0 if not parsable */
+        sscanf(buf, "%hhu.%hhu", &major, &minor);
+        smbios_add_field(0, offsetof(struct smbios_type_0,
+                                     system_bios_major_release), 1, &major);
+        smbios_add_field(0, offsetof(struct smbios_type_0,
+                                     system_bios_minor_release), 1, &minor);
+    }
+}
+
+/* Add `key`'s string value from option string `t`, if given, as the type 1
+   field at `offset`. */
+static void smbios_type_1_string(const char *t, const char *key, int offset)
+{
+    char buf[1024];
+
+    if (get_param_value(buf, sizeof(buf), key, t))
+        smbios_add_field(1, offset, strlen(buf) + 1, buf);
+}
+
+/* Add type 1 field entries; uuid= is parsed into qemu_uuid instead. */
+static void smbios_build_type_1_fields(const char *t)
+{
+#define TYPE_1_OFF(member) offsetof(struct smbios_type_1, member)
+    char buf[1024];
+
+    smbios_type_1_string(t, "manufacturer", TYPE_1_OFF(manufacturer_str));
+    smbios_type_1_string(t, "product", TYPE_1_OFF(product_name_str));
+    smbios_type_1_string(t, "version", TYPE_1_OFF(version_str));
+    smbios_type_1_string(t, "serial", TYPE_1_OFF(serial_number_str));
+    if (get_param_value(buf, sizeof(buf), "uuid", t) &&
+        qemu_uuid_parse(buf, qemu_uuid) != 0) {
+        fprintf(stderr, "Invalid SMBIOS UUID string\n");
+        exit(1);
+    }
+    smbios_type_1_string(t, "sku", TYPE_1_OFF(sku_number_str));
+    smbios_type_1_string(t, "family", TYPE_1_OFF(family_str));
+#undef TYPE_1_OFF
+}
+
+/* Handle one -smbios option string: file=<path> appends the file as a table
+   entry, type=0|1 adds field entries.  Returns 0, or -1 if neither is given. */
+int smbios_entry_add(const char *t)
+{
+    char buf[1024];
+
+    if (get_param_value(buf, sizeof(buf), "file", t)) {
+        struct smbios_structure_header *header;
+        struct smbios_table *table;
+        int size = get_image_size(buf);
+
+        /* check the sign first: against an unsigned sizeof, -1 would pass */
+        if (size < 0 || size < (int)sizeof(struct smbios_structure_header)) {
+            fprintf(stderr, "Cannot read smbios file %s\n", buf);
+            exit(1);
+        }
+
+        if (!smbios_entries) {
+            smbios_entries_len = sizeof(uint16_t);
+            smbios_entries = qemu_mallocz(smbios_entries_len);
+        }
+        smbios_entries = qemu_realloc(smbios_entries, smbios_entries_len +
+                                                      sizeof(*table) + size);
+        table = (struct smbios_table *)(smbios_entries + smbios_entries_len);
+        table->header.type = SMBIOS_TABLE_ENTRY;
+        table->header.length = cpu_to_le16(sizeof(*table) + size);
+
+        if (load_image(buf, table->data) != size) {
+            fprintf(stderr, "Failed to load smbios file %s\n", buf);
+            exit(1);
+        }
+        header = (struct smbios_structure_header *)(table->data);
+        smbios_check_collision(header->type, SMBIOS_TABLE_ENTRY);
+        smbios_entries_len += sizeof(*table) + size;
+        (*(uint16_t *)smbios_entries) =
+                cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1);
+        return 0;
+    }
+
+    if (get_param_value(buf, sizeof(buf), "type", t)) {
+        unsigned long type = strtoul(buf, NULL, 0);
+        switch (type) {
+        case 0:
+            smbios_build_type_0_fields(t);
+            return 0;
+        case 1:
+            smbios_build_type_1_fields(t);
+            return 0;
+        default:
+            fprintf(stderr, "Don't know how to build fields for SMBIOS type "
+                    "%lu\n", type);
+            exit(1);
+        }
+    }
+
+    fprintf(stderr, "smbios: must specify type= or file=\n");
+    return -1;
+}
diff --git a/target-i386/TODO b/target-i386/TODO
new file mode 100644
index 0000000..b671e24
--- /dev/null
+++ b/target-i386/TODO
@@ -0,0 +1,33 @@
+Correctness issues:
+
+- some eflags manipulations incorrectly reset bit 0x2.
+- SVM: test, cpu save/restore, SMM save/restore. 
+- x86_64: lcall/ljmp intel/amd differences ?
+- better code fetch (different exception handling + CS.limit support)
+- user/kernel PUSHL/POPL in helper.c
+- add missing cpuid tests
+- return UD exception if LOCK prefix incorrectly used
+- test ldt limit < 7 ?
+- fix some 16 bit sp push/pop overflow (pusha/popa, lcall lret)
+- full support of segment limit/rights 
+- full x87 exception support
+- improve x87 bit exactness (use bochs code ?)
+- DRx register support
+- CR0.AC emulation
+- SSE alignment checks
+- fix SSE min/max with nans
+
+Optimizations/Features:
+
+- add SVM nested paging support
+- add VMX support
+- add AVX support
+- add SSE5 support
+- fxsave/fxrstor AMD extensions
+- improve monitor/mwait support
+- faster EFLAGS update: consider SZAP, C, O can be updated separately
+  with a bit field in CC_OP and more state variables.
+- evaluate x87 stack pointer statically
+- find a way to avoid translating the same TB several times depending on
+  whether CR0.TS is set or not.
+- move kqemu support outside target-i386.
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
new file mode 100644
index 0000000..4e7eb58
--- /dev/null
+++ b/target-i386/cpu.h
@@ -0,0 +1,923 @@
+/*
+ * i386 virtual CPU header
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+#ifndef CPU_I386_H
+#define CPU_I386_H
+
+#include "config.h"
+
+#ifdef TARGET_X86_64
+#define TARGET_LONG_BITS 64
+#else
+#define TARGET_LONG_BITS 32
+#endif
+
+/* target supports implicit self modifying code */
+#define TARGET_HAS_SMC
+/* support for self modifying code even if the modified instruction is
+   close to the modifying instruction */
+#define TARGET_HAS_PRECISE_SMC
+
+#define TARGET_HAS_ICE 1
+
+#ifdef TARGET_X86_64
+#define ELF_MACHINE	EM_X86_64
+#else
+#define ELF_MACHINE	EM_386
+#endif
+
+#define CPUState struct CPUX86State
+
+#include "cpu-defs.h"
+
+#include "softfloat.h"
+
+#define R_EAX 0
+#define R_ECX 1
+#define R_EDX 2
+#define R_EBX 3
+#define R_ESP 4
+#define R_EBP 5
+#define R_ESI 6
+#define R_EDI 7
+
+#define R_AL 0
+#define R_CL 1
+#define R_DL 2
+#define R_BL 3
+#define R_AH 4
+#define R_CH 5
+#define R_DH 6
+#define R_BH 7
+
+#define R_ES 0
+#define R_CS 1
+#define R_SS 2
+#define R_DS 3
+#define R_FS 4
+#define R_GS 5
+
+/* segment descriptor fields */
+#define DESC_G_MASK     (1 << 23)
+#define DESC_B_SHIFT    22
+#define DESC_B_MASK     (1 << DESC_B_SHIFT)
+#define DESC_L_SHIFT    21 /* x86_64 only : 64 bit code segment */
+#define DESC_L_MASK     (1 << DESC_L_SHIFT)
+#define DESC_AVL_MASK   (1 << 20)
+#define DESC_P_MASK     (1 << 15)
+#define DESC_DPL_SHIFT  13
+#define DESC_DPL_MASK   (3 << DESC_DPL_SHIFT)
+#define DESC_S_MASK     (1 << 12)
+#define DESC_TYPE_SHIFT 8
+#define DESC_TYPE_MASK  (15 << DESC_TYPE_SHIFT)
+#define DESC_A_MASK     (1 << 8)
+
+#define DESC_CS_MASK    (1 << 11) /* 1=code segment 0=data segment */
+#define DESC_C_MASK     (1 << 10) /* code: conforming */
+#define DESC_R_MASK     (1 << 9)  /* code: readable */
+
+#define DESC_E_MASK     (1 << 10) /* data: expansion direction */
+#define DESC_W_MASK     (1 << 9)  /* data: writable */
+
+#define DESC_TSS_BUSY_MASK (1 << 9)
+
+/* eflags masks */
+#define CC_C   	0x0001
+#define CC_P 	0x0004
+#define CC_A	0x0010
+#define CC_Z	0x0040
+#define CC_S    0x0080
+#define CC_O    0x0800
+
+#define TF_SHIFT   8
+#define IOPL_SHIFT 12
+#define VM_SHIFT   17
+
+#define TF_MASK 		0x00000100
+#define IF_MASK 		0x00000200
+#define DF_MASK 		0x00000400
+#define IOPL_MASK		0x00003000
+#define NT_MASK	         	0x00004000
+#define RF_MASK			0x00010000
+#define VM_MASK			0x00020000
+#define AC_MASK			0x00040000
+#define VIF_MASK                0x00080000
+#define VIP_MASK                0x00100000
+#define ID_MASK                 0x00200000
+
+/* hidden flags - used internally by qemu to represent additional cpu
+   states. Only the CPL, INHIBIT_IRQ, SMM and SVMI are not
+   redundant. We avoid using the IOPL_MASK, TF_MASK and VM_MASK bit
+   position to ease oring with eflags. */
+/* current cpl */
+#define HF_CPL_SHIFT         0
+/* true if soft mmu is being used */
+#define HF_SOFTMMU_SHIFT     2
+/* true if hardware interrupts must be disabled for next instruction */
+#define HF_INHIBIT_IRQ_SHIFT 3
+/* 16 or 32 bit segments */
+#define HF_CS32_SHIFT        4
+#define HF_SS32_SHIFT        5
+/* zero base for DS, ES and SS : can be '0' only in 32 bit CS segment */
+#define HF_ADDSEG_SHIFT      6
+/* copy of CR0.PE (protected mode) */
+#define HF_PE_SHIFT          7
+#define HF_TF_SHIFT          8 /* must be same as eflags */
+#define HF_MP_SHIFT          9 /* the order must be MP, EM, TS */
+#define HF_EM_SHIFT         10
+#define HF_TS_SHIFT         11
+#define HF_IOPL_SHIFT       12 /* must be same as eflags */
+#define HF_LMA_SHIFT        14 /* only used on x86_64: long mode active */
+#define HF_CS64_SHIFT       15 /* only used on x86_64: 64 bit code segment  */
+#define HF_RF_SHIFT         16 /* must be same as eflags */
+#define HF_VM_SHIFT         17 /* must be same as eflags */
+#define HF_SMM_SHIFT        19 /* CPU in SMM mode */
+#define HF_SVME_SHIFT       20 /* SVME enabled (copy of EFER.SVME) */
+#define HF_SVMI_SHIFT       21 /* SVM intercepts are active */
+#define HF_OSFXSR_SHIFT     22 /* CR4.OSFXSR */
+
+#define HF_CPL_MASK          (3 << HF_CPL_SHIFT)
+#define HF_SOFTMMU_MASK      (1 << HF_SOFTMMU_SHIFT)
+#define HF_INHIBIT_IRQ_MASK  (1 << HF_INHIBIT_IRQ_SHIFT)
+#define HF_CS32_MASK         (1 << HF_CS32_SHIFT)
+#define HF_SS32_MASK         (1 << HF_SS32_SHIFT)
+#define HF_ADDSEG_MASK       (1 << HF_ADDSEG_SHIFT)
+#define HF_PE_MASK           (1 << HF_PE_SHIFT)
+#define HF_TF_MASK           (1 << HF_TF_SHIFT)
+#define HF_MP_MASK           (1 << HF_MP_SHIFT)
+#define HF_EM_MASK           (1 << HF_EM_SHIFT)
+#define HF_TS_MASK           (1 << HF_TS_SHIFT)
+#define HF_IOPL_MASK         (3 << HF_IOPL_SHIFT)
+#define HF_LMA_MASK          (1 << HF_LMA_SHIFT)
+#define HF_CS64_MASK         (1 << HF_CS64_SHIFT)
+#define HF_RF_MASK           (1 << HF_RF_SHIFT)
+#define HF_VM_MASK           (1 << HF_VM_SHIFT)
+#define HF_SMM_MASK          (1 << HF_SMM_SHIFT)
+#define HF_SVME_MASK         (1 << HF_SVME_SHIFT)
+#define HF_SVMI_MASK         (1 << HF_SVMI_SHIFT)
+#define HF_OSFXSR_MASK       (1 << HF_OSFXSR_SHIFT)
+
+/* hflags2 */
+
+#define HF2_GIF_SHIFT        0 /* if set CPU takes interrupts */
+#define HF2_HIF_SHIFT        1 /* value of IF_MASK when entering SVM */
+#define HF2_NMI_SHIFT        2 /* CPU serving NMI */
+#define HF2_VINTR_SHIFT      3 /* value of V_INTR_MASKING bit */
+
+#define HF2_GIF_MASK          (1 << HF2_GIF_SHIFT)
+#define HF2_HIF_MASK          (1 << HF2_HIF_SHIFT) 
+#define HF2_NMI_MASK          (1 << HF2_NMI_SHIFT)
+#define HF2_VINTR_MASK        (1 << HF2_VINTR_SHIFT)
+
+#define CR0_PE_SHIFT 0
+#define CR0_MP_SHIFT 1
+
+#define CR0_PE_MASK  (1 << 0)
+#define CR0_MP_MASK  (1 << 1)
+#define CR0_EM_MASK  (1 << 2)
+#define CR0_TS_MASK  (1 << 3)
+#define CR0_ET_MASK  (1 << 4)
+#define CR0_NE_MASK  (1 << 5)
+#define CR0_WP_MASK  (1 << 16)
+#define CR0_AM_MASK  (1 << 18)
+#define CR0_PG_MASK  (1 << 31)
+
+#define CR4_VME_MASK  (1 << 0)
+#define CR4_PVI_MASK  (1 << 1)
+#define CR4_TSD_MASK  (1 << 2)
+#define CR4_DE_MASK   (1 << 3)
+#define CR4_PSE_MASK  (1 << 4)
+#define CR4_PAE_MASK  (1 << 5)
+#define CR4_MCE_MASK  (1 << 6)
+#define CR4_PGE_MASK  (1 << 7)
+#define CR4_PCE_MASK  (1 << 8)
+#define CR4_OSFXSR_SHIFT 9
+#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT)
+#define CR4_OSXMMEXCPT_MASK  (1 << 10)
+
+#define DR6_BD          (1 << 13)
+#define DR6_BS          (1 << 14)
+#define DR6_BT          (1 << 15)
+#define DR6_FIXED_1     0xffff0ff0
+
+#define DR7_GD          (1 << 13)
+#define DR7_TYPE_SHIFT  16
+#define DR7_LEN_SHIFT   18
+#define DR7_FIXED_1     0x00000400
+
+#define PG_PRESENT_BIT	0
+#define PG_RW_BIT	1
+#define PG_USER_BIT	2
+#define PG_PWT_BIT	3
+#define PG_PCD_BIT	4
+#define PG_ACCESSED_BIT	5
+#define PG_DIRTY_BIT	6
+#define PG_PSE_BIT	7
+#define PG_GLOBAL_BIT	8
+#define PG_NX_BIT	63
+
+#define PG_PRESENT_MASK  (1 << PG_PRESENT_BIT)
+#define PG_RW_MASK	 (1 << PG_RW_BIT)
+#define PG_USER_MASK	 (1 << PG_USER_BIT)
+#define PG_PWT_MASK	 (1 << PG_PWT_BIT)
+#define PG_PCD_MASK	 (1 << PG_PCD_BIT)
+#define PG_ACCESSED_MASK (1 << PG_ACCESSED_BIT)
+#define PG_DIRTY_MASK	 (1 << PG_DIRTY_BIT)
+#define PG_PSE_MASK	 (1 << PG_PSE_BIT)
+#define PG_GLOBAL_MASK	 (1 << PG_GLOBAL_BIT)
+#define PG_NX_MASK	 (1LL << PG_NX_BIT)
+
+#define PG_ERROR_W_BIT     1
+
+#define PG_ERROR_P_MASK    0x01
+#define PG_ERROR_W_MASK    (1 << PG_ERROR_W_BIT)
+#define PG_ERROR_U_MASK    0x04
+#define PG_ERROR_RSVD_MASK 0x08
+#define PG_ERROR_I_D_MASK  0x10
+
+#define MCG_CTL_P      (1UL<<8)   /* MCG_CTL register available */
+
+#define MCE_CAP_DEF    MCG_CTL_P
+#define MCE_BANKS_DEF  10
+
+#define MCG_STATUS_MCIP        (1UL<<2)   /* machine check in progress */
+
+#define MCI_STATUS_VAL (1ULL<<63)  /* valid error */
+#define MCI_STATUS_OVER        (1ULL<<62)  /* previous errors lost */
+#define MCI_STATUS_UC  (1ULL<<61)  /* uncorrected error */
+
+#define MSR_IA32_TSC                    0x10
+#define MSR_IA32_APICBASE               0x1b
+#define MSR_IA32_APICBASE_BSP           (1<<8)
+#define MSR_IA32_APICBASE_ENABLE        (1<<11)
+#define MSR_IA32_APICBASE_BASE          (0xfffff<<12)
+
+#define MSR_MTRRcap			0xfe
+#define MSR_MTRRcap_VCNT		8
+#define MSR_MTRRcap_FIXRANGE_SUPPORT	(1 << 8)
+#define MSR_MTRRcap_WC_SUPPORTED	(1 << 10)
+
+#define MSR_IA32_SYSENTER_CS            0x174
+#define MSR_IA32_SYSENTER_ESP           0x175
+#define MSR_IA32_SYSENTER_EIP           0x176
+
+#define MSR_MCG_CAP                     0x179
+#define MSR_MCG_STATUS                  0x17a
+#define MSR_MCG_CTL                     0x17b
+
+#define MSR_IA32_PERF_STATUS            0x198
+
+#define MSR_MTRRphysBase(reg)		(0x200 + 2 * (reg))
+#define MSR_MTRRphysMask(reg)		(0x200 + 2 * (reg) + 1)
+
+#define MSR_MTRRfix64K_00000		0x250
+#define MSR_MTRRfix16K_80000		0x258
+#define MSR_MTRRfix16K_A0000		0x259
+#define MSR_MTRRfix4K_C0000		0x268
+#define MSR_MTRRfix4K_C8000		0x269
+#define MSR_MTRRfix4K_D0000		0x26a
+#define MSR_MTRRfix4K_D8000		0x26b
+#define MSR_MTRRfix4K_E0000		0x26c
+#define MSR_MTRRfix4K_E8000		0x26d
+#define MSR_MTRRfix4K_F0000		0x26e
+#define MSR_MTRRfix4K_F8000		0x26f
+
+#define MSR_PAT                         0x277
+
+#define MSR_MTRRdefType			0x2ff
+
+#define MSR_MC0_CTL                    0x400
+#define MSR_MC0_STATUS                 0x401
+#define MSR_MC0_ADDR                   0x402
+#define MSR_MC0_MISC                   0x403
+
+#define MSR_EFER                        0xc0000080
+
+#define MSR_EFER_SCE   (1 << 0)
+#define MSR_EFER_LME   (1 << 8)
+#define MSR_EFER_LMA   (1 << 10)
+#define MSR_EFER_NXE   (1 << 11)
+#define MSR_EFER_SVME  (1 << 12)
+#define MSR_EFER_FFXSR (1 << 14)
+
+#define MSR_STAR                        0xc0000081
+#define MSR_LSTAR                       0xc0000082
+#define MSR_CSTAR                       0xc0000083
+#define MSR_FMASK                       0xc0000084
+#define MSR_FSBASE                      0xc0000100
+#define MSR_GSBASE                      0xc0000101
+#define MSR_KERNELGSBASE                0xc0000102
+
+#define MSR_VM_HSAVE_PA                 0xc0010117
+
+/* cpuid_features bits */
+#define CPUID_FP87 (1 << 0)
+#define CPUID_VME  (1 << 1)
+#define CPUID_DE   (1 << 2)
+#define CPUID_PSE  (1 << 3)
+#define CPUID_TSC  (1 << 4)
+#define CPUID_MSR  (1 << 5)
+#define CPUID_PAE  (1 << 6)
+#define CPUID_MCE  (1 << 7)
+#define CPUID_CX8  (1 << 8)
+#define CPUID_APIC (1 << 9)
+#define CPUID_SEP  (1 << 11) /* sysenter/sysexit */
+#define CPUID_MTRR (1 << 12)
+#define CPUID_PGE  (1 << 13)
+#define CPUID_MCA  (1 << 14)
+#define CPUID_CMOV (1 << 15)
+#define CPUID_PAT  (1 << 16)
+#define CPUID_PSE36   (1 << 17)
+#define CPUID_PN   (1 << 18)
+#define CPUID_CLFLUSH (1 << 19)
+#define CPUID_DTS (1 << 21)
+#define CPUID_ACPI (1 << 22)
+#define CPUID_MMX  (1 << 23)
+#define CPUID_FXSR (1 << 24)
+#define CPUID_SSE  (1 << 25)
+#define CPUID_SSE2 (1 << 26)
+#define CPUID_SS (1 << 27)
+#define CPUID_HT (1 << 28)
+#define CPUID_TM (1 << 29)
+#define CPUID_IA64 (1 << 30)
+#define CPUID_PBE (1 << 31)
+
+#define CPUID_EXT_SSE3     (1 << 0)
+#define CPUID_EXT_DTES64   (1 << 2)
+#define CPUID_EXT_MONITOR  (1 << 3)
+#define CPUID_EXT_DSCPL    (1 << 4)
+#define CPUID_EXT_VMX      (1 << 5)
+#define CPUID_EXT_SMX      (1 << 6)
+#define CPUID_EXT_EST      (1 << 7)
+#define CPUID_EXT_TM2      (1 << 8)
+#define CPUID_EXT_SSSE3    (1 << 9)
+#define CPUID_EXT_CID      (1 << 10)
+#define CPUID_EXT_CX16     (1 << 13)
+#define CPUID_EXT_XTPR     (1 << 14)
+#define CPUID_EXT_PDCM     (1 << 15)
+#define CPUID_EXT_DCA      (1 << 18)
+#define CPUID_EXT_SSE41    (1 << 19)
+#define CPUID_EXT_SSE42    (1 << 20)
+#define CPUID_EXT_X2APIC   (1 << 21)
+#define CPUID_EXT_MOVBE    (1 << 22)
+#define CPUID_EXT_POPCNT   (1 << 23)
+#define CPUID_EXT_XSAVE    (1 << 26)
+#define CPUID_EXT_OSXSAVE  (1 << 27)
+
+#define CPUID_EXT2_SYSCALL (1 << 11)
+#define CPUID_EXT2_MP      (1 << 19)
+#define CPUID_EXT2_NX      (1 << 20)
+#define CPUID_EXT2_MMXEXT  (1 << 22)
+#define CPUID_EXT2_FFXSR   (1 << 25)
+#define CPUID_EXT2_PDPE1GB (1 << 26)
+#define CPUID_EXT2_RDTSCP  (1 << 27)
+#define CPUID_EXT2_LM      (1 << 29)
+#define CPUID_EXT2_3DNOWEXT (1 << 30)
+#define CPUID_EXT2_3DNOW   (1 << 31)
+
+#define CPUID_EXT3_LAHF_LM (1 << 0)
+#define CPUID_EXT3_CMP_LEG (1 << 1)
+#define CPUID_EXT3_SVM     (1 << 2)
+#define CPUID_EXT3_EXTAPIC (1 << 3)
+#define CPUID_EXT3_CR8LEG  (1 << 4)
+#define CPUID_EXT3_ABM     (1 << 5)
+#define CPUID_EXT3_SSE4A   (1 << 6)
+#define CPUID_EXT3_MISALIGNSSE (1 << 7)
+#define CPUID_EXT3_3DNOWPREFETCH (1 << 8)
+#define CPUID_EXT3_OSVW    (1 << 9)
+#define CPUID_EXT3_IBS     (1 << 10)
+#define CPUID_EXT3_SKINIT  (1 << 12)
+
+#define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
+#define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
+#define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */
+
+#define CPUID_VENDOR_AMD_1   0x68747541 /* "Auth" */
+#define CPUID_VENDOR_AMD_2   0x69746e65 /* "enti" */ 
+#define CPUID_VENDOR_AMD_3   0x444d4163 /* "cAMD" */
+
+#define CPUID_MWAIT_IBE     (1 << 1) /* Interrupts can exit capability */
+#define CPUID_MWAIT_EMX     (1 << 0) /* enumeration supported */
+
+#define EXCP00_DIVZ	0
+#define EXCP01_DB	1
+#define EXCP02_NMI	2
+#define EXCP03_INT3	3
+#define EXCP04_INTO	4
+#define EXCP05_BOUND	5
+#define EXCP06_ILLOP	6
+#define EXCP07_PREX	7
+#define EXCP08_DBLE	8
+#define EXCP09_XERR	9
+#define EXCP0A_TSS	10
+#define EXCP0B_NOSEG	11
+#define EXCP0C_STACK	12
+#define EXCP0D_GPF	13
+#define EXCP0E_PAGE	14
+#define EXCP10_COPR	16
+#define EXCP11_ALGN	17
+#define EXCP12_MCHK	18
+
+#define EXCP_SYSCALL    0x100 /* only happens in user only emulation
+                                 for syscall instruction */
+
+enum {
+    CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
+    CC_OP_EFLAGS,  /* all cc are explicitly computed, CC_SRC = flags */
+
+    CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */
+    CC_OP_MULW,
+    CC_OP_MULL,
+    CC_OP_MULQ,
+
+    CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+    CC_OP_ADDW,
+    CC_OP_ADDL,
+    CC_OP_ADDQ,
+
+    CC_OP_ADCB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+    CC_OP_ADCW,
+    CC_OP_ADCL,
+    CC_OP_ADCQ,
+
+    CC_OP_SUBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+    CC_OP_SUBW,
+    CC_OP_SUBL,
+    CC_OP_SUBQ,
+
+    CC_OP_SBBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+    CC_OP_SBBW,
+    CC_OP_SBBL,
+    CC_OP_SBBQ,
+
+    CC_OP_LOGICB, /* modify all flags, CC_DST = res */
+    CC_OP_LOGICW,
+    CC_OP_LOGICL,
+    CC_OP_LOGICQ,
+
+    CC_OP_INCB, /* modify all flags except C, CC_DST = res, CC_SRC = C */
+    CC_OP_INCW,
+    CC_OP_INCL,
+    CC_OP_INCQ,
+
+    CC_OP_DECB, /* modify all flags except C, CC_DST = res, CC_SRC = C  */
+    CC_OP_DECW,
+    CC_OP_DECL,
+    CC_OP_DECQ,
+
+    CC_OP_SHLB, /* modify all flags, CC_DST = res, CC_SRC.msb = C */
+    CC_OP_SHLW,
+    CC_OP_SHLL,
+    CC_OP_SHLQ,
+
+    CC_OP_SARB, /* modify all flags, CC_DST = res, CC_SRC.lsb = C */
+    CC_OP_SARW,
+    CC_OP_SARL,
+    CC_OP_SARQ,
+
+    CC_OP_NB,
+};
+
+#ifdef FLOATX80
+#define USE_X86LDOUBLE
+#endif
+
+#ifdef USE_X86LDOUBLE
+typedef floatx80 CPU86_LDouble;
+#else
+typedef float64 CPU86_LDouble;
+#endif
+
+typedef struct SegmentCache {
+    uint32_t selector;
+    target_ulong base;
+    uint32_t limit;
+    uint32_t flags;
+} SegmentCache;
+
+typedef union {
+    uint8_t _b[16];
+    uint16_t _w[8];
+    uint32_t _l[4];
+    uint64_t _q[2];
+    float32 _s[4];
+    float64 _d[2];
+} XMMReg;
+
+typedef union {
+    uint8_t _b[8];
+    uint16_t _w[4];
+    uint32_t _l[2];
+    float32 _s[2];
+    uint64_t q;
+} MMXReg;
+
+#ifdef WORDS_BIGENDIAN
+#define XMM_B(n) _b[15 - (n)]
+#define XMM_W(n) _w[7 - (n)]
+#define XMM_L(n) _l[3 - (n)]
+#define XMM_S(n) _s[3 - (n)]
+#define XMM_Q(n) _q[1 - (n)]
+#define XMM_D(n) _d[1 - (n)]
+
+#define MMX_B(n) _b[7 - (n)]
+#define MMX_W(n) _w[3 - (n)]
+#define MMX_L(n) _l[1 - (n)]
+#define MMX_S(n) _s[1 - (n)]
+#else
+#define XMM_B(n) _b[n]
+#define XMM_W(n) _w[n]
+#define XMM_L(n) _l[n]
+#define XMM_S(n) _s[n]
+#define XMM_Q(n) _q[n]
+#define XMM_D(n) _d[n]
+
+#define MMX_B(n) _b[n]
+#define MMX_W(n) _w[n]
+#define MMX_L(n) _l[n]
+#define MMX_S(n) _s[n]
+#endif
+#define MMX_Q(n) q
+
+#ifdef TARGET_X86_64
+#define CPU_NB_REGS 16
+#else
+#define CPU_NB_REGS 8
+#endif
+
+#define NB_MMU_MODES 2
+
+typedef struct CPUX86State {
+    /* standard registers */
+    target_ulong regs[CPU_NB_REGS];
+    target_ulong eip;
+    target_ulong eflags; /* eflags register. During CPU emulation, CC
+                        flags and DF are set to zero because they are
+                        stored elsewhere */
+
+    /* emulator internal eflags handling */
+    target_ulong cc_src;
+    target_ulong cc_dst;
+    uint32_t cc_op;
+    int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
+    uint32_t hflags; /* TB flags, see HF_xxx constants. These flags
+                        are known at translation time. */
+    uint32_t hflags2; /* various other flags, see HF2_xxx constants. */
+
+    /* segments */
+    SegmentCache segs[6]; /* selector values */
+    SegmentCache ldt;
+    SegmentCache tr;
+    SegmentCache gdt; /* only base and limit are used */
+    SegmentCache idt; /* only base and limit are used */
+
+    target_ulong cr[5]; /* NOTE: cr1 is unused */
+    uint64_t a20_mask;
+
+    /* FPU state */
+    unsigned int fpstt; /* top of stack index */
+    unsigned int fpus;
+    unsigned int fpuc;
+    uint8_t fptags[8];   /* 0 = valid, 1 = empty */
+    union {
+#ifdef USE_X86LDOUBLE
+        CPU86_LDouble d __attribute__((aligned(16)));
+#else
+        CPU86_LDouble d;
+#endif
+        MMXReg mmx;
+    } fpregs[8];
+
+    /* emulator internal variables */
+    float_status fp_status;
+    CPU86_LDouble ft0;
+
+    float_status mmx_status; /* for 3DNow! float ops */
+    float_status sse_status;
+    uint32_t mxcsr;
+    XMMReg xmm_regs[CPU_NB_REGS];
+    XMMReg xmm_t0;
+    MMXReg mmx_t0;
+    target_ulong cc_tmp; /* temporary for rcr/rcl */
+
+    /* sysenter registers */
+    uint32_t sysenter_cs;
+    target_ulong sysenter_esp;
+    target_ulong sysenter_eip;
+    uint64_t efer;
+    uint64_t star;
+
+    uint64_t vm_hsave;
+    uint64_t vm_vmcb;
+    uint64_t tsc_offset;
+    uint64_t intercept;
+    uint16_t intercept_cr_read;
+    uint16_t intercept_cr_write;
+    uint16_t intercept_dr_read;
+    uint16_t intercept_dr_write;
+    uint32_t intercept_exceptions;
+    uint8_t v_tpr;
+
+#ifdef TARGET_X86_64
+    target_ulong lstar;
+    target_ulong cstar;
+    target_ulong fmask;
+    target_ulong kernelgsbase;
+#endif
+
+    uint64_t tsc;
+
+    uint64_t pat;
+
+    /* exception/interrupt handling */
+    int error_code;
+    int exception_is_int;
+    target_ulong exception_next_eip;
+    target_ulong dr[8]; /* debug registers */
+    union {
+        CPUBreakpoint *cpu_breakpoint[4];
+        CPUWatchpoint *cpu_watchpoint[4];
+    }; /* break/watchpoints for dr[0..3] */
+    uint32_t smbase;
+    int old_exception;  /* exception in flight */
+
+    CPU_COMMON
+
+    /* processor features (e.g. for CPUID insn) */
+    uint32_t cpuid_level;
+    uint32_t cpuid_vendor1;
+    uint32_t cpuid_vendor2;
+    uint32_t cpuid_vendor3;
+    uint32_t cpuid_version;
+    uint32_t cpuid_features;
+    uint32_t cpuid_ext_features;
+    uint32_t cpuid_xlevel;
+    uint32_t cpuid_model[12];
+    uint32_t cpuid_ext2_features;
+    uint32_t cpuid_ext3_features;
+    uint32_t cpuid_apic_id;
+    int cpuid_vendor_override;
+
+    /* MTRRs */
+    uint64_t mtrr_fixed[11];
+    uint64_t mtrr_deftype;
+    struct {
+        uint64_t base;
+        uint64_t mask;
+    } mtrr_var[8];
+
+#ifdef CONFIG_KQEMU
+    int kqemu_enabled;
+    int last_io_time;
+#endif
+
+    /* For KVM */
+    uint64_t interrupt_bitmap[256 / 64];
+    uint32_t mp_state;
+
+    /* in order to simplify APIC support, we leave this pointer to the
+       user */
+    struct APICState *apic_state;
+
+    uint64 mcg_cap;
+    uint64 mcg_status;
+    uint64 mcg_ctl;
+    uint64 *mce_banks;
+} CPUX86State;
+
+CPUX86State *cpu_x86_init(const char *cpu_model);
+int cpu_x86_exec(CPUX86State *s);
+void cpu_x86_close(CPUX86State *s);
+void x86_cpu_list (FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt,
+                                                 ...));
+int cpu_get_pic_interrupt(CPUX86State *s);
+/* MSDOS compatibility mode FPU exception support */
+void cpu_set_ferr(CPUX86State *s);
+
+/* Always load the segment cache through this function: it stores the new
+   values and resynchronizes the hflags bits derived from the cache. */
+static inline void cpu_x86_load_seg_cache(CPUX86State *env,
+                                          int seg_reg, unsigned int selector,
+                                          target_ulong base,
+                                          unsigned int limit,
+                                          unsigned int flags)
+{
+    SegmentCache *sc;
+    unsigned int new_hflags;
+
+    sc = &env->segs[seg_reg];
+    sc->selector = selector;
+    sc->base = base;
+    sc->limit = limit;
+    sc->flags = flags;
+
+    /* update the hidden flags (CS32/CS64 only when CS is loaded) */
+    {
+        if (seg_reg == R_CS) {
+#ifdef TARGET_X86_64
+            if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) {
+                /* long mode: LMA is set and the new descriptor has L set */
+                env->hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
+                env->hflags &= ~(HF_ADDSEG_MASK);
+            } else
+#endif
+            {
+                /* legacy / compatibility case: CS32 copies the CS B bit */
+                new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
+                    >> (DESC_B_SHIFT - HF_CS32_SHIFT);
+                env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) |
+                    new_hflags;
+            }
+        }
+        new_hflags = (env->segs[R_SS].flags & DESC_B_MASK)
+            >> (DESC_B_SHIFT - HF_SS32_SHIFT); /* SS32 copies the SS B bit */
+        if (env->hflags & HF_CS64_MASK) {
+            /* zero base assumed for DS, ES and SS in long mode */
+        } else if (!(env->cr[0] & CR0_PE_MASK) ||
+                   (env->eflags & VM_MASK) ||
+                   !(env->hflags & HF_CS32_MASK)) {
+            /* XXX: try to avoid this test. The problem comes from the
+               fact that in real mode or vm86 mode we only modify the
+               'base' and 'selector' fields of the segment cache to go
+               faster. A solution may be to force addseg to one in
+               translate-i386.c. */
+            new_hflags |= HF_ADDSEG_MASK;
+        } else {
+            new_hflags |= ((env->segs[R_DS].base |
+                            env->segs[R_ES].base |
+                            env->segs[R_SS].base) != 0) <<
+                HF_ADDSEG_SHIFT; /* ADDSEG only if a DS/ES/SS base is non-zero */
+        }
+        env->hflags = (env->hflags &
+                       ~(HF_SS32_MASK | HF_ADDSEG_MASK)) | new_hflags;
+    }
+}
+
+/* store cpl in hflags; wrapper, just in case memory mappings must be changed */
+static inline void cpu_x86_set_cpl(CPUX86State *s, int cpl)
+{
+#if HF_CPL_MASK == 3
+    s->hflags = (s->hflags & ~HF_CPL_MASK) | cpl; /* cpl is OR-ed in unshifted */
+#else
+#error HF_CPL_MASK is hardcoded
+#endif
+}
+
+/* op_helper.c */
+/* used for debug or cpu save/restore */
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f);
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper);
+
+/* cpu-exec.c */
+/* the following helpers are only usable in user mode simulation as
+   they can trigger unexpected exceptions */
+void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+
+/* you can call this signal handler from your SIGBUS and SIGSEGV
+   signal handlers to inform the virtual CPU of exceptions. non zero
+   is returned if the signal was handled by the virtual CPU.  */
+int cpu_x86_signal_handler(int host_signum, void *pinfo,
+                           void *puc);
+
+/* helper.c */
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
+                             int is_write, int mmu_idx, int is_softmmu);
+void cpu_x86_set_a20(CPUX86State *env, int a20_state);
+void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+                   uint32_t *eax, uint32_t *ebx,
+                   uint32_t *ecx, uint32_t *edx);
+
+static inline int hw_breakpoint_enabled(unsigned long dr7, int index)
+{   /* two-bit enable field of breakpoint `index`: DR7 bits 2*index+1..2*index */
+    return (int)((dr7 >> (2 * index)) & 0x3UL);
+}
+
+static inline int hw_breakpoint_type(unsigned long dr7, int index)
+{   /* DR7 R/W<index>: each breakpoint owns a 4-bit R/W+LEN group */
+    return (dr7 >> (DR7_TYPE_SHIFT + (index * 4))) & 3;
+}
+
+static inline int hw_breakpoint_len(unsigned long dr7, int index)
+{   /* DR7 LEN<index>: each breakpoint owns a 4-bit R/W+LEN group */
+    int len = ((dr7 >> (DR7_LEN_SHIFT + (index * 4))) & 3);
+    return (len == 2) ? 8 : len + 1; /* encodings 0,1,2,3 -> 1,2,8,4 bytes */
+}
+
+void hw_breakpoint_insert(CPUX86State *env, int index);
+void hw_breakpoint_remove(CPUX86State *env, int index);
+int check_hw_breakpoints(CPUX86State *env, int force_dr6_update);
+
+/* will be suppressed */
+void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0);
+void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3);
+void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4);
+
+/* hw/apic.c */
+void cpu_set_apic_base(CPUX86State *env, uint64_t val);
+uint64_t cpu_get_apic_base(CPUX86State *env);
+void cpu_set_apic_tpr(CPUX86State *env, uint8_t val);
+#ifndef NO_CPU_IO_DEFS
+uint8_t cpu_get_apic_tpr(CPUX86State *env);
+#endif
+
+/* hw/pc.c */
+void cpu_smm_update(CPUX86State *env);
+uint64_t cpu_get_tsc(CPUX86State *env);
+
+/* used to debug */
+#define X86_DUMP_FPU  0x0001 /* dump FPU state too */
+#define X86_DUMP_CCOP 0x0002 /* dump qemu flag cache */
+
+#ifdef CONFIG_KQEMU
+static inline int cpu_get_time_fast(void)
+{
+    int low, high; /* the "=a" and "=d" outputs of rdtsc */
+    asm volatile("rdtsc" : "=a" (low), "=d" (high));
+    return low; /* only the "=a" half is returned; high is discarded */
+}
+#endif
+
+#define TARGET_PAGE_BITS 12
+
+#define cpu_init cpu_x86_init
+#define cpu_exec cpu_x86_exec
+#define cpu_gen_code cpu_x86_gen_code
+#define cpu_signal_handler cpu_x86_signal_handler
+#define cpu_list x86_cpu_list
+
+#define CPU_SAVE_VERSION 10
+
+/* MMU modes definitions */
+#define MMU_MODE0_SUFFIX _kernel
+#define MMU_MODE1_SUFFIX _user
+#define MMU_USER_IDX 1
+static inline int cpu_mmu_index (CPUState *env)
+{   /* MMU mode index: 1 when the CPL bits of hflags equal 3, else 0 */
+    return (env->hflags & HF_CPL_MASK) == 3;
+}
+
+/* translate.c */
+void optimize_flags_init(void);
+
+typedef struct CCTable {
+    int (*compute_all)(void); /* return all the flags */
+    int (*compute_c)(void);  /* return the C flag */
+} CCTable;
+
+/* XXX: placeholder, not implemented yet - always returns 0. Should be fixed */
+static inline int is_cpu_user(CPUState *env)
+{
+	return 0;
+}
+
+#if defined(CONFIG_USER_ONLY)
+static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
+{
+    env->regs[R_EAX] = 0; /* EAX is always cleared */
+    if (newsp != 0) /* ESP is replaced only when a non-zero newsp is given */
+        env->regs[R_ESP] = newsp;
+}
+#endif
+
+#include "cpu-all.h"
+#include "exec-all.h"
+
+#include "svm.h"
+
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{   /* undoes pc = cs_base + eip (cf. cpu_get_tb_cpu_state) */
+    env->eip = tb->pc - tb->cs_base;
+}
+
+static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
+                                        target_ulong *cs_base, int *flags)
+{   /* report CS base, linear pc and hflags merged with selected eflags bits */
+    const uint32_t eflags_tb_bits = IOPL_MASK | TF_MASK | RF_MASK | VM_MASK;
+    *cs_base = env->segs[R_CS].base;
+    *pc = *cs_base + env->eip;
+    *flags = env->hflags | (env->eflags & eflags_tb_bits);
+}
+
+void apic_init_reset(CPUState *env);
+void apic_sipi(CPUState *env);
+void do_cpu_init(CPUState *env);
+void do_cpu_sipi(CPUState *env);
+#endif /* CPU_I386_H */
diff --git a/target-i386/exec.h b/target-i386/exec.h
new file mode 100644
index 0000000..42b471a
--- /dev/null
+++ b/target-i386/exec.h
@@ -0,0 +1,376 @@
+/*
+ *  i386 execution defines
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+#include "config.h"
+#include "dyngen-exec.h"
+
+/* XXX: factorize this mess */
+#ifdef TARGET_X86_64
+#define TARGET_LONG_BITS 64
+#else
+#define TARGET_LONG_BITS 32
+#endif
+
+#include "cpu-defs.h"
+
+register struct CPUX86State *env asm(AREG0);
+
+#include "qemu-common.h"
+#include "qemu-log.h"
+
+#define EAX (env->regs[R_EAX])
+#define ECX (env->regs[R_ECX])
+#define EDX (env->regs[R_EDX])
+#define EBX (env->regs[R_EBX])
+#define ESP (env->regs[R_ESP])
+#define EBP (env->regs[R_EBP])
+#define ESI (env->regs[R_ESI])
+#define EDI (env->regs[R_EDI])
+#define EIP (env->eip)
+#define DF  (env->df)
+
+#define CC_SRC (env->cc_src)
+#define CC_DST (env->cc_dst)
+#define CC_OP  (env->cc_op)
+
+/* float macros */
+#define FT0    (env->ft0)
+#define ST0    (env->fpregs[env->fpstt].d)
+#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
+#define ST1    ST(1)
+
+#include "cpu.h"
+#include "exec-all.h"
+
+/* op_helper.c */
+void do_interrupt(int intno, int is_int, int error_code,
+                  target_ulong next_eip, int is_hw);
+void do_interrupt_user(int intno, int is_int, int error_code,
+                       target_ulong next_eip);
+void QEMU_NORETURN raise_exception_err(int exception_index, int error_code);
+void QEMU_NORETURN raise_exception(int exception_index);
+void do_smm_enter(void);
+
+/* n must be a constant to be efficient */
+static inline target_long lshift(target_long x, int n)
+{
+    /* sign of n picks the direction: n >= 0 shifts left, n < 0 shifts right */
+    if (n < 0)
+        return x >> (-n);
+    return x << n;
+}
+
+#include "helper.h"
+
+static inline void svm_check_intercept(uint32_t type)
+{
+    helper_svm_check_intercept_param(type, 0); /* second argument fixed at 0 */
+}
+
+#if !defined(CONFIG_USER_ONLY)
+
+#include "softmmu_exec.h"
+
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+#ifdef USE_X86LDOUBLE
+/* use long double functions */
+#define floatx_to_int32 floatx80_to_int32
+#define floatx_to_int64 floatx80_to_int64
+#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero
+#define int32_to_floatx int32_to_floatx80
+#define int64_to_floatx int64_to_floatx80
+#define float32_to_floatx float32_to_floatx80
+#define float64_to_floatx float64_to_floatx80
+#define floatx_to_float32 floatx80_to_float32
+#define floatx_to_float64 floatx80_to_float64
+#define floatx_abs floatx80_abs
+#define floatx_chs floatx80_chs
+#define floatx_round_to_int floatx80_round_to_int
+#define floatx_compare floatx80_compare
+#define floatx_compare_quiet floatx80_compare_quiet
+#else
+#define floatx_to_int32 float64_to_int32
+#define floatx_to_int64 float64_to_int64
+#define floatx_to_int32_round_to_zero float64_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero float64_to_int64_round_to_zero
+#define int32_to_floatx int32_to_float64
+#define int64_to_floatx int64_to_float64
+#define float32_to_floatx float32_to_float64
+#define float64_to_floatx(x, e) (x)
+#define floatx_to_float32 float64_to_float32
+#define floatx_to_float64(x, e) (x)
+#define floatx_abs float64_abs
+#define floatx_chs float64_chs
+#define floatx_round_to_int float64_round_to_int
+#define floatx_compare float64_compare
+#define floatx_compare_quiet float64_compare_quiet
+#endif
+
+#define RC_MASK         0xc00
+#define RC_NEAR		0x000
+#define RC_DOWN		0x400
+#define RC_UP		0x800
+#define RC_CHOP		0xc00
+
+#define MAXTAN 9223372036854775808.0
+
+#ifdef USE_X86LDOUBLE
+
+/* only for x86 */
+typedef union {
+    long double d;
+    struct {
+        unsigned long long lower;
+        unsigned short upper;
+    } l;
+} CPU86_LDoubleU;
+
+/* the following deal with x86 long double-precision numbers */
+#define MAXEXPD 0x7fff
+#define EXPBIAS 16383
+#define EXPD(fp)	(fp.l.upper & 0x7fff)
+#define SIGND(fp)	((fp.l.upper) & 0x8000)
+#define MANTD(fp)       (fp.l.lower)
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
+
+#else
+
+/* NOTE: arm is horrible as double 32 bit words are stored in big endian ! */
+typedef union {
+    double d;
+#if !defined(WORDS_BIGENDIAN) && !defined(__arm__)
+    struct {
+        uint32_t lower;
+        int32_t upper;
+    } l;
+#else
+    struct {
+        int32_t upper;
+        uint32_t lower;
+    } l;
+#endif
+#ifndef __arm__
+    int64_t ll;
+#endif
+} CPU86_LDoubleU;
+
+/* the following deal with IEEE double-precision numbers */
+#define MAXEXPD 0x7ff
+#define EXPBIAS 1023
+#define EXPD(fp)	(((fp.l.upper) >> 20) & 0x7FF)
+#define SIGND(fp)	((fp.l.upper) & 0x80000000)
+#ifdef __arm__
+#define MANTD(fp)	(fp.l.lower | ((uint64_t)(fp.l.upper & ((1 << 20) - 1)) << 32))
+#else
+#define MANTD(fp)	(fp.ll & ((1LL << 52) - 1))
+#endif
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7ff << 20)) | (EXPBIAS << 20)
+#endif
+
+static inline void fpush(void)
+{
+    env->fpstt = (env->fpstt - 1) & 7; /* step the top-of-stack index down, mod 8 */
+    env->fptags[env->fpstt] = 0; /* validate stack entry */
+}
+
+static inline void fpop(void)
+{
+    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
+    env->fpstt = (env->fpstt + 1) & 7; /* step the top-of-stack index up, mod 8 */
+}
+
+#ifndef USE_X86LDOUBLE
+static inline CPU86_LDouble helper_fldt(target_ulong ptr)
+{
+    CPU86_LDoubleU temp;
+    int upper, e;
+    uint64_t ll;
+
+    /* 16-bit sign + exponent word stored at ptr + 8 */
+    upper = lduw(ptr + 8);
+    /* XXX: handle overflow ? */
+    e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */
+    e |= (upper >> 4) & 0x800; /* sign */
+    ll = (ldq(ptr) >> 11) & ((1LL << 52) - 1); /* bits 62..11 of the 64-bit word at ptr */
+#ifdef __arm__
+    temp.l.upper = (e << 20) | (ll >> 32);
+    temp.l.lower = ll;
+#else
+    temp.ll = ll | ((uint64_t)e << 52);
+#endif
+    return temp.d;
+}
+
+static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
+{
+    CPU86_LDoubleU temp;
+    int e;
+
+    temp.d = f;
+    /* mantissa: 52-bit fraction moved up 11 bits, bit 63 forced on;
+       1ULL keeps the shift into bit 63 well-defined (1LL << 63 overflows) */
+    stq(ptr, ((uint64_t)MANTD(temp) << 11) | (1ULL << 63));
+    /* exponent rebased from EXPBIAS to 16383, sign moved from bit 31 to 15 */
+    e = (EXPD(temp) - EXPBIAS + 16383) | (SIGND(temp) >> 16);
+    stw(ptr + 8, e);
+}
+#else
+
+/* we use memory access macros */
+
+static inline CPU86_LDouble helper_fldt(target_ulong ptr)
+{
+    CPU86_LDoubleU temp;
+
+    temp.l.lower = ldq(ptr); /* 64-bit word at ptr */
+    temp.l.upper = lduw(ptr + 8); /* 16-bit word at ptr + 8 */
+    return temp.d; /* same bytes read back through the long double member */
+}
+
+static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
+{
+    CPU86_LDoubleU temp;
+
+    temp.d = f; /* split the value through the union */
+    stq(ptr, temp.l.lower); /* 64-bit word to ptr */
+    stw(ptr + 8, temp.l.upper); /* 16-bit word to ptr + 8 */
+}
+
+#endif /* USE_X86LDOUBLE */
+
+#define FPUS_IE (1 << 0)
+#define FPUS_DE (1 << 1)
+#define FPUS_ZE (1 << 2)
+#define FPUS_OE (1 << 3)
+#define FPUS_UE (1 << 4)
+#define FPUS_PE (1 << 5)
+#define FPUS_SF (1 << 6)
+#define FPUS_SE (1 << 7)
+#define FPUS_B  (1 << 15)
+
+#define FPUC_EM 0x3f
+
+static inline uint32_t compute_eflags(void) /* ORs env->eflags, the cc helper result and the DF bit */
+{
+    return env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK);
+}
+
+/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */
+static inline void load_eflags(int eflags, int update_mask)
+{
+    CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    DF = 1 - (2 * ((eflags >> 10) & 1)); /* bit 10 clear -> 1, set -> -1 */
+    env->eflags = (env->eflags & ~update_mask) | /* only update_mask bits change */
+        (eflags & update_mask) | 0x2; /* bit 1 is always forced on */
+}
+
+static inline void env_to_regs(void) /* one copy per register, each compiled only if reg_<NAME> is defined */
+{
+#ifdef reg_EAX
+    EAX = env->regs[R_EAX];
+#endif
+#ifdef reg_ECX
+    ECX = env->regs[R_ECX];
+#endif
+#ifdef reg_EDX
+    EDX = env->regs[R_EDX];
+#endif
+#ifdef reg_EBX
+    EBX = env->regs[R_EBX];
+#endif
+#ifdef reg_ESP
+    ESP = env->regs[R_ESP];
+#endif
+#ifdef reg_EBP
+    EBP = env->regs[R_EBP];
+#endif
+#ifdef reg_ESI
+    ESI = env->regs[R_ESI];
+#endif
+#ifdef reg_EDI
+    EDI = env->regs[R_EDI];
+#endif
+}
+
+static inline void regs_to_env(void) /* reverse of env_to_regs(): write each reg_<NAME> value back to env->regs[] */
+{
+#ifdef reg_EAX
+    env->regs[R_EAX] = EAX;
+#endif
+#ifdef reg_ECX
+    env->regs[R_ECX] = ECX;
+#endif
+#ifdef reg_EDX
+    env->regs[R_EDX] = EDX;
+#endif
+#ifdef reg_EBX
+    env->regs[R_EBX] = EBX;
+#endif
+#ifdef reg_ESP
+    env->regs[R_ESP] = ESP;
+#endif
+#ifdef reg_EBP
+    env->regs[R_EBP] = EBP;
+#endif
+#ifdef reg_ESI
+    env->regs[R_ESI] = ESI;
+#endif
+#ifdef reg_EDI
+    env->regs[R_EDI] = EDI;
+#endif
+}
+
+static inline int cpu_has_work(CPUState *env)
+{
+    int work;
+
+    work = (env->interrupt_request & CPU_INTERRUPT_HARD) && /* HARD counts only ... */
+           (env->eflags & IF_MASK); /* ... while IF is set */
+    work |= env->interrupt_request & CPU_INTERRUPT_NMI; /* the next three ignore IF */
+    work |= env->interrupt_request & CPU_INTERRUPT_INIT;
+    work |= env->interrupt_request & CPU_INTERRUPT_SIPI;
+
+    return work; /* non-zero when work is pending; not normalised to 0/1 */
+}
+
+static inline int cpu_halted(CPUState *env) {
+    /* Returns 0 when the CPU may run, EXCP_HALTED while it must stay halted. */
+    if (env->halted) {
+        /* still halted unless something is pending */
+        if (!cpu_has_work(env))
+            return EXCP_HALTED;
+        /* pending work ends the halt */
+        env->halted = 0;
+    }
+    return 0;
+}
+
+/* load efer and update the corresponding hflags. XXX: do consistency
+   checks with cpuid bits ? */
+static inline void cpu_load_efer(CPUState *env, uint64_t val)
+{
+    env->efer = val;
+    env->hflags &= ~(HF_LMA_MASK | HF_SVME_MASK); /* clear both, then re-derive */
+    if (env->efer & MSR_EFER_LMA)
+        env->hflags |= HF_LMA_MASK; /* follows the MSR_EFER_LMA bit */
+    if (env->efer & MSR_EFER_SVME)
+        env->hflags |= HF_SVME_MASK; /* follows the MSR_EFER_SVME bit */
+}
diff --git a/target-i386/helper.c b/target-i386/helper.c
new file mode 100644
index 0000000..5a81cb2
--- /dev/null
+++ b/target-i386/helper.c
@@ -0,0 +1,1833 @@
+/*
+ *  i386 helpers (without register variable usage)
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+
+#include "cpu.h"
+#include "exec-all.h"
+#include "qemu-common.h"
+#include "kvm.h"
+
+//#define DEBUG_MMU
+
+/* feature flags taken from "Intel Processor Identification and the CPUID
+ * Instruction" and AMD's "CPUID Specification". In cases of disagreement
+ * about feature names, the Linux name is used. */
+static const char *feature_name[] = {
+    "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+    "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+    "pat", "pse36", "pn" /* Intel psn */, "clflush" /* Intel clfsh */, NULL, "ds" /* Intel dts */, "acpi", "mmx",
+    "fxsr", "sse", "sse2", "ss", "ht" /* Intel htt */, "tm", "ia64", "pbe",
+};
+static const char *ext_feature_name[] = {
+    "pni" /* Intel,AMD sse3 */, NULL, NULL, "monitor", "ds_cpl", "vmx", NULL /* Linux smx */, "est",
+    "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
+    NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
+       NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+};
+static const char *ext2_feature_name[] = {
+    "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+    "cx8" /* AMD CMPXCHG8B */, "apic", NULL, "syscall", "mtrr", "pge", "mca", "cmov",
+    "pat", "pse36", NULL, NULL /* Linux mp */, "nx" /* Intel xd */, NULL, "mmxext", "mmx",
+    "fxsr", "fxsr_opt" /* AMD ffxsr */, "pdpe1gb" /* AMD Page1GB */, "rdtscp", NULL, "lm" /* Intel 64 */, "3dnowext", "3dnow",
+};
+static const char *ext3_feature_name[] = {
+    "lahf_lm" /* AMD LahfSahf */, "cmp_legacy", "svm", "extapic" /* AMD ExtApicSpace */, "cr8legacy" /* AMD AltMovCr8 */, "abm", "sse4a", "misalignsse",
+    "3dnowprefetch", "osvw", NULL /* Linux ibs */, NULL, "skinit", "wdt", NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+};
+
+/* OR flagname's bit into every table that names it; warn if none does. */
+static void add_flagname_to_bitmaps(char *flagname, uint32_t *features,
+                                    uint32_t *ext_features,
+                                    uint32_t *ext2_features,
+                                    uint32_t *ext3_features)
+{
+    int i, found = 0;
+
+    for ( i = 0 ; i < 32 ; i++ )
+        if (feature_name[i] && !strcmp (flagname, feature_name[i])) {
+            *features |= 1U << i; /* 1U: bit 31 must not overflow a signed int */
+            found = 1;
+        }
+    for ( i = 0 ; i < 32 ; i++ )
+        if (ext_feature_name[i] && !strcmp (flagname, ext_feature_name[i])) {
+            *ext_features |= 1U << i;
+            found = 1;
+        }
+    for ( i = 0 ; i < 32 ; i++ )
+        if (ext2_feature_name[i] && !strcmp (flagname, ext2_feature_name[i])) {
+            *ext2_features |= 1U << i;
+            found = 1;
+        }
+    for ( i = 0 ; i < 32 ; i++ )
+        if (ext3_feature_name[i] && !strcmp (flagname, ext3_feature_name[i])) {
+            *ext3_features |= 1U << i;
+            found = 1;
+        }
+    if (!found) {
+        fprintf(stderr, "CPU feature %s not found\n", flagname);
+    }
+}
+
+static void kvm_trim_features(uint32_t *features, uint32_t supported,
+                              const char *names[]) /* 'names' is unused here */
+{
+    int i;
+    uint32_t mask;
+
+    for (i = 0; i < 32; ++i) { /* visit each of the 32 feature bits */
+        mask = 1U << i;
+        if ((*features & mask) && !(supported & mask)) {
+            *features &= ~mask; /* drop bits not present in 'supported' */
+        }
+    }
+}
+
+typedef struct x86_def_t {
+    const char *name; /* model name matched by cpu_x86_find_by_name() */
+    uint32_t level; /* copied to env->cpuid_level */
+    uint32_t vendor1, vendor2, vendor3; /* vendor1 == 0 selects the Intel default */
+    int family;
+    int model;
+    int stepping; /* family/model/stepping are packed into env->cpuid_version */
+    uint32_t features, ext_features, ext2_features, ext3_features;
+    uint32_t xlevel; /* copied to env->cpuid_xlevel */
+    char model_id[48]; /* packed into env->cpuid_model */
+    int vendor_override; /* set to 1 when a vendor= option is given */
+} x86_def_t;
+
+#define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE)
+#define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \
+          CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX)
+#define PENTIUM2_FEATURES (PENTIUM_FEATURES | CPUID_PAE | CPUID_SEP | \
+          CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \
+          CPUID_PSE36 | CPUID_FXSR)
+#define PENTIUM3_FEATURES (PENTIUM2_FEATURES | CPUID_SSE)
+#define PPRO_FEATURES (CPUID_FP87 | CPUID_DE | CPUID_PSE | CPUID_TSC | \
+          CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_PGE | CPUID_CMOV | \
+          CPUID_PAT | CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | \
+          CPUID_PAE | CPUID_SEP | CPUID_APIC)
+static x86_def_t x86_defs[] = {
+#ifdef TARGET_X86_64
+    {
+        .name = "qemu64",
+        .level = 2,
+        .vendor1 = CPUID_VENDOR_AMD_1,
+        .vendor2 = CPUID_VENDOR_AMD_2,
+        .vendor3 = CPUID_VENDOR_AMD_3,
+        .family = 6,
+        .model = 2,
+        .stepping = 3,
+        .features = PPRO_FEATURES | 
+        /* these features are needed for Win64 and aren't fully implemented */
+            CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA |
+        /* this feature is needed for Solaris and isn't fully implemented */
+            CPUID_PSE36,
+        .ext_features = CPUID_EXT_SSE3,
+        .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | 
+            CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX |
+            CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
+        .ext3_features = CPUID_EXT3_SVM,
+        .xlevel = 0x8000000A,
+        .model_id = "QEMU Virtual CPU version " QEMU_VERSION,
+    },
+    {
+        .name = "phenom",
+        .level = 5,
+        .vendor1 = CPUID_VENDOR_AMD_1,
+        .vendor2 = CPUID_VENDOR_AMD_2,
+        .vendor3 = CPUID_VENDOR_AMD_3,
+        .family = 16,
+        .model = 2,
+        .stepping = 3,
+        /* Missing: CPUID_VME, CPUID_HT */
+        .features = PPRO_FEATURES | 
+            CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA |
+            CPUID_PSE36,
+        /* Missing: CPUID_EXT_CX16, CPUID_EXT_POPCNT */
+        .ext_features = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR,
+        /* Missing: CPUID_EXT2_PDPE1GB, CPUID_EXT2_RDTSCP */
+        .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | 
+            CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX |
+            CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_MMXEXT |
+            CPUID_EXT2_FFXSR,
+        /* Missing: CPUID_EXT3_LAHF_LM, CPUID_EXT3_CMP_LEG, CPUID_EXT3_EXTAPIC,
+                    CPUID_EXT3_CR8LEG, CPUID_EXT3_ABM, CPUID_EXT3_SSE4A,
+                    CPUID_EXT3_MISALIGNSSE, CPUID_EXT3_3DNOWPREFETCH,
+                    CPUID_EXT3_OSVW, CPUID_EXT3_IBS */
+        .ext3_features = CPUID_EXT3_SVM,
+        .xlevel = 0x8000001A,
+        .model_id = "AMD Phenom(tm) 9550 Quad-Core Processor"
+    },
+    {
+        .name = "core2duo",
+        .level = 10,
+        .family = 6,
+        .model = 15,
+        .stepping = 11,
+	/* The original CPU also implements these features:
+               CPUID_VME, CPUID_DTS, CPUID_ACPI, CPUID_SS, CPUID_HT,
+               CPUID_TM, CPUID_PBE */
+        .features = PPRO_FEATURES |
+            CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA |
+            CPUID_PSE36,
+	/* The original CPU also implements these ext features:
+               CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_EST,
+               CPUID_EXT_TM2, CPUID_EXT_CX16, CPUID_EXT_XTPR, CPUID_EXT_PDCM */
+        .ext_features = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | CPUID_EXT_SSSE3,
+        .ext2_features = CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
+        /* Missing: .ext3_features = CPUID_EXT3_LAHF_LM */
+        .xlevel = 0x80000008,
+        .model_id = "Intel(R) Core(TM)2 Duo CPU     T7700  @ 2.40GHz",
+    },
+#endif
+    {
+        .name = "qemu32",
+        .level = 2,
+        .family = 6,
+        .model = 3,
+        .stepping = 3,
+        .features = PPRO_FEATURES,
+        .ext_features = CPUID_EXT_SSE3,
+        .xlevel = 0,
+        .model_id = "QEMU Virtual CPU version " QEMU_VERSION,
+    },
+    {
+        .name = "coreduo",
+        .level = 10,
+        .family = 6,
+        .model = 14,
+        .stepping = 8,
+        /* The original CPU also implements these features:
+               CPUID_DTS, CPUID_ACPI, CPUID_SS, CPUID_HT,
+               CPUID_TM, CPUID_PBE */
+        .features = PPRO_FEATURES | CPUID_VME |
+            CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA,
+        /* The original CPU also implements these ext features:
+               CPUID_EXT_VMX, CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_XTPR,
+               CPUID_EXT_PDCM */
+        .ext_features = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR,
+        .ext2_features = CPUID_EXT2_NX,
+        .xlevel = 0x80000008,
+        .model_id = "Genuine Intel(R) CPU           T2600  @ 2.16GHz",
+    },
+    {
+        .name = "486",
+        .level = 0,
+        .family = 4,
+        .model = 0,
+        .stepping = 0,
+        .features = I486_FEATURES,
+        .xlevel = 0,
+    },
+    {
+        .name = "pentium",
+        .level = 1,
+        .family = 5,
+        .model = 4,
+        .stepping = 3,
+        .features = PENTIUM_FEATURES,
+        .xlevel = 0,
+    },
+    {
+        .name = "pentium2",
+        .level = 2,
+        .family = 6,
+        .model = 5,
+        .stepping = 2,
+        .features = PENTIUM2_FEATURES,
+        .xlevel = 0,
+    },
+    {
+        .name = "pentium3",
+        .level = 2,
+        .family = 6,
+        .model = 7,
+        .stepping = 3,
+        .features = PENTIUM3_FEATURES,
+        .xlevel = 0,
+    },
+    {
+        .name = "athlon",
+        .level = 2,
+        .vendor1 = 0x68747541, /* "Auth" */
+        .vendor2 = 0x69746e65, /* "enti" */
+        .vendor3 = 0x444d4163, /* "cAMD" */
+        .family = 6,
+        .model = 2,
+        .stepping = 3,
+        .features = PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR | CPUID_MCA,
+        .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
+        .xlevel = 0x80000008,
+        /* XXX: put another string ? */
+        .model_id = "QEMU Virtual CPU version " QEMU_VERSION,
+    },
+    {
+        .name = "n270",
+        /* original is on level 10 */
+        .level = 5,
+        .family = 6,
+        .model = 28,
+        .stepping = 2,
+        .features = PPRO_FEATURES |
+            CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | CPUID_VME,
+            /* Missing: CPUID_DTS | CPUID_ACPI | CPUID_SS |
+             * CPUID_HT | CPUID_TM | CPUID_PBE */
+            /* Some CPUs got no CPUID_SEP */
+        .ext_features = CPUID_EXT_MONITOR |
+            CPUID_EXT_SSE3 /* PNI */ | CPUID_EXT_SSSE3,
+            /* Missing: CPUID_EXT_DSCPL | CPUID_EXT_EST |
+             * CPUID_EXT_TM2 | CPUID_EXT_XTPR */
+        .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | CPUID_EXT2_NX,
+        /* Missing: .ext3_features = CPUID_EXT3_LAHF_LM */
+        .xlevel = 0x8000000A,
+        .model_id = "Intel(R) Atom(TM) CPU N270   @ 1.60GHz",
+    },
+};
+
+/* Parse "model[,+flag|-flag|key=value...]" into *x86_cpu_def: a copy of the
+ * matching x86_defs[] entry with the requested overrides applied.  Returns 0,
+ * or -1 on unknown/missing model name, malformed option or strdup() failure. */
+static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
+{
+    unsigned int i;
+    x86_def_t *def = NULL;
+
+    char *s = strdup(cpu_model);
+    /* name stays NULL if strdup() failed or the string holds no model token */
+    char *featurestr, *name = s ? strtok(s, ",") : NULL;
+    uint32_t plus_features = 0, plus_ext_features = 0, plus_ext2_features = 0, plus_ext3_features = 0;
+    uint32_t minus_features = 0, minus_ext_features = 0, minus_ext2_features = 0, minus_ext3_features = 0;
+    int family = -1, model = -1, stepping = -1;
+
+    for (i = 0; name && i < ARRAY_SIZE(x86_defs); i++) {
+        if (strcmp(name, x86_defs[i].name) == 0) {
+            def = &x86_defs[i];
+            break;
+        }
+    }
+    if (!def)
+        goto error;
+    memcpy(x86_cpu_def, def, sizeof(*def));
+
+    featurestr = strtok(NULL, ",");
+
+    while (featurestr) {
+        char *val, *err;
+        if (featurestr[0] == '+') {
+            add_flagname_to_bitmaps(featurestr + 1, &plus_features, &plus_ext_features, &plus_ext2_features, &plus_ext3_features);
+        } else if (featurestr[0] == '-') {
+            add_flagname_to_bitmaps(featurestr + 1, &minus_features, &minus_ext_features, &minus_ext2_features, &minus_ext3_features);
+        } else if ((val = strchr(featurestr, '='))) {
+            *val = 0; val++;
+            if (!strcmp(featurestr, "family")) {
+                family = strtol(val, &err, 10);
+                if (!*val || *err || family < 0) {
+                    fprintf(stderr, "bad numerical value %s\n", val);
+                    goto error;
+                }
+                x86_cpu_def->family = family;
+            } else if (!strcmp(featurestr, "model")) {
+                model = strtol(val, &err, 10);
+                if (!*val || *err || model < 0 || model > 0xff) {
+                    fprintf(stderr, "bad numerical value %s\n", val);
+                    goto error;
+                }
+                x86_cpu_def->model = model;
+            } else if (!strcmp(featurestr, "stepping")) {
+                stepping = strtol(val, &err, 10);
+                if (!*val || *err || stepping < 0 || stepping > 0xf) {
+                    fprintf(stderr, "bad numerical value %s\n", val);
+                    goto error;
+                }
+                x86_cpu_def->stepping = stepping;
+            } else if (!strcmp(featurestr, "vendor")) {
+                if (strlen(val) != 12) {
+                    fprintf(stderr, "vendor string must be 12 chars long\n");
+                    goto error;
+                }
+                x86_cpu_def->vendor1 = 0;
+                x86_cpu_def->vendor2 = 0;
+                x86_cpu_def->vendor3 = 0;
+                for(i = 0; i < 4; i++) { /* uint32_t cast: a byte >= 0x80 << 24 would overflow int */
+                    x86_cpu_def->vendor1 |= (uint32_t)(uint8_t)val[i    ] << (8 * i);
+                    x86_cpu_def->vendor2 |= (uint32_t)(uint8_t)val[i + 4] << (8 * i);
+                    x86_cpu_def->vendor3 |= (uint32_t)(uint8_t)val[i + 8] << (8 * i);
+                }
+                x86_cpu_def->vendor_override = 1;
+            } else if (!strcmp(featurestr, "model_id")) {
+                pstrcpy(x86_cpu_def->model_id, sizeof(x86_cpu_def->model_id),
+                        val);
+            } else {
+                fprintf(stderr, "unrecognized feature %s\n", featurestr);
+                goto error;
+            }
+        } else {
+            fprintf(stderr, "feature string `%s' not in format (+feature|-feature|feature=xyz)\n", featurestr);
+            goto error;
+        }
+        featurestr = strtok(NULL, ",");
+    }
+    x86_cpu_def->features |= plus_features;
+    x86_cpu_def->ext_features |= plus_ext_features;
+    x86_cpu_def->ext2_features |= plus_ext2_features;
+    x86_cpu_def->ext3_features |= plus_ext3_features;
+    x86_cpu_def->features &= ~minus_features;
+    x86_cpu_def->ext_features &= ~minus_ext_features;
+    x86_cpu_def->ext2_features &= ~minus_ext2_features;
+    x86_cpu_def->ext3_features &= ~minus_ext3_features;
+    free(s);
+    return 0;
+
+error:
+    free(s);
+    return -1;
+}
+
+void x86_cpu_list (FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+{
+    unsigned int i;
+
+    for (i = 0; i < ARRAY_SIZE(x86_defs); i++) /* one output line per x86_defs[] entry */
+        (*cpu_fprintf)(f, "x86 %16s\n", x86_defs[i].name); /* name right-aligned in 16 columns */
+}
+
+/* Resolve cpu_model via cpu_x86_find_by_name() and copy the resulting CPUID
+ * identity into env.  Returns 0 on success, -1 if the model cannot be parsed. */
+static int cpu_x86_register (CPUX86State *env, const char *cpu_model)
+{
+    x86_def_t def1, *def = &def1;
+
+    if (cpu_x86_find_by_name(def, cpu_model) < 0)
+        return -1;
+    if (def->vendor1) {
+        env->cpuid_vendor1 = def->vendor1;
+        env->cpuid_vendor2 = def->vendor2;
+        env->cpuid_vendor3 = def->vendor3;
+    } else {
+        env->cpuid_vendor1 = CPUID_VENDOR_INTEL_1;
+        env->cpuid_vendor2 = CPUID_VENDOR_INTEL_2;
+        env->cpuid_vendor3 = CPUID_VENDOR_INTEL_3;
+    }
+    env->cpuid_vendor_override = def->vendor_override;
+    env->cpuid_level = def->level;
+    if (def->family > 0x0f)
+        env->cpuid_version = 0xf00 | ((def->family - 0x0f) << 20);
+    else
+        env->cpuid_version = def->family << 8;
+    env->cpuid_version |= ((def->model & 0xf) << 4) | ((def->model >> 4) << 16);
+    env->cpuid_version |= def->stepping;
+    env->cpuid_features = def->features;
+    env->pat = 0x0007040600070406ULL;
+    env->cpuid_ext_features = def->ext_features;
+    env->cpuid_ext2_features = def->ext2_features;
+    env->cpuid_xlevel = def->xlevel;
+    env->cpuid_ext3_features = def->ext3_features;
+    {
+        /* pack model_id into 32-bit words, low byte first, zero-padded to 48 */
+        /* NOTE(review): |= assumes env->cpuid_model is zero on entry - confirm */
+        const char *model_id = def->model_id; /* array member: never NULL */
+        int len, i;
+        uint32_t c; /* unsigned so a byte >= 0x80 shifted by 24 stays defined */
+        len = strlen(model_id);
+        for(i = 0; i < 48; i++) {
+            c = (i < len) ? (uint8_t)model_id[i] : 0;
+            env->cpuid_model[i >> 2] |= c << (8 * (i & 3));
+        }
+    }
+    return 0;
+}
+
+/* NOTE: must be called outside the CPU execute loop */
+void cpu_reset(CPUX86State *env)
+{
+    int i;
+
+    if (qemu_loglevel_mask(CPU_LOG_RESET)) {
+        qemu_log("CPU Reset (CPU %d)\n", env->cpu_index);
+        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
+    }
+
+    memset(env, 0, offsetof(CPUX86State, breakpoints)); /* zero every field laid out before 'breakpoints' */
+
+    tlb_flush(env, 1);
+
+    env->old_exception = -1;
+
+    /* init to reset state */
+
+#ifdef CONFIG_SOFTMMU
+    env->hflags |= HF_SOFTMMU_MASK;
+#endif
+    env->hflags2 |= HF2_GIF_MASK;
+
+    cpu_x86_update_cr0(env, 0x60000010);
+    env->a20_mask = ~0x0; /* all address bits pass through the A20 mask */
+    env->smbase = 0x30000;
+
+    env->idt.limit = 0xffff;
+    env->gdt.limit = 0xffff;
+    env->ldt.limit = 0xffff;
+    env->ldt.flags = DESC_P_MASK | (2 << DESC_TYPE_SHIFT);
+    env->tr.limit = 0xffff;
+    env->tr.flags = DESC_P_MASK | (11 << DESC_TYPE_SHIFT);
+
+    cpu_x86_load_seg_cache(env, R_CS, 0xf000, 0xffff0000, 0xffff, /* selector, base, limit */
+                           DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK |
+                           DESC_R_MASK | DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffff, /* the five data segments share one setup */
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+
+    env->eip = 0xfff0; /* with the CS base above: 0xffff0000 + 0xfff0 = 0xfffffff0 */
+    env->regs[R_EDX] = env->cpuid_version; /* EDX carries the CPUID version word */
+
+    env->eflags = 0x2;
+
+    /* FPU init */
+    for(i = 0;i < 8; i++)
+        env->fptags[i] = 1; /* tag value 1 on all eight entries */
+    env->fpuc = 0x37f;
+
+    env->mxcsr = 0x1f80;
+
+    memset(env->dr, 0, sizeof(env->dr));
+    env->dr[6] = DR6_FIXED_1;
+    env->dr[7] = DR7_FIXED_1;
+    cpu_breakpoint_remove_all(env, BP_CPU);
+    cpu_watchpoint_remove_all(env, BP_CPU);
+}
+
+void cpu_x86_close(CPUX86State *env)
+{
+    qemu_free(env); /* releases the CPU state object itself */
+}
+
+/***********************************************************/
+/* x86 debug */
+
+static const char *cc_op_str[] = {
+    "DYNAMIC",
+    "EFLAGS",
+
+    "MULB",
+    "MULW",
+    "MULL",
+    "MULQ",
+
+    "ADDB",
+    "ADDW",
+    "ADDL",
+    "ADDQ",
+
+    "ADCB",
+    "ADCW",
+    "ADCL",
+    "ADCQ",
+
+    "SUBB",
+    "SUBW",
+    "SUBL",
+    "SUBQ",
+
+    "SBBB",
+    "SBBW",
+    "SBBL",
+    "SBBQ",
+
+    "LOGICB",
+    "LOGICW",
+    "LOGICL",
+    "LOGICQ",
+
+    "INCB",
+    "INCW",
+    "INCL",
+    "INCQ",
+
+    "DECB",
+    "DECW",
+    "DECL",
+    "DECQ",
+
+    "SHLB",
+    "SHLW",
+    "SHLL",
+    "SHLQ",
+
+    "SARB",
+    "SARW",
+    "SARL",
+    "SARQ",
+};
+
+/* Print one segment-cache line; the decoded type is added only if PE and P are set. */
+static void cpu_x86_dump_seg_cache(CPUState *env, FILE *f,
+    int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+    const char *name, struct SegmentCache *sc)
+{
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_CS64_MASK) {
+        cpu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name,
+                    sc->selector, sc->base, sc->limit, sc->flags);
+    } else
+#endif
+    {
+        cpu_fprintf(f, "%-3s=%04x %08x %08x %08x", name, sc->selector,
+                    (uint32_t)sc->base, sc->limit, sc->flags);
+    }
+
+    if (!(env->hflags & HF_PE_MASK) || !(sc->flags & DESC_P_MASK))
+        goto done;
+
+    cpu_fprintf(f, " DPL=%d ", (sc->flags & DESC_DPL_MASK) >> DESC_DPL_SHIFT);
+    if (sc->flags & DESC_S_MASK) {
+        if (sc->flags & DESC_CS_MASK) {
+            cpu_fprintf(f, "%s", (sc->flags & DESC_L_MASK) ? "CS64" :
+                                 ((sc->flags & DESC_B_MASK) ? "CS32" : "CS16"));
+            cpu_fprintf(f, " [%c%c", (sc->flags & DESC_C_MASK) ? 'C' : '-',
+                        (sc->flags & DESC_R_MASK) ? 'R' : '-');
+        } else {
+            cpu_fprintf(f, "%s", (sc->flags & DESC_B_MASK) ? "DS  " : "DS16");
+            cpu_fprintf(f, " [%c%c", (sc->flags & DESC_E_MASK) ? 'E' : '-',
+                        (sc->flags & DESC_W_MASK) ? 'W' : '-');
+        }
+        cpu_fprintf(f, "%c]", (sc->flags & DESC_A_MASK) ? 'A' : '-');
+    } else {
+        static const char *sys_type_name[2][16] = {
+            { /* 32 bit mode */
+                "Reserved", "TSS16-avl", "LDT", "TSS16-busy",
+                "CallGate16", "TaskGate", "IntGate16", "TrapGate16",
+                "Reserved", "TSS32-avl", "Reserved", "TSS32-busy",
+                "CallGate32", "Reserved", "IntGate32", "TrapGate32"
+            },
+            { /* 64 bit mode */
+                "<hiword>", "Reserved", "LDT", "Reserved", "Reserved",
+                "Reserved", "Reserved", "Reserved", "Reserved",
+                "TSS64-avl", "Reserved", "TSS64-busy", "CallGate64",
+                "Reserved", "IntGate64", "TrapGate64"
+            }
+        };
+        cpu_fprintf(f, "%s", sys_type_name[(env->hflags & HF_LMA_MASK) ? 1 : 0]
+                                          [(sc->flags & DESC_TYPE_MASK)
+                                           >> DESC_TYPE_SHIFT]); /* names go through "%s", never as the format */
+    }
+done:
+    cpu_fprintf(f, "\n");
+}
+
+/* Dump the architectural state of an x86 CPU in human readable form.
+ *
+ * env         - CPU to dump (registers are first fetched from KVM when
+ *               kvm_enabled())
+ * f           - stream handed through to cpu_fprintf
+ * cpu_fprintf - fprintf-like output callback
+ * flags       - X86_DUMP_CCOP adds the condition-code state,
+ *               X86_DUMP_FPU adds the FPU and XMM registers
+ *
+ * General registers are printed 64 bits wide when the CPU is executing
+ * 64-bit code (HF_CS64_MASK); descriptor tables, control and debug
+ * registers are printed 64 bits wide when long mode is active
+ * (HF_LMA_MASK).  In the 32-bit layouts every target_ulong value is
+ * explicitly truncated to uint32_t so that it matches the "%08x"
+ * conversions on TARGET_X86_64 builds as well.
+ */
+void cpu_dump_state(CPUState *env, FILE *f,
+                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+                    int flags)
+{
+    int eflags, i, nb;
+    char cc_op_name[32];
+    static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" };
+
+    if (kvm_enabled())
+        kvm_arch_get_registers(env);
+
+    eflags = env->eflags;
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_CS64_MASK) {
+        cpu_fprintf(f,
+                    "RAX=%016" PRIx64 " RBX=%016" PRIx64 " RCX=%016" PRIx64 " RDX=%016" PRIx64 "\n"
+                    "RSI=%016" PRIx64 " RDI=%016" PRIx64 " RBP=%016" PRIx64 " RSP=%016" PRIx64 "\n"
+                    "R8 =%016" PRIx64 " R9 =%016" PRIx64 " R10=%016" PRIx64 " R11=%016" PRIx64 "\n"
+                    "R12=%016" PRIx64 " R13=%016" PRIx64 " R14=%016" PRIx64 " R15=%016" PRIx64 "\n"
+                    "RIP=%016" PRIx64 " RFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n",
+                    env->regs[R_EAX],
+                    env->regs[R_EBX],
+                    env->regs[R_ECX],
+                    env->regs[R_EDX],
+                    env->regs[R_ESI],
+                    env->regs[R_EDI],
+                    env->regs[R_EBP],
+                    env->regs[R_ESP],
+                    env->regs[8],
+                    env->regs[9],
+                    env->regs[10],
+                    env->regs[11],
+                    env->regs[12],
+                    env->regs[13],
+                    env->regs[14],
+                    env->regs[15],
+                    env->eip, eflags,
+                    eflags & DF_MASK ? 'D' : '-',
+                    eflags & CC_O ? 'O' : '-',
+                    eflags & CC_S ? 'S' : '-',
+                    eflags & CC_Z ? 'Z' : '-',
+                    eflags & CC_A ? 'A' : '-',
+                    eflags & CC_P ? 'P' : '-',
+                    eflags & CC_C ? 'C' : '-',
+                    env->hflags & HF_CPL_MASK,
+                    (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
+                    (int)(env->a20_mask >> 20) & 1,
+                    (env->hflags >> HF_SMM_SHIFT) & 1,
+                    env->halted);
+    } else
+#endif
+    {
+        cpu_fprintf(f, "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n"
+                    "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n"
+                    "EIP=%08x EFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n",
+                    (uint32_t)env->regs[R_EAX],
+                    (uint32_t)env->regs[R_EBX],
+                    (uint32_t)env->regs[R_ECX],
+                    (uint32_t)env->regs[R_EDX],
+                    (uint32_t)env->regs[R_ESI],
+                    (uint32_t)env->regs[R_EDI],
+                    (uint32_t)env->regs[R_EBP],
+                    (uint32_t)env->regs[R_ESP],
+                    (uint32_t)env->eip, eflags,
+                    eflags & DF_MASK ? 'D' : '-',
+                    eflags & CC_O ? 'O' : '-',
+                    eflags & CC_S ? 'S' : '-',
+                    eflags & CC_Z ? 'Z' : '-',
+                    eflags & CC_A ? 'A' : '-',
+                    eflags & CC_P ? 'P' : '-',
+                    eflags & CC_C ? 'C' : '-',
+                    env->hflags & HF_CPL_MASK,
+                    (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
+                    (int)(env->a20_mask >> 20) & 1,
+                    (env->hflags >> HF_SMM_SHIFT) & 1,
+                    env->halted);
+    }
+
+    for(i = 0; i < 6; i++) {
+        cpu_x86_dump_seg_cache(env, f, cpu_fprintf, seg_name[i],
+                               &env->segs[i]);
+    }
+    cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "LDT", &env->ldt);
+    cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "TR", &env->tr);
+
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_LMA_MASK) {
+        cpu_fprintf(f, "GDT=     %016" PRIx64 " %08x\n",
+                    env->gdt.base, env->gdt.limit);
+        cpu_fprintf(f, "IDT=     %016" PRIx64 " %08x\n",
+                    env->idt.base, env->idt.limit);
+        cpu_fprintf(f, "CR0=%08x CR2=%016" PRIx64 " CR3=%016" PRIx64 " CR4=%08x\n",
+                    (uint32_t)env->cr[0],
+                    env->cr[2],
+                    env->cr[3],
+                    (uint32_t)env->cr[4]);
+        for(i = 0; i < 4; i++)
+            cpu_fprintf(f, "DR%d=%016" PRIx64 " ", i, env->dr[i]);
+        cpu_fprintf(f, "\nDR6=%016" PRIx64 " DR7=%016" PRIx64 "\n",
+                    env->dr[6], env->dr[7]);
+    } else
+#endif
+    {
+        cpu_fprintf(f, "GDT=     %08x %08x\n",
+                    (uint32_t)env->gdt.base, env->gdt.limit);
+        cpu_fprintf(f, "IDT=     %08x %08x\n",
+                    (uint32_t)env->idt.base, env->idt.limit);
+        cpu_fprintf(f, "CR0=%08x CR2=%08x CR3=%08x CR4=%08x\n",
+                    (uint32_t)env->cr[0],
+                    (uint32_t)env->cr[2],
+                    (uint32_t)env->cr[3],
+                    (uint32_t)env->cr[4]);
+        /* The debug registers are truncated like every other value in
+           this branch so the arguments match "%08x". */
+        for(i = 0; i < 4; i++)
+            cpu_fprintf(f, "DR%d=%08x ", i, (uint32_t)env->dr[i]);
+        cpu_fprintf(f, "\nDR6=%08x DR7=%08x\n",
+                    (uint32_t)env->dr[6], (uint32_t)env->dr[7]);
+    }
+    if (flags & X86_DUMP_CCOP) {
+        /* Unknown cc_op values are printed numerically in brackets. */
+        if ((unsigned)env->cc_op < CC_OP_NB)
+            snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]);
+        else
+            snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op);
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_CS64_MASK) {
+            cpu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%-8s\n",
+                        env->cc_src, env->cc_dst,
+                        cc_op_name);
+        } else
+#endif
+        {
+            cpu_fprintf(f, "CCS=%08x CCD=%08x CCO=%-8s\n",
+                        (uint32_t)env->cc_src, (uint32_t)env->cc_dst,
+                        cc_op_name);
+        }
+    }
+    if (flags & X86_DUMP_FPU) {
+        int fptag;
+        /* Build the tag byte: bit i is set when env->fptags[i] is zero. */
+        fptag = 0;
+        for(i = 0; i < 8; i++) {
+            fptag |= ((!env->fptags[i]) << i);
+        }
+        /* The status word is printed with fpstt merged into bits 11..13. */
+        cpu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n",
+                    env->fpuc,
+                    (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11,
+                    env->fpstt,
+                    fptag,
+                    env->mxcsr);
+        for(i=0;i<8;i++) {
+#if defined(USE_X86LDOUBLE)
+            union {
+                long double d;
+                struct {
+                    uint64_t lower;
+                    uint16_t upper;
+                } l;
+            } tmp;
+            tmp.d = env->fpregs[i].d;
+            cpu_fprintf(f, "FPR%d=%016" PRIx64 " %04x",
+                        i, tmp.l.lower, tmp.l.upper);
+#else
+            cpu_fprintf(f, "FPR%d=%016" PRIx64,
+                        i, env->fpregs[i].mmx.q);
+#endif
+            /* two registers per output line */
+            if ((i & 1) == 1)
+                cpu_fprintf(f, "\n");
+            else
+                cpu_fprintf(f, " ");
+        }
+        /* 16 XMM registers are shown when running 64-bit code, 8 otherwise */
+        if (env->hflags & HF_CS64_MASK)
+            nb = 16;
+        else
+            nb = 8;
+        for(i=0;i<nb;i++) {
+            cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x",
+                        i,
+                        env->xmm_regs[i].XMM_L(3),
+                        env->xmm_regs[i].XMM_L(2),
+                        env->xmm_regs[i].XMM_L(1),
+                        env->xmm_regs[i].XMM_L(0));
+            if ((i & 1) == 1)
+                cpu_fprintf(f, "\n");
+            else
+                cpu_fprintf(f, " ");
+        }
+    }
+}
+
+/***********************************************************/
+/* x86 mmu */
+/* XXX: add PGE support */
+
+/* Switch the emulated A20 line.  Any non-zero a20_state enables bit 20
+ * of env->a20_mask; zero clears it.  Nothing happens when the requested
+ * state equals the current one. */
+void cpu_x86_set_a20(CPUX86State *env, int a20_state)
+{
+    const int a20_wanted = (a20_state != 0);
+    const int a20_now = (env->a20_mask >> 20) & 1;
+
+    if (a20_wanted == a20_now)
+        return;
+
+#if defined(DEBUG_MMU)
+    printf("A20 update: a20=%d\n", a20_wanted);
+#endif
+    /* The CPU may be executing translated code right now: kick it out
+       of the current TB and everything chained to it. */
+    cpu_interrupt(env, CPU_INTERRUPT_EXITTB);
+
+    /* Every cached MMU mapping depends on the A20 mask, so drop them
+       all before installing the new mask. */
+    tlb_flush(env, 1);
+    env->a20_mask = (~0x100000) | (a20_wanted << 20);
+}
+
+/* Install a new CR0 value and bring the derived CPU state in line.
+ *
+ * env     - CPU to update
+ * new_cr0 - value being written to CR0
+ *
+ * Flushes the TLB when PG, WP or PE change, enters or leaves long mode
+ * on x86_64 targets when paging is toggled, stores the value (with ET
+ * forced on) and refreshes the PE, ADDSEG and FPU bits in env->hflags.
+ */
+void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0)
+{
+    int pe_state;
+
+#if defined(DEBUG_MMU)
+    printf("CR0 update: CR0=0x%08x\n", new_cr0);
+#endif
+    /* these three bits affect address translation */
+    if ((new_cr0 & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK)) !=
+        (env->cr[0] & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK))) {
+        tlb_flush(env, 1);
+    }
+
+#ifdef TARGET_X86_64
+    if (!(env->cr[0] & CR0_PG_MASK) && (new_cr0 & CR0_PG_MASK) &&
+        (env->efer & MSR_EFER_LME)) {
+        /* enter in long mode */
+        /* XXX: generate an exception */
+        /* Without PAE the write is dropped: CR0 and hflags stay as
+           they were (the TLB may already have been flushed above). */
+        if (!(env->cr[4] & CR4_PAE_MASK))
+            return;
+        env->efer |= MSR_EFER_LMA;
+        env->hflags |= HF_LMA_MASK;
+    } else if ((env->cr[0] & CR0_PG_MASK) && !(new_cr0 & CR0_PG_MASK) &&
+               (env->efer & MSR_EFER_LMA)) {
+        /* exit long mode */
+        env->efer &= ~MSR_EFER_LMA;
+        env->hflags &= ~(HF_LMA_MASK | HF_CS64_MASK);
+        env->eip &= 0xffffffff;
+    }
+#endif
+    /* the ET bit always reads back as set */
+    env->cr[0] = new_cr0 | CR0_ET_MASK;
+
+    /* update PE flag in hidden flags */
+    pe_state = (env->cr[0] & CR0_PE_MASK);
+    env->hflags = (env->hflags & ~HF_PE_MASK) | (pe_state << HF_PE_SHIFT);
+    /* ensure that ADDSEG is always set in real mode */
+    env->hflags |= ((pe_state ^ 1) << HF_ADDSEG_SHIFT);
+    /* update FPU flags */
+    /* a single shift moves the CR0 MP/EM/TS bits onto the matching
+       HF_MP/HF_EM/HF_TS positions */
+    env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
+        ((new_cr0 << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
+}
+
+/* XXX: in legacy PAE mode, generate a GPF if reserved bits are set in
+   the PDPT */
+/* Store a new CR3 value.  The TLB only has to be flushed (flush mode 0)
+ * when paging is currently enabled in CR0. */
+void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
+{
+    env->cr[3] = new_cr3;
+
+    if (!(env->cr[0] & CR0_PG_MASK))
+        return;
+
+#if defined(DEBUG_MMU)
+    printf("CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3);
+#endif
+    tlb_flush(env, 0);
+}
+
+/* Install a new CR4 value.
+ *
+ * env     - CPU to update
+ * new_cr4 - value being written to CR4
+ *
+ * Flushes the TLB when PGE, PAE or PSE change, masks out OSFXSR when
+ * the CPU model has no SSE, and mirrors OSFXSR into env->hflags.
+ */
+void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
+{
+#if defined(DEBUG_MMU)
+    /* trace the value being written, as the CR0 and CR3 traces do */
+    printf("CR4 update: CR4=%08x\n", new_cr4);
+#endif
+    /* these three bits affect address translation */
+    if ((new_cr4 & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK)) !=
+        (env->cr[4] & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK))) {
+        tlb_flush(env, 1);
+    }
+    /* SSE handling */
+    if (!(env->cpuid_features & CPUID_SSE))
+        new_cr4 &= ~CR4_OSFXSR_MASK;
+    if (new_cr4 & CR4_OSFXSR_MASK)
+        env->hflags |= HF_OSFXSR_MASK;
+    else
+        env->hflags &= ~HF_OSFXSR_MASK;
+
+    env->cr[4] = new_cr4;
+}
+
+#if defined(CONFIG_USER_ONLY)
+
+/* User-mode-only emulation: every MMU fault is reported as a user-level
+ * page fault.  CR2 receives the faulting address, the error code holds
+ * the write bit plus the user bit, and 1 is always returned. */
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
+                             int is_write, int mmu_idx, int is_softmmu)
+{
+    /* only bit 0 of is_write says whether the access was a write */
+    const int write_bit = is_write & 1;
+
+    env->cr[2] = addr;
+    env->error_code = (write_bit << PG_ERROR_W_BIT) | PG_ERROR_U_MASK;
+    env->exception_index = EXCP0E_PAGE;
+    return 1;
+}
+
+/* User-mode-only emulation: there is no page translation here, so the
+ * address is returned unchanged. */
+target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
+{
+    return addr;
+}
+
+#else
+
+/* Mask applied to page-table entries during the software page walk to
+ * extract the physical frame address: bits 12..31 with KQEMU,
+ * bits 12..39 on x86_64 targets and bits 12..35 otherwise.
+ * XXX: This value should match the one returned by CPUID
+ * and in exec.c */
+#if defined(CONFIG_KQEMU)
+#define PHYS_ADDR_MASK 0xfffff000LL
+#else
+# if defined(TARGET_X86_64)
+# define PHYS_ADDR_MASK 0xfffffff000LL
+# else
+# define PHYS_ADDR_MASK 0xffffff000LL
+# endif
+#endif
+
+/* Handle a softmmu TLB miss: walk the guest page tables for addr and
+   either install a mapping in the TLB or set up a page fault.
+
+   env        - CPU whose CR0/CR3/CR4/EFER state drives the walk
+   addr       - faulting guest virtual address
+   is_write1  - access type: bit 0 set means write, the value 2 means
+                instruction fetch (it triggers the NX checks below)
+   mmu_idx    - MMU index; MMU_USER_IDX selects user-level checks
+   is_softmmu - passed through to tlb_set_page_exec()
+
+   On success the result of tlb_set_page_exec() is returned.  On a
+   fault, env->error_code and env->exception_index are filled in (and
+   CR2, or the VMCB exit_info_2 field when page faults are intercepted)
+   and 1 is returned.
+
+   return value:
+   -1 = cannot handle fault
+   0  = nothing more to do
+   1  = generate PF fault
+   2  = soft MMU activation required for this block
+*/
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
+                             int is_write1, int mmu_idx, int is_softmmu)
+{
+    uint64_t ptep, pte;
+    target_ulong pde_addr, pte_addr;
+    int error_code, is_dirty, prot, page_size, ret, is_write, is_user;
+    target_phys_addr_t paddr;
+    uint32_t page_offset;
+    target_ulong vaddr, virt_addr;
+
+    is_user = mmu_idx == MMU_USER_IDX;
+#if defined(DEBUG_MMU)
+    printf("MMU fault: addr=" TARGET_FMT_lx " w=%d u=%d eip=" TARGET_FMT_lx "\n",
+           addr, is_write1, is_user, env->eip);
+#endif
+    is_write = is_write1 & 1;
+
+    /* paging disabled: identity mapping with full access */
+    if (!(env->cr[0] & CR0_PG_MASK)) {
+        pte = addr;
+        virt_addr = addr & TARGET_PAGE_MASK;
+        prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+        page_size = 4096;
+        goto do_mapping;
+    }
+
+    if (env->cr[4] & CR4_PAE_MASK) {
+        /* PAE / long mode: 64-bit entries.  ptep accumulates the
+           combined rights of all levels; the NX bit is kept inverted
+           while combining so that a plain AND merges it correctly. */
+        uint64_t pde, pdpe;
+        target_ulong pdpe_addr;
+
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint64_t pml4e_addr, pml4e;
+            int32_t sext;
+
+            /* test virtual address sign extension */
+            sext = (int64_t)addr >> 47;
+            if (sext != 0 && sext != -1) {
+                env->error_code = 0;
+                env->exception_index = EXCP0D_GPF;
+                return 1;
+            }
+
+            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
+                env->a20_mask;
+            pml4e = ldq_phys(pml4e_addr);
+            if (!(pml4e & PG_PRESENT_MASK)) {
+                error_code = 0;
+                goto do_fault;
+            }
+            /* NX set while EFER.NXE is off is reported as a reserved-bit
+               violation */
+            if (!(env->efer & MSR_EFER_NXE) && (pml4e & PG_NX_MASK)) {
+                error_code = PG_ERROR_RSVD_MASK;
+                goto do_fault;
+            }
+            if (!(pml4e & PG_ACCESSED_MASK)) {
+                pml4e |= PG_ACCESSED_MASK;
+                stl_phys_notdirty(pml4e_addr, pml4e);
+            }
+            ptep = pml4e ^ PG_NX_MASK;
+            pdpe_addr = ((pml4e & PHYS_ADDR_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
+                env->a20_mask;
+            pdpe = ldq_phys(pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK)) {
+                error_code = 0;
+                goto do_fault;
+            }
+            if (!(env->efer & MSR_EFER_NXE) && (pdpe & PG_NX_MASK)) {
+                error_code = PG_ERROR_RSVD_MASK;
+                goto do_fault;
+            }
+            ptep &= pdpe ^ PG_NX_MASK;
+            if (!(pdpe & PG_ACCESSED_MASK)) {
+                pdpe |= PG_ACCESSED_MASK;
+                stl_phys_notdirty(pdpe_addr, pdpe);
+            }
+        } else
+#endif
+        {
+            /* XXX: load them when cr3 is loaded ? */
+            pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
+                env->a20_mask;
+            pdpe = ldq_phys(pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK)) {
+                error_code = 0;
+                goto do_fault;
+            }
+            /* the legacy PAE PDPTE contributes no restrictions */
+            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
+        }
+
+        pde_addr = ((pdpe & PHYS_ADDR_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
+            env->a20_mask;
+        pde = ldq_phys(pde_addr);
+        if (!(pde & PG_PRESENT_MASK)) {
+            error_code = 0;
+            goto do_fault;
+        }
+        if (!(env->efer & MSR_EFER_NXE) && (pde & PG_NX_MASK)) {
+            error_code = PG_ERROR_RSVD_MASK;
+            goto do_fault;
+        }
+        ptep &= pde ^ PG_NX_MASK;
+        if (pde & PG_PSE_MASK) {
+            /* 2 MB page */
+            page_size = 2048 * 1024;
+            /* undo the inversion: from here on a set NX bit in ptep
+               means "no execute" */
+            ptep ^= PG_NX_MASK;
+            if ((ptep & PG_NX_MASK) && is_write1 == 2)
+                goto do_fault_protect;
+            if (is_user) {
+                if (!(ptep & PG_USER_MASK))
+                    goto do_fault_protect;
+                if (is_write && !(ptep & PG_RW_MASK))
+                    goto do_fault_protect;
+            } else {
+                if ((env->cr[0] & CR0_WP_MASK) &&
+                    is_write && !(ptep & PG_RW_MASK))
+                    goto do_fault_protect;
+            }
+            is_dirty = is_write && !(pde & PG_DIRTY_MASK);
+            if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
+                pde |= PG_ACCESSED_MASK;
+                if (is_dirty)
+                    pde |= PG_DIRTY_MASK;
+                stl_phys_notdirty(pde_addr, pde);
+            }
+            /* align to page_size */
+            pte = pde & ((PHYS_ADDR_MASK & ~(page_size - 1)) | 0xfff);
+            virt_addr = addr & ~(page_size - 1);
+        } else {
+            /* 4 KB page */
+            if (!(pde & PG_ACCESSED_MASK)) {
+                pde |= PG_ACCESSED_MASK;
+                stl_phys_notdirty(pde_addr, pde);
+            }
+            pte_addr = ((pde & PHYS_ADDR_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
+                env->a20_mask;
+            pte = ldq_phys(pte_addr);
+            if (!(pte & PG_PRESENT_MASK)) {
+                error_code = 0;
+                goto do_fault;
+            }
+            if (!(env->efer & MSR_EFER_NXE) && (pte & PG_NX_MASK)) {
+                error_code = PG_ERROR_RSVD_MASK;
+                goto do_fault;
+            }
+            /* combine pde and pte nx, user and rw protections */
+            ptep &= pte ^ PG_NX_MASK;
+            ptep ^= PG_NX_MASK;
+            if ((ptep & PG_NX_MASK) && is_write1 == 2)
+                goto do_fault_protect;
+            if (is_user) {
+                if (!(ptep & PG_USER_MASK))
+                    goto do_fault_protect;
+                if (is_write && !(ptep & PG_RW_MASK))
+                    goto do_fault_protect;
+            } else {
+                if ((env->cr[0] & CR0_WP_MASK) &&
+                    is_write && !(ptep & PG_RW_MASK))
+                    goto do_fault_protect;
+            }
+            is_dirty = is_write && !(pte & PG_DIRTY_MASK);
+            if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
+                pte |= PG_ACCESSED_MASK;
+                if (is_dirty)
+                    pte |= PG_DIRTY_MASK;
+                stl_phys_notdirty(pte_addr, pte);
+            }
+            page_size = 4096;
+            virt_addr = addr & ~0xfff;
+            pte = pte & (PHYS_ADDR_MASK | 0xfff);
+        }
+    } else {
+        /* legacy two-level paging with 32-bit entries */
+        uint32_t pde;
+
+        /* page directory entry */
+        pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
+            env->a20_mask;
+        pde = ldl_phys(pde_addr);
+        if (!(pde & PG_PRESENT_MASK)) {
+            error_code = 0;
+            goto do_fault;
+        }
+        /* if PSE bit is set, then we use a 4MB page */
+        if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
+            page_size = 4096 * 1024;
+            if (is_user) {
+                if (!(pde & PG_USER_MASK))
+                    goto do_fault_protect;
+                if (is_write && !(pde & PG_RW_MASK))
+                    goto do_fault_protect;
+            } else {
+                if ((env->cr[0] & CR0_WP_MASK) &&
+                    is_write && !(pde & PG_RW_MASK))
+                    goto do_fault_protect;
+            }
+            is_dirty = is_write && !(pde & PG_DIRTY_MASK);
+            if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
+                pde |= PG_ACCESSED_MASK;
+                if (is_dirty)
+                    pde |= PG_DIRTY_MASK;
+                stl_phys_notdirty(pde_addr, pde);
+            }
+
+            pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
+            ptep = pte;
+            virt_addr = addr & ~(page_size - 1);
+        } else {
+            if (!(pde & PG_ACCESSED_MASK)) {
+                pde |= PG_ACCESSED_MASK;
+                stl_phys_notdirty(pde_addr, pde);
+            }
+
+            /* page table entry */
+            pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
+                env->a20_mask;
+            pte = ldl_phys(pte_addr);
+            if (!(pte & PG_PRESENT_MASK)) {
+                error_code = 0;
+                goto do_fault;
+            }
+            /* combine pde and pte user and rw protections */
+            ptep = pte & pde;
+            if (is_user) {
+                if (!(ptep & PG_USER_MASK))
+                    goto do_fault_protect;
+                if (is_write && !(ptep & PG_RW_MASK))
+                    goto do_fault_protect;
+            } else {
+                if ((env->cr[0] & CR0_WP_MASK) &&
+                    is_write && !(ptep & PG_RW_MASK))
+                    goto do_fault_protect;
+            }
+            is_dirty = is_write && !(pte & PG_DIRTY_MASK);
+            if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
+                pte |= PG_ACCESSED_MASK;
+                if (is_dirty)
+                    pte |= PG_DIRTY_MASK;
+                stl_phys_notdirty(pte_addr, pte);
+            }
+            page_size = 4096;
+            virt_addr = addr & ~0xfff;
+        }
+    }
+    /* the page can be put in the TLB */
+    prot = PAGE_READ;
+    if (!(ptep & PG_NX_MASK))
+        prot |= PAGE_EXEC;
+    if (pte & PG_DIRTY_MASK) {
+        /* only set write access if already dirty... otherwise wait
+           for dirty access */
+        if (is_user) {
+            if (ptep & PG_RW_MASK)
+                prot |= PAGE_WRITE;
+        } else {
+            if (!(env->cr[0] & CR0_WP_MASK) ||
+                (ptep & PG_RW_MASK))
+                prot |= PAGE_WRITE;
+        }
+    }
+ do_mapping:
+    pte = pte & env->a20_mask;
+
+    /* Even if 4MB pages, we map only one 4KB page in the cache to
+       avoid filling it too fast */
+    page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
+    paddr = (pte & TARGET_PAGE_MASK) + page_offset;
+    vaddr = virt_addr + page_offset;
+
+    ret = tlb_set_page_exec(env, vaddr, paddr, prot, mmu_idx, is_softmmu);
+    return ret;
+ do_fault_protect:
+    /* protection violation on a present page; falls through to add the
+       W/U/I-D bits */
+    error_code = PG_ERROR_P_MASK;
+ do_fault:
+    error_code |= (is_write << PG_ERROR_W_BIT);
+    if (is_user)
+        error_code |= PG_ERROR_U_MASK;
+    /* the instruction-fetch bit is only reported with NXE and PAE on */
+    if (is_write1 == 2 &&
+        (env->efer & MSR_EFER_NXE) &&
+        (env->cr[4] & CR4_PAE_MASK))
+        error_code |= PG_ERROR_I_D_MASK;
+    if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
+        /* cr2 is not modified in case of exceptions */
+        stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 
+                 addr);
+    } else {
+        env->cr[2] = addr;
+    }
+    env->error_code = error_code;
+    env->exception_index = EXCP0E_PAGE;
+    return 1;
+}
+
+/* Translate a guest virtual address for debugging purposes.
+ *
+ * env  - CPU whose CR0/CR3/CR4 state selects the paging mode
+ * addr - guest virtual address
+ *
+ * Returns the physical address of the 4 KB page containing addr, or -1
+ * when the address is non-canonical (long mode) or any level of the
+ * page walk is not present.  Unlike the fault handler this walk checks
+ * no access rights and never updates accessed/dirty bits.
+ *
+ * CR0.PG is tested first: with paging disabled the address is identity
+ * mapped regardless of CR4.PAE, matching cpu_x86_handle_mmu_fault().
+ */
+target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
+{
+    target_ulong pde_addr, pte_addr;
+    uint64_t pte;
+    target_phys_addr_t paddr;
+    uint32_t page_offset;
+    int page_size;
+
+    if (!(env->cr[0] & CR0_PG_MASK)) {
+        /* paging disabled: identity mapping, only the A20 mask applies */
+        pte = addr & env->a20_mask;
+        page_size = 4096;
+    } else if (env->cr[4] & CR4_PAE_MASK) {
+        target_ulong pdpe_addr;
+        uint64_t pde, pdpe;
+
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint64_t pml4e_addr, pml4e;
+            int32_t sext;
+
+            /* test virtual address sign extension */
+            sext = (int64_t)addr >> 47;
+            if (sext != 0 && sext != -1)
+                return -1;
+
+            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
+                env->a20_mask;
+            pml4e = ldq_phys(pml4e_addr);
+            if (!(pml4e & PG_PRESENT_MASK))
+                return -1;
+
+            pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) &
+                env->a20_mask;
+            pdpe = ldq_phys(pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK))
+                return -1;
+        } else
+#endif
+        {
+            /* legacy PAE: CR3 points at the 4-entry PDPT */
+            pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
+                env->a20_mask;
+            pdpe = ldq_phys(pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK))
+                return -1;
+        }
+
+        pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
+            env->a20_mask;
+        pde = ldq_phys(pde_addr);
+        if (!(pde & PG_PRESENT_MASK)) {
+            return -1;
+        }
+        if (pde & PG_PSE_MASK) {
+            /* 2 MB page */
+            page_size = 2048 * 1024;
+            pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
+        } else {
+            /* 4 KB page */
+            pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
+                env->a20_mask;
+            page_size = 4096;
+            pte = ldq_phys(pte_addr);
+        }
+        if (!(pte & PG_PRESENT_MASK))
+            return -1;
+    } else {
+        uint32_t pde;
+
+        /* page directory entry */
+        pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
+        pde = ldl_phys(pde_addr);
+        if (!(pde & PG_PRESENT_MASK))
+            return -1;
+        if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
+            pte = pde & ~0x003ff000; /* align to 4MB */
+            page_size = 4096 * 1024;
+        } else {
+            /* page table entry */
+            pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
+            pte = ldl_phys(pte_addr);
+            if (!(pte & PG_PRESENT_MASK))
+                return -1;
+            page_size = 4096;
+        }
+        pte = pte & env->a20_mask;
+    }
+
+    /* for large pages, select the 4 KB page inside the big frame */
+    page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
+    paddr = (pte & TARGET_PAGE_MASK) + page_offset;
+    return paddr;
+}
+
+/* Register the debug-register breakpoint/watchpoint number 'index' with
+ * the generic breakpoint layer, according to its type field in DR7:
+ *   0 - instruction breakpoint (only inserted when enabled in DR7)
+ *   1 - write watchpoint
+ *   2 - I/O watchpoint (not supported, ignored)
+ *   3 - access watchpoint
+ * The address comes from env->dr[index].  If the insertion fails the
+ * slot env->cpu_breakpoint[index] is reset to NULL.
+ * NOTE(review): watchpoints are stored in env->cpu_watchpoint[index]
+ * but the failure path clears env->cpu_breakpoint[index]; presumably
+ * the two arrays share storage -- confirm against the CPUState
+ * declaration.
+ */
+void hw_breakpoint_insert(CPUState *env, int index)
+{
+    int type, err = 0;
+
+    switch (hw_breakpoint_type(env->dr[7], index)) {
+    case 0:
+        if (hw_breakpoint_enabled(env->dr[7], index))
+            err = cpu_breakpoint_insert(env, env->dr[index], BP_CPU,
+                                        &env->cpu_breakpoint[index]);
+        break;
+    case 1:
+        type = BP_CPU | BP_MEM_WRITE;
+        goto insert_wp;
+    case 2:
+         /* No support for I/O watchpoints yet */
+        break;
+    case 3:
+        type = BP_CPU | BP_MEM_ACCESS;
+    /* shared tail for both watchpoint kinds (case 1 jumps here) */
+    insert_wp:
+        err = cpu_watchpoint_insert(env, env->dr[index],
+                                    hw_breakpoint_len(env->dr[7], index),
+                                    type, &env->cpu_watchpoint[index]);
+        break;
+    }
+    if (err)
+        env->cpu_breakpoint[index] = NULL;
+}
+
+/* Unregister the debug-register breakpoint/watchpoint number 'index'.
+ * Nothing is done when the slot is empty.  Instruction breakpoints
+ * (type 0) are only removed when enabled in DR7; write and access
+ * watchpoints (types 1 and 3) are always removed. */
+void hw_breakpoint_remove(CPUState *env, int index)
+{
+    int kind;
+
+    if (env->cpu_breakpoint[index] == NULL)
+        return;
+
+    kind = hw_breakpoint_type(env->dr[7], index);
+    if (kind == 0) {
+        if (hw_breakpoint_enabled(env->dr[7], index))
+            cpu_breakpoint_remove_by_ref(env, env->cpu_breakpoint[index]);
+    } else if (kind == 1 || kind == 3) {
+        cpu_watchpoint_remove_by_ref(env, env->cpu_watchpoint[index]);
+    }
+    /* kind == 2: No support for I/O watchpoints yet */
+}
+
+/* Work out which of the four debug registers currently match.
+ *
+ * A type-0 register matches when its address equals env->eip; a
+ * register with an odd type matches when its watchpoint exists and is
+ * flagged BP_WATCHPOINT_HIT.  Matching registers are recorded in the
+ * low four bits of a fresh DR6 value, which is written back when at
+ * least one matching register is enabled in DR7 or when
+ * force_dr6_update is set.
+ *
+ * Returns 1 if an enabled register matched, 0 otherwise.
+ */
+int check_hw_breakpoints(CPUState *env, int force_dr6_update)
+{
+    target_ulong new_dr6 = env->dr[6] & ~0xf;
+    int any_enabled = 0;
+    int idx;
+
+    for (idx = 0; idx < 4; idx++) {
+        int kind = hw_breakpoint_type(env->dr[7], idx);
+        int matched;
+
+        if (kind == 0) {
+            matched = (env->dr[idx] == env->eip);
+        } else if (kind & 1) {
+            matched = env->cpu_watchpoint[idx] &&
+                      (env->cpu_watchpoint[idx]->flags & BP_WATCHPOINT_HIT);
+        } else {
+            matched = 0;
+        }
+        if (!matched)
+            continue;
+
+        new_dr6 |= 1 << idx;
+        if (hw_breakpoint_enabled(env->dr[7], idx))
+            any_enabled = 1;
+    }
+
+    if (any_enabled || force_dr6_update)
+        env->dr[6] = new_dr6;
+    return any_enabled;
+}
+
+static CPUDebugExcpHandler *prev_debug_excp_handler;
+
+void raise_exception(int exception_index);
+
+/* Debug exception hook for x86 hardware breakpoints and watchpoints.
+ *
+ * When a CPU-owned (BP_CPU) watchpoint was hit, the hit marker is
+ * cleared and either a #DB exception is raised (an enabled debug
+ * register matched) or execution is resumed.  Otherwise the breakpoint
+ * list is searched for an entry at the current eip; a CPU-owned one
+ * updates DR6 and raises #DB.  Finally the previously installed
+ * handler, if any, is called.
+ * NOTE(review): raise_exception() and cpu_resume_from_signal()
+ * presumably do not return, so the chained handler would only run when
+ * neither was called -- confirm.
+ */
+static void breakpoint_handler(CPUState *env)
+{
+    CPUBreakpoint *bp;
+
+    if (env->watchpoint_hit) {
+        if (env->watchpoint_hit->flags & BP_CPU) {
+            env->watchpoint_hit = NULL;
+            if (check_hw_breakpoints(env, 0))
+                raise_exception(EXCP01_DB);
+            else
+                cpu_resume_from_signal(env, NULL);
+        }
+    } else {
+        QTAILQ_FOREACH(bp, &env->breakpoints, entry)
+            if (bp->pc == env->eip) {
+                if (bp->flags & BP_CPU) {
+                    /* force the DR6 update even if nothing is enabled */
+                    check_hw_breakpoints(env, 1);
+                    raise_exception(EXCP01_DB);
+                }
+                /* only the first breakpoint at this pc is examined */
+                break;
+            }
+    }
+    if (prev_debug_excp_handler)
+        prev_debug_excp_handler(env);
+}
+
+
+/* This should come from sysemu.h - if we could include it here... */
+void qemu_system_reset_request(void);
+
+/* Inject a machine-check event into one MCE bank of a CPU.
+ *
+ * cenv       - target CPU
+ * bank       - bank number; must be below the bank count in mcg_cap
+ * status     - value for the bank status register; ignored unless
+ *              MCI_STATUS_VAL is set
+ * mcg_status - value stored in cenv->mcg_status for uncorrected errors
+ * addr, misc - values for the bank address and misc registers
+ *
+ * Each bank occupies four consecutive uint64_t slots in
+ * cenv->mce_banks: [0] control, [1] status, [2] address, [3] misc.
+ * Uncorrected errors (MCI_STATUS_UC) raise CPU_INTERRUPT_MCE, or
+ * request a system reset if a machine check is already in progress or
+ * CR4.MCE is clear.  Corrected errors are only logged, and never
+ * overwrite a valid uncorrected record.
+ */
+void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+                        uint64_t mcg_status, uint64_t addr, uint64_t misc)
+{
+    uint64_t mcg_cap = cenv->mcg_cap;
+    unsigned bank_num = mcg_cap & 0xff;
+    uint64_t *banks = cenv->mce_banks;
+
+    /* bank is converted to unsigned for this comparison, so a negative
+       bank number is rejected as well */
+    if (bank >= bank_num || !(status & MCI_STATUS_VAL))
+        return;
+
+    /*
+     * if MSR_MCG_CTL is not all 1s, the uncorrected error
+     * reporting is disabled
+     */
+    if ((status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
+        cenv->mcg_ctl != ~(uint64_t)0)
+        return;
+    /* point at the four slots of the selected bank */
+    banks += 4 * bank;
+    /*
+     * if MSR_MCi_CTL is not all 1s, the uncorrected error
+     * reporting is disabled for the bank
+     */
+    if ((status & MCI_STATUS_UC) && banks[0] != ~(uint64_t)0)
+        return;
+    if (status & MCI_STATUS_UC) {
+        if ((cenv->mcg_status & MCG_STATUS_MCIP) ||
+            !(cenv->cr[4] & CR4_MCE_MASK)) {
+            fprintf(stderr, "injects mce exception while previous "
+                    "one is in progress!\n");
+            qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
+            qemu_system_reset_request();
+            return;
+        }
+        /* a still-valid older record is being overwritten */
+        if (banks[1] & MCI_STATUS_VAL)
+            status |= MCI_STATUS_OVER;
+        banks[2] = addr;
+        banks[3] = misc;
+        cenv->mcg_status = mcg_status;
+        banks[1] = status;
+        cpu_interrupt(cenv, CPU_INTERRUPT_MCE);
+    } else if (!(banks[1] & MCI_STATUS_VAL)
+               || !(banks[1] & MCI_STATUS_UC)) {
+        /* corrected error: the bank is free or holds a corrected one */
+        if (banks[1] & MCI_STATUS_VAL)
+            status |= MCI_STATUS_OVER;
+        banks[2] = addr;
+        banks[3] = misc;
+        banks[1] = status;
+    } else
+        /* keep the valid uncorrected record, only flag the overflow */
+        banks[1] |= MCI_STATUS_OVER;
+}
+#endif /* !CONFIG_USER_ONLY */
+
+/* Set up machine-check state for CPU models that support it: family
+ * (cpuid_version bits 8..11) of at least 6 with both the MCE and MCA
+ * feature bits.  Allocates four zeroed uint64_t slots per bank and sets
+ * the first slot of every bank, as well as mcg_ctl, to all ones. */
+static void mce_init(CPUX86State *cenv)
+{
+    unsigned int nbanks, b;
+
+    if (((cenv->cpuid_version >> 8) & 0xf) < 6)
+        return;
+    if ((cenv->cpuid_features & (CPUID_MCE | CPUID_MCA)) !=
+        (CPUID_MCE | CPUID_MCA))
+        return;
+
+    cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF;
+    cenv->mcg_ctl = ~(uint64_t)0;
+
+    /* the low byte of mcg_cap holds the number of banks */
+    nbanks = cenv->mcg_cap & 0xff;
+    cenv->mce_banks = qemu_mallocz(nbanks * sizeof(uint64_t) * 4);
+    for (b = 0; b < nbanks; b++) {
+        cenv->mce_banks[b * 4] = ~(uint64_t)0;
+    }
+}
+
+/* Execute the CPUID instruction on the host CPU.
+ *
+ * function - CPUID leaf, loaded into EAX
+ * count    - sub-leaf, loaded into ECX
+ * eax, ebx, ecx, edx - receive the corresponding result register;
+ *                      any of them may be NULL to skip that value
+ *
+ * Only implemented when CONFIG_KVM is defined; otherwise the body is
+ * empty and the output pointers are left untouched.
+ */
+static void host_cpuid(uint32_t function, uint32_t count,
+                       uint32_t *eax, uint32_t *ebx,
+                       uint32_t *ecx, uint32_t *edx)
+{
+#if defined(CONFIG_KVM)
+    uint32_t vec[4];
+
+#ifdef __x86_64__
+    asm volatile("cpuid"
+                 : "=a"(vec[0]), "=b"(vec[1]),
+                   "=c"(vec[2]), "=d"(vec[3])
+                 : "0"(function), "c"(count) : "cc");
+#else
+    /* 32-bit host: all registers are saved and restored around CPUID
+       and the results are stored through ESI, which points at vec */
+    asm volatile("pusha \n\t"
+                 "cpuid \n\t"
+                 "mov %%eax, 0(%2) \n\t"
+                 "mov %%ebx, 4(%2) \n\t"
+                 "mov %%ecx, 8(%2) \n\t"
+                 "mov %%edx, 12(%2) \n\t"
+                 "popa"
+                 : : "a"(function), "c"(count), "S"(vec)
+                 : "memory", "cc");
+#endif
+
+    if (eax)
+	*eax = vec[0];
+    if (ebx)
+	*ebx = vec[1];
+    if (ecx)
+	*ecx = vec[2];
+    if (edx)
+	*edx = vec[3];
+#endif
+}
+
+void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+                   uint32_t *eax, uint32_t *ebx,
+                   uint32_t *ecx, uint32_t *edx)
+{
+    /* Fill *eax..*edx (all four must be non-NULL) for CPUID leaf `index`; leaves past the maximum fall back to cpuid_level. */
+    if (index & 0x80000000) {
+        if (index > env->cpuid_xlevel)
+            index = env->cpuid_level;
+    } else {
+        if (index > env->cpuid_level)
+            index = env->cpuid_level;
+    }
+
+    switch(index) {
+    case 0:
+        *eax = env->cpuid_level;
+        *ebx = env->cpuid_vendor1;
+        *edx = env->cpuid_vendor2;
+        *ecx = env->cpuid_vendor3;
+
+        /* sysenter isn't supported in compatibility mode on AMD, and syscall
+         * isn't supported in compatibility mode on Intel, so advertise the
+         * actual cpu, and say goodbye to migration between different vendors
+         * if you use compatibility mode. */
+        if (kvm_enabled() && !env->cpuid_vendor_override)
+            host_cpuid(0, 0, NULL, ebx, ecx, edx);
+        break;
+    case 1:
+        *eax = env->cpuid_version;
+        *ebx = (env->cpuid_apic_id << 24) | 8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
+        *ecx = env->cpuid_ext_features;
+        *edx = env->cpuid_features;
+
+        /* "Hypervisor present" bit required for Microsoft SVVP */
+        if (kvm_enabled())
+            *ecx |= (1 << 31);
+        break;
+    case 2:
+        /* cache info: needed for Pentium Pro compatibility */
+        *eax = 1;
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0x2c307d;
+        break;
+    case 4:
+        /* cache info: needed for Core compatibility; `count` selects the sub-leaf */
+        switch (count) {
+            case 0: /* L1 dcache info */
+                *eax = 0x0000121;
+                *ebx = 0x1c0003f;
+                *ecx = 0x000003f;
+                *edx = 0x0000001;
+                break;
+            case 1: /* L1 icache info */
+                *eax = 0x0000122;
+                *ebx = 0x1c0003f;
+                *ecx = 0x000003f;
+                *edx = 0x0000001;
+                break;
+            case 2: /* L2 cache info */
+                *eax = 0x0000143;
+                *ebx = 0x3c0003f;
+                *ecx = 0x0000fff;
+                *edx = 0x0000001;
+                break;
+            default: /* end of info */
+                *eax = 0;
+                *ebx = 0;
+                *ecx = 0;
+                *edx = 0;
+                break;
+        }
+        break;
+    case 5:
+        /* mwait info: needed for Core compatibility */
+        *eax = 0; /* Smallest monitor-line size in bytes */
+        *ebx = 0; /* Largest monitor-line size in bytes */
+        *ecx = CPUID_MWAIT_EMX | CPUID_MWAIT_IBE;
+        *edx = 0;
+        break;
+    case 6:
+        /* Thermal and Power Leaf */
+        *eax = 0;
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        break;
+    case 9:
+        /* Direct Cache Access Information Leaf */
+        *eax = 0; /* Bits 0-31 in DCA_CAP MSR */
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        break;
+    case 0xA:
+        /* Architectural Performance Monitoring Leaf */
+        *eax = 0;
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        break;
+    case 0x80000000:
+        *eax = env->cpuid_xlevel;
+        *ebx = env->cpuid_vendor1;
+        *edx = env->cpuid_vendor2;
+        *ecx = env->cpuid_vendor3;
+        break;
+    case 0x80000001:
+        *eax = env->cpuid_version; /* processor signature as in leaf 1 EAX, not the feature word */
+        *ebx = 0;
+        *ecx = env->cpuid_ext3_features;
+        *edx = env->cpuid_ext2_features;
+
+        if (kvm_enabled()) {
+            uint32_t h_eax, h_edx;
+
+            host_cpuid(index, 0, &h_eax, NULL, NULL, &h_edx);
+
+            /* disable CPU features that the host does not support */
+
+            /* long mode */
+            if ((h_edx & 0x20000000) == 0 /* || !lm_capable_kernel */)
+                *edx &= ~0x20000000;
+            /* syscall */
+            if ((h_edx & 0x00000800) == 0)
+                *edx &= ~0x00000800;
+            /* nx */
+            if ((h_edx & 0x00100000) == 0)
+                *edx &= ~0x00100000;
+
+            /* disable CPU features that KVM cannot support */
+
+            /* svm */
+            *ecx &= ~4UL;
+            /* 3dnow */
+            *edx &= ~0xc0000000;
+        }
+        break;
+    case 0x80000002:
+    case 0x80000003:
+    case 0x80000004:
+        *eax = env->cpuid_model[(index - 0x80000002) * 4 + 0];
+        *ebx = env->cpuid_model[(index - 0x80000002) * 4 + 1];
+        *ecx = env->cpuid_model[(index - 0x80000002) * 4 + 2];
+        *edx = env->cpuid_model[(index - 0x80000002) * 4 + 3];
+        break;
+    case 0x80000005:
+        /* cache info (L1 cache) */
+        *eax = 0x01ff01ff;
+        *ebx = 0x01ff01ff;
+        *ecx = 0x40020140;
+        *edx = 0x40020140;
+        break;
+    case 0x80000006:
+        /* cache info (L2 cache) */
+        *eax = 0;
+        *ebx = 0x42004200;
+        *ecx = 0x02008140;
+        *edx = 0;
+        break;
+    case 0x80000008:
+        /* virtual & phys address size in low 2 bytes. */
+        /* XXX: This value must match the one used in the MMU code. */
+        if (env->cpuid_ext2_features & CPUID_EXT2_LM) {
+            /* 64 bit processor */
+#if defined(CONFIG_KQEMU)
+            *eax = 0x00003020; /* 48 bits virtual, 32 bits physical */
+#else
+            /* XXX: The physical address space is limited to 42 bits in exec.c. */
+            *eax = 0x00003028; /* 48 bits virtual, 40 bits physical */
+#endif
+        } else {
+#if defined(CONFIG_KQEMU)
+            *eax = 0x00000020; /* 32 bits physical */
+#else
+            if (env->cpuid_features & CPUID_PSE36)
+                *eax = 0x00000024; /* 36 bits physical */
+            else
+                *eax = 0x00000020; /* 32 bits physical */
+#endif
+        }
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        break;
+    case 0x8000000A:
+        *eax = 0x00000001; /* SVM Revision */
+        *ebx = 0x00000010; /* nr of ASIDs */
+        *ecx = 0;
+        *edx = 0; /* optional features */
+        break;
+    default:
+        /* reserved values: zero */
+        *eax = 0;
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        break;
+    }
+}
+
+CPUX86State *cpu_x86_init(const char *cpu_model)
+{
+    CPUX86State *env;
+    static int inited; /* non-zero once the one-time setup below has run */
+    /* Allocate a zeroed CPU state, register `cpu_model` on it and reset it; returns NULL if registration fails. */
+    env = qemu_mallocz(sizeof(CPUX86State));
+    cpu_exec_init(env);
+    env->cpu_model_str = cpu_model; /* the pointer is stored as-is; the string is not copied here */
+
+    /* init various static tables */
+    if (!inited) {
+        inited = 1;
+        optimize_flags_init();
+#ifndef CONFIG_USER_ONLY
+        prev_debug_excp_handler =
+            cpu_set_debug_excp_handler(breakpoint_handler);
+#endif
+    }
+    if (cpu_x86_register(env, cpu_model) < 0) {
+        cpu_x86_close(env); /* registration failed: hand env to cpu_x86_close() and report NULL */
+        return NULL;
+    }
+    mce_init(env);
+    cpu_reset(env);
+#ifdef CONFIG_KQEMU
+    kqemu_init(env);
+#endif
+
+    qemu_init_vcpu(env);
+
+    if (kvm_enabled()) { /* NOTE(review): presumably drops feature bits KVM does not report -- confirm in kvm_trim_features */
+        kvm_trim_features(&env->cpuid_features,
+                          kvm_arch_get_supported_cpuid(env, 1, R_EDX),
+                          feature_name);
+        kvm_trim_features(&env->cpuid_ext_features,
+                          kvm_arch_get_supported_cpuid(env, 1, R_ECX),
+                          ext_feature_name);
+        kvm_trim_features(&env->cpuid_ext2_features,
+                          kvm_arch_get_supported_cpuid(env, 0x80000001, R_EDX),
+                          ext2_feature_name);
+        kvm_trim_features(&env->cpuid_ext3_features,
+                          kvm_arch_get_supported_cpuid(env, 0x80000001, R_ECX),
+                          ext3_feature_name);
+    }
+
+    return env;
+}
+
+#ifndef CONFIG_USER_ONLY
+/* Reset the CPU while keeping only the SIPI bit of interrupt_request. */
+void do_cpu_init(CPUState *env)
+{
+    const int pending_sipi = env->interrupt_request & CPU_INTERRUPT_SIPI;
+
+    cpu_reset(env);
+    env->interrupt_request = pending_sipi;
+    apic_init_reset(env);
+}
+
+/* Forward the start-up IPI to the APIC code. */
+void do_cpu_sipi(CPUState *env)
+{
+    apic_sipi(env);
+}
+#else /* CONFIG_USER_ONLY */
+/* User-only builds: both hooks are empty. */
+void do_cpu_init(CPUState *env) { }
+void do_cpu_sipi(CPUState *env) { }
+#endif
diff --git a/target-i386/helper.h b/target-i386/helper.h
new file mode 100644
index 0000000..68d57b1
--- /dev/null
+++ b/target-i386/helper.h
@@ -0,0 +1,218 @@
+#include "def-helper.h"
+
+DEF_HELPER_FLAGS_1(cc_compute_all, TCG_CALL_PURE, i32, int)
+DEF_HELPER_FLAGS_1(cc_compute_c, TCG_CALL_PURE, i32, int)
+
+DEF_HELPER_0(lock, void)
+DEF_HELPER_0(unlock, void)
+DEF_HELPER_2(write_eflags, void, tl, i32)
+DEF_HELPER_0(read_eflags, tl)
+DEF_HELPER_1(divb_AL, void, tl)
+DEF_HELPER_1(idivb_AL, void, tl)
+DEF_HELPER_1(divw_AX, void, tl)
+DEF_HELPER_1(idivw_AX, void, tl)
+DEF_HELPER_1(divl_EAX, void, tl)
+DEF_HELPER_1(idivl_EAX, void, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(mulq_EAX_T0, void, tl)
+DEF_HELPER_1(imulq_EAX_T0, void, tl)
+DEF_HELPER_2(imulq_T0_T1, tl, tl, tl)
+DEF_HELPER_1(divq_EAX, void, tl)
+DEF_HELPER_1(idivq_EAX, void, tl)
+#endif
+
+DEF_HELPER_1(aam, void, int)
+DEF_HELPER_1(aad, void, int)
+DEF_HELPER_0(aaa, void)
+DEF_HELPER_0(aas, void)
+DEF_HELPER_0(daa, void)
+DEF_HELPER_0(das, void)
+
+DEF_HELPER_1(lsl, tl, tl)
+DEF_HELPER_1(lar, tl, tl)
+DEF_HELPER_1(verr, void, tl)
+DEF_HELPER_1(verw, void, tl)
+DEF_HELPER_1(lldt, void, int)
+DEF_HELPER_1(ltr, void, int)
+DEF_HELPER_2(load_seg, void, int, int)
+DEF_HELPER_3(ljmp_protected, void, int, tl, int)
+DEF_HELPER_4(lcall_real, void, int, tl, int, int)
+DEF_HELPER_4(lcall_protected, void, int, tl, int, int)
+DEF_HELPER_1(iret_real, void, int)
+DEF_HELPER_2(iret_protected, void, int, int)
+DEF_HELPER_2(lret_protected, void, int, int)
+DEF_HELPER_1(read_crN, tl, int)
+DEF_HELPER_2(write_crN, void, int, tl)
+DEF_HELPER_1(lmsw, void, tl)
+DEF_HELPER_0(clts, void)
+DEF_HELPER_2(movl_drN_T0, void, int, tl)
+DEF_HELPER_1(invlpg, void, tl)
+
+DEF_HELPER_3(enter_level, void, int, int, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_3(enter64_level, void, int, int, tl)
+#endif
+DEF_HELPER_0(sysenter, void)
+DEF_HELPER_1(sysexit, void, int)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(syscall, void, int)
+DEF_HELPER_1(sysret, void, int)
+#endif
+DEF_HELPER_1(hlt, void, int)
+DEF_HELPER_1(monitor, void, tl)
+DEF_HELPER_1(mwait, void, int)
+DEF_HELPER_0(debug, void)
+DEF_HELPER_0(reset_rf, void)
+DEF_HELPER_2(raise_interrupt, void, int, int)
+DEF_HELPER_1(raise_exception, void, int)
+DEF_HELPER_0(cli, void)
+DEF_HELPER_0(sti, void)
+DEF_HELPER_0(set_inhibit_irq, void)
+DEF_HELPER_0(reset_inhibit_irq, void)
+DEF_HELPER_2(boundw, void, tl, int)
+DEF_HELPER_2(boundl, void, tl, int)
+DEF_HELPER_0(rsm, void)
+DEF_HELPER_1(into, void, int)
+DEF_HELPER_1(cmpxchg8b, void, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(cmpxchg16b, void, tl)
+#endif
+DEF_HELPER_0(single_step, void)
+DEF_HELPER_0(cpuid, void)
+DEF_HELPER_0(rdtsc, void)
+DEF_HELPER_0(rdpmc, void)
+DEF_HELPER_0(rdmsr, void)
+DEF_HELPER_0(wrmsr, void)
+
+DEF_HELPER_1(check_iob, void, i32)
+DEF_HELPER_1(check_iow, void, i32)
+DEF_HELPER_1(check_iol, void, i32)
+DEF_HELPER_2(outb, void, i32, i32)
+DEF_HELPER_1(inb, tl, i32)
+DEF_HELPER_2(outw, void, i32, i32)
+DEF_HELPER_1(inw, tl, i32)
+DEF_HELPER_2(outl, void, i32, i32)
+DEF_HELPER_1(inl, tl, i32)
+
+DEF_HELPER_2(svm_check_intercept_param, void, i32, i64)
+DEF_HELPER_2(vmexit, void, i32, i64)
+DEF_HELPER_3(svm_check_io, void, i32, i32, i32)
+DEF_HELPER_2(vmrun, void, int, int)
+DEF_HELPER_0(vmmcall, void)
+DEF_HELPER_1(vmload, void, int)
+DEF_HELPER_1(vmsave, void, int)
+DEF_HELPER_0(stgi, void)
+DEF_HELPER_0(clgi, void)
+DEF_HELPER_0(skinit, void)
+DEF_HELPER_1(invlpga, void, int)
+
+/* x86 FPU */
+
+DEF_HELPER_1(flds_FT0, void, i32)
+DEF_HELPER_1(fldl_FT0, void, i64)
+DEF_HELPER_1(fildl_FT0, void, s32)
+DEF_HELPER_1(flds_ST0, void, i32)
+DEF_HELPER_1(fldl_ST0, void, i64)
+DEF_HELPER_1(fildl_ST0, void, s32)
+DEF_HELPER_1(fildll_ST0, void, s64)
+DEF_HELPER_0(fsts_ST0, i32)
+DEF_HELPER_0(fstl_ST0, i64)
+DEF_HELPER_0(fist_ST0, s32)
+DEF_HELPER_0(fistl_ST0, s32)
+DEF_HELPER_0(fistll_ST0, s64)
+DEF_HELPER_0(fistt_ST0, s32)
+DEF_HELPER_0(fisttl_ST0, s32)
+DEF_HELPER_0(fisttll_ST0, s64)
+DEF_HELPER_1(fldt_ST0, void, tl)
+DEF_HELPER_1(fstt_ST0, void, tl)
+DEF_HELPER_0(fpush, void)
+DEF_HELPER_0(fpop, void)
+DEF_HELPER_0(fdecstp, void)
+DEF_HELPER_0(fincstp, void)
+DEF_HELPER_1(ffree_STN, void, int)
+DEF_HELPER_0(fmov_ST0_FT0, void)
+DEF_HELPER_1(fmov_FT0_STN, void, int)
+DEF_HELPER_1(fmov_ST0_STN, void, int)
+DEF_HELPER_1(fmov_STN_ST0, void, int)
+DEF_HELPER_1(fxchg_ST0_STN, void, int)
+DEF_HELPER_0(fcom_ST0_FT0, void)
+DEF_HELPER_0(fucom_ST0_FT0, void)
+DEF_HELPER_0(fcomi_ST0_FT0, void)
+DEF_HELPER_0(fucomi_ST0_FT0, void)
+DEF_HELPER_0(fadd_ST0_FT0, void)
+DEF_HELPER_0(fmul_ST0_FT0, void)
+DEF_HELPER_0(fsub_ST0_FT0, void)
+DEF_HELPER_0(fsubr_ST0_FT0, void)
+DEF_HELPER_0(fdiv_ST0_FT0, void)
+DEF_HELPER_0(fdivr_ST0_FT0, void)
+DEF_HELPER_1(fadd_STN_ST0, void, int)
+DEF_HELPER_1(fmul_STN_ST0, void, int)
+DEF_HELPER_1(fsub_STN_ST0, void, int)
+DEF_HELPER_1(fsubr_STN_ST0, void, int)
+DEF_HELPER_1(fdiv_STN_ST0, void, int)
+DEF_HELPER_1(fdivr_STN_ST0, void, int)
+DEF_HELPER_0(fchs_ST0, void)
+DEF_HELPER_0(fabs_ST0, void)
+DEF_HELPER_0(fxam_ST0, void)
+DEF_HELPER_0(fld1_ST0, void)
+DEF_HELPER_0(fldl2t_ST0, void)
+DEF_HELPER_0(fldl2e_ST0, void)
+DEF_HELPER_0(fldpi_ST0, void)
+DEF_HELPER_0(fldlg2_ST0, void)
+DEF_HELPER_0(fldln2_ST0, void)
+DEF_HELPER_0(fldz_ST0, void)
+DEF_HELPER_0(fldz_FT0, void)
+DEF_HELPER_0(fnstsw, i32)
+DEF_HELPER_0(fnstcw, i32)
+DEF_HELPER_1(fldcw, void, i32)
+DEF_HELPER_0(fclex, void)
+DEF_HELPER_0(fwait, void)
+DEF_HELPER_0(fninit, void)
+DEF_HELPER_1(fbld_ST0, void, tl)
+DEF_HELPER_1(fbst_ST0, void, tl)
+DEF_HELPER_0(f2xm1, void)
+DEF_HELPER_0(fyl2x, void)
+DEF_HELPER_0(fptan, void)
+DEF_HELPER_0(fpatan, void)
+DEF_HELPER_0(fxtract, void)
+DEF_HELPER_0(fprem1, void)
+DEF_HELPER_0(fprem, void)
+DEF_HELPER_0(fyl2xp1, void)
+DEF_HELPER_0(fsqrt, void)
+DEF_HELPER_0(fsincos, void)
+DEF_HELPER_0(frndint, void)
+DEF_HELPER_0(fscale, void)
+DEF_HELPER_0(fsin, void)
+DEF_HELPER_0(fcos, void)
+DEF_HELPER_2(fstenv, void, tl, int)
+DEF_HELPER_2(fldenv, void, tl, int)
+DEF_HELPER_2(fsave, void, tl, int)
+DEF_HELPER_2(frstor, void, tl, int)
+DEF_HELPER_2(fxsave, void, tl, int)
+DEF_HELPER_2(fxrstor, void, tl, int)
+DEF_HELPER_1(bsf, tl, tl)
+DEF_HELPER_1(bsr, tl, tl)
+
+/* MMX/SSE */
+
+DEF_HELPER_0(enter_mmx, void)
+DEF_HELPER_0(emms, void)
+DEF_HELPER_2(movq, void, ptr, ptr)
+
+#define SHIFT 0
+#include "ops_sse_header.h"
+#define SHIFT 1
+#include "ops_sse_header.h"
+
+DEF_HELPER_2(rclb, tl, tl, tl)
+DEF_HELPER_2(rclw, tl, tl, tl)
+DEF_HELPER_2(rcll, tl, tl, tl)
+DEF_HELPER_2(rcrb, tl, tl, tl)
+DEF_HELPER_2(rcrw, tl, tl, tl)
+DEF_HELPER_2(rcrl, tl, tl, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_2(rclq, tl, tl, tl)
+DEF_HELPER_2(rcrq, tl, tl, tl)
+#endif
+
+#include "def-helper.h"
diff --git a/target-i386/helper_template.h b/target-i386/helper_template.h
new file mode 100644
index 0000000..c1087ac
--- /dev/null
+++ b/target-i386/helper_template.h
@@ -0,0 +1,335 @@
+/*
+ *  i386 helpers
+ *
+ *  Copyright (c) 2008 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+#define DATA_BITS (1 << (3 + SHIFT)) /* operand width in bits: 8 << SHIFT; SHIFT is set by the including file */
+#define SHIFT_MASK (DATA_BITS - 1)
+#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1)) /* top bit of a DATA_BITS-wide value */
+#if DATA_BITS <= 32
+#define SHIFT1_MASK 0x1f /* shift/rotate counts keep 5 bits for operands up to 32 bits */
+#else
+#define SHIFT1_MASK 0x3f /* and 6 bits for 64-bit operands */
+#endif
+
+#if DATA_BITS == 8
+#define SUFFIX b
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#define DATA_MASK 0xff
+#elif DATA_BITS == 16
+#define SUFFIX w
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#define DATA_MASK 0xffff
+#elif DATA_BITS == 32
+#define SUFFIX l
+#define DATA_TYPE uint32_t
+#define DATA_STYPE int32_t
+#define DATA_MASK 0xffffffff
+#elif DATA_BITS == 64
+#define SUFFIX q
+#define DATA_TYPE uint64_t
+#define DATA_STYPE int64_t
+#define DATA_MASK 0xffffffffffffffffULL
+#else
+#error unhandled operand size
+#endif
+
+/* dynamic flags computation */
+
+/* ADD: rebuild all six flags; the second operand is CC_DST - CC_SRC. */
+static int glue(compute_all_add, SUFFIX)(void)
+{
+    const target_long op1 = CC_SRC, op2 = CC_DST - CC_SRC;
+    int flags = (DATA_TYPE)CC_DST < (DATA_TYPE)op1;          /* CF */
+
+    flags |= parity_table[(uint8_t)CC_DST];                  /* PF */
+    flags |= (CC_DST ^ op1 ^ op2) & 0x10;                    /* AF */
+    flags |= ((DATA_TYPE)CC_DST == 0) << 6;                  /* ZF */
+    flags |= lshift(CC_DST, 8 - DATA_BITS) & 0x80;           /* SF */
+    flags |= lshift((op1 ^ op2 ^ -1) & (op1 ^ CC_DST),
+                    12 - DATA_BITS) & CC_O;                  /* OF */
+    return flags;
+}
+
+/* ADD, carry only. */
+static int glue(compute_c_add, SUFFIX)(void)
+{
+    const target_long op1 = CC_SRC;
+
+    /* CF: the truncated result is below the first operand */
+    return (DATA_TYPE)CC_DST < (DATA_TYPE)op1;
+}
+
+/* ADC: rebuild all six flags; the second operand is CC_DST - CC_SRC - 1. */
+static int glue(compute_all_adc, SUFFIX)(void)
+{
+    const target_long op1 = CC_SRC, op2 = CC_DST - CC_SRC - 1;
+    int flags = (DATA_TYPE)CC_DST <= (DATA_TYPE)op1;         /* CF */
+
+    flags |= parity_table[(uint8_t)CC_DST];                  /* PF */
+    flags |= (CC_DST ^ op1 ^ op2) & 0x10;                    /* AF */
+    flags |= ((DATA_TYPE)CC_DST == 0) << 6;                  /* ZF */
+    flags |= lshift(CC_DST, 8 - DATA_BITS) & 0x80;           /* SF */
+    flags |= lshift((op1 ^ op2 ^ -1) & (op1 ^ CC_DST),
+                    12 - DATA_BITS) & CC_O;                  /* OF */
+    return flags;
+}
+
+/* ADC, carry only. */
+static int glue(compute_c_adc, SUFFIX)(void)
+{
+    const target_long op1 = CC_SRC;
+
+    /* CF: the truncated result is not above the first operand */
+    return (DATA_TYPE)CC_DST <= (DATA_TYPE)op1;
+}
+
+/* SUB: rebuild all six flags; the first operand is CC_DST + CC_SRC. */
+static int glue(compute_all_sub, SUFFIX)(void)
+{
+    const target_long op1 = CC_DST + CC_SRC, op2 = CC_SRC;
+    int flags = (DATA_TYPE)op1 < (DATA_TYPE)op2;             /* CF */
+
+    flags |= parity_table[(uint8_t)CC_DST];                  /* PF */
+    flags |= (CC_DST ^ op1 ^ op2) & 0x10;                    /* AF */
+    flags |= ((DATA_TYPE)CC_DST == 0) << 6;                  /* ZF */
+    flags |= lshift(CC_DST, 8 - DATA_BITS) & 0x80;           /* SF */
+    flags |= lshift((op1 ^ op2) & (op1 ^ CC_DST),
+                    12 - DATA_BITS) & CC_O;                  /* OF */
+    return flags;
+}
+
+/* SUB, carry (borrow) only. */
+static int glue(compute_c_sub, SUFFIX)(void)
+{
+    const target_long minuend = CC_DST + CC_SRC;
+    const target_long subtrahend = CC_SRC;
+
+    /* CF: the first operand is below the second */
+    return (DATA_TYPE)minuend < (DATA_TYPE)subtrahend;
+}
+
+/* SBB: rebuild all six flags; the first operand is CC_DST + CC_SRC + 1. */
+static int glue(compute_all_sbb, SUFFIX)(void)
+{
+    const target_long op1 = CC_DST + CC_SRC + 1, op2 = CC_SRC;
+    int flags = (DATA_TYPE)op1 <= (DATA_TYPE)op2;            /* CF */
+
+    flags |= parity_table[(uint8_t)CC_DST];                  /* PF */
+    flags |= (CC_DST ^ op1 ^ op2) & 0x10;                    /* AF */
+    flags |= ((DATA_TYPE)CC_DST == 0) << 6;                  /* ZF */
+    flags |= lshift(CC_DST, 8 - DATA_BITS) & 0x80;           /* SF */
+    flags |= lshift((op1 ^ op2) & (op1 ^ CC_DST),
+                    12 - DATA_BITS) & CC_O;                  /* OF */
+    return flags;
+}
+
+/* SBB, carry (borrow) only. */
+static int glue(compute_c_sbb, SUFFIX)(void)
+{
+    const target_long minuend = CC_DST + CC_SRC + 1;
+    const target_long subtrahend = CC_SRC;
+
+    /* CF: the first operand is not above the second */
+    return (DATA_TYPE)minuend <= (DATA_TYPE)subtrahend;
+}
+
+/* Logic ops: CF, AF and OF are always clear; PF, ZF and SF follow CC_DST. */
+static int glue(compute_all_logic, SUFFIX)(void)
+{
+    int flags;
+
+    flags = parity_table[(uint8_t)CC_DST];                   /* PF */
+    flags |= ((DATA_TYPE)CC_DST == 0) << 6;                  /* ZF */
+    flags |= lshift(CC_DST, 8 - DATA_BITS) & 0x80;           /* SF */
+
+    return flags;
+}
+
+static int glue(compute_c_logic, SUFFIX)(void)
+{
+    return 0; /* the carry reported for logic ops is always 0 */
+}
+
+/* INC: CF is taken unchanged from CC_SRC; the other flags derive from CC_DST. */
+static int glue(compute_all_inc, SUFFIX)(void)
+{
+    const target_long op1 = CC_DST - 1, op2 = 1;
+    int flags = CC_SRC;                                      /* CF */
+
+    flags |= parity_table[(uint8_t)CC_DST];                  /* PF */
+    flags |= (CC_DST ^ op1 ^ op2) & 0x10;                    /* AF */
+    flags |= ((DATA_TYPE)CC_DST == 0) << 6;                  /* ZF */
+    flags |= lshift(CC_DST, 8 - DATA_BITS) & 0x80;           /* SF */
+    /* OF: the result is exactly the sign bit */
+    flags |= ((CC_DST & DATA_MASK) == SIGN_MASK) << 11;
+    return flags;
+}
+
+#if DATA_BITS == 32 /* only the 32-bit instantiation of this template defines the function */
+static int glue(compute_c_inc, SUFFIX)(void)
+{
+    return CC_SRC; /* same value compute_all_inc uses as CF */
+}
+#endif
+
+/* DEC: CF is taken unchanged from CC_SRC; the other flags derive from CC_DST. */
+static int glue(compute_all_dec, SUFFIX)(void)
+{
+    const target_long op1 = CC_DST + 1, op2 = 1;
+    int flags = CC_SRC;                                      /* CF */
+
+    flags |= parity_table[(uint8_t)CC_DST];                  /* PF */
+    flags |= (CC_DST ^ op1 ^ op2) & 0x10;                    /* AF */
+    flags |= ((DATA_TYPE)CC_DST == 0) << 6;                  /* ZF */
+    flags |= lshift(CC_DST, 8 - DATA_BITS) & 0x80;           /* SF */
+    /* OF: the result is exactly SIGN_MASK - 1 */
+    flags |= ((CC_DST & DATA_MASK) == ((target_ulong)SIGN_MASK - 1)) << 11;
+    return flags;
+}
+
+/* SHL: CF is the top bit of CC_SRC; AF is left clear (undefined). */
+static int glue(compute_all_shl, SUFFIX)(void)
+{
+    int flags = (CC_SRC >> (DATA_BITS - 1)) & CC_C;          /* CF */
+
+    flags |= parity_table[(uint8_t)CC_DST];                  /* PF */
+    flags |= ((DATA_TYPE)CC_DST == 0) << 6;                  /* ZF */
+    flags |= lshift(CC_DST, 8 - DATA_BITS) & 0x80;           /* SF */
+    /* of is defined if shift count == 1 */
+    flags |= lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O; /* OF */
+    return flags;
+}
+
+static int glue(compute_c_shl, SUFFIX)(void)
+{
+    return (CC_SRC >> (DATA_BITS - 1)) & CC_C; /* top bit of CC_SRC, masked with CC_C */
+}
+
+#if DATA_BITS == 32 /* only the 32-bit instantiation of this template defines the function */
+static int glue(compute_c_sar, SUFFIX)(void)
+{
+    return CC_SRC & 1; /* bit 0 of CC_SRC, the same value compute_all_sar uses as CF */
+}
+#endif
+
+/* SAR: CF is bit 0 of CC_SRC; AF is left clear (undefined). */
+static int glue(compute_all_sar, SUFFIX)(void)
+{
+    int flags = CC_SRC & 1;                                  /* CF */
+
+    flags |= parity_table[(uint8_t)CC_DST];                  /* PF */
+    flags |= ((DATA_TYPE)CC_DST == 0) << 6;                  /* ZF */
+    flags |= lshift(CC_DST, 8 - DATA_BITS) & 0x80;           /* SF */
+    /* of is defined if shift count == 1 */
+    flags |= lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O; /* OF */
+    return flags;
+}
+
+#if DATA_BITS == 32
+/* MUL, carry only. */
+static int glue(compute_c_mul, SUFFIX)(void)
+{
+    /* CF: set whenever CC_SRC is non-zero */
+    return CC_SRC != 0;
+}
+#endif
+
+/* NOTE: we compute the flags like the P4. On older CPUs, only OF and
+   CF are modified and it is slower to do that. */
+static int glue(compute_all_mul, SUFFIX)(void)
+{
+    const int carry = (CC_SRC != 0);
+    int flags = carry | (carry << 11);          /* CF and OF are set together */
+
+    flags |= parity_table[(uint8_t)CC_DST];                  /* PF */
+    flags |= ((DATA_TYPE)CC_DST == 0) << 6;                  /* ZF */
+    flags |= lshift(CC_DST, 8 - DATA_BITS) & 0x80;           /* SF */
+    /* AF is left clear (undefined) */
+    return flags;
+}
+
+/* shifts */
+
+target_ulong glue(helper_rcl, SUFFIX)(target_ulong t0, target_ulong t1)
+{
+    int count, eflags;
+    target_ulong src;
+    target_long res;
+    /* Rotate t0 left through the carry flag by t1; returns the new value and leaves the flags in env->cc_tmp. */
+    count = t1 & SHIFT1_MASK;
+#if DATA_BITS == 16
+    count = rclw_table[count]; /* NOTE(review): presumably reduces count modulo 17 -- table is defined elsewhere */
+#elif DATA_BITS == 8
+    count = rclb_table[count]; /* NOTE(review): presumably reduces count modulo 9 -- table is defined elsewhere */
+#endif
+    if (count) {
+        eflags = helper_cc_compute_all(CC_OP);
+        t0 &= DATA_MASK;
+        src = t0;
+        res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1)); /* old CF enters at bit count-1 */
+        if (count > 1)
+            res |= t0 >> (DATA_BITS + 1 - count); /* high bits of t0 wrap around to the bottom */
+        t0 = res;
+        env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
+            (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
+            ((src >> (DATA_BITS - count)) & CC_C); /* new CF = last bit rotated out of src */
+    } else {
+        env->cc_tmp = -1; /* count == 0: t0 is returned unchanged and cc_tmp is set to -1 */
+    }
+    return t0;
+}
+
+target_ulong glue(helper_rcr, SUFFIX)(target_ulong t0, target_ulong t1)
+{
+    int count, eflags;
+    target_ulong src;
+    target_long res;
+    /* Rotate t0 right through the carry flag by t1; returns the new value and leaves the flags in env->cc_tmp. */
+    count = t1 & SHIFT1_MASK;
+#if DATA_BITS == 16
+    count = rclw_table[count]; /* same lookup table as helper_rcl */
+#elif DATA_BITS == 8
+    count = rclb_table[count]; /* same lookup table as helper_rcl */
+#endif
+    if (count) {
+        eflags = helper_cc_compute_all(CC_OP);
+        t0 &= DATA_MASK;
+        src = t0;
+        res = (t0 >> count) | ((target_ulong)(eflags & CC_C) << (DATA_BITS - count)); /* old CF enters at bit DATA_BITS-count */
+        if (count > 1)
+            res |= t0 << (DATA_BITS + 1 - count); /* low bits of t0 wrap around to the top */
+        t0 = res;
+        env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
+            (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
+            ((src >> (count - 1)) & CC_C); /* new CF = last bit rotated out of src */
+    } else {
+        env->cc_tmp = -1; /* count == 0: t0 is returned unchanged and cc_tmp is set to -1 */
+    }
+    return t0;
+}
+
+#undef DATA_BITS
+#undef SHIFT_MASK
+#undef SHIFT1_MASK
+#undef SIGN_MASK
+#undef DATA_TYPE
+#undef DATA_STYPE
+#undef DATA_MASK
+#undef SUFFIX
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
new file mode 100644
index 0000000..c4fd484
--- /dev/null
+++ b/target-i386/kvm.c
@@ -0,0 +1,930 @@
+/*
+ * QEMU KVM support
+ *
+ * Copyright (C) 2006-2008 Qumranet Technologies
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/kvm.h>
+
+#include "qemu-common.h"
+#include "sysemu.h"
+#include "kvm.h"
+#include "cpu.h"
+#include "gdbstub.h"
+
+//#define DEBUG_KVM
+
+#ifdef DEBUG_KVM
+#define dprintf(fmt, ...) \
+    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define dprintf(fmt, ...) \
+    do { } while (0)
+#endif
+
+#ifdef KVM_CAP_EXT_CPUID
+
+/* Query KVM_GET_SUPPORTED_CPUID with room for `max` entries.
+ * Returns NULL when the table did not fit (or exactly filled the buffer);
+ * any other ioctl failure prints a message and terminates the process. */
+static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
+{
+    struct kvm_cpuid2 *table;
+    int size = sizeof(*table) + max * sizeof(*table->entries);
+    int rc;
+
+    table = qemu_mallocz(size);
+    table->nent = max;
+    rc = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, table);
+    /* A completely filled buffer is treated like an overflow. */
+    if (rc == -E2BIG || (rc == 0 && table->nent >= max)) {
+        qemu_free(table);
+        return NULL;
+    }
+    if (rc < 0) {
+        fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
+                strerror(-rc));
+        exit(1);
+    }
+    return table;
+}
+
+/* Return register `reg` of CPUID leaf `function` as reported by
+ * KVM_GET_SUPPORTED_CPUID, 0 if no entry matches, or -1U when the kernel
+ * lacks KVM_CAP_EXT_CPUID. If several entries match, the last one wins. */
+uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
+{
+    struct kvm_cpuid2 *table;
+    const struct kvm_cpuid_entry2 *ent;
+    uint32_t value = 0;
+    int idx, capacity;
+
+    if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) {
+        return -1U;
+    }
+
+    /* Double the buffer until the whole table fits. */
+    for (capacity = 1; ; capacity *= 2) {
+        table = try_get_cpuid(env->kvm_state, capacity);
+        if (table != NULL) {
+            break;
+        }
+    }
+
+    for (idx = 0; idx < table->nent; ++idx) {
+        ent = &table->entries[idx];
+        if (ent->function != function) {
+            continue;
+        }
+        if (reg == R_EAX) {
+            value = ent->eax;
+        } else if (reg == R_EBX) {
+            value = ent->ebx;
+        } else if (reg == R_ECX) {
+            value = ent->ecx;
+        } else if (reg == R_EDX) {
+            value = ent->edx;
+            if (function == 0x80000001) {
+                /* On Intel, kvm returns cpuid according to the Intel spec,
+                 * so add missing bits according to the AMD spec: */
+                value |= kvm_arch_get_supported_cpuid(env, 1, R_EDX) & 0xdfeff7ff;
+            }
+        }
+    }
+
+    qemu_free(table);
+    return value;
+}
+
+#else
+
+uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
+{
+    return -1U; /* built without KVM_CAP_EXT_CPUID: always answer with all bits set */
+}
+
+#endif
+
+int kvm_arch_init_vcpu(CPUState *env)
+{
+    struct {
+        struct kvm_cpuid2 cpuid;
+        struct kvm_cpuid_entry2 entries[100]; /* NOTE(review): cpuid_i is never checked against this capacity */
+    } __attribute__((packed)) cpuid_data;
+    uint32_t limit, i, j, cpuid_i;
+    uint32_t unused;
+    /* Build the CPUID table from cpu_x86_cpuid() and pass it to the vcpu with KVM_SET_CPUID2; returns that ioctl's result. */
+    env->mp_state = KVM_MP_STATE_RUNNABLE;
+
+    cpuid_i = 0;
+    /* Basic leaves: 0 .. limit, where limit is EAX of leaf 0. */
+    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
+
+    for (i = 0; i <= limit; i++) {
+        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];
+
+        switch (i) {
+        case 2: {
+            /* Keep reading function 2 till all the input is received */
+            int times;
+
+            c->function = i;
+            c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
+                       KVM_CPUID_FLAG_STATE_READ_NEXT;
+            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+            times = c->eax & 0xff; /* low byte of EAX = total number of leaf-2 entries to emit */
+
+            for (j = 1; j < times; ++j) {
+                c = &cpuid_data.entries[cpuid_i++];
+                c->function = i;
+                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
+                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+            }
+            break;
+        }
+        case 4:
+        case 0xb:
+        case 0xd:
+            for (j = 0; ; j++) { /* one entry per sub-leaf j, until the leaf-specific terminator below */
+                c->function = i;
+                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+                c->index = j;
+                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
+                /* The terminating sub-leaf has already been stored in *c when the loop breaks. */
+                if (i == 4 && c->eax == 0)
+                    break;
+                if (i == 0xb && !(c->ecx & 0xff00))
+                    break;
+                if (i == 0xd && c->eax == 0)
+                    break;
+
+                c = &cpuid_data.entries[cpuid_i++];
+            }
+            break;
+        default:
+            c->function = i;
+            c->flags = 0;
+            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+            break;
+        }
+    }
+    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
+    /* Extended leaves: 0x80000000 .. limit, one entry each. */
+    for (i = 0x80000000; i <= limit; i++) {
+        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];
+
+        c->function = i;
+        c->flags = 0;
+        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+    }
+
+    cpuid_data.cpuid.nent = cpuid_i; /* number of entries actually filled in */
+
+    return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
+}
+
+/* Report (and cache) whether KVM lists MSR_STAR among its save/restore MSRs. */
+static int kvm_has_msr_star(CPUState *env)
+{
+    static int has_msr_star;   /* 0 = not probed yet, 1 = present, -1 = absent */
+    int ret;
+
+    /* first time */
+    if (has_msr_star == 0) {
+        struct kvm_msr_list msr_list, *kvm_msr_list;
+
+        has_msr_star = -1;
+        /* Obtain MSR list from KVM.  These are the MSRs that we must
+         * save/restore.  The first call, with nmsrs == 0, only asks for the
+         * number of entries: KVM stores it in nmsrs and fails with -E2BIG. */
+        msr_list.nmsrs = 0;
+        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
+        if (ret < 0 && ret != -E2BIG)
+            return 0;
+
+        kvm_msr_list = qemu_mallocz(sizeof(msr_list) +
+                                    msr_list.nmsrs * sizeof(msr_list.indices[0]));
+
+        kvm_msr_list->nmsrs = msr_list.nmsrs;
+        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
+        if (ret >= 0) {
+            int i;
+
+            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
+                if (kvm_msr_list->indices[i] == MSR_STAR) {
+                    has_msr_star = 1;
+                    break;
+                }
+            }
+        }
+
+        /* allocated with qemu_mallocz(), so release with qemu_free() */
+        qemu_free(kvm_msr_list);
+    }
+
+    return has_msr_star == 1;
+}
+
+/* Arch-specific VM setup: returns 0 on success or a negative errno value. */
+int kvm_arch_init(KVMState *s, int smp_cpus)
+{
+    int ret;
+    /* create vm86 tss.  KVM uses vm86 mode to emulate 16-bit code
+     * directly.  In order to use vm86 mode, a TSS is needed.  Since this
+     * must be part of guest physical memory, we need to allocate it.  Older
+     * versions of KVM just assumed that it would be at the end of physical
+     * memory but that doesn't work with more than 4GB of memory.  We simply
+     * refuse to work with those older versions of KVM. */
+    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
+    if (ret <= 0) {
+        fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
+        return ret < 0 ? ret : -ENOSYS; /* 0 means "unsupported" and must not be reported as success */
+    }
+
+    /* this address is 3 pages before the bios, and the bios should present
+     * as unavailable memory.  FIXME, need to ensure the e820 map deals with
+     * this?
+     */
+    return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
+}
+                    
+static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
+{
+    /* Copy selector/base/limit from the cached segment and force the fixed
+     * attribute set that the caller uses while EFLAGS has VM_MASK set. */
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->selector = rhs->selector;
+
+    lhs->present = 1;
+    lhs->s = 1;
+    lhs->type = 3;
+    lhs->dpl = 3;
+    /* every remaining attribute is cleared */
+    lhs->db = lhs->l = lhs->g = lhs->avl = lhs->unusable = 0;
+}
+
+static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
+{
+    unsigned flags = rhs->flags;   /* attribute word unpacked field by field */
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
+    lhs->present = (flags & DESC_P_MASK) != 0;
+    lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3;   /* DPL field, not selector RPL */
+    lhs->db = (flags >> DESC_B_SHIFT) & 1;
+    lhs->s = (flags & DESC_S_MASK) != 0;
+    lhs->l = (flags >> DESC_L_SHIFT) & 1;
+    lhs->g = (flags & DESC_G_MASK) != 0;
+    lhs->avl = (flags & DESC_AVL_MASK) != 0;
+    lhs->unusable = 0;   /* segments written through here are always usable */
+}
+
+static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
+{
+    /* Repack KVM's separate attribute fields into one cached flags word. */
+    uint32_t shifted = (rhs->type << DESC_TYPE_SHIFT)
+                     | (rhs->dpl << DESC_DPL_SHIFT)
+                     | (rhs->db << DESC_B_SHIFT)
+                     | (rhs->l << DESC_L_SHIFT);
+    uint32_t masked = (rhs->present * DESC_P_MASK) | (rhs->s * DESC_S_MASK)
+                    | (rhs->g * DESC_G_MASK) | (rhs->avl * DESC_AVL_MASK);
+
+    lhs->flags = shifted | masked;
+    lhs->limit = rhs->limit;
+    lhs->base = rhs->base;
+    lhs->selector = rhs->selector;
+}
+
+static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
+{
+    if (!set)
+        *qemu_reg = *kvm_reg;   /* get: KVM value goes into the QEMU slot */
+    else
+        *kvm_reg = *qemu_reg;   /* set: QEMU value goes into the KVM slot */
+}
+
+static int kvm_getput_regs(CPUState *env, int set)
+{
+    struct kvm_regs regs;
+    int ret = 0;
+    /* set != 0: write env's GPRs, rflags and rip to KVM; set == 0: read them */
+    if (!set) {
+        ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
+        if (ret < 0)
+            return ret;
+    }
+    /* each call below copies one register in the direction chosen by 'set' */
+    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
+    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
+    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
+    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
+    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
+    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
+    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
+    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
+#ifdef TARGET_X86_64
+    kvm_getput_reg(&regs.r8, &env->regs[8], set);
+    kvm_getput_reg(&regs.r9, &env->regs[9], set);
+    kvm_getput_reg(&regs.r10, &env->regs[10], set);
+    kvm_getput_reg(&regs.r11, &env->regs[11], set);
+    kvm_getput_reg(&regs.r12, &env->regs[12], set);
+    kvm_getput_reg(&regs.r13, &env->regs[13], set);
+    kvm_getput_reg(&regs.r14, &env->regs[14], set);
+    kvm_getput_reg(&regs.r15, &env->regs[15], set);
+#endif
+
+    kvm_getput_reg(&regs.rflags, &env->eflags, set);
+    kvm_getput_reg(&regs.rip, &env->eip, set);
+
+    if (set)
+        ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
+
+    return ret;   /* result of whichever ioctl ran */
+}
+
+static int kvm_put_fpu(CPUState *env)
+{
+    struct kvm_fpu fpu;
+    int i;
+    /* Build a kvm_fpu image from env and hand it to KVM_SET_FPU. */
+    memset(&fpu, 0, sizeof fpu);
+    fpu.fsw = env->fpus & ~(7 << 11);   /* drop bits 11..13 of the status word */
+    fpu.fsw |= (env->fpstt & 7) << 11;  /* and store fpstt in their place */
+    fpu.fcw = env->fpuc;
+    for (i = 0; i < 8; ++i)
+	fpu.ftwx |= (!env->fptags[i]) << i;   /* bit i set when fptags[i] is 0 */
+    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
+    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
+    fpu.mxcsr = env->mxcsr;
+
+    return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu);
+}
+
+static int kvm_put_sregs(CPUState *env)
+{
+    struct kvm_sregs sregs;
+    /* Fill a kvm_sregs image from env and push it with KVM_SET_SREGS. */
+    memcpy(sregs.interrupt_bitmap,
+           env->interrupt_bitmap,
+           sizeof(sregs.interrupt_bitmap));
+
+    if ((env->eflags & VM_MASK)) {
+	    set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
+	    set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
+	    set_v8086_seg(&sregs.es, &env->segs[R_ES]);
+	    set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
+	    set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
+	    set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
+    } else {
+	    set_seg(&sregs.cs, &env->segs[R_CS]);
+	    set_seg(&sregs.ds, &env->segs[R_DS]);
+	    set_seg(&sregs.es, &env->segs[R_ES]);
+	    set_seg(&sregs.fs, &env->segs[R_FS]);
+	    set_seg(&sregs.gs, &env->segs[R_GS]);
+	    set_seg(&sregs.ss, &env->segs[R_SS]);
+
+	    if (env->cr[0] & CR0_PE_MASK) {
+		/* force ss cpl to cs cpl: copy the low two selector bits of cs
+		 * into ss, then derive ss.dpl from the patched selector */
+		sregs.ss.selector = (sregs.ss.selector & ~3) |
+			(sregs.cs.selector & 3);
+		sregs.ss.dpl = sregs.ss.selector & 3;
+	    }
+    }
+    /* tr and ldt always go through the generic conversion */
+    set_seg(&sregs.tr, &env->tr);
+    set_seg(&sregs.ldt, &env->ldt);
+
+    sregs.idt.limit = env->idt.limit;
+    sregs.idt.base = env->idt.base;
+    sregs.gdt.limit = env->gdt.limit;
+    sregs.gdt.base = env->gdt.base;
+
+    sregs.cr0 = env->cr[0];
+    sregs.cr2 = env->cr[2];
+    sregs.cr3 = env->cr[3];
+    sregs.cr4 = env->cr[4];
+
+    sregs.cr8 = cpu_get_apic_tpr(env);   /* cr8 is sourced from the APIC TPR */
+    sregs.apic_base = cpu_get_apic_base(env);
+
+    sregs.efer = env->efer;
+
+    return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
+}
+
+static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
+                              uint32_t index, uint64_t value)
+{
+    entry->data = value;    /* value to load */
+    entry->index = index;   /* MSR it belongs to */
+}
+
+static int kvm_put_msrs(CPUState *env)
+{
+    struct {
+        struct kvm_msrs info;
+        struct kvm_msr_entry entries[100];   /* far more than are filled below */
+    } msr_data;
+    struct kvm_msr_entry *msrs = msr_data.entries;
+    int n = 0;
+    /* Queue each MSR held in env, then write them all with one KVM_SET_MSRS. */
+    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
+    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
+    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
+    if (kvm_has_msr_star(env))   /* MSR_STAR only when KVM lists it */
+	kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
+    kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
+#ifdef TARGET_X86_64
+    /* FIXME if lm capable */
+    kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
+    kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
+    kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
+    kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
+#endif
+    msr_data.info.nmsrs = n;   /* number of entries actually queued */
+
+    return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);
+
+}
+
+
+static int kvm_get_fpu(CPUState *env)
+{
+    struct kvm_fpu fpu;
+    int i, ret;
+    /* Fetch the FPU/SSE image with KVM_GET_FPU and unpack it into env. */
+    ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu);
+    if (ret < 0)
+        return ret;
+
+    env->fpstt = (fpu.fsw >> 11) & 7;   /* bits 11..13 of the status word */
+    env->fpus = fpu.fsw;
+    env->fpuc = fpu.fcw;
+    for (i = 0; i < 8; ++i)
+	env->fptags[i] = !((fpu.ftwx >> i) & 1);   /* inverse of kvm_put_fpu() */
+    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
+    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
+    env->mxcsr = fpu.mxcsr;
+
+    return 0;
+}
+
+static int kvm_get_sregs(CPUState *env)
+{
+    struct kvm_sregs sregs;
+    uint32_t hflags;
+    int ret;
+    /* Read KVM's special registers into env, then rebuild the derived hflags. */
+    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
+    if (ret < 0)
+        return ret;
+
+    memcpy(env->interrupt_bitmap,
+           sregs.interrupt_bitmap,
+           sizeof(sregs.interrupt_bitmap));
+
+    get_seg(&env->segs[R_CS], &sregs.cs);
+    get_seg(&env->segs[R_DS], &sregs.ds);
+    get_seg(&env->segs[R_ES], &sregs.es);
+    get_seg(&env->segs[R_FS], &sregs.fs);
+    get_seg(&env->segs[R_GS], &sregs.gs);
+    get_seg(&env->segs[R_SS], &sregs.ss);
+
+    get_seg(&env->tr, &sregs.tr);
+    get_seg(&env->ldt, &sregs.ldt);
+
+    env->idt.limit = sregs.idt.limit;
+    env->idt.base = sregs.idt.base;
+    env->gdt.limit = sregs.gdt.limit;
+    env->gdt.base = sregs.gdt.base;
+
+    env->cr[0] = sregs.cr0;
+    env->cr[2] = sregs.cr2;
+    env->cr[3] = sregs.cr3;
+    env->cr[4] = sregs.cr4;
+
+    cpu_set_apic_base(env, sregs.apic_base);
+
+    env->efer = sregs.efer;
+    /* the TPR is not updated here: cpu_set_apic_tpr(env, sregs.cr8) is disabled */
+
+#define HFLAG_COPY_MASK ~( \
+			HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
+			HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
+			HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
+			HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
+
+    /* The hflags bits named in the mask above are recomputed below from the
+     * state just read; all other bits of env->hflags are kept as they are. */
+    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
+    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
+	    (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
+    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
+    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
+	    (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
+
+    if (env->efer & MSR_EFER_LMA) {
+        hflags |= HF_LMA_MASK;
+    }
+
+    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
+        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
+    } else {
+        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
+		(DESC_B_SHIFT - HF_CS32_SHIFT);
+        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
+		(DESC_B_SHIFT - HF_SS32_SHIFT);
+        if (!(env->cr[0] & CR0_PE_MASK) ||
+                   (env->eflags & VM_MASK) ||
+                   !(hflags & HF_CS32_MASK)) {
+                hflags |= HF_ADDSEG_MASK;
+            } else {
+                /* ADDSEG is set only if DS, ES or SS has a non-zero base */
+                hflags |= ((env->segs[R_DS].base |
+                                env->segs[R_ES].base |
+                                env->segs[R_SS].base) != 0) <<
+                    HF_ADDSEG_SHIFT;
+            }
+    }
+    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
+    return 0;
+}
+
+static int kvm_get_msrs(CPUState *env)
+{
+    struct {
+        struct kvm_msrs info;
+        struct kvm_msr_entry entries[100];
+    } msr_data;
+    struct kvm_msr_entry *msrs = msr_data.entries;
+    int ret, i, n;
+    /* Request the MSRs tracked in env with one KVM_GET_MSRS, then store them. */
+    n = 0;
+    msrs[n++].index = MSR_IA32_SYSENTER_CS;
+    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
+    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
+    if (kvm_has_msr_star(env))   /* MSR_STAR only when KVM lists it */
+	msrs[n++].index = MSR_STAR;
+    msrs[n++].index = MSR_IA32_TSC;
+#ifdef TARGET_X86_64
+    /* FIXME lm_capable_kernel */
+    msrs[n++].index = MSR_CSTAR;
+    msrs[n++].index = MSR_KERNELGSBASE;
+    msrs[n++].index = MSR_FMASK;
+    msrs[n++].index = MSR_LSTAR;
+#endif
+    msr_data.info.nmsrs = n;
+    ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
+    if (ret < 0)
+        return ret;
+    /* the loop treats the ioctl's return value as the count of valid entries */
+    for (i = 0; i < ret; i++) {
+        switch (msrs[i].index) {
+        case MSR_IA32_SYSENTER_CS:
+            env->sysenter_cs = msrs[i].data;
+            break;
+        case MSR_IA32_SYSENTER_ESP:
+            env->sysenter_esp = msrs[i].data;
+            break;
+        case MSR_IA32_SYSENTER_EIP:
+            env->sysenter_eip = msrs[i].data;
+            break;
+        case MSR_STAR:
+            env->star = msrs[i].data;
+            break;
+#ifdef TARGET_X86_64
+        case MSR_CSTAR:
+            env->cstar = msrs[i].data;
+            break;
+        case MSR_KERNELGSBASE:
+            env->kernelgsbase = msrs[i].data;
+            break;
+        case MSR_FMASK:
+            env->fmask = msrs[i].data;
+            break;
+        case MSR_LSTAR:
+            env->lstar = msrs[i].data;
+            break;
+#endif
+        case MSR_IA32_TSC:
+            env->tsc = msrs[i].data;
+            break;
+        }
+    }
+
+    return 0;
+}
+
+int kvm_arch_put_registers(CPUState *env)
+{
+    int ret;
+    /* Push env to KVM piece by piece; the first negative result is returned. */
+    ret = kvm_getput_regs(env, 1);
+    if (ret < 0)
+        return ret;
+
+    ret = kvm_put_fpu(env);
+    if (ret < 0)
+        return ret;
+
+    ret = kvm_put_sregs(env);
+    if (ret < 0)
+        return ret;
+
+    ret = kvm_put_msrs(env);
+    if (ret < 0)
+        return ret;
+
+    ret = kvm_put_mp_state(env);
+    if (ret < 0)
+        return ret;
+    /* NOTE(review): mp_state is read back right after being written - confirm */
+    ret = kvm_get_mp_state(env);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+int kvm_arch_get_registers(CPUState *env)
+{
+    int ret;
+    /* Pull KVM state into env piece by piece; stop at the first failure. */
+    ret = kvm_getput_regs(env, 0);
+    if (ret < 0)
+        return ret;
+
+    ret = kvm_get_fpu(env);
+    if (ret < 0)
+        return ret;
+    /* kvm_get_sregs() reads env->eflags, which kvm_getput_regs() filled above */
+    ret = kvm_get_sregs(env);
+    if (ret < 0)
+        return ret;
+
+    ret = kvm_get_msrs(env);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
+{
+    /* Try to inject an interrupt if the guest can accept it */
+    if (run->ready_for_interrupt_injection &&
+        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
+        (env->eflags & IF_MASK)) {
+        int irq;
+
+        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+        irq = cpu_get_pic_interrupt(env);
+        if (irq >= 0) {   /* a negative value is treated as nothing to inject */
+            struct kvm_interrupt intr;
+            intr.irq = irq;
+            /* FIXME: errors */
+            dprintf("injected interrupt %d\n", irq);
+            kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);   /* result is ignored */
+        }
+    }
+
+    /* If we have an interrupt but the guest is not ready to receive an
+     * interrupt, request an interrupt window exit.  This will
+     * cause a return to userspace as soon as the guest is ready to
+     * receive interrupts. */
+    if ((env->interrupt_request & CPU_INTERRUPT_HARD))
+        run->request_interrupt_window = 1;
+    else
+        run->request_interrupt_window = 0;
+
+    dprintf("setting tpr\n");
+    run->cr8 = cpu_get_apic_tpr(env);   /* pass the current APIC TPR as cr8 */
+
+    return 0;   /* this function always reports success */
+}
+
+int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
+{
+    if (run->if_flag)   /* mirror the interrupt flag reported in kvm_run */
+        env->eflags |= IF_MASK;
+    else
+        env->eflags &= ~IF_MASK;
+    /* copy the cr8 and apic_base values reported in kvm_run into the APIC */
+    cpu_set_apic_tpr(env, run->cr8);
+    cpu_set_apic_base(env, run->apic_base);
+
+    return 0;
+}
+
+static int kvm_handle_halt(CPUState *env)
+{
+    /* A hard interrupt with IF set, or any NMI, keeps the CPU running (1). */
+    int irq_ready = (env->interrupt_request & CPU_INTERRUPT_HARD) &&
+                    (env->eflags & IF_MASK);
+    if (irq_ready || (env->interrupt_request & CPU_INTERRUPT_NMI))
+        return 1;
+
+    env->exception_index = EXCP_HLT;
+    env->halted = 1;
+    return 0;
+}
+
+int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
+{
+    int handled;
+
+    /* Only HLT exits are dealt with here; any other exit reason is
+     * reported back as 0. */
+    if (run->exit_reason != KVM_EXIT_HLT)
+        return 0;
+
+    dprintf("handle_hlt\n");
+    handled = kvm_handle_halt(env);
+    return handled;
+}
+
+#ifdef KVM_CAP_SET_GUEST_DEBUG
+int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
+{
+    static const uint8_t int3 = 0xcc;   /* byte planted at the breakpoint pc */
+    /* save the byte at bp->pc, then overwrite it; either failure is -EINVAL */
+    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
+        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1))
+        return -EINVAL;
+    return 0;
+}
+
+int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
+{
+    uint8_t cur;   /* byte currently at bp->pc; it must still be 0xcc */
+
+    if (cpu_memory_rw_debug(env, bp->pc, &cur, 1, 0) || cur != 0xcc ||
+        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1))
+        return -EINVAL;
+    return 0;
+}
+
+static struct {
+    target_ulong addr;   /* address loaded into the debug register */
+    int len;             /* 1, 2, 4 or 8; indexes len_code[] */
+    int type;            /* GDB_BREAKPOINT_HW or a GDB_WATCHPOINT_* value */
+} hw_breakpoint[4];
+/* number of entries of hw_breakpoint[] in use, kept dense from index 0 */
+static int nb_hw_breakpoint;
+
+static int find_hw_breakpoint(target_ulong addr, int len, int type)
+{
+    int slot;
+    /* index of the first matching entry, or -1; len == -1 matches any length */
+    for (slot = 0; slot < nb_hw_breakpoint; slot++)
+        if (hw_breakpoint[slot].type == type && hw_breakpoint[slot].addr == addr &&
+            (len == -1 || hw_breakpoint[slot].len == len))
+            return slot;
+    return -1;
+}
+
+int kvm_arch_insert_hw_breakpoint(target_ulong addr,
+                                  target_ulong len, int type)
+{
+    switch (type) {
+    case GDB_BREAKPOINT_HW:
+        len = 1;   /* execution breakpoints are always recorded with length 1 */
+        break;
+    case GDB_WATCHPOINT_WRITE:
+    case GDB_WATCHPOINT_ACCESS:
+        switch (len) {
+        case 1:
+            break;
+        case 2:
+        case 4:
+        case 8:
+            if (addr & (len - 1))   /* address must be aligned to the length */
+                return -EINVAL;
+            break;
+        default:
+            return -EINVAL;
+        }
+        break;
+    default:
+        return -ENOSYS;   /* any other type is not supported */
+    }
+
+    if (nb_hw_breakpoint == 4)   /* hw_breakpoint[] has four slots */
+        return -ENOBUFS;
+
+    if (find_hw_breakpoint(addr, len, type) >= 0)
+        return -EEXIST;
+    /* append the new entry at the end of the table */
+    hw_breakpoint[nb_hw_breakpoint].addr = addr;
+    hw_breakpoint[nb_hw_breakpoint].len = len;
+    hw_breakpoint[nb_hw_breakpoint].type = type;
+    nb_hw_breakpoint++;
+
+    return 0;
+}
+
+int kvm_arch_remove_hw_breakpoint(target_ulong addr,
+                                  target_ulong len, int type)
+{
+    /* execution breakpoints are looked up with length 1 */
+    int wanted_len = (type == GDB_BREAKPOINT_HW) ? 1 : len;
+    int slot = find_hw_breakpoint(addr, wanted_len, type);
+
+    if (slot < 0)
+        return -ENOENT;
+
+    /* move the last entry into the freed slot so the table stays dense */
+    hw_breakpoint[slot] = hw_breakpoint[--nb_hw_breakpoint];
+    return 0;
+}
+
+void kvm_arch_remove_all_hw_breakpoints(void)
+{
+    nb_hw_breakpoint = 0;   /* empties the table; the slots are not cleared */
+}
+
+static CPUWatchpoint hw_watchpoint;
+
+int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
+{
+    int handle = 0;   /* becomes 1 when the debug exit belongs to the debugger */
+    int n;
+    /* Returns 1 if handled here; otherwise the exception is re-injected. */
+    if (arch_info->exception == 1) {
+        if (arch_info->dr6 & (1 << 14)) {   /* dr6 bit 14: handled as single-step */
+            if (cpu_single_env->singlestep_enabled)
+                handle = 1;
+        } else {
+            for (n = 0; n < 4; n++)
+                if (arch_info->dr6 & (1 << n))   /* debug register n fired */
+                    switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
+                    case 0x0:   /* code written for GDB_BREAKPOINT_HW */
+                        handle = 1;
+                        break;
+                    case 0x1:   /* code written for GDB_WATCHPOINT_WRITE */
+                        handle = 1;
+                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
+                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
+                        hw_watchpoint.flags = BP_MEM_WRITE;
+                        break;
+                    case 0x3:   /* code written for GDB_WATCHPOINT_ACCESS */
+                        handle = 1;
+                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
+                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
+                        hw_watchpoint.flags = BP_MEM_ACCESS;
+                        break;
+                    }
+        }
+    } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc))
+        handle = 1;
+    /* not ours: ask KVM to inject #DB (exception 1) or #BP into the guest */
+    if (!handle)
+        kvm_update_guest_debug(cpu_single_env,
+                        (arch_info->exception == 1) ?
+                        KVM_GUESTDBG_INJECT_DB : KVM_GUESTDBG_INJECT_BP);
+
+    return handle;
+}
+
+void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg)
+{
+    const uint8_t type_code[] = {   /* GDB type -> 2-bit type field of debugreg[7] */
+        [GDB_BREAKPOINT_HW] = 0x0,
+        [GDB_WATCHPOINT_WRITE] = 0x1,
+        [GDB_WATCHPOINT_ACCESS] = 0x3
+    };
+    const uint8_t len_code[] = {   /* byte length -> 2-bit length field */
+        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
+    };
+    int n;
+    /* Adds the software/hardware breakpoint control bits and registers to dbg. */
+    if (kvm_sw_breakpoints_active(env))
+        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+
+    if (nb_hw_breakpoint > 0) {
+        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+        dbg->arch.debugreg[7] = 0x0600;
+        for (n = 0; n < nb_hw_breakpoint; n++) {
+            dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
+            dbg->arch.debugreg[7] |= (2 << (n * 2)) |   /* per-slot enable bit */
+                (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
+                (len_code[hw_breakpoint[n].len] << (18 + n*4));
+        }
+    }
+}
+#endif /* KVM_CAP_SET_GUEST_DEBUG */
diff --git a/target-i386/machine.c b/target-i386/machine.c
new file mode 100644
index 0000000..7b366c4
--- /dev/null
+++ b/target-i386/machine.c
@@ -0,0 +1,369 @@
+#include "hw/hw.h"
+#include "hw/boards.h"
+#include "hw/pc.h"
+#include "hw/isa.h"
+
+#include "exec-all.h"
+#include "kvm.h"
+
+static void cpu_put_seg(QEMUFile *f, SegmentCache *dt)
+{
+    qemu_put_be32(f, dt->selector);   /* field order must match cpu_get_seg() */
+    qemu_put_betl(f, dt->base);
+    qemu_put_be32(f, dt->limit);
+    qemu_put_be32(f, dt->flags);
+}
+
+static void cpu_get_seg(QEMUFile *f, SegmentCache *dt)
+{
+    dt->selector = qemu_get_be32(f);   /* reads in the order cpu_put_seg() wrote */
+    dt->base = qemu_get_betl(f);
+    dt->limit = qemu_get_be32(f);
+    dt->flags = qemu_get_be32(f);
+}
+
+void cpu_save(QEMUFile *f, void *opaque)
+{
+    CPUState *env = opaque;
+    uint16_t fptag, fpus, fpuc, fpregs_format;
+    uint32_t hflags;
+    int32_t a20_mask;
+    int i;
+    /* Serializes env to f; cpu_load() reads the fields back in this order. */
+    cpu_synchronize_state(env, 0);
+
+    for(i = 0; i < CPU_NB_REGS; i++)
+        qemu_put_betls(f, &env->regs[i]);
+    qemu_put_betls(f, &env->eip);
+    qemu_put_betls(f, &env->eflags);
+    hflags = env->hflags; /* XXX: suppress most of the redundant hflags */
+    qemu_put_be32s(f, &hflags);
+
+    /* FPU */
+    fpuc = env->fpuc;
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;   /* fpstt in bits 11..13 */
+    fptag = 0;
+    for(i = 0; i < 8; i++) {
+        fptag |= ((!env->fptags[i]) << i);   /* bit i set when fptags[i] is 0 */
+    }
+
+    qemu_put_be16s(f, &fpuc);
+    qemu_put_be16s(f, &fpus);
+    qemu_put_be16s(f, &fptag);
+
+#ifdef USE_X86LDOUBLE
+    fpregs_format = 0;   /* registers follow as 64-bit mantissa + 16-bit exponent */
+#else
+    fpregs_format = 1;   /* registers follow as one 64-bit value each */
+#endif
+    qemu_put_be16s(f, &fpregs_format);
+
+    for(i = 0; i < 8; i++) {
+#ifdef USE_X86LDOUBLE
+        {
+            uint64_t mant;
+            uint16_t exp;
+            /* we save the real CPU data (in case of MMX usage only 'mant'
+               contains the MMX register) */
+            cpu_get_fp80(&mant, &exp, env->fpregs[i].d);
+            qemu_put_be64(f, mant);
+            qemu_put_be16(f, exp);
+        }
+#else
+        /* if we use doubles for float emulation, we save the doubles to
+           avoid losing information in case of MMX usage. It can give
+           problems if the image is restored on a CPU where long
+           doubles are used instead. */
+        qemu_put_be64(f, env->fpregs[i].mmx.MMX_Q(0));
+#endif
+    }
+
+    for(i = 0; i < 6; i++)
+        cpu_put_seg(f, &env->segs[i]);
+    cpu_put_seg(f, &env->ldt);
+    cpu_put_seg(f, &env->tr);
+    cpu_put_seg(f, &env->gdt);
+    cpu_put_seg(f, &env->idt);
+
+    qemu_put_be32s(f, &env->sysenter_cs);
+    qemu_put_betls(f, &env->sysenter_esp);
+    qemu_put_betls(f, &env->sysenter_eip);
+
+    qemu_put_betls(f, &env->cr[0]);
+    qemu_put_betls(f, &env->cr[2]);
+    qemu_put_betls(f, &env->cr[3]);
+    qemu_put_betls(f, &env->cr[4]);
+
+    for(i = 0; i < 8; i++)
+        qemu_put_betls(f, &env->dr[i]);
+
+    /* MMU */
+    a20_mask = (int32_t) env->a20_mask;
+    qemu_put_sbe32s(f, &a20_mask);
+
+    /* XMM */
+    qemu_put_be32s(f, &env->mxcsr);
+    for(i = 0; i < CPU_NB_REGS; i++) {
+        qemu_put_be64s(f, &env->xmm_regs[i].XMM_Q(0));
+        qemu_put_be64s(f, &env->xmm_regs[i].XMM_Q(1));
+    }
+
+#ifdef TARGET_X86_64
+    qemu_put_be64s(f, &env->efer);
+    qemu_put_be64s(f, &env->star);
+    qemu_put_be64s(f, &env->lstar);
+    qemu_put_be64s(f, &env->cstar);
+    qemu_put_be64s(f, &env->fmask);
+    qemu_put_be64s(f, &env->kernelgsbase);
+#endif
+    qemu_put_be32s(f, &env->smbase);
+
+    qemu_put_be64s(f, &env->pat);
+    qemu_put_be32s(f, &env->hflags2);
+    /* the vm_*, tsc_offset and intercept* fields follow */
+    qemu_put_be64s(f, &env->vm_hsave);
+    qemu_put_be64s(f, &env->vm_vmcb);
+    qemu_put_be64s(f, &env->tsc_offset);
+    qemu_put_be64s(f, &env->intercept);
+    qemu_put_be16s(f, &env->intercept_cr_read);
+    qemu_put_be16s(f, &env->intercept_cr_write);
+    qemu_put_be16s(f, &env->intercept_dr_read);
+    qemu_put_be16s(f, &env->intercept_dr_write);
+    qemu_put_be32s(f, &env->intercept_exceptions);
+    qemu_put_8s(f, &env->v_tpr);
+
+    /* MTRRs */
+    for(i = 0; i < 11; i++)
+        qemu_put_be64s(f, &env->mtrr_fixed[i]);
+    qemu_put_be64s(f, &env->mtrr_deftype);
+    for(i = 0; i < 8; i++) {
+        qemu_put_be64s(f, &env->mtrr_var[i].base);
+        qemu_put_be64s(f, &env->mtrr_var[i].mask);
+    }
+
+    for (i = 0; i < sizeof(env->interrupt_bitmap)/8; i++) {
+        qemu_put_be64s(f, &env->interrupt_bitmap[i]);
+    }
+    qemu_put_be64s(f, &env->tsc);
+    qemu_put_be32s(f, &env->mp_state);
+
+    /* MCE */
+    qemu_put_be64s(f, &env->mcg_cap);
+    if (env->mcg_cap) {
+        qemu_put_be64s(f, &env->mcg_status);
+        qemu_put_be64s(f, &env->mcg_ctl);
+        for (i = 0; i < (env->mcg_cap & 0xff); i++) {   /* low byte = bank count */
+            qemu_put_be64s(f, &env->mce_banks[4*i]);   /* four values per bank */
+            qemu_put_be64s(f, &env->mce_banks[4*i + 1]);
+            qemu_put_be64s(f, &env->mce_banks[4*i + 2]);
+            qemu_put_be64s(f, &env->mce_banks[4*i + 3]);
+        }
+    }
+}
+
+#ifdef USE_X86LDOUBLE
+/* XXX: add that in a FPU generic layer */
+union x86_longdouble {
+    /* mant and exp must occupy separate storage, so they sit in one struct */
+    struct { uint64_t mant; uint16_t exp; };
+};
+
+#define MANTD1(fp)	((fp) & ((1ULL << 52) - 1))
+#define EXPBIAS1 1023
+#define EXPD1(fp)	(((fp) >> 52) & 0x7FF)
+#define SIGND1(fp)	(((fp) >> 32) & 0x80000000)
+/* Convert the 64-bit double image in temp to mantissa/exponent form in *p. */
+static void fp64_to_fp80(union x86_longdouble *p, uint64_t temp)
+{
+    int e;
+    /* mantissa: 52 fraction bits moved to the top, explicit integer bit set */
+    p->mant = (MANTD1(temp) << 11) | (1ULL << 63);
+    /* exponent + sign: rebias from 1023 to 16383, sign goes to bit 15 */
+    e = EXPD1(temp) - EXPBIAS1 + 16383;
+    e |= SIGND1(temp) >> 16;
+    p->exp = e;
+}
+#endif
+
+int cpu_load(QEMUFile *f, void *opaque, int version_id)
+{
+    CPUState *env = opaque;
+    int i, guess_mmx;
+    uint32_t hflags;
+    uint16_t fpus, fpuc, fptag, fpregs_format;
+    int32_t a20_mask;
+    /* Restores env from f in cpu_save() order; returns 0 or -EINVAL. */
+    if (version_id < 3 || version_id > CPU_SAVE_VERSION)
+        return -EINVAL;
+    for(i = 0; i < CPU_NB_REGS; i++)
+        qemu_get_betls(f, &env->regs[i]);
+    qemu_get_betls(f, &env->eip);
+    qemu_get_betls(f, &env->eflags);
+    qemu_get_be32s(f, &hflags);
+
+    qemu_get_be16s(f, &fpuc);
+    qemu_get_be16s(f, &fpus);
+    qemu_get_be16s(f, &fptag);
+    qemu_get_be16s(f, &fpregs_format);
+
+    /* NOTE: we cannot always restore the FPU state if the image come
+       from a host with a different 'USE_X86LDOUBLE' define. We guess
+       if we are in an MMX state to restore correctly in that case. */
+    guess_mmx = ((fptag == 0xff) && (fpus & 0x3800) == 0);
+    for(i = 0; i < 8; i++) {
+        uint64_t mant;
+        uint16_t exp;
+
+        switch(fpregs_format) {
+        case 0:   /* stream holds a 64-bit mantissa and a 16-bit exponent */
+            mant = qemu_get_be64(f);
+            exp = qemu_get_be16(f);
+#ifdef USE_X86LDOUBLE
+            env->fpregs[i].d = cpu_set_fp80(mant, exp);
+#else
+            /* difficult case */
+            if (guess_mmx)
+                env->fpregs[i].mmx.MMX_Q(0) = mant;
+            else
+                env->fpregs[i].d = cpu_set_fp80(mant, exp);
+#endif
+            break;
+        case 1:   /* stream holds a single 64-bit value per register */
+            mant = qemu_get_be64(f);
+#ifdef USE_X86LDOUBLE
+            {
+                union x86_longdouble *p;
+                /* difficult case */
+                p = (void *)&env->fpregs[i];
+                if (guess_mmx) {
+                    p->mant = mant;
+                    p->exp = 0xffff;
+                } else {
+                    fp64_to_fp80(p, mant);
+                }
+            }
+#else
+            env->fpregs[i].mmx.MMX_Q(0) = mant;
+#endif
+            break;
+        default:
+            return -EINVAL;   /* unknown register format */
+        }
+    }
+
+    env->fpuc = fpuc;
+    /* XXX: restore FPU round state */
+    env->fpstt = (fpus >> 11) & 7;
+    env->fpus = fpus & ~0x3800;
+    fptag ^= 0xff;   /* undo the inversion applied by cpu_save() */
+    for(i = 0; i < 8; i++) {
+        env->fptags[i] = (fptag >> i) & 1;
+    }
+
+    for(i = 0; i < 6; i++)
+        cpu_get_seg(f, &env->segs[i]);
+    cpu_get_seg(f, &env->ldt);
+    cpu_get_seg(f, &env->tr);
+    cpu_get_seg(f, &env->gdt);
+    cpu_get_seg(f, &env->idt);
+
+    qemu_get_be32s(f, &env->sysenter_cs);
+    if (version_id >= 7) {
+        qemu_get_betls(f, &env->sysenter_esp);
+        qemu_get_betls(f, &env->sysenter_eip);
+    } else {   /* versions before 7 stored these as 32-bit values */
+        env->sysenter_esp = qemu_get_be32(f);
+        env->sysenter_eip = qemu_get_be32(f);
+    }
+
+    qemu_get_betls(f, &env->cr[0]);
+    qemu_get_betls(f, &env->cr[2]);
+    qemu_get_betls(f, &env->cr[3]);
+    qemu_get_betls(f, &env->cr[4]);
+
+    for(i = 0; i < 8; i++)
+        qemu_get_betls(f, &env->dr[i]);
+    cpu_breakpoint_remove_all(env, BP_CPU);   /* drop old BP_CPU entries ... */
+    cpu_watchpoint_remove_all(env, BP_CPU);
+    for (i = 0; i < 4; i++)                   /* ... then insert slots 0..3 again */
+        hw_breakpoint_insert(env, i);
+
+    /* MMU */
+    qemu_get_sbe32s(f, &a20_mask);
+    env->a20_mask = a20_mask;
+
+    qemu_get_be32s(f, &env->mxcsr);
+    for(i = 0; i < CPU_NB_REGS; i++) {
+        qemu_get_be64s(f, &env->xmm_regs[i].XMM_Q(0));
+        qemu_get_be64s(f, &env->xmm_regs[i].XMM_Q(1));
+    }
+
+#ifdef TARGET_X86_64
+    qemu_get_be64s(f, &env->efer);
+    qemu_get_be64s(f, &env->star);
+    qemu_get_be64s(f, &env->lstar);
+    qemu_get_be64s(f, &env->cstar);
+    qemu_get_be64s(f, &env->fmask);
+    qemu_get_be64s(f, &env->kernelgsbase);
+#endif
+    if (version_id >= 4) {
+        qemu_get_be32s(f, &env->smbase);
+    }
+    if (version_id >= 5) {
+        qemu_get_be64s(f, &env->pat);
+        qemu_get_be32s(f, &env->hflags2);
+        if (version_id < 6)   /* only version 5 carries 'halted' here */
+            qemu_get_be32s(f, &env->halted);
+
+        qemu_get_be64s(f, &env->vm_hsave);
+        qemu_get_be64s(f, &env->vm_vmcb);
+        qemu_get_be64s(f, &env->tsc_offset);
+        qemu_get_be64s(f, &env->intercept);
+        qemu_get_be16s(f, &env->intercept_cr_read);
+        qemu_get_be16s(f, &env->intercept_cr_write);
+        qemu_get_be16s(f, &env->intercept_dr_read);
+        qemu_get_be16s(f, &env->intercept_dr_write);
+        qemu_get_be32s(f, &env->intercept_exceptions);
+        qemu_get_8s(f, &env->v_tpr);
+    }
+
+    if (version_id >= 8) {
+        /* MTRRs */
+        for(i = 0; i < 11; i++)
+            qemu_get_be64s(f, &env->mtrr_fixed[i]);
+        qemu_get_be64s(f, &env->mtrr_deftype);
+        for(i = 0; i < 8; i++) {
+            qemu_get_be64s(f, &env->mtrr_var[i].base);
+            qemu_get_be64s(f, &env->mtrr_var[i].mask);
+        }
+    }
+    if (version_id >= 9) {
+        for (i = 0; i < sizeof(env->interrupt_bitmap)/8; i++) {
+            qemu_get_be64s(f, &env->interrupt_bitmap[i]);
+        }
+        qemu_get_be64s(f, &env->tsc);
+        qemu_get_be32s(f, &env->mp_state);
+    }
+
+    if (version_id >= 10) {
+        qemu_get_be64s(f, &env->mcg_cap);
+        if (env->mcg_cap) {
+            qemu_get_be64s(f, &env->mcg_status);
+            qemu_get_be64s(f, &env->mcg_ctl);
+            /* NOTE(review): the bank count comes from the stream and is not
+             * checked against the size of mce_banks[] here - confirm */
+            for (i = 0; i < (env->mcg_cap & 0xff); i++) {
+                qemu_get_be64s(f, &env->mce_banks[4*i]);
+                qemu_get_be64s(f, &env->mce_banks[4*i + 1]);
+                qemu_get_be64s(f, &env->mce_banks[4*i + 2]);
+                qemu_get_be64s(f, &env->mce_banks[4*i + 3]);
+            }
+        }
+    }
+    /* XXX: ensure compatibility for halted bit ? */
+    /* XXX: compute redundant hflags bits */
+    env->hflags = hflags;
+    tlb_flush(env, 1);
+    cpu_synchronize_state(env, 1);
+    return 0;
+}
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
new file mode 100644
index 0000000..7a7829a
--- /dev/null
+++ b/target-i386/op_helper.c
@@ -0,0 +1,5669 @@
+/*
+ *  i386 helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+#define CPU_NO_GLOBAL_REGS
+#include "exec.h"
+#include "exec-all.h"
+#include "host-utils.h"
+
+//#define DEBUG_PCALL
+
+/* PCALL tracing hooks: real logging only when DEBUG_PCALL is defined, otherwise empty statements */
+#ifdef DEBUG_PCALL
+#  define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__)
+#  define LOG_PCALL_STATE(env) \
+          log_cpu_state_mask(CPU_LOG_PCALL, (env), X86_DUMP_CCOP)
+#else /* !DEBUG_PCALL */
+#  define LOG_PCALL(...) do { } while (0)
+#  define LOG_PCALL_STATE(env) do { } while (0)
+#endif /* DEBUG_PCALL */
+
+
+#if 0
+#define raise_exception_err(a, b)\
+do {\
+    qemu_log("raise_exception line=%d\n", __LINE__);\
+    (raise_exception_err)(a, b);\
+} while (0)
+#endif
+/* parity_table[b] is CC_P when byte value b has an even number of set bits, 0 otherwise */
+static const uint8_t parity_table[256] = {
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+};
+
+/* modulo 17 table: rclw_table[i] == i % 17 for i in 0..31 (users are outside this chunk) */
+static const uint8_t rclw_table[32] = {
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8, 9,10,11,12,13,14,15,
+   16, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 9,10,11,12,13,14,
+};
+
+/* modulo 9 table: rclb_table[i] == i % 9 for i in 0..31 (users are outside this chunk) */
+static const uint8_t rclb_table[32] = {
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 0, 1, 2, 3, 4, 5,
+    6, 7, 8, 0, 1, 2, 3, 4,
+};
+/* Table of seven long-double constants; NOTE(review): presumably the x87 load-constant values -- confirm at the users */
+static const CPU86_LDouble f15rk[7] =
+{
+    0.00000000000000000000L,  /* 0.0 */
+    1.00000000000000000000L,  /* 1.0 */
+    3.14159265358979323851L,  /* pi */
+    0.30102999566398119523L,  /* lg2: log10(2) */
+    0.69314718055994530943L,  /* ln2: ln(2) */
+    1.44269504088896340739L,  /* l2e: log2(e) */
+    3.32192809488736234781L,  /* l2t: log2(10) */
+};
+
+/* broken thread support */
+/* one file-wide spinlock, initially unlocked, taken by helper_lock() and released by helper_unlock() */
+static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+/* Acquire global_cpu_lock.  NOTE(review): presumably emitted for LOCK-prefixed guest code -- confirm in the translator */
+void helper_lock(void)
+{
+    spin_lock(&global_cpu_lock);
+}
+/* Release global_cpu_lock; the counterpart of helper_lock() */
+void helper_unlock(void)
+{
+    spin_unlock(&global_cpu_lock);
+}
+/* Thin wrapper: forwards t0 and update_mask unchanged to load_eflags() (defined outside this chunk) */
+void helper_write_eflags(target_ulong t0, uint32_t update_mask)
+{
+    load_eflags(t0, update_mask);
+}
+
+/* Build the EFLAGS image: computed condition codes, the DF bit, and env->eflags minus VM and RF */
+target_ulong helper_read_eflags(void)
+{
+    uint32_t flags = helper_cc_compute_all(CC_OP);
+    flags |= DF & DF_MASK;
+    flags |= env->eflags & ~(VM_MASK | RF_MASK);
+    return flags;
+}
+
+/* Fetch the 8-byte descriptor that 'selector' names in the LDT (selector bit 2
+ * set) or the GDT: the dword at the entry goes to *e1_ptr, the next one to
+ * *e2_ptr.  Returns 0, or -1 when the entry does not fit within the limit. */
+static inline int load_segment(uint32_t *e1_ptr, uint32_t *e2_ptr,
+                               int selector)
+{
+    SegmentCache *table = (selector & 0x4) ? &env->ldt : &env->gdt;
+    int offset = selector & ~7;
+    target_ulong desc;
+
+    /* all eight bytes of the entry must lie inside the table */
+    if ((offset + 7) > table->limit)
+        return -1;
+
+    desc = table->base + offset;
+    *e1_ptr = ldl_kernel(desc);
+    *e2_ptr = ldl_kernel(desc + 4);
+
+    return 0;
+}
+
+/* Limit field of descriptor (e1, e2); with DESC_G_MASK set it becomes (limit << 12) | 0xfff */
+static inline unsigned int get_seg_limit(uint32_t e1, uint32_t e2)
+{
+    const unsigned int raw = (e1 & 0xffff) | (e2 & 0x000f0000);
+    if (!(e2 & DESC_G_MASK))
+        return raw;
+    return (raw << 12) | 0xfff;
+}
+/* Base of descriptor (e1, e2): e2[31:24] -> bits 31..24, e2[7:0] -> bits 23..16, e1[31:16] -> bits 15..0 */
+static inline uint32_t get_seg_base(uint32_t e1, uint32_t e2)
+{
+    return (e2 & 0xff000000) | ((e2 & 0xff) << 16) | (e1 >> 16);
+}
+/* Fill *sc from raw descriptor words: flags = e2, limit and base decoded by the get_seg_* helpers */
+static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1, uint32_t e2)
+{
+    sc->flags = e2;
+    sc->limit = get_seg_limit(e1, e2);
+    sc->base = get_seg_base(e1, e2);
+}
+
+/* vm86-style load of segment register 'seg': base = selector << 4, limit 0xffff, flags 0 */
+static inline void load_seg_vm(int seg, int selector)
+{
+    const int sel16 = selector & 0xffff;
+
+    cpu_x86_load_seg_cache(env, seg, sel16, sel16 << 4, 0xffff, 0);
+}
+/* Read the stack segment/pointer pair for privilege level 'dpl' out of the current TSS (env->tr) */
+static inline void get_ss_esp_from_tss(uint32_t *ss_ptr,
+                                       uint32_t *esp_ptr, int dpl)
+{
+    int tss_type, offset, wide;
+
+#if 0
+    {
+        int i;
+        printf("TR: base=%p limit=%x\n", env->tr.base, env->tr.limit);
+        for(i=0;i<env->tr.limit;i++) {
+            printf("%02x ", env->tr.base[i]);
+            if ((i & 7) == 7) printf("\n");
+        }
+        printf("\n");
+    }
+#endif
+
+    if (!(env->tr.flags & DESC_P_MASK))
+        cpu_abort(env, "invalid tss");
+    tss_type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+    if ((tss_type & 7) != 1)
+        cpu_abort(env, "invalid tss type");
+    wide = tss_type >> 3;                 /* 1: 32-bit entries, 0: 16-bit entries */
+    offset = (dpl * 4 + 2) << wide;
+    if (offset + (4 << wide) - 1 > env->tr.limit)
+        raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
+    if (wide) {
+        *esp_ptr = ldl_kernel(env->tr.base + offset);
+        *ss_ptr = lduw_kernel(env->tr.base + offset + 4);
+    } else {
+        *esp_ptr = lduw_kernel(env->tr.base + offset);
+        *ss_ptr = lduw_kernel(env->tr.base + offset + 2);
+    }
+}
+
+/* Load segment register 'seg_reg' from a selector read out of a TSS.  Failed
+ * checks raise EXCP0A_TSS with the selector as error code; a descriptor that
+ * is not present raises EXCP0B_NOSEG.  XXX: merge with load_seg() */
+static void tss_load_seg(int seg_reg, int selector)
+{
+    const int err_sel = selector & 0xfffc;
+    uint32_t e1, e2;
+    int rpl, dpl, cpl;
+
+    if (err_sel == 0) {
+        /* a null selector is rejected for SS and CS only */
+        if (seg_reg == R_SS || seg_reg == R_CS)
+            raise_exception_err(EXCP0A_TSS, err_sel);
+        return;
+    }
+    if (load_segment(&e1, &e2, selector) != 0)
+        raise_exception_err(EXCP0A_TSS, err_sel);
+    if (!(e2 & DESC_S_MASK))
+        raise_exception_err(EXCP0A_TSS, err_sel);
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (seg_reg == R_CS) {
+        if (!(e2 & DESC_CS_MASK))
+            raise_exception_err(EXCP0A_TSS, err_sel);
+        /* XXX: is it correct ? */
+        if (dpl != rpl)
+            raise_exception_err(EXCP0A_TSS, err_sel);
+        if ((e2 & DESC_C_MASK) && dpl > rpl)
+            raise_exception_err(EXCP0A_TSS, err_sel);
+    } else if (seg_reg == R_SS) {
+        /* SS must be writable data */
+        if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
+            raise_exception_err(EXCP0A_TSS, err_sel);
+        if (dpl != cpl || dpl != rpl)
+            raise_exception_err(EXCP0A_TSS, err_sel);
+    } else {
+        /* a code segment must be readable to be loaded here */
+        if ((e2 & DESC_CS_MASK) && !(e2 & DESC_R_MASK))
+            raise_exception_err(EXCP0A_TSS, err_sel);
+        /* if data or non conforming code, checks the rights */
+        if (((e2 >> DESC_TYPE_SHIFT) & 0xf) < 12 && (dpl < cpl || dpl < rpl))
+            raise_exception_err(EXCP0A_TSS, err_sel);
+    }
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, err_sel);
+    cpu_x86_load_seg_cache(env, seg_reg, selector,
+                           get_seg_base(e1, e2), get_seg_limit(e1, e2), e2);
+}
+/* values of switch_tss()'s 'source' argument: what triggered the task switch */
+#define SWITCH_TSS_JMP  0
+#define SWITCH_TSS_IRET 1
+#define SWITCH_TSS_CALL 2
+/* Switch to the task whose TSS descriptor is (e1, e2), reached through
+ * tss_selector.  XXX: restore CPU state in registers (PowerPC case) */
+static void switch_tss(int tss_selector,
+                       uint32_t e1, uint32_t e2, int source,
+                       uint32_t next_eip)
+{
+    int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, v1, v2, i;
+    target_ulong tss_base;
+    uint32_t new_regs[8], new_segs[6];
+    uint32_t new_eflags, new_eip, new_cr3, new_ldt, new_trap;
+    uint32_t old_eflags, eflags_mask;
+    SegmentCache *dt;
+    int index;
+    target_ulong ptr;
+    /* source is one of SWITCH_TSS_JMP/IRET/CALL; next_eip is saved in the old TSS */
+    type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+    LOG_PCALL("switch_tss: sel=0x%04x type=%d src=%d\n", tss_selector, type, source);
+
+    /* if task gate, we read the TSS segment and we load it */
+    if (type == 5) {
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc);
+        tss_selector = e1 >> 16;
+        if (tss_selector & 4)
+            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+        if (load_segment(&e1, &e2, tss_selector) != 0)
+            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+        if (e2 & DESC_S_MASK)
+            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        if ((type & 7) != 1)
+            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+    }
+
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc);
+
+    if (type & 8)
+        tss_limit_max = 103;
+    else
+        tss_limit_max = 43;
+    tss_limit = get_seg_limit(e1, e2);
+    tss_base = get_seg_base(e1, e2);
+    if ((tss_selector & 4) != 0 ||
+        tss_limit < tss_limit_max)
+        raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+    old_type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+    if (old_type & 8)
+        old_tss_limit_max = 103;
+    else
+        old_tss_limit_max = 43;
+
+    /* read all the registers from the new TSS */
+    if (type & 8) {
+        /* 32 bit */
+        new_cr3 = ldl_kernel(tss_base + 0x1c);
+        new_eip = ldl_kernel(tss_base + 0x20);
+        new_eflags = ldl_kernel(tss_base + 0x24);
+        for(i = 0; i < 8; i++)
+            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+        for(i = 0; i < 6; i++)
+            new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
+        new_ldt = lduw_kernel(tss_base + 0x60);
+        new_trap = ldl_kernel(tss_base + 0x64);
+    } else {
+        /* 16 bit: ES, CS, SS, DS are consecutive 16-bit words at 0x22..0x28 */
+        new_cr3 = 0;
+        new_eip = lduw_kernel(tss_base + 0x0e);
+        new_eflags = lduw_kernel(tss_base + 0x10);
+        for(i = 0; i < 8; i++)
+            new_regs[i] = lduw_kernel(tss_base + (0x12 + i * 2)) | 0xffff0000;
+        for(i = 0; i < 4; i++)
+            new_segs[i] = lduw_kernel(tss_base + (0x22 + i * 2));
+        new_ldt = lduw_kernel(tss_base + 0x2a);
+        new_segs[R_FS] = 0;
+        new_segs[R_GS] = 0;
+        new_trap = 0;
+    }
+
+    /* NOTE: we must avoid memory exceptions during the task switch,
+       so we make dummy accesses before */
+    /* XXX: it can still fail in some cases, so a bigger hack is
+       necessary to validate the TLB after having done the accesses */
+
+    v1 = ldub_kernel(env->tr.base);
+    v2 = ldub_kernel(env->tr.base + old_tss_limit_max);
+    stb_kernel(env->tr.base, v1);
+    stb_kernel(env->tr.base + old_tss_limit_max, v2);
+
+    /* clear busy bit (it is restartable) */
+    if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) {
+        target_ulong ptr;
+        uint32_t e2;
+        ptr = env->gdt.base + (env->tr.selector & ~7);
+        e2 = ldl_kernel(ptr + 4);
+        e2 &= ~DESC_TSS_BUSY_MASK;
+        stl_kernel(ptr + 4, e2);
+    }
+    old_eflags = compute_eflags();
+    if (source == SWITCH_TSS_IRET)
+        old_eflags &= ~NT_MASK;
+
+    /* save the current state in the old TSS, laid out in the OLD TSS's format */
+    if (old_type & 8) {
+        /* 32 bit */
+        stl_kernel(env->tr.base + 0x20, next_eip);
+        stl_kernel(env->tr.base + 0x24, old_eflags);
+        stl_kernel(env->tr.base + (0x28 + 0 * 4), EAX);
+        stl_kernel(env->tr.base + (0x28 + 1 * 4), ECX);
+        stl_kernel(env->tr.base + (0x28 + 2 * 4), EDX);
+        stl_kernel(env->tr.base + (0x28 + 3 * 4), EBX);
+        stl_kernel(env->tr.base + (0x28 + 4 * 4), ESP);
+        stl_kernel(env->tr.base + (0x28 + 5 * 4), EBP);
+        stl_kernel(env->tr.base + (0x28 + 6 * 4), ESI);
+        stl_kernel(env->tr.base + (0x28 + 7 * 4), EDI);
+        for(i = 0; i < 6; i++)
+            stw_kernel(env->tr.base + (0x48 + i * 4), env->segs[i].selector);
+    } else {
+        /* 16 bit */
+        stw_kernel(env->tr.base + 0x0e, next_eip);
+        stw_kernel(env->tr.base + 0x10, old_eflags);
+        stw_kernel(env->tr.base + (0x12 + 0 * 2), EAX);
+        stw_kernel(env->tr.base + (0x12 + 1 * 2), ECX);
+        stw_kernel(env->tr.base + (0x12 + 2 * 2), EDX);
+        stw_kernel(env->tr.base + (0x12 + 3 * 2), EBX);
+        stw_kernel(env->tr.base + (0x12 + 4 * 2), ESP);
+        stw_kernel(env->tr.base + (0x12 + 5 * 2), EBP);
+        stw_kernel(env->tr.base + (0x12 + 6 * 2), ESI);
+        stw_kernel(env->tr.base + (0x12 + 7 * 2), EDI);
+        for(i = 0; i < 4; i++)
+            stw_kernel(env->tr.base + (0x22 + i * 2), env->segs[i].selector);
+    }
+
+    /* now if an exception occurs, it will occur in the next task
+       context */
+
+    if (source == SWITCH_TSS_CALL) {
+        stw_kernel(tss_base, env->tr.selector);
+        new_eflags |= NT_MASK;
+    }
+
+    /* set busy bit */
+    if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_CALL) {
+        target_ulong ptr;
+        uint32_t e2;
+        ptr = env->gdt.base + (tss_selector & ~7);
+        e2 = ldl_kernel(ptr + 4);
+        e2 |= DESC_TSS_BUSY_MASK;
+        stl_kernel(ptr + 4, e2);
+    }
+
+    /* set the new CPU state */
+    /* from this point, any exception which occurs can give problems */
+    env->cr[0] |= CR0_TS_MASK;
+    env->hflags |= HF_TS_MASK;
+    env->tr.selector = tss_selector;
+    env->tr.base = tss_base;
+    env->tr.limit = tss_limit;
+    env->tr.flags = e2 & ~DESC_TSS_BUSY_MASK;
+
+    if ((type & 8) && (env->cr[0] & CR0_PG_MASK)) {
+        cpu_x86_update_cr3(env, new_cr3);
+    }
+
+    /* load all registers without an exception, then reload them with
+       possible exception */
+    env->eip = new_eip;
+    eflags_mask = TF_MASK | AC_MASK | ID_MASK |
+        IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK;
+    if (!(type & 8))
+        eflags_mask &= 0xffff;
+    load_eflags(new_eflags, eflags_mask);
+    /* XXX: what to do in 16 bit case ? */
+    EAX = new_regs[0];
+    ECX = new_regs[1];
+    EDX = new_regs[2];
+    EBX = new_regs[3];
+    ESP = new_regs[4];
+    EBP = new_regs[5];
+    ESI = new_regs[6];
+    EDI = new_regs[7];
+    if (new_eflags & VM_MASK) {
+        for(i = 0; i < 6; i++)
+            load_seg_vm(i, new_segs[i]);
+        /* in vm86, CPL is always 3 */
+        cpu_x86_set_cpl(env, 3);
+    } else {
+        /* CPL is set the RPL of CS */
+        cpu_x86_set_cpl(env, new_segs[R_CS] & 3);
+        /* first just selectors as the rest may trigger exceptions */
+        for(i = 0; i < 6; i++)
+            cpu_x86_load_seg_cache(env, i, new_segs[i], 0, 0, 0);
+    }
+
+    env->ldt.selector = new_ldt & ~4;
+    env->ldt.base = 0;
+    env->ldt.limit = 0;
+    env->ldt.flags = 0;
+
+    /* load the LDT */
+    if (new_ldt & 4)
+        raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+
+    if ((new_ldt & 0xfffc) != 0) {
+        dt = &env->gdt;
+        index = new_ldt & ~7;
+        if ((index + 7) > dt->limit)
+            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+        ptr = dt->base + index;
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
+        if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
+            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+        load_seg_cache_raw_dt(&env->ldt, e1, e2);
+    }
+
+    /* load the segments */
+    if (!(new_eflags & VM_MASK)) {
+        tss_load_seg(R_CS, new_segs[R_CS]);
+        tss_load_seg(R_SS, new_segs[R_SS]);
+        tss_load_seg(R_ES, new_segs[R_ES]);
+        tss_load_seg(R_DS, new_segs[R_DS]);
+        tss_load_seg(R_FS, new_segs[R_FS]);
+        tss_load_seg(R_GS, new_segs[R_GS]);
+    }
+
+    /* check that EIP is in the CS segment limits */
+    if (new_eip > env->segs[R_CS].limit) {
+        /* XXX: different exception if CALL ? */
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+
+#ifndef CONFIG_USER_ONLY
+    /* reset local breakpoints */
+    if (env->dr[7] & 0x55) {
+        for (i = 0; i < 4; i++) {
+            if (hw_breakpoint_enabled(env->dr[7], i) == 0x1)
+                hw_breakpoint_remove(env, i);
+        }
+        env->dr[7] &= ~0x55;
+    }
+#endif
+}
+
+/* Check the current TSS's I/O permission bits for 'size' consecutive ports
+ * starting at 'addr'; raises EXCP0D_GPF with error code 0 unless all are clear */
+static inline void check_io(int addr, int size)
+{
+    int bitmap_off, bits;
+
+    /* TSS must be a valid 32 bit one */
+    if (!(env->tr.flags & DESC_P_MASK) ||
+        ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 9 ||
+        env->tr.limit < 103)
+        goto denied;
+    /* the word at TSS offset 0x66 locates the bitmap; one byte covers 8 ports */
+    bitmap_off = lduw_kernel(env->tr.base + 0x66) + (addr >> 3);
+    /* Note: the check needs two bytes */
+    if ((bitmap_off + 1) > env->tr.limit)
+        goto denied;
+    bits = lduw_kernel(env->tr.base + bitmap_off) >> (addr & 7);
+    /* all bits must be zero to allow the I/O */
+    if ((bits & ((1 << size) - 1)) == 0)
+        return;
+
+denied:
+    raise_exception_err(EXCP0D_GPF, 0);
+}
+/* I/O permission check for a byte access at port t0: check_io() with size 1 */
+void helper_check_iob(uint32_t t0)
+{
+    check_io(t0, 1);
+}
+/* I/O permission check for a word access at port t0: check_io() with size 2 */
+void helper_check_iow(uint32_t t0)
+{
+    check_io(t0, 2);
+}
+/* I/O permission check for a long access at port t0: check_io() with size 4 */
+void helper_check_iol(uint32_t t0)
+{
+    check_io(t0, 4);
+}
+/* Pass the low 8 bits of 'data' to cpu_outb() for 'port' */
+void helper_outb(uint32_t port, uint32_t data)
+{
+    cpu_outb(port, data & 0xff);
+}
+/* Return whatever cpu_inb() yields for 'port' */
+target_ulong helper_inb(uint32_t port)
+{
+    return cpu_inb(port);
+}
+/* Pass the low 16 bits of 'data' to cpu_outw() for 'port' */
+void helper_outw(uint32_t port, uint32_t data)
+{
+    cpu_outw(port, data & 0xffff);
+}
+/* Return whatever cpu_inw() yields for 'port' */
+target_ulong helper_inw(uint32_t port)
+{
+    return cpu_inw(port);
+}
+/* Pass all 32 bits of 'data' to cpu_outl() for 'port' */
+void helper_outl(uint32_t port, uint32_t data)
+{
+    cpu_outl(port, data);
+}
+/* Return whatever cpu_inl() yields for 'port' */
+target_ulong helper_inl(uint32_t port)
+{
+    return cpu_inl(port);
+}
+
+/* Stack-pointer mask selected by descriptor flags e2: 0xffffffff when
+ * DESC_B_MASK is set in e2, 0xffff when it is clear.
+ * The result is what callers combine with ESP/sp values. */
+static inline unsigned int get_sp_mask(unsigned int e2)
+{
+    return (e2 & DESC_B_MASK) ? 0xffffffff : 0xffff;
+}
+
+static int exeption_has_error_code(int intno)
+{
+        switch(intno) {
+        case 8:
+        case 10:
+        case 11:
+        case 12:
+        case 13:
+        case 14:
+        case 17:
+            return 1;
+        }
+	return 0;
+}
+/* SET_ESP(val, sp_mask): write val to ESP under sp_mask; on TARGET_X86_64 a 16-bit mask keeps ESP's upper bits, a 32-bit mask zero-extends val, any other mask stores val as is */
+#ifdef TARGET_X86_64
+#define SET_ESP(val, sp_mask)\
+do {\
+    if ((sp_mask) == 0xffff)\
+        ESP = (ESP & ~0xffff) | ((val) & 0xffff);\
+    else if ((sp_mask) == 0xffffffffLL)\
+        ESP = (uint32_t)(val);\
+    else\
+        ESP = (val);\
+} while (0)
+#else /* !TARGET_X86_64 */
+#define SET_ESP(val, sp_mask) ESP = (ESP & ~(sp_mask)) | ((val) & (sp_mask))
+#endif /* TARGET_X86_64 */
+
+/* Segment base + masked stack pointer, truncated to 32 bits because the sum can
+ * overflow on 64-bit machines.  Each argument is parenthesized so that expression arguments expand safely. */
+#define SEG_ADDL(ssp, sp, sp_mask) ((uint32_t)((ssp) + ((sp) & (sp_mask))))
+/* PUSHW: decrement sp by 2, then store the 16-bit val at ssp + (sp & sp_mask); sp must be an lvalue */
+/* XXX: add a is_user flag to have proper security support */
+#define PUSHW(ssp, sp, sp_mask, val)\
+{\
+    sp -= 2;\
+    stw_kernel((ssp) + (sp & (sp_mask)), (val));\
+}
+/* PUSHL: decrement sp by 4, then store val (cast to uint32_t) at the 32-bit-truncated address SEG_ADDL(ssp, sp, sp_mask) */
+#define PUSHL(ssp, sp, sp_mask, val)\
+{\
+    sp -= 4;\
+    stl_kernel(SEG_ADDL(ssp, sp, sp_mask), (uint32_t)(val));\
+}
+/* POPW: load a 16-bit value from ssp + (sp & sp_mask) into val, then advance sp by 2 */
+#define POPW(ssp, sp, sp_mask, val)\
+{\
+    val = lduw_kernel((ssp) + (sp & (sp_mask)));\
+    sp += 2;\
+}
+/* POPL: load a 32-bit value from SEG_ADDL(ssp, sp, sp_mask) into val, then advance sp by 4 */
+#define POPL(ssp, sp, sp_mask, val)\
+{\
+    val = (uint32_t)ldl_kernel(SEG_ADDL(ssp, sp, sp_mask));\
+    sp += 4;\
+}
+/* protected mode interrupt: deliver vector 'intno' through its 8-byte IDT gate,
+ * either by a task switch (task gate) or by pushing a 16/32-bit frame and loading CS:EIP */
+static void do_interrupt_protected(int intno, int is_int, int error_code,
+                                   unsigned int next_eip, int is_hw)
+{
+    SegmentCache *dt;
+    target_ulong ptr, ssp;
+    int type, dpl, selector, ss_dpl, cpl;
+    int has_error_code, new_stack, shift;
+    uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
+    uint32_t old_eip, sp_mask;
+    /* an error code is only pushed when neither is_int nor is_hw is set */
+    has_error_code = 0;
+    if (!is_int && !is_hw)
+        has_error_code = exeption_has_error_code(intno);
+    if (is_int)
+        old_eip = next_eip;
+    else
+        old_eip = env->eip;
+    /* fetch the gate descriptor for this vector from the IDT */
+    dt = &env->idt;
+    if (intno * 8 + 7 > dt->limit)
+        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+    ptr = dt->base + intno * 8;
+    e1 = ldl_kernel(ptr);
+    e2 = ldl_kernel(ptr + 4);
+    /* check gate type */
+    type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+    switch(type) {
+    case 5: /* task gate */
+        /* must do that check here to return the correct error code */
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2);
+        switch_tss(intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip);
+        if (has_error_code) {
+            int type;
+            uint32_t mask;
+            /* push the error code */
+            type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+            shift = type >> 3;
+            if (env->segs[R_SS].flags & DESC_B_MASK)
+                mask = 0xffffffff;
+            else
+                mask = 0xffff;
+            esp = (ESP - (2 << shift)) & mask;
+            ssp = env->segs[R_SS].base + esp;
+            if (shift)
+                stl_kernel(ssp, error_code);
+            else
+                stw_kernel(ssp, error_code);
+            SET_ESP(esp, mask);
+        }
+        return;
+    case 6: /* 286 interrupt gate */
+    case 7: /* 286 trap gate */
+    case 14: /* 386 interrupt gate */
+    case 15: /* 386 trap gate */
+        break;
+    default:
+        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+        break;
+    }
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    /* check privilege if software int */
+    if (is_int && dpl < cpl)
+        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+    /* check valid bit */
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2);
+    selector = e1 >> 16;
+    offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+    if ((selector & 0xfffc) == 0)
+        raise_exception_err(EXCP0D_GPF, 0);
+    /* from here on e1/e2 hold the target code segment descriptor, not the gate */
+    if (load_segment(&e1, &e2, selector) != 0)
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    if (dpl > cpl)
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+    if (!(e2 & DESC_C_MASK) && dpl < cpl) {
+        /* to inner privilege */
+        get_ss_esp_from_tss(&ss, &esp, dpl);
+        if ((ss & 0xfffc) == 0)
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+        if ((ss & 3) != dpl)
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+        if (load_segment(&ss_e1, &ss_e2, ss) != 0)
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+        ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+        if (ss_dpl != dpl)
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+        if (!(ss_e2 & DESC_S_MASK) ||
+            (ss_e2 & DESC_CS_MASK) ||
+            !(ss_e2 & DESC_W_MASK))
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+        if (!(ss_e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+        new_stack = 1;
+        sp_mask = get_sp_mask(ss_e2);
+        ssp = get_seg_base(ss_e1, ss_e2);
+    } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
+        /* to same privilege */
+        if (env->eflags & VM_MASK)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0;
+        sp_mask = get_sp_mask(env->segs[R_SS].flags);
+        ssp = env->segs[R_SS].base;
+        esp = ESP;
+        dpl = cpl;
+    } else {
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0; /* avoid warning */
+        sp_mask = 0; /* avoid warning */
+        ssp = 0; /* avoid warning */
+        esp = 0; /* avoid warning */
+    }
+    /* gate types 14/15 give shift 1 (32-bit pushes), types 6/7 give 0 (16-bit pushes) */
+    shift = type >> 3;
+
+#if 0
+    /* XXX: check that enough room is available */
+    push_size = 6 + (new_stack << 2) + (has_error_code << 1);
+    if (env->eflags & VM_MASK)
+        push_size += 8;
+    push_size <<= shift;
+#endif
+    if (shift == 1) {
+        if (new_stack) {
+            if (env->eflags & VM_MASK) {
+                PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector);
+                PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector);
+                PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector);
+                PUSHL(ssp, esp, sp_mask, env->segs[R_ES].selector);
+            }
+            PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector);
+            PUSHL(ssp, esp, sp_mask, ESP);
+        }
+        PUSHL(ssp, esp, sp_mask, compute_eflags());
+        PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector);
+        PUSHL(ssp, esp, sp_mask, old_eip);
+        if (has_error_code) {
+            PUSHL(ssp, esp, sp_mask, error_code);
+        }
+    } else {
+        if (new_stack) {
+            if (env->eflags & VM_MASK) {
+                PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector);
+                PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector);
+                PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector);
+                PUSHW(ssp, esp, sp_mask, env->segs[R_ES].selector);
+            }
+            PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector);
+            PUSHW(ssp, esp, sp_mask, ESP);
+        }
+        PUSHW(ssp, esp, sp_mask, compute_eflags());
+        PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector);
+        PUSHW(ssp, esp, sp_mask, old_eip);
+        if (has_error_code) {
+            PUSHW(ssp, esp, sp_mask, error_code);
+        }
+    }
+    /* all pushes done: now update SS (if switching stacks), ESP, CS, CPL and EIP */
+    if (new_stack) {
+        if (env->eflags & VM_MASK) {
+            cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
+            cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0);
+            cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0);
+            cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0, 0);
+        }
+        ss = (ss & ~3) | dpl; /* low two selector bits replaced by the new privilege level */
+        cpu_x86_load_seg_cache(env, R_SS, ss,
+                               ssp, get_seg_limit(ss_e1, ss_e2), ss_e2);
+    }
+    SET_ESP(esp, sp_mask);
+
+    selector = (selector & ~3) | dpl;
+    cpu_x86_load_seg_cache(env, R_CS, selector,
+                   get_seg_base(e1, e2),
+                   get_seg_limit(e1, e2),
+                   e2);
+    cpu_x86_set_cpl(env, dpl);
+    env->eip = offset;
+
+    /* interrupt gate clear IF mask */
+    if ((type & 1) == 0) {
+        env->eflags &= ~IF_MASK;
+    }
+    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+}
+
+#ifdef TARGET_X86_64
+/* PUSHQ: decrement sp by 8, then store val with stq_kernel at address sp; sp must be an lvalue */
+#define PUSHQ(sp, val)\
+{\
+    sp -= 8;\
+    stq_kernel(sp, (val));\
+}
+/* POPQ: load val with ldq_kernel from address sp, then advance sp by 8 */
+#define POPQ(sp, val)\
+{\
+    val = ldq_kernel(sp);\
+    sp += 8;\
+}
+/* Return the quadword stored at byte offset 8 * level + 4 of the current TSS (env->tr) */
+static inline target_ulong get_rsp_from_tss(int level)
+{
+    const int slot = 8 * level + 4;
+
+#if 0
+    printf("TR: base=" TARGET_FMT_lx " limit=%x\n",
+           env->tr.base, env->tr.limit);
+#endif
+
+    if (!(env->tr.flags & DESC_P_MASK))
+        cpu_abort(env, "invalid tss");
+    /* the whole quadword has to lie within the TSS limit */
+    if ((slot + 7) > env->tr.limit)
+        raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
+    return ldq_kernel(env->tr.base + slot);
+}
+/* 64 bit interrupt: deliver vector 'intno' through its 16-byte IDT gate; only gate
+ * types 14 and 15 are accepted and the frame is always pushed as quadwords */
+static void do_interrupt64(int intno, int is_int, int error_code,
+                           target_ulong next_eip, int is_hw)
+{
+    SegmentCache *dt;
+    target_ulong ptr;
+    int type, dpl, selector, cpl, ist;
+    int has_error_code, new_stack;
+    uint32_t e1, e2, e3, ss;
+    target_ulong old_eip, esp, offset;
+    /* an error code is only pushed when neither is_int nor is_hw is set */
+    has_error_code = 0;
+    if (!is_int && !is_hw)
+        has_error_code = exeption_has_error_code(intno);
+    if (is_int)
+        old_eip = next_eip;
+    else
+        old_eip = env->eip;
+    /* fetch the first three dwords of the gate descriptor from the IDT */
+    dt = &env->idt;
+    if (intno * 16 + 15 > dt->limit)
+        raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+    ptr = dt->base + intno * 16;
+    e1 = ldl_kernel(ptr);
+    e2 = ldl_kernel(ptr + 4);
+    e3 = ldl_kernel(ptr + 8);
+    /* check gate type */
+    type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+    switch(type) {
+    case 14: /* 386 interrupt gate */
+    case 15: /* 386 trap gate */
+        break;
+    default:
+        raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+        break;
+    }
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    /* check privilege if software int */
+    if (is_int && dpl < cpl)
+        raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+    /* check valid bit */
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, intno * 16 + 2);
+    selector = e1 >> 16;
+    offset = ((target_ulong)e3 << 32) | (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+    ist = e2 & 7; /* low 3 bits of the gate's second dword; non-zero selects TSS stack slot ist + 3 below */
+    if ((selector & 0xfffc) == 0)
+        raise_exception_err(EXCP0D_GPF, 0);
+
+    if (load_segment(&e1, &e2, selector) != 0)
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    if (dpl > cpl)
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+    if (!(e2 & DESC_L_MASK) || (e2 & DESC_B_MASK))
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    if ((!(e2 & DESC_C_MASK) && dpl < cpl) || ist != 0) {
+        /* to inner privilege */
+        if (ist != 0)
+            esp = get_rsp_from_tss(ist + 3);
+        else
+            esp = get_rsp_from_tss(dpl);
+        esp &= ~0xfLL; /* align stack */
+        ss = 0;
+        new_stack = 1;
+    } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
+        /* to same privilege */
+        if (env->eflags & VM_MASK)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0;
+        if (ist != 0)
+            esp = get_rsp_from_tss(ist + 3);
+        else
+            esp = ESP;
+        esp &= ~0xfLL; /* align stack */
+        dpl = cpl;
+    } else {
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0; /* avoid warning */
+        esp = 0; /* avoid warning */
+    }
+    /* SS and the old ESP are pushed unconditionally here, whether or not the stack changes */
+    PUSHQ(esp, env->segs[R_SS].selector);
+    PUSHQ(esp, ESP);
+    PUSHQ(esp, compute_eflags());
+    PUSHQ(esp, env->segs[R_CS].selector);
+    PUSHQ(esp, old_eip);
+    if (has_error_code) {
+        PUSHQ(esp, error_code);
+    }
+
+    if (new_stack) {
+        ss = 0 | dpl;
+        cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0);
+    }
+    ESP = esp;
+
+    selector = (selector & ~3) | dpl;
+    cpu_x86_load_seg_cache(env, R_CS, selector,
+                   get_seg_base(e1, e2),
+                   get_seg_limit(e1, e2),
+                   e2);
+    cpu_x86_set_cpl(env, dpl);
+    env->eip = offset;
+
+    /* interrupt gate clear IF mask */
+    if ((type & 1) == 0) {
+        env->eflags &= ~IF_MASK;
+    }
+    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+}
+#endif
+
+#ifdef TARGET_X86_64
+#if defined(CONFIG_USER_ONLY)
+/*
+ * SYSCALL for user-mode-only builds: record an EXCP_SYSCALL exception and
+ * the resume address (env->eip + next_eip_addend), then leave the CPU loop
+ * through cpu_loop_exit().
+ */
+void helper_syscall(int next_eip_addend)
+{
+    target_ulong resume_eip = env->eip + next_eip_addend;
+
+    env->exception_next_eip = resume_eip;
+    env->exception_index = EXCP_SYSCALL;
+    cpu_loop_exit();
+}
+#else
+/*
+ * SYSCALL for full-system emulation.
+ *
+ * Raises EXCP06_ILLOP when EFER.SCE is clear.  Otherwise the target CS
+ * selector is read from bits 47:32 of env->star (SS uses selector + 8),
+ * CPL is set to 0 and flat CS/SS segment caches are loaded.
+ *
+ * next_eip_addend is added to env->eip to form the return address that is
+ * stored in ECX.
+ */
+void helper_syscall(int next_eip_addend)
+{
+    int selector;
+
+    if (!(env->efer & MSR_EFER_SCE)) {
+        raise_exception_err(EXCP06_ILLOP, 0);
+    }
+    selector = (env->star >> 32) & 0xffff;
+    if (env->hflags & HF_LMA_MASK) {
+        /* long mode active */
+        int code64;
+
+        /* return address goes to ECX, current flags to register 11 */
+        ECX = env->eip + next_eip_addend;
+        env->regs[11] = compute_eflags();
+
+        /* sample the 64-bit-code flag before CS is reloaded below */
+        code64 = env->hflags & HF_CS64_MASK;
+
+        cpu_x86_set_cpl(env, 0);
+        /* CS descriptor flags include DESC_L_MASK here */
+        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
+                           0, 0xffffffff,
+                               DESC_G_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
+        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_W_MASK | DESC_A_MASK);
+        /* clear every flag bit that is set in env->fmask */
+        env->eflags &= ~env->fmask;
+        load_eflags(env->eflags, 0);
+        /* entry point depends on whether the caller ran 64-bit code */
+        if (code64)
+            env->eip = env->lstar;
+        else
+            env->eip = env->cstar;
+    } else {
+        /* long mode not active: return address truncated to 32 bits */
+        ECX = (uint32_t)(env->eip + next_eip_addend);
+
+        cpu_x86_set_cpl(env, 0);
+        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
+                           0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_W_MASK | DESC_A_MASK);
+        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
+        /* entry point is the low 32 bits of env->star */
+        env->eip = (uint32_t)env->star;
+    }
+}
+#endif
+#endif
+
+#ifdef TARGET_X86_64
+/*
+ * SYSRET: return to CPL 3 after a SYSCALL.
+ *
+ * Raises EXCP06_ILLOP when EFER.SCE is clear and EXCP0D_GPF when the CPU
+ * is not in protected mode or the current CPL is not 0.  The base selector
+ * is read from bits 63:48 of env->star.
+ *
+ * dflag == 2 selects the 64-bit return path when long mode is active; any
+ * other value takes the 32-bit path.
+ */
+void helper_sysret(int dflag)
+{
+    int cpl, selector;
+
+    if (!(env->efer & MSR_EFER_SCE)) {
+        raise_exception_err(EXCP06_ILLOP, 0);
+    }
+    cpl = env->hflags & HF_CPL_MASK;
+    if (!(env->cr[0] & CR0_PE_MASK) || cpl != 0) {
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+    selector = (env->star >> 48) & 0xffff;
+    if (env->hflags & HF_LMA_MASK) {
+        if (dflag == 2) {
+            /* 64-bit target: CS = selector + 16 with DESC_L_MASK, full ECX */
+            cpu_x86_load_seg_cache(env, R_CS, (selector + 16) | 3,
+                                   0, 0xffffffff,
+                                   DESC_G_MASK | DESC_P_MASK |
+                                   DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                                   DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK |
+                                   DESC_L_MASK);
+            env->eip = ECX;
+        } else {
+            /* 32-bit target: CS = selector, ECX truncated to 32 bits */
+            cpu_x86_load_seg_cache(env, R_CS, selector | 3,
+                                   0, 0xffffffff,
+                                   DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                                   DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                                   DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+            env->eip = (uint32_t)ECX;
+        }
+        cpu_x86_load_seg_cache(env, R_SS, selector + 8,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_W_MASK | DESC_A_MASK);
+        /* flags come back from register 11, limited to the listed mask */
+        load_eflags((uint32_t)(env->regs[11]), TF_MASK | AC_MASK | ID_MASK |
+                    IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK);
+        cpu_x86_set_cpl(env, 3);
+    } else {
+        /* long mode not active */
+        cpu_x86_load_seg_cache(env, R_CS, selector | 3,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+        env->eip = (uint32_t)ECX;
+        cpu_x86_load_seg_cache(env, R_SS, selector + 8,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_W_MASK | DESC_A_MASK);
+        /* only IF is touched on this path: it is set */
+        env->eflags |= IF_MASK;
+        cpu_x86_set_cpl(env, 3);
+    }
+#ifdef CONFIG_KQEMU
+    /* when kqemu_is_ok() reports true, leave the CPU loop with no exception */
+    if (kqemu_is_ok(env)) {
+        if (env->hflags & HF_LMA_MASK)
+            CC_OP = CC_OP_EFLAGS;
+        env->exception_index = -1;
+        cpu_loop_exit();
+    }
+#endif
+}
+#endif
+
+/*
+ * real mode interrupt
+ *
+ * Reads the 4-byte vector for intno from env->idt (16-bit offset followed
+ * by 16-bit selector), pushes flags, CS and IP as 16-bit words on the
+ * stack and transfers control to the handler.
+ *
+ * is_int selects which return address is pushed: next_eip when non-zero,
+ * the current env->eip otherwise.  error_code is not used here.
+ */
+static void do_interrupt_real(int intno, int is_int, int error_code,
+                              unsigned int next_eip)
+{
+    SegmentCache *dt;
+    target_ulong ptr, ssp;
+    int selector;
+    uint32_t offset, esp;
+    uint32_t old_cs, old_eip;
+
+    /* real mode (simpler !) */
+    dt = &env->idt;
+    /* the whole 4-byte entry must lie within the IDT limit */
+    if (intno * 4 + 3 > dt->limit)
+        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+    ptr = dt->base + intno * 4;
+    offset = lduw_kernel(ptr);
+    selector = lduw_kernel(ptr + 2);
+    esp = ESP;
+    ssp = env->segs[R_SS].base;
+    if (is_int)
+        old_eip = next_eip;
+    else
+        old_eip = env->eip;
+    old_cs = env->segs[R_CS].selector;
+    /* XXX: use SS segment size ? */
+    PUSHW(ssp, esp, 0xffff, compute_eflags());
+    PUSHW(ssp, esp, 0xffff, old_cs);
+    PUSHW(ssp, esp, 0xffff, old_eip);
+
+    /* update processor state */
+    /* only the low 16 bits of ESP are replaced */
+    ESP = (ESP & ~0xffff) | (esp & 0xffff);
+    env->eip = offset;
+    env->segs[R_CS].selector = selector;
+    /* real-mode segment base is selector * 16 */
+    env->segs[R_CS].base = (selector << 4);
+    env->eflags &= ~(IF_MASK | TF_MASK | AC_MASK | RF_MASK);
+}
+
+/*
+ * fake user mode interrupt
+ *
+ * Only the gate privilege check is performed: a software interrupt
+ * (is_int non-zero) whose gate DPL is below the current CPL raises
+ * EXCP0D_GPF.  For software interrupts EIP is then advanced to next_eip.
+ * error_code is not used.
+ */
+void do_interrupt_user(int intno, int is_int, int error_code,
+                       target_ulong next_eip)
+{
+    /* IDT entries are 16 bytes when long mode is active, 8 bytes otherwise */
+    const int entry_shift = (env->hflags & HF_LMA_MASK) ? 4 : 3;
+    SegmentCache *idt = &env->idt;
+    target_ulong gate = idt->base + (intno << entry_shift);
+    uint32_t gate_hi = ldl_kernel(gate + 4);
+    int gate_dpl = (gate_hi >> DESC_DPL_SHIFT) & 3;
+    int cur_pl = env->hflags & HF_CPL_MASK;
+
+    if (is_int) {
+        /* check privilege if software int */
+        if (gate_dpl < cur_pl) {
+            raise_exception_err(EXCP0D_GPF, (intno << entry_shift) + 2);
+        }
+
+        /* Since we emulate only user space, we cannot do more than
+           exiting the emulation with the suitable exception and error
+           code */
+        EIP = next_eip;
+    }
+}
+
+#if !defined(CONFIG_USER_ONLY)
+/*
+ * Record the event being delivered in the VMCB event_inj field, unless an
+ * event is already marked valid there.  The error code is stored as well
+ * when rm is zero and exeption_has_error_code(intno) is true.
+ * is_hw is not used.
+ */
+static void handle_even_inj(int intno, int is_int, int error_code,
+                            int is_hw, int rm)
+{
+    uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+    int kind;
+
+    if (event_inj & SVM_EVTINJ_VALID) {
+        /* an injection is already pending: leave it untouched */
+        return;
+    }
+
+    kind = is_int ? SVM_EVTINJ_TYPE_SOFT : SVM_EVTINJ_TYPE_EXEPT;
+    event_inj = intno | kind | SVM_EVTINJ_VALID;
+    if (!rm && exeption_has_error_code(intno)) {
+        event_inj |= SVM_EVTINJ_VALID_ERR;
+        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err), error_code);
+    }
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj);
+}
+#endif
+
+/*
+ * Begin execution of an interruption. is_int is TRUE if coming from
+ * the int instruction. next_eip is the EIP value AFTER the interrupt
+ * instruction. It is only relevant if is_int is TRUE.
+ *
+ * Dispatches to do_interrupt64(), do_interrupt_protected() or
+ * do_interrupt_real() depending on CR0.PE and, for 64-bit targets, on
+ * whether long mode is active.  In system emulation with HF_SVMI_MASK
+ * set, the event is first recorded through handle_even_inj() and the
+ * VMCB event_inj valid bit is cleared once delivery has completed.
+ */
+void do_interrupt(int intno, int is_int, int error_code,
+                  target_ulong next_eip, int is_hw)
+{
+    /* optional trace of the interrupt; only emitted in protected mode */
+    if (qemu_loglevel_mask(CPU_LOG_INT)) {
+        if ((env->cr[0] & CR0_PE_MASK)) {
+            static int count;
+            qemu_log("%6d: v=%02x e=%04x i=%d cpl=%d IP=%04x:" TARGET_FMT_lx " pc=" TARGET_FMT_lx " SP=%04x:" TARGET_FMT_lx,
+                    count, intno, error_code, is_int,
+                    env->hflags & HF_CPL_MASK,
+                    env->segs[R_CS].selector, EIP,
+                    (int)env->segs[R_CS].base + EIP,
+                    env->segs[R_SS].selector, ESP);
+            /* vector 0x0e additionally logs CR2, all others log EAX */
+            if (intno == 0x0e) {
+                qemu_log(" CR2=" TARGET_FMT_lx, env->cr[2]);
+            } else {
+                qemu_log(" EAX=" TARGET_FMT_lx, EAX);
+            }
+            qemu_log("\n");
+            log_cpu_state(env, X86_DUMP_CCOP);
+#if 0
+            {
+                int i;
+                uint8_t *ptr;
+                qemu_log("       code=");
+                ptr = env->segs[R_CS].base + env->eip;
+                for(i = 0; i < 16; i++) {
+                    qemu_log(" %02x", ldub(ptr + i));
+                }
+                qemu_log("\n");
+            }
+#endif
+            count++;
+        }
+    }
+    if (env->cr[0] & CR0_PE_MASK) {
+        /* protected mode */
+#if !defined(CONFIG_USER_ONLY)
+        if (env->hflags & HF_SVMI_MASK)
+            handle_even_inj(intno, is_int, error_code, is_hw, 0);
+#endif
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            do_interrupt64(intno, is_int, error_code, next_eip, is_hw);
+        } else
+#endif
+        {
+            do_interrupt_protected(intno, is_int, error_code, next_eip, is_hw);
+        }
+    } else {
+        /* real mode */
+#if !defined(CONFIG_USER_ONLY)
+        if (env->hflags & HF_SVMI_MASK)
+            handle_even_inj(intno, is_int, error_code, is_hw, 1);
+#endif
+        do_interrupt_real(intno, is_int, error_code, next_eip);
+    }
+
+#if !defined(CONFIG_USER_ONLY)
+    /* delivery finished: clear the valid bit of the VMCB event_inj field */
+    if (env->hflags & HF_SVMI_MASK) {
+	    uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+	    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
+    }
+#endif
+}
+
+/* This should come from sysemu.h - if we could include it here... */
+void qemu_system_reset_request(void);
+
+/*
+ * Check nested exceptions and change to double or triple fault if
+ * needed. It should only be called, if this is not an interrupt.
+ * Returns the new exception number.
+ *
+ * An exception counts as "contributory" here when its number is 0 or in
+ * the range 10..13.  *error_code is reset to 0 when the exception is
+ * promoted to EXCP08_DBLE.  env->old_exception is updated for
+ * contributory exceptions, page faults and double faults.
+ */
+static int check_exception(int intno, int *error_code)
+{
+    int first_contributory = env->old_exception == 0 ||
+                              (env->old_exception >= 10 &&
+                               env->old_exception <= 13);
+    int second_contributory = intno == 0 ||
+                               (intno >= 10 && intno <= 13);
+
+    qemu_log_mask(CPU_LOG_INT, "check_exception old: 0x%x new 0x%x\n",
+                env->old_exception, intno);
+
+#if !defined(CONFIG_USER_ONLY)
+    /* a fault while the previous exception was a double fault: triple fault */
+    if (env->old_exception == EXCP08_DBLE) {
+        if (env->hflags & HF_SVMI_MASK)
+            helper_vmexit(SVM_EXIT_SHUTDOWN, 0); /* does not return */
+
+        qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
+
+        qemu_system_reset_request();
+        return EXCP_HLT;
+    }
+#endif
+
+    /* contributory after contributory, or contributory/page fault after a
+       page fault, becomes a double fault */
+    if ((first_contributory && second_contributory)
+        || (env->old_exception == EXCP0E_PAGE &&
+            (second_contributory || (intno == EXCP0E_PAGE)))) {
+        intno = EXCP08_DBLE;
+        *error_code = 0;
+    }
+
+    /* remember this exception for the next nesting check */
+    if (second_contributory || (intno == EXCP0E_PAGE) ||
+        (intno == EXCP08_DBLE))
+        env->old_exception = intno;
+
+    return intno;
+}
+
+/*
+ * Signal an interruption. It is executed in the main CPU loop.
+ * is_int is TRUE if coming from the int instruction. next_eip is the
+ * EIP value AFTER the interrupt instruction. It is only relevant if
+ * is_int is TRUE.
+ *
+ * The pending exception is stored in env (exception_index, error_code,
+ * exception_is_int, exception_next_eip = env->eip + next_eip_addend) and
+ * the function then leaves through cpu_loop_exit(); it does not return.
+ */
+static void QEMU_NORETURN raise_interrupt(int intno, int is_int, int error_code,
+                                          int next_eip_addend)
+{
+    if (!is_int) {
+        /* exception: check the SVM exception intercept, then let
+           check_exception() promote it to a double fault if needed */
+        helper_svm_check_intercept_param(SVM_EXIT_EXCP_BASE + intno, error_code);
+        intno = check_exception(intno, &error_code);
+    } else {
+        /* software interrupt: check the SVM software-interrupt intercept */
+        helper_svm_check_intercept_param(SVM_EXIT_SWINT, 0);
+    }
+
+    env->exception_index = intno;
+    env->error_code = error_code;
+    env->exception_is_int = is_int;
+    env->exception_next_eip = env->eip + next_eip_addend;
+    cpu_loop_exit();
+}
+
+/* shortcuts to generate exceptions */
+
+/* Raise exception_index as a non-INT event carrying error_code. */
+void raise_exception_err(int exception_index, int error_code)
+{
+    const int is_int = 0;          /* not coming from an int instruction */
+    const int next_eip_addend = 0; /* resume address is env->eip itself */
+
+    raise_interrupt(exception_index, is_int, error_code, next_eip_addend);
+}
+
+void raise_exception(int exception_index)
+{
+    raise_interrupt(exception_index, 0, 0, 0);
+}
+
+/* SMM support */
+
+#if defined(CONFIG_USER_ONLY)
+
+/* SMM entry stub for CONFIG_USER_ONLY builds: intentionally does nothing. */
+void do_smm_enter(void)
+{
+}
+
+/* RSM stub for CONFIG_USER_ONLY builds: intentionally does nothing. */
+void helper_rsm(void)
+{
+}
+
+#else
+
+#ifdef TARGET_X86_64
+#define SMM_REVISION_ID 0x00020064
+#else
+#define SMM_REVISION_ID 0x00020000
+#endif
+
+/*
+ * Enter System Management Mode.
+ *
+ * Sets HF_SMM_MASK, writes the current CPU state to the state-save area
+ * at env->smbase + 0x8000 (layout differs between the TARGET_X86_64 and
+ * 32-bit builds) and then loads the initial SMM register state: EIP
+ * 0x8000, CS based at env->smbase, the other segments based at 0, and
+ * PE/EM/TS/PG cleared in CR0.
+ */
+void do_smm_enter(void)
+{
+    target_ulong sm_state;
+    SegmentCache *dt;
+    int i, offset;
+
+    qemu_log_mask(CPU_LOG_INT, "SMM: enter\n");
+    log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
+
+    env->hflags |= HF_SMM_MASK;
+    cpu_smm_update(env);
+
+    /* all offsets below are relative to this base */
+    sm_state = env->smbase + 0x8000;
+
+#ifdef TARGET_X86_64
+    /* six segment registers, 16 bytes each, starting at 0x7e00 */
+    for(i = 0; i < 6; i++) {
+        dt = &env->segs[i];
+        offset = 0x7e00 + i * 16;
+        stw_phys(sm_state + offset, dt->selector);
+        stw_phys(sm_state + offset + 2, (dt->flags >> 8) & 0xf0ff);
+        stl_phys(sm_state + offset + 4, dt->limit);
+        stq_phys(sm_state + offset + 8, dt->base);
+    }
+
+    /* descriptor tables and task register */
+    stq_phys(sm_state + 0x7e68, env->gdt.base);
+    stl_phys(sm_state + 0x7e64, env->gdt.limit);
+
+    stw_phys(sm_state + 0x7e70, env->ldt.selector);
+    stq_phys(sm_state + 0x7e78, env->ldt.base);
+    stl_phys(sm_state + 0x7e74, env->ldt.limit);
+    stw_phys(sm_state + 0x7e72, (env->ldt.flags >> 8) & 0xf0ff);
+
+    stq_phys(sm_state + 0x7e88, env->idt.base);
+    stl_phys(sm_state + 0x7e84, env->idt.limit);
+
+    stw_phys(sm_state + 0x7e90, env->tr.selector);
+    stq_phys(sm_state + 0x7e98, env->tr.base);
+    stl_phys(sm_state + 0x7e94, env->tr.limit);
+    stw_phys(sm_state + 0x7e92, (env->tr.flags >> 8) & 0xf0ff);
+
+    stq_phys(sm_state + 0x7ed0, env->efer);
+
+    /* general purpose registers, 8 bytes each, descending from 0x7ff8 */
+    stq_phys(sm_state + 0x7ff8, EAX);
+    stq_phys(sm_state + 0x7ff0, ECX);
+    stq_phys(sm_state + 0x7fe8, EDX);
+    stq_phys(sm_state + 0x7fe0, EBX);
+    stq_phys(sm_state + 0x7fd8, ESP);
+    stq_phys(sm_state + 0x7fd0, EBP);
+    stq_phys(sm_state + 0x7fc8, ESI);
+    stq_phys(sm_state + 0x7fc0, EDI);
+    /* registers 8..15 continue the same descending layout */
+    for(i = 8; i < 16; i++)
+        stq_phys(sm_state + 0x7ff8 - i * 8, env->regs[i]);
+    stq_phys(sm_state + 0x7f78, env->eip);
+    stl_phys(sm_state + 0x7f70, compute_eflags());
+    stl_phys(sm_state + 0x7f68, env->dr[6]);
+    stl_phys(sm_state + 0x7f60, env->dr[7]);
+
+    /* control registers are stored as 32-bit values */
+    stl_phys(sm_state + 0x7f48, env->cr[4]);
+    stl_phys(sm_state + 0x7f50, env->cr[3]);
+    stl_phys(sm_state + 0x7f58, env->cr[0]);
+
+    stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
+    stl_phys(sm_state + 0x7f00, env->smbase);
+#else
+    /* 32-bit layout: every field is stored as a 32-bit value */
+    stl_phys(sm_state + 0x7ffc, env->cr[0]);
+    stl_phys(sm_state + 0x7ff8, env->cr[3]);
+    stl_phys(sm_state + 0x7ff4, compute_eflags());
+    stl_phys(sm_state + 0x7ff0, env->eip);
+    stl_phys(sm_state + 0x7fec, EDI);
+    stl_phys(sm_state + 0x7fe8, ESI);
+    stl_phys(sm_state + 0x7fe4, EBP);
+    stl_phys(sm_state + 0x7fe0, ESP);
+    stl_phys(sm_state + 0x7fdc, EBX);
+    stl_phys(sm_state + 0x7fd8, EDX);
+    stl_phys(sm_state + 0x7fd4, ECX);
+    stl_phys(sm_state + 0x7fd0, EAX);
+    stl_phys(sm_state + 0x7fcc, env->dr[6]);
+    stl_phys(sm_state + 0x7fc8, env->dr[7]);
+
+    stl_phys(sm_state + 0x7fc4, env->tr.selector);
+    stl_phys(sm_state + 0x7f64, env->tr.base);
+    stl_phys(sm_state + 0x7f60, env->tr.limit);
+    stl_phys(sm_state + 0x7f5c, (env->tr.flags >> 8) & 0xf0ff);
+
+    stl_phys(sm_state + 0x7fc0, env->ldt.selector);
+    stl_phys(sm_state + 0x7f80, env->ldt.base);
+    stl_phys(sm_state + 0x7f7c, env->ldt.limit);
+    stl_phys(sm_state + 0x7f78, (env->ldt.flags >> 8) & 0xf0ff);
+
+    stl_phys(sm_state + 0x7f74, env->gdt.base);
+    stl_phys(sm_state + 0x7f70, env->gdt.limit);
+
+    stl_phys(sm_state + 0x7f58, env->idt.base);
+    stl_phys(sm_state + 0x7f54, env->idt.limit);
+
+    /* segment descriptors live in two groups of three 12-byte records;
+       the selectors are stored separately starting at 0x7fa8 */
+    for(i = 0; i < 6; i++) {
+        dt = &env->segs[i];
+        if (i < 3)
+            offset = 0x7f84 + i * 12;
+        else
+            offset = 0x7f2c + (i - 3) * 12;
+        stl_phys(sm_state + 0x7fa8 + i * 4, dt->selector);
+        stl_phys(sm_state + offset + 8, dt->base);
+        stl_phys(sm_state + offset + 4, dt->limit);
+        stl_phys(sm_state + offset, (dt->flags >> 8) & 0xf0ff);
+    }
+    stl_phys(sm_state + 0x7f14, env->cr[4]);
+
+    stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
+    stl_phys(sm_state + 0x7ef8, env->smbase);
+#endif
+    /* init SMM cpu state */
+
+#ifdef TARGET_X86_64
+    cpu_load_efer(env, 0);
+#endif
+    load_eflags(0, ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    env->eip = 0x00008000;
+    /* CS selector is smbase >> 4 and its base is smbase */
+    cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
+                           0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
+
+    cpu_x86_update_cr0(env,
+                       env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | CR0_PG_MASK));
+    cpu_x86_update_cr4(env, 0);
+    env->dr[7] = 0x00000400;
+    CC_OP = CC_OP_EFLAGS;
+}
+
+/*
+ * RSM: leave System Management Mode.
+ *
+ * Reloads the CPU state from the state-save area at env->smbase + 0x8000
+ * using the same offsets do_smm_enter() wrote, then clears HF_SMM_MASK.
+ * env->smbase itself is only reloaded when bit 0x20000 of the saved
+ * revision ID is set, and the value is masked with ~0x7fff.
+ */
+void helper_rsm(void)
+{
+    target_ulong sm_state;
+    int i, offset;
+    uint32_t val;
+
+    sm_state = env->smbase + 0x8000;
+#ifdef TARGET_X86_64
+    cpu_load_efer(env, ldq_phys(sm_state + 0x7ed0));
+
+    /* six segment registers, 16 bytes each, starting at 0x7e00 */
+    for(i = 0; i < 6; i++) {
+        offset = 0x7e00 + i * 16;
+        cpu_x86_load_seg_cache(env, i,
+                               lduw_phys(sm_state + offset),
+                               ldq_phys(sm_state + offset + 8),
+                               ldl_phys(sm_state + offset + 4),
+                               (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
+    }
+
+    env->gdt.base = ldq_phys(sm_state + 0x7e68);
+    env->gdt.limit = ldl_phys(sm_state + 0x7e64);
+
+    env->ldt.selector = lduw_phys(sm_state + 0x7e70);
+    env->ldt.base = ldq_phys(sm_state + 0x7e78);
+    env->ldt.limit = ldl_phys(sm_state + 0x7e74);
+    env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
+
+    env->idt.base = ldq_phys(sm_state + 0x7e88);
+    env->idt.limit = ldl_phys(sm_state + 0x7e84);
+
+    env->tr.selector = lduw_phys(sm_state + 0x7e90);
+    env->tr.base = ldq_phys(sm_state + 0x7e98);
+    env->tr.limit = ldl_phys(sm_state + 0x7e94);
+    env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
+
+    /* general purpose registers, 8 bytes each, descending from 0x7ff8 */
+    EAX = ldq_phys(sm_state + 0x7ff8);
+    ECX = ldq_phys(sm_state + 0x7ff0);
+    EDX = ldq_phys(sm_state + 0x7fe8);
+    EBX = ldq_phys(sm_state + 0x7fe0);
+    ESP = ldq_phys(sm_state + 0x7fd8);
+    EBP = ldq_phys(sm_state + 0x7fd0);
+    ESI = ldq_phys(sm_state + 0x7fc8);
+    EDI = ldq_phys(sm_state + 0x7fc0);
+    for(i = 8; i < 16; i++)
+        env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
+    env->eip = ldq_phys(sm_state + 0x7f78);
+    load_eflags(ldl_phys(sm_state + 0x7f70),
+                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    env->dr[6] = ldl_phys(sm_state + 0x7f68);
+    env->dr[7] = ldl_phys(sm_state + 0x7f60);
+
+    /* control registers are restored in the order CR4, CR3, CR0 */
+    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
+    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
+    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
+
+    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    if (val & 0x20000) {
+        env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
+    }
+#else
+    /* 32-bit layout: every field is read as a 32-bit value */
+    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
+    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
+    load_eflags(ldl_phys(sm_state + 0x7ff4),
+                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    env->eip = ldl_phys(sm_state + 0x7ff0);
+    EDI = ldl_phys(sm_state + 0x7fec);
+    ESI = ldl_phys(sm_state + 0x7fe8);
+    EBP = ldl_phys(sm_state + 0x7fe4);
+    ESP = ldl_phys(sm_state + 0x7fe0);
+    EBX = ldl_phys(sm_state + 0x7fdc);
+    EDX = ldl_phys(sm_state + 0x7fd8);
+    ECX = ldl_phys(sm_state + 0x7fd4);
+    EAX = ldl_phys(sm_state + 0x7fd0);
+    env->dr[6] = ldl_phys(sm_state + 0x7fcc);
+    env->dr[7] = ldl_phys(sm_state + 0x7fc8);
+
+    env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
+    env->tr.base = ldl_phys(sm_state + 0x7f64);
+    env->tr.limit = ldl_phys(sm_state + 0x7f60);
+    env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
+
+    env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
+    env->ldt.base = ldl_phys(sm_state + 0x7f80);
+    env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
+    env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
+
+    env->gdt.base = ldl_phys(sm_state + 0x7f74);
+    env->gdt.limit = ldl_phys(sm_state + 0x7f70);
+
+    env->idt.base = ldl_phys(sm_state + 0x7f58);
+    env->idt.limit = ldl_phys(sm_state + 0x7f54);
+
+    /* segment descriptors live in two groups of three 12-byte records;
+       the selectors are stored separately starting at 0x7fa8 */
+    for(i = 0; i < 6; i++) {
+        if (i < 3)
+            offset = 0x7f84 + i * 12;
+        else
+            offset = 0x7f2c + (i - 3) * 12;
+        cpu_x86_load_seg_cache(env, i,
+                               ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
+                               ldl_phys(sm_state + offset + 8),
+                               ldl_phys(sm_state + offset + 4),
+                               (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
+    }
+    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
+
+    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    if (val & 0x20000) {
+        env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
+    }
+#endif
+    CC_OP = CC_OP_EFLAGS;
+    env->hflags &= ~HF_SMM_MASK;
+    cpu_smm_update(env);
+
+    qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n");
+    log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
+}
+
+#endif /* !CONFIG_USER_ONLY */
+
+
+/* division, flags are undefined */
+
+/*
+ * Unsigned 8-bit divide: AX / (t0 & 0xff).  The quotient goes to AL and
+ * the remainder to AH.  EXCP00_DIVZ is raised for a zero divisor or a
+ * quotient that does not fit in 8 bits.
+ */
+void helper_divb_AL(target_ulong t0)
+{
+    const unsigned int dividend = EAX & 0xffff;
+    const unsigned int divisor = t0 & 0xff;
+    unsigned int quot, rem;
+
+    if (divisor == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    quot = dividend / divisor;
+    if (quot > 0xff) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    rem = (dividend % divisor) & 0xff;
+    EAX = (EAX & ~0xffff) | (rem << 8) | (quot & 0xff);
+}
+
+/*
+ * Signed 8-bit divide: (int16_t)AX / (int8_t)t0.  The quotient goes to AL
+ * and the remainder to AH.  EXCP00_DIVZ is raised for a zero divisor or a
+ * quotient that does not fit in a signed 8-bit value.
+ */
+void helper_idivb_AL(target_ulong t0)
+{
+    const int dividend = (int16_t)EAX;
+    const int divisor = (int8_t)t0;
+    int quot, rem;
+
+    if (divisor == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    quot = dividend / divisor;
+    if (quot != (int8_t)quot) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    quot &= 0xff;
+    rem = (dividend % divisor) & 0xff;
+    EAX = (EAX & ~0xffff) | (rem << 8) | quot;
+}
+
+/*
+ * Unsigned 16-bit divide: DX:AX / (t0 & 0xffff).  The quotient goes to AX
+ * and the remainder to DX.  EXCP00_DIVZ is raised for a zero divisor or a
+ * quotient that does not fit in 16 bits.
+ */
+void helper_divw_AX(target_ulong t0)
+{
+    const unsigned int dividend = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
+    const unsigned int divisor = t0 & 0xffff;
+    unsigned int quot, rem;
+
+    if (divisor == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    quot = dividend / divisor;
+    if (quot > 0xffff) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    rem = (dividend % divisor) & 0xffff;
+    EAX = (EAX & ~0xffff) | (quot & 0xffff);
+    EDX = (EDX & ~0xffff) | rem;
+}
+
+/*
+ * Signed 16-bit divide: DX:AX / (int16_t)t0.  The quotient goes to AX and
+ * the remainder to DX.  EXCP00_DIVZ is raised for a zero divisor or a
+ * quotient that does not fit in a signed 16-bit value.
+ */
+void helper_idivw_AX(target_ulong t0)
+{
+    int num, den, q, r;
+
+    num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
+    den = (int16_t)t0;
+    if (den == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    /*
+     * DX:AX == 0x80000000 divided by -1 overflows the host 'int' division
+     * (undefined behaviour in C, SIGFPE on x86 hosts).  The true quotient
+     * 2^31 cannot fit in 16 bits, so report the divide error instead of
+     * performing the division.  (-0x7fffffff - 1) is INT_MIN for the
+     * 32-bit 'int' this code assumes.
+     */
+    if (den == -1 && num == (-0x7fffffff - 1)) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    q = (num / den);
+    if (q != (int16_t)q)
+        raise_exception(EXCP00_DIVZ);
+    q &= 0xffff;
+    r = (num % den) & 0xffff;
+    EAX = (EAX & ~0xffff) | q;
+    EDX = (EDX & ~0xffff) | r;
+}
+
+/*
+ * Unsigned 32-bit divide: EDX:EAX / (32-bit t0).  The quotient goes to
+ * EAX and the remainder to EDX.  EXCP00_DIVZ is raised for a zero divisor
+ * or a quotient that does not fit in 32 bits.
+ */
+void helper_divl_EAX(target_ulong t0)
+{
+    const uint64_t dividend = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+    const unsigned int divisor = t0;
+    uint64_t quot;
+    unsigned int rem;
+
+    if (divisor == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    quot = dividend / divisor;
+    rem = dividend % divisor;
+    if (quot > 0xffffffff) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    EAX = (uint32_t)quot;
+    EDX = (uint32_t)rem;
+}
+
+/*
+ * Signed 32-bit divide: EDX:EAX / (int)t0.  The quotient goes to EAX and
+ * the remainder to EDX.  EXCP00_DIVZ is raised for a zero divisor or a
+ * quotient that does not fit in a signed 32-bit value.
+ */
+void helper_idivl_EAX(target_ulong t0)
+{
+    int den, r;
+    int64_t num, q;
+
+    num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+    den = t0;
+    if (den == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    /*
+     * EDX:EAX == -2^63 divided by -1 overflows the host 64-bit division
+     * (undefined behaviour in C, SIGFPE on x86 hosts).  The true quotient
+     * 2^63 cannot fit in 32 bits, so report the divide error instead of
+     * performing the division.
+     */
+    if (den == -1 && (uint64_t)num == ((uint64_t)1 << 63)) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    q = (num / den);
+    r = (num % den);
+    if (q != (int32_t)q)
+        raise_exception(EXCP00_DIVZ);
+    EAX = (uint32_t)q;
+    EDX = (uint32_t)r;
+}
+
+/* bcd */
+
+/* XXX: exception */
+/*
+ * AAM: split AL into AH = AL / base and AL = AL % base.  CC_DST receives
+ * the new AL.  No check for base == 0 is made here.
+ */
+void helper_aam(int base)
+{
+    const int al_in = EAX & 0xff;
+    const int quot = al_in / base;
+    const int rem = al_in % base;
+
+    EAX = (EAX & ~0xffff) | rem | (quot << 8);
+    CC_DST = rem;
+}
+
+/*
+ * AAD: AL = (AH * base + AL) & 0xff and AH = 0.  CC_DST receives the new
+ * AL.
+ */
+void helper_aad(int base)
+{
+    const int low = EAX & 0xff;
+    const int high = (EAX >> 8) & 0xff;
+    const int combined = ((high * base) + low) & 0xff;
+
+    EAX = (EAX & ~0xffff) | combined;
+    CC_DST = combined;
+}
+
+/*
+ * AAA: ASCII adjust AL after addition.  When the low nibble of AL exceeds
+ * 9 or the auxiliary flag is set, 6 is added to AL, AH is incremented
+ * (plus one more if AL was above 0xf9) and CF/AF are set; otherwise CF/AF
+ * are cleared.  AL is always reduced to its low nibble.  The resulting
+ * flags are written to CC_SRC.
+ */
+void helper_aaa(void)
+{
+    int flags = helper_cc_compute_all(CC_OP);
+    int low = EAX & 0xff;
+    int high = (EAX >> 8) & 0xff;
+    /* extra carry into AH, decided from the unadjusted AL */
+    const int carry_out = (low > 0xf9);
+
+    if ((flags & CC_A) || (low & 0x0f) > 9) {
+        low = (low + 6) & 0x0f;
+        high = (high + 1 + carry_out) & 0xff;
+        flags |= CC_C | CC_A;
+    } else {
+        low &= 0x0f;
+        flags &= ~(CC_C | CC_A);
+    }
+    EAX = (EAX & ~0xffff) | low | (high << 8);
+    CC_SRC = flags;
+}
+
+/*
+ * AAS: ASCII adjust AL after subtraction.  When the low nibble of AL
+ * exceeds 9 or the auxiliary flag is set, 6 is subtracted from AL, AH is
+ * decremented (minus one more if AL was below 6) and CF/AF are set;
+ * otherwise CF/AF are cleared.  AL is always reduced to its low nibble.
+ * The resulting flags are written to CC_SRC.
+ */
+void helper_aas(void)
+{
+    int flags = helper_cc_compute_all(CC_OP);
+    int low = EAX & 0xff;
+    int high = (EAX >> 8) & 0xff;
+    /* extra borrow from AH, decided from the unadjusted AL */
+    const int borrow_out = (low < 6);
+
+    if ((flags & CC_A) || (low & 0x0f) > 9) {
+        low = (low - 6) & 0x0f;
+        high = (high - 1 - borrow_out) & 0xff;
+        flags |= CC_C | CC_A;
+    } else {
+        low &= 0x0f;
+        flags &= ~(CC_C | CC_A);
+    }
+    EAX = (EAX & ~0xffff) | low | (high << 8);
+    CC_SRC = flags;
+}
+
+/*
+ * DAA: decimal adjust AL after addition.  Adds 6 when the low nibble
+ * exceeds 9 or AF was set, then adds 0x60 when the result exceeds 0x9f or
+ * CF was set.  AF/CF plus hand-computed ZF, PF and SF go to CC_SRC.
+ */
+void helper_daa(void)
+{
+    const int old_flags = helper_cc_compute_all(CC_OP);
+    int al = EAX & 0xff;
+    int new_flags = 0;
+
+    if ((old_flags & CC_A) || (al & 0x0f) > 9) {
+        al = (al + 6) & 0xff;
+        new_flags |= CC_A;
+    }
+    /* second test uses AL as already adjusted by the first step */
+    if ((old_flags & CC_C) || al > 0x9f) {
+        al = (al + 0x60) & 0xff;
+        new_flags |= CC_C;
+    }
+    EAX = (EAX & ~0xff) | al;
+
+    /* well, speed is not an issue here, so we compute the flags by hand */
+    new_flags |= ((al == 0) << 6)   /* zf */
+               | parity_table[al]   /* pf */
+               | (al & 0x80);       /* sf */
+    CC_SRC = new_flags;
+}
+
+/*
+ * DAS: decimal adjust AL after subtraction.  Subtracts 6 when the low
+ * nibble exceeds 9 or AF was set, then subtracts 0x60 when the original
+ * AL exceeded 0x99 or CF was set.  AF/CF plus hand-computed ZF, PF and SF
+ * go to CC_SRC.
+ */
+void helper_das(void)
+{
+    const int old_flags = helper_cc_compute_all(CC_OP);
+    const int had_carry = old_flags & CC_C;
+    const int al_in = EAX & 0xff;
+    int al = al_in;
+    int new_flags = 0;
+
+    if ((old_flags & CC_A) || (al & 0x0f) > 9) {
+        new_flags |= CC_A;
+        /* borrow out of the low-nibble correction */
+        if (had_carry || al < 6) {
+            new_flags |= CC_C;
+        }
+        al = (al - 6) & 0xff;
+    }
+    /* second test uses the unadjusted AL */
+    if (had_carry || al_in > 0x99) {
+        al = (al - 0x60) & 0xff;
+        new_flags |= CC_C;
+    }
+    EAX = (EAX & ~0xff) | al;
+
+    /* well, speed is not an issue here, so we compute the flags by hand */
+    new_flags |= ((al == 0) << 6)   /* zf */
+               | parity_table[al]   /* pf */
+               | (al & 0x80);       /* sf */
+    CC_SRC = new_flags;
+}
+
+/*
+ * INTO: raise EXCP04_INTO as a software interrupt when the overflow flag
+ * is set; next_eip_addend is forwarded to raise_interrupt().
+ */
+void helper_into(int next_eip_addend)
+{
+    if (!(helper_cc_compute_all(CC_OP) & CC_O)) {
+        return;
+    }
+    raise_interrupt(EXCP04_INTO, 1, 0, next_eip_addend);
+}
+
+/*
+ * CMPXCHG8B: compare the 64-bit value at a0 with EDX:EAX.  On a match
+ * ECX:EBX is stored and ZF is set; otherwise the value read is written
+ * back, copied into EDX:EAX and ZF is cleared.  Flags go to CC_SRC.
+ */
+void helper_cmpxchg8b(target_ulong a0)
+{
+    int flags = helper_cc_compute_all(CC_OP);
+    const uint64_t mem = ldq(a0);
+    const uint64_t expected = ((uint64_t)EDX << 32) | (uint32_t)EAX;
+
+    if (mem != expected) {
+        /* always do the store */
+        stq(a0, mem);
+        EDX = (uint32_t)(mem >> 32);
+        EAX = (uint32_t)mem;
+        flags &= ~CC_Z;
+    } else {
+        stq(a0, ((uint64_t)ECX << 32) | (uint32_t)EBX);
+        flags |= CC_Z;
+    }
+    CC_SRC = flags;
+}
+
+#ifdef TARGET_X86_64
+/*
+ * CMPXCHG16B: compare the 128-bit value at a0 with EDX:EAX (low quadword
+ * against EAX, high quadword against EDX).  On a match ECX:EBX is stored
+ * and ZF is set; otherwise the value read is written back, copied into
+ * EDX:EAX and ZF is cleared.  EXCP0D_GPF is raised when a0 is not 16-byte
+ * aligned.  Flags go to CC_SRC.
+ */
+void helper_cmpxchg16b(target_ulong a0)
+{
+    uint64_t lo, hi;
+    int flags;
+
+    if (a0 & 0xf) {
+        raise_exception(EXCP0D_GPF);
+    }
+    flags = helper_cc_compute_all(CC_OP);
+    lo = ldq(a0);
+    hi = ldq(a0 + 8);
+    if (lo != EAX || hi != EDX) {
+        /* always do the store */
+        stq(a0, lo);
+        stq(a0 + 8, hi);
+        EDX = hi;
+        EAX = lo;
+        flags &= ~CC_Z;
+    } else {
+        stq(a0, EBX);
+        stq(a0 + 8, ECX);
+        flags |= CC_Z;
+    }
+    CC_SRC = flags;
+}
+#endif
+
+/*
+ * Raise the debug exception (EXCP01_DB) for a single step.  In system
+ * emulation the hardware breakpoints are checked first and DR6_BS is set
+ * in DR6 before the exception is raised.
+ */
+void helper_single_step(void)
+{
+#ifndef CONFIG_USER_ONLY
+    check_hw_breakpoints(env, 1);
+    env->dr[6] |= DR6_BS;
+#endif
+    raise_exception(EXCP01_DB);
+}
+
+/*
+ * CPUID: after the SVM intercept check, query cpu_x86_cpuid() with the
+ * low 32 bits of EAX and ECX and write the four results back to EAX,
+ * EBX, ECX and EDX.
+ */
+void helper_cpuid(void)
+{
+    uint32_t out_a, out_b, out_c, out_d;
+
+    helper_svm_check_intercept_param(SVM_EXIT_CPUID, 0);
+
+    cpu_x86_cpuid(env, (uint32_t)EAX, (uint32_t)ECX,
+                  &out_a, &out_b, &out_c, &out_d);
+    EAX = out_a;
+    EBX = out_b;
+    ECX = out_c;
+    EDX = out_d;
+}
+
+/*
+ * Nested-frame part of ENTER for 16/32-bit stacks: copies (level - 1)
+ * frame pointers from below EBP onto the stack and then pushes t1.
+ * data32 selects 4-byte (non-zero) or 2-byte (zero) stack slots.  Only
+ * memory is written; ESP and EBP themselves are not updated here.
+ */
+void helper_enter_level(int level, int data32, target_ulong t1)
+{
+    const uint32_t sp_mask = get_sp_mask(env->segs[R_SS].flags);
+    const target_ulong stack_base = env->segs[R_SS].base;
+    uint32_t frame = EBP;
+    uint32_t sp = ESP;
+
+    if (!data32) {
+        /* 16 bit */
+        sp -= 2;
+        for (--level; level != 0; --level) {
+            sp -= 2;
+            frame -= 2;
+            stw(stack_base + (sp & sp_mask), lduw(stack_base + (frame & sp_mask)));
+        }
+        sp -= 2;
+        stw(stack_base + (sp & sp_mask), t1);
+        return;
+    }
+
+    /* 32 bit */
+    sp -= 4;
+    for (--level; level != 0; --level) {
+        sp -= 4;
+        frame -= 4;
+        stl(stack_base + (sp & sp_mask), ldl(stack_base + (frame & sp_mask)));
+    }
+    sp -= 4;
+    stl(stack_base + (sp & sp_mask), t1);
+}
+
+#ifdef TARGET_X86_64
+/*
+ * Nested-frame part of ENTER in 64-bit mode: copies (level - 1) frame
+ * pointers from below EBP onto the stack and then pushes t1.  data64
+ * selects 8-byte (non-zero) or 2-byte (zero) stack slots.  Only memory is
+ * written; ESP and EBP themselves are not updated here.
+ */
+void helper_enter64_level(int level, int data64, target_ulong t1)
+{
+    target_ulong frame = EBP;
+    target_ulong sp = ESP;
+
+    if (!data64) {
+        /* 16 bit */
+        sp -= 2;
+        for (--level; level != 0; --level) {
+            sp -= 2;
+            frame -= 2;
+            stw(sp, lduw(frame));
+        }
+        sp -= 2;
+        stw(sp, t1);
+        return;
+    }
+
+    /* 64 bit */
+    sp -= 8;
+    for (--level; level != 0; --level) {
+        sp -= 8;
+        frame -= 8;
+        stq(sp, ldq(frame));
+    }
+    sp -= 8;
+    stq(sp, t1);
+}
+#endif
+
+/*
+ * LLDT: load the local descriptor table register from a GDT selector.
+ *
+ * A null selector (index bits all zero) just clears the cached base and
+ * limit.  Otherwise the selector must reference the GDT, lie within its
+ * limit, and name a present system descriptor of type 2; violations raise
+ * EXCP0D_GPF (or EXCP0B_NOSEG when not present) with the selector as
+ * error code.  env->ldt.selector is updated in both cases.
+ */
+void helper_lldt(int selector)
+{
+    SegmentCache *dt;
+    uint32_t e1, e2;
+    int index, entry_limit;
+    target_ulong ptr;
+
+    selector &= 0xffff;
+    if ((selector & 0xfffc) == 0) {
+        /* XXX: NULL selector case: invalid LDT */
+        env->ldt.base = 0;
+        env->ldt.limit = 0;
+    } else {
+        /* bit 2 set means the selector refers to the LDT: not allowed */
+        if (selector & 0x4)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        dt = &env->gdt;
+        index = selector & ~7;
+        /* descriptors are 16 bytes when long mode is active, 8 otherwise */
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK)
+            entry_limit = 15;
+        else
+#endif
+            entry_limit = 7;
+        if ((index + entry_limit) > dt->limit)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        ptr = dt->base + index;
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
+        /* must be a system descriptor of type 2 */
+        if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint32_t e3;
+            /* third dword supplies bits 63:32 of the base */
+            e3 = ldl_kernel(ptr + 8);
+            load_seg_cache_raw_dt(&env->ldt, e1, e2);
+            env->ldt.base |= (target_ulong)e3 << 32;
+        } else
+#endif
+        {
+            load_seg_cache_raw_dt(&env->ldt, e1, e2);
+        }
+    }
+    env->ldt.selector = selector;
+}
+
+void helper_ltr(int selector)
+{   /* Load env->tr from the GDT descriptor named by 'selector' and set the busy bit in that descriptor. */
+    SegmentCache *dt;
+    uint32_t e1, e2;
+    int index, type, entry_limit;
+    target_ulong ptr;
+
+    selector &= 0xffff;
+    if ((selector & 0xfffc) == 0) {
+        /* NULL selector case: invalid TR */
+        env->tr.base = 0;
+        env->tr.limit = 0;
+        env->tr.flags = 0;
+    } else {
+        if (selector & 0x4) /* bit 2 set is rejected: the lookup below only uses env->gdt */
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        dt = &env->gdt;
+        index = selector & ~7; /* byte offset of the descriptor inside the table */
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK)
+            entry_limit = 15; /* 16-byte descriptor while HF_LMA_MASK is set */
+        else
+#endif
+            entry_limit = 7; /* 8-byte descriptor otherwise */
+        if ((index + entry_limit) > dt->limit) /* whole descriptor must lie within the table limit */
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        ptr = dt->base + index;
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        if ((e2 & DESC_S_MASK) ||
+            (type != 1 && type != 9)) /* only types 1 and 9 with S clear are accepted */
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_P_MASK)) /* not present: EXCP0B_NOSEG rather than EXCP0D_GPF */
+            raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint32_t e3, e4;
+            e3 = ldl_kernel(ptr + 8); /* third dword supplies base bits 63..32 */
+            e4 = ldl_kernel(ptr + 12);
+            if ((e4 >> DESC_TYPE_SHIFT) & 0xf) /* the type field of the fourth dword must be zero */
+                raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+            load_seg_cache_raw_dt(&env->tr, e1, e2);
+            env->tr.base |= (target_ulong)e3 << 32;
+        } else
+#endif
+        {
+            load_seg_cache_raw_dt(&env->tr, e1, e2);
+        }
+        e2 |= DESC_TSS_BUSY_MASK; /* set after env->tr was loaded from the unmodified e2 */
+        stl_kernel(ptr + 4, e2); /* write the busy bit back to the descriptor in memory */
+    }
+    env->tr.selector = selector; /* recorded for both the null and the non-null case */
+}
+
+/* Load segment register seg_reg from 'selector' after descriptor checks. Only works if protected mode and not VM86. seg_reg must be != R_CS */
+void helper_load_seg(int seg_reg, int selector)
+{
+    uint32_t e1, e2;
+    int cpl, dpl, rpl;
+    SegmentCache *dt;
+    int index;
+    target_ulong ptr;
+
+    selector &= 0xffff;
+    cpl = env->hflags & HF_CPL_MASK;
+    if ((selector & 0xfffc) == 0) {
+        /* null selector case: faults for SS (with TARGET_X86_64 only if HF_CS64_MASK is clear or cpl is 3) */
+        if (seg_reg == R_SS
+#ifdef TARGET_X86_64
+            && (!(env->hflags & HF_CS64_MASK) || cpl == 3)
+#endif
+            )
+            raise_exception_err(EXCP0D_GPF, 0);
+        cpu_x86_load_seg_cache(env, seg_reg, selector, 0, 0, 0); /* base, limit and flags all zero */
+    } else {
+
+        if (selector & 0x4) /* bit 2 of the selector picks env->ldt over env->gdt */
+            dt = &env->ldt;
+        else
+            dt = &env->gdt;
+        index = selector & ~7; /* byte offset of the 8-byte descriptor */
+        if ((index + 7) > dt->limit)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        ptr = dt->base + index;
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
+
+        if (!(e2 & DESC_S_MASK)) /* descriptors with S clear are never loadable here */
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        rpl = selector & 3;
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (seg_reg == R_SS) {
+            /* must be writable segment */
+            if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
+                raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+            if (rpl != cpl || dpl != cpl) /* SS requires RPL == DPL == CPL */
+                raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        } else {
+            /* must be readable segment */
+            if ((e2 & (DESC_CS_MASK | DESC_R_MASK)) == DESC_CS_MASK) /* code segment without the R bit */
+                raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+
+            if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) {
+                /* if not conforming code, test rights */
+                if (dpl < cpl || dpl < rpl)
+                    raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+            }
+        }
+
+        if (!(e2 & DESC_P_MASK)) { /* not present: the exception number depends on the register */
+            if (seg_reg == R_SS)
+                raise_exception_err(EXCP0C_STACK, selector & 0xfffc);
+            else
+                raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+        }
+
+        /* set the access bit if not already set */
+        if (!(e2 & DESC_A_MASK)) {
+            e2 |= DESC_A_MASK;
+            stl_kernel(ptr + 4, e2); /* written back to the descriptor table in memory */
+        }
+
+        cpu_x86_load_seg_cache(env, seg_reg, selector,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+#if 0 /* NOTE(review): this disabled trace refers to 'sc', which is not declared in this function */
+        qemu_log("load_seg: sel=0x%04x base=0x%08lx limit=0x%08lx flags=%08x\n",
+                selector, (unsigned long)sc->base, sc->limit, sc->flags);
+#endif
+    }
+}
+
+/* protected mode jump to new_cs:new_eip, either directly to a code segment or through a TSS, task gate or call gate */
+void helper_ljmp_protected(int new_cs, target_ulong new_eip,
+                           int next_eip_addend)
+{
+    int gate_cs, type;
+    uint32_t e1, e2, cpl, dpl, rpl, limit;
+    target_ulong next_eip;
+
+    if ((new_cs & 0xfffc) == 0) /* null selector */
+        raise_exception_err(EXCP0D_GPF, 0);
+    if (load_segment(&e1, &e2, new_cs) != 0)
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_S_MASK) {
+        if (!(e2 & DESC_CS_MASK)) /* S set but not a code segment */
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (e2 & DESC_C_MASK) {
+            /* conforming code segment */
+            if (dpl > cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        } else {
+            /* non conforming code segment */
+            rpl = new_cs & 3;
+            if (rpl > cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            if (dpl != cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        }
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+        limit = get_seg_limit(e1, e2);
+        if (new_eip > limit && /* limit check is skipped when HF_LMA_MASK or the descriptor L bit is set */
+            !(env->hflags & HF_LMA_MASK) && !(e2 & DESC_L_MASK))
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl, /* low two selector bits replaced by cpl */
+                       get_seg_base(e1, e2), limit, e2);
+        EIP = new_eip;
+    } else {
+        /* jump to call or task gate */
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        rpl = new_cs & 3;
+        cpl = env->hflags & HF_CPL_MASK; /* same value as computed above */
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        switch(type) {
+        case 1: /* 286 TSS */
+        case 9: /* 386 TSS */
+        case 5: /* task gate */
+            if (dpl < cpl || dpl < rpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            next_eip = env->eip + next_eip_addend; /* passed on to switch_tss */
+            switch_tss(new_cs, e1, e2, SWITCH_TSS_JMP, next_eip);
+            CC_OP = CC_OP_EFLAGS;
+            break;
+        case 4: /* 286 call gate */
+        case 12: /* 386 call gate */
+            if ((dpl < cpl) || (dpl < rpl))
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            if (!(e2 & DESC_P_MASK))
+                raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+            gate_cs = e1 >> 16; /* target selector stored in the gate */
+            new_eip = (e1 & 0xffff); /* target offset, low 16 bits */
+            if (type == 12)
+                new_eip |= (e2 & 0xffff0000); /* type 12 gates also supply offset bits 31..16 */
+            if (load_segment(&e1, &e2, gate_cs) != 0) /* e1/e2 now describe the gate's target segment */
+                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+            dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+            /* must be code segment */
+            if (((e2 & (DESC_S_MASK | DESC_CS_MASK)) !=
+                 (DESC_S_MASK | DESC_CS_MASK)))
+                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+            if (((e2 & DESC_C_MASK) && (dpl > cpl)) ||
+                (!(e2 & DESC_C_MASK) && (dpl != cpl)))
+                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+            if (!(e2 & DESC_P_MASK)) /* NOTE(review): EXCP0D_GPF here, while the direct path above uses EXCP0B_NOSEG -- confirm intent */
+                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+            limit = get_seg_limit(e1, e2);
+            if (new_eip > limit)
+                raise_exception_err(EXCP0D_GPF, 0);
+            cpu_x86_load_seg_cache(env, R_CS, (gate_cs & 0xfffc) | cpl,
+                                   get_seg_base(e1, e2), limit, e2);
+            EIP = new_eip;
+            break;
+        default: /* any other descriptor type with S clear is rejected */
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            break;
+        }
+    }
+}
+
+/* real mode call: push the current CS selector and next_eip on the stack, then switch to new_cs:new_eip1 */
+void helper_lcall_real(int new_cs, target_ulong new_eip1,
+                       int shift, int next_eip)
+{
+    int new_eip;
+    uint32_t esp, esp_mask;
+    target_ulong ssp;
+
+    new_eip = new_eip1; /* narrowed from target_ulong to int */
+    esp = ESP;
+    esp_mask = get_sp_mask(env->segs[R_SS].flags);
+    ssp = env->segs[R_SS].base;
+    if (shift) { /* non-zero shift uses PUSHL, zero uses PUSHW */
+        PUSHL(ssp, esp, esp_mask, env->segs[R_CS].selector);
+        PUSHL(ssp, esp, esp_mask, next_eip);
+    } else {
+        PUSHW(ssp, esp, esp_mask, env->segs[R_CS].selector);
+        PUSHW(ssp, esp, esp_mask, next_eip);
+    }
+
+    SET_ESP(esp, esp_mask);
+    env->eip = new_eip;
+    env->segs[R_CS].selector = new_cs;
+    env->segs[R_CS].base = (new_cs << 4); /* base is the selector times 16; limit and flags are left as they were */
+}
+
+/* protected mode call to new_cs:new_eip, either directly to a code segment or through a TSS, task gate or call gate */
+void helper_lcall_protected(int new_cs, target_ulong new_eip, 
+                            int shift, int next_eip_addend)
+{
+    int new_stack, i;
+    uint32_t e1, e2, cpl, dpl, rpl, selector, offset, param_count;
+    uint32_t ss = 0, ss_e1 = 0, ss_e2 = 0, sp, type, ss_dpl, sp_mask;
+    uint32_t val, limit, old_sp_mask;
+    target_ulong ssp, old_ssp, next_eip;
+
+    next_eip = env->eip + next_eip_addend; /* return address pushed below or handed to switch_tss */
+    LOG_PCALL("lcall %04x:%08x s=%d\n", new_cs, (uint32_t)new_eip, shift);
+    LOG_PCALL_STATE(env);
+    if ((new_cs & 0xfffc) == 0) /* null selector */
+        raise_exception_err(EXCP0D_GPF, 0);
+    if (load_segment(&e1, &e2, new_cs) != 0)
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    cpl = env->hflags & HF_CPL_MASK;
+    LOG_PCALL("desc=%08x:%08x\n", e1, e2);
+    if (e2 & DESC_S_MASK) {
+        if (!(e2 & DESC_CS_MASK)) /* S set but not a code segment */
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (e2 & DESC_C_MASK) {
+            /* conforming code segment */
+            if (dpl > cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        } else {
+            /* non conforming code segment */
+            rpl = new_cs & 3;
+            if (rpl > cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            if (dpl != cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        }
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+
+#ifdef TARGET_X86_64
+        /* XXX: check 16/32 bit cases in long mode */
+        if (shift == 2) {
+            target_ulong rsp;
+            /* 64 bit case: no segment limit check is made on new_eip in this branch */
+            rsp = ESP;
+            PUSHQ(rsp, env->segs[R_CS].selector);
+            PUSHQ(rsp, next_eip);
+            /* from this point, not restartable */
+            ESP = rsp;
+            cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+                                   get_seg_base(e1, e2),
+                                   get_seg_limit(e1, e2), e2);
+            EIP = new_eip;
+        } else
+#endif
+        {
+            sp = ESP;
+            sp_mask = get_sp_mask(env->segs[R_SS].flags);
+            ssp = env->segs[R_SS].base;
+            if (shift) { /* non-zero shift uses PUSHL, zero uses PUSHW */
+                PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
+                PUSHL(ssp, sp, sp_mask, next_eip);
+            } else {
+                PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
+                PUSHW(ssp, sp, sp_mask, next_eip);
+            }
+
+            limit = get_seg_limit(e1, e2);
+            if (new_eip > limit) /* checked after the pushes but before ESP is committed */
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            /* from this point, not restartable */
+            SET_ESP(sp, sp_mask);
+            cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+                                   get_seg_base(e1, e2), limit, e2);
+            EIP = new_eip;
+        }
+    } else {
+        /* check gate type */
+        type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        rpl = new_cs & 3;
+        switch(type) {
+        case 1: /* available 286 TSS */
+        case 9: /* available 386 TSS */
+        case 5: /* task gate */
+            if (dpl < cpl || dpl < rpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            switch_tss(new_cs, e1, e2, SWITCH_TSS_CALL, next_eip);
+            CC_OP = CC_OP_EFLAGS;
+            return; /* note: this path skips the CONFIG_KQEMU block at the end */
+        case 4: /* 286 call gate */
+        case 12: /* 386 call gate */
+            break;
+        default:
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            break;
+        }
+        shift = type >> 3; /* gate type 12 gives 1 (PUSHL below), type 4 gives 0 (PUSHW below) */
+
+        if (dpl < cpl || dpl < rpl)
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        /* check valid bit */
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG,  new_cs & 0xfffc);
+        selector = e1 >> 16; /* target selector stored in the gate */
+        offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff); /* target offset assembled from both dwords */
+        param_count = e2 & 0x1f; /* number of stack entries copied to the new stack below */
+        if ((selector & 0xfffc) == 0)
+            raise_exception_err(EXCP0D_GPF, 0);
+
+        if (load_segment(&e1, &e2, selector) != 0) /* e1/e2 now describe the gate's target segment */
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (dpl > cpl)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+
+        if (!(e2 & DESC_C_MASK) && dpl < cpl) {
+            /* to inner privilege */
+            get_ss_esp_from_tss(&ss, &sp, dpl); /* new stack selector and pointer for level dpl */
+            LOG_PCALL("new ss:esp=%04x:%08x param_count=%d ESP=" TARGET_FMT_lx "\n",
+                        ss, sp, param_count, ESP);
+            if ((ss & 0xfffc) == 0) /* every failed check on the new stack segment raises EXCP0A_TSS */
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+            if ((ss & 3) != dpl)
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+            if (load_segment(&ss_e1, &ss_e2, ss) != 0)
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+            ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+            if (ss_dpl != dpl)
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+            if (!(ss_e2 & DESC_S_MASK) ||
+                (ss_e2 & DESC_CS_MASK) ||
+                !(ss_e2 & DESC_W_MASK)) /* must be a writable data segment */
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+            if (!(ss_e2 & DESC_P_MASK))
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+
+            //            push_size = ((param_count * 2) + 8) << shift;
+
+            old_sp_mask = get_sp_mask(env->segs[R_SS].flags);
+            old_ssp = env->segs[R_SS].base;
+
+            sp_mask = get_sp_mask(ss_e2);
+            ssp = get_seg_base(ss_e1, ss_e2);
+            if (shift) {
+                PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector); /* old SS:ESP go onto the new stack first */
+                PUSHL(ssp, sp, sp_mask, ESP);
+                for(i = param_count - 1; i >= 0; i--) { /* copy parameters from the old stack, highest index first */
+                    val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
+                    PUSHL(ssp, sp, sp_mask, val);
+                }
+            } else {
+                PUSHW(ssp, sp, sp_mask, env->segs[R_SS].selector); /* old SS:SP go onto the new stack first */
+                PUSHW(ssp, sp, sp_mask, ESP);
+                for(i = param_count - 1; i >= 0; i--) { /* copy parameters from the old stack, highest index first */
+                    val = lduw_kernel(old_ssp + ((ESP + i * 2) & old_sp_mask));
+                    PUSHW(ssp, sp, sp_mask, val);
+                }
+            }
+            new_stack = 1;
+        } else {
+            /* to same privilege */
+            sp = ESP;
+            sp_mask = get_sp_mask(env->segs[R_SS].flags);
+            ssp = env->segs[R_SS].base;
+            //            push_size = (4 << shift);
+            new_stack = 0;
+        }
+
+        if (shift) { /* return CS:EIP pushed on whichever stack was selected above */
+            PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
+            PUSHL(ssp, sp, sp_mask, next_eip);
+        } else {
+            PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
+            PUSHW(ssp, sp, sp_mask, next_eip);
+        }
+
+        /* from this point, not restartable */
+
+        if (new_stack) {
+            ss = (ss & ~3) | dpl; /* low two selector bits replaced by dpl */
+            cpu_x86_load_seg_cache(env, R_SS, ss,
+                                   ssp,
+                                   get_seg_limit(ss_e1, ss_e2),
+                                   ss_e2);
+        }
+
+        selector = (selector & ~3) | dpl; /* low two selector bits replaced by dpl */
+        cpu_x86_load_seg_cache(env, R_CS, selector,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+        cpu_x86_set_cpl(env, dpl);
+        SET_ESP(sp, sp_mask);
+        EIP = offset; /* the gate's offset, not the new_eip argument */
+    }
+#ifdef CONFIG_KQEMU
+    if (kqemu_is_ok(env)) {
+        env->exception_index = -1;
+        cpu_loop_exit();
+    }
+#endif
+}
+
+/* real and vm86 mode iret: pop EIP, CS and EFLAGS from the stack and load them */
+void helper_iret_real(int shift)
+{
+    uint32_t sp, new_cs, new_eip, new_eflags, sp_mask;
+    target_ulong ssp;
+    int eflags_mask;
+
+    sp_mask = 0xffff; /* XXXX: use SS segment size ? */
+    sp = ESP;
+    ssp = env->segs[R_SS].base;
+    if (shift == 1) {
+        /* 32 bits */
+        POPL(ssp, sp, sp_mask, new_eip);
+        POPL(ssp, sp, sp_mask, new_cs);
+        new_cs &= 0xffff; /* only the low 16 bits of the popped dword form the selector */
+        POPL(ssp, sp, sp_mask, new_eflags);
+    } else {
+        /* 16 bits */
+        POPW(ssp, sp, sp_mask, new_eip);
+        POPW(ssp, sp, sp_mask, new_cs);
+        POPW(ssp, sp, sp_mask, new_eflags);
+    }
+    ESP = (ESP & ~sp_mask) | (sp & sp_mask); /* only the bits selected by sp_mask are updated */
+    env->segs[R_CS].selector = new_cs;
+    env->segs[R_CS].base = (new_cs << 4); /* base is the selector times 16 */
+    env->eip = new_eip;
+    if (env->eflags & VM_MASK) /* IOPL_MASK is left out of the writable set while VM_MASK is set */
+        eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | RF_MASK | NT_MASK;
+    else
+        eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | IOPL_MASK | RF_MASK | NT_MASK;
+    if (shift == 0) /* 16-bit form: only the low 16 flag bits may change */
+        eflags_mask &= 0xffff;
+    load_eflags(new_eflags, eflags_mask);
+    env->hflags2 &= ~HF2_NMI_MASK;
+}
+
+static inline void validate_seg(int seg_reg, int cpl)
+{
+    uint32_t flags;
+    int seg_dpl, conforming_code;
+
+    /* XXX: FS and GS holding a null selector are deliberately left
+       alone: on x86_64 they may still carry a valid base, and how a
+       real x86_64 CPU behaves here is an open question */
+    if ((env->segs[seg_reg].selector & 0xfffc) == 0 &&
+        (seg_reg == R_FS || seg_reg == R_GS))
+        return;
+
+    flags = env->segs[seg_reg].flags;
+    seg_dpl = (flags >> DESC_DPL_SHIFT) & 3;
+    conforming_code = (flags & DESC_CS_MASK) && (flags & DESC_C_MASK);
+    /* a data or non conforming code segment whose DPL is below cpl
+       is replaced by an all-zero (null) segment */
+    if (!conforming_code && seg_dpl < cpl) {
+        cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0);
+    }
+}
+
+/* protected mode far return; with is_iret set, EFLAGS is popped and loaded as well (iret) */
+static inline void helper_ret_protected(int shift, int is_iret, int addend)
+{
+    uint32_t new_cs, new_eflags, new_ss;
+    uint32_t new_es, new_ds, new_fs, new_gs;
+    uint32_t e1, e2, ss_e1, ss_e2;
+    int cpl, dpl, rpl, eflags_mask, iopl;
+    target_ulong ssp, sp, new_eip, new_esp, sp_mask;
+
+#ifdef TARGET_X86_64
+    if (shift == 2)
+        sp_mask = -1; /* shift == 2: no masking of the stack pointer */
+    else
+#endif
+        sp_mask = get_sp_mask(env->segs[R_SS].flags);
+    sp = ESP;
+    ssp = env->segs[R_SS].base;
+    new_eflags = 0; /* avoid warning */
+#ifdef TARGET_X86_64
+    if (shift == 2) {
+        POPQ(sp, new_eip);
+        POPQ(sp, new_cs);
+        new_cs &= 0xffff;
+        if (is_iret) {
+            POPQ(sp, new_eflags);
+        }
+    } else
+#endif
+    if (shift == 1) {
+        /* 32 bits */
+        POPL(ssp, sp, sp_mask, new_eip);
+        POPL(ssp, sp, sp_mask, new_cs);
+        new_cs &= 0xffff;
+        if (is_iret) {
+            POPL(ssp, sp, sp_mask, new_eflags);
+            if (new_eflags & VM_MASK) /* only the 32-bit iret form can branch to the vm86 path */
+                goto return_to_vm86;
+        }
+    } else {
+        /* 16 bits */
+        POPW(ssp, sp, sp_mask, new_eip);
+        POPW(ssp, sp, sp_mask, new_cs);
+        if (is_iret)
+            POPW(ssp, sp, sp_mask, new_eflags);
+    }
+    LOG_PCALL("lret new %04x:" TARGET_FMT_lx " s=%d addend=0x%x\n",
+              new_cs, new_eip, shift, addend);
+    LOG_PCALL_STATE(env);
+    if ((new_cs & 0xfffc) == 0) /* null return selector */
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    if (load_segment(&e1, &e2, new_cs) != 0)
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    if (!(e2 & DESC_S_MASK) ||
+        !(e2 & DESC_CS_MASK)) /* must be a code segment */
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    cpl = env->hflags & HF_CPL_MASK;
+    rpl = new_cs & 3;
+    if (rpl < cpl) /* a return to a numerically lower level is refused */
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    if (e2 & DESC_C_MASK) { /* conforming: DPL <= RPL; non conforming: DPL == RPL */
+        if (dpl > rpl)
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    } else {
+        if (dpl != rpl)
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    }
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+
+    sp += addend; /* skip 'addend' bytes on the current stack */
+    if (rpl == cpl && (!(env->hflags & HF_CS64_MASK) ||
+                       ((env->hflags & HF_CS64_MASK) && !is_iret))) { /* an iret with HF_CS64_MASK set always takes the else branch */
+        /* return to same privilege level */
+        cpu_x86_load_seg_cache(env, R_CS, new_cs,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+    } else {
+        /* return to different privilege level */
+#ifdef TARGET_X86_64
+        if (shift == 2) {
+            POPQ(sp, new_esp);
+            POPQ(sp, new_ss);
+            new_ss &= 0xffff;
+        } else
+#endif
+        if (shift == 1) {
+            /* 32 bits */
+            POPL(ssp, sp, sp_mask, new_esp);
+            POPL(ssp, sp, sp_mask, new_ss);
+            new_ss &= 0xffff;
+        } else {
+            /* 16 bits */
+            POPW(ssp, sp, sp_mask, new_esp);
+            POPW(ssp, sp, sp_mask, new_ss);
+        }
+        LOG_PCALL("new ss:esp=%04x:" TARGET_FMT_lx "\n",
+                    new_ss, new_esp);
+        if ((new_ss & 0xfffc) == 0) {
+#ifdef TARGET_X86_64
+            /* NULL ss is allowed in long mode if cpl != 3*/
+            /* XXX: test CS64 ? */
+            if ((env->hflags & HF_LMA_MASK) && rpl != 3) { /* the test uses rpl, the level being returned to */
+                cpu_x86_load_seg_cache(env, R_SS, new_ss,
+                                       0, 0xffffffff,
+                                       DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                                       DESC_S_MASK | (rpl << DESC_DPL_SHIFT) |
+                                       DESC_W_MASK | DESC_A_MASK);
+                ss_e2 = DESC_B_MASK; /* XXX: should not be needed ? */
+            } else
+#endif
+            {
+                raise_exception_err(EXCP0D_GPF, 0);
+            }
+        } else {
+            if ((new_ss & 3) != rpl)
+                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+            if (load_segment(&ss_e1, &ss_e2, new_ss) != 0)
+                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+            if (!(ss_e2 & DESC_S_MASK) ||
+                (ss_e2 & DESC_CS_MASK) ||
+                !(ss_e2 & DESC_W_MASK)) /* must be a writable data segment */
+                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+            dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+            if (dpl != rpl)
+                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+            if (!(ss_e2 & DESC_P_MASK))
+                raise_exception_err(EXCP0B_NOSEG, new_ss & 0xfffc);
+            cpu_x86_load_seg_cache(env, R_SS, new_ss,
+                                   get_seg_base(ss_e1, ss_e2),
+                                   get_seg_limit(ss_e1, ss_e2),
+                                   ss_e2);
+        }
+
+        cpu_x86_load_seg_cache(env, R_CS, new_cs,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+        cpu_x86_set_cpl(env, rpl);
+        sp = new_esp; /* continue on the stack that was just popped */
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_CS64_MASK)
+            sp_mask = -1;
+        else
+#endif
+            sp_mask = get_sp_mask(ss_e2); /* mask now follows the new stack segment */
+
+        /* validate data segments */
+        validate_seg(R_ES, rpl);
+        validate_seg(R_DS, rpl);
+        validate_seg(R_FS, rpl);
+        validate_seg(R_GS, rpl);
+
+        sp += addend; /* addend is applied again, this time to the new stack pointer */
+    }
+    SET_ESP(sp, sp_mask);
+    env->eip = new_eip;
+    if (is_iret) {
+        /* NOTE: 'cpl' is the _old_ CPL */
+        eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK;
+        if (cpl == 0) /* IOPL may only be changed from CPL 0 */
+            eflags_mask |= IOPL_MASK;
+        iopl = (env->eflags >> IOPL_SHIFT) & 3;
+        if (cpl <= iopl) /* IF may only be changed when CPL <= current IOPL */
+            eflags_mask |= IF_MASK;
+        if (shift == 0) /* 16-bit form: only the low 16 flag bits may change */
+            eflags_mask &= 0xffff;
+        load_eflags(new_eflags, eflags_mask);
+    }
+    return;
+
+ return_to_vm86: /* reached from the 32-bit iret path when the popped EFLAGS has VM_MASK set */
+    POPL(ssp, sp, sp_mask, new_esp);
+    POPL(ssp, sp, sp_mask, new_ss);
+    POPL(ssp, sp, sp_mask, new_es);
+    POPL(ssp, sp, sp_mask, new_ds);
+    POPL(ssp, sp, sp_mask, new_fs);
+    POPL(ssp, sp, sp_mask, new_gs);
+
+    /* modify processor state */
+    load_eflags(new_eflags, TF_MASK | AC_MASK | ID_MASK |
+                IF_MASK | IOPL_MASK | VM_MASK | NT_MASK | VIF_MASK | VIP_MASK);
+    load_seg_vm(R_CS, new_cs & 0xffff);
+    cpu_x86_set_cpl(env, 3);
+    load_seg_vm(R_SS, new_ss & 0xffff);
+    load_seg_vm(R_ES, new_es & 0xffff);
+    load_seg_vm(R_DS, new_ds & 0xffff);
+    load_seg_vm(R_FS, new_fs & 0xffff);
+    load_seg_vm(R_GS, new_gs & 0xffff);
+
+    env->eip = new_eip & 0xffff;
+    ESP = new_esp; /* assigned directly, without SET_ESP and its mask */
+}
+
+void helper_iret_protected(int shift, int next_eip)
+{   /* Protected mode iret: a task switch when NT_MASK is set in EFLAGS, otherwise helper_ret_protected(shift, 1, 0). */
+    int tss_selector, type;
+    uint32_t e1, e2;
+
+    /* specific case for TSS */
+    if (env->eflags & NT_MASK) {
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) /* this path is refused while HF_LMA_MASK is set */
+            raise_exception_err(EXCP0D_GPF, 0);
+#endif
+        tss_selector = lduw_kernel(env->tr.base + 0); /* 16-bit selector read from offset 0 of the current TSS */
+        if (tss_selector & 4) /* bit 2 set (non-GDT selector) is rejected */
+            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+        if (load_segment(&e1, &e2, tss_selector) != 0)
+            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+        type = (e2 >> DESC_TYPE_SHIFT) & 0x17; /* 0x17 drops bit 3, so types 3 and 11 compare equal */
+        /* NOTE: we check both segment and busy TSS */
+        if (type != 3)
+            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+        switch_tss(tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip);
+    } else {
+        helper_ret_protected(shift, 1, 0);
+    }
+    env->hflags2 &= ~HF2_NMI_MASK; /* cleared on both paths */
+#ifdef CONFIG_KQEMU
+    if (kqemu_is_ok(env)) {
+        CC_OP = CC_OP_EFLAGS;
+        env->exception_index = -1;
+        cpu_loop_exit();
+    }
+#endif
+}
+
+void helper_lret_protected(int shift, int addend)
+{   /* Protected mode far return: helper_ret_protected() with is_iret = 0, so EFLAGS is neither popped nor loaded. */
+    helper_ret_protected(shift, 0, addend);
+#ifdef CONFIG_KQEMU
+    if (kqemu_is_ok(env)) { /* leave the CPU loop with no pending exception */
+        env->exception_index = -1;
+        cpu_loop_exit();
+    }
+#endif
+}
+
+void helper_sysenter(void)
+{   /* Enter CPL 0 using env->sysenter_cs/esp/eip; faults with EXCP0D_GPF if sysenter_cs is 0. */
+    if (env->sysenter_cs == 0) {
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+    env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
+    cpu_x86_set_cpl(env, 0);
+
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_LMA_MASK) { /* same CS as below, plus DESC_L_MASK */
+        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
+    } else
+#endif
+    {
+        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc, /* flat CS: base 0, limit 0xffffffff */
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+    }
+    cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc, /* SS selector is sysenter_cs + 8, also flat */
+                           0, 0xffffffff,
+                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                           DESC_S_MASK |
+                           DESC_W_MASK | DESC_A_MASK);
+    ESP = env->sysenter_esp;
+    EIP = env->sysenter_eip;
+}
+
+void helper_sysexit(int dflag)
+{   /* Return to CPL 3 with selectors derived from env->sysenter_cs; ESP and EIP are taken from ECX and EDX. */
+    int cpl;
+
+    cpl = env->hflags & HF_CPL_MASK;
+    if (env->sysenter_cs == 0 || cpl != 0) { /* requires a non-zero sysenter_cs and CPL 0 */
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+    cpu_x86_set_cpl(env, 3);
+#ifdef TARGET_X86_64
+    if (dflag == 2) { /* dflag == 2: CS = sysenter_cs + 32 (with DESC_L_MASK), SS = sysenter_cs + 40 */
+        cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 32) & 0xfffc) | 3,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
+        cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 40) & 0xfffc) | 3,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_W_MASK | DESC_A_MASK);
+    } else
+#endif
+    { /* otherwise: CS = sysenter_cs + 16, SS = sysenter_cs + 24; all selectors get RPL 3 and DPL 3 */
+        cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 16) & 0xfffc) | 3,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+        cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 24) & 0xfffc) | 3,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_W_MASK | DESC_A_MASK);
+    }
+    ESP = ECX;
+    EIP = EDX;
+#ifdef CONFIG_KQEMU
+    if (kqemu_is_ok(env)) { /* leave the CPU loop with no pending exception */
+        env->exception_index = -1;
+        cpu_loop_exit();
+    }
+#endif
+}
+
+#if defined(CONFIG_USER_ONLY)
+target_ulong helper_read_crN(int reg)
+{   /* CONFIG_USER_ONLY stub: 'reg' is ignored and 0 is always returned */
+    return 0;
+}
+
+void helper_write_crN(int reg, target_ulong t0)
+{   /* CONFIG_USER_ONLY stub: the write is silently dropped */
+}
+
+void helper_movl_drN_T0(int reg, target_ulong t0)
+{   /* CONFIG_USER_ONLY stub: the write is silently dropped */
+}
+#else
+target_ulong helper_read_crN(int reg)
+{
+    /* Return control register 'reg' after the SVM read-intercept check.
+       Index 8 is not read from env->cr[]: it comes from the APIC TPR,
+       or from env->v_tpr while HF2_VINTR_MASK is set. */
+    target_ulong result;
+
+    helper_svm_check_intercept_param(SVM_EXIT_READ_CR0 + reg, 0);
+    if (reg != 8) {
+        /* every other index is a plain array read */
+        result = env->cr[reg];
+    } else if (env->hflags2 & HF2_VINTR_MASK) {
+        /* HF2_VINTR_MASK set: use the value kept in env->v_tpr */
+        result = env->v_tpr;
+    } else {
+        result = cpu_get_apic_tpr(env);
+    }
+    return result;
+}
+
+void helper_write_crN(int reg, target_ulong t0)
+{
+    /* Store t0 into control register 'reg' after the SVM write-intercept
+       check.  Indices 0, 3 and 4 go through their cpu_x86_update_crN()
+       routines, index 8 updates the TPR, and any other index is stored
+       straight into env->cr[]. */
+    helper_svm_check_intercept_param(SVM_EXIT_WRITE_CR0 + reg, 0);
+    if (reg == 0) {
+        cpu_x86_update_cr0(env, t0);
+    } else if (reg == 3) {
+        cpu_x86_update_cr3(env, t0);
+    } else if (reg == 4) {
+        cpu_x86_update_cr4(env, t0);
+    } else if (reg == 8) {
+        /* the APIC TPR is only written while HF2_VINTR_MASK is clear;
+           env->v_tpr always receives the low four bits */
+        if ((env->hflags2 & HF2_VINTR_MASK) == 0) {
+            cpu_set_apic_tpr(env, t0);
+        }
+        env->v_tpr = t0 & 0x0f;
+    } else {
+        env->cr[reg] = t0;
+    }
+}
+
+void helper_movl_drN_T0(int reg, target_ulong t0)
+{
+    /* Store t0 in debug register 'reg', refreshing the hardware breakpoints it affects. */
+    int bp;
+    if (reg == 7) {                     /* index 7: all four slots are removed and re-inserted */
+        for (bp = 0; bp < 4; bp++)
+            hw_breakpoint_remove(env, bp);
+        env->dr[7] = t0;
+        for (bp = 0; bp < 4; bp++)
+            hw_breakpoint_insert(env, bp);
+    } else {
+        /* indices below 4 refresh their own breakpoint; the rest are only stored */
+        if (reg < 4) hw_breakpoint_remove(env, reg);
+        env->dr[reg] = t0;
+        if (reg < 4) hw_breakpoint_insert(env, reg);
+    }
+}
+#endif
+
+void helper_lmsw(target_ulong t0)
+{
+    /* Only the four low bits of CR0 are taken from t0.  Bits 3..1 are
+       replaced; bit 0 (PE) is OR-ed in, so it can be set but not cleared. */
+    target_ulong new_cr0 = (t0 & 0xf) | (env->cr[0] & ~0xe);
+    helper_write_crN(0, new_cr0);
+}
+
+void helper_clts(void) /* clear the TS flag */
+{
+    env->cr[0] &= ~CR0_TS_MASK; /* CR0_TS_MASK in CR0 */
+    env->hflags &= ~HF_TS_MASK; /* and HF_TS_MASK in hflags */
+}
+
+void helper_invlpg(target_ulong addr) /* flush the TLB entry for addr */
+{
+    helper_svm_check_intercept_param(SVM_EXIT_INVLPG, 0); /* SVM check comes first */
+    tlb_flush_page(env, addr);
+}
+
+void helper_rdtsc(void) /* load the time-stamp counter into EDX:EAX */
+{
+    uint64_t val;
+
+    if ((env->cr[4] & CR4_TSD_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
+        raise_exception(EXCP0D_GPF); /* CR4.TSD set and CPL != 0 */
+    }
+    helper_svm_check_intercept_param(SVM_EXIT_RDTSC, 0);
+
+    val = cpu_get_tsc(env) + env->tsc_offset; /* counter plus env->tsc_offset */
+    EAX = (uint32_t)(val); /* low 32 bits */
+    EDX = (uint32_t)(val >> 32); /* high 32 bits */
+}
+
+void helper_rdpmc(void) /* RDPMC: permission check only, counters are not emulated */
+{
+    /* CR4.PCE set permits RDPMC at any CPL; fault only if PCE is clear and CPL != 0 */
+    if (!(env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
+        raise_exception(EXCP0D_GPF);
+    }
+    helper_svm_check_intercept_param(SVM_EXIT_RDPMC, 0);
+    /* currently unimplemented */
+    raise_exception_err(EXCP06_ILLOP, 0);
+}
+
+#if defined(CONFIG_USER_ONLY)
+void helper_wrmsr(void) /* CONFIG_USER_ONLY stub */
+{ /* intentionally empty: MSR writes are ignored */
+}
+
+void helper_rdmsr(void) /* CONFIG_USER_ONLY stub */
+{ /* intentionally empty: EAX and EDX are left unchanged */
+}
+#else
+void helper_wrmsr(void) /* WRMSR: store EDX:EAX into the MSR selected by ECX */
+{
+    uint64_t val;
+
+    helper_svm_check_intercept_param(SVM_EXIT_MSR, 1); /* param 1 here, 0 in helper_rdmsr */
+
+    val = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32); /* EAX low half, EDX high half */
+
+    switch((uint32_t)ECX) {
+    case MSR_IA32_SYSENTER_CS:
+        env->sysenter_cs = val & 0xffff; /* only the low 16 bits are kept */
+        break;
+    case MSR_IA32_SYSENTER_ESP:
+        env->sysenter_esp = val;
+        break;
+    case MSR_IA32_SYSENTER_EIP:
+        env->sysenter_eip = val;
+        break;
+    case MSR_IA32_APICBASE:
+        cpu_set_apic_base(env, val);
+        break;
+    case MSR_EFER:
+        {
+            uint64_t update_mask; /* EFER bits writable given the CPUID feature bits */
+            update_mask = 0;
+            if (env->cpuid_ext2_features & CPUID_EXT2_SYSCALL)
+                update_mask |= MSR_EFER_SCE;
+            if (env->cpuid_ext2_features & CPUID_EXT2_LM)
+                update_mask |= MSR_EFER_LME;
+            if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR)
+                update_mask |= MSR_EFER_FFXSR;
+            if (env->cpuid_ext2_features & CPUID_EXT2_NX)
+                update_mask |= MSR_EFER_NXE;
+            if (env->cpuid_ext3_features & CPUID_EXT3_SVM)
+                update_mask |= MSR_EFER_SVME;
+            cpu_load_efer(env, (env->efer & ~update_mask) |
+                          (val & update_mask)); /* bits outside the mask keep their old value */
+        }
+        break;
+    case MSR_STAR:
+        env->star = val;
+        break;
+    case MSR_PAT:
+        env->pat = val;
+        break;
+    case MSR_VM_HSAVE_PA:
+        env->vm_hsave = val;
+        break;
+#ifdef TARGET_X86_64
+    case MSR_LSTAR:
+        env->lstar = val;
+        break;
+    case MSR_CSTAR:
+        env->cstar = val;
+        break;
+    case MSR_FMASK:
+        env->fmask = val;
+        break;
+    case MSR_FSBASE:
+        env->segs[R_FS].base = val;
+        break;
+    case MSR_GSBASE:
+        env->segs[R_GS].base = val;
+        break;
+    case MSR_KERNELGSBASE:
+        env->kernelgsbase = val;
+        break;
+#endif
+    case MSR_MTRRphysBase(0):
+    case MSR_MTRRphysBase(1):
+    case MSR_MTRRphysBase(2):
+    case MSR_MTRRphysBase(3):
+    case MSR_MTRRphysBase(4):
+    case MSR_MTRRphysBase(5):
+    case MSR_MTRRphysBase(6):
+    case MSR_MTRRphysBase(7):
+        env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base = val; /* /2: two MSRs per mtrr_var entry */
+        break;
+    case MSR_MTRRphysMask(0):
+    case MSR_MTRRphysMask(1):
+    case MSR_MTRRphysMask(2):
+    case MSR_MTRRphysMask(3):
+    case MSR_MTRRphysMask(4):
+    case MSR_MTRRphysMask(5):
+    case MSR_MTRRphysMask(6):
+    case MSR_MTRRphysMask(7):
+        env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask = val;
+        break;
+    case MSR_MTRRfix64K_00000:
+        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix64K_00000] = val; /* mtrr_fixed[0] */
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1] = val; /* slots from index 1 */
+        break;
+    case MSR_MTRRfix4K_C0000:
+    case MSR_MTRRfix4K_C8000:
+    case MSR_MTRRfix4K_D0000:
+    case MSR_MTRRfix4K_D8000:
+    case MSR_MTRRfix4K_E0000:
+    case MSR_MTRRfix4K_E8000:
+    case MSR_MTRRfix4K_F0000:
+    case MSR_MTRRfix4K_F8000:
+        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3] = val; /* slots from index 3 */
+        break;
+    case MSR_MTRRdefType:
+        env->mtrr_deftype = val;
+        break;
+    case MSR_MCG_STATUS:
+        env->mcg_status = val;
+        break;
+    case MSR_MCG_CTL:
+        if ((env->mcg_cap & MCG_CTL_P)
+            && (val == 0 || val == ~(uint64_t)0)) /* only all-zeros or all-ones is accepted */
+            env->mcg_ctl = val;
+        break;
+    default:
+        /* MCE bank registers: four MSRs per bank starting at MSR_MC0_CTL;
+           the bank count is the low byte of mcg_cap (mask before scaling) */
+        if ((uint32_t)ECX >= MSR_MC0_CTL
+            && (uint32_t)ECX < MSR_MC0_CTL + 4 * (env->mcg_cap & 0xff)) {
+            uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+            if ((offset & 0x3) != 0
+                || (val == 0 || val == ~(uint64_t)0)) /* first MSR of a bank: all-zeros/all-ones only */
+                env->mce_banks[offset] = val;
+            break;
+        }
+        /* XXX: exception ? */
+        break;
+    }
+}
+
+void helper_rdmsr(void) /* RDMSR: load the MSR selected by ECX into EDX:EAX */
+{
+    uint64_t val;
+
+    helper_svm_check_intercept_param(SVM_EXIT_MSR, 0); /* param 0 here, 1 in helper_wrmsr */
+
+    switch((uint32_t)ECX) {
+    case MSR_IA32_SYSENTER_CS:
+        val = env->sysenter_cs;
+        break;
+    case MSR_IA32_SYSENTER_ESP:
+        val = env->sysenter_esp;
+        break;
+    case MSR_IA32_SYSENTER_EIP:
+        val = env->sysenter_eip;
+        break;
+    case MSR_IA32_APICBASE:
+        val = cpu_get_apic_base(env);
+        break;
+    case MSR_EFER:
+        val = env->efer;
+        break;
+    case MSR_STAR:
+        val = env->star;
+        break;
+    case MSR_PAT:
+        val = env->pat;
+        break;
+    case MSR_VM_HSAVE_PA:
+        val = env->vm_hsave;
+        break;
+    case MSR_IA32_PERF_STATUS: /* fixed value, not backed by CPU state */
+        /* tsc_increment_by_tick */
+        val = 1000ULL;
+        /* CPU multiplier */
+        val |= (((uint64_t)4ULL) << 40);
+        break;
+#ifdef TARGET_X86_64
+    case MSR_LSTAR:
+        val = env->lstar;
+        break;
+    case MSR_CSTAR:
+        val = env->cstar;
+        break;
+    case MSR_FMASK:
+        val = env->fmask;
+        break;
+    case MSR_FSBASE:
+        val = env->segs[R_FS].base;
+        break;
+    case MSR_GSBASE:
+        val = env->segs[R_GS].base;
+        break;
+    case MSR_KERNELGSBASE:
+        val = env->kernelgsbase;
+        break;
+#endif
+#ifdef CONFIG_KQEMU
+    case MSR_QPI_COMMBASE:
+        if (env->kqemu_enabled) {
+            val = kqemu_comm_base;
+        } else {
+            val = 0;
+        }
+        break;
+#endif
+    case MSR_MTRRphysBase(0):
+    case MSR_MTRRphysBase(1):
+    case MSR_MTRRphysBase(2):
+    case MSR_MTRRphysBase(3):
+    case MSR_MTRRphysBase(4):
+    case MSR_MTRRphysBase(5):
+    case MSR_MTRRphysBase(6):
+    case MSR_MTRRphysBase(7):
+        val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base; /* /2: two MSRs per mtrr_var entry */
+        break;
+    case MSR_MTRRphysMask(0):
+    case MSR_MTRRphysMask(1):
+    case MSR_MTRRphysMask(2):
+    case MSR_MTRRphysMask(3):
+    case MSR_MTRRphysMask(4):
+    case MSR_MTRRphysMask(5):
+    case MSR_MTRRphysMask(6):
+    case MSR_MTRRphysMask(7):
+        val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask;
+        break;
+    case MSR_MTRRfix64K_00000:
+        val = env->mtrr_fixed[0];
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1]; /* slots from index 1 */
+        break;
+    case MSR_MTRRfix4K_C0000:
+    case MSR_MTRRfix4K_C8000:
+    case MSR_MTRRfix4K_D0000:
+    case MSR_MTRRfix4K_D8000:
+    case MSR_MTRRfix4K_E0000:
+    case MSR_MTRRfix4K_E8000:
+    case MSR_MTRRfix4K_F0000:
+    case MSR_MTRRfix4K_F8000:
+        val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3]; /* slots from index 3 */
+        break;
+    case MSR_MTRRdefType:
+        val = env->mtrr_deftype;
+        break;
+    case MSR_MTRRcap:
+        if (env->cpuid_features & CPUID_MTRR)
+            val = MSR_MTRRcap_VCNT | MSR_MTRRcap_FIXRANGE_SUPPORT | MSR_MTRRcap_WC_SUPPORTED;
+        else
+            /* XXX: exception ? */
+            val = 0;
+        break;
+    case MSR_MCG_CAP:
+        val = env->mcg_cap;
+        break;
+    case MSR_MCG_CTL:
+        if (env->mcg_cap & MCG_CTL_P)
+            val = env->mcg_ctl;
+        else
+            val = 0;
+        break;
+    case MSR_MCG_STATUS:
+        val = env->mcg_status;
+        break;
+    default: /* MCE bank MSRs: 4 per bank, bank count = low byte of mcg_cap (masked before scaling) */
+        if ((uint32_t)ECX >= MSR_MC0_CTL
+            && (uint32_t)ECX < MSR_MC0_CTL + 4 * (env->mcg_cap & 0xff)) {
+            uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+            val = env->mce_banks[offset];
+            break;
+        }
+        /* XXX: exception ? */
+        val = 0; /* unknown MSRs read as zero */
+        break;
+    }
+    EAX = (uint32_t)(val); /* low 32 bits */
+    EDX = (uint32_t)(val >> 32); /* high 32 bits */
+}
+#endif
+
+target_ulong helper_lsl(target_ulong selector1) /* segment limit for a selector; CC_Z reports success */
+{
+    unsigned int limit;
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl, type;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(CC_OP); /* current flags; only CC_Z is changed below */
+    if ((selector & 0xfffc) == 0) /* selector is zero apart from its RPL bits */
+        goto fail;
+    if (load_segment(&e1, &e2, selector) != 0) /* descriptor words could not be loaded */
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_S_MASK) {
+        if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) {
+            /* conforming */
+        } else {
+            if (dpl < cpl || dpl < rpl) /* DPL must be >= both CPL and RPL */
+                goto fail;
+        }
+    } else {
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        switch(type) { /* S bit clear: only the types listed here are accepted */
+        case 1:
+        case 2:
+        case 3:
+        case 9:
+        case 11:
+            break;
+        default:
+            goto fail;
+        }
+        if (dpl < cpl || dpl < rpl) {
+        fail: /* shared failure exit, also the target of the gotos above */
+            CC_SRC = eflags & ~CC_Z;
+            return 0;
+        }
+    }
+    limit = get_seg_limit(e1, e2);
+    CC_SRC = eflags | CC_Z; /* success: CC_Z set */
+    return limit;
+}
+
+target_ulong helper_lar(target_ulong selector1) /* access-rights bits for a selector; CC_Z reports success */
+{
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl, type;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(CC_OP); /* current flags; only CC_Z is changed below */
+    if ((selector & 0xfffc) == 0) /* selector is zero apart from its RPL bits */
+        goto fail;
+    if (load_segment(&e1, &e2, selector) != 0) /* descriptor words could not be loaded */
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_S_MASK) {
+        if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) {
+            /* conforming */
+        } else {
+            if (dpl < cpl || dpl < rpl) /* DPL must be >= both CPL and RPL */
+                goto fail;
+        }
+    } else {
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        switch(type) { /* S bit clear: accepts 4, 5 and 12 in addition to helper_lsl's list */
+        case 1:
+        case 2:
+        case 3:
+        case 4:
+        case 5:
+        case 9:
+        case 11:
+        case 12:
+            break;
+        default:
+            goto fail;
+        }
+        if (dpl < cpl || dpl < rpl) {
+        fail: /* shared failure exit, also the target of the gotos above */
+            CC_SRC = eflags & ~CC_Z;
+            return 0;
+        }
+    }
+    CC_SRC = eflags | CC_Z; /* success: CC_Z set */
+    return e2 & 0x00f0ff00; /* selected bits of the second descriptor word */
+}
+
+void helper_verr(target_ulong selector1) /* set CC_Z if the segment is readable at the current privilege */
+{
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(CC_OP); /* current flags; only CC_Z is changed below */
+    if ((selector & 0xfffc) == 0) /* selector is zero apart from its RPL bits */
+        goto fail;
+    if (load_segment(&e1, &e2, selector) != 0)
+        goto fail;
+    if (!(e2 & DESC_S_MASK)) /* descriptors with the S bit clear always fail */
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_CS_MASK) {
+        if (!(e2 & DESC_R_MASK)) /* code segment without the R bit */
+            goto fail;
+        if (!(e2 & DESC_C_MASK)) { /* privilege is checked only when the C bit is clear */
+            if (dpl < cpl || dpl < rpl)
+                goto fail;
+        }
+    } else {
+        if (dpl < cpl || dpl < rpl) {
+        fail: /* shared failure exit, also the target of the gotos above */
+            CC_SRC = eflags & ~CC_Z;
+            return;
+        }
+    }
+    CC_SRC = eflags | CC_Z; /* success: CC_Z set */
+}
+
+void helper_verw(target_ulong selector1) /* set CC_Z if the segment is writable at the current privilege */
+{
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(CC_OP); /* current flags; only CC_Z is changed below */
+    if ((selector & 0xfffc) == 0) /* selector is zero apart from its RPL bits */
+        goto fail;
+    if (load_segment(&e1, &e2, selector) != 0)
+        goto fail;
+    if (!(e2 & DESC_S_MASK)) /* descriptors with the S bit clear always fail */
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_CS_MASK) { /* code segments always fail */
+        goto fail;
+    } else {
+        if (dpl < cpl || dpl < rpl) /* DPL must be >= both CPL and RPL */
+            goto fail;
+        if (!(e2 & DESC_W_MASK)) { /* W bit clear */
+        fail: /* shared failure exit, also the target of the gotos above */
+            CC_SRC = eflags & ~CC_Z;
+            return;
+        }
+    }
+    CC_SRC = eflags | CC_Z; /* success: CC_Z set */
+}
+
+/* x87 FPU helpers */
+
+static void fpu_set_exception(int mask) /* OR exception bits into the FPU status word */
+{
+    env->fpus |= mask;
+    if (env->fpus & (~env->fpuc & FPUC_EM)) /* a status bit whose fpuc bit (within FPUC_EM) is clear */
+        env->fpus |= FPUS_SE | FPUS_B; /* also raise FPUS_SE and FPUS_B */
+}
+
+static inline CPU86_LDouble helper_fdiv(CPU86_LDouble a, CPU86_LDouble b) /* returns a / b */
+{
+    if (b == 0.0)
+        fpu_set_exception(FPUS_ZE); /* record FPUS_ZE; the division below still runs */
+    return a / b;
+}
+
+static void fpu_raise_exception(void) /* report a pending FPU error to the guest */
+{
+    if (env->cr[0] & CR0_NE_MASK) { /* CR0.NE set: raise EXCP10_COPR */
+        raise_exception(EXCP10_COPR);
+    }
+#if !defined(CONFIG_USER_ONLY)
+    else { /* CR0.NE clear, non-user-only builds: report through cpu_set_ferr() */
+        cpu_set_ferr(env);
+    }
+#endif
+}
+
+void helper_flds_FT0(uint32_t val) /* FT0 = the float32 whose raw bits are val */
+{
+    union { /* reinterpret the 32 raw bits as a float32 */
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = val;
+    FT0 = float32_to_floatx(u.f, &env->fp_status);
+}
+
+void helper_fldl_FT0(uint64_t val) /* FT0 = the float64 whose raw bits are val */
+{
+    union { /* reinterpret the 64 raw bits as a float64 */
+        float64 f;
+        uint64_t i;
+    } u;
+    u.i = val;
+    FT0 = float64_to_floatx(u.f, &env->fp_status);
+}
+
+void helper_fildl_FT0(int32_t val) /* FT0 = val converted from a 32-bit integer */
+{
+    FT0 = int32_to_floatx(val, &env->fp_status);
+}
+
+void helper_flds_ST0(uint32_t val) /* push the float32 with raw bits val onto the FPU stack */
+{
+    int new_fpstt;
+    union { /* reinterpret the 32 raw bits as a float32 */
+        float32 f;
+        uint32_t i;
+    } u;
+    new_fpstt = (env->fpstt - 1) & 7; /* next slot down, wrapping within the 8 registers */
+    u.i = val;
+    env->fpregs[new_fpstt].d = float32_to_floatx(u.f, &env->fp_status);
+    env->fpstt = new_fpstt; /* top of stack moves only after the value is stored */
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fldl_ST0(uint64_t val) /* push the float64 with raw bits val onto the FPU stack */
+{
+    int new_fpstt;
+    union { /* reinterpret the 64 raw bits as a float64 */
+        float64 f;
+        uint64_t i;
+    } u;
+    new_fpstt = (env->fpstt - 1) & 7; /* next slot down, wrapping within the 8 registers */
+    u.i = val;
+    env->fpregs[new_fpstt].d = float64_to_floatx(u.f, &env->fp_status);
+    env->fpstt = new_fpstt; /* top of stack moves only after the value is stored */
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fildl_ST0(int32_t val) /* push a 32-bit integer onto the FPU stack */
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7; /* next slot down, wrapping within the 8 registers */
+    env->fpregs[new_fpstt].d = int32_to_floatx(val, &env->fp_status);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fildll_ST0(int64_t val) /* push a 64-bit integer onto the FPU stack */
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7; /* next slot down, wrapping within the 8 registers */
+    env->fpregs[new_fpstt].d = int64_to_floatx(val, &env->fp_status);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+uint32_t helper_fsts_ST0(void) /* ST0 converted to float32, returned as raw bits */
+{
+    union { /* lets the float32 result be read back as an integer */
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = floatx_to_float32(ST0, &env->fp_status);
+    return u.i;
+}
+
+uint64_t helper_fstl_ST0(void) /* ST0 converted to float64, returned as raw bits */
+{
+    union { /* lets the float64 result be read back as an integer */
+        float64 f;
+        uint64_t i;
+    } u;
+    u.f = floatx_to_float64(ST0, &env->fp_status);
+    return u.i;
+}
+
+int32_t helper_fist_ST0(void) /* ST0 as a 16-bit integer, returned in an int32_t */
+{
+    int32_t val;
+    val = floatx_to_int32(ST0, &env->fp_status);
+    if (val != (int16_t)val) /* does not fit in 16 bits */
+        val = -32768; /* substitute -32768 */
+    return val;
+}
+
+int32_t helper_fistl_ST0(void) /* ST0 converted to a 32-bit integer */
+{
+    int32_t val;
+    val = floatx_to_int32(ST0, &env->fp_status); /* conversion uses env->fp_status */
+    return val;
+}
+
+int64_t helper_fistll_ST0(void) /* ST0 converted to a 64-bit integer */
+{
+    int64_t val;
+    val = floatx_to_int64(ST0, &env->fp_status); /* conversion uses env->fp_status */
+    return val;
+}
+
+int32_t helper_fistt_ST0(void) /* like helper_fist_ST0 but always rounds toward zero */
+{
+    int32_t val;
+    val = floatx_to_int32_round_to_zero(ST0, &env->fp_status);
+    if (val != (int16_t)val) /* does not fit in 16 bits */
+        val = -32768; /* substitute -32768 */
+    return val;
+}
+
+int32_t helper_fisttl_ST0(void) /* ST0 to a 32-bit integer, rounding toward zero */
+{
+    int32_t val;
+    val = floatx_to_int32_round_to_zero(ST0, &env->fp_status);
+    return val;
+}
+
+int64_t helper_fisttll_ST0(void) /* ST0 to a 64-bit integer, rounding toward zero */
+{
+    int64_t val;
+    val = floatx_to_int64_round_to_zero(ST0, &env->fp_status);
+    return val;
+}
+
+void helper_fldt_ST0(target_ulong ptr) /* push the value helper_fldt() reads at ptr */
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7; /* next slot down, wrapping within the 8 registers */
+    env->fpregs[new_fpstt].d = helper_fldt(ptr); /* load happens before fpstt is changed */
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fstt_ST0(target_ulong ptr) /* store ST0 at ptr via helper_fstt() */
+{
+    helper_fstt(ST0, ptr);
+}
+
+void helper_fpush(void) /* callable wrapper around fpush() */
+{
+    fpush();
+}
+
+void helper_fpop(void) /* callable wrapper around fpop() */
+{
+    fpop();
+}
+
+void helper_fdecstp(void) /* decrement the top-of-stack index; registers and tags untouched */
+{
+    env->fpstt = (env->fpstt - 1) & 7; /* wraps modulo 8 */
+    env->fpus &= (~0x4700); /* clear the four condition bits (mask 0x4700) */
+}
+
+void helper_fincstp(void) /* increment the top-of-stack index; registers and tags untouched */
+{
+    env->fpstt = (env->fpstt + 1) & 7; /* wraps modulo 8 */
+    env->fpus &= (~0x4700); /* clear the four condition bits (mask 0x4700) */
+}
+
+/* FPU move */
+
+void helper_ffree_STN(int st_index) /* set the tag of ST(st_index) to 1 */
+{
+    env->fptags[(env->fpstt + st_index) & 7] = 1; /* same tag value helper_fninit gives every register */
+}
+
+void helper_fmov_ST0_FT0(void) /* copy FT0 into ST0 */
+{
+    ST0 = FT0;
+}
+
+void helper_fmov_FT0_STN(int st_index) /* copy ST(st_index) into FT0 */
+{
+    FT0 = ST(st_index);
+}
+
+void helper_fmov_ST0_STN(int st_index) /* copy ST(st_index) into ST0 */
+{
+    ST0 = ST(st_index);
+}
+
+void helper_fmov_STN_ST0(int st_index) /* copy ST0 into ST(st_index) */
+{
+    ST(st_index) = ST0;
+}
+
+void helper_fxchg_ST0_STN(int st_index) /* swap ST0 with ST(st_index) */
+{
+    CPU86_LDouble tmp; /* holds ST(st_index) during the swap */
+    tmp = ST(st_index);
+    ST(st_index) = ST0;
+    ST0 = tmp;
+}
+
+/* FPU operations */
+
+static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; /* fpus condition bits, indexed by compare result + 1 */
+
+void helper_fcom_ST0_FT0(void) /* compare ST0 with FT0, result goes into the fpus condition bits */
+{
+    int ret;
+
+    ret = floatx_compare(ST0, FT0, &env->fp_status); /* assumes a result in -1..2 -- TODO confirm */
+    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; /* replace only the bits in mask 0x4500 */
+}
+
+void helper_fucom_ST0_FT0(void) /* as helper_fcom_ST0_FT0 but using floatx_compare_quiet() */
+{
+    int ret;
+
+    ret = floatx_compare_quiet(ST0, FT0, &env->fp_status); /* assumes a result in -1..2 -- TODO confirm */
+    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret+ 1]; /* replace only the bits in mask 0x4500 */
+}
+
+static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; /* eflags bits, indexed by compare result + 1 */
+
+void helper_fcomi_ST0_FT0(void) /* compare ST0 with FT0, result goes into CC_Z/CC_P/CC_C */
+{
+    int eflags;
+    int ret;
+
+    ret = floatx_compare(ST0, FT0, &env->fp_status); /* assumes a result in -1..2 -- TODO confirm */
+    eflags = helper_cc_compute_all(CC_OP); /* start from the current flags */
+    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; /* other flags are preserved */
+    CC_SRC = eflags;
+}
+
+void helper_fucomi_ST0_FT0(void) /* as helper_fcomi_ST0_FT0 but using floatx_compare_quiet() */
+{
+    int eflags;
+    int ret;
+
+    ret = floatx_compare_quiet(ST0, FT0, &env->fp_status); /* assumes a result in -1..2 -- TODO confirm */
+    eflags = helper_cc_compute_all(CC_OP); /* start from the current flags */
+    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; /* other flags are preserved */
+    CC_SRC = eflags;
+}
+
+void helper_fadd_ST0_FT0(void) /* ST0 = ST0 + FT0 */
+{
+    ST0 += FT0; /* plain C arithmetic on the register type */
+}
+
+void helper_fmul_ST0_FT0(void) /* ST0 = ST0 * FT0 */
+{
+    ST0 *= FT0; /* plain C arithmetic on the register type */
+}
+
+void helper_fsub_ST0_FT0(void) /* ST0 = ST0 - FT0 */
+{
+    ST0 -= FT0; /* plain C arithmetic on the register type */
+}
+
+void helper_fsubr_ST0_FT0(void) /* reversed subtract: ST0 = FT0 - ST0 */
+{
+    ST0 = FT0 - ST0;
+}
+
+void helper_fdiv_ST0_FT0(void) /* ST0 = ST0 / FT0 */
+{
+    ST0 = helper_fdiv(ST0, FT0); /* helper_fdiv records FPUS_ZE for a zero divisor */
+}
+
+void helper_fdivr_ST0_FT0(void) /* reversed divide: ST0 = FT0 / ST0 */
+{
+    ST0 = helper_fdiv(FT0, ST0); /* helper_fdiv records FPUS_ZE for a zero divisor */
+}
+
+/* fp operations between STN and ST0 */
+
+void helper_fadd_STN_ST0(int st_index) /* ST(st_index) = ST(st_index) + ST0 */
+{
+    ST(st_index) += ST0;
+}
+
+void helper_fmul_STN_ST0(int st_index) /* ST(st_index) = ST(st_index) * ST0 */
+{
+    ST(st_index) *= ST0;
+}
+
+void helper_fsub_STN_ST0(int st_index) /* ST(st_index) = ST(st_index) - ST0 */
+{
+    ST(st_index) -= ST0;
+}
+
+void helper_fsubr_STN_ST0(int st_index) /* reversed subtract: ST(st_index) = ST0 - ST(st_index) */
+{
+    CPU86_LDouble *p; /* destination register, looked up once */
+    p = &ST(st_index);
+    *p = ST0 - *p;
+}
+
+void helper_fdiv_STN_ST0(int st_index) /* ST(st_index) = ST(st_index) / ST0 */
+{
+    CPU86_LDouble *p; /* destination register, looked up once */
+    p = &ST(st_index);
+    *p = helper_fdiv(*p, ST0);
+}
+
+void helper_fdivr_STN_ST0(int st_index) /* reversed divide: ST(st_index) = ST0 / ST(st_index) */
+{
+    CPU86_LDouble *p; /* destination register, looked up once */
+    p = &ST(st_index);
+    *p = helper_fdiv(ST0, *p);
+}
+
+/* misc FPU operations */
+void helper_fchs_ST0(void) /* ST0 = floatx_chs(ST0) */
+{
+    ST0 = floatx_chs(ST0); /* presumably flips the sign -- defined elsewhere */
+}
+
+void helper_fabs_ST0(void) /* ST0 = floatx_abs(ST0) */
+{
+    ST0 = floatx_abs(ST0); /* presumably clears the sign -- defined elsewhere */
+}
+
+void helper_fld1_ST0(void) /* overwrite ST0 with constant-table entry 1 (no push here) */
+{
+    ST0 = f15rk[1]; /* f15rk[] is defined elsewhere; presumably 1.0 */
+}
+
+void helper_fldl2t_ST0(void) /* overwrite ST0 with constant-table entry 6 (no push here) */
+{
+    ST0 = f15rk[6]; /* f15rk[] is defined elsewhere; presumably log2(10) */
+}
+
+void helper_fldl2e_ST0(void) /* overwrite ST0 with constant-table entry 5 (no push here) */
+{
+    ST0 = f15rk[5]; /* f15rk[] is defined elsewhere; presumably log2(e) */
+}
+
+void helper_fldpi_ST0(void) /* overwrite ST0 with constant-table entry 2 (no push here) */
+{
+    ST0 = f15rk[2]; /* f15rk[] is defined elsewhere; presumably pi */
+}
+
+void helper_fldlg2_ST0(void) /* overwrite ST0 with constant-table entry 3 (no push here) */
+{
+    ST0 = f15rk[3]; /* f15rk[] is defined elsewhere; presumably log10(2) */
+}
+
+void helper_fldln2_ST0(void) /* overwrite ST0 with constant-table entry 4 (no push here) */
+{
+    ST0 = f15rk[4]; /* f15rk[] is defined elsewhere; presumably ln(2) */
+}
+
+void helper_fldz_ST0(void) /* overwrite ST0 with constant-table entry 0 (no push here) */
+{
+    ST0 = f15rk[0]; /* f15rk[] is defined elsewhere; presumably 0.0 */
+}
+
+void helper_fldz_FT0(void) /* set FT0 to constant-table entry 0 */
+{
+    FT0 = f15rk[0]; /* f15rk[] is defined elsewhere; presumably 0.0 */
+}
+
+uint32_t helper_fnstsw(void) /* FPU status word with the top-of-stack index merged in */
+{
+    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; /* fpstt occupies bits 11-13 */
+}
+
+uint32_t helper_fnstcw(void) /* return the FPU control word */
+{
+    return env->fpuc; /* returned unmodified */
+}
+
+static void update_fp_status(void) /* push the fpuc rounding settings into env->fp_status */
+{
+    int rnd_type; /* reused below for the precision value */
+
+    /* set rounding mode */
+    switch(env->fpuc & RC_MASK) {
+    default: /* unrecognised values fall back to nearest-even */
+    case RC_NEAR:
+        rnd_type = float_round_nearest_even;
+        break;
+    case RC_DOWN:
+        rnd_type = float_round_down;
+        break;
+    case RC_UP:
+        rnd_type = float_round_up;
+        break;
+    case RC_CHOP:
+        rnd_type = float_round_to_zero;
+        break;
+    }
+    set_float_rounding_mode(rnd_type, &env->fp_status);
+#ifdef FLOATX80
+    switch((env->fpuc >> 8) & 3) { /* fpuc bits 8-9 select the rounding precision */
+    case 0:
+        rnd_type = 32;
+        break;
+    case 2:
+        rnd_type = 64;
+        break;
+    case 3:
+    default: /* value 1 is treated like 3 */
+        rnd_type = 80;
+        break;
+    }
+    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
+#endif
+}
+
+void helper_fldcw(uint32_t val) /* load the FPU control word */
+{
+    env->fpuc = val;
+    update_fp_status(); /* keep env->fp_status in step with the new control word */
+}
+
+void helper_fclex(void) /* clear the low status byte and bit 15 of fpus */
+{
+    env->fpus &= 0x7f00; /* only bits 8-14 survive */
+}
+
+void helper_fwait(void) /* deliver a pending FPU error, if any */
+{
+    if (env->fpus & FPUS_SE) /* FPUS_SE is set by fpu_set_exception() */
+        fpu_raise_exception();
+}
+
+void helper_fninit(void) /* reset FPU status, stack top, control word and tags */
+{
+    env->fpus = 0;
+    env->fpstt = 0;
+    env->fpuc = 0x37f; /* note: update_fp_status() is not called here */
+    env->fptags[0] = 1; /* tag 1 on all eight registers */
+    env->fptags[1] = 1;
+    env->fptags[2] = 1;
+    env->fptags[3] = 1;
+    env->fptags[4] = 1;
+    env->fptags[5] = 1;
+    env->fptags[6] = 1;
+    env->fptags[7] = 1;
+}
+
+/* BCD ops */
+
+void helper_fbld_ST0(target_ulong ptr) /* push the 10-byte packed-BCD value at ptr */
+{
+    CPU86_LDouble tmp;
+    uint64_t val;
+    unsigned int v;
+    int i;
+
+    val = 0;
+    for(i = 8; i >= 0; i--) { /* bytes 8..0, most significant digit pair first */
+        v = ldub(ptr + i);
+        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); /* two decimal digits per byte */
+    }
+    tmp = val;
+    if (ldub(ptr + 9) & 0x80) /* bit 7 of byte 9 is the sign */
+        tmp = -tmp;
+    fpush();
+    ST0 = tmp;
+}
+
+void helper_fbst_ST0(target_ulong ptr) /* store ST0 at ptr as 10-byte packed BCD */
+{
+    int v;
+    target_ulong mem_ref, mem_end;
+    int64_t val;
+
+    val = floatx_to_int64(ST0, &env->fp_status);
+    mem_ref = ptr;
+    mem_end = mem_ref + 9; /* address of the sign byte */
+    if (val < 0) {
+        stb(mem_end, 0x80); /* negative: sign byte 0x80, digits hold the magnitude */
+        val = -val;
+    } else {
+        stb(mem_end, 0x00);
+    }
+    while (mem_ref < mem_end) { /* emit digit pairs, least significant byte first */
+        if (val == 0)
+            break;
+        v = val % 100;
+        val = val / 100;
+        v = ((v / 10) << 4) | (v % 10); /* tens in the high nibble, units in the low */
+        stb(mem_ref++, v);
+    }
+    while (mem_ref < mem_end) { /* zero-fill the remaining digit bytes */
+        stb(mem_ref++, 0);
+    }
+}
+
+void helper_f2xm1(void) /* ST0 = 2^ST0 - 1 */
+{
+    ST0 = pow(2.0,ST0) - 1.0; /* computed with the host pow() */
+}
+
+void helper_fyl2x(void) /* ST1 = ST1 * log2(ST0), then pop */
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if (fptemp>0.0){
+        fptemp = log(fptemp)/log(2.0);	 /* log2(ST) */
+        ST1 *= fptemp;
+        fpop(); /* the product is now the new ST0 */
+    } else { /* ST0 <= 0 (or NaN): nothing is computed or popped */
+        env->fpus &= (~0x4700); /* clear the four condition bits */
+        env->fpus |= 0x400; /* then set bit 0x400 */
+    }
+}
+
+void helper_fptan(void) /* ST0 = tan(ST0), then push 1.0 */
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { /* out of range: only bit 0x400 is set */
+        env->fpus |= 0x400;
+    } else {
+        ST0 = tan(fptemp);
+        fpush();
+        ST0 = 1.0; /* the tangent is now in ST1 */
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**52 only */
+    }
+}
+
+void helper_fpatan(void) /* ST1 = atan2(ST1, ST0), then pop */
+{
+    CPU86_LDouble fptemp, fpsrcop;
+
+    fpsrcop = ST1;
+    fptemp = ST0;
+    ST1 = atan2(fpsrcop,fptemp); /* computed with the host atan2() */
+    fpop(); /* the result is now the new ST0 */
+}
+
+void helper_fxtract(void) /* replace ST0 by its unbiased exponent, then push its significand */
+{
+    CPU86_LDoubleU temp;
+    int expdif; /* signed: the unbiased exponent is negative when EXPD < EXPBIAS */
+
+    temp.d = ST0;
+    expdif = EXPD(temp) - EXPBIAS;
+    /*DP exponent bias*/
+    ST0 = expdif; /* exponent ends up in ST1 after the push below */
+    fpush();
+    BIASEXPONENT(temp); /* presumably resets the exponent field to the bias -- macro defined elsewhere */
+    ST0 = temp.d;
+}
+
+void helper_fprem1(void) /* remainder of ST0 / ST1, quotient rounded to nearest */
+{
+    CPU86_LDouble dblq, fpsrcop, fptemp;
+    CPU86_LDoubleU fpsrcop1, fptemp1;
+    int expdif; /* difference between the exponent fields of ST0 and ST1 */
+    signed long long int q;
+
+    if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
+        ST0 = 0.0 / 0.0; /* NaN */
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+        return;
+    }
+
+    fpsrcop = ST0;
+    fptemp = ST1;
+    fpsrcop1.d = fpsrcop;
+    fptemp1.d = fptemp;
+    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+
+    if (expdif < 0) {
+        /* optimisation? taken from the AMD docs */
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+        /* ST0 is unchanged */
+        return;
+    }
+
+    if (expdif < 53) { /* small exponent gap: finish in one step */
+        dblq = fpsrcop / fptemp;
+        /* round dblq towards nearest integer */
+        dblq = rint(dblq);
+        ST0 = fpsrcop - fptemp * dblq;
+
+        /* convert dblq to q by truncating towards zero */
+        if (dblq < 0.0)
+           q = (signed long long int)(-dblq);
+        else
+           q = (signed long long int)dblq;
+
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+                                /* (C0,C3,C1) <-- (q2,q1,q0) */
+        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
+        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
+        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
+    } else { /* large gap: partial reduction only, C2 is left set */
+        env->fpus |= 0x400;  /* C2 <-- 1 */
+        fptemp = pow(2.0, expdif - 50);
+        fpsrcop = (ST0 / ST1) / fptemp;
+        /* fpsrcop = integer obtained by chopping */
+        fpsrcop = (fpsrcop < 0.0) ?
+                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
+        ST0 -= (ST1 * fpsrcop * fptemp);
+    }
+}
+
+void helper_fprem(void) /* remainder of ST0 / ST1, quotient truncated toward zero */
+{
+    CPU86_LDouble dblq, fpsrcop, fptemp;
+    CPU86_LDoubleU fpsrcop1, fptemp1;
+    int expdif; /* difference between the exponent fields of ST0 and ST1 */
+    signed long long int q;
+
+    if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
+       ST0 = 0.0 / 0.0; /* NaN */
+       env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+       return;
+    }
+
+    fpsrcop = (CPU86_LDouble)ST0;
+    fptemp = (CPU86_LDouble)ST1;
+    fpsrcop1.d = fpsrcop;
+    fptemp1.d = fptemp;
+    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+
+    if (expdif < 0) {
+        /* optimisation? taken from the AMD docs */
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+        /* ST0 is unchanged */
+        return;
+    }
+
+    if ( expdif < 53 ) { /* small exponent gap: finish in one step */
+        dblq = fpsrcop/*ST0*/ / fptemp/*ST1*/;
+        /* round dblq towards zero */
+        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
+        ST0 = fpsrcop/*ST0*/ - fptemp * dblq;
+
+        /* convert dblq to q by truncating towards zero */
+        if (dblq < 0.0)
+           q = (signed long long int)(-dblq);
+        else
+           q = (signed long long int)dblq;
+
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+                                /* (C0,C3,C1) <-- (q2,q1,q0) */
+        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
+        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
+        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
+    } else { /* large gap: partial reduction only, C2 is left set */
+        int N = 32 + (expdif % 32); /* as per AMD docs */
+        env->fpus |= 0x400;  /* C2 <-- 1 */
+        fptemp = pow(2.0, (double)(expdif - N));
+        fpsrcop = (ST0 / ST1) / fptemp;
+        /* fpsrcop = integer obtained by chopping */
+        fpsrcop = (fpsrcop < 0.0) ?
+                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
+        ST0 -= (ST1 * fpsrcop * fptemp);
+    }
+}
+
+void helper_fyl2xp1(void) /* ST1 = ST1 * log2(ST0 + 1), then pop */
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if ((fptemp+1.0)>0.0) {
+        fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
+        ST1 *= fptemp;
+        fpop(); /* the product is now the new ST0 */
+    } else { /* ST0 + 1 <= 0 (or NaN): nothing is computed or popped */
+        env->fpus &= (~0x4700); /* clear the four condition bits */
+        env->fpus |= 0x400; /* then set bit 0x400 */
+    }
+}
+
+void helper_fsqrt(void) /* ST0 = sqrt(ST0) */
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if (fptemp<0.0) { /* negative input: flag it, sqrt() below is still called */
+        env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
+        env->fpus |= 0x400;
+    }
+    ST0 = sqrt(fptemp);
+}
+
+void helper_fsincos(void) /* ST0 = sin(ST0), then push cos of the original value */
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { /* out of range: only bit 0x400 is set */
+        env->fpus |= 0x400;
+    } else {
+        ST0 = sin(fptemp);
+        fpush();
+        ST0 = cos(fptemp); /* the sine is now in ST1 */
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**63 only */
+    }
+}
+
+void helper_frndint(void) /* round ST0 to an integral value */
+{
+    ST0 = floatx_round_to_int(ST0, &env->fp_status); /* rounding governed by env->fp_status */
+}
+
+void helper_fscale(void) /* ST0 = ST0 * 2^(int)ST1 */
+{
+    ST0 = ldexp (ST0, (int)(ST1)); /* the cast truncates ST1 toward zero */
+}
+
+void helper_fsin(void) /* ST0 = sin(ST0) */
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { /* out of range: ST0 unchanged, bit 0x400 set */
+        env->fpus |= 0x400;
+    } else {
+        ST0 = sin(fptemp);
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**53 only */
+    }
+}
+
+void helper_fcos(void) /* ST0 = cos(ST0) */
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { /* out of range: ST0 unchanged, bit 0x400 set */
+        env->fpus |= 0x400;
+    } else {
+        ST0 = cos(fptemp);
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**63 only */
+    }
+}
+
+void helper_fxam_ST0(void) /* classify ST0 into the fpus condition bits */
+{
+    CPU86_LDoubleU temp;
+    int expdif; /* holds the raw exponent field, not a difference */
+
+    temp.d = ST0;
+
+    env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
+    if (SIGND(temp))
+        env->fpus |= 0x200; /* C1 <-- 1 */
+
+    /* XXX: test fptags too */
+    expdif = EXPD(temp);
+    if (expdif == MAXEXPD) { /* maximum exponent: infinity or NaN, told apart by the mantissa */
+#ifdef USE_X86LDOUBLE
+        if (MANTD(temp) == 0x8000000000000000ULL)
+#else
+        if (MANTD(temp) == 0)
+#endif
+            env->fpus |=  0x500 /*Infinity*/;
+        else
+            env->fpus |=  0x100 /*NaN*/;
+    } else if (expdif == 0) { /* zero exponent: zero or denormal */
+        if (MANTD(temp) == 0)
+            env->fpus |=  0x4000 /*Zero*/;
+        else
+            env->fpus |= 0x4400 /*Denormal*/;
+    } else { /* any other exponent */
+        env->fpus |= 0x400;
+    }
+}
+
+void helper_fstenv(target_ulong ptr, int data32) /* store the FPU environment at ptr (32-bit layout if data32) */
+{
+    int fpus, fptag, exp, i;
+    uint64_t mant;
+    CPU86_LDoubleU tmp;
+
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; /* status word with fpstt in bits 11-13 */
+    fptag = 0;
+    for (i=7; i>=0; i--) { /* tag word: 2 bits per register, register 0 in the lowest bits */
+        fptag <<= 2;
+        if (env->fptags[i]) {
+            fptag |= 3; /* register is tagged in env->fptags[] */
+        } else {
+            tmp.d = env->fpregs[i].d;
+            exp = EXPD(tmp);
+            mant = MANTD(tmp);
+            if (exp == 0 && mant == 0) {
+                /* zero */
+                fptag |= 1;
+            } else if (exp == 0 || exp == MAXEXPD
+#ifdef USE_X86LDOUBLE
+                       || (mant & (1ULL << 63)) == 0 /* unsigned constant: 1LL << 63 is undefined */
+#endif
+                       ) {
+                /* NaNs, infinity, denormal */
+                fptag |= 2;
+            }
+        }
+    }
+    if (data32) {
+        /* 32 bit */
+        stl(ptr, env->fpuc);
+        stl(ptr + 4, fpus);
+        stl(ptr + 8, fptag);
+        stl(ptr + 12, 0); /* fpip */
+        stl(ptr + 16, 0); /* fpcs */
+        stl(ptr + 20, 0); /* fpoo */
+        stl(ptr + 24, 0); /* fpos */
+    } else {
+        /* 16 bit */
+        stw(ptr, env->fpuc);
+        stw(ptr + 2, fpus);
+        stw(ptr + 4, fptag);
+        stw(ptr + 6, 0); /* the four pointer fields are stored as zero, as in the 32-bit layout */
+        stw(ptr + 8, 0);
+        stw(ptr + 10, 0);
+        stw(ptr + 12, 0);
+    }
+}
+
+void helper_fldenv(target_ulong ptr, int data32) /* load control, status and tag words from ptr */
+{
+    int i, fpus, fptag;
+
+    if (data32) { /* 32-bit layout: fields 4 bytes apart, only 16 bits of each are read */
+	env->fpuc = lduw(ptr);
+        fpus = lduw(ptr + 4);
+        fptag = lduw(ptr + 8);
+    }
+    else { /* 16-bit layout: fields 2 bytes apart */
+	env->fpuc = lduw(ptr);
+        fpus = lduw(ptr + 2);
+        fptag = lduw(ptr + 4);
+    }
+    env->fpstt = (fpus >> 11) & 7; /* top-of-stack index comes from bits 11-13 */
+    env->fpus = fpus & ~0x3800; /* and is not kept inside env->fpus */
+    for(i = 0;i < 8; i++) {
+        env->fptags[i] = ((fptag & 3) == 3); /* only tag value 3 sets fptags[i] */
+        fptag >>= 2;
+    }
+}
+
+void helper_fsave(target_ulong ptr, int data32) /* store environment and all 8 registers, then reset the FPU */
+{
+    CPU86_LDouble tmp;
+    int i;
+
+    helper_fstenv(ptr, data32);
+
+    ptr += (14 << data32); /* skip the environment: 14 bytes, or 28 when data32 is 1 */
+    for(i = 0;i < 8; i++) { /* registers in stack order ST(0)..ST(7), 10 bytes each */
+        tmp = ST(i);
+        helper_fstt(tmp, ptr);
+        ptr += 10;
+    }
+
+    /* fninit */
+    env->fpus = 0;
+    env->fpstt = 0;
+    env->fpuc = 0x37f;
+    env->fptags[0] = 1; /* tag 1 on all eight registers */
+    env->fptags[1] = 1;
+    env->fptags[2] = 1;
+    env->fptags[3] = 1;
+    env->fptags[4] = 1;
+    env->fptags[5] = 1;
+    env->fptags[6] = 1;
+    env->fptags[7] = 1;
+}
+
+void helper_frstor(target_ulong ptr, int data32) /* reload the environment and the eight ST registers from the image at ptr */
+{
+    CPU86_LDouble tmp;
+    int i;
+
+    helper_fldenv(ptr, data32);
+    ptr += (14 << data32); /* register images follow the 14- or 28-byte environment */
+
+    for(i = 0;i < 8; i++) {
+        tmp = helper_fldt(ptr);
+        ST(i) = tmp;
+        ptr += 10; /* one 10-byte image per register */
+    }
+}
+
+void helper_fxsave(target_ulong ptr, int data64) /* write the FPU state, and MXCSR/XMM state when CR4.OSFXSR is set, to the image at ptr */
+{
+    int fpus, fptag, i, nb_xmm_regs;
+    CPU86_LDouble tmp;
+    target_ulong addr;
+
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; /* fold the stack-top index into bits 11..13 */
+    fptag = 0;
+    for(i = 0; i < 8; i++) {
+        fptag |= (env->fptags[i] << i); /* one bit per register */
+    }
+    stw(ptr, env->fpuc);
+    stw(ptr + 2, fpus);
+    stw(ptr + 4, fptag ^ 0xff); /* stored inverted: an image bit is 1 when fptags[i] is 0 */
+#ifdef TARGET_X86_64
+    if (data64) {
+        stq(ptr + 0x08, 0); /* rip */
+        stq(ptr + 0x10, 0); /* rdp */
+    } else 
+#endif
+    {
+        stl(ptr + 0x08, 0); /* eip */
+        stl(ptr + 0x0c, 0); /* sel  */
+        stl(ptr + 0x10, 0); /* dp */
+        stl(ptr + 0x14, 0); /* sel  */
+    }
+
+    addr = ptr + 0x20; /* ST registers start at offset 0x20 */
+    for(i = 0;i < 8; i++) {
+        tmp = ST(i);
+        helper_fstt(tmp, addr);
+        addr += 16; /* each register occupies a 16-byte slot */
+    }
+
+    if (env->cr[4] & CR4_OSFXSR_MASK) {
+        /* XXX: finish it */
+        stl(ptr + 0x18, env->mxcsr); /* mxcsr */
+        stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
+        if (env->hflags & HF_CS64_MASK) /* 16 XMM registers when HF_CS64_MASK is set, otherwise 8 */
+            nb_xmm_regs = 16;
+        else
+            nb_xmm_regs = 8;
+        addr = ptr + 0xa0; /* XMM registers start at offset 0xa0 */
+        /* Fast FXSAVE leaves out the XMM registers */
+        if (!(env->efer & MSR_EFER_FFXSR) /* XMM is skipped only with FFXSR set, CPL bits clear and LMA set */
+          || (env->hflags & HF_CPL_MASK)
+          || !(env->hflags & HF_LMA_MASK)) {
+            for(i = 0; i < nb_xmm_regs; i++) {
+                stq(addr, env->xmm_regs[i].XMM_Q(0));
+                stq(addr + 8, env->xmm_regs[i].XMM_Q(1));
+                addr += 16;
+            }
+        }
+    }
+}
+
+void helper_fxrstor(target_ulong ptr, int data64) /* reload the FPU state, and MXCSR/XMM state when CR4.OSFXSR is set, from the image at ptr; data64 is not read here */
+{
+    int i, fpus, fptag, nb_xmm_regs;
+    CPU86_LDouble tmp;
+    target_ulong addr;
+
+    env->fpuc = lduw(ptr);
+    fpus = lduw(ptr + 2);
+    fptag = lduw(ptr + 4);
+    env->fpstt = (fpus >> 11) & 7; /* bits 11..13 of the status word are the stack-top index */
+    env->fpus = fpus & ~0x3800;
+    fptag ^= 0xff; /* the image holds the inverse of fptags[] */
+    for(i = 0;i < 8; i++) {
+        env->fptags[i] = ((fptag >> i) & 1);
+    }
+
+    addr = ptr + 0x20; /* ST registers start at offset 0x20, one per 16-byte slot */
+    for(i = 0;i < 8; i++) {
+        tmp = helper_fldt(addr);
+        ST(i) = tmp;
+        addr += 16;
+    }
+
+    if (env->cr[4] & CR4_OSFXSR_MASK) {
+        /* XXX: finish it */
+        env->mxcsr = ldl(ptr + 0x18);
+        //ldl(ptr + 0x1c);
+        if (env->hflags & HF_CS64_MASK) /* 16 XMM registers when HF_CS64_MASK is set, otherwise 8 */
+            nb_xmm_regs = 16;
+        else
+            nb_xmm_regs = 8;
+        addr = ptr + 0xa0; /* XMM registers start at offset 0xa0 */
+        /* Fast FXRESTORE leaves out the XMM registers */
+        if (!(env->efer & MSR_EFER_FFXSR) /* XMM is skipped only with FFXSR set, CPL bits clear and LMA set */
+          || (env->hflags & HF_CPL_MASK)
+          || !(env->hflags & HF_LMA_MASK)) {
+            for(i = 0; i < nb_xmm_regs; i++) {
+                env->xmm_regs[i].XMM_Q(0) = ldq(addr);
+                env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+                addr += 16;
+            }
+        }
+    }
+}
+
+#ifndef USE_X86LDOUBLE
+
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f) /* variant built without USE_X86LDOUBLE: split f into a 64-bit mantissa and a 16-bit sign+exponent word */
+{
+    CPU86_LDoubleU temp;
+    int e;
+
+    temp.d = f;
+    /* mantissa */
+    *pmant = (MANTD(temp) << 11) | (1LL << 63); /* shift the mantissa up by 11 bits and force bit 63 on */
+    /* exponent + sign */
+    e = EXPD(temp) - EXPBIAS + 16383; /* rebias the exponent to 16383 */
+    e |= SIGND(temp) >> 16; /* merge in the sign taken from SIGND (macro defined outside this view) */
+    *pexp = e;
+}
+
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper) /* variant built without USE_X86LDOUBLE: rebuild a CPU86_LDouble from a 64-bit mantissa and a sign+exponent word */
+{
+    CPU86_LDoubleU temp;
+    int e;
+    uint64_t ll;
+
+    /* XXX: handle overflow ? */
+    e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */
+    e |= (upper >> 4) & 0x800; /* sign */
+    ll = (mant >> 11) & ((1LL << 52) - 1); /* drop the top mantissa bit and the low 11 bits, keeping 52 bits */
+#ifdef __arm__
+    temp.l.upper = (e << 20) | (ll >> 32); /* on __arm__ the value is assembled from two 32-bit halves */
+    temp.l.lower = ll;
+#else
+    temp.ll = ll | ((uint64_t)e << 52);
+#endif
+    return temp.d;
+}
+
+#else
+
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f) /* USE_X86LDOUBLE variant: read both fields straight out of the union */
+{
+    CPU86_LDoubleU temp;
+
+    temp.d = f;
+    *pmant = temp.l.lower; /* mantissa comes from the lower member */
+    *pexp = temp.l.upper; /* sign+exponent word comes from the upper member */
+}
+
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper) /* USE_X86LDOUBLE variant: write both fields straight into the union and return it as a value */
+{
+    CPU86_LDoubleU temp;
+
+    temp.l.upper = upper;
+    temp.l.lower = mant;
+    return temp.d;
+}
+#endif
+
+#ifdef TARGET_X86_64
+
+//#define DEBUG_MULDIV
+
+static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) /* add the 128-bit value b:a to *phigh:*plow in place */
+{
+    *plow += a;
+    /* carry test */
+    if (*plow < a) /* the low sum wrapped, so carry into the high word */
+        (*phigh)++;
+    *phigh += b;
+}
+
+static void neg128(uint64_t *plow, uint64_t *phigh) /* negate the 128-bit value *phigh:*plow in place */
+{
+    *plow = ~ *plow; /* invert both words ... */
+    *phigh = ~ *phigh;
+    add128(plow, phigh, 1, 0); /* ... then add one (two's complement) */
+}
+
+/* return TRUE if overflow */
+static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b) /* unsigned divide of *phigh:*plow by b; on success *plow = quotient, *phigh = remainder */
+{
+    uint64_t q, r, a1, a0;
+    int i, qb, ab;
+
+    a0 = *plow;
+    a1 = *phigh;
+    if (a1 == 0) { /* dividend fits in 64 bits: b is not checked for zero here, so callers must reject it first */
+        q = a0 / b;
+        r = a0 % b;
+        *plow = q;
+        *phigh = r;
+    } else {
+        if (a1 >= b) /* the quotient would need more than 64 bits */
+            return 1;
+        /* XXX: use a better algorithm */
+        for(i = 0; i < 64; i++) { /* shift-and-subtract, one quotient bit per iteration */
+            ab = a1 >> 63; /* bit about to be shifted out of the running remainder */
+            a1 = (a1 << 1) | (a0 >> 63);
+            if (ab || a1 >= b) {
+                a1 -= b;
+                qb = 1;
+            } else {
+                qb = 0;
+            }
+            a0 = (a0 << 1) | qb; /* quotient bits accumulate in a0 as the dividend bits leave it */
+        }
+#if defined(DEBUG_MULDIV)
+        printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64 ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n",
+               *phigh, *plow, b, a0, a1);
+#endif
+        *plow = a0;
+        *phigh = a1;
+    }
+    return 0;
+}
+
+/* return TRUE if overflow */
+static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b) /* signed divide of *phigh:*plow by b via div64(); on success *plow = quotient, *phigh = remainder */
+{
+    int sa, sb;
+    sa = ((int64_t)*phigh < 0); /* sign of the 128-bit dividend */
+    if (sa)
+        neg128(plow, phigh); /* continue with its magnitude */
+    sb = (b < 0);
+    if (sb)
+        b = -(uint64_t)b; /* negate in unsigned arithmetic: plain -b is undefined behaviour for INT64_MIN */
+    if (div64(plow, phigh, b) != 0) /* b converts to its unsigned magnitude here */
+        return 1;
+    if (sa ^ sb) {
+        if (*plow > (1ULL << 63)) /* a negative quotient may be as small as -2^63 */
+            return 1;
+        *plow = - *plow;
+    } else {
+        if (*plow >= (1ULL << 63)) /* a non-negative quotient must stay below 2^63 */
+            return 1;
+    }
+    if (sa)
+        *phigh = - *phigh; /* the remainder takes the sign of the dividend */
+    return 0;
+}
+
+void helper_mulq_EAX_T0(target_ulong t0) /* unsigned multiply of EAX by t0 through mulu64(); result words go to EAX and EDX */
+{
+    uint64_t r0, r1;
+
+    mulu64(&r0, &r1, EAX, t0); /* presumably r0 = low half, r1 = high half; mulu64 is defined outside this view */
+    EAX = r0;
+    EDX = r1;
+    CC_DST = r0;
+    CC_SRC = r1; /* the second result word is left in CC_SRC for later flag evaluation */
+}
+
+void helper_imulq_EAX_T0(target_ulong t0) /* signed multiply of EAX by t0 through muls64(); result words go to EAX and EDX */
+{
+    uint64_t r0, r1;
+
+    muls64(&r0, &r1, EAX, t0);
+    EAX = r0;
+    EDX = r1;
+    CC_DST = r0;
+    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); /* 1 when r1 is not the sign extension of r0, i.e. the result does not fit in r0 alone */
+}
+
+target_ulong helper_imulq_T0_T1(target_ulong t0, target_ulong t1) /* signed multiply of t0 by t1 through muls64(); returns the r0 result word */
+{
+    uint64_t r0, r1;
+
+    muls64(&r0, &r1, t0, t1);
+    CC_DST = r0;
+    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); /* 1 when r1 is not the sign extension of r0 */
+    return r0;
+}
+
+void helper_divq_EAX(target_ulong t0) /* unsigned divide of EDX:EAX by t0; quotient to EAX, remainder to EDX */
+{
+    uint64_t r0, r1;
+    if (t0 == 0) { /* reject a zero divisor before div64(), which does not check for it */
+        raise_exception(EXCP00_DIVZ);
+    }
+    r0 = EAX;
+    r1 = EDX;
+    if (div64(&r0, &r1, t0)) /* a quotient overflow raises the same exception as a zero divisor */
+        raise_exception(EXCP00_DIVZ);
+    EAX = r0;
+    EDX = r1;
+}
+
+void helper_idivq_EAX(target_ulong t0) /* signed divide of EDX:EAX by t0; quotient to EAX, remainder to EDX */
+{
+    uint64_t r0, r1;
+    if (t0 == 0) { /* reject a zero divisor before idiv64() */
+        raise_exception(EXCP00_DIVZ);
+    }
+    r0 = EAX;
+    r1 = EDX;
+    if (idiv64(&r0, &r1, t0)) /* a quotient overflow raises the same exception as a zero divisor */
+        raise_exception(EXCP00_DIVZ);
+    EAX = r0;
+    EDX = r1;
+}
+#endif
+
+static void do_hlt(void) /* mark the CPU halted and leave the execution loop with EXCP_HLT */
+{
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
+    env->halted = 1;
+    env->exception_index = EXCP_HLT;
+    cpu_loop_exit();
+}
+
+void helper_hlt(int next_eip_addend) /* run the SVM HLT intercept check, advance EIP by next_eip_addend, then halt */
+{
+    helper_svm_check_intercept_param(SVM_EXIT_HLT, 0);
+    EIP += next_eip_addend; /* EIP is advanced before halting */
+    
+    do_hlt();
+}
+
+void helper_monitor(target_ulong ptr) /* MONITOR helper: validates ECX and runs the SVM intercept check; ptr is not used yet */
+{
+    if ((uint32_t)ECX != 0) /* a non-zero low 32 bits of ECX raises #GP */
+        raise_exception(EXCP0D_GPF);
+    /* XXX: store address ? */
+    helper_svm_check_intercept_param(SVM_EXIT_MONITOR, 0);
+}
+
+void helper_mwait(int next_eip_addend) /* MWAIT helper: validates ECX, advances EIP, and halts only when this is the sole CPU */
+{
+    if ((uint32_t)ECX != 0) /* a non-zero low 32 bits of ECX raises #GP */
+        raise_exception(EXCP0D_GPF);
+    helper_svm_check_intercept_param(SVM_EXIT_MWAIT, 0);
+    EIP += next_eip_addend;
+
+    /* XXX: not complete but not completely erroneous */
+    if (env->cpu_index != 0 || env->next_cpu != NULL) { /* true when this is not CPU 0 or another CPU follows it in the list */
+        /* more than one CPU: do not sleep because another CPU may
+           wake this one */
+    } else {
+        do_hlt();
+    }
+}
+
+void helper_debug(void) /* leave the execution loop with EXCP_DEBUG as the pending exception index */
+{
+    env->exception_index = EXCP_DEBUG;
+    cpu_loop_exit();
+}
+
+void helper_reset_rf(void) /* clear the RF_MASK bit in env->eflags */
+{
+    env->eflags &= ~RF_MASK;
+}
+
+void helper_raise_interrupt(int intno, int next_eip_addend) /* thin wrapper around raise_interrupt() with fixed second and third arguments */
+{
+    raise_interrupt(intno, 1, 0, next_eip_addend); /* the 1 and 0 are presumably is_int and error_code; confirm against raise_interrupt's declaration */
+}
+
+void helper_raise_exception(int exception_index) /* thin wrapper that forwards exception_index to raise_exception() */
+{
+    raise_exception(exception_index);
+}
+
+void helper_cli(void) /* clear the IF_MASK bit in env->eflags */
+{
+    env->eflags &= ~IF_MASK;
+}
+
+void helper_sti(void) /* set the IF_MASK bit in env->eflags */
+{
+    env->eflags |= IF_MASK;
+}
+
+#if 0 /* compiled out: the two helpers below are kept for reference only */
+/* vm86plus instructions */
+void helper_cli_vm(void) /* would clear the VIF_MASK bit in env->eflags */
+{
+    env->eflags &= ~VIF_MASK;
+}
+
+void helper_sti_vm(void) /* would set VIF_MASK and raise #GP when VIP_MASK is also set */
+{
+    env->eflags |= VIF_MASK;
+    if (env->eflags & VIP_MASK) {
+        raise_exception(EXCP0D_GPF);
+    }
+}
+#endif
+
+void helper_set_inhibit_irq(void) /* set HF_INHIBIT_IRQ_MASK in env->hflags */
+{
+    env->hflags |= HF_INHIBIT_IRQ_MASK;
+}
+
+void helper_reset_inhibit_irq(void) /* clear HF_INHIBIT_IRQ_MASK in env->hflags */
+{
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+}
+
+void helper_boundw(target_ulong a0, int v) /* 16-bit bounds check: raise EXCP05_BOUND unless low <= v <= high */
+{
+    int low, high;
+    low = ldsw(a0); /* lower bound read at a0 (ldsw is presumably a sign-extending 16-bit load) */
+    high = ldsw(a0 + 2); /* upper bound read two bytes later */
+    v = (int16_t)v; /* compare only the low 16 bits of v, as a signed value */
+    if (v < low || v > high) {
+        raise_exception(EXCP05_BOUND);
+    }
+}
+
+void helper_boundl(target_ulong a0, int v) /* 32-bit bounds check: raise EXCP05_BOUND unless low <= v <= high */
+{
+    int low, high;
+    low = ldl(a0); /* lower bound read at a0 */
+    high = ldl(a0 + 4); /* upper bound read four bytes later */
+    if (v < low || v > high) { /* the comparison is signed because all three operands are int */
+        raise_exception(EXCP05_BOUND);
+    }
+}
+
+static float approx_rsqrt(float a) /* reciprocal square root of a; no checks are made on a */
+{
+    return 1.0 / sqrt(a); /* evaluated in double precision, then narrowed to float on return */
+}
+
+static float approx_rcp(float a) /* reciprocal of a; no checks are made on a */
+{
+    return 1.0 / a; /* evaluated in double precision, then narrowed to float on return */
+}
+
+#if !defined(CONFIG_USER_ONLY) /* the template below is instantiated only when CONFIG_USER_ONLY is not defined */
+
+#define MMUSUFFIX _mmu
+
+#define SHIFT 0 /* softmmu_template.h is included once per SHIFT value 0..3 (presumably one access size each; the template is not visible here) */
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+#endif
+
+#if !defined(CONFIG_USER_ONLY)
+/* try to fill the TLB and return an exception if error. If retaddr is
+   NULL, it means that the function was called in C code (i.e. not
+   from generated code or from helper.c) */
+/* XXX: fix it to restore all registers */
+void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
+{
+    TranslationBlock *tb;
+    int ret;
+    unsigned long pc;
+    CPUX86State *saved_env;
+
+    /* XXX: hack to restore env in all cases, even if not called from
+       generated code */
+    saved_env = env;
+    env = cpu_single_env;
+
+    ret = cpu_x86_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
+    if (ret) { /* non-zero means the fault could not be resolved and must be raised */
+        if (retaddr) {
+            /* now we have a real cpu fault */
+            pc = (unsigned long)retaddr;
+            tb = tb_find_pc(pc);
+            if (tb) {
+                /* the PC is inside the translated code. It means that we have
+                   a virtual CPU fault */
+                cpu_restore_state(tb, env, pc, NULL);
+            }
+        }
+        raise_exception_err(env->exception_index, env->error_code); /* uses the index and error code found in env after the fault handler ran */
+    }
+    env = saved_env; /* restore the caller's env on the success path */
+}
+#endif
+
+/* Secure Virtual Machine helpers */
+
+#if defined(CONFIG_USER_ONLY)
+
+void helper_vmrun(int aflag, int next_eip_addend) /* empty stub used when CONFIG_USER_ONLY is defined */
+{ 
+}
+void helper_vmmcall(void) /* empty stub used when CONFIG_USER_ONLY is defined */
+{ 
+}
+void helper_vmload(int aflag) /* empty stub used when CONFIG_USER_ONLY is defined */
+{ 
+}
+void helper_vmsave(int aflag) /* empty stub used when CONFIG_USER_ONLY is defined */
+{ 
+}
+void helper_stgi(void) /* empty stub used when CONFIG_USER_ONLY is defined */
+{
+}
+void helper_clgi(void) /* empty stub used when CONFIG_USER_ONLY is defined */
+{
+}
+void helper_skinit(void) /* empty stub used when CONFIG_USER_ONLY is defined */
+{ 
+}
+void helper_invlpga(int aflag) /* empty stub used when CONFIG_USER_ONLY is defined */
+{ 
+}
+void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1) /* empty stub used when CONFIG_USER_ONLY is defined */
+{ 
+}
+void helper_svm_check_intercept_param(uint32_t type, uint64_t param) /* empty stub used when CONFIG_USER_ONLY is defined: no intercept is ever taken */
+{
+}
+
+void helper_svm_check_io(uint32_t port, uint32_t param, 
+                         uint32_t next_eip_addend) /* empty stub used when CONFIG_USER_ONLY is defined */
+{
+}
+#else
+
+static inline void svm_save_seg(target_phys_addr_t addr,
+                                const SegmentCache *sc) /* write the segment cache *sc into the vmcb_seg record at physical address addr */
+{
+    stw_phys(addr + offsetof(struct vmcb_seg, selector), 
+             sc->selector);
+    stq_phys(addr + offsetof(struct vmcb_seg, base), 
+             sc->base);
+    stl_phys(addr + offsetof(struct vmcb_seg, limit), 
+             sc->limit);
+    stw_phys(addr + offsetof(struct vmcb_seg, attrib), 
+             ((sc->flags >> 8) & 0xff) | ((sc->flags >> 12) & 0x0f00)); /* attrib bits 0..7 = flags bits 8..15, attrib bits 8..11 = flags bits 20..23 */
+}
+                                
+static inline void svm_load_seg(target_phys_addr_t addr, SegmentCache *sc) /* fill *sc from the vmcb_seg record at physical address addr */
+{
+    unsigned int flags;
+
+    sc->selector = lduw_phys(addr + offsetof(struct vmcb_seg, selector));
+    sc->base = ldq_phys(addr + offsetof(struct vmcb_seg, base));
+    sc->limit = ldl_phys(addr + offsetof(struct vmcb_seg, limit));
+    flags = lduw_phys(addr + offsetof(struct vmcb_seg, attrib));
+    sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12); /* inverse of the packing in svm_save_seg: attrib bits 0..7 -> flags 8..15, bits 8..11 -> flags 20..23 */
+}
+
+static inline void svm_load_seg_cache(target_phys_addr_t addr, 
+                                      CPUState *env, int seg_reg) /* read a vmcb_seg record at addr and install it as segment register seg_reg */
+{
+    SegmentCache sc1, *sc = &sc1; /* temporary that receives the decoded record */
+    svm_load_seg(addr, sc);
+    cpu_x86_load_seg_cache(env, seg_reg, sc->selector,
+                           sc->base, sc->limit, sc->flags); /* installs through cpu_x86_load_seg_cache rather than writing env->segs[] directly */
+}
+
+void helper_vmrun(int aflag, int next_eip_addend) /* VMRUN: save host state to the hsave page, load guest state from the VMCB addressed by EAX, optionally inject an event */
+{
+    target_ulong addr;
+    uint32_t event_inj;
+    uint32_t int_ctl;
+
+    helper_svm_check_intercept_param(SVM_EXIT_VMRUN, 0);
+
+    if (aflag == 2) /* aflag == 2 uses the full EAX; any other value uses its low 32 bits */
+        addr = EAX;
+    else
+        addr = (uint32_t)EAX;
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmrun! " TARGET_FMT_lx "\n", addr);
+
+    env->vm_vmcb = addr;
+
+    /* save the current CPU state in the hsave page */
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base), env->gdt.base);
+    stl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit);
+
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base), env->idt.base);
+    stl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit), env->idt.limit);
+
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0), env->cr[0]);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr2), env->cr[2]);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3), env->cr[3]);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4), env->cr[4]);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6), env->dr[6]);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7), env->dr[7]);
+
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer), env->efer);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags), compute_eflags());
+
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.es), 
+                  &env->segs[R_ES]);
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.cs), 
+                 &env->segs[R_CS]);
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ss), 
+                 &env->segs[R_SS]);
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ds), 
+                 &env->segs[R_DS]);
+
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip),
+             EIP + next_eip_addend); /* the saved host rip is EIP advanced by next_eip_addend */
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp), ESP);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax), EAX);
+
+    /* load the interception bitmaps so we do not need to access the
+       vmcb in svm mode */
+    env->intercept            = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept));
+    env->intercept_cr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_read));
+    env->intercept_cr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
+    env->intercept_dr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
+    env->intercept_dr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
+    env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
+
+    /* enable intercepts */
+    env->hflags |= HF_SVMI_MASK;
+
+    env->tsc_offset = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.tsc_offset));
+
+    env->gdt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
+    env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
+
+    env->idt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
+    env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
+
+    /* clear exit_info_2 so we behave like the real hardware */
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
+
+    cpu_x86_update_cr0(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0)));
+    cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
+    cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
+    env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
+    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+    env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK);
+    if (int_ctl & V_INTR_MASKING_MASK) {
+        env->v_tpr = int_ctl & V_TPR_MASK;
+        env->hflags2 |= HF2_VINTR_MASK;
+        if (env->eflags & IF_MASK) /* env->eflags still holds the host value here; it is replaced further down */
+            env->hflags2 |= HF2_HIF_MASK;
+    }
+
+    cpu_load_efer(env, 
+                  ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer)));
+    env->eflags = 0;
+    load_eflags(ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)),
+                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    CC_OP = CC_OP_EFLAGS;
+
+    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.es),
+                       env, R_ES);
+    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.cs),
+                       env, R_CS);
+    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ss),
+                       env, R_SS);
+    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ds),
+                       env, R_DS);
+
+    EIP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip));
+    env->eip = EIP;
+    ESP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp));
+    EAX = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax));
+    env->dr[7] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7));
+    env->dr[6] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6));
+    cpu_x86_set_cpl(env, ldub_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl)));
+
+    /* FIXME: guest state consistency checks */
+
+    switch(ldub_phys(env->vm_vmcb + offsetof(struct vmcb, control.tlb_ctl))) { /* any other tlb_ctl value falls through the switch with no action */
+        case TLB_CONTROL_DO_NOTHING:
+            break;
+        case TLB_CONTROL_FLUSH_ALL_ASID:
+            /* FIXME: this is not 100% correct but should work for now */
+            tlb_flush(env, 1);
+        break;
+    }
+
+    env->hflags2 |= HF2_GIF_MASK;
+
+    if (int_ctl & V_IRQ_MASK) {
+        env->interrupt_request |= CPU_INTERRUPT_VIRQ;
+    }
+
+    /* maybe we need to inject an event */
+    event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+    if (event_inj & SVM_EVTINJ_VALID) {
+        uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
+        uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR; /* only logged below; see the FIXME */
+        uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
+
+        qemu_log_mask(CPU_LOG_TB_IN_ASM, "Injecting(%#hx): ", valid_err);
+        /* FIXME: need to implement valid_err */
+        switch (event_inj & SVM_EVTINJ_TYPE_MASK) { /* only the INTR case calls do_interrupt(); the other three call cpu_loop_exit() with the exception fields set */
+        case SVM_EVTINJ_TYPE_INTR:
+                env->exception_index = vector;
+                env->error_code = event_inj_err;
+                env->exception_is_int = 0;
+                env->exception_next_eip = -1;
+                qemu_log_mask(CPU_LOG_TB_IN_ASM, "INTR");
+                /* XXX: is it always correct ? */
+                do_interrupt(vector, 0, 0, 0, 1);
+                break;
+        case SVM_EVTINJ_TYPE_NMI:
+                env->exception_index = EXCP02_NMI;
+                env->error_code = event_inj_err;
+                env->exception_is_int = 0;
+                env->exception_next_eip = EIP;
+                qemu_log_mask(CPU_LOG_TB_IN_ASM, "NMI");
+                cpu_loop_exit();
+                break;
+        case SVM_EVTINJ_TYPE_EXEPT:
+                env->exception_index = vector;
+                env->error_code = event_inj_err;
+                env->exception_is_int = 0;
+                env->exception_next_eip = -1;
+                qemu_log_mask(CPU_LOG_TB_IN_ASM, "EXEPT");
+                cpu_loop_exit();
+                break;
+        case SVM_EVTINJ_TYPE_SOFT:
+                env->exception_index = vector;
+                env->error_code = event_inj_err;
+                env->exception_is_int = 1;
+                env->exception_next_eip = EIP;
+                qemu_log_mask(CPU_LOG_TB_IN_ASM, "SOFT");
+                cpu_loop_exit();
+                break;
+        }
+        qemu_log_mask(CPU_LOG_TB_IN_ASM, " %#x %#x\n", env->exception_index, env->error_code);
+    }
+}
+
+void helper_vmmcall(void) /* VMMCALL: run the intercept check; if that returns, raise EXCP06_ILLOP */
+{
+    helper_svm_check_intercept_param(SVM_EXIT_VMMCALL, 0);
+    raise_exception(EXCP06_ILLOP);
+}
+
+void helper_vmload(int aflag) /* VMLOAD: load FS, GS, TR, LDTR and the syscall/sysenter MSR state from the VMCB addressed by EAX */
+{
+    target_ulong addr;
+    helper_svm_check_intercept_param(SVM_EXIT_VMLOAD, 0);
+
+    if (aflag == 2) /* aflag == 2 uses the full EAX; any other value uses its low 32 bits */
+        addr = EAX;
+    else
+        addr = (uint32_t)EAX;
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmload! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
+                addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
+                env->segs[R_FS].base);
+
+    svm_load_seg_cache(addr + offsetof(struct vmcb, save.fs),
+                       env, R_FS);
+    svm_load_seg_cache(addr + offsetof(struct vmcb, save.gs),
+                       env, R_GS);
+    svm_load_seg(addr + offsetof(struct vmcb, save.tr),
+                 &env->tr); /* TR and LDTR are written straight into env, not via cpu_x86_load_seg_cache */
+    svm_load_seg(addr + offsetof(struct vmcb, save.ldtr),
+                 &env->ldt);
+
+#ifdef TARGET_X86_64
+    env->kernelgsbase = ldq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base));
+    env->lstar = ldq_phys(addr + offsetof(struct vmcb, save.lstar));
+    env->cstar = ldq_phys(addr + offsetof(struct vmcb, save.cstar));
+    env->fmask = ldq_phys(addr + offsetof(struct vmcb, save.sfmask));
+#endif
+    env->star = ldq_phys(addr + offsetof(struct vmcb, save.star));
+    env->sysenter_cs = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_cs));
+    env->sysenter_esp = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_esp));
+    env->sysenter_eip = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_eip));
+}
+
+void helper_vmsave(int aflag) /* VMSAVE: store FS, GS, TR, LDTR and the syscall/sysenter MSR state into the VMCB addressed by EAX */
+{
+    target_ulong addr;
+    helper_svm_check_intercept_param(SVM_EXIT_VMSAVE, 0);
+
+    if (aflag == 2) /* aflag == 2 uses the full EAX; any other value uses its low 32 bits */
+        addr = EAX;
+    else
+        addr = (uint32_t)EAX;
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmsave! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
+                addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
+                env->segs[R_FS].base);
+
+    svm_save_seg(addr + offsetof(struct vmcb, save.fs), 
+                 &env->segs[R_FS]);
+    svm_save_seg(addr + offsetof(struct vmcb, save.gs), 
+                 &env->segs[R_GS]);
+    svm_save_seg(addr + offsetof(struct vmcb, save.tr), 
+                 &env->tr);
+    svm_save_seg(addr + offsetof(struct vmcb, save.ldtr), 
+                 &env->ldt);
+
+#ifdef TARGET_X86_64
+    stq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base), env->kernelgsbase);
+    stq_phys(addr + offsetof(struct vmcb, save.lstar), env->lstar);
+    stq_phys(addr + offsetof(struct vmcb, save.cstar), env->cstar);
+    stq_phys(addr + offsetof(struct vmcb, save.sfmask), env->fmask);
+#endif
+    stq_phys(addr + offsetof(struct vmcb, save.star), env->star);
+    stq_phys(addr + offsetof(struct vmcb, save.sysenter_cs), env->sysenter_cs);
+    stq_phys(addr + offsetof(struct vmcb, save.sysenter_esp), env->sysenter_esp);
+    stq_phys(addr + offsetof(struct vmcb, save.sysenter_eip), env->sysenter_eip);
+}
+
+void helper_stgi(void) /* STGI: run the intercept check, then set HF2_GIF_MASK in env->hflags2 */
+{
+    helper_svm_check_intercept_param(SVM_EXIT_STGI, 0);
+    env->hflags2 |= HF2_GIF_MASK;
+}
+
+void helper_clgi(void) /* CLGI: run the intercept check, then clear HF2_GIF_MASK in env->hflags2 */
+{
+    helper_svm_check_intercept_param(SVM_EXIT_CLGI, 0);
+    env->hflags2 &= ~HF2_GIF_MASK;
+}
+
+void helper_skinit(void) /* SKINIT: run the intercept check; if that returns, raise EXCP06_ILLOP */
+{
+    helper_svm_check_intercept_param(SVM_EXIT_SKINIT, 0);
+    /* XXX: not implemented */
+    raise_exception(EXCP06_ILLOP);
+}
+
+void helper_invlpga(int aflag) /* INVLPGA: run the intercept check, then flush the TLB entry for the address in EAX */
+{
+    target_ulong addr;
+    helper_svm_check_intercept_param(SVM_EXIT_INVLPGA, 0);
+    
+    if (aflag == 2) /* aflag == 2 uses the full EAX; any other value uses its low 32 bits */
+        addr = EAX;
+    else
+        addr = (uint32_t)EAX;
+
+    /* XXX: could use the ASID to see if it is needed to do the
+       flush */
+    tlb_flush_page(env, addr);
+}
+
+void helper_svm_check_intercept_param(uint32_t type, uint64_t param) /* call helper_vmexit(type, param) when the cached intercept bitmaps select exit code type; otherwise return */
+{
+    if (likely(!(env->hflags & HF_SVMI_MASK))) /* intercepts are not enabled: nothing to check */
+        return;
+    switch(type) {
+    case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR0 + 8:
+        if (env->intercept_cr_read & (1 << (type - SVM_EXIT_READ_CR0))) { /* one bitmap bit per exit code in the range */
+            helper_vmexit(type, param);
+        }
+        break;
+    case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR0 + 8:
+        if (env->intercept_cr_write & (1 << (type - SVM_EXIT_WRITE_CR0))) {
+            helper_vmexit(type, param);
+        }
+        break;
+    case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR0 + 7:
+        if (env->intercept_dr_read & (1 << (type - SVM_EXIT_READ_DR0))) {
+            helper_vmexit(type, param);
+        }
+        break;
+    case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR0 + 7:
+        if (env->intercept_dr_write & (1 << (type - SVM_EXIT_WRITE_DR0))) {
+            helper_vmexit(type, param);
+        }
+        break;
+    case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 31:
+        if (env->intercept_exceptions & (1U << (type - SVM_EXIT_EXCP_BASE))) { /* unsigned constant: the shift count can reach 31 */
+            helper_vmexit(type, param);
+        }
+        break;
+    case SVM_EXIT_MSR:
+        if (env->intercept & (1ULL << (SVM_EXIT_MSR - SVM_EXIT_INTR))) {
+            /* FIXME: this should be read in at vmrun (faster this way?) */
+            uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa));
+            uint32_t t0, t1; /* t0 = bit offset inside the byte, t1 = byte offset inside the permission map */
+            switch((uint32_t)ECX) { /* the map holds two bits per MSR, in three consecutive 8192-MSR ranges */
+            case 0 ... 0x1fff:
+                t0 = (ECX * 2) % 8;
+                t1 = ((uint32_t)ECX * 2) / 8; /* two bits per MSR, so the byte index is (2 * msr) / 8, as in the ranges below */
+                break;
+            case 0xc0000000 ... 0xc0001fff:
+                t0 = (8192 + ECX - 0xc0000000) * 2;
+                t1 = (t0 / 8);
+                t0 %= 8;
+                break;
+            case 0xc0010000 ... 0xc0011fff:
+                t0 = (16384 + ECX - 0xc0010000) * 2;
+                t1 = (t0 / 8);
+                t0 %= 8;
+                break;
+            default: /* MSR outside the three mapped ranges: exit unconditionally */
+                helper_vmexit(type, param);
+                t0 = 0;
+                t1 = 0;
+                break;
+            }
+            if (ldub_phys(addr + t1) & ((1 << param) << t0)) /* param selects which of the MSR's two bits is tested */
+                helper_vmexit(type, param);
+        }
+        break;
+    default:
+        if (env->intercept & (1ULL << (type - SVM_EXIT_INTR))) { /* every other exit code maps to a bit of the 64-bit intercept word */
+            helper_vmexit(type, param);
+        }
+        break;
+    }
+}
+
+void helper_svm_check_io(uint32_t port, uint32_t param, 
+                         uint32_t next_eip_addend) /* take an SVM_EXIT_IOIO exit when IOIO interception is enabled and the I/O permission map flags this port access */
+{
+    if (env->intercept & (1ULL << (SVM_EXIT_IOIO - SVM_EXIT_INTR))) {
+        /* FIXME: this should be read in at vmrun (faster this way?) */
+        uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa));
+        uint16_t mask = (1 << ((param >> 4) & 7)) - 1; /* bits 4..6 of param give the number of consecutive port bits to test */
+        if(lduw_phys(addr + port / 8) & (mask << (port & 7))) { /* a 16-bit read so the tested bits may straddle a byte boundary */
+            /* next EIP */
+            stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 
+                     env->eip + next_eip_addend);
+            helper_vmexit(SVM_EXIT_IOIO, param | (port << 16)); /* exit_info_1 carries param with the port number in bits 16 and up */
+        }
+    }
+}
+
+/* Note: currently only 32 bits of exit_code are used */
+void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
+{
+    uint32_t int_ctl;
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n",
+                exit_code, exit_info_1,
+                ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)),
+                EIP);
+
+    if(env->hflags & HF_INHIBIT_IRQ_MASK) {
+        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), SVM_INTERRUPT_SHADOW_MASK);
+        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+    } else {
+        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0);
+    }
+
+    /* Save the VM state in the vmcb */
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.es), 
+                 &env->segs[R_ES]);
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.cs), 
+                 &env->segs[R_CS]);
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ss), 
+                 &env->segs[R_SS]);
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ds), 
+                 &env->segs[R_DS]);
+
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), env->gdt.base);
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit);
+
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base), env->idt.base);
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit), env->idt.limit);
+
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer), env->efer);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0), env->cr[0]);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2), env->cr[2]);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
+
+    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+    int_ctl &= ~(V_TPR_MASK | V_IRQ_MASK);
+    int_ctl |= env->v_tpr & V_TPR_MASK;
+    if (env->interrupt_request & CPU_INTERRUPT_VIRQ)
+        int_ctl |= V_IRQ_MASK;
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
+
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags), compute_eflags());
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip), env->eip);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp), ESP);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax), EAX);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7), env->dr[7]);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6), env->dr[6]);
+    stb_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl), env->hflags & HF_CPL_MASK);
+
+    /* Reload the host state from vm_hsave */
+    env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK);
+    env->hflags &= ~HF_SVMI_MASK;
+    env->intercept = 0;
+    env->intercept_exceptions = 0;
+    env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
+    env->tsc_offset = 0;
+
+    env->gdt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
+    env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
+
+    env->idt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
+    env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
+
+    cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
+    cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
+    cpu_x86_update_cr3(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3)));
+    /* we need to set the efer after the crs so the hidden flags get
+       set properly */
+    cpu_load_efer(env, 
+                  ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer)));
+    env->eflags = 0;
+    load_eflags(ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)),
+                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    CC_OP = CC_OP_EFLAGS;
+
+    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.es),
+                       env, R_ES);
+    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.cs),
+                       env, R_CS);
+    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ss),
+                       env, R_SS);
+    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ds),
+                       env, R_DS);
+
+    EIP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip));
+    ESP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp));
+    EAX = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax));
+
+    env->dr[6] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6));
+    env->dr[7] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7));
+
+    /* other setups */
+    cpu_x86_set_cpl(env, 0);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1);
+
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info),
+             ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)));
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err),
+             ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)));
+
+    env->hflags2 &= ~HF2_GIF_MASK;
+    /* FIXME: Resets the current ASID register to zero (host ASID). */
+
+    /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */
+
+    /* Clears the TSC_OFFSET inside the processor. */
+
+    /* If the host is in PAE mode, the processor reloads the host's PDPEs
+       from the page table indicated the host's CR3. If the PDPEs contain
+       illegal state, the processor causes a shutdown. */
+
+    /* Forces CR0.PE = 1, RFLAGS.VM = 0. */
+    env->cr[0] |= CR0_PE_MASK;
+    env->eflags &= ~VM_MASK;
+
+    /* Disables all breakpoints in the host DR7 register. */
+
+    /* Checks the reloaded host state for consistency. */
+
+    /* If the host's rIP reloaded by #VMEXIT is outside the limit of the
+       host's code segment or non-canonical (in the case of long mode), a
+       #GP fault is delivered inside the host.) */
+
+    /* remove any pending exception */
+    env->exception_index = -1;
+    env->error_code = 0;
+    env->old_exception = -1;
+
+    cpu_loop_exit();
+}
+
+#endif
+
+/* MMX/SSE */
+/* XXX: optimize by storing fptt and fptags in the static cpu state */
+void helper_enter_mmx(void)
+{
+    env->fpstt = 0; /* zero env->fpstt (presumably the x87 stack-top index -- confirm) */
+    *(uint32_t *)(env->fptags) = 0; /* one 32-bit store of zero at the start of fptags */
+    *(uint32_t *)(env->fptags + 4) = 0; /* second store at fptags + 4; NOTE(review): assumes a byte array of >= 8 tags */
+}
+
+void helper_emms(void)
+{
+    /* set to empty state: each byte covered by the two stores below becomes 1 */
+    *(uint32_t *)(env->fptags) = 0x01010101;
+    *(uint32_t *)(env->fptags + 4) = 0x01010101; /* NOTE(review): assumes fptags is a byte array of >= 8 tags */
+}
+
+/* XXX: suppress.  Copies the 64-bit value stored at s into the location d. */
+void helper_movq(void *d, void *s)
+{
+    ((uint64_t *)d)[0] = ((const uint64_t *)s)[0];
+}
+
+#define SHIFT 0
+#include "ops_sse.h"
+
+#define SHIFT 1
+#include "ops_sse.h"
+
+#define SHIFT 0
+#include "helper_template.h"
+#undef SHIFT
+
+#define SHIFT 1
+#include "helper_template.h"
+#undef SHIFT
+
+#define SHIFT 2
+#include "helper_template.h"
+#undef SHIFT
+
+#ifdef TARGET_X86_64
+
+#define SHIFT 3
+#include "helper_template.h"
+#undef SHIFT
+
+#endif
+
+/* bit operations */
+/* Bit-scan forward: index of the lowest set bit of t0 (0 when t0 == 0). */
+target_ulong helper_bsf(target_ulong t0)
+{
+    int count = 0;
+
+    /* BSF leaves its destination undefined for a zero source; return 0
+       here instead of scanning forever for a bit that is not there. */
+    if (t0 == 0)
+        return 0;
+    for (; (t0 & 1) == 0; t0 >>= 1)
+        count++;
+    return count;
+}
+
+/* Bit-scan reverse: index of the highest set bit of t0 (0 when t0 == 0). */
+target_ulong helper_bsr(target_ulong t0)
+{
+    const target_ulong top = (target_ulong)1 << (TARGET_LONG_BITS - 1);
+    int count = TARGET_LONG_BITS - 1;
+
+    /* A zero source has no set bit: bail out instead of looping forever. */
+    if (t0 == 0)
+        return 0;
+    /* Shift the value up until its highest set bit reaches the top bit. */
+    for (; (t0 & top) == 0; t0 <<= 1)
+        count--;
+    return count;
+}
+
+
+static int compute_all_eflags(void)
+{
+    return CC_SRC; /* used for CC_OP_EFLAGS: the flags value is CC_SRC itself */
+}
+
+static int compute_c_eflags(void)
+{
+    return CC_SRC & CC_C; /* used for CC_OP_EFLAGS: only the CC_C bit of CC_SRC */
+}
+
+uint32_t helper_cc_compute_all(int op)
+{
+    switch (op) { /* dispatch on the CC_OP_* code to the matching compute_all_* routine */
+    default: /* should never happen; unknown codes yield 0 */ return 0;
+
+    case CC_OP_EFLAGS: return compute_all_eflags();
+
+    case CC_OP_MULB: return compute_all_mulb(); /* B/W/L suffix selects the operand-size variant */
+    case CC_OP_MULW: return compute_all_mulw();
+    case CC_OP_MULL: return compute_all_mull();
+
+    case CC_OP_ADDB: return compute_all_addb();
+    case CC_OP_ADDW: return compute_all_addw();
+    case CC_OP_ADDL: return compute_all_addl();
+
+    case CC_OP_ADCB: return compute_all_adcb();
+    case CC_OP_ADCW: return compute_all_adcw();
+    case CC_OP_ADCL: return compute_all_adcl();
+
+    case CC_OP_SUBB: return compute_all_subb();
+    case CC_OP_SUBW: return compute_all_subw();
+    case CC_OP_SUBL: return compute_all_subl();
+
+    case CC_OP_SBBB: return compute_all_sbbb();
+    case CC_OP_SBBW: return compute_all_sbbw();
+    case CC_OP_SBBL: return compute_all_sbbl();
+
+    case CC_OP_LOGICB: return compute_all_logicb();
+    case CC_OP_LOGICW: return compute_all_logicw();
+    case CC_OP_LOGICL: return compute_all_logicl();
+
+    case CC_OP_INCB: return compute_all_incb();
+    case CC_OP_INCW: return compute_all_incw();
+    case CC_OP_INCL: return compute_all_incl();
+
+    case CC_OP_DECB: return compute_all_decb();
+    case CC_OP_DECW: return compute_all_decw();
+    case CC_OP_DECL: return compute_all_decl();
+
+    case CC_OP_SHLB: return compute_all_shlb();
+    case CC_OP_SHLW: return compute_all_shlw();
+    case CC_OP_SHLL: return compute_all_shll();
+
+    case CC_OP_SARB: return compute_all_sarb();
+    case CC_OP_SARW: return compute_all_sarw();
+    case CC_OP_SARL: return compute_all_sarl();
+
+#ifdef TARGET_X86_64 /* the Q variants exist only in the 64-bit target build */
+    case CC_OP_MULQ: return compute_all_mulq();
+
+    case CC_OP_ADDQ: return compute_all_addq();
+
+    case CC_OP_ADCQ: return compute_all_adcq();
+
+    case CC_OP_SUBQ: return compute_all_subq();
+
+    case CC_OP_SBBQ: return compute_all_sbbq();
+
+    case CC_OP_LOGICQ: return compute_all_logicq();
+
+    case CC_OP_INCQ: return compute_all_incq();
+
+    case CC_OP_DECQ: return compute_all_decq();
+
+    case CC_OP_SHLQ: return compute_all_shlq();
+
+    case CC_OP_SARQ: return compute_all_sarq();
+#endif
+    }
+}
+
+uint32_t helper_cc_compute_c(int op)
+{
+    switch (op) { /* dispatch on the CC_OP_* code to a compute_c_* routine */
+    default: /* should never happen; unknown codes yield 0 */ return 0;
+
+    case CC_OP_EFLAGS: return compute_c_eflags();
+
+    case CC_OP_MULB: return compute_c_mull(); /* NOTE(review): every MUL size uses the 'l' routine -- presumably size-independent; confirm */
+    case CC_OP_MULW: return compute_c_mull();
+    case CC_OP_MULL: return compute_c_mull();
+
+    case CC_OP_ADDB: return compute_c_addb();
+    case CC_OP_ADDW: return compute_c_addw();
+    case CC_OP_ADDL: return compute_c_addl();
+
+    case CC_OP_ADCB: return compute_c_adcb();
+    case CC_OP_ADCW: return compute_c_adcw();
+    case CC_OP_ADCL: return compute_c_adcl();
+
+    case CC_OP_SUBB: return compute_c_subb();
+    case CC_OP_SUBW: return compute_c_subw();
+    case CC_OP_SUBL: return compute_c_subl();
+
+    case CC_OP_SBBB: return compute_c_sbbb();
+    case CC_OP_SBBW: return compute_c_sbbw();
+    case CC_OP_SBBL: return compute_c_sbbl();
+
+    case CC_OP_LOGICB: return compute_c_logicb();
+    case CC_OP_LOGICW: return compute_c_logicw();
+    case CC_OP_LOGICL: return compute_c_logicl();
+
+    case CC_OP_INCB: return compute_c_incl(); /* NOTE(review): every INC and DEC size uses compute_c_incl -- confirm it is size-independent */
+    case CC_OP_INCW: return compute_c_incl();
+    case CC_OP_INCL: return compute_c_incl();
+
+    case CC_OP_DECB: return compute_c_incl();
+    case CC_OP_DECW: return compute_c_incl();
+    case CC_OP_DECL: return compute_c_incl();
+
+    case CC_OP_SHLB: return compute_c_shlb();
+    case CC_OP_SHLW: return compute_c_shlw();
+    case CC_OP_SHLL: return compute_c_shll();
+
+    case CC_OP_SARB: return compute_c_sarl(); /* NOTE(review): every SAR size uses compute_c_sarl -- confirm it is size-independent */
+    case CC_OP_SARW: return compute_c_sarl();
+    case CC_OP_SARL: return compute_c_sarl();
+
+#ifdef TARGET_X86_64 /* the Q codes exist only in the 64-bit target build */
+    case CC_OP_MULQ: return compute_c_mull();
+
+    case CC_OP_ADDQ: return compute_c_addq();
+
+    case CC_OP_ADCQ: return compute_c_adcq();
+
+    case CC_OP_SUBQ: return compute_c_subq();
+
+    case CC_OP_SBBQ: return compute_c_sbbq();
+
+    case CC_OP_LOGICQ: return compute_c_logicq();
+
+    case CC_OP_INCQ: return compute_c_incl();
+
+    case CC_OP_DECQ: return compute_c_incl();
+
+    case CC_OP_SHLQ: return compute_c_shlq();
+
+    case CC_OP_SARQ: return compute_c_sarl();
+#endif
+    }
+}
diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h
new file mode 100644
index 0000000..35ac211
--- /dev/null
+++ b/target-i386/ops_sse.h
@@ -0,0 +1,2032 @@
+/*
+ *  MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support
+ *
+ *  Copyright (c) 2005 Fabrice Bellard
+ *  Copyright (c) 2008 Intel Corporation  <andrew.zaborowski@intel.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+#if SHIFT == 0 /* SHIFT 0: helpers below are generated for MMXReg with the _mmx suffix */
+#define Reg MMXReg
+#define XMM_ONLY(...) /* expands to nothing in this variant */
+#define B(n) MMX_B(n)
+#define W(n) MMX_W(n)
+#define L(n) MMX_L(n)
+#define Q(n) q /* the index is ignored: every Q(n) names the member q */
+#define SUFFIX _mmx
+#else /* any other SHIFT: helpers below are generated for XMMReg with the _xmm suffix */
+#define Reg XMMReg
+#define XMM_ONLY(...) __VA_ARGS__ /* keeps its arguments, enabling the extra lanes */
+#define B(n) XMM_B(n)
+#define W(n) XMM_W(n)
+#define L(n) XMM_L(n)
+#define Q(n) XMM_Q(n)
+#define SUFFIX _xmm
+#endif
+
+/*
+ * PSRLW: shift every 16-bit lane of *d right (logical) by the count held
+ * in the low quadword of *s.
+ */
+void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s)
+{
+    int lane, count;
+
+    /* a count above 15 clears the whole destination */
+    if (s->Q(0) > 15) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+        return;
+    }
+
+    /* Q(0) <= 15 here, so the low byte carries the full count */
+    count = s->B(0);
+    /* 4 lanes when SHIFT == 0, 8 lanes when SHIFT == 1 */
+    for (lane = 0; lane < (4 << SHIFT); lane++)
+        d->W(lane) >>= count;
+}
+
+/*
+ * PSRAW: arithmetic right shift of every 16-bit lane of *d by the count
+ * held in the low quadword of *s.  Counts above 15 act like a shift by
+ * 15 rather than clearing the destination.
+ */
+void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s)
+{
+    int lane, count;
+
+    /* clamp the count; below the limit the low byte carries all of it */
+    count = (s->Q(0) > 15) ? 15 : s->B(0);
+
+    /* 4 lanes when SHIFT == 0, 8 lanes when SHIFT == 1 */
+    for (lane = 0; lane < (4 << SHIFT); lane++) {
+        /* view the lane as a signed 16-bit value before shifting */
+        int16_t signed_lane = (int16_t)d->W(lane);
+
+        d->W(lane) = signed_lane >> count;
+    }
+}
+
+/*
+ * PSLLW: shift every 16-bit lane of *d left by the count held in the low
+ * quadword of *s.
+ */
+void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s)
+{
+    int lane, count;
+
+    /* a count above 15 clears the whole destination */
+    if (s->Q(0) > 15) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+        return;
+    }
+
+    /* Q(0) <= 15 here, so the low byte carries the full count */
+    count = s->B(0);
+    /* 4 lanes when SHIFT == 0, 8 lanes when SHIFT == 1 */
+    for (lane = 0; lane < (4 << SHIFT); lane++)
+        d->W(lane) <<= count;
+}
+
+/* PSRLD: logical right shift of every 32-bit lane of *d by the count in the
+   low quadword of *s; a count above 31 clears the whole destination. */
+void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s)
+{
+    int lane, count;
+
+    if (s->Q(0) > 31) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+        return;
+    }
+
+    count = s->B(0);
+    /* 2 lanes when SHIFT == 0, 4 lanes when SHIFT == 1 */
+    for (lane = 0; lane < (2 << SHIFT); lane++)
+        d->L(lane) >>= count;
+}
+
+/* PSRAD: arithmetic right shift of every 32-bit lane of *d by the count in
+   the low quadword of *s; counts above 31 act like a shift by 31. */
+void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s)
+{
+    int lane, count;
+
+    count = (s->Q(0) > 31) ? 31 : s->B(0);
+
+    /* 2 lanes when SHIFT == 0, 4 lanes when SHIFT == 1 */
+    for (lane = 0; lane < (2 << SHIFT); lane++) {
+        /* view the lane as a signed 32-bit value before shifting */
+        int32_t signed_lane = (int32_t)d->L(lane);
+
+        d->L(lane) = signed_lane >> count;
+    }
+}
+
+/* PSLLD: left shift of every 32-bit lane of *d by the count in the low
+   quadword of *s; a count above 31 clears the whole destination. */
+void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s)
+{
+    int lane, count;
+
+    if (s->Q(0) > 31) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+        return;
+    }
+
+    count = s->B(0);
+    /* 2 lanes when SHIFT == 0, 4 lanes when SHIFT == 1 */
+    for (lane = 0; lane < (2 << SHIFT); lane++)
+        d->L(lane) <<= count;
+}
+
+/* PSRLQ: logical right shift of every 64-bit lane of *d by the count in the
+   low quadword of *s; a count above 63 clears the whole destination. */
+void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s)
+{
+    int lane, count;
+
+    if (s->Q(0) > 63) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+        return;
+    }
+    count = s->B(0);
+    for (lane = 0; lane < (1 << SHIFT); lane++)
+        d->Q(lane) >>= count;
+}
+
+/* PSLLQ: left shift of every 64-bit lane of *d by the count in the low
+   quadword of *s; a count above 63 clears the whole destination. */
+void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s)
+{
+    int lane, count;
+
+    if (s->Q(0) > 63) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+        return;
+    }
+    count = s->B(0);
+    for (lane = 0; lane < (1 << SHIFT); lane++)
+        d->Q(lane) <<= count;
+}
+
+#if SHIFT == 1
+/* PSRLDQ: move byte i + n of *d down to byte i (n = s->L(0), capped at 16), zero-filling the top. */
+void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s)
+{
+    uint32_t count = s->L(0);
+    int shift, i;
+
+    shift = count > 16 ? 16 : (int)count; /* clamp while unsigned: never negative */
+    for(i = 0; i < 16 - shift; i++)
+        d->B(i) = d->B(i + shift);
+    for(i = 16 - shift; i < 16; i++)
+        d->B(i) = 0;
+}
+
+/* PSLLDQ: move byte i - n of *d up to byte i (n = s->L(0), capped at 16), zero-filling the bottom. */
+void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s)
+{
+    uint32_t count = s->L(0);
+    int shift, i;
+
+    shift = count > 16 ? 16 : (int)count; /* clamp while unsigned: never negative */
+    for(i = 15; i >= shift; i--)
+        d->B(i) = d->B(i - shift);
+    for(i = 0; i < shift; i++)
+        d->B(i) = 0;
+}
+#endif
+
+#define SSE_HELPER_B(name, F) /* defines name<SUFFIX>(d, s): d->B(i) = F(d->B(i), s->B(i)) for every byte lane */\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+    d->B(0) = F(d->B(0), s->B(0));\
+    d->B(1) = F(d->B(1), s->B(1));\
+    d->B(2) = F(d->B(2), s->B(2));\
+    d->B(3) = F(d->B(3), s->B(3));\
+    d->B(4) = F(d->B(4), s->B(4));\
+    d->B(5) = F(d->B(5), s->B(5));\
+    d->B(6) = F(d->B(6), s->B(6));\
+    d->B(7) = F(d->B(7), s->B(7));\
+    XMM_ONLY( /* lanes 8..15 are emitted only where XMM_ONLY keeps its arguments */ \
+    d->B(8) = F(d->B(8), s->B(8));\
+    d->B(9) = F(d->B(9), s->B(9));\
+    d->B(10) = F(d->B(10), s->B(10));\
+    d->B(11) = F(d->B(11), s->B(11));\
+    d->B(12) = F(d->B(12), s->B(12));\
+    d->B(13) = F(d->B(13), s->B(13));\
+    d->B(14) = F(d->B(14), s->B(14));\
+    d->B(15) = F(d->B(15), s->B(15));\
+    )\
+}
+
+#define SSE_HELPER_W(name, F) /* defines name<SUFFIX>(d, s): d->W(i) = F(d->W(i), s->W(i)) for every 16-bit lane */\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+    d->W(0) = F(d->W(0), s->W(0));\
+    d->W(1) = F(d->W(1), s->W(1));\
+    d->W(2) = F(d->W(2), s->W(2));\
+    d->W(3) = F(d->W(3), s->W(3));\
+    XMM_ONLY( /* lanes 4..7 are emitted only where XMM_ONLY keeps its arguments */ \
+    d->W(4) = F(d->W(4), s->W(4));\
+    d->W(5) = F(d->W(5), s->W(5));\
+    d->W(6) = F(d->W(6), s->W(6));\
+    d->W(7) = F(d->W(7), s->W(7));\
+    )\
+}
+
+#define SSE_HELPER_L(name, F) /* defines name<SUFFIX>(d, s): d->L(i) = F(d->L(i), s->L(i)) for every 32-bit lane */\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+    d->L(0) = F(d->L(0), s->L(0));\
+    d->L(1) = F(d->L(1), s->L(1));\
+    XMM_ONLY( /* lanes 2..3 are emitted only where XMM_ONLY keeps its arguments */ \
+    d->L(2) = F(d->L(2), s->L(2));\
+    d->L(3) = F(d->L(3), s->L(3));\
+    )\
+}
+
+#define SSE_HELPER_Q(name, F) /* defines name<SUFFIX>(d, s): d->Q(i) = F(d->Q(i), s->Q(i)) for every 64-bit lane */\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+    d->Q(0) = F(d->Q(0), s->Q(0));\
+    XMM_ONLY( /* lane 1 is emitted only where XMM_ONLY keeps its arguments */ \
+    d->Q(1) = F(d->Q(1), s->Q(1));\
+    )\
+}
+
+#if SHIFT == 0
+/* Clamp x to the unsigned 8-bit range [0, 255]. */
+static inline int satub(int x)
+{
+    const int lo = 0, hi = 255;
+
+    if (x < lo)
+        return lo;
+    return (x > hi) ? hi : x;
+}
+
+/* Clamp x to the unsigned 16-bit range [0, 65535]. */
+static inline int satuw(int x)
+{
+    const int lo = 0, hi = 65535;
+
+    if (x < lo)
+        return lo;
+    return (x > hi) ? hi : x;
+}
+
+/* Clamp x to the signed 8-bit range [-128, 127]. */
+static inline int satsb(int x)
+{
+    const int lo = -128, hi = 127;
+
+    if (x < lo)
+        return lo;
+    return (x > hi) ? hi : x;
+}
+
+/* Clamp x to the signed 16-bit range [-32768, 32767]. */
+static inline int satsw(int x)
+{
+    const int lo = -32768, hi = 32767;
+
+    if (x < lo)
+        return lo;
+    return (x > hi) ? hi : x;
+}
+
+#define FADD(a, b) ((a) + (b)) /* lane operations; every expansion is one parenthesised expression */
+#define FADDUB(a, b) satub((a) + (b))
+#define FADDUW(a, b) satuw((a) + (b))
+#define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b))
+#define FADDSW(a, b) satsw((int16_t)(a) + (int16_t)(b))
+/* subtraction (plain and saturating), then min/max selections */
+#define FSUB(a, b) ((a) - (b))
+#define FSUBUB(a, b) satub((a) - (b))
+#define FSUBUW(a, b) satuw((a) - (b))
+#define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b))
+#define FSUBSW(a, b) satsw((int16_t)(a) - (int16_t)(b))
+#define FMINUB(a, b) (((a) < (b)) ? (a) : (b))
+#define FMINSW(a, b) (((int16_t)(a) < (int16_t)(b)) ? (a) : (b))
+#define FMAXUB(a, b) (((a) > (b)) ? (a) : (b))
+#define FMAXSW(a, b) (((int16_t)(a) > (int16_t)(b)) ? (a) : (b))
+/* bitwise operations */
+#define FAND(a, b) ((a) & (b))
+#define FANDN(a, b) ((~(a)) & (b))
+#define FOR(a, b) ((a) | (b))
+#define FXOR(a, b) ((a) ^ (b))
+/* comparisons yield -1 (all ones once stored in a lane) when true, else 0 */
+#define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0)
+#define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0)
+#define FCMPGTL(a, b) ((int32_t)(a) > (int32_t)(b) ? -1 : 0)
+#define FCMPEQ(a, b) ((a) == (b) ? -1 : 0)
+/* 16-bit multiplies; unsigned products use uint32_t so 0xffff * 0xffff cannot overflow int */
+#define FMULLW(a, b) ((uint32_t)(a) * (uint32_t)(b))
+#define FMULHRW(a, b) (((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16)
+#define FMULHUW(a, b) ((uint32_t)(a) * (uint32_t)(b) >> 16)
+#define FMULHW(a, b) ((int16_t)(a) * (int16_t)(b) >> 16)
+/* average rounded up: (a + b + 1) / 2 */
+#define FAVG(a, b) (((a) + (b) + 1) >> 1)
+#endif
+
+SSE_HELPER_B(helper_paddb, FADD)
+SSE_HELPER_W(helper_paddw, FADD)
+SSE_HELPER_L(helper_paddl, FADD)
+SSE_HELPER_Q(helper_paddq, FADD)
+
+SSE_HELPER_B(helper_psubb, FSUB)
+SSE_HELPER_W(helper_psubw, FSUB)
+SSE_HELPER_L(helper_psubl, FSUB)
+SSE_HELPER_Q(helper_psubq, FSUB)
+
+SSE_HELPER_B(helper_paddusb, FADDUB)
+SSE_HELPER_B(helper_paddsb, FADDSB)
+SSE_HELPER_B(helper_psubusb, FSUBUB)
+SSE_HELPER_B(helper_psubsb, FSUBSB)
+
+SSE_HELPER_W(helper_paddusw, FADDUW)
+SSE_HELPER_W(helper_paddsw, FADDSW)
+SSE_HELPER_W(helper_psubusw, FSUBUW)
+SSE_HELPER_W(helper_psubsw, FSUBSW)
+
+SSE_HELPER_B(helper_pminub, FMINUB)
+SSE_HELPER_B(helper_pmaxub, FMAXUB)
+
+SSE_HELPER_W(helper_pminsw, FMINSW)
+SSE_HELPER_W(helper_pmaxsw, FMAXSW)
+
+SSE_HELPER_Q(helper_pand, FAND)
+SSE_HELPER_Q(helper_pandn, FANDN)
+SSE_HELPER_Q(helper_por, FOR)
+SSE_HELPER_Q(helper_pxor, FXOR)
+
+SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
+SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
+SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)
+
+SSE_HELPER_B(helper_pcmpeqb, FCMPEQ)
+SSE_HELPER_W(helper_pcmpeqw, FCMPEQ)
+SSE_HELPER_L(helper_pcmpeql, FCMPEQ)
+
+SSE_HELPER_W(helper_pmullw, FMULLW)
+#if SHIFT == 0
+SSE_HELPER_W(helper_pmulhrw, FMULHRW)
+#endif
+SSE_HELPER_W(helper_pmulhuw, FMULHUW)
+SSE_HELPER_W(helper_pmulhw, FMULHW)
+
+SSE_HELPER_B(helper_pavgb, FAVG)
+SSE_HELPER_W(helper_pavgw, FAVG)
+
+/* PMULUDQ: 64-bit lane k of *d = s->L(2k) * d->L(2k), multiplied as 64-bit values. */
+void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s)
+{
+    int lane;
+    for (lane = 0; lane < (1 << SHIFT); lane++)
+        d->Q(lane) = (uint64_t)s->L(2 * lane) * (uint64_t)d->L(2 * lane);
+}
+
+/* PMADDWD: 32-bit lane i of *d = sum of the signed products of word pairs 2i and 2i+1. */
+void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s)
+{
+    int i;
+    for(i = 0; i < (2 << SHIFT); i++) { /* sum as uint32_t: two 0x40000000 products wrap instead of overflowing int */
+        d->L(i) = (uint32_t)((int16_t)s->W(2*i) * (int16_t)d->W(2*i)) +
+            (uint32_t)((int16_t)s->W(2*i+1) * (int16_t)d->W(2*i+1));
+    }
+}
+
+#if SHIFT == 0
+/* Absolute value of a. */
+static inline int abs1(int a)
+{
+    /* negative inputs are negated, everything else is returned as is;
+       a == INT_MIN has no representable negation */
+    return (a < 0) ? -a : a;
+}
+#endif
+/*
+ * PSADBW: sum of absolute byte differences.
+ *
+ * For each 64-bit half of the register, add up abs1(d byte - s byte) over
+ * the eight byte positions of that half, then replace that half of *d
+ * with the sum stored as a whole 64-bit value.  One half is processed
+ * when SHIFT == 0, two when SHIFT == 1.
+ */
+void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s)
+{
+    unsigned int sum;
+    int half, k;
+
+    for (half = 0; half < (1 << SHIFT); half++) {
+        sum = 0;
+
+        for (k = 8 * half; k < 8 * half + 8; k++) {
+            /* the difference may be negative, hence abs1() */
+            sum += abs1(d->B(k) - s->B(k));
+        }
+
+        /* store only after all eight bytes of this half were read,
+           because the store overwrites those bytes of *d */
+        d->Q(half) = sum;
+    }
+
+}
+
+/* MASKMOV: pass byte i of *d to stb() at a0 + i wherever bit 7 of s->B(i) is set. */
+void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s, target_ulong a0)
+{
+    int idx;
+    for (idx = 0; idx < (8 << SHIFT); idx++)
+        if ((s->B(idx) & 0x80) != 0)
+            stb(a0 + idx, d->B(idx));
+}
+
+/* Put val in 32-bit lane 0 of *d and zero every other lane of the register. */
+void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val)
+{
+    /* the stores below hit distinct lanes, so their order does not matter */
+    XMM_ONLY(d->Q(1) = 0;)
+    d->L(1) = 0;
+    d->L(0) = val;
+}
+
+#ifdef TARGET_X86_64
+/* Put val in 64-bit lane 0 of *d and zero the remaining lane, if there is one. */
+void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val)
+{
+    /* the upper quadword exists only where XMM_ONLY keeps its arguments */
+    XMM_ONLY(d->Q(1) = 0;)
+    d->Q(0) = val;
+}
+#endif
+
+#if SHIFT == 0
+/* PSHUFW: word i of the result is s->W(k), where k is the i-th 2-bit field of order. */
+void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order)
+{
+    Reg r;
+    int i;
+    for (i = 0; i < 4; i++)
+        r.W(i) = s->W((order >> (2 * i)) & 3);
+    *d = r; /* staged in r, so d and s may be the same register */
+}
+#else
+/* SHUFPS: lanes 0-1 are picked from d and lanes 2-3 from s, one 2-bit field of order each. */
+void helper_shufps(Reg *d, Reg *s, int order)
+{
+    Reg r;
+    int i;
+    for (i = 0; i < 4; i++)
+        r.L(i) = (i < 2 ? d : s)->L((order >> (2 * i)) & 3);
+    *d = r; /* staged in r because the selections read d itself */
+}
+
+void helper_shufpd(Reg *d, Reg *s, int order)
+{
+    Reg r; /* staged result, because the selection reads d itself */
+    r.Q(1) = s->Q((order & 2) >> 1); /* bit 1 of order picks the quadword taken from s */
+    r.Q(0) = d->Q(order & 1); /* bit 0 of order picks the quadword taken from d */
+    *d = r;
+}
+
+/* PSHUFD: 32-bit lane i of the result is s->L(k), where k is the i-th 2-bit field of order. */
+void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order)
+{
+    Reg r;
+    int i;
+    for (i = 0; i < 4; i++)
+        r.L(i) = s->L((order >> (2 * i)) & 3);
+    *d = r; /* staged in r, so d and s may be the same register */
+}
+
+/* PSHUFLW: shuffle words 0..3 of s by the 2-bit fields of order; quadword 1 is copied. */
+void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order)
+{
+    Reg r;
+    int i;
+    for (i = 0; i < 4; i++)
+        r.W(i) = s->W((order >> (2 * i)) & 3);
+    r.Q(1) = s->Q(1);
+    *d = r; /* staged in r, so d and s may be the same register */
+}
+
+/* PSHUFHW: shuffle words 4..7 of s by the 2-bit fields of order; quadword 0 is copied. */
+void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order)
+{
+    Reg r;
+    int i;
+    r.Q(0) = s->Q(0);
+    for (i = 0; i < 4; i++)
+        r.W(4 + i) = s->W(4 + ((order >> (2 * i)) & 3));
+    *d = r; /* staged in r, so d and s may be the same register */
+}
+#endif
+
+#if SHIFT == 1
+/* FPU ops */
+/* XXX: not accurate */
+
+#define SSE_HELPER_S(name, F)\
+void helper_ ## name ## ps (Reg *d, Reg *s)\
+{\
+    d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+    d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
+    d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
+    d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
+}\
+\
+void helper_ ## name ## ss (Reg *d, Reg *s)\
+{\
+    d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+}\
+void helper_ ## name ## pd (Reg *d, Reg *s)\
+{\
+    d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+    d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
+}\
+\
+void helper_ ## name ## sd (Reg *d, Reg *s)\
+{\
+    d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+}
+
+#define FPU_ADD(size, a, b) (float ## size ## _add(a, b, &env->sse_status)) /* size is pasted into the float<size>_* name */
+#define FPU_SUB(size, a, b) (float ## size ## _sub(a, b, &env->sse_status))
+#define FPU_MUL(size, a, b) (float ## size ## _mul(a, b, &env->sse_status))
+#define FPU_DIV(size, a, b) (float ## size ## _div(a, b, &env->sse_status))
+#define FPU_MIN(size, a, b) ((a) < (b) ? (a) : (b)) /* plain C comparison; size is unused */
+#define FPU_MAX(size, a, b) ((a) > (b) ? (a) : (b)) /* plain C comparison; size is unused */
+#define FPU_SQRT(size, a, b) (float ## size ## _sqrt(b, &env->sse_status)) /* only b is used */
+
+SSE_HELPER_S(add, FPU_ADD)
+SSE_HELPER_S(sub, FPU_SUB)
+SSE_HELPER_S(mul, FPU_MUL)
+SSE_HELPER_S(div, FPU_DIV)
+SSE_HELPER_S(min, FPU_MIN)
+SSE_HELPER_S(max, FPU_MAX)
+SSE_HELPER_S(sqrt, FPU_SQRT)
+
+
+/* float to float conversions */
+void helper_cvtps2pd(Reg *d, Reg *s)
+{
+    float32 s0, s1;
+    s0 = s->XMM_S(0);
+    s1 = s->XMM_S(1);
+    d->XMM_D(0) = float32_to_float64(s0, &env->sse_status);
+    d->XMM_D(1) = float32_to_float64(s1, &env->sse_status);
+}
+
+void helper_cvtpd2ps(Reg *d, Reg *s)
+{
+    d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
+    d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status);
+    d->Q(1) = 0;
+}
+
+void helper_cvtss2sd(Reg *d, Reg *s)
+{
+    d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status);
+}
+
+void helper_cvtsd2ss(Reg *d, Reg *s)
+{
+    d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
+}
+
+/* integer to float */
+void helper_cvtdq2ps(Reg *d, Reg *s)
+{
+    d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status);
+    d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status);
+    d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status);
+    d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status);
+}
+
+void helper_cvtdq2pd(Reg *d, Reg *s)
+{
+    int32_t l0, l1;
+    l0 = (int32_t)s->XMM_L(0);
+    l1 = (int32_t)s->XMM_L(1);
+    d->XMM_D(0) = int32_to_float64(l0, &env->sse_status);
+    d->XMM_D(1) = int32_to_float64(l1, &env->sse_status);
+}
+
+void helper_cvtpi2ps(XMMReg *d, MMXReg *s)
+{
+    d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status);
+    d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status);
+}
+
+void helper_cvtpi2pd(XMMReg *d, MMXReg *s)
+{
+    d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status);
+    d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status);
+}
+
+void helper_cvtsi2ss(XMMReg *d, uint32_t val)
+{
+    d->XMM_S(0) = int32_to_float32(val, &env->sse_status);
+}
+
+void helper_cvtsi2sd(XMMReg *d, uint32_t val)
+{
+    d->XMM_D(0) = int32_to_float64(val, &env->sse_status);
+}
+
+#ifdef TARGET_X86_64
+void helper_cvtsq2ss(XMMReg *d, uint64_t val)
+{
+    d->XMM_S(0) = int64_to_float32(val, &env->sse_status);
+}
+
+void helper_cvtsq2sd(XMMReg *d, uint64_t val)
+{
+    d->XMM_D(0) = int64_to_float64(val, &env->sse_status);
+}
+#endif
+
+/* float to integer */
+void helper_cvtps2dq(XMMReg *d, XMMReg *s)
+{
+    d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
+    d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
+    d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status);
+    d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status);
+}
+
+void helper_cvtpd2dq(XMMReg *d, XMMReg *s)
+{
+    d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
+    d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
+    d->XMM_Q(1) = 0;
+}
+
+void helper_cvtps2pi(MMXReg *d, XMMReg *s)
+{
+    d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
+    d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
+}
+
+void helper_cvtpd2pi(MMXReg *d, XMMReg *s)
+{
+    d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
+    d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
+}
+
+int32_t helper_cvtss2si(XMMReg *s)
+{
+    return float32_to_int32(s->XMM_S(0), &env->sse_status);
+}
+
+int32_t helper_cvtsd2si(XMMReg *s)
+{
+    return float64_to_int32(s->XMM_D(0), &env->sse_status);
+}
+
+#ifdef TARGET_X86_64
+int64_t helper_cvtss2sq(XMMReg *s)
+{
+    return float32_to_int64(s->XMM_S(0), &env->sse_status);
+}
+
+int64_t helper_cvtsd2sq(XMMReg *s)
+{
+    return float64_to_int64(s->XMM_D(0), &env->sse_status);
+}
+#endif
+
+/* float to integer truncated */
+void helper_cvttps2dq(XMMReg *d, XMMReg *s)
+{
+    d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
+    d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
+    d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status);
+    d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status);
+}
+
+void helper_cvttpd2dq(XMMReg *d, XMMReg *s)
+{
+    d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
+    d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
+    d->XMM_Q(1) = 0;
+}
+
+void helper_cvttps2pi(MMXReg *d, XMMReg *s)
+{
+    d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
+    d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
+}
+
+void helper_cvttpd2pi(MMXReg *d, XMMReg *s)
+{
+    d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
+    d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
+}
+
+int32_t helper_cvttss2si(XMMReg *s)
+{
+    return float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
+}
+
+int32_t helper_cvttsd2si(XMMReg *s)
+{
+    return float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
+}
+
+#ifdef TARGET_X86_64
+int64_t helper_cvttss2sq(XMMReg *s)
+{
+    return float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);
+}
+
+int64_t helper_cvttsd2sq(XMMReg *s)
+{
+    return float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);
+}
+#endif
+
+void helper_rsqrtps(XMMReg *d, XMMReg *s)
+{
+    d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
+    d->XMM_S(1) = approx_rsqrt(s->XMM_S(1));
+    d->XMM_S(2) = approx_rsqrt(s->XMM_S(2));
+    d->XMM_S(3) = approx_rsqrt(s->XMM_S(3));
+}
+
+void helper_rsqrtss(XMMReg *d, XMMReg *s)
+{
+    d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
+}
+
+void helper_rcpps(XMMReg *d, XMMReg *s)
+{
+    d->XMM_S(0) = approx_rcp(s->XMM_S(0));
+    d->XMM_S(1) = approx_rcp(s->XMM_S(1));
+    d->XMM_S(2) = approx_rcp(s->XMM_S(2));
+    d->XMM_S(3) = approx_rcp(s->XMM_S(3));
+}
+
+void helper_rcpss(XMMReg *d, XMMReg *s)
+{
+    d->XMM_S(0) = approx_rcp(s->XMM_S(0));
+}
+
+void helper_haddps(XMMReg *d, XMMReg *s)
+{
+    XMMReg r;
+    r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1);
+    r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3);
+    r.XMM_S(2) = s->XMM_S(0) + s->XMM_S(1);
+    r.XMM_S(3) = s->XMM_S(2) + s->XMM_S(3);
+    *d = r;
+}
+
+void helper_haddpd(XMMReg *d, XMMReg *s)
+{
+    XMMReg r;
+    r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1);
+    r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1);
+    *d = r;
+}
+
+void helper_hsubps(XMMReg *d, XMMReg *s)
+{
+    XMMReg r;
+    r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1);
+    r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3);
+    r.XMM_S(2) = s->XMM_S(0) - s->XMM_S(1);
+    r.XMM_S(3) = s->XMM_S(2) - s->XMM_S(3);
+    *d = r;
+}
+
+void helper_hsubpd(XMMReg *d, XMMReg *s)
+{
+    XMMReg r;
+    r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1);
+    r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1);
+    *d = r;
+}
+
+void helper_addsubps(XMMReg *d, XMMReg *s)
+{
+    d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0);
+    d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1);
+    d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2);
+    d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3);
+}
+
+void helper_addsubpd(XMMReg *d, XMMReg *s)
+{
+    d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0);
+    d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1);
+}
+
+/* XXX: unordered */
+#define SSE_HELPER_CMP(name, F)\
+void helper_ ## name ## ps (Reg *d, Reg *s)\
+{\
+    d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+    d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
+    d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
+    d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
+}\
+\
+void helper_ ## name ## ss (Reg *d, Reg *s)\
+{\
+    d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+}\
+void helper_ ## name ## pd (Reg *d, Reg *s)\
+{\
+    d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+    d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
+}\
+\
+void helper_ ## name ## sd (Reg *d, Reg *s)\
+{\
+    d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+}
+
+#define FPU_CMPEQ(size, a, b) (float ## size ## _eq(a, b, &env->sse_status) ? -1 : 0) /* -1 when the predicate holds, else 0 */
+#define FPU_CMPLT(size, a, b) (float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0)
+#define FPU_CMPLE(size, a, b) (float ## size ## _le(a, b, &env->sse_status) ? -1 : 0)
+#define FPU_CMPUNORD(size, a, b) (float ## size ## _unordered(a, b, &env->sse_status) ? -1 : 0)
+#define FPU_CMPNEQ(size, a, b) (float ## size ## _eq(a, b, &env->sse_status) ? 0 : -1) /* negated forms swap the two results */
+#define FPU_CMPNLT(size, a, b) (float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1)
+#define FPU_CMPNLE(size, a, b) (float ## size ## _le(a, b, &env->sse_status) ? 0 : -1)
+#define FPU_CMPORD(size, a, b) (float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1)
+
+SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
+SSE_HELPER_CMP(cmplt, FPU_CMPLT)
+SSE_HELPER_CMP(cmple, FPU_CMPLE)
+SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD)
+SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ)
+SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
+SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
+SSE_HELPER_CMP(cmpord, FPU_CMPORD)
+
+const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; /* indexed by compare result + 1; presumably less, equal, greater, unordered -- confirm */
+
+void helper_ucomiss(Reg *d, Reg *s)
+{   /* Compare the lane-0 singles of d and s with float32_compare_quiet and load the flags into CC_SRC. */
+    int ret;
+    float32 s0, s1;
+
+    s0 = d->XMM_S(0);
+    s1 = s->XMM_S(0);
+    ret = float32_compare_quiet(s0, s1, &env->sse_status);
+    CC_SRC = comis_eflags[ret + 1];     /* table is indexed by compare result + 1 */
+}
+
+void helper_comiss(Reg *d, Reg *s)
+{   /* Compare the lane-0 singles of d and s with float32_compare and load the flags into CC_SRC. */
+    int ret;
+    float32 s0, s1;
+
+    s0 = d->XMM_S(0);
+    s1 = s->XMM_S(0);
+    ret = float32_compare(s0, s1, &env->sse_status);
+    CC_SRC = comis_eflags[ret + 1];     /* table is indexed by compare result + 1 */
+}
+
+void helper_ucomisd(Reg *d, Reg *s)
+{   /* Compare the lane-0 doubles of d and s with float64_compare_quiet and load the flags into CC_SRC. */
+    int ret;
+    float64 d0, d1;
+
+    d0 = d->XMM_D(0);
+    d1 = s->XMM_D(0);
+    ret = float64_compare_quiet(d0, d1, &env->sse_status);
+    CC_SRC = comis_eflags[ret + 1];     /* table is indexed by compare result + 1 */
+}
+
+void helper_comisd(Reg *d, Reg *s)
+{   /* Compare the lane-0 doubles of d and s with float64_compare and load the flags into CC_SRC. */
+    int ret;
+    float64 d0, d1;
+
+    d0 = d->XMM_D(0);
+    d1 = s->XMM_D(0);
+    ret = float64_compare(d0, d1, &env->sse_status);
+    CC_SRC = comis_eflags[ret + 1];     /* table is indexed by compare result + 1 */
+}
+
+uint32_t helper_movmskps(Reg *s)
+{   /* Returns a 4-bit mask: bit i is bit 31 of 32-bit lane i of s. */
+    int b0, b1, b2, b3;
+    b0 = s->XMM_L(0) >> 31;
+    b1 = s->XMM_L(1) >> 31;
+    b2 = s->XMM_L(2) >> 31;
+    b3 = s->XMM_L(3) >> 31;
+    return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
+}
+
+uint32_t helper_movmskpd(Reg *s)
+{   /* Returns a 2-bit mask built from bit 31 of 32-bit lanes 1 and 3 of s. */
+    int b0, b1;
+    b0 = s->XMM_L(1) >> 31;     /* presumably the upper dword of double lane 0 - confirm lane layout */
+    b1 = s->XMM_L(3) >> 31;     /* presumably the upper dword of double lane 1 - confirm lane layout */
+    return b0 | (b1 << 1);
+}
+
+#endif
+
+uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s)
+{   /* Returns a mask whose bit i is bit 7 of byte i of s (8 bytes, or 16 when SHIFT == 1). */
+    uint32_t val;
+    val = 0;
+    val |= (s->B(0) >> 7);
+    val |= (s->B(1) >> 6) & 0x02;       /* each shift moves bit 7 of the byte to bit i */
+    val |= (s->B(2) >> 5) & 0x04;
+    val |= (s->B(3) >> 4) & 0x08;
+    val |= (s->B(4) >> 3) & 0x10;
+    val |= (s->B(5) >> 2) & 0x20;
+    val |= (s->B(6) >> 1) & 0x40;
+    val |= (s->B(7)) & 0x80;
+#if SHIFT == 1
+    val |= (s->B(8) << 1) & 0x0100;     /* bytes 8..15: bit 7 moves up instead of down */
+    val |= (s->B(9) << 2) & 0x0200;
+    val |= (s->B(10) << 3) & 0x0400;
+    val |= (s->B(11) << 4) & 0x0800;
+    val |= (s->B(12) << 5) & 0x1000;
+    val |= (s->B(13) << 6) & 0x2000;
+    val |= (s->B(14) << 7) & 0x4000;
+    val |= (s->B(15) << 8) & 0x8000;
+#endif
+    return val;
+}
+
+void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s)
+{   /* Narrow 16-bit lanes to bytes through satsb(): d's lanes fill the low half, s's the high half. */
+    Reg r;      /* staged so d is only overwritten after every input has been read */
+
+    r.B(0) = satsb((int16_t)d->W(0));
+    r.B(1) = satsb((int16_t)d->W(1));
+    r.B(2) = satsb((int16_t)d->W(2));
+    r.B(3) = satsb((int16_t)d->W(3));
+#if SHIFT == 1
+    r.B(4) = satsb((int16_t)d->W(4));
+    r.B(5) = satsb((int16_t)d->W(5));
+    r.B(6) = satsb((int16_t)d->W(6));
+    r.B(7) = satsb((int16_t)d->W(7));
+#endif
+    r.B((4 << SHIFT) + 0) = satsb((int16_t)s->W(0));    /* high half starts at byte 4 << SHIFT */
+    r.B((4 << SHIFT) + 1) = satsb((int16_t)s->W(1));
+    r.B((4 << SHIFT) + 2) = satsb((int16_t)s->W(2));
+    r.B((4 << SHIFT) + 3) = satsb((int16_t)s->W(3));
+#if SHIFT == 1
+    r.B(12) = satsb((int16_t)s->W(4));
+    r.B(13) = satsb((int16_t)s->W(5));
+    r.B(14) = satsb((int16_t)s->W(6));
+    r.B(15) = satsb((int16_t)s->W(7));
+#endif
+    *d = r;
+}
+
+void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s)
+{   /* Narrow 16-bit lanes (read as int16_t) to bytes through satub(): d's lanes low half, s's high half. */
+    Reg r;      /* staged so d is only overwritten after every input has been read */
+
+    r.B(0) = satub((int16_t)d->W(0));
+    r.B(1) = satub((int16_t)d->W(1));
+    r.B(2) = satub((int16_t)d->W(2));
+    r.B(3) = satub((int16_t)d->W(3));
+#if SHIFT == 1
+    r.B(4) = satub((int16_t)d->W(4));
+    r.B(5) = satub((int16_t)d->W(5));
+    r.B(6) = satub((int16_t)d->W(6));
+    r.B(7) = satub((int16_t)d->W(7));
+#endif
+    r.B((4 << SHIFT) + 0) = satub((int16_t)s->W(0));    /* high half starts at byte 4 << SHIFT */
+    r.B((4 << SHIFT) + 1) = satub((int16_t)s->W(1));
+    r.B((4 << SHIFT) + 2) = satub((int16_t)s->W(2));
+    r.B((4 << SHIFT) + 3) = satub((int16_t)s->W(3));
+#if SHIFT == 1
+    r.B(12) = satub((int16_t)s->W(4));
+    r.B(13) = satub((int16_t)s->W(5));
+    r.B(14) = satub((int16_t)s->W(6));
+    r.B(15) = satub((int16_t)s->W(7));
+#endif
+    *d = r;
+}
+
+void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s)
+{   /* Narrow 32-bit lanes to 16-bit words through satsw(): d's lanes fill the low half, s's the high half. */
+    Reg r;      /* staged so d is only overwritten after every input has been read */
+
+    r.W(0) = satsw(d->L(0));
+    r.W(1) = satsw(d->L(1));
+#if SHIFT == 1
+    r.W(2) = satsw(d->L(2));
+    r.W(3) = satsw(d->L(3));
+#endif
+    r.W((2 << SHIFT) + 0) = satsw(s->L(0));     /* high half starts at word 2 << SHIFT */
+    r.W((2 << SHIFT) + 1) = satsw(s->L(1));
+#if SHIFT == 1
+    r.W(6) = satsw(s->L(2));
+    r.W(7) = satsw(s->L(3));
+#endif
+    *d = r;
+}
+
+#define UNPCK_OP(base_name, base)                               \
+/* Interleave elements of d and s from the half chosen by base (d element first). */ \
+void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s)   \
+{                                                               \
+    Reg r;                                              \
+    /* r is staged so d is only overwritten once all inputs are read */ \
+    r.B(0) = d->B((base << (SHIFT + 2)) + 0);                   \
+    r.B(1) = s->B((base << (SHIFT + 2)) + 0);                   \
+    r.B(2) = d->B((base << (SHIFT + 2)) + 1);                   \
+    r.B(3) = s->B((base << (SHIFT + 2)) + 1);                   \
+    r.B(4) = d->B((base << (SHIFT + 2)) + 2);                   \
+    r.B(5) = s->B((base << (SHIFT + 2)) + 2);                   \
+    r.B(6) = d->B((base << (SHIFT + 2)) + 3);                   \
+    r.B(7) = s->B((base << (SHIFT + 2)) + 3);                   \
+XMM_ONLY(                                                       \
+    r.B(8) = d->B((base << (SHIFT + 2)) + 4);                   \
+    r.B(9) = s->B((base << (SHIFT + 2)) + 4);                   \
+    r.B(10) = d->B((base << (SHIFT + 2)) + 5);                  \
+    r.B(11) = s->B((base << (SHIFT + 2)) + 5);                  \
+    r.B(12) = d->B((base << (SHIFT + 2)) + 6);                  \
+    r.B(13) = s->B((base << (SHIFT + 2)) + 6);                  \
+    r.B(14) = d->B((base << (SHIFT + 2)) + 7);                  \
+    r.B(15) = s->B((base << (SHIFT + 2)) + 7);                  \
+)                                                               \
+    *d = r;                                                     \
+}                                                               \
+/* Same interleave on 16-bit words. */                          \
+void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s)   \
+{                                                               \
+    Reg r;                                              \
+                                                                \
+    r.W(0) = d->W((base << (SHIFT + 1)) + 0);                   \
+    r.W(1) = s->W((base << (SHIFT + 1)) + 0);                   \
+    r.W(2) = d->W((base << (SHIFT + 1)) + 1);                   \
+    r.W(3) = s->W((base << (SHIFT + 1)) + 1);                   \
+XMM_ONLY(                                                       \
+    r.W(4) = d->W((base << (SHIFT + 1)) + 2);                   \
+    r.W(5) = s->W((base << (SHIFT + 1)) + 2);                   \
+    r.W(6) = d->W((base << (SHIFT + 1)) + 3);                   \
+    r.W(7) = s->W((base << (SHIFT + 1)) + 3);                   \
+)                                                               \
+    *d = r;                                                     \
+}                                                               \
+/* Same interleave on 32-bit dwords. */                         \
+void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s)   \
+{                                                               \
+    Reg r;                                              \
+                                                                \
+    r.L(0) = d->L((base << SHIFT) + 0);                         \
+    r.L(1) = s->L((base << SHIFT) + 0);                         \
+XMM_ONLY(                                                       \
+    r.L(2) = d->L((base << SHIFT) + 1);                         \
+    r.L(3) = s->L((base << SHIFT) + 1);                         \
+)                                                               \
+    *d = r;                                                     \
+}                                                               \
+/* 64-bit variant; only emitted inside XMM_ONLY. */             \
+XMM_ONLY(                                                       \
+void glue(helper_punpck ## base_name ## qdq, SUFFIX) (Reg *d, Reg *s)  \
+{                                                               \
+    Reg r;                                              \
+                                                                \
+    r.Q(0) = d->Q(base);                                        \
+    r.Q(1) = s->Q(base);                                        \
+    *d = r;                                                     \
+}                                                               \
+)
+
+UNPCK_OP(l, 0)  /* base 0: elements taken from index 0 upward */
+UNPCK_OP(h, 1)  /* base 1: elements taken from the upper-indexed half */
+
+/* 3DNow! float ops */
+#if SHIFT == 0
+void helper_pi2fd(MMXReg *d, MMXReg *s)
+{   /* Convert both 32-bit integer lanes of s to float32 (int32_to_float32) into d. */
+    d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status);
+    d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status);
+}
+
+void helper_pi2fw(MMXReg *d, MMXReg *s)
+{   /* Convert words 0 and 2 of s, read as int16_t, to float32 into the two lanes of d. */
+    d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status);
+    d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status);
+}
+
+void helper_pf2id(MMXReg *d, MMXReg *s)
+{   /* Convert both float32 lanes of s to 32-bit integers, rounding toward zero. */
+    d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status);
+    d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status);
+}
+
+void helper_pf2iw(MMXReg *d, MMXReg *s)
+{   /* As pf2id, but each converted value goes through satsw() before being stored in the 32-bit lane. */
+    d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status));
+    d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status));
+}
+
+void helper_pfacc(MMXReg *d, MMXReg *s)
+{   /* Horizontal add: lane 0 = d0 + d1, lane 1 = s0 + s1. */
+    MMXReg r;   /* staged so d is only overwritten after both sums are computed */
+    r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+    r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+    *d = r;
+}
+
+void helper_pfadd(MMXReg *d, MMXReg *s)
+{   /* Lane-wise float32 add: d = d + s. */
+    d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
+    d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
+}
+
+void helper_pfcmpeq(MMXReg *d, MMXReg *s)
+{   /* Lane-wise d == s: each 32-bit lane of d becomes -1 (all ones) when true, else 0. */
+    d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0;
+    d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0;
+}
+
+void helper_pfcmpge(MMXReg *d, MMXReg *s)
+{   /* Lane-wise d >= s, evaluated as s <= d: -1 (all ones) when true, else 0. */
+    d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
+    d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
+}
+
+void helper_pfcmpgt(MMXReg *d, MMXReg *s)
+{   /* Lane-wise d > s, evaluated as s < d: -1 (all ones) when true, else 0. */
+    d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
+    d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
+}
+
+void helper_pfmax(MMXReg *d, MMXReg *s)
+{   /* Lane-wise maximum: a lane of d is replaced by s only when d < s. */
+    if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status))
+        d->MMX_S(0) = s->MMX_S(0);
+    if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status))
+        d->MMX_S(1) = s->MMX_S(1);
+}
+
+void helper_pfmin(MMXReg *d, MMXReg *s)
+{   /* Lane-wise minimum: a lane of d is replaced by s only when s < d. */
+    if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status))
+        d->MMX_S(0) = s->MMX_S(0);
+    if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status))
+        d->MMX_S(1) = s->MMX_S(1);
+}
+
+void helper_pfmul(MMXReg *d, MMXReg *s)
+{   /* Lane-wise float32 multiply: d = d * s. */
+    d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
+    d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
+}
+
+void helper_pfnacc(MMXReg *d, MMXReg *s)
+{   /* Horizontal subtract: lane 0 = d0 - d1, lane 1 = s0 - s1. */
+    MMXReg r;   /* staged so d is only overwritten after both differences are computed */
+    r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+    r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+    *d = r;
+}
+
+void helper_pfpnacc(MMXReg *d, MMXReg *s)
+{   /* Mixed horizontal op: lane 0 = d0 - d1, lane 1 = s0 + s1. */
+    MMXReg r;   /* staged so d is only overwritten after both results are computed */
+    r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+    r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+    *d = r;
+}
+
+void helper_pfrcp(MMXReg *d, MMXReg *s)
+{   /* approx_rcp() of lane 0 of s, written to both lanes of d. */
+    d->MMX_S(0) = approx_rcp(s->MMX_S(0));
+    d->MMX_S(1) = d->MMX_S(0);
+}
+
+void helper_pfrsqrt(MMXReg *d, MMXReg *s)
+{   /* approx_rsqrt() of lane 0 of s with bit 31 masked off, bit 31 restored afterwards; both lanes of d get the result. */
+    d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;     /* clear bit 31 before the approximation */
+    d->MMX_S(1) = approx_rsqrt(d->MMX_S(1));
+    d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;    /* copy bit 31 of the source back in */
+    d->MMX_L(0) = d->MMX_L(1);
+}
+
+void helper_pfsub(MMXReg *d, MMXReg *s)
+{   /* Lane-wise float32 subtract: d = d - s. */
+    d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
+    d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
+}
+
+void helper_pfsubr(MMXReg *d, MMXReg *s)
+{   /* Reversed lane-wise float32 subtract: d = s - d. */
+    d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status);
+    d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status);
+}
+
+void helper_pswapd(MMXReg *d, MMXReg *s)
+{   /* d receives the two 32-bit lanes of s in swapped order. */
+    MMXReg r;   /* staged so the swap is built before d is overwritten */
+    r.MMX_L(0) = s->MMX_L(1);
+    r.MMX_L(1) = s->MMX_L(0);
+    *d = r;
+}
+#endif
+
+/* SSSE3 op helpers */
+void glue(helper_pshufb, SUFFIX) (Reg *d, Reg *s)
+{   /* Byte shuffle: each control byte in s selects a byte of d, or zero when its bit 7 is set. */
+    int i;
+    Reg r;      /* staged so d can be indexed freely while the result is built */
+
+    for (i = 0; i < (8 << SHIFT); i++)  /* 8 << SHIFT bytes per register; the index is masked to that range */
+        r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1)));
+
+    *d = r;
+}
+
+void glue(helper_phaddw, SUFFIX) (Reg *d, Reg *s)
+{   /* Pairwise 16-bit sums: adjacent pairs of d fill the low half of the result, pairs of s the high half. */
+    Reg r;      /* staged result: s may be the same register as d */
+    int i;
+    for (i = 0; i < (2 << SHIFT); i++) {        /* 2 << SHIFT sums per source register */
+        r.W(i) = (int16_t)d->W(2 * i) + (int16_t)d->W(2 * i + 1);
+        r.W((2 << SHIFT) + i) = (int16_t)s->W(2 * i) + (int16_t)s->W(2 * i + 1);
+    }
+    *d = r;
+}
+
+void glue(helper_phaddd, SUFFIX) (Reg *d, Reg *s)
+{   /* Pairwise 32-bit sums: pairs of d fill the low half of the result, pairs of s the high half. */
+    Reg r;      /* staged result: s may be the same register as d */
+    r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
+    XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
+    r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
+    XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
+    *d = r;
+}
+
+void glue(helper_phaddsw, SUFFIX) (Reg *d, Reg *s)
+{   /* Pairwise 16-bit sums passed through satsw(): pairs of d low half, pairs of s high half. */
+    Reg r;      /* staged result: s may be the same register as d */
+    int i;
+    for (i = 0; i < (2 << SHIFT); i++) {        /* 2 << SHIFT sums per source register */
+        r.W(i) = satsw((int16_t)d->W(2 * i) + (int16_t)d->W(2 * i + 1));
+        r.W((2 << SHIFT) + i) = satsw((int16_t)s->W(2 * i) + (int16_t)s->W(2 * i + 1));
+    }
+    *d = r;
+}
+
+void glue(helper_pmaddubsw, SUFFIX) (Reg *d, Reg *s)
+{   /* Per 16-bit lane: satsw() of the sum of two byte products, s bytes read as int8_t and d bytes as uint8_t. */
+    d->W(0) = satsw((int8_t)s->B( 0) * (uint8_t)d->B( 0) +
+                    (int8_t)s->B( 1) * (uint8_t)d->B( 1));
+    d->W(1) = satsw((int8_t)s->B( 2) * (uint8_t)d->B( 2) +
+                    (int8_t)s->B( 3) * (uint8_t)d->B( 3));
+    d->W(2) = satsw((int8_t)s->B( 4) * (uint8_t)d->B( 4) +
+                    (int8_t)s->B( 5) * (uint8_t)d->B( 5));
+    d->W(3) = satsw((int8_t)s->B( 6) * (uint8_t)d->B( 6) +
+                    (int8_t)s->B( 7) * (uint8_t)d->B( 7));
+#if SHIFT == 1
+    d->W(4) = satsw((int8_t)s->B( 8) * (uint8_t)d->B( 8) +
+                    (int8_t)s->B( 9) * (uint8_t)d->B( 9));
+    d->W(5) = satsw((int8_t)s->B(10) * (uint8_t)d->B(10) +
+                    (int8_t)s->B(11) * (uint8_t)d->B(11));
+    d->W(6) = satsw((int8_t)s->B(12) * (uint8_t)d->B(12) +
+                    (int8_t)s->B(13) * (uint8_t)d->B(13));
+    d->W(7) = satsw((int8_t)s->B(14) * (uint8_t)d->B(14) +
+                    (int8_t)s->B(15) * (uint8_t)d->B(15));
+#endif
+}
+
+void glue(helper_phsubw, SUFFIX) (Reg *d, Reg *s)
+{   /* Pairwise 16-bit differences (even minus odd element): pairs of d low half, pairs of s high half. */
+    Reg r;      /* staged result: s may be the same register as d */
+    int i;
+    for (i = 0; i < (2 << SHIFT); i++) {        /* 2 << SHIFT differences per source register */
+        r.W(i) = (int16_t)d->W(2 * i) - (int16_t)d->W(2 * i + 1);
+        r.W((2 << SHIFT) + i) = (int16_t)s->W(2 * i) - (int16_t)s->W(2 * i + 1);
+    }
+    *d = r;
+}
+
+void glue(helper_phsubd, SUFFIX) (Reg *d, Reg *s)
+{   /* Pairwise 32-bit differences (even minus odd element): pairs of d low half, pairs of s high half. */
+    Reg r;      /* staged result: s may be the same register as d */
+    r.L(0) = (int32_t)d->L(0) - (int32_t)d->L(1);
+    XMM_ONLY(r.L(1) = (int32_t)d->L(2) - (int32_t)d->L(3));
+    r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) - (int32_t)s->L(1);
+    XMM_ONLY(r.L(3) = (int32_t)s->L(2) - (int32_t)s->L(3));
+    *d = r;
+}
+
+void glue(helper_phsubsw, SUFFIX) (Reg *d, Reg *s)
+{   /* Pairwise 16-bit differences passed through satsw(): pairs of d low half, pairs of s high half. */
+    Reg r;      /* staged result: s may be the same register as d */
+    int i;
+    for (i = 0; i < (2 << SHIFT); i++) {        /* 2 << SHIFT differences per source register */
+        r.W(i) = satsw((int16_t)d->W(2 * i) - (int16_t)d->W(2 * i + 1));
+        r.W((2 << SHIFT) + i) = satsw((int16_t)s->W(2 * i) - (int16_t)s->W(2 * i + 1));
+    }
+    *d = r;
+}
+
+#define FABSB(_, x) x > INT8_MAX  ? -(int8_t ) x : x /* magnitude: values above the signed maximum are negated (presumably lanes are unsigned - confirm) */
+#define FABSW(_, x) x > INT16_MAX ? -(int16_t) x : x
+#define FABSL(_, x) x > INT32_MAX ? -(int32_t) x : x
+SSE_HELPER_B(helper_pabsb, FABSB)
+SSE_HELPER_W(helper_pabsw, FABSW)
+SSE_HELPER_L(helper_pabsd, FABSL)
+/* Signed 16x16 product, plus 0x4000 for rounding, shifted right by 15. */
+#define FMULHRSW(d, s) ((int16_t) d * (int16_t) s + 0x4000) >> 15
+SSE_HELPER_W(helper_pmulhrsw, FMULHRSW)
+/* d when s is nonzero and within the signed maximum, 0 when s is zero, -d otherwise. */
+#define FSIGNB(d, s) s <= INT8_MAX  ? s ? d : 0 : -(int8_t ) d
+#define FSIGNW(d, s) s <= INT16_MAX ? s ? d : 0 : -(int16_t) d
+#define FSIGNL(d, s) s <= INT32_MAX ? s ? d : 0 : -(int32_t) d
+SSE_HELPER_B(helper_psignb, FSIGNB)
+SSE_HELPER_W(helper_psignw, FSIGNW)
+SSE_HELPER_L(helper_psignd, FSIGNL)
+
+void glue(helper_palignr, SUFFIX) (Reg *d, Reg *s, int32_t shift)
+{   /* Shift the d:s concatenation (d above s) right by `shift` bytes; the low register width is stored in d. */
+    Reg r;
+
+    /* XXX could be checked during translation */
+    if (shift >= (16 << SHIFT)) {       /* shifted past both registers: result is zero */
+        r.Q(0) = 0;
+        XMM_ONLY(r.Q(1) = 0);
+    } else {
+        shift <<= 3;    /* bytes -> bits */
+#define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? v >> (i) : (v << -(i)) : 0) /* v >> i for i > 0, v << -i otherwise, 0 once |i| >= 64 */
+#if SHIFT == 0
+        r.Q(0) = SHR(s->Q(0), shift -   0) |
+                 SHR(d->Q(0), shift -  64);
+#else
+        r.Q(0) = SHR(s->Q(0), shift -   0) |
+                 SHR(s->Q(1), shift -  64) |
+                 SHR(d->Q(0), shift - 128) |
+                 SHR(d->Q(1), shift - 192);
+        r.Q(1) = SHR(s->Q(0), shift +  64) |
+                 SHR(s->Q(1), shift -   0) |
+                 SHR(d->Q(0), shift -  64) |
+                 SHR(d->Q(1), shift - 128);
+#endif
+#undef SHR
+    }
+
+    *d = r;
+}
+
+#define XMM0 env->xmm_regs[0]   /* register 0, read as the implicit mask operand by SSE_HELPER_V */
+
+#if SHIFT == 1
+#define SSE_HELPER_V(name, elem, num, F) /* per lane: d = F(d, s, XMM0 lane); num is the lane count (2, 4, 8 or 16) */\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+    d->elem(0) = F(d->elem(0), s->elem(0), XMM0.elem(0));\
+    d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1));\
+    if (num > 2) {\
+        d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2));\
+        d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3));\
+        if (num > 4) {\
+            d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4));\
+            d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5));\
+            d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6));\
+            d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7));\
+            if (num > 8) {\
+                d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8));\
+                d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9));\
+                d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10));\
+                d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11));\
+                d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12));\
+                d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13));\
+                d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14));\
+                d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15));\
+            }\
+        }\
+    }\
+}
+
+#define SSE_HELPER_I(name, elem, num, F) /* per lane i: d = F(d, s, bit i of imm); num is the lane count */\
+void glue(name, SUFFIX) (Reg *d, Reg *s, uint32_t imm)\
+{\
+    d->elem(0) = F(d->elem(0), s->elem(0), ((imm >> 0) & 1));\
+    d->elem(1) = F(d->elem(1), s->elem(1), ((imm >> 1) & 1));\
+    if (num > 2) {\
+        d->elem(2) = F(d->elem(2), s->elem(2), ((imm >> 2) & 1));\
+        d->elem(3) = F(d->elem(3), s->elem(3), ((imm >> 3) & 1));\
+        if (num > 4) {\
+            d->elem(4) = F(d->elem(4), s->elem(4), ((imm >> 4) & 1));\
+            d->elem(5) = F(d->elem(5), s->elem(5), ((imm >> 5) & 1));\
+            d->elem(6) = F(d->elem(6), s->elem(6), ((imm >> 6) & 1));\
+            d->elem(7) = F(d->elem(7), s->elem(7), ((imm >> 7) & 1));\
+            if (num > 8) {\
+                d->elem(8) = F(d->elem(8), s->elem(8), ((imm >> 8) & 1));\
+                d->elem(9) = F(d->elem(9), s->elem(9), ((imm >> 9) & 1));\
+                d->elem(10) = F(d->elem(10), s->elem(10), ((imm >> 10) & 1));\
+                d->elem(11) = F(d->elem(11), s->elem(11), ((imm >> 11) & 1));\
+                d->elem(12) = F(d->elem(12), s->elem(12), ((imm >> 12) & 1));\
+                d->elem(13) = F(d->elem(13), s->elem(13), ((imm >> 13) & 1));\
+                d->elem(14) = F(d->elem(14), s->elem(14), ((imm >> 14) & 1));\
+                d->elem(15) = F(d->elem(15), s->elem(15), ((imm >> 15) & 1));\
+            }\
+        }\
+    }\
+}
+
+/* SSE4.1 op helpers */
+#define FBLENDVB(d, s, m) (m & 0x80) ? s : d /* take s when the top bit of the mask lane is set, else keep d */
+#define FBLENDVPS(d, s, m) (m & 0x80000000) ? s : d
+#define FBLENDVPD(d, s, m) (m & 0x8000000000000000LL) ? s : d
+SSE_HELPER_V(helper_pblendvb, B, 16, FBLENDVB)  /* 16 byte lanes */
+SSE_HELPER_V(helper_blendvps, L, 4, FBLENDVPS)  /* 4 dword lanes */
+SSE_HELPER_V(helper_blendvpd, Q, 2, FBLENDVPD)  /* 2 qword lanes */
+
+void glue(helper_ptest, SUFFIX) (Reg *d, Reg *s)
+{   /* Flag-only test: sets CC_Z when (s & d) is all zero and CC_C when (s & ~d) is all zero. */
+    uint64_t zf = (s->Q(0) &  d->Q(0)) | (s->Q(1) &  d->Q(1));
+    uint64_t cf = (s->Q(0) & ~d->Q(0)) | (s->Q(1) & ~d->Q(1));
+
+    CC_SRC = (zf ? 0 : CC_Z) | (cf ? 0 : CC_C);  /* zf/cf hold the OR of both halves, so nonzero clears the flag */
+}
+
+#define SSE_HELPER_F(name, elem, num, F) /* d lane i = F(i); lanes go high-to-low so wider d lanes never clobber unread s lanes when d == s */\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+    if (num > 2) {\
+        if (num > 4) {\
+            d->elem(7) = F(7);\
+            d->elem(6) = F(6);\
+            d->elem(5) = F(5);\
+            d->elem(4) = F(4);\
+        }\
+        d->elem(3) = F(3);\
+        d->elem(2) = F(2);\
+    }\
+    d->elem(1) = F(1);\
+    d->elem(0) = F(0);\
+}
+
+SSE_HELPER_F(helper_pmovsxbw, W, 8, (int8_t) s->B)     /* pmovsx*: source lane is cast to a signed type before widening */
+SSE_HELPER_F(helper_pmovsxbd, L, 4, (int8_t) s->B)
+SSE_HELPER_F(helper_pmovsxbq, Q, 2, (int8_t) s->B)
+SSE_HELPER_F(helper_pmovsxwd, L, 4, (int16_t) s->W)
+SSE_HELPER_F(helper_pmovsxwq, Q, 2, (int16_t) s->W)
+SSE_HELPER_F(helper_pmovsxdq, Q, 2, (int32_t) s->L)
+SSE_HELPER_F(helper_pmovzxbw, W, 8, s->B)              /* pmovzx*: source lane is used as stored, no signed cast */
+SSE_HELPER_F(helper_pmovzxbd, L, 4, s->B)
+SSE_HELPER_F(helper_pmovzxbq, Q, 2, s->B)
+SSE_HELPER_F(helper_pmovzxwd, L, 4, s->W)
+SSE_HELPER_F(helper_pmovzxwq, Q, 2, s->W)
+SSE_HELPER_F(helper_pmovzxdq, Q, 2, s->L)
+
+void glue(helper_pmuldq, SUFFIX) (Reg *d, Reg *s)
+{   /* Signed 32x32 -> 64-bit products of dword lanes 0 and 2, stored in qword lanes 0 and 1 of d. */
+    d->Q(0) = (int64_t) (int32_t) d->L(0) * (int32_t) s->L(0);
+    d->Q(1) = (int64_t) (int32_t) d->L(2) * (int32_t) s->L(2);
+}
+
+#define FCMPEQQ(d, s) d == s ? -1 : 0 /* equal lanes become -1 (all ones), others 0 */
+SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ)
+
+void glue(helper_packusdw, SUFFIX) (Reg *d, Reg *s)
+{   /* Narrow 32-bit lanes (read as int32_t) to words through satuw(): d's lanes fill words 0-3, s's words 4-7. */
+    Reg r;      /* staged result: s may be the same register as d */
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        r.W(i) = satuw((int32_t) d->L(i));
+        r.W(i + 4) = satuw((int32_t) s->L(i));
+    }
+    *d = r;
+}
+
+#define FMINSB(d, s) MIN((int8_t) d, (int8_t) s)       /* signed variants cast both lanes before comparing */
+#define FMINSD(d, s) MIN((int32_t) d, (int32_t) s)
+#define FMAXSB(d, s) MAX((int8_t) d, (int8_t) s)
+#define FMAXSD(d, s) MAX((int32_t) d, (int32_t) s)
+SSE_HELPER_B(helper_pminsb, FMINSB)
+SSE_HELPER_L(helper_pminsd, FMINSD)
+SSE_HELPER_W(helper_pminuw, MIN)        /* unsigned variants compare the lanes as stored */
+SSE_HELPER_L(helper_pminud, MIN)
+SSE_HELPER_B(helper_pmaxsb, FMAXSB)
+SSE_HELPER_L(helper_pmaxsd, FMAXSD)
+SSE_HELPER_W(helper_pmaxuw, MAX)
+SSE_HELPER_L(helper_pmaxud, MAX)
+
+#define FMULLD(d, s) (int32_t) d * (int32_t) s /* product of the lanes cast to int32_t; the helper stores it back into a 32-bit lane */
+SSE_HELPER_L(helper_pmulld, FMULLD)
+
+void glue(helper_phminposuw, SUFFIX) (Reg *d, Reg *s)
+{   /* Minimum of the eight words of s: value -> d word 0, its index -> d word 1, the rest of d is zeroed. */
+    int idx = 0;
+
+    if (s->W(1) < s->W(idx))    /* strict '<' keeps the lowest index on ties */
+        idx = 1;
+    if (s->W(2) < s->W(idx))
+        idx = 2;
+    if (s->W(3) < s->W(idx))
+        idx = 3;
+    if (s->W(4) < s->W(idx))
+        idx = 4;
+    if (s->W(5) < s->W(idx))
+        idx = 5;
+    if (s->W(6) < s->W(idx))
+        idx = 6;
+    if (s->W(7) < s->W(idx))
+        idx = 7;
+
+    d->W(0) = s->W(idx);        /* read the winner first: s may be the same register as d */
+    d->W(1) = idx;
+    d->L(1) = 0;
+    d->Q(1) = 0;
+}
+
+void glue(helper_roundps, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{   /* Round the four single-precision lanes of s to integral values in d, under the mode chosen by `mode`. */
+    signed char prev_rounding_mode;
+
+    prev_rounding_mode = env->sse_status.float_rounding_mode;
+    if (!(mode & (1 << 2)))     /* bit 2 clear: bits 1:0 override the current rounding mode */
+        switch (mode & 3) {
+        case 0:
+            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+            break;
+        case 1:
+            set_float_rounding_mode(float_round_down, &env->sse_status);
+            break;
+        case 2:
+            set_float_rounding_mode(float_round_up, &env->sse_status);
+            break;
+        case 3:
+            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+            break;
+        }
+    /* Single-precision lanes: use the float32 primitive, not the float64 one. */
+    d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status);
+    d->XMM_S(1) = float32_round_to_int(s->XMM_S(1), &env->sse_status);
+    d->XMM_S(2) = float32_round_to_int(s->XMM_S(2), &env->sse_status);
+    d->XMM_S(3) = float32_round_to_int(s->XMM_S(3), &env->sse_status);
+
+#if 0 /* TODO: bit 3 of mode should clear the inexact flag; not enabled */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                        get_float_exception_flags(&env->sse_status) &
+                        ~float_flag_inexact,
+                        &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = prev_rounding_mode;   /* restore the caller's mode */
+}
+
+void glue(helper_roundpd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{   /* Round the two 64-bit lanes of s with float64_round_to_int into d, under the mode chosen by `mode`. */
+    signed char prev_rounding_mode;
+
+    prev_rounding_mode = env->sse_status.float_rounding_mode;
+    if (!(mode & (1 << 2)))     /* bit 2 clear: bits 1:0 override the current rounding mode */
+        switch (mode & 3) {
+        case 0:
+            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+            break;
+        case 1:
+            set_float_rounding_mode(float_round_down, &env->sse_status);
+            break;
+        case 2:
+            set_float_rounding_mode(float_round_up, &env->sse_status);
+            break;
+        case 3:
+            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+            break;
+        }
+
+    d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);
+    d->Q(1) = float64_round_to_int(s->Q(1), &env->sse_status);
+
+#if 0 /* TODO */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                        get_float_exception_flags(&env->sse_status) &
+                        ~float_flag_inexact,
+                        &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = prev_rounding_mode;   /* restore the caller's mode */
+}
+
+void glue(helper_roundss, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{   /* Round single-precision lane 0 of s to an integral value in lane 0 of d; other lanes of d are untouched. */
+    signed char prev_rounding_mode;
+
+    prev_rounding_mode = env->sse_status.float_rounding_mode;
+    if (!(mode & (1 << 2)))     /* bit 2 clear: bits 1:0 override the current rounding mode */
+        switch (mode & 3) {
+        case 0:
+            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+            break;
+        case 1:
+            set_float_rounding_mode(float_round_down, &env->sse_status);
+            break;
+        case 2:
+            set_float_rounding_mode(float_round_up, &env->sse_status);
+            break;
+        case 3:
+            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+            break;
+        }
+    /* Single-precision lane: use the float32 primitive, not the float64 one. */
+    d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status);
+
+#if 0 /* TODO: bit 3 of mode should clear the inexact flag; not enabled */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                        get_float_exception_flags(&env->sse_status) &
+                        ~float_flag_inexact,
+                        &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = prev_rounding_mode;   /* restore the caller's mode */
+}
+
+void glue(helper_roundsd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{   /* Round 64-bit lane 0 of s with float64_round_to_int into lane 0 of d; lane 1 of d is untouched. */
+    signed char prev_rounding_mode;
+
+    prev_rounding_mode = env->sse_status.float_rounding_mode;
+    if (!(mode & (1 << 2)))     /* bit 2 clear: bits 1:0 override the current rounding mode */
+        switch (mode & 3) {
+        case 0:
+            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+            break;
+        case 1:
+            set_float_rounding_mode(float_round_down, &env->sse_status);
+            break;
+        case 2:
+            set_float_rounding_mode(float_round_up, &env->sse_status);
+            break;
+        case 3:
+            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+            break;
+        }
+
+    d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);
+
+#if 0 /* TODO */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                        get_float_exception_flags(&env->sse_status) &
+                        ~float_flag_inexact,
+                        &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = prev_rounding_mode;   /* restore the caller's mode */
+}
+
+#define FBLENDP(d, s, m) m ? s : d /* take s when the lane's immediate bit is set, else keep d */
+SSE_HELPER_I(helper_blendps, L, 4, FBLENDP)     /* 4 dword lanes */
+SSE_HELPER_I(helper_blendpd, Q, 2, FBLENDP)     /* 2 qword lanes */
+SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP)     /* 8 word lanes */
+
+void glue(helper_dpps, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
+{   /* Dot product: mask bits 4-7 choose which lane products are summed, bits 0-3 which lanes of d receive the sum (others get 0). */
+    float32 iresult = 0 /*float32_zero*/;
+    /* NOTE(review): lanes are read through L() rather than XMM_S(); assumes float32 is an integer typedef here - confirm */
+    if (mask & (1 << 4))
+        iresult = float32_add(iresult,
+                        float32_mul(d->L(0), s->L(0), &env->sse_status),
+                        &env->sse_status);
+    if (mask & (1 << 5))
+        iresult = float32_add(iresult,
+                        float32_mul(d->L(1), s->L(1), &env->sse_status),
+                        &env->sse_status);
+    if (mask & (1 << 6))
+        iresult = float32_add(iresult,
+                        float32_mul(d->L(2), s->L(2), &env->sse_status),
+                        &env->sse_status);
+    if (mask & (1 << 7))
+        iresult = float32_add(iresult,
+                        float32_mul(d->L(3), s->L(3), &env->sse_status),
+                        &env->sse_status);
+    d->L(0) = (mask & (1 << 0)) ? iresult : 0 /*float32_zero*/;
+    d->L(1) = (mask & (1 << 1)) ? iresult : 0 /*float32_zero*/;
+    d->L(2) = (mask & (1 << 2)) ? iresult : 0 /*float32_zero*/;
+    d->L(3) = (mask & (1 << 3)) ? iresult : 0 /*float32_zero*/;
+}
+
+void glue(helper_dppd, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
+{   /* Dot product: mask bits 4-5 choose which lane products are summed, bits 0-1 which lanes of d receive the sum (others get 0). */
+    float64 iresult = 0 /*float64_zero*/;
+    /* NOTE(review): lanes are read through Q() rather than XMM_D(); assumes float64 is an integer typedef here - confirm */
+    if (mask & (1 << 4))
+        iresult = float64_add(iresult,
+                        float64_mul(d->Q(0), s->Q(0), &env->sse_status),
+                        &env->sse_status);
+    if (mask & (1 << 5))
+        iresult = float64_add(iresult,
+                        float64_mul(d->Q(1), s->Q(1), &env->sse_status),
+                        &env->sse_status);
+    d->Q(0) = (mask & (1 << 0)) ? iresult : 0 /*float64_zero*/;
+    d->Q(1) = (mask & (1 << 1)) ? iresult : 0 /*float64_zero*/;
+}
+
+void glue(helper_mpsadbw, SUFFIX) (Reg *d, Reg *s, uint32_t offset)
+{   /* Eight sums of absolute differences: a fixed 4-byte group of s against a sliding 4-byte window of d. */
+    int s0 = (offset & 3) << 2;         /* byte offset of the group in s: 0, 4, 8 or 12 */
+    int d0 = (offset & 4) << 0;         /* starting byte of the window in d: 0 or 4 */
+    int i;
+    Reg r;      /* staged so d can still be read while the result is built */
+
+    for (i = 0; i < 8; i++, d0++) {     /* window advances one byte per result word */
+        r.W(i) = 0;
+        r.W(i) += abs1(d->B(d0 + 0) - s->B(s0 + 0));
+        r.W(i) += abs1(d->B(d0 + 1) - s->B(s0 + 1));
+        r.W(i) += abs1(d->B(d0 + 2) - s->B(s0 + 2));
+        r.W(i) += abs1(d->B(d0 + 3) - s->B(s0 + 3));
+    }
+
+    *d = r;
+}
+
+/* SSE4.2 op helpers */
+/* PCMPGTQ is a signed quadword compare; cast explicitly so the lanes are never compared as unsigned. */
+#define FCMPGTQ(d, s) ((int64_t) d > (int64_t) s ? -1 : 0)
+SSE_HELPER_Q(helper_pcmpgtq, FCMPGTQ)
+
+static inline int pcmp_elen(int reg, uint32_t ctrl)
+{   /* Explicit length: |env->regs[reg]| saturated to the element count (8 if ctrl bit 0 is set, else 16). */
+    int64_t val;        /* kept at full width so huge lengths saturate instead of wrapping */
+    int limit = (ctrl & 1) ? 8 : 16;
+
+    /* Presence of REX.W is indicated by a bit higher than 7 set */
+    if (ctrl >> 8)
+        val = (int64_t) env->regs[reg];
+    else
+        val = (int32_t) env->regs[reg];
+
+    /* Saturate before taking the magnitude: narrowing to int first could
+     * wrap a large 64-bit length to a small one, and negating the most
+     * negative value would overflow. */
+    if (val > limit || val < -limit)
+        return limit;
+
+    return val < 0 ? -val : val;
+}
+
+static inline int pcmp_ilen(Reg *r, uint8_t ctrl)
+{   /* Implicit length: number of leading nonzero elements of r (words, max 8, if ctrl bit 0 is set; else bytes, max 16). */
+    int val = 0;
+
+    if (ctrl & 1) {
+        while (val < 8 && r->W(val))    /* stop at the first zero word */
+            val++;
+    } else
+        while (val < 16 && r->B(val))   /* stop at the first zero byte */
+            val++;
+
+    return val;
+}
+
+/*
+ * Fetch element i of *r as an int.  ctrl bit 0 selects 16-bit words over
+ * bytes; ctrl bit 1 casts the element to the signed type of that width.
+ */
+static inline int pcmp_val(Reg *r, uint8_t ctrl, int i)
+{
+    int wide = ctrl & 1;
+    int sign = ctrl & 2;
+
+    if (wide)
+        return sign ? (int16_t) r->W(i) : r->W(i);
+
+    return sign ? (int8_t) r->B(i) : r->B(i);
+}
+
+/* Shared core of PCMPxSTRx: compares s against d as selected by ctrl. */
+static inline unsigned pcmpxstrx(Reg *d, Reg *s,
+                int8_t ctrl, int valids, int validd)
+{
+    unsigned int res = 0;
+    int v, j, i;
+    int upper = (ctrl & 1) ? 7 : 15;
+    /* turn the element counts into last-valid indices (-1 when empty) */
+    valids--;
+    validd--;
+    /* CC_Z / CC_S: s / d holds fewer valid elements than a full register */
+    CC_SRC = (valids < upper ? CC_Z : 0) | (validd < upper ? CC_S : 0);
+    /* ctrl bits 3:2 pick the aggregation; result bit j describes s[j] */
+    switch ((ctrl >> 2) & 3) {
+    case 0: /* equal any: s[j] equals at least one valid element of d */
+        for (j = valids; j >= 0; j--) {
+            res <<= 1;
+            v = pcmp_val(s, ctrl, j);
+            for (i = validd; i >= 0; i--)
+                res |= (v == pcmp_val(d, ctrl, i));
+        }
+        break;
+    case 1: /* ranges: s[j] lies within some pair d[i - 1] <= s[j] <= d[i] */
+        for (j = valids; j >= 0; j--) {
+            res <<= 1;
+            v = pcmp_val(s, ctrl, j);
+            for (i = ((validd - 1) | 1); i >= 0; i -= 2)
+                res |= (pcmp_val(d, ctrl, i - 1) <= v &&
+                        pcmp_val(d, ctrl, i - 0) >= v);
+        }
+        break;
+    case 2: /* equal each: s[j] == d[j]; slots past both ends count as equal */
+        res = (2 << (upper - MAX(valids, validd))) - 1;
+        res <<= MAX(valids, validd) - MIN(valids, validd);
+        for (i = MIN(valids, validd); i >= 0; i--) {
+            res <<= 1;
+            v = pcmp_val(s, ctrl, i);
+            res |= (v == pcmp_val(d, ctrl, i));
+        }
+        break;
+    case 3: /* equal ordered: all valid elements of d match s from index j */
+        for (j = valids - validd; j >= 0; j--) {
+            v = 1;  /* per-position flag, so earlier result bits survive */
+            for (i = MIN(upper - j, validd); i >= 0; i--)
+                v &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i));
+            res = (res << 1) | v;
+        }
+        break;
+    }
+    /* ctrl bits 5:4: 1 inverts every result bit, 3 only bits 0..valids */
+    switch ((ctrl >> 4) & 3) {
+    case 1:
+        res ^= (2 << upper) - 1;
+        break;
+    case 3:
+        res ^= (2 << valids) - 1;
+        break;
+    }
+    /* CC_C: some result bit is set; CC_O: result bit 0 is set */
+    if (res)
+       CC_SRC |= CC_C;
+    if (res & 1)
+       CC_SRC |= CC_O;
+
+    return res;
+}
+
+/* 1-based position of the most significant set bit of val (1 when val is 0). */
+static inline int rffs1(unsigned int val)
+{
+    int pos = 1, step;
+    for (step = sizeof(val) * 4; step != 0; step >>= 1) {
+        if ((val >> step) == 0)
+            continue;   /* nothing set above the low `step` bits */
+        val >>= step;
+        pos += step;
+    }
+    return pos;
+}
+
+/* 1-based position of the least significant set bit; val must be non-zero. */
+static inline int ffs1(unsigned int val)
+{
+    int ret = 1, hi;
+    /* Binary search: while the low `hi` bits are all clear, skip past them. */
+    for (hi = sizeof(val) * 4; hi; hi /= 2)
+        if (!(val & ((1u << hi) - 1))) {
+            val >>= hi;
+            ret += hi;
+        }
+    return ret;
+}
+
+void glue(helper_pcmpestri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+    unsigned int res = pcmpxstrx(d, s, ctrl, pcmp_elen(R_EDX, ctrl),
+                                 pcmp_elen(R_EAX, ctrl));
+    int idx = 16 >> (ctrl & 1);     /* no result bit set: 16, or 8 for words */
+
+    /* otherwise ctrl bit 6 picks rffs1() over ffs1() to locate a set bit */
+    if (res)
+        idx = ((ctrl & (1 << 6)) ? rffs1(res) : ffs1(res)) - 1;
+    env->regs[R_ECX] = idx;
+}
+
+/* PCMPESTRM: store the pcmpxstrx() result in *d as a bit or element mask. */
+void glue(helper_pcmpestrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+    int i;
+    unsigned int res = pcmpxstrx(d, s, ctrl, pcmp_elen(R_EDX, ctrl),
+                                 pcmp_elen(R_EAX, ctrl));
+    /* ctrl bit 6: one all-ones/all-zero element per result bit, else raw bits */
+    if ((ctrl >> 6) & 1) {
+        if (ctrl & 1)
+            for (i = 0; i < 8; i++, res >>= 1)
+                d->W(i) = (res & 1) ? ~0 : 0;
+        else
+            for (i = 0; i < 16; i++, res >>= 1)
+                d->B(i) = (res & 1) ? ~0 : 0;
+    } else {
+        d->Q(1) = 0;
+        d->Q(0) = res;
+    }
+}
+
+void glue(helper_pcmpistri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+    unsigned int res = pcmpxstrx(d, s, ctrl, pcmp_ilen(s, ctrl),
+                                 pcmp_ilen(d, ctrl));
+    int idx = 16 >> (ctrl & 1);     /* no result bit set: 16, or 8 for words */
+
+    /* otherwise ctrl bit 6 picks rffs1() over ffs1() to locate a set bit */
+    if (res)
+        idx = ((ctrl & (1 << 6)) ? rffs1(res) : ffs1(res)) - 1;
+    env->regs[R_ECX] = idx;
+}
+
+/* PCMPISTRM: store the pcmpxstrx() result in *d as a bit or element mask. */
+void glue(helper_pcmpistrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+    int i;
+    unsigned int res = pcmpxstrx(d, s, ctrl, pcmp_ilen(s, ctrl),
+                                 pcmp_ilen(d, ctrl));
+    /* ctrl bit 6: one all-ones/all-zero element per result bit, else raw bits */
+    if ((ctrl >> 6) & 1) {
+        if (ctrl & 1)
+            for (i = 0; i < 8; i++, res >>= 1)
+                d->W(i) = (res & 1) ? ~0 : 0;
+        else
+            for (i = 0; i < 16; i++, res >>= 1)
+                d->B(i) = (res & 1) ? ~0 : 0;
+    } else {
+        d->Q(1) = 0;
+        d->Q(0) = res;
+    }
+}
+
+#define CRCPOLY        0x1edc6f41
+#define CRCPOLY_BITREV 0x82f63b78
+/* XOR the low len bits of msg into crc1, then run len shift/XOR steps. */
+target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len)
+{
+    target_ulong keep = (target_ulong) -1 >> (TARGET_LONG_BITS - len);
+    target_ulong crc = (msg & keep) ^ crc1;
+
+    for (; len != 0; len--)
+        crc = (crc & 1) ? (crc >> 1) ^ CRCPOLY_BITREV : crc >> 1;
+    return crc;
+}
+
+#define POPMASK(i)     ((target_ulong) -1 / ((1LL << (1 << i)) + 1))
+#define POPCOUNT(n, i) (n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i))
+target_ulong helper_popcnt(target_ulong n, uint32_t type)
+{
+    CC_SRC = n ? 0 : CC_Z;
+    /* count set bits: each POPCOUNT(n, i) adds neighbouring (1 << i)-bit fields */
+    n = POPCOUNT(n, 0);
+    n = POPCOUNT(n, 1);
+    n = POPCOUNT(n, 2);
+    n = POPCOUNT(n, 3);
+    if (type == 1)
+        return n & 0xff; /* low 16-bit field holds the count of bits 0..15 */
+    /* one more step merges the 16-bit fields into 32-bit counts */
+    n = POPCOUNT(n, 4);
+#ifndef TARGET_X86_64
+    return n;
+#else
+    if (type == 2)
+        return n & 0xff;
+
+    return POPCOUNT(n, 5);
+#endif
+}
+#endif
+
+#undef SHIFT
+#undef XMM_ONLY
+#undef Reg
+#undef B
+#undef W
+#undef L
+#undef Q
+#undef SUFFIX
diff --git a/target-i386/ops_sse_header.h b/target-i386/ops_sse_header.h
new file mode 100644
index 0000000..03e9b17
--- /dev/null
+++ b/target-i386/ops_sse_header.h
@@ -0,0 +1,343 @@
+/*
+ *  MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support
+ *
+ *  Copyright (c) 2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+#if SHIFT == 0
+#define Reg MMXReg
+#define SUFFIX _mmx
+#else
+#define Reg XMMReg
+#define SUFFIX _xmm
+#endif
+
+#define dh_alias_Reg ptr
+#define dh_alias_XMMReg ptr
+#define dh_alias_MMXReg ptr
+#define dh_ctype_Reg Reg *
+#define dh_ctype_XMMReg XMMReg *
+#define dh_ctype_MMXReg MMXReg *
+
+DEF_HELPER_2(glue(psrlw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psraw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psllw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psrld, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psrad, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pslld, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psrlq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psllq, SUFFIX), void, Reg, Reg)
+
+#if SHIFT == 1
+DEF_HELPER_2(glue(psrldq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pslldq, SUFFIX), void, Reg, Reg)
+#endif
+
+#define SSE_HELPER_B(name, F)\
+    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+#define SSE_HELPER_W(name, F)\
+    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+#define SSE_HELPER_L(name, F)\
+    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+#define SSE_HELPER_Q(name, F)\
+    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+SSE_HELPER_B(paddb, FADD)
+SSE_HELPER_W(paddw, FADD)
+SSE_HELPER_L(paddl, FADD)
+SSE_HELPER_Q(paddq, FADD)
+
+SSE_HELPER_B(psubb, FSUB)
+SSE_HELPER_W(psubw, FSUB)
+SSE_HELPER_L(psubl, FSUB)
+SSE_HELPER_Q(psubq, FSUB)
+
+SSE_HELPER_B(paddusb, FADDUB)
+SSE_HELPER_B(paddsb, FADDSB)
+SSE_HELPER_B(psubusb, FSUBUB)
+SSE_HELPER_B(psubsb, FSUBSB)
+
+SSE_HELPER_W(paddusw, FADDUW)
+SSE_HELPER_W(paddsw, FADDSW)
+SSE_HELPER_W(psubusw, FSUBUW)
+SSE_HELPER_W(psubsw, FSUBSW)
+
+SSE_HELPER_B(pminub, FMINUB)
+SSE_HELPER_B(pmaxub, FMAXUB)
+
+SSE_HELPER_W(pminsw, FMINSW)
+SSE_HELPER_W(pmaxsw, FMAXSW)
+
+SSE_HELPER_Q(pand, FAND)
+SSE_HELPER_Q(pandn, FANDN)
+SSE_HELPER_Q(por, FOR)
+SSE_HELPER_Q(pxor, FXOR)
+
+SSE_HELPER_B(pcmpgtb, FCMPGTB)
+SSE_HELPER_W(pcmpgtw, FCMPGTW)
+SSE_HELPER_L(pcmpgtl, FCMPGTL)
+
+SSE_HELPER_B(pcmpeqb, FCMPEQ)
+SSE_HELPER_W(pcmpeqw, FCMPEQ)
+SSE_HELPER_L(pcmpeql, FCMPEQ)
+
+SSE_HELPER_W(pmullw, FMULLW)
+#if SHIFT == 0
+SSE_HELPER_W(pmulhrw, FMULHRW)
+#endif
+SSE_HELPER_W(pmulhuw, FMULHUW)
+SSE_HELPER_W(pmulhw, FMULHW)
+
+SSE_HELPER_B(pavgb, FAVG)
+SSE_HELPER_W(pavgw, FAVG)
+
+DEF_HELPER_2(glue(pmuludq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaddwd, SUFFIX), void, Reg, Reg)
+
+DEF_HELPER_2(glue(psadbw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(maskmov, SUFFIX), void, Reg, Reg, tl)
+DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32)
+#ifdef TARGET_X86_64
+DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64)
+#endif
+
+#if SHIFT == 0
+DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int)
+#else
+DEF_HELPER_3(shufps, void, Reg, Reg, int)
+DEF_HELPER_3(shufpd, void, Reg, Reg, int)
+DEF_HELPER_3(glue(pshufd, SUFFIX), void, Reg, Reg, int)
+DEF_HELPER_3(glue(pshuflw, SUFFIX), void, Reg, Reg, int)
+DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int)
+#endif
+
+#if SHIFT == 1
+/* FPU ops */
+/* XXX: not accurate */
+
+#define SSE_HELPER_S(name, F)\
+    DEF_HELPER_2(name ## ps , void, Reg, Reg)        \
+    DEF_HELPER_2(name ## ss , void, Reg, Reg)        \
+    DEF_HELPER_2(name ## pd , void, Reg, Reg)        \
+    DEF_HELPER_2(name ## sd , void, Reg, Reg)
+
+SSE_HELPER_S(add, FPU_ADD)
+SSE_HELPER_S(sub, FPU_SUB)
+SSE_HELPER_S(mul, FPU_MUL)
+SSE_HELPER_S(div, FPU_DIV)
+SSE_HELPER_S(min, FPU_MIN)
+SSE_HELPER_S(max, FPU_MAX)
+SSE_HELPER_S(sqrt, FPU_SQRT)
+
+
+DEF_HELPER_2(cvtps2pd, void, Reg, Reg)
+DEF_HELPER_2(cvtpd2ps, void, Reg, Reg)
+DEF_HELPER_2(cvtss2sd, void, Reg, Reg)
+DEF_HELPER_2(cvtsd2ss, void, Reg, Reg)
+DEF_HELPER_2(cvtdq2ps, void, Reg, Reg)
+DEF_HELPER_2(cvtdq2pd, void, Reg, Reg)
+DEF_HELPER_2(cvtpi2ps, void, XMMReg, MMXReg)
+DEF_HELPER_2(cvtpi2pd, void, XMMReg, MMXReg)
+DEF_HELPER_2(cvtsi2ss, void, XMMReg, i32)
+DEF_HELPER_2(cvtsi2sd, void, XMMReg, i32)
+
+#ifdef TARGET_X86_64
+DEF_HELPER_2(cvtsq2ss, void, XMMReg, i64)
+DEF_HELPER_2(cvtsq2sd, void, XMMReg, i64)
+#endif
+
+DEF_HELPER_2(cvtps2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvtpd2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvtps2pi, void, MMXReg, XMMReg)
+DEF_HELPER_2(cvtpd2pi, void, MMXReg, XMMReg)
+DEF_HELPER_1(cvtss2si, s32, XMMReg)
+DEF_HELPER_1(cvtsd2si, s32, XMMReg)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(cvtss2sq, s64, XMMReg)
+DEF_HELPER_1(cvtsd2sq, s64, XMMReg)
+#endif
+
+DEF_HELPER_2(cvttps2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvttpd2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvttps2pi, void, MMXReg, XMMReg)
+DEF_HELPER_2(cvttpd2pi, void, MMXReg, XMMReg)
+DEF_HELPER_1(cvttss2si, s32, XMMReg)
+DEF_HELPER_1(cvttsd2si, s32, XMMReg)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(cvttss2sq, s64, XMMReg)
+DEF_HELPER_1(cvttsd2sq, s64, XMMReg)
+#endif
+
+DEF_HELPER_2(rsqrtps, void, XMMReg, XMMReg)
+DEF_HELPER_2(rsqrtss, void, XMMReg, XMMReg)
+DEF_HELPER_2(rcpps, void, XMMReg, XMMReg)
+DEF_HELPER_2(rcpss, void, XMMReg, XMMReg)
+DEF_HELPER_2(haddps, void, XMMReg, XMMReg)
+DEF_HELPER_2(haddpd, void, XMMReg, XMMReg)
+DEF_HELPER_2(hsubps, void, XMMReg, XMMReg)
+DEF_HELPER_2(hsubpd, void, XMMReg, XMMReg)
+DEF_HELPER_2(addsubps, void, XMMReg, XMMReg)
+DEF_HELPER_2(addsubpd, void, XMMReg, XMMReg)
+
+#define SSE_HELPER_CMP(name, F)\
+    DEF_HELPER_2( name ## ps , void, Reg, Reg)        \
+    DEF_HELPER_2( name ## ss , void, Reg, Reg)        \
+    DEF_HELPER_2( name ## pd , void, Reg, Reg)        \
+    DEF_HELPER_2( name ## sd , void, Reg, Reg)
+
+SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
+SSE_HELPER_CMP(cmplt, FPU_CMPLT)
+SSE_HELPER_CMP(cmple, FPU_CMPLE)
+SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD)
+SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ)
+SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
+SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
+SSE_HELPER_CMP(cmpord, FPU_CMPORD)
+
+DEF_HELPER_2(ucomiss, void, Reg, Reg)
+DEF_HELPER_2(comiss, void, Reg, Reg)
+DEF_HELPER_2(ucomisd, void, Reg, Reg)
+DEF_HELPER_2(comisd, void, Reg, Reg)
+DEF_HELPER_1(movmskps, i32, Reg)
+DEF_HELPER_1(movmskpd, i32, Reg)
+#endif
+
+DEF_HELPER_1(glue(pmovmskb, SUFFIX), i32, Reg)
+DEF_HELPER_2(glue(packsswb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(packuswb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(packssdw, SUFFIX), void, Reg, Reg)
+#define UNPCK_OP(base_name, base)                               \
+    DEF_HELPER_2(glue(punpck ## base_name ## bw, SUFFIX) , void, Reg, Reg) \
+    DEF_HELPER_2(glue(punpck ## base_name ## wd, SUFFIX) , void, Reg, Reg) \
+    DEF_HELPER_2(glue(punpck ## base_name ## dq, SUFFIX) , void, Reg, Reg)
+
+UNPCK_OP(l, 0)
+UNPCK_OP(h, 1)
+
+#if SHIFT == 1
+DEF_HELPER_2(glue(punpcklqdq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(punpckhqdq, SUFFIX), void, Reg, Reg)
+#endif
+
+/* 3DNow! float ops */
+#if SHIFT == 0
+DEF_HELPER_2(pi2fd, void, MMXReg, MMXReg)
+DEF_HELPER_2(pi2fw, void, MMXReg, MMXReg)
+DEF_HELPER_2(pf2id, void, MMXReg, MMXReg)
+DEF_HELPER_2(pf2iw, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfacc, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfadd, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfcmpeq, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfcmpge, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfcmpgt, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfmax, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfmin, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfmul, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfnacc, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfpnacc, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfrcp, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfrsqrt, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfsub, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfsubr, void, MMXReg, MMXReg)
+DEF_HELPER_2(pswapd, void, MMXReg, MMXReg)
+#endif
+
+/* SSSE3 op helpers */
+DEF_HELPER_2(glue(phaddw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phaddd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phaddsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phsubw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phsubd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phsubsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pabsb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pabsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pabsd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaddubsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmulhrsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pshufb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psignb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psignw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psignd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(palignr, SUFFIX), void, Reg, Reg, s32)
+
+/* SSE4.1 op helpers */
+#if SHIFT == 1
+DEF_HELPER_2(glue(pblendvb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(blendvps, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(blendvpd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(ptest, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxbw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxbd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxbq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxwd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxwq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxdq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxbw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxbd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxbq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxwd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxwq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxdq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmuldq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pcmpeqq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(packusdw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminsb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminsd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminuw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminud, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxsb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxsd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxuw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxud, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmulld, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phminposuw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(roundps, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(roundpd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(roundss, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(roundsd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(blendps, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(blendpd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pblendw, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(dpps, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(dppd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(mpsadbw, SUFFIX), void, Reg, Reg, i32)
+#endif
+
+/* SSE4.2 op helpers */
+#if SHIFT == 1
+DEF_HELPER_2(glue(pcmpgtq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(pcmpestri, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pcmpestrm, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pcmpistri, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pcmpistrm, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(crc32, tl, i32, tl, i32)
+DEF_HELPER_2(popcnt, tl, tl, i32)
+#endif
+
+#undef SHIFT
+#undef Reg
+#undef SUFFIX
+
+#undef SSE_HELPER_B
+#undef SSE_HELPER_W
+#undef SSE_HELPER_L
+#undef SSE_HELPER_Q
+#undef SSE_HELPER_S
+#undef SSE_HELPER_CMP
+#undef UNPCK_OP
diff --git a/target-i386/svm.h b/target-i386/svm.h
new file mode 100644
index 0000000..a224aea
--- /dev/null
+++ b/target-i386/svm.h
@@ -0,0 +1,222 @@
+#ifndef __SVM_H
+#define __SVM_H
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+
+#define	SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define	SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define	SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define	SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define	SVM_EXIT_READ_CR0 	0x000
+#define	SVM_EXIT_READ_CR3 	0x003
+#define	SVM_EXIT_READ_CR4 	0x004
+#define	SVM_EXIT_READ_CR8 	0x008
+#define	SVM_EXIT_WRITE_CR0 	0x010
+#define	SVM_EXIT_WRITE_CR3 	0x013
+#define	SVM_EXIT_WRITE_CR4 	0x014
+#define	SVM_EXIT_WRITE_CR8 	0x018
+#define	SVM_EXIT_READ_DR0 	0x020
+#define	SVM_EXIT_READ_DR1 	0x021
+#define	SVM_EXIT_READ_DR2 	0x022
+#define	SVM_EXIT_READ_DR3 	0x023
+#define	SVM_EXIT_READ_DR4 	0x024
+#define	SVM_EXIT_READ_DR5 	0x025
+#define	SVM_EXIT_READ_DR6 	0x026
+#define	SVM_EXIT_READ_DR7 	0x027
+#define	SVM_EXIT_WRITE_DR0 	0x030
+#define	SVM_EXIT_WRITE_DR1 	0x031
+#define	SVM_EXIT_WRITE_DR2 	0x032
+#define	SVM_EXIT_WRITE_DR3 	0x033
+#define	SVM_EXIT_WRITE_DR4 	0x034
+#define	SVM_EXIT_WRITE_DR5 	0x035
+#define	SVM_EXIT_WRITE_DR6 	0x036
+#define	SVM_EXIT_WRITE_DR7 	0x037
+#define SVM_EXIT_EXCP_BASE      0x040
+#define SVM_EXIT_INTR		0x060
+#define SVM_EXIT_NMI		0x061
+#define SVM_EXIT_SMI		0x062
+#define SVM_EXIT_INIT		0x063
+#define SVM_EXIT_VINTR		0x064
+#define SVM_EXIT_CR0_SEL_WRITE	0x065
+#define SVM_EXIT_IDTR_READ	0x066
+#define SVM_EXIT_GDTR_READ	0x067
+#define SVM_EXIT_LDTR_READ	0x068
+#define SVM_EXIT_TR_READ	0x069
+#define SVM_EXIT_IDTR_WRITE	0x06a
+#define SVM_EXIT_GDTR_WRITE	0x06b
+#define SVM_EXIT_LDTR_WRITE	0x06c
+#define SVM_EXIT_TR_WRITE	0x06d
+#define SVM_EXIT_RDTSC		0x06e
+#define SVM_EXIT_RDPMC		0x06f
+#define SVM_EXIT_PUSHF		0x070
+#define SVM_EXIT_POPF		0x071
+#define SVM_EXIT_CPUID		0x072
+#define SVM_EXIT_RSM		0x073
+#define SVM_EXIT_IRET		0x074
+#define SVM_EXIT_SWINT		0x075
+#define SVM_EXIT_INVD		0x076
+#define SVM_EXIT_PAUSE		0x077
+#define SVM_EXIT_HLT		0x078
+#define SVM_EXIT_INVLPG		0x079
+#define SVM_EXIT_INVLPGA	0x07a
+#define SVM_EXIT_IOIO		0x07b
+#define SVM_EXIT_MSR		0x07c
+#define SVM_EXIT_TASK_SWITCH	0x07d
+#define SVM_EXIT_FERR_FREEZE	0x07e
+#define SVM_EXIT_SHUTDOWN	0x07f
+#define SVM_EXIT_VMRUN		0x080
+#define SVM_EXIT_VMMCALL	0x081
+#define SVM_EXIT_VMLOAD		0x082
+#define SVM_EXIT_VMSAVE		0x083
+#define SVM_EXIT_STGI		0x084
+#define SVM_EXIT_CLGI		0x085
+#define SVM_EXIT_SKINIT		0x086
+#define SVM_EXIT_RDTSCP		0x087
+#define SVM_EXIT_ICEBP		0x088
+#define SVM_EXIT_WBINVD		0x089
+/* only included in documentation, maybe wrong */
+#define SVM_EXIT_MONITOR	0x08a
+#define SVM_EXIT_MWAIT		0x08b
+#define SVM_EXIT_NPF  		0x400
+
+#define SVM_EXIT_ERR		-1
+
+#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
+
+struct __attribute__ ((__packed__)) vmcb_control_area {	/* packed: 0x400 bytes in total */
+	uint16_t intercept_cr_read;	/* offset 0x000 */
+	uint16_t intercept_cr_write;
+	uint16_t intercept_dr_read;
+	uint16_t intercept_dr_write;
+	uint32_t intercept_exceptions;
+	uint64_t intercept;	/* offset 0x00c */
+	uint8_t reserved_1[44];
+	uint64_t iopm_base_pa;	/* offset 0x040 */
+	uint64_t msrpm_base_pa;
+	uint64_t tsc_offset;
+	uint32_t asid;
+	uint8_t tlb_ctl;
+	uint8_t reserved_2[3];
+	uint32_t int_ctl;	/* offset 0x060 */
+	uint32_t int_vector;
+	uint32_t int_state;
+	uint8_t reserved_3[4];
+	uint64_t exit_code;	/* offset 0x070 */
+	uint64_t exit_info_1;
+	uint64_t exit_info_2;
+	uint32_t exit_int_info;
+	uint32_t exit_int_info_err;
+	uint64_t nested_ctl;	/* offset 0x090 */
+	uint8_t reserved_4[16];
+	uint32_t event_inj;	/* offset 0x0a8 */
+	uint32_t event_inj_err;
+	uint64_t nested_cr3;
+	uint64_t lbr_ctl;	/* offset 0x0b8 */
+	uint8_t reserved_5[832];	/* pads the area from 0x0c0 up to 0x400 */
+};
+
+struct __attribute__ ((__packed__)) vmcb_seg {	/* packed: 2 + 2 + 4 + 8 = 16 bytes */
+	uint16_t selector;
+	uint16_t attrib;
+	uint32_t limit;
+	uint64_t base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {	/* packed; offsets below are within this struct */
+	struct vmcb_seg es;	/* ten 16-byte vmcb_seg entries: 0x000-0x09f */
+	struct vmcb_seg cs;
+	struct vmcb_seg ss;
+	struct vmcb_seg ds;
+	struct vmcb_seg fs;
+	struct vmcb_seg gs;
+	struct vmcb_seg gdtr;
+	struct vmcb_seg ldtr;
+	struct vmcb_seg idtr;
+	struct vmcb_seg tr;
+	uint8_t reserved_1[43];
+	uint8_t cpl;	/* offset 0x0cb */
+	uint8_t reserved_2[4];
+	uint64_t efer;	/* offset 0x0d0 */
+	uint8_t reserved_3[112];
+	uint64_t cr4;	/* offset 0x148 */
+	uint64_t cr3;
+	uint64_t cr0;
+	uint64_t dr7;
+	uint64_t dr6;
+	uint64_t rflags;
+	uint64_t rip;	/* offset 0x178 */
+	uint8_t reserved_4[88];
+	uint64_t rsp;	/* offset 0x1d8 */
+	uint8_t reserved_5[24];
+	uint64_t rax;	/* offset 0x1f8 */
+	uint64_t star;
+	uint64_t lstar;
+	uint64_t cstar;
+	uint64_t sfmask;
+	uint64_t kernel_gs_base;
+	uint64_t sysenter_cs;
+	uint64_t sysenter_esp;
+	uint64_t sysenter_eip;
+	uint64_t cr2;	/* offset 0x240 */
+	uint8_t reserved_6[32];
+	uint64_t g_pat;	/* offset 0x268 */
+	uint64_t dbgctl;
+	uint64_t br_from;
+	uint64_t br_to;
+	uint64_t last_excp_from;
+	uint64_t last_excp_to;	/* last field; the struct ends at 0x298 */
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+	struct vmcb_control_area control;	/* bytes 0x000-0x3ff */
+	struct vmcb_save_area save;	/* starts at byte 0x400 */
+};
+
+#endif
diff --git a/target-i386/translate.c b/target-i386/translate.c
new file mode 100644
index 0000000..b50f0a9
--- /dev/null
+++ b/target-i386/translate.c
@@ -0,0 +1,7812 @@
+/*
+ *  i386 translation
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+
+#include "cpu.h"
+#include "exec-all.h"
+#include "disas.h"
+#include "tcg-op.h"
+
+#include "helper.h"
+#define GEN_HELPER 1
+#include "helper.h"
+
+#define PREFIX_REPZ   0x01
+#define PREFIX_REPNZ  0x02
+#define PREFIX_LOCK   0x04
+#define PREFIX_DATA   0x08
+#define PREFIX_ADR    0x10
+
+#ifdef TARGET_X86_64
+#define X86_64_ONLY(x) x
+#define X86_64_DEF(...)  __VA_ARGS__
+#define CODE64(s) ((s)->code64)
+#define REX_X(s) ((s)->rex_x)
+#define REX_B(s) ((s)->rex_b)
+/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */
+#if 1
+#define BUGGY_64(x) NULL
+#endif
+#else
+#define X86_64_ONLY(x) NULL
+#define X86_64_DEF(...)
+#define CODE64(s) 0
+#define REX_X(s) 0
+#define REX_B(s) 0
+#endif
+
+//#define MACRO_TEST   1
+
+/* global register indexes */
+static TCGv_ptr cpu_env;
+static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
+static TCGv_i32 cpu_cc_op;
+/* local temps */
+static TCGv cpu_T[2], cpu_T3;
+/* local register indexes (only used inside old micro ops) */
+static TCGv cpu_tmp0, cpu_tmp4;
+static TCGv_ptr cpu_ptr0, cpu_ptr1;
+static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
+static TCGv_i64 cpu_tmp1_i64;
+static TCGv cpu_tmp5, cpu_tmp6;
+
+#include "gen-icount.h"
+
+#ifdef TARGET_X86_64
+static int x86_64_hregs;
+#endif
+
+typedef struct DisasContext {
+    /* current insn context */
+    int override; /* -1 if no override */
+    int prefix;
+    int aflag, dflag;
+    target_ulong pc; /* pc = eip + cs_base */
+    int is_jmp; /* 1 = means jump (stop translation), 2 means CPU
+                   static state change (stop translation) */
+    /* current block context */
+    target_ulong cs_base; /* base of CS segment */
+    int pe;     /* protected mode */
+    int code32; /* 32 bit code segment */
+#ifdef TARGET_X86_64
+    int lma;    /* long mode active */
+    int code64; /* 64 bit code segment */
+    int rex_x, rex_b;
+#endif
+    int ss32;   /* 32 bit stack segment */
+    int cc_op;  /* current CC operation */
+    int addseg; /* non zero if either DS/ES/SS have a non zero base */
+    int f_st;   /* currently unused */
+    int vm86;   /* vm86 mode */
+    int cpl;
+    int iopl;
+    int tf;     /* TF cpu flag */
+    int singlestep_enabled; /* "hardware" single step enabled */
+    int jmp_opt; /* use direct block chaining for direct jumps */
+    int mem_index; /* select memory access functions */
+    uint64_t flags; /* all execution flags */
+    struct TranslationBlock *tb;
+    int popl_esp_hack; /* for correct popl with esp base handling */
+    int rip_offset; /* only used in x86_64, but left for simplicity */
+    int cpuid_features;
+    int cpuid_ext_features;
+    int cpuid_ext2_features;
+    int cpuid_ext3_features;
+} DisasContext;
+
+static void gen_eob(DisasContext *s);
+static void gen_jmp(DisasContext *s, target_ulong eip);
+static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
+
+/* i386 arith/logic operations */
+enum {
+    OP_ADDL,
+    OP_ORL,
+    OP_ADCL,
+    OP_SBBL,
+    OP_ANDL,
+    OP_SUBL,
+    OP_XORL,
+    OP_CMPL,
+};
+
+/* i386 shift ops */
+enum {
+    OP_ROL,
+    OP_ROR,
+    OP_RCL,
+    OP_RCR,
+    OP_SHL,
+    OP_SHR,
+    OP_SHL1, /* undocumented */
+    OP_SAR = 7,
+};
+
+enum {
+    JCC_O,
+    JCC_B,
+    JCC_Z,
+    JCC_BE,
+    JCC_S,
+    JCC_P,
+    JCC_L,
+    JCC_LE,
+};
+
+/* operand size */
+enum {
+    OT_BYTE = 0,
+    OT_WORD,
+    OT_LONG,
+    OT_QUAD,
+};
+
+enum {
+    /* I386 int registers */
+    OR_EAX,   /* MUST be even numbered */
+    OR_ECX,
+    OR_EDX,
+    OR_EBX,
+    OR_ESP,
+    OR_EBP,
+    OR_ESI,
+    OR_EDI,
+
+    OR_TMP0 = 16,    /* temporary operand register */
+    OR_TMP1,
+    OR_A0, /* temporary register used when doing address evaluation */
+};
+
+static inline void gen_op_movl_T0_0(void)
+{
+    tcg_gen_movi_tl(cpu_T[0], 0);
+}
+
+static inline void gen_op_movl_T0_im(int32_t val)
+{
+    tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+static inline void gen_op_movl_T0_imu(uint32_t val)
+{
+    tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+static inline void gen_op_movl_T1_im(int32_t val)
+{
+    tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+static inline void gen_op_movl_T1_imu(uint32_t val)
+{
+    tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+static inline void gen_op_movl_A0_im(uint32_t val)
+{
+    tcg_gen_movi_tl(cpu_A0, val);
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_movq_A0_im(int64_t val)
+{
+    tcg_gen_movi_tl(cpu_A0, val);
+}
+#endif
+
+static inline void gen_movtl_T0_im(target_ulong val)
+{
+    tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+static inline void gen_movtl_T1_im(target_ulong val)
+{
+    tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+static inline void gen_op_andl_T0_ffff(void)
+{
+    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
+}
+
+static inline void gen_op_andl_T0_im(uint32_t val)
+{
+    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], val);
+}
+
+static inline void gen_op_movl_T0_T1(void)
+{
+    tcg_gen_mov_tl(cpu_T[0], cpu_T[1]);
+}
+
+static inline void gen_op_andl_A0_ffff(void)
+{
+    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffff);
+}
+
+#ifdef TARGET_X86_64
+
+#define NB_OP_SIZES 4
+
+#else /* !TARGET_X86_64 */
+
+#define NB_OP_SIZES 3
+
+#endif /* !TARGET_X86_64 */
+
+#if defined(WORDS_BIGENDIAN)
+#define REG_B_OFFSET (sizeof(target_ulong) - 1)
+#define REG_H_OFFSET (sizeof(target_ulong) - 2)
+#define REG_W_OFFSET (sizeof(target_ulong) - 2)
+#define REG_L_OFFSET (sizeof(target_ulong) - 4)
+#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
+#else
+#define REG_B_OFFSET 0
+#define REG_H_OFFSET 1
+#define REG_W_OFFSET 0
+#define REG_L_OFFSET 0
+#define REG_LH_OFFSET 4
+#endif
+
+static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
+{   /* Emit a store of t0 into CPUState regs[], sized by operand type ot. */
+    switch(ot) {
+    case OT_BYTE: /* one byte; which byte depends on reg, see both branches */
+        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
+        } else { /* reg 4..7 otherwise: the REG_H_OFFSET byte of regs[reg - 4] */
+            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+        }
+        break;
+    case OT_WORD: /* 16-bit store; the other bytes of regs[reg] are not written */
+        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        break;
+#ifdef TARGET_X86_64
+    case OT_LONG:
+        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        /* high part of register set to zero */
+        tcg_gen_movi_tl(cpu_tmp0, 0);
+        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+        break;
+    default: /* any other ot value is handled like OT_QUAD */
+    case OT_QUAD:
+        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+#else
+    default: /* any other ot value is handled like OT_LONG */
+    case OT_LONG:
+        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        break;
+#endif
+    }
+}
+
+static inline void gen_op_mov_reg_T0(int ot, int reg)
+{
+    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+}
+
+static inline void gen_op_mov_reg_T1(int ot, int reg)
+{
+    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+}
+
+static inline void gen_op_mov_reg_A0(int size, int reg)
+{   /* Emit a store of cpu_A0 into regs[reg]; size 0/1/2 = 16/32/full width. */
+    switch(size) {
+    case 0: /* 16-bit store; the other bytes of regs[reg] are not written */
+        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        break;
+#ifdef TARGET_X86_64
+    case 1:
+        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        /* high part of register set to zero */
+        tcg_gen_movi_tl(cpu_tmp0, 0);
+        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+        break;
+    default: /* any other size value is handled like 2 */
+    case 2:
+        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+#else
+    default: /* any other size value is handled like 1 */
+    case 1:
+        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        break;
+#endif
+    }
+}
+
+static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg) /* emit a load from CPUState regs[] into t0 */
+{
+    switch(ot) {
+    case OT_BYTE:
+        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+            goto std_case; /* the byte is the low part of regs[reg]: load the whole slot */
+        } else { /* zero-extended load of the REG_H_OFFSET byte of regs[reg - 4] */
+            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+        }
+        break;
+    default:
+    std_case: /* full-width load; no truncation to ot is performed here */
+        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+    }
+}
+
+static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
+{
+    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
+}
+
+static inline void gen_op_movl_A0_reg(int reg)
+{
+    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+}
+
+static inline void gen_op_addl_A0_im(int32_t val)
+{
+    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+#ifdef TARGET_X86_64
+    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_addq_A0_im(int64_t val)
+{
+    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+}
+#endif
+    
+static void gen_add_A0_im(DisasContext *s, int val)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s))
+        gen_op_addq_A0_im(val);
+    else
+#endif
+        gen_op_addl_A0_im(val);
+}
+
+static inline void gen_op_addl_T0_T1(void)
+{
+    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+}
+
+static inline void gen_op_jmp_T0(void)
+{
+    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
+}
+
+static inline void gen_op_add_reg_im(int size, int reg, int32_t val) /* emit regs[reg] += val at the width given by size; clobbers cpu_tmp0 */
+{
+    switch(size) { /* no default case: any other size emits nothing */
+    case 0: /* 16-bit: only the low word of the sum is stored back */
+        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+        tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        break;
+    case 1: /* 32-bit: the whole slot is rewritten, masked to 32 bits on x86_64 */
+        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+#ifdef TARGET_X86_64
+        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
+#endif
+        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+#ifdef TARGET_X86_64
+    case 2: /* full target_ulong width, no masking */
+        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+#endif
+    }
+}
+
+static inline void gen_op_add_reg_T0(int size, int reg) /* emit regs[reg] += T0 at the width given by size; clobbers cpu_tmp0 */
+{
+    switch(size) { /* no default case: any other size emits nothing */
+    case 0: /* 16-bit: only the low word of the sum is stored back */
+        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
+        tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        break;
+    case 1: /* 32-bit: the whole slot is rewritten, masked to 32 bits on x86_64 */
+        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
+#ifdef TARGET_X86_64
+        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
+#endif
+        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+#ifdef TARGET_X86_64
+    case 2: /* full target_ulong width, no masking */
+        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
+        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+#endif
+    }
+}
+
+static inline void gen_op_set_cc_op(int32_t val)
+{
+    tcg_gen_movi_i32(cpu_cc_op, val);
+}
+
+static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+    if (shift != 0) 
+        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+#ifdef TARGET_X86_64
+    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+static inline void gen_op_movl_A0_seg(int reg)
+{
+    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
+}
+
+static inline void gen_op_addl_A0_seg(int reg)
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+#ifdef TARGET_X86_64
+    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_movq_A0_seg(int reg)
+{
+    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base));
+}
+
+static inline void gen_op_addq_A0_seg(int reg)
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+}
+
+static inline void gen_op_movq_A0_reg(int reg)
+{
+    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
+}
+
+static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+    if (shift != 0) 
+        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+}
+#endif
+
+static inline void gen_op_lds_T0_A0(int idx) /* emit a sign-extending guest load from [A0] into T0 */
+{
+    int mem_index = (idx >> 2) - 1; /* the bits above the low two hold mem_index + 1 */
+    switch(idx & 3) { /* the low two bits select the access size */
+    case 0:
+        tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
+        break;
+    case 1:
+        tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
+        break;
+    default: /* size codes 2 and 3 both use the 32-bit signed load */
+    case 2:
+        tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
+        break;
+    }
+}
+
+static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0) /* emit an unsigned 8/16/32-bit or a 64-bit guest load from [a0] into t0 */
+{
+    int mem_index = (idx >> 2) - 1; /* the bits above the low two hold mem_index + 1 */
+    switch(idx & 3) { /* the low two bits select the access size */
+    case 0:
+        tcg_gen_qemu_ld8u(t0, a0, mem_index);
+        break;
+    case 1:
+        tcg_gen_qemu_ld16u(t0, a0, mem_index);
+        break;
+    case 2:
+        tcg_gen_qemu_ld32u(t0, a0, mem_index);
+        break;
+    default:
+    case 3:
+        /* Should never happen on 32-bit targets, where nothing is emitted.  */
+#ifdef TARGET_X86_64
+        tcg_gen_qemu_ld64(t0, a0, mem_index);
+#endif
+        break;
+    }
+}
+
+/* XXX: always use ldu or lds */
+static inline void gen_op_ld_T0_A0(int idx)
+{
+    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
+}
+
+static inline void gen_op_ldu_T0_A0(int idx)
+{
+    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
+}
+
+static inline void gen_op_ld_T1_A0(int idx)
+{
+    gen_op_ld_v(idx, cpu_T[1], cpu_A0);
+}
+
+static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0) /* emit an 8/16/32/64-bit guest store of t0 to [a0] */
+{
+    int mem_index = (idx >> 2) - 1; /* the bits above the low two hold mem_index + 1 */
+    switch(idx & 3) { /* the low two bits select the access size */
+    case 0:
+        tcg_gen_qemu_st8(t0, a0, mem_index);
+        break;
+    case 1:
+        tcg_gen_qemu_st16(t0, a0, mem_index);
+        break;
+    case 2:
+        tcg_gen_qemu_st32(t0, a0, mem_index);
+        break;
+    default:
+    case 3:
+        /* Should never happen on 32-bit targets, where nothing is emitted.  */
+#ifdef TARGET_X86_64
+        tcg_gen_qemu_st64(t0, a0, mem_index);
+#endif
+        break;
+    }
+}
+
+static inline void gen_op_st_T0_A0(int idx)
+{
+    gen_op_st_v(idx, cpu_T[0], cpu_A0);
+}
+
+static inline void gen_op_st_T1_A0(int idx)
+{
+    gen_op_st_v(idx, cpu_T[1], cpu_A0);
+}
+
+static inline void gen_jmp_im(target_ulong pc)
+{
+    tcg_gen_movi_tl(cpu_tmp0, pc);
+    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, eip));
+}
+
+static inline void gen_string_movl_A0_ESI(DisasContext *s) /* emit A0 = ESI-based source address for the string ops */
+{
+    int override;
+
+    override = s->override; /* a negative value is treated as "no override" below */
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) { /* 64 bit address: full-width ESI, segment base only with an override */
+        if (override >= 0) {
+            gen_op_movq_A0_seg(override);
+            gen_op_addq_A0_reg_sN(0, R_ESI);
+        } else {
+            gen_op_movq_A0_reg(R_ESI);
+        }
+    } else
+#endif
+    if (s->aflag) {
+        /* 32 bit address: DS becomes the segment when addseg is set and no override is given */
+        if (s->addseg && override < 0)
+            override = R_DS;
+        if (override >= 0) {
+            gen_op_movl_A0_seg(override);
+            gen_op_addl_A0_reg_sN(0, R_ESI);
+        } else {
+            gen_op_movl_A0_reg(R_ESI);
+        }
+    } else {
+        /* 16 bit address: a segment base is always added (DS unless overridden) */
+        if (override < 0)
+            override = R_DS;
+        gen_op_movl_A0_reg(R_ESI);
+        gen_op_andl_A0_ffff();
+        gen_op_addl_A0_seg(override);
+    }
+}
+
+static inline void gen_string_movl_A0_EDI(DisasContext *s) /* emit A0 = EDI-based destination address; s->override is never consulted */
+{
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) { /* 64 bit address: EDI alone, no segment base added */
+        gen_op_movq_A0_reg(R_EDI);
+    } else
+#endif
+    if (s->aflag) { /* 32 bit address: the ES base is added only when addseg is set */
+        if (s->addseg) {
+            gen_op_movl_A0_seg(R_ES);
+            gen_op_addl_A0_reg_sN(0, R_EDI);
+        } else {
+            gen_op_movl_A0_reg(R_EDI);
+        }
+    } else { /* 16 bit address: mask to 16 bits, then always add the ES base */
+        gen_op_movl_A0_reg(R_EDI);
+        gen_op_andl_A0_ffff();
+        gen_op_addl_A0_seg(R_ES);
+    }
+}
+
+static inline void gen_op_movl_T0_Dshift(int ot) /* emit T0 = df << ot, with df loaded sign-extended from CPUState */
+{
+    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df));
+    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot); /* the string emitters add this T0 to ESI/EDI as the per-element step */
+}
+
+static void gen_extu(int ot, TCGv reg)
+{
+    switch(ot) {
+    case OT_BYTE:
+        tcg_gen_ext8u_tl(reg, reg);
+        break;
+    case OT_WORD:
+        tcg_gen_ext16u_tl(reg, reg);
+        break;
+    case OT_LONG:
+        tcg_gen_ext32u_tl(reg, reg);
+        break;
+    default:
+        break;
+    }
+}
+
+static void gen_exts(int ot, TCGv reg)
+{
+    switch(ot) {
+    case OT_BYTE:
+        tcg_gen_ext8s_tl(reg, reg);
+        break;
+    case OT_WORD:
+        tcg_gen_ext16s_tl(reg, reg);
+        break;
+    case OT_LONG:
+        tcg_gen_ext32s_tl(reg, reg);
+        break;
+    default:
+        break;
+    }
+}
+
+static inline void gen_op_jnz_ecx(int size, int label1) /* emit: branch to label1 if ECX, truncated to the address size, is non-zero */
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
+    gen_extu(size + 1, cpu_tmp0); /* callers pass s->aflag; presumably aflag + 1 is the matching OT_* width - confirm */
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
+}
+
+static inline void gen_op_jz_ecx(int size, int label1) /* emit: branch to label1 if ECX, truncated to the address size, is zero */
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
+    gen_extu(size + 1, cpu_tmp0); /* callers pass s->aflag; presumably aflag + 1 is the matching OT_* width - confirm */
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+}
+
+static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
+{
+    switch (ot) {
+    case 0: gen_helper_inb(v, n); break;
+    case 1: gen_helper_inw(v, n); break;
+    case 2: gen_helper_inl(v, n); break;
+    }
+
+}
+
+static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n)
+{
+    switch (ot) {
+    case 0: gen_helper_outb(v, n); break;
+    case 1: gen_helper_outw(v, n); break;
+    case 2: gen_helper_outl(v, n); break;
+    }
+
+}
+
+static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
+                         uint32_t svm_flags) /* emit the I/O permission check and SVM intercept check for the port value held in T0 */
+{
+    int state_saved;
+    target_ulong next_eip;
+
+    state_saved = 0;
+    if (s->pe && (s->cpl > s->iopl || s->vm86)) { /* only then is a check_io helper call emitted */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_jmp_im(cur_eip); /* store cur_eip into env->eip before the helper runs */
+        state_saved = 1;
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        switch (ot) { /* ot 0/1/2 picks the byte/word/long helper; other values emit no check */
+        case 0: gen_helper_check_iob(cpu_tmp2_i32); break;
+        case 1: gen_helper_check_iow(cpu_tmp2_i32); break;
+        case 2: gen_helper_check_iol(cpu_tmp2_i32); break;
+        }
+    }
+    if(s->flags & HF_SVMI_MASK) {
+        if (!state_saved) { /* do not emit the cc_op/eip stores a second time */
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(cur_eip);
+            state_saved = 1;
+        }
+        svm_flags |= (1 << (4 + ot)); /* the operand size is encoded as a single bit at position 4 + ot */
+        next_eip = s->pc - s->cs_base;
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        gen_helper_svm_check_io(cpu_tmp2_i32, tcg_const_i32(svm_flags),
+                                tcg_const_i32(next_eip - cur_eip)); /* last argument is next_eip - cur_eip */
+    }
+}
+
+static inline void gen_movs(DisasContext *s, int ot)
+{
+    gen_string_movl_A0_ESI(s);
+    gen_op_ld_T0_A0(ot + s->mem_index);
+    gen_string_movl_A0_EDI(s);
+    gen_op_st_T0_A0(ot + s->mem_index);
+    gen_op_movl_T0_Dshift(ot);
+    gen_op_add_reg_T0(s->aflag, R_ESI);
+    gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+static inline void gen_update_cc_op(DisasContext *s)
+{
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+        s->cc_op = CC_OP_DYNAMIC;
+    }
+}
+
+static void gen_op_update1_cc(void)
+{
+    tcg_gen_discard_tl(cpu_cc_src);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
+static void gen_op_update2_cc(void)
+{
+    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
+static inline void gen_op_cmpl_T0_T1_cc(void)
+{
+    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+    tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
+}
+
+static inline void gen_op_testl_T0_T1_cc(void)
+{
+    tcg_gen_discard_tl(cpu_cc_src);
+    tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
+}
+
+static void gen_op_update_neg_cc(void)
+{
+    tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
+/* compute eflags.C to reg */
+static void gen_compute_eflags_c(TCGv reg)
+{
+    gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_cc_op);
+    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+}
+
+/* compute all eflags to reg */
+static void gen_compute_eflags(TCGv reg)
+{
+    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_cc_op);
+    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+}
+
+static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) /* emit T0 = 0 or 1 for condition jcc_op, computed through the eflags helpers */
+{
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op); /* the helpers are passed cpu_cc_op, so store the statically known value first */
+    switch(jcc_op) {
+    case JCC_O:
+        gen_compute_eflags(cpu_T[0]);
+        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11); /* CC_O */
+        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        break;
+    case JCC_B:
+        gen_compute_eflags_c(cpu_T[0]); /* the carry helper result is used as-is */
+        break;
+    case JCC_Z:
+        gen_compute_eflags(cpu_T[0]);
+        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6); /* CC_Z */
+        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        break;
+    case JCC_BE:
+        gen_compute_eflags(cpu_tmp0);
+        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6); /* CC_Z */
+        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); /* OR with bit 0 of the unshifted flags (presumably CC_C - confirm) */
+        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        break;
+    case JCC_S:
+        gen_compute_eflags(cpu_T[0]);
+        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7); /* CC_S */
+        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        break;
+    case JCC_P:
+        gen_compute_eflags(cpu_T[0]);
+        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2); /* bit 2 (presumably CC_P - confirm) */
+        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        break;
+    case JCC_L: /* result is O xor S */
+        gen_compute_eflags(cpu_tmp0);
+        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
+        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
+        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        break;
+    default: /* any unlisted jcc_op is handled like JCC_LE */
+    case JCC_LE: /* result is (O xor S) or Z; also clobbers cpu_tmp4 */
+        gen_compute_eflags(cpu_tmp0);
+        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
+        tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
+        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
+        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        break;
+    }
+}
+
+/* return true if setcc_slow is not needed (WARNING: must be kept in
+   sync with gen_jcc1) */
+static int is_fast_jcc_case(DisasContext *s, int b) /* returns 1 when s->cc_op and jump opcode b hit a listed fast case, else 0 */
+{
+    int jcc_op;
+    jcc_op = (b >> 1) & 7; /* bits 1..3 of b select the condition; bit 0 (inversion) does not matter here */
+    switch(s->cc_op) {
+        /* we optimize the cmp/jcc case */
+    case CC_OP_SUBB:
+    case CC_OP_SUBW:
+    case CC_OP_SUBL:
+    case CC_OP_SUBQ:
+        if (jcc_op == JCC_O || jcc_op == JCC_P) /* after SUB every condition except O and P is fast */
+            goto slow_jcc;
+        break;
+
+        /* some jumps are easy to compute */
+    case CC_OP_ADDB:
+    case CC_OP_ADDW:
+    case CC_OP_ADDL:
+    case CC_OP_ADDQ:
+
+    case CC_OP_LOGICB:
+    case CC_OP_LOGICW:
+    case CC_OP_LOGICL:
+    case CC_OP_LOGICQ:
+
+    case CC_OP_INCB:
+    case CC_OP_INCW:
+    case CC_OP_INCL:
+    case CC_OP_INCQ:
+
+    case CC_OP_DECB:
+    case CC_OP_DECW:
+    case CC_OP_DECL:
+    case CC_OP_DECQ:
+
+    case CC_OP_SHLB:
+    case CC_OP_SHLW:
+    case CC_OP_SHLL:
+    case CC_OP_SHLQ:
+        if (jcc_op != JCC_Z && jcc_op != JCC_S) /* for these groups only Z and S are fast */
+            goto slow_jcc;
+        break;
+    default: /* NOTE(review): gen_jcc1 also has fast Z/S paths for the ADC, SBB and SAR groups, which land here and report "slow" */
+    slow_jcc:
+        return 0;
+    }
+    return 1;
+}
+
+/* generate a conditional jump to label 'l1' according to jump opcode
+   value 'b'. In the fast case, T0 is guaranteed not to be used. */
+static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
+{
+    int inv, jcc_op, size, cond;
+    TCGv t0;
+
+    inv = b & 1;
+    jcc_op = (b >> 1) & 7;
+
+    switch(cc_op) {
+        /* we optimize the cmp/jcc case */
+    case CC_OP_SUBB:
+    case CC_OP_SUBW:
+    case CC_OP_SUBL:
+    case CC_OP_SUBQ:
+        
+        size = cc_op - CC_OP_SUBB;
+        switch(jcc_op) {
+        case JCC_Z:
+        fast_jcc_z:
+            switch(size) {
+            case 0:
+                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
+                t0 = cpu_tmp0;
+                break;
+            case 1:
+                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
+                t0 = cpu_tmp0;
+                break;
+#ifdef TARGET_X86_64
+            case 2:
+                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
+                t0 = cpu_tmp0;
+                break;
+#endif
+            default:
+                t0 = cpu_cc_dst;
+                break;
+            }
+            tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
+            break;
+        case JCC_S:
+        fast_jcc_s:
+            switch(size) {
+            case 0:
+                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
+                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
+                                   0, l1);
+                break;
+            case 1:
+                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
+                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
+                                   0, l1);
+                break;
+#ifdef TARGET_X86_64
+            case 2:
+                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
+                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
+                                   0, l1);
+                break;
+#endif
+            default:
+                tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, 
+                                   0, l1);
+                break;
+            }
+            break;
+            
+        case JCC_B:
+            cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
+            goto fast_jcc_b;
+        case JCC_BE:
+            cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
+        fast_jcc_b:
+            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+            switch(size) {
+            case 0:
+                t0 = cpu_tmp0;
+                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
+                tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
+                break;
+            case 1:
+                t0 = cpu_tmp0;
+                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
+                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
+                break;
+#ifdef TARGET_X86_64
+            case 2:
+                t0 = cpu_tmp0;
+                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
+                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
+                break;
+#endif
+            default:
+                t0 = cpu_cc_src;
+                break;
+            }
+            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
+            break;
+            
+        case JCC_L:
+            cond = inv ? TCG_COND_GE : TCG_COND_LT;
+            goto fast_jcc_l;
+        case JCC_LE:
+            cond = inv ? TCG_COND_GT : TCG_COND_LE;
+        fast_jcc_l:
+            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+            switch(size) {
+            case 0:
+                t0 = cpu_tmp0;
+                tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
+                tcg_gen_ext8s_tl(t0, cpu_cc_src);
+                break;
+            case 1:
+                t0 = cpu_tmp0;
+                tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
+                tcg_gen_ext16s_tl(t0, cpu_cc_src);
+                break;
+#ifdef TARGET_X86_64
+            case 2:
+                t0 = cpu_tmp0;
+                tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
+                tcg_gen_ext32s_tl(t0, cpu_cc_src);
+                break;
+#endif
+            default:
+                t0 = cpu_cc_src;
+                break;
+            }
+            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
+            break;
+            
+        default:
+            goto slow_jcc;
+        }
+        break;
+        
+        /* some jumps are easy to compute */
+    case CC_OP_ADDB:
+    case CC_OP_ADDW:
+    case CC_OP_ADDL:
+    case CC_OP_ADDQ:
+        
+    case CC_OP_ADCB:
+    case CC_OP_ADCW:
+    case CC_OP_ADCL:
+    case CC_OP_ADCQ:
+        
+    case CC_OP_SBBB:
+    case CC_OP_SBBW:
+    case CC_OP_SBBL:
+    case CC_OP_SBBQ:
+        
+    case CC_OP_LOGICB:
+    case CC_OP_LOGICW:
+    case CC_OP_LOGICL:
+    case CC_OP_LOGICQ:
+        
+    case CC_OP_INCB:
+    case CC_OP_INCW:
+    case CC_OP_INCL:
+    case CC_OP_INCQ:
+        
+    case CC_OP_DECB:
+    case CC_OP_DECW:
+    case CC_OP_DECL:
+    case CC_OP_DECQ:
+        
+    case CC_OP_SHLB:
+    case CC_OP_SHLW:
+    case CC_OP_SHLL:
+    case CC_OP_SHLQ:
+        
+    case CC_OP_SARB:
+    case CC_OP_SARW:
+    case CC_OP_SARL:
+    case CC_OP_SARQ:
+        switch(jcc_op) {
+        case JCC_Z:
+            size = (cc_op - CC_OP_ADDB) & 3;
+            goto fast_jcc_z;
+        case JCC_S:
+            size = (cc_op - CC_OP_ADDB) & 3;
+            goto fast_jcc_s;
+        default:
+            goto slow_jcc;
+        }
+        break;
+    default:
+    slow_jcc:
+        gen_setcc_slow_T0(s, jcc_op);
+        tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, 
+                           cpu_T[0], 0, l1);
+        break;
+    }
+}
+
+/* XXX: does not work with gdbstub "ice" single step - not a
+   serious problem */
+static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip) /* emit the "ECX is zero: jump to next_eip" prologue; returns the exit label */
+{
+    int l1, l2;
+
+    l1 = gen_new_label();
+    l2 = gen_new_label();
+    gen_op_jnz_ecx(s->aflag, l1); /* ECX != 0: skip over the exit path */
+    gen_set_label(l2); /* exit path; callers can branch here through the returned label */
+    gen_jmp_tb(s, next_eip, 1);
+    gen_set_label(l1); /* code emitted after this call is reached only when ECX != 0 */
+    return l2;
+}
+
+static inline void gen_stos(DisasContext *s, int ot)
+{
+    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+    gen_string_movl_A0_EDI(s);
+    gen_op_st_T0_A0(ot + s->mem_index);
+    gen_op_movl_T0_Dshift(ot);
+    gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+static inline void gen_lods(DisasContext *s, int ot)
+{
+    gen_string_movl_A0_ESI(s);
+    gen_op_ld_T0_A0(ot + s->mem_index);
+    gen_op_mov_reg_T0(ot, R_EAX);
+    gen_op_movl_T0_Dshift(ot);
+    gen_op_add_reg_T0(s->aflag, R_ESI);
+}
+
+static inline void gen_scas(DisasContext *s, int ot)
+{
+    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+    gen_string_movl_A0_EDI(s);
+    gen_op_ld_T1_A0(ot + s->mem_index);
+    gen_op_cmpl_T0_T1_cc();
+    gen_op_movl_T0_Dshift(ot);
+    gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+static inline void gen_cmps(DisasContext *s, int ot)
+{
+    gen_string_movl_A0_ESI(s);
+    gen_op_ld_T0_A0(ot + s->mem_index);
+    gen_string_movl_A0_EDI(s);
+    gen_op_ld_T1_A0(ot + s->mem_index);
+    gen_op_cmpl_T0_T1_cc();
+    gen_op_movl_T0_Dshift(ot);
+    gen_op_add_reg_T0(s->aflag, R_ESI);
+    gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+static inline void gen_ins(DisasContext *s, int ot)
+{
+    if (use_icount)
+        gen_io_start();
+    gen_string_movl_A0_EDI(s);
+    /* Note: we must do this dummy write first to be restartable in
+       case of page fault. */
+    gen_op_movl_T0_0();
+    gen_op_st_T0_A0(ot + s->mem_index);
+    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
+    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
+    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+    gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32);
+    gen_op_st_T0_A0(ot + s->mem_index);
+    gen_op_movl_T0_Dshift(ot);
+    gen_op_add_reg_T0(s->aflag, R_EDI);
+    if (use_icount)
+        gen_io_end();
+}
+
+static inline void gen_outs(DisasContext *s, int ot)
+{
+    if (use_icount)
+        gen_io_start();
+    gen_string_movl_A0_ESI(s);
+    gen_op_ld_T0_A0(ot + s->mem_index);
+
+    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
+    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
+    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
+    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+
+    gen_op_movl_T0_Dshift(ot);
+    gen_op_add_reg_T0(s->aflag, R_ESI);
+    if (use_icount)
+        gen_io_end();
+}
+
+/* same method as Valgrind : we generate jumps to current or next
+   instruction */
+#define GEN_REPZ(op)                                                          \
+static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
+                                 target_ulong cur_eip, target_ulong next_eip) \
+{                                                                             \
+    int l2;\
+    gen_update_cc_op(s);                                                      \
+    l2 = gen_jz_ecx_string(s, next_eip);                                      \
+    gen_ ## op(s, ot);                                                        \
+    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
+    /* a loop would cause two single step exceptions if ECX = 1               \
+       before rep string_insn */                                              \
+    if (!s->jmp_opt)                                                          \
+        gen_op_jz_ecx(s->aflag, l2);                                          \
+    gen_jmp(s, cur_eip);                                                      \
+}
+
+#define GEN_REPZ2(op)                                                         \
+static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
+                                   target_ulong cur_eip,                      \
+                                   target_ulong next_eip,                     \
+                                   int nz)                                    \
+{                                                                             \
+    int l2;\
+    gen_update_cc_op(s);                                                      \
+    l2 = gen_jz_ecx_string(s, next_eip);                                      \
+    gen_ ## op(s, ot);                                                        \
+    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
+    gen_op_set_cc_op(CC_OP_SUBB + ot);                                        \
+    gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2);                \
+    if (!s->jmp_opt)                                                          \
+        gen_op_jz_ecx(s->aflag, l2);                                          \
+    gen_jmp(s, cur_eip);                                                      \
+}
+
+GEN_REPZ(movs)
+GEN_REPZ(stos)
+GEN_REPZ(lods)
+GEN_REPZ(ins)
+GEN_REPZ(outs)
+GEN_REPZ2(scas)
+GEN_REPZ2(cmps)
+
+static void gen_helper_fp_arith_ST0_FT0(int op) /* emit the ST0-with-FT0 arithmetic helper selected by op (0..7); other values emit nothing */
+{
+    switch (op) {
+    case 0: gen_helper_fadd_ST0_FT0(); break;
+    case 1: gen_helper_fmul_ST0_FT0(); break;
+    case 2: gen_helper_fcom_ST0_FT0(); break;
+    case 3: gen_helper_fcom_ST0_FT0(); break; /* same helper as op 2; NOTE(review): presumably the pop for op 3 is emitted by the caller - confirm */
+    case 4: gen_helper_fsub_ST0_FT0(); break;
+    case 5: gen_helper_fsubr_ST0_FT0(); break;
+    case 6: gen_helper_fdiv_ST0_FT0(); break;
+    case 7: gen_helper_fdivr_ST0_FT0(); break;
+    }
+}
+
+/* NOTE: the "r" ops are swapped relative to gen_helper_fp_arith_ST0_FT0: here op 4/6 are fsubr/fdivr and op 5/7 are fsub/fdiv */
+static void gen_helper_fp_arith_STN_ST0(int op, int opreg) /* emit the ST(opreg)-with-ST0 arithmetic helper selected by op */
+{
+    TCGv_i32 tmp = tcg_const_i32(opreg); /* created even when op matches no case below */
+    switch (op) { /* ops 2 and 3 have no case here, so they emit no helper call */
+    case 0: gen_helper_fadd_STN_ST0(tmp); break;
+    case 1: gen_helper_fmul_STN_ST0(tmp); break;
+    case 4: gen_helper_fsubr_STN_ST0(tmp); break;
+    case 5: gen_helper_fsub_STN_ST0(tmp); break;
+    case 6: gen_helper_fdivr_STN_ST0(tmp); break;
+    case 7: gen_helper_fdiv_STN_ST0(tmp); break;
+    }
+}
+
+/* if d == OR_TMP0, it means memory operand (address in A0) */
+static void gen_op(DisasContext *s1, int op, int ot, int d) /* emit "dest = dest <op> T1" plus the flag bookkeeping; dest is regs[d] or [A0] */
+{
+    if (d != OR_TMP0) { /* first operand comes from the register... */
+        gen_op_mov_TN_reg(ot, 0, d);
+    } else { /* ...or from memory at A0 */
+        gen_op_ld_T0_A0(ot + s1->mem_index);
+    }
+    switch(op) {
+    case OP_ADCL:
+        if (s1->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s1->cc_op);
+        gen_compute_eflags_c(cpu_tmp4); /* incoming carry, taken from the flags state stored above */
+        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+        if (d != OR_TMP0)
+            gen_op_mov_reg_T0(ot, d);
+        else
+            gen_op_st_T0_A0(ot + s1->mem_index);
+        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
+        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
+        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot); /* run-time cc_op = CC_OP_ADDB + ot + 4 * carry (presumably the ADC group when carry is set - confirm) */
+        s1->cc_op = CC_OP_DYNAMIC; /* cc_op is no longer known at translation time */
+        break;
+    case OP_SBBL:
+        if (s1->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s1->cc_op);
+        gen_compute_eflags_c(cpu_tmp4); /* incoming carry (borrow), taken from the flags state stored above */
+        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+        if (d != OR_TMP0)
+            gen_op_mov_reg_T0(ot, d);
+        else
+            gen_op_st_T0_A0(ot + s1->mem_index);
+        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
+        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
+        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot); /* run-time cc_op = CC_OP_SUBB + ot + 4 * carry (presumably the SBB group when carry is set - confirm) */
+        s1->cc_op = CC_OP_DYNAMIC; /* cc_op is no longer known at translation time */
+        break;
+    case OP_ADDL:
+        gen_op_addl_T0_T1();
+        if (d != OR_TMP0)
+            gen_op_mov_reg_T0(ot, d);
+        else
+            gen_op_st_T0_A0(ot + s1->mem_index);
+        gen_op_update2_cc(); /* cc_src = T1, cc_dst = result */
+        s1->cc_op = CC_OP_ADDB + ot;
+        break;
+    case OP_SUBL:
+        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        if (d != OR_TMP0)
+            gen_op_mov_reg_T0(ot, d);
+        else
+            gen_op_st_T0_A0(ot + s1->mem_index);
+        gen_op_update2_cc(); /* cc_src = T1, cc_dst = result */
+        s1->cc_op = CC_OP_SUBB + ot;
+        break;
+    default: /* any unlisted op is handled like OP_ANDL */
+    case OP_ANDL:
+        tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        if (d != OR_TMP0)
+            gen_op_mov_reg_T0(ot, d);
+        else
+            gen_op_st_T0_A0(ot + s1->mem_index);
+        gen_op_update1_cc(); /* cc_dst = result, cc_src discarded */
+        s1->cc_op = CC_OP_LOGICB + ot;
+        break;
+    case OP_ORL:
+        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        if (d != OR_TMP0)
+            gen_op_mov_reg_T0(ot, d);
+        else
+            gen_op_st_T0_A0(ot + s1->mem_index);
+        gen_op_update1_cc(); /* cc_dst = result, cc_src discarded */
+        s1->cc_op = CC_OP_LOGICB + ot;
+        break;
+    case OP_XORL:
+        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        if (d != OR_TMP0)
+            gen_op_mov_reg_T0(ot, d);
+        else
+            gen_op_st_T0_A0(ot + s1->mem_index);
+        gen_op_update1_cc(); /* cc_dst = result, cc_src discarded */
+        s1->cc_op = CC_OP_LOGICB + ot;
+        break;
+    case OP_CMPL: /* compare: flags only, nothing is written back to the destination */
+        gen_op_cmpl_T0_T1_cc();
+        s1->cc_op = CC_OP_SUBB + ot;
+        break;
+    }
+}
+
+/* if d == OR_TMP0, it means memory operand (address in A0) */
+static void gen_inc(DisasContext *s1, int ot, int d, int c) /* emit an increment (c > 0) or decrement (otherwise) of regs[d] or [A0] */
+{
+    if (d != OR_TMP0)
+        gen_op_mov_TN_reg(ot, 0, d);
+    else
+        gen_op_ld_T0_A0(ot + s1->mem_index);
+    if (s1->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s1->cc_op); /* store the previous cc_op: the carry helper call below is passed cpu_cc_op */
+    if (c > 0) {
+        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
+        s1->cc_op = CC_OP_INCB + ot; /* translation-time state only; nothing is emitted for it here */
+    } else {
+        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
+        s1->cc_op = CC_OP_DECB + ot; /* translation-time state only; nothing is emitted for it here */
+    }
+    if (d != OR_TMP0)
+        gen_op_mov_reg_T0(ot, d);
+    else
+        gen_op_st_T0_A0(ot + s1->mem_index);
+    gen_compute_eflags_c(cpu_cc_src); /* cc_src = carry computed from the flags state that preceded this op */
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
+/* Emit a shift of the operand selected by op1 by the count held in T1.
+   op1 == OR_TMP0 means a memory operand whose address is in A0; any
+   other value names a register.  is_right selects a right shift and
+   is_arith (right shifts only) an arithmetic one.  The flags are only
+   updated at run time when the masked count is non zero. */
+static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
+                            int is_right, int is_arith)
+{
+    const int in_memory = (op1 == OR_TMP0);
+    target_ulong mask = (ot == OT_QUAD) ? 0x3f : 0x1f;
+    int skip_flags;
+    TCGv result, partial;
+
+    /* fetch the operand into T0 */
+    if (in_memory) {
+        gen_op_ld_T0_A0(ot + s->mem_index);
+    } else {
+        gen_op_mov_TN_reg(ot, 0, op1);
+    }
+
+    /* T1 = masked count, tmp5 = masked count - 1 */
+    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
+    tcg_gen_addi_tl(cpu_tmp5, cpu_T[1], -1);
+
+    /* T3 = operand shifted by count - 1, T0 = operand shifted by count */
+    if (!is_right) {
+        tcg_gen_shl_tl(cpu_T3, cpu_T[0], cpu_tmp5);
+        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+    } else if (is_arith) {
+        gen_exts(ot, cpu_T[0]);
+        tcg_gen_sar_tl(cpu_T3, cpu_T[0], cpu_tmp5);
+        tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+    } else {
+        gen_extu(ot, cpu_T[0]);
+        tcg_gen_shr_tl(cpu_T3, cpu_T[0], cpu_tmp5);
+        tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+    }
+
+    /* write the result back */
+    if (in_memory) {
+        gen_op_st_T0_A0(ot + s->mem_index);
+    } else {
+        gen_op_mov_reg_T0(ot, op1);
+    }
+
+    /* flush the statically tracked cc_op: the run time update below is
+       conditional */
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+    }
+
+    /* XXX: inefficient */
+    result = tcg_temp_local_new();
+    partial = tcg_temp_local_new();
+
+    tcg_gen_mov_tl(result, cpu_T[0]);
+    tcg_gen_mov_tl(partial, cpu_T3);
+
+    /* skip the flags update when the count is zero */
+    skip_flags = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, skip_flags);
+
+    tcg_gen_mov_tl(cpu_cc_src, partial);
+    tcg_gen_mov_tl(cpu_cc_dst, result);
+    tcg_gen_movi_i32(cpu_cc_op, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
+
+    gen_set_label(skip_flags);
+    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+    tcg_temp_free(result);
+    tcg_temp_free(partial);
+}
+
+/* Emit a shift of the operand selected by op1 by the immediate count op2.
+   op1 == OR_TMP0 means a memory operand whose address is in A0.  The
+   count is masked with 0x3f for OT_QUAD and 0x1f otherwise; when the
+   masked count is zero no shift and no flags update are emitted (the
+   operand is still loaded and stored back). */
+static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
+                            int is_right, int is_arith)
+{
+    const int in_memory = (op1 == OR_TMP0);
+    int count;
+
+    /* fetch the operand into T0 */
+    if (in_memory) {
+        gen_op_ld_T0_A0(ot + s->mem_index);
+    } else {
+        gen_op_mov_TN_reg(ot, 0, op1);
+    }
+
+    count = op2 & ((ot == OT_QUAD) ? 0x3f : 0x1f);
+    if (count != 0) {
+        /* tmp4 = operand shifted by count - 1, T0 = final result */
+        if (!is_right) {
+            tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], count - 1);
+            tcg_gen_shli_tl(cpu_T[0], cpu_T[0], count);
+        } else if (is_arith) {
+            gen_exts(ot, cpu_T[0]);
+            tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], count - 1);
+            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], count);
+        } else {
+            gen_extu(ot, cpu_T[0]);
+            tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], count - 1);
+            tcg_gen_shri_tl(cpu_T[0], cpu_T[0], count);
+        }
+    }
+
+    /* write the result back */
+    if (in_memory) {
+        gen_op_st_T0_A0(ot + s->mem_index);
+    } else {
+        gen_op_mov_reg_T0(ot, op1);
+    }
+
+    /* the flags state only changes for a non zero shift */
+    if (count == 0) {
+        return;
+    }
+    tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+    s->cc_op = (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot;
+}
+
+/* Shift arg1 by a signed immediate: left by arg2 bits when arg2 >= 0,
+   right (tcg_gen_shri_tl) by -arg2 bits otherwise. */
+static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
+{
+    if (arg2 < 0) {
+        tcg_gen_shri_tl(ret, arg1, -arg2);
+        return;
+    }
+    tcg_gen_shli_tl(ret, arg1, arg2);
+}
+
+/* Emit a rotate of the operand selected by op1 by the count held in T1.
+   op1 == OR_TMP0 means a memory operand whose address is in A0; any
+   other value names a register.  is_right selects a rotate to the right.
+   Both the rotation and the flags update are skipped at run time when
+   the masked count is zero. */
+static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, 
+                          int is_right)
+{
+    target_ulong mask;
+    int label1, label2, data_bits;
+    TCGv t0, t1, t2, a0;
+
+    /* XXX: inefficient, but we must use local temps */
+    t0 = tcg_temp_local_new();
+    t1 = tcg_temp_local_new();
+    t2 = tcg_temp_local_new();
+    a0 = tcg_temp_local_new();
+
+    if (ot == OT_QUAD)
+        mask = 0x3f;
+    else
+        mask = 0x1f;
+
+    /* load */
+    if (op1 == OR_TMP0) {
+        /* keep a copy of the address: it is needed again for the store */
+        tcg_gen_mov_tl(a0, cpu_A0);
+        gen_op_ld_v(ot + s->mem_index, t0, a0);
+    } else {
+        gen_op_mov_v_reg(ot, t0, op1);
+    }
+
+    /* t1 = masked rotate count */
+    tcg_gen_mov_tl(t1, cpu_T[1]);
+
+    tcg_gen_andi_tl(t1, t1, mask);
+
+    /* Must test zero case to avoid using undefined behaviour in TCG
+       shifts. */
+    label1 = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
+    
+    /* for byte and word operands the count is further reduced to
+       3 + ot bits (i.e. 0..7 or 0..15) */
+    if (ot <= OT_WORD)
+        tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
+    else
+        tcg_gen_mov_tl(cpu_tmp0, t1);
+    
+    /* t2 keeps the zero extended original value for the flags update */
+    gen_extu(ot, t0);
+    tcg_gen_mov_tl(t2, t0);
+
+    data_bits = 8 << ot;
+    /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
+       fix TCG definition) */
+    /* result = (value shifted by count) | (value shifted the other way
+       by data_bits - count) */
+    if (is_right) {
+        tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
+        tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
+        tcg_gen_shl_tl(t0, t0, cpu_tmp0);
+    } else {
+        tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
+        tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
+        tcg_gen_shr_tl(t0, t0, cpu_tmp0);
+    }
+    tcg_gen_or_tl(t0, t0, cpu_tmp4);
+
+    gen_set_label(label1);
+    /* store */
+    if (op1 == OR_TMP0) {
+        gen_op_st_v(ot + s->mem_index, t0, a0);
+    } else {
+        gen_op_mov_reg_v(ot, op1, t0);
+    }
+    
+    /* update eflags */
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op);
+
+    /* no flags update when the masked count is zero */
+    label2 = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
+
+    /* cc_src = current eflags with CC_O and CC_C cleared */
+    gen_compute_eflags(cpu_cc_src);
+    tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
+    /* CC_O: bit data_bits - 1 of (old value ^ new value), moved to bit 11
+       and masked with CC_O */
+    tcg_gen_xor_tl(cpu_tmp0, t2, t0);
+    tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
+    tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
+    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+    /* CC_C: for a right rotate bit data_bits - 1 of the result is first
+       moved down to bit 0; the result is then masked with CC_C */
+    if (is_right) {
+        tcg_gen_shri_tl(t0, t0, data_bits - 1);
+    }
+    tcg_gen_andi_tl(t0, t0, CC_C);
+    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
+    
+    tcg_gen_discard_tl(cpu_cc_dst);
+    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
+        
+    gen_set_label(label2);
+    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(a0);
+}
+
+/* Emit a rotate of the operand selected by op1 by the immediate count
+   op2.  op1 == OR_TMP0 means a memory operand whose address is in A0.
+   is_right selects a rotate to the right.  When the masked count is zero
+   no rotation and no flags update are emitted (the operand is still
+   loaded, zero extended and stored back). */
+static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
+                          int is_right)
+{
+    const int in_memory = (op1 == OR_TMP0);
+    int count, width;
+    TCGv val, orig, addr;
+
+    /* XXX: inefficient, but we must use local temps */
+    val = tcg_temp_local_new();
+    orig = tcg_temp_local_new();
+    addr = tcg_temp_local_new();
+
+    /* load; the address is copied since it is needed for the store */
+    if (in_memory) {
+        tcg_gen_mov_tl(addr, cpu_A0);
+        gen_op_ld_v(ot + s->mem_index, val, addr);
+    } else {
+        gen_op_mov_v_reg(ot, val, op1);
+    }
+
+    /* orig keeps the zero extended original value for the flags */
+    gen_extu(ot, val);
+    tcg_gen_mov_tl(orig, val);
+
+    count = op2 & ((ot == OT_QUAD) ? 0x3f : 0x1f);
+    width = 8 << ot;
+    if (count != 0) {
+        /* reduce the count to 3 + ot bits */
+        const int shift = count & ((1 << (3 + ot)) - 1);
+
+        if (is_right) {
+            tcg_gen_shri_tl(cpu_tmp4, val, shift);
+            tcg_gen_shli_tl(val, val, width - shift);
+        } else {
+            tcg_gen_shli_tl(cpu_tmp4, val, shift);
+            tcg_gen_shri_tl(val, val, width - shift);
+        }
+        tcg_gen_or_tl(val, val, cpu_tmp4);
+    }
+
+    /* store */
+    if (in_memory) {
+        gen_op_st_v(ot + s->mem_index, val, addr);
+    } else {
+        gen_op_mov_reg_v(ot, op1, val);
+    }
+
+    if (count != 0) {
+        /* update eflags */
+        if (s->cc_op != CC_OP_DYNAMIC) {
+            gen_op_set_cc_op(s->cc_op);
+        }
+
+        /* cc_src = current eflags with CC_O and CC_C cleared */
+        gen_compute_eflags(cpu_cc_src);
+        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
+
+        /* CC_O from bit width - 1 of (old ^ new), moved to bit 11 */
+        tcg_gen_xor_tl(cpu_tmp0, orig, val);
+        tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (width - 1));
+        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
+        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+
+        /* CC_C from the result (its top bit for a right rotate) */
+        if (is_right) {
+            tcg_gen_shri_tl(val, val, width - 1);
+        }
+        tcg_gen_andi_tl(val, val, CC_C);
+        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, val);
+
+        tcg_gen_discard_tl(cpu_cc_dst);
+        tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
+        s->cc_op = CC_OP_EFLAGS;
+    }
+
+    tcg_temp_free(val);
+    tcg_temp_free(orig);
+    tcg_temp_free(addr);
+}
+
+/* XXX: add faster immediate = 1 case */
+/* Emit a call to the rcl/rcr helper matching the operand size 'ot' on
+   the operand selected by op1, with the count in T1.  op1 == OR_TMP0
+   means a memory operand whose address is in A0.  is_right selects the
+   rcr helpers.  No helper is called for an 'ot' without a case below. */
+static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
+                           int is_right)
+{
+    const int in_memory = (op1 == OR_TMP0);
+    int flags_done;
+
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+    }
+
+    /* load */
+    if (in_memory) {
+        gen_op_ld_T0_A0(ot + s->mem_index);
+    } else {
+        gen_op_mov_TN_reg(ot, 0, op1);
+    }
+
+    switch (ot) {
+    case 0:
+        if (is_right) {
+            gen_helper_rcrb(cpu_T[0], cpu_T[0], cpu_T[1]);
+        } else {
+            gen_helper_rclb(cpu_T[0], cpu_T[0], cpu_T[1]);
+        }
+        break;
+    case 1:
+        if (is_right) {
+            gen_helper_rcrw(cpu_T[0], cpu_T[0], cpu_T[1]);
+        } else {
+            gen_helper_rclw(cpu_T[0], cpu_T[0], cpu_T[1]);
+        }
+        break;
+    case 2:
+        if (is_right) {
+            gen_helper_rcrl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        } else {
+            gen_helper_rcll(cpu_T[0], cpu_T[0], cpu_T[1]);
+        }
+        break;
+#ifdef TARGET_X86_64
+    case 3:
+        if (is_right) {
+            gen_helper_rcrq(cpu_T[0], cpu_T[0], cpu_T[1]);
+        } else {
+            gen_helper_rclq(cpu_T[0], cpu_T[0], cpu_T[1]);
+        }
+        break;
+#endif
+    }
+
+    /* store */
+    if (in_memory) {
+        gen_op_st_T0_A0(ot + s->mem_index);
+    } else {
+        gen_op_mov_reg_T0(ot, op1);
+    }
+
+    /* update eflags, unless cc_tmp is -1 at run time (presumably the
+       helpers' way of reporting that the flags are unchanged - confirm
+       against the helper implementation) */
+    flags_done = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, flags_done);
+
+    tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
+    tcg_gen_discard_tl(cpu_cc_dst);
+    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
+
+    gen_set_label(flags_done);
+    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+}
+
+/* XXX: add faster immediate case */
+/* Emit a double width shift (presumably x86 SHLD/SHRD - confirm against
+   the callers): the operand selected by op1 is shifted by the count in
+   T3, and the vacated bits are filled from T1.  op1 == OR_TMP0 means a
+   memory operand whose address is in A0.  is_right selects a shift to
+   the right.  Both the shift and the flags update are skipped at run
+   time when the masked count is zero. */
+static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, 
+                                int is_right)
+{
+    int label1, label2, data_bits;
+    target_ulong mask;
+    TCGv t0, t1, t2, a0;
+
+    /* local temps: t0 = operand/result, t1 = fill value, t2 = count,
+       a0 = saved operand address */
+    t0 = tcg_temp_local_new();
+    t1 = tcg_temp_local_new();
+    t2 = tcg_temp_local_new();
+    a0 = tcg_temp_local_new();
+
+    if (ot == OT_QUAD)
+        mask = 0x3f;
+    else
+        mask = 0x1f;
+
+    /* load */
+    if (op1 == OR_TMP0) {
+        tcg_gen_mov_tl(a0, cpu_A0);
+        gen_op_ld_v(ot + s->mem_index, t0, a0);
+    } else {
+        gen_op_mov_v_reg(ot, t0, op1);
+    }
+
+    /* mask the count in place in T3 */
+    tcg_gen_andi_tl(cpu_T3, cpu_T3, mask);
+
+    tcg_gen_mov_tl(t1, cpu_T[1]);
+    tcg_gen_mov_tl(t2, cpu_T3);
+
+    /* Must test zero case to avoid using undefined behaviour in TCG
+       shifts. */
+    label1 = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
+    
+    /* tmp5 = count - 1; the value shifted by count - 1 ends up in tmp4
+       and is later copied to cc_src */
+    tcg_gen_addi_tl(cpu_tmp5, t2, -1);
+    if (ot == OT_WORD) {
+        /* Note: we implement the Intel behaviour for shift count > 16 */
+        if (is_right) {
+            /* t0 = (t1 << 16) | (t0 & 0xffff), as a 32 bit value */
+            tcg_gen_andi_tl(t0, t0, 0xffff);
+            tcg_gen_shli_tl(cpu_tmp0, t1, 16);
+            tcg_gen_or_tl(t0, t0, cpu_tmp0);
+            tcg_gen_ext32u_tl(t0, t0);
+
+            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
+            
+            /* only needed if count > 16, but a test would complicate */
+            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
+            tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
+
+            tcg_gen_shr_tl(t0, t0, t2);
+
+            tcg_gen_or_tl(t0, t0, cpu_tmp0);
+        } else {
+            /* XXX: not optimal */
+            /* t1 = (t1 << 16) | (t0 & 0xffff), as a 32 bit value */
+            tcg_gen_andi_tl(t0, t0, 0xffff);
+            tcg_gen_shli_tl(t1, t1, 16);
+            tcg_gen_or_tl(t1, t1, t0);
+            tcg_gen_ext32u_tl(t1, t1);
+            
+            /* tmp4 = (t0 << (count - 1)) | (t1 >> (32 - (count - 1))) */
+            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
+            tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(32), cpu_tmp5);
+            tcg_gen_shr_tl(cpu_tmp6, t1, cpu_tmp0);
+            tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp6);
+
+            /* t0 = (t0 << count) | (t1 >> (32 - count)) */
+            tcg_gen_shl_tl(t0, t0, t2);
+            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
+            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
+            tcg_gen_or_tl(t0, t0, t1);
+        }
+    } else {
+        data_bits = 8 << ot;
+        if (is_right) {
+            if (ot == OT_LONG)
+                tcg_gen_ext32u_tl(t0, t0);
+
+            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
+
+            /* t0 = (t0 >> count) | (t1 << (data_bits - count)) */
+            tcg_gen_shr_tl(t0, t0, t2);
+            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
+            tcg_gen_shl_tl(t1, t1, cpu_tmp5);
+            tcg_gen_or_tl(t0, t0, t1);
+            
+        } else {
+            if (ot == OT_LONG)
+                tcg_gen_ext32u_tl(t1, t1);
+
+            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
+            
+            /* t0 = (t0 << count) | (t1 >> (data_bits - count)) */
+            tcg_gen_shl_tl(t0, t0, t2);
+            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
+            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
+            tcg_gen_or_tl(t0, t0, t1);
+        }
+    }
+    /* t1 is reused to carry the partially shifted value to the flags
+       update below */
+    tcg_gen_mov_tl(t1, cpu_tmp4);
+
+    gen_set_label(label1);
+    /* store */
+    if (op1 == OR_TMP0) {
+        gen_op_st_v(ot + s->mem_index, t0, a0);
+    } else {
+        gen_op_mov_reg_v(ot, op1, t0);
+    }
+    
+    /* update eflags */
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op);
+
+    /* no flags update when the masked count is zero */
+    label2 = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
+
+    tcg_gen_mov_tl(cpu_cc_src, t1);
+    tcg_gen_mov_tl(cpu_cc_dst, t0);
+    if (is_right) {
+        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
+    } else {
+        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
+    }
+    gen_set_label(label2);
+    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(a0);
+}
+
+/* Dispatch a shift or rotate 'op' on operand 'd' of size 'ot' with the
+   count in T1.  Unless s == OR_TMP1, register 's' is first loaded into
+   T1.  An 'op' not listed below emits nothing further. */
+static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
+{
+    if (s != OR_TMP1) {
+        gen_op_mov_TN_reg(ot, 1, s);
+    }
+
+    switch (op) {
+    case OP_ROL:
+    case OP_ROR:
+        gen_rot_rm_T1(s1, ot, d, op == OP_ROR);
+        break;
+    case OP_SHL:
+    case OP_SHL1:
+        gen_shift_rm_T1(s1, ot, d, 0, 0);
+        break;
+    case OP_SHR:
+    case OP_SAR:
+        /* both are right shifts; only SAR is arithmetic */
+        gen_shift_rm_T1(s1, ot, d, 1, op == OP_SAR);
+        break;
+    case OP_RCL:
+    case OP_RCR:
+        gen_rotc_rm_T1(s1, ot, d, op == OP_RCR);
+        break;
+    }
+}
+
+/* Dispatch a shift or rotate 'op' on operand 'd' of size 'ot' with the
+   immediate count 'c'.  Operations without an immediate variant load the
+   count into T1 and go through gen_shift(). */
+static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
+{
+    switch (op) {
+    case OP_ROL:
+    case OP_ROR:
+        gen_rot_rm_im(s1, ot, d, c, op == OP_ROR);
+        break;
+    case OP_SHL:
+    case OP_SHL1:
+        gen_shift_rm_im(s1, ot, d, c, 0, 0);
+        break;
+    case OP_SHR:
+    case OP_SAR:
+        /* both are right shifts; only SAR is arithmetic */
+        gen_shift_rm_im(s1, ot, d, c, 1, op == OP_SAR);
+        break;
+    default:
+        /* currently not optimized */
+        gen_op_movl_T1_im(c);
+        gen_shift(s1, op, ot, d, OR_TMP1);
+        break;
+    }
+}
+
+/* Decode the memory operand described by 'modrm' (reading any SIB byte
+   and displacement from s->pc, which is advanced past them) and emit
+   code that leaves the resulting address in A0.  The segment base is
+   added when s->addseg is set or a segment override is active.
+   On return *reg_ptr is OR_A0 and *offset_ptr is 0. */
+static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr)
+{
+    target_long disp;
+    int havesib;
+    int base;
+    int index;
+    int scale;
+    int opreg;
+    int mod, rm, code, override, must_add_seg;
+
+    override = s->override;
+    must_add_seg = s->addseg;
+    if (override >= 0)
+        must_add_seg = 1;
+    mod = (modrm >> 6) & 3;
+    rm = modrm & 7;
+
+    if (s->aflag) {
+        /* 32/64 bit addressing */
+
+        havesib = 0;
+        base = rm;
+        index = 0;
+        scale = 0;
+
+        if (base == 4) {
+            /* rm == 4 announces a SIB byte: scale[7:6] index[5:3] base[2:0] */
+            havesib = 1;
+            code = ldub_code(s->pc++);
+            scale = (code >> 6) & 3;
+            index = ((code >> 3) & 7) | REX_X(s);
+            base = (code & 7);
+        }
+        base |= REX_B(s);
+
+        switch (mod) {
+        case 0:
+            if ((base & 7) == 5) {
+                /* no base register, 32 bit displacement only */
+                base = -1;
+                disp = (int32_t)ldl_code(s->pc);
+                s->pc += 4;
+                if (CODE64(s) && !havesib) {
+                    /* relative to the end of the instruction
+                       (s->rip_offset accounts for bytes still to come) */
+                    disp += s->pc + s->rip_offset;
+                }
+            } else {
+                disp = 0;
+            }
+            break;
+        case 1:
+            disp = (int8_t)ldub_code(s->pc++);
+            break;
+        default:
+        case 2:
+            disp = ldl_code(s->pc);
+            s->pc += 4;
+            break;
+        }
+
+        if (base >= 0) {
+            /* for correct popl handling with esp */
+            if (base == 4 && s->popl_esp_hack)
+                disp += s->popl_esp_hack;
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                gen_op_movq_A0_reg(base);
+                if (disp != 0) {
+                    gen_op_addq_A0_im(disp);
+                }
+            } else
+#endif
+            {
+                gen_op_movl_A0_reg(base);
+                if (disp != 0)
+                    gen_op_addl_A0_im(disp);
+            }
+        } else {
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                gen_op_movq_A0_im(disp);
+            } else
+#endif
+            {
+                gen_op_movl_A0_im(disp);
+            }
+        }
+        /* A SIB index field of 4 without REX.X encodes "no index" for
+           every scale value, so the scale must not be consulted here
+           (with REX.X set, index is 12 and is a valid index register). */
+        if (havesib && index != 4) {
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                gen_op_addq_A0_reg_sN(scale, index);
+            } else
+#endif
+            {
+                gen_op_addl_A0_reg_sN(scale, index);
+            }
+        }
+        if (must_add_seg) {
+            if (override < 0) {
+                /* default segment: SS for EBP/ESP based addresses */
+                if (base == R_EBP || base == R_ESP)
+                    override = R_SS;
+                else
+                    override = R_DS;
+            }
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                gen_op_addq_A0_seg(override);
+            } else
+#endif
+            {
+                gen_op_addl_A0_seg(override);
+            }
+        }
+    } else {
+        /* 16 bit addressing */
+        switch (mod) {
+        case 0:
+            if (rm == 6) {
+                /* 16 bit displacement only, no register */
+                disp = lduw_code(s->pc);
+                s->pc += 2;
+                gen_op_movl_A0_im(disp);
+                rm = 0; /* avoid SS override */
+                goto no_rm;
+            } else {
+                disp = 0;
+            }
+            break;
+        case 1:
+            disp = (int8_t)ldub_code(s->pc++);
+            break;
+        default:
+        case 2:
+            disp = lduw_code(s->pc);
+            s->pc += 2;
+            break;
+        }
+        /* register combination selected by rm */
+        switch(rm) {
+        case 0:
+            gen_op_movl_A0_reg(R_EBX);
+            gen_op_addl_A0_reg_sN(0, R_ESI);
+            break;
+        case 1:
+            gen_op_movl_A0_reg(R_EBX);
+            gen_op_addl_A0_reg_sN(0, R_EDI);
+            break;
+        case 2:
+            gen_op_movl_A0_reg(R_EBP);
+            gen_op_addl_A0_reg_sN(0, R_ESI);
+            break;
+        case 3:
+            gen_op_movl_A0_reg(R_EBP);
+            gen_op_addl_A0_reg_sN(0, R_EDI);
+            break;
+        case 4:
+            gen_op_movl_A0_reg(R_ESI);
+            break;
+        case 5:
+            gen_op_movl_A0_reg(R_EDI);
+            break;
+        case 6:
+            gen_op_movl_A0_reg(R_EBP);
+            break;
+        default:
+        case 7:
+            gen_op_movl_A0_reg(R_EBX);
+            break;
+        }
+        if (disp != 0)
+            gen_op_addl_A0_im(disp);
+        /* 16 bit effective addresses wrap at 64K */
+        gen_op_andl_A0_ffff();
+    no_rm:
+        if (must_add_seg) {
+            if (override < 0) {
+                /* default segment: SS for the BP based forms */
+                if (rm == 2 || rm == 3 || rm == 6)
+                    override = R_SS;
+                else
+                    override = R_DS;
+            }
+            gen_op_addl_A0_seg(override);
+        }
+    }
+
+    opreg = OR_A0;
+    disp = 0;
+    *reg_ptr = opreg;
+    *offset_ptr = disp;
+}
+
+/* Advance s->pc past the addressing bytes (SIB byte and displacement)
+   that follow the given modrm byte.  No TCG ops are emitted. */
+static void gen_nop_modrm(DisasContext *s, int modrm)
+{
+    const int mod = (modrm >> 6) & 3;
+    const int rm = modrm & 7;
+    int base;
+
+    if (mod == 3) {
+        /* register operand: nothing follows the modrm byte */
+        return;
+    }
+
+    if (!s->aflag) {
+        /* 16 bit addressing: disp8 for mod 1, disp16 for mod 2 and for
+           the mod 0 / rm 6 form */
+        if (mod == 1) {
+            s->pc++;
+        } else if (mod != 0 || rm == 6) {
+            s->pc += 2;
+        }
+        return;
+    }
+
+    base = rm;
+    if (base == 4) {
+        /* a SIB byte follows; its low three bits give the base */
+        base = ldub_code(s->pc++) & 7;
+    }
+
+    /* disp8 for mod 1, disp32 for mod 2 and for the mod 0 / base 5 form */
+    if (mod == 1) {
+        s->pc++;
+    } else if (mod != 0 || base == 5) {
+        s->pc += 4;
+    }
+}
+
+/* used for LEA and MOV AX, mem */
+/* Add a segment base to A0: the overriding segment when s->override is
+   set (always added), otherwise DS (added only when s->addseg is set). */
+static void gen_add_A0_ds_seg(DisasContext *s)
+{
+    int seg = R_DS;
+    int need_seg = s->addseg;
+
+    if (s->override >= 0) {
+        seg = s->override;
+        need_seg = 1;
+    }
+    if (!need_seg) {
+        return;
+    }
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        gen_op_addq_A0_seg(seg);
+        return;
+    }
+#endif
+    gen_op_addl_A0_seg(seg);
+}
+
+/* Generate a load or store of 'reg' from/to the operand described by
+   modrm (a register when mod == 3, memory otherwise).  The value goes
+   through T0; when reg == OR_TMP0 it is only left in (load) or taken
+   from (store) T0. */
+static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
+{
+    const int mod = (modrm >> 6) & 3;
+    const int use_tmp0 = (reg == OR_TMP0);
+    int rm, opreg, disp;
+
+    rm = (modrm & 7) | REX_B(s);
+
+    if (mod != 3) {
+        /* memory operand: compute its address into A0 first */
+        gen_lea_modrm(s, modrm, &opreg, &disp);
+        if (!is_store) {
+            gen_op_ld_T0_A0(ot + s->mem_index);
+            if (!use_tmp0) {
+                gen_op_mov_reg_T0(ot, reg);
+            }
+            return;
+        }
+        if (!use_tmp0) {
+            gen_op_mov_TN_reg(ot, 0, reg);
+        }
+        gen_op_st_T0_A0(ot + s->mem_index);
+        return;
+    }
+
+    /* register operand */
+    if (!is_store) {
+        gen_op_mov_TN_reg(ot, 0, rm);
+        if (!use_tmp0) {
+            gen_op_mov_reg_T0(ot, reg);
+        }
+        return;
+    }
+    if (!use_tmp0) {
+        gen_op_mov_TN_reg(ot, 0, reg);
+    }
+    gen_op_mov_reg_T0(ot, rm);
+}
+
+/* Fetch an immediate of size 'ot' from the code stream at s->pc and
+   advance s->pc past it.  Any size other than OT_BYTE and OT_WORD is
+   read as 4 bytes. */
+static inline uint32_t insn_get(DisasContext *s, int ot)
+{
+    uint32_t value;
+
+    if (ot == OT_BYTE) {
+        value = ldub_code(s->pc);
+        s->pc += 1;
+    } else if (ot == OT_WORD) {
+        value = lduw_code(s->pc);
+        s->pc += 2;
+    } else {
+        value = ldl_code(s->pc);
+        s->pc += 4;
+    }
+    return value;
+}
+
+/* Size in bytes of an immediate of size 'ot': 1 << ot up to OT_LONG,
+   and 4 for anything larger. */
+static inline int insn_const_size(unsigned int ot)
+{
+    return (ot <= OT_LONG) ? (1 << ot) : 4;
+}
+
+/* Emit a jump to 'eip'.  A direct (goto_tb) jump through slot tb_num is
+   used when the destination lies in the same page as the start of the
+   current TB or as the last byte translated so far; otherwise EIP is set
+   and the block is ended with gen_eob(). */
+static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
+{
+    TranslationBlock *tb = s->tb;
+    target_ulong dest_page = (s->cs_base + eip) & TARGET_PAGE_MASK;
+    int same_page;
+
+    /* NOTE: we handle the case where the TB spans two pages here */
+    same_page = dest_page == (tb->pc & TARGET_PAGE_MASK) ||
+                dest_page == ((s->pc - 1) & TARGET_PAGE_MASK);
+
+    if (!same_page) {
+        /* jump to another page: currently not optimized */
+        gen_jmp_im(eip);
+        gen_eob(s);
+        return;
+    }
+
+    /* jump to same page: we can use a direct jump */
+    tcg_gen_goto_tb(tb_num);
+    gen_jmp_im(eip);
+    tcg_gen_exit_tb((long)tb + tb_num);
+}
+
+/* Emit a conditional jump for condition 'b': to 'val' when the condition
+   holds, to 'next_eip' otherwise.  With s->jmp_opt both exits go through
+   gen_goto_tb(); without it EIP is set on each path and the block is
+   ended with gen_eob(). */
+static inline void gen_jcc(DisasContext *s, int b,
+                           target_ulong val, target_ulong next_eip)
+{
+    const int saved_cc_op = s->cc_op;
+    int taken, done;
+
+    /* flush the statically tracked cc_op; the test below still uses the
+       value saved above */
+    if (saved_cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(saved_cc_op);
+        s->cc_op = CC_OP_DYNAMIC;
+    }
+
+    if (!s->jmp_opt) {
+        taken = gen_new_label();
+        done = gen_new_label();
+        gen_jcc1(s, saved_cc_op, b, taken);
+
+        gen_jmp_im(next_eip);
+        tcg_gen_br(done);
+
+        gen_set_label(taken);
+        gen_jmp_im(val);
+        gen_set_label(done);
+        gen_eob(s);
+        return;
+    }
+
+    taken = gen_new_label();
+    gen_jcc1(s, saved_cc_op, b, taken);
+
+    gen_goto_tb(s, 0, next_eip);
+
+    gen_set_label(taken);
+    gen_goto_tb(s, 1, val);
+    s->is_jmp = 3;
+}
+
+/* Leave 1 in T0 when condition 'b' holds and 0 otherwise. */
+static void gen_setcc(DisasContext *s, int b)
+{
+    int skip;
+    TCGv flag;
+
+    if (!is_fast_jcc_case(s, b)) {
+        /* slow case: it is more efficient not to generate a jump,
+           although it is questionable whether this optimization is
+           worth it.  Bits 3..1 of b select the condition, bit 0
+           inverts it. */
+        gen_setcc_slow_T0(s, (b >> 1) & 7);
+        if (b & 1) {
+            tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
+        }
+        return;
+    }
+
+    /* nominal case: we use a jump */
+    /* XXX: make it faster by adding new instructions in TCG */
+    flag = tcg_temp_local_new();
+    tcg_gen_movi_tl(flag, 0);
+    skip = gen_new_label();
+    /* branch over the "set to 1" on the inverted condition */
+    gen_jcc1(s, s->cc_op, b ^ 1, skip);
+    tcg_gen_movi_tl(flag, 1);
+    gen_set_label(skip);
+    tcg_gen_mov_tl(cpu_T[0], flag);
+    tcg_temp_free(flag);
+}
+
+/* Load the selector field of segment register seg_reg from the CPU state
+   into T0 (tcg_gen_ld32u_tl). */
+static inline void gen_op_movl_T0_seg(int seg_reg)
+{
+    const size_t selector_off = offsetof(CPUX86State, segs[seg_reg].selector);
+
+    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, selector_off);
+}
+
+/* Store T0 & 0xffff as the selector of segment register seg_reg and that
+   value shifted left by 4 as its base.  T0 is clobbered. */
+static inline void gen_op_movl_seg_T0_vm(int seg_reg)
+{
+    const size_t selector_off = offsetof(CPUX86State, segs[seg_reg].selector);
+    const size_t base_off = offsetof(CPUX86State, segs[seg_reg].base);
+
+    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
+    tcg_gen_st32_tl(cpu_T[0], cpu_env, selector_off);
+    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
+    tcg_gen_st_tl(cpu_T[0], cpu_env, base_off);
+}
+
+/* Move T0 to seg_reg and decide whether translation has to stop because
+   the CPU state may change.  Never call this function with
+   seg_reg == R_CS. */
+static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
+{
+    if (!s->pe || s->vm86) {
+        /* not protected mode, or vm86: selector and base are written
+           directly */
+        gen_op_movl_seg_T0_vm(seg_reg);
+        if (seg_reg == R_SS) {
+            s->is_jmp = 3;
+        }
+        return;
+    }
+
+    /* XXX: optimize by finding processor state dynamically */
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+    }
+    gen_jmp_im(cur_eip);
+    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+    gen_helper_load_seg(tcg_const_i32(seg_reg), cpu_tmp2_i32);
+    /* abort translation because the addseg value may change or
+       because ss32 may change. For R_SS, translation must always
+       stop as a special handling must be done to disable hardware
+       interrupts for the next instruction */
+    if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
+        s->is_jmp = 3;
+    }
+}
+
+/* Return 8 when 'prefixes' contains PREFIX_REPZ or PREFIX_REPNZ, and 0
+   otherwise. */
+static inline int svm_is_rep(int prefixes)
+{
+    if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+        return 8;
+    }
+    return 0;
+}
+
+/* When HF_SVMI_MASK is set in s->flags, emit a call to the
+   svm_check_intercept_param helper for 'type' and 'param', after
+   flushing cc_op and setting EIP to pc_start - s->cs_base.  Nothing is
+   emitted otherwise. */
+static inline void
+gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
+                              uint32_t type, uint64_t param)
+{
+    /* no SVM activated; fast case */
+    if (likely(!(s->flags & HF_SVMI_MASK))) {
+        return;
+    }
+
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+    }
+    gen_jmp_im(pc_start - s->cs_base);
+    gen_helper_svm_check_intercept_param(tcg_const_i32(type),
+                                         tcg_const_i64(param));
+}
+
+/* Variant of gen_svm_check_intercept_param() that passes 0 as the
+   parameter. */
+static inline void
+gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
+{
+    const uint64_t no_param = 0;
+
+    gen_svm_check_intercept_param(s, pc_start, type, no_param);
+}
+
+/* Add 'addend' to the stack pointer, using operand size 2 in 64 bit
+   code, 1 when s->ss32 is set and 0 otherwise. */
+static inline void gen_stack_update(DisasContext *s, int addend)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        gen_op_add_reg_im(2, R_ESP, addend);
+        return;
+    }
+#endif
+    if (!s->ss32) {
+        gen_op_add_reg_im(0, R_ESP, addend);
+        return;
+    }
+    gen_op_add_reg_im(1, R_ESP, addend);
+}
+
+/* Generate a push of T0.  The code depends on ss32, addseg and dflag.
+   Outside 64 bit code T1 is clobbered whenever a segment base is added
+   (it holds the new stack offset). */
+static void gen_push_T0(DisasContext *s)
+{
+    int delta;
+
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        gen_op_movq_A0_reg(R_ESP);
+        if (!s->dflag) {
+            gen_op_addq_A0_im(-2);
+            gen_op_st_T0_A0(OT_WORD + s->mem_index);
+        } else {
+            gen_op_addq_A0_im(-8);
+            gen_op_st_T0_A0(OT_QUAD + s->mem_index);
+        }
+        gen_op_mov_reg_A0(2, R_ESP);
+        return;
+    }
+#endif
+
+    gen_op_movl_A0_reg(R_ESP);
+    delta = s->dflag ? -4 : -2;
+    gen_op_addl_A0_im(delta);
+
+    /* T1 keeps the new stack offset before the SS base is added to A0 */
+    if (!s->ss32) {
+        gen_op_andl_A0_ffff();
+        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+        gen_op_addl_A0_seg(R_SS);
+    } else if (s->addseg) {
+        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+        gen_op_addl_A0_seg(R_SS);
+    }
+
+    gen_op_st_T0_A0(s->dflag + 1 + s->mem_index);
+
+    /* the stack pointer is only updated after the store */
+    if (s->ss32 && !s->addseg) {
+        gen_op_mov_reg_A0(1, R_ESP);
+    } else {
+        gen_op_mov_reg_T1(s->ss32 + 1, R_ESP);
+    }
+}
+
+/* Generate a push of T1.  The code depends on ss32, addseg and dflag. */
+/* slower version for T1, only used for call Ev */
+static void gen_push_T1(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        gen_op_movq_A0_reg(R_ESP);
+        if (s->dflag) {
+            gen_op_addq_A0_im(-8);
+            gen_op_st_T1_A0(OT_QUAD + s->mem_index);
+        } else {
+            /* 16 bit operand size: the value pushed is still T1 (T0 was
+               stored here by mistake) */
+            gen_op_addq_A0_im(-2);
+            gen_op_st_T1_A0(OT_WORD + s->mem_index);
+        }
+        gen_op_mov_reg_A0(2, R_ESP);
+    } else
+#endif
+    {
+        gen_op_movl_A0_reg(R_ESP);
+        if (!s->dflag)
+            gen_op_addl_A0_im(-2);
+        else
+            gen_op_addl_A0_im(-4);
+        if (s->ss32) {
+            if (s->addseg) {
+                gen_op_addl_A0_seg(R_SS);
+            }
+        } else {
+            gen_op_andl_A0_ffff();
+            gen_op_addl_A0_seg(R_SS);
+        }
+        gen_op_st_T1_A0(s->dflag + 1 + s->mem_index);
+
+        /* Update the stack pointer only after the store.  Without a
+           segment base A0 is the new ESP; otherwise A0 includes the SS
+           base (and T1 is the value being pushed, so it cannot hold the
+           offset), hence ESP is adjusted by the operand size instead. */
+        if (s->ss32 && !s->addseg)
+            gen_op_mov_reg_A0(1, R_ESP);
+        else
+            gen_stack_update(s, (-2) << s->dflag);
+    }
+}
+
+/* First half of a pop: load the value at the top of the stack into T0
+   without touching the stack pointer.  The two step pop is necessary for
+   precise exceptions; gen_pop_update() performs the second half. */
+static void gen_pop_T0(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        gen_op_movq_A0_reg(R_ESP);
+        gen_op_ld_T0_A0((s->dflag ? OT_QUAD : OT_WORD) + s->mem_index);
+        return;
+    }
+#endif
+
+    gen_op_movl_A0_reg(R_ESP);
+    if (!s->ss32) {
+        /* 16 bit stack: wrap the offset and always add the SS base */
+        gen_op_andl_A0_ffff();
+        gen_op_addl_A0_seg(R_SS);
+    } else if (s->addseg) {
+        gen_op_addl_A0_seg(R_SS);
+    }
+    gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index);
+}
+
+/* Second half of a pop: advance the stack pointer by the operand size
+   (8 in 64 bit code with dflag set, 2 << dflag otherwise). */
+static void gen_pop_update(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s) && s->dflag) {
+        gen_stack_update(s, 8);
+        return;
+    }
+#endif
+    gen_stack_update(s, 2 << s->dflag);
+}
+
+/* Load the stack pointer into A0 (masked to 16 bits unless ss32 is set),
+   copy that offset to T1, then add the SS base to A0 when addseg is
+   set. */
+static void gen_stack_A0(DisasContext *s)
+{
+    gen_op_movl_A0_reg(R_ESP);
+    if (!s->ss32) {
+        gen_op_andl_A0_ffff();
+    }
+
+    /* T1 = stack offset without the segment base */
+    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+
+    if (s->addseg) {
+        gen_op_addl_A0_seg(R_SS);
+    }
+}
+
+/* NOTE: wrap around in 16 bit not fully handled */
+static void gen_pusha(DisasContext *s) /* store registers 7..0 below ESP, then lower ESP */
+{
+    int i;
+    gen_op_movl_A0_reg(R_ESP);
+    gen_op_addl_A0_im(-(16 << s->dflag)); /* room for 8 slots; negated after the shift, never shifting a negative value */
+    if (!s->ss32)
+        gen_op_andl_A0_ffff();
+    tcg_gen_mov_tl(cpu_T[1], cpu_A0); /* T1 = new stack pointer, before the SS addition */
+    if (s->addseg)
+        gen_op_addl_A0_seg(R_SS);
+    for(i = 0;i < 8; i++) {
+        gen_op_mov_TN_reg(OT_LONG, 0, 7 - i); /* register 7 first, at the lowest address */
+        gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index);
+        gen_op_addl_A0_im(2 <<  s->dflag);
+    }
+    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP); /* ESP is committed only after all stores were emitted */
+}
+
+/* NOTE: wrap around in 16 bit not fully handled */
+static void gen_popa(DisasContext *s)
+{
+    int reg;
+    gen_op_movl_A0_reg(R_ESP);
+    if (!s->ss32)
+        gen_op_andl_A0_ffff();
+    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+    tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 << s->dflag); /* T1 = stack pointer once all 8 slots are released */
+    if (s->addseg)
+        gen_op_addl_A0_seg(R_SS);
+    for (reg = 7; reg >= 0; reg--) {
+        /* the slot of register 4 is stepped over: ESP is not reloaded */
+        if (reg != 4) {
+            gen_op_ld_T0_A0(OT_WORD + s->dflag + s->mem_index);
+            gen_op_mov_reg_T0(OT_WORD + s->dflag, reg);
+        }
+        gen_op_addl_A0_im(2 << s->dflag);
+    }
+    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
+}
+
+static void gen_enter(DisasContext *s, int esp_addend, int level) /* ENTER: push EBP, set the new frame, lower ESP by esp_addend */
+{
+    int ot, opsize;
+
+    level &= 0x1f; /* only the low 5 bits of the nesting level are used */
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        ot = s->dflag ? OT_QUAD : OT_WORD;
+        opsize = 1 << ot;
+
+        gen_op_movq_A0_reg(R_ESP); /* 64-bit load, matching gen_push_T1()/gen_pop_T0() in 64-bit code */
+        gen_op_addq_A0_im(-opsize);
+        tcg_gen_mov_tl(cpu_T[1], cpu_A0); /* T1 = address of the saved frame pointer */
+
+        /* push bp */
+        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+        gen_op_st_T0_A0(ot + s->mem_index);
+        if (level) {
+            /* XXX: must save state */
+            gen_helper_enter64_level(tcg_const_i32(level),
+                                     tcg_const_i32((ot == OT_QUAD)),
+                                     cpu_T[1]);
+        }
+        gen_op_mov_reg_T1(ot, R_EBP);
+        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level)); /* locals plus one slot per level */
+        gen_op_mov_reg_T1(OT_QUAD, R_ESP);
+    } else
+#endif
+    {
+        ot = s->dflag + OT_WORD;
+        opsize = 2 << s->dflag;
+
+        gen_op_movl_A0_reg(R_ESP);
+        gen_op_addl_A0_im(-opsize);
+        if (!s->ss32)
+            gen_op_andl_A0_ffff();
+        tcg_gen_mov_tl(cpu_T[1], cpu_A0); /* T1 = new frame pointer value, before the SS addition */
+        if (s->addseg)
+            gen_op_addl_A0_seg(R_SS);
+        /* push bp */
+        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+        gen_op_st_T0_A0(ot + s->mem_index);
+        if (level) {
+            /* XXX: must save state */
+            gen_helper_enter_level(tcg_const_i32(level),
+                                   tcg_const_i32(s->dflag),
+                                   cpu_T[1]);
+        }
+        gen_op_mov_reg_T1(ot, R_EBP);
+        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level)); /* locals plus one slot per level */
+        gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
+    }
+}
+
+static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) /* emit code raising exception trapno at cur_eip */
+{
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op); /* emitted only when cc_op is statically known */
+    gen_jmp_im(cur_eip);
+    gen_helper_raise_exception(tcg_const_i32(trapno));
+    s->is_jmp = 3; /* NOTE(review): magic 3 -- presumably a DISAS_* "stop translating" code; confirm */
+}
+
+/* an interrupt is different from an exception because of the
+   privilege checks */
+static void gen_interrupt(DisasContext *s, int intno,
+                          target_ulong cur_eip, target_ulong next_eip)
+{
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op); /* emitted only when cc_op is statically known */
+    gen_jmp_im(cur_eip);
+    gen_helper_raise_interrupt(tcg_const_i32(intno), 
+                               tcg_const_i32(next_eip - cur_eip)); /* second argument: distance from cur_eip to next_eip */
+    s->is_jmp = 3; /* NOTE(review): magic 3 -- presumably a DISAS_* "stop translating" code; confirm */
+}
+
+static void gen_debug(DisasContext *s, target_ulong cur_eip) /* emit a call to the debug helper at cur_eip */
+{
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op); /* emitted only when cc_op is statically known */
+    gen_jmp_im(cur_eip);
+    gen_helper_debug();
+    s->is_jmp = 3; /* NOTE(review): magic 3 -- presumably a DISAS_* "stop translating" code; confirm */
+}
+
+/* generate a generic end of block. Trace exception is also generated
+   if needed */
+static void gen_eob(DisasContext *s)
+{
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op);
+    if (s->tb->flags & HF_INHIBIT_IRQ_MASK) { /* block was translated with the IRQ-inhibit flag set */
+        gen_helper_reset_inhibit_irq();
+    }
+    if (s->tb->flags & HF_RF_MASK) { /* block was translated with the RF flag set */
+        gen_helper_reset_rf();
+    }
+    if (s->singlestep_enabled) { /* checked first: takes priority over s->tf */
+        gen_helper_debug();
+    } else if (s->tf) {
+	gen_helper_single_step();
+    } else {
+        tcg_gen_exit_tb(0); /* plain exit, only when neither stepping mode is active */
+    }
+    s->is_jmp = 3; /* NOTE(review): magic 3 -- presumably a DISAS_* "stop translating" code; confirm */
+}
+
+/* Emit a jump to eip. No segment change may be emitted before it, because
+   the jump can become a direct call to the next block. */
+static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
+{
+    if (!s->jmp_opt) {
+        gen_jmp_im(eip);
+        gen_eob(s);
+        return;
+    }
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+        s->cc_op = CC_OP_DYNAMIC;
+    }
+    gen_goto_tb(s, tb_num, eip);
+    s->is_jmp = 3;
+}
+
+static void gen_jmp(DisasContext *s, target_ulong eip) /* gen_jmp_tb() with tb_num fixed to 0 */
+{
+    gen_jmp_tb(s, eip, 0);
+}
+
+static inline void gen_ldq_env_A0(int idx, int offset)
+{
+    /* 64 bits of guest memory at A0 -> env + offset; the memory index is (idx >> 2) - 1 */
+    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, (idx >> 2) - 1);
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
+}
+
+static inline void gen_stq_env_A0(int idx, int offset)
+{
+    /* 64 bits at env + offset -> guest memory at A0; the memory index is (idx >> 2) - 1 */
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
+    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, (idx >> 2) - 1);
+}
+
+static inline void gen_ldo_env_A0(int idx, int offset)
+{
+    const int mmu_idx = (idx >> 2) - 1;
+    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mmu_idx); /* quadword at A0 */
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offsetof(XMMReg, XMM_Q(0)) + offset);
+    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8); /* A0 itself is left unchanged */
+    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_tmp0, mmu_idx); /* quadword at A0 + 8 */
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offsetof(XMMReg, XMM_Q(1)) + offset);
+}
+
+static inline void gen_sto_env_A0(int idx, int offset)
+{
+    const int mmu_idx = (idx >> 2) - 1;
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offsetof(XMMReg, XMM_Q(0)) + offset);
+    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mmu_idx); /* quadword to A0 */
+    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8); /* A0 itself is left unchanged */
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offsetof(XMMReg, XMM_Q(1)) + offset);
+    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_tmp0, mmu_idx); /* quadword to A0 + 8 */
+}
+
+static inline void gen_op_movo(int d_offset, int s_offset) /* copy 16 bytes inside env: s_offset -> d_offset */
+{
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8); /* second 64-bit half, at a fixed +8 */
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
+}
+
+static inline void gen_op_movq(int d_offset, int s_offset) /* copy 8 bytes inside env: s_offset -> d_offset */
+{
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+}
+
+static inline void gen_op_movl(int d_offset, int s_offset) /* copy 4 bytes inside env: s_offset -> d_offset */
+{
+    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
+    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
+}
+
+static inline void gen_op_movq_env_0(int d_offset) /* zero the 8 bytes at env + d_offset */
+{
+    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+}
+
+#define SSE_SPECIAL ((void *)1) /* table marker: the opcode is decoded by the explicit switch in gen_sse() */
+#define SSE_DUMMY ((void *)2) /* table marker: non-NULL placeholder, so the entry is not rejected as an illegal opcode */
+
+#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm } /* { MMX helper, XMM helper } pair */
+#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
+                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, } /* helpers in ps, pd, ss, sd order */
+
+static void *sse_op_table1[256][4] = { /* [opcode byte][b1]; b1: 0 = no prefix, 1 = PREFIX_DATA, 2 = PREFIX_REPZ, 3 = PREFIX_REPNZ */
+    /* 3DNow! extensions */
+    [0x0e] = { SSE_DUMMY }, /* femms */
+    [0x0f] = { SSE_DUMMY }, /* pf... */
+    /* pure SSE operations */
+    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
+    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
+    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
+    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
+    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm }, /* unpcklps, unpcklpd */
+    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm }, /* unpckhps, unpckhpd */
+    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
+    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
+
+    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
+    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
+    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
+    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntps, movntpd */
+    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
+    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
+    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
+    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
+    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
+    [0x51] = SSE_FOP(sqrt),
+    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
+    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
+    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
+    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
+    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
+    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
+    [0x58] = SSE_FOP(add),
+    [0x59] = SSE_FOP(mul),
+    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
+               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
+    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
+    [0x5c] = SSE_FOP(sub),
+    [0x5d] = SSE_FOP(min),
+    [0x5e] = SSE_FOP(div),
+    [0x5f] = SSE_FOP(max),
+
+    [0xc2] = SSE_FOP(cmpeq),
+    [0xc6] = { gen_helper_shufps, gen_helper_shufpd },
+
+    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
+    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */
+
+    /* MMX ops and their SSE extensions */
+    [0x60] = MMX_OP2(punpcklbw),
+    [0x61] = MMX_OP2(punpcklwd),
+    [0x62] = MMX_OP2(punpckldq),
+    [0x63] = MMX_OP2(packsswb),
+    [0x64] = MMX_OP2(pcmpgtb),
+    [0x65] = MMX_OP2(pcmpgtw),
+    [0x66] = MMX_OP2(pcmpgtl),
+    [0x67] = MMX_OP2(packuswb),
+    [0x68] = MMX_OP2(punpckhbw),
+    [0x69] = MMX_OP2(punpckhwd),
+    [0x6a] = MMX_OP2(punpckhdq),
+    [0x6b] = MMX_OP2(packssdw),
+    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
+    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
+    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea; movd xmm, ea */
+    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
+    [0x70] = { gen_helper_pshufw_mmx,
+               gen_helper_pshufd_xmm,
+               gen_helper_pshufhw_xmm,
+               gen_helper_pshuflw_xmm },
+    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
+    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
+    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
+    [0x74] = MMX_OP2(pcmpeqb),
+    [0x75] = MMX_OP2(pcmpeqw),
+    [0x76] = MMX_OP2(pcmpeql),
+    [0x77] = { SSE_DUMMY }, /* emms */
+    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
+    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
+    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
+    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
+    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
+    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
+    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
+    [0xd1] = MMX_OP2(psrlw),
+    [0xd2] = MMX_OP2(psrld),
+    [0xd3] = MMX_OP2(psrlq),
+    [0xd4] = MMX_OP2(paddq),
+    [0xd5] = MMX_OP2(pmullw),
+    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* -, movq, movq2dq, movdq2q */
+    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
+    [0xd8] = MMX_OP2(psubusb),
+    [0xd9] = MMX_OP2(psubusw),
+    [0xda] = MMX_OP2(pminub),
+    [0xdb] = MMX_OP2(pand),
+    [0xdc] = MMX_OP2(paddusb),
+    [0xdd] = MMX_OP2(paddusw),
+    [0xde] = MMX_OP2(pmaxub),
+    [0xdf] = MMX_OP2(pandn),
+    [0xe0] = MMX_OP2(pavgb),
+    [0xe1] = MMX_OP2(psraw),
+    [0xe2] = MMX_OP2(psrad),
+    [0xe3] = MMX_OP2(pavgw),
+    [0xe4] = MMX_OP2(pmulhuw),
+    [0xe5] = MMX_OP2(pmulhw),
+    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
+    [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntdq */
+    [0xe8] = MMX_OP2(psubsb),
+    [0xe9] = MMX_OP2(psubsw),
+    [0xea] = MMX_OP2(pminsw),
+    [0xeb] = MMX_OP2(por),
+    [0xec] = MMX_OP2(paddsb),
+    [0xed] = MMX_OP2(paddsw),
+    [0xee] = MMX_OP2(pmaxsw),
+    [0xef] = MMX_OP2(pxor),
+    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
+    [0xf1] = MMX_OP2(psllw),
+    [0xf2] = MMX_OP2(pslld),
+    [0xf3] = MMX_OP2(psllq),
+    [0xf4] = MMX_OP2(pmuludq),
+    [0xf5] = MMX_OP2(pmaddwd),
+    [0xf6] = MMX_OP2(psadbw),
+    [0xf7] = MMX_OP2(maskmov),
+    [0xf8] = MMX_OP2(psubb),
+    [0xf9] = MMX_OP2(psubw),
+    [0xfa] = MMX_OP2(psubl),
+    [0xfb] = MMX_OP2(psubq),
+    [0xfc] = MMX_OP2(paddb),
+    [0xfd] = MMX_OP2(paddw),
+    [0xfe] = MMX_OP2(paddl),
+};
+
+static void *sse_op_table2[3 * 8][2] = { /* immediate shifts: row = ((opcode - 1) & 3) * 8 + modrm reg field, column = b1 (0 MMX, 1 XMM) */
+    [0 + 2] = MMX_OP2(psrlw), /* rows 0..7: opcode 0x71, word shifts */
+    [0 + 4] = MMX_OP2(psraw),
+    [0 + 6] = MMX_OP2(psllw),
+    [8 + 2] = MMX_OP2(psrld), /* rows 8..15: opcode 0x72, doubleword shifts */
+    [8 + 4] = MMX_OP2(psrad),
+    [8 + 6] = MMX_OP2(pslld),
+    [16 + 2] = MMX_OP2(psrlq), /* rows 16..23: opcode 0x73, quadword shifts */
+    [16 + 3] = { NULL, gen_helper_psrldq_xmm }, /* XMM form only */
+    [16 + 6] = MMX_OP2(psllq),
+    [16 + 7] = { NULL, gen_helper_pslldq_xmm }, /* XMM form only */
+};
+
+static void *sse_op_table3[4 * 3] = { /* three groups of four scalar conversion helpers */
+    gen_helper_cvtsi2ss, /* group 0: integer to scalar float */
+    gen_helper_cvtsi2sd,
+    X86_64_ONLY(gen_helper_cvtsq2ss), /* NOTE(review): X86_64_ONLY() presumably yields NULL on 32-bit targets; confirm */
+    X86_64_ONLY(gen_helper_cvtsq2sd),
+
+    gen_helper_cvttss2si, /* group 1: scalar float to integer, cvtt* variants */
+    gen_helper_cvttsd2si,
+    X86_64_ONLY(gen_helper_cvttss2sq),
+    X86_64_ONLY(gen_helper_cvttsd2sq),
+
+    gen_helper_cvtss2si, /* group 2: scalar float to integer, cvt* variants */
+    gen_helper_cvtsd2si,
+    X86_64_ONLY(gen_helper_cvtss2sq),
+    X86_64_ONLY(gen_helper_cvtsd2sq),
+};
+
+static void *sse_op_table4[8][4] = { /* compare helpers: one row per predicate, columns in SSE_FOP order (ps, pd, ss, sd) */
+    SSE_FOP(cmpeq), /* row 0 */
+    SSE_FOP(cmplt),
+    SSE_FOP(cmple),
+    SSE_FOP(cmpunord),
+    SSE_FOP(cmpneq),
+    SSE_FOP(cmpnlt),
+    SSE_FOP(cmpnle),
+    SSE_FOP(cmpord), /* row 7 */
+};
+
+static void *sse_op_table5[256] = { /* 3DNow! helpers; NOTE(review): presumably indexed by the instruction's trailing opcode byte -- confirm in gen_sse() */
+    [0x0c] = gen_helper_pi2fw,
+    [0x0d] = gen_helper_pi2fd,
+    [0x1c] = gen_helper_pf2iw,
+    [0x1d] = gen_helper_pf2id,
+    [0x8a] = gen_helper_pfnacc,
+    [0x8e] = gen_helper_pfpnacc,
+    [0x90] = gen_helper_pfcmpge,
+    [0x94] = gen_helper_pfmin,
+    [0x96] = gen_helper_pfrcp,
+    [0x97] = gen_helper_pfrsqrt,
+    [0x9a] = gen_helper_pfsub,
+    [0x9e] = gen_helper_pfadd,
+    [0xa0] = gen_helper_pfcmpgt,
+    [0xa4] = gen_helper_pfmax,
+    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
+    [0xa7] = gen_helper_movq, /* pfrsqit1 */
+    [0xaa] = gen_helper_pfsubr,
+    [0xae] = gen_helper_pfacc,
+    [0xb0] = gen_helper_pfcmpeq,
+    [0xb4] = gen_helper_pfmul,
+    [0xb6] = gen_helper_movq, /* pfrcpit2 */
+    [0xb7] = gen_helper_pmulhrw_mmx,
+    [0xbb] = gen_helper_pswapd,
+    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
+};
+
+struct sse_op_helper_s { /* entry type of sse_op_table6 and sse_op_table7 */
+    void *op[2]; uint32_t ext_mask; /* op: { MMX, XMM } helpers; ext_mask: the CPUID_EXT_* bit attached to the entry */
+};
+#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 } /* both MMX and XMM helpers */
+#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 } /* XMM helper only */
+#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 } /* XMM helper only */
+#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 } /* XMM slot holds the SSE_SPECIAL marker instead of a helper */
+static struct sse_op_helper_s sse_op_table6[256] = { /* NOTE(review): presumably indexed by the byte following 0f 38; confirm in gen_sse() */
+    [0x00] = SSSE3_OP(pshufb),
+    [0x01] = SSSE3_OP(phaddw),
+    [0x02] = SSSE3_OP(phaddd),
+    [0x03] = SSSE3_OP(phaddsw),
+    [0x04] = SSSE3_OP(pmaddubsw),
+    [0x05] = SSSE3_OP(phsubw),
+    [0x06] = SSSE3_OP(phsubd),
+    [0x07] = SSSE3_OP(phsubsw),
+    [0x08] = SSSE3_OP(psignb),
+    [0x09] = SSSE3_OP(psignw),
+    [0x0a] = SSSE3_OP(psignd),
+    [0x0b] = SSSE3_OP(pmulhrsw),
+    [0x10] = SSE41_OP(pblendvb),
+    [0x14] = SSE41_OP(blendvps),
+    [0x15] = SSE41_OP(blendvpd),
+    [0x17] = SSE41_OP(ptest),
+    [0x1c] = SSSE3_OP(pabsb),
+    [0x1d] = SSSE3_OP(pabsw),
+    [0x1e] = SSSE3_OP(pabsd),
+    [0x20] = SSE41_OP(pmovsxbw),
+    [0x21] = SSE41_OP(pmovsxbd),
+    [0x22] = SSE41_OP(pmovsxbq),
+    [0x23] = SSE41_OP(pmovsxwd),
+    [0x24] = SSE41_OP(pmovsxwq),
+    [0x25] = SSE41_OP(pmovsxdq),
+    [0x28] = SSE41_OP(pmuldq),
+    [0x29] = SSE41_OP(pcmpeqq),
+    [0x2a] = SSE41_SPECIAL, /* movntdqa */
+    [0x2b] = SSE41_OP(packusdw),
+    [0x30] = SSE41_OP(pmovzxbw),
+    [0x31] = SSE41_OP(pmovzxbd),
+    [0x32] = SSE41_OP(pmovzxbq),
+    [0x33] = SSE41_OP(pmovzxwd),
+    [0x34] = SSE41_OP(pmovzxwq),
+    [0x35] = SSE41_OP(pmovzxdq),
+    [0x37] = SSE42_OP(pcmpgtq),
+    [0x38] = SSE41_OP(pminsb),
+    [0x39] = SSE41_OP(pminsd),
+    [0x3a] = SSE41_OP(pminuw),
+    [0x3b] = SSE41_OP(pminud),
+    [0x3c] = SSE41_OP(pmaxsb),
+    [0x3d] = SSE41_OP(pmaxsd),
+    [0x3e] = SSE41_OP(pmaxuw),
+    [0x3f] = SSE41_OP(pmaxud),
+    [0x40] = SSE41_OP(pmulld),
+    [0x41] = SSE41_OP(phminposuw),
+};
+
+static struct sse_op_helper_s sse_op_table7[256] = { /* NOTE(review): presumably indexed by the byte following 0f 3a; confirm in gen_sse() */
+    [0x08] = SSE41_OP(roundps),
+    [0x09] = SSE41_OP(roundpd),
+    [0x0a] = SSE41_OP(roundss),
+    [0x0b] = SSE41_OP(roundsd),
+    [0x0c] = SSE41_OP(blendps),
+    [0x0d] = SSE41_OP(blendpd),
+    [0x0e] = SSE41_OP(pblendw),
+    [0x0f] = SSSE3_OP(palignr), /* the only entry here that also has an MMX helper */
+    [0x14] = SSE41_SPECIAL, /* pextrb */
+    [0x15] = SSE41_SPECIAL, /* pextrw */
+    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
+    [0x17] = SSE41_SPECIAL, /* extractps */
+    [0x20] = SSE41_SPECIAL, /* pinsrb */
+    [0x21] = SSE41_SPECIAL, /* insertps */
+    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
+    [0x40] = SSE41_OP(dpps),
+    [0x41] = SSE41_OP(dppd),
+    [0x42] = SSE41_OP(mpsadbw),
+    [0x60] = SSE42_OP(pcmpestrm),
+    [0x61] = SSE42_OP(pcmpestri),
+    [0x62] = SSE42_OP(pcmpistrm),
+    [0x63] = SSE42_OP(pcmpistri),
+};
+
+static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
+{
+    int b1, op1_offset, op2_offset, is_xmm, val, ot;
+    int modrm, mod, rm, reg, reg_addr, offset_addr;
+    void *sse_op2;
+
+    b &= 0xff;
+    if (s->prefix & PREFIX_DATA)
+        b1 = 1;
+    else if (s->prefix & PREFIX_REPZ)
+        b1 = 2;
+    else if (s->prefix & PREFIX_REPNZ)
+        b1 = 3;
+    else
+        b1 = 0;
+    sse_op2 = sse_op_table1[b][b1];
+    if (!sse_op2)
+        goto illegal_op;
+    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
+        is_xmm = 1;
+    } else {
+        if (b1 == 0) {
+            /* MMX case */
+            is_xmm = 0;
+        } else {
+            is_xmm = 1;
+        }
+    }
+    /* simple MMX/SSE operation */
+    if (s->flags & HF_TS_MASK) {
+        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+        return;
+    }
+    if (s->flags & HF_EM_MASK) {
+    illegal_op:
+        gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+        return;
+    }
+    if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
+        if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
+            goto illegal_op;
+    if (b == 0x0e) {
+        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
+            goto illegal_op;
+        /* femms */
+        gen_helper_emms();
+        return;
+    }
+    if (b == 0x77) {
+        /* emms */
+        gen_helper_emms();
+        return;
+    }
+    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
+       the static cpu state) */
+    if (!is_xmm) {
+        gen_helper_enter_mmx();
+    }
+
+    modrm = ldub_code(s->pc++);
+    reg = ((modrm >> 3) & 7);
+    if (is_xmm)
+        reg |= rex_r;
+    mod = (modrm >> 6) & 3;
+    if (sse_op2 == SSE_SPECIAL) {
+        b |= (b1 << 8);
+        switch(b) {
+        case 0x0e7: /* movntq */
+            if (mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+            break;
+        case 0x1e7: /* movntdq */
+        case 0x02b: /* movntps */
+        case 0x12b: /* movntps */
+        case 0x3f0: /* lddqu */
+            if (mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+            break;
+        case 0x6e: /* movd mm, ea */
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+                tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
+            } else
+#endif
+            {
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                                 offsetof(CPUX86State,fpregs[reg].mmx));
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
+            }
+            break;
+        case 0x16e: /* movd xmm, ea */
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                                 offsetof(CPUX86State,xmm_regs[reg]));
+                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
+            } else
+#endif
+            {
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                                 offsetof(CPUX86State,xmm_regs[reg]));
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
+            }
+            break;
+        case 0x6f: /* movq mm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+            } else {
+                rm = (modrm & 7);
+                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+                               offsetof(CPUX86State,fpregs[rm].mmx));
+                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+                               offsetof(CPUX86State,fpregs[reg].mmx));
+            }
+            break;
+        case 0x010: /* movups */
+        case 0x110: /* movupd */
+        case 0x028: /* movaps */
+        case 0x128: /* movapd */
+        case 0x16f: /* movdqa xmm, ea */
+        case 0x26f: /* movdqu xmm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
+                            offsetof(CPUX86State,xmm_regs[rm]));
+            }
+            break;
+        case 0x210: /* movss xmm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+                gen_op_movl_T0_0();
+                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
+                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
+            }
+            break;
+        case 0x310: /* movsd xmm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_op_movl_T0_0();
+                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            }
+            break;
+        case 0x012: /* movlps */
+        case 0x112: /* movlpd */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                /* movhlps */
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+            }
+            break;
+        case 0x212: /* movsldup */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
+            }
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+            break;
+        case 0x312: /* movddup */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            }
+            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            break;
+        case 0x016: /* movhps */
+        case 0x116: /* movhpd */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+            } else {
+                /* movlhps */
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            }
+            break;
+        case 0x216: /* movshdup */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(1)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(3)));
+            }
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+            break;
+        case 0x7e: /* movd ea, mm */
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
+                               offsetof(CPUX86State,fpregs[reg].mmx));
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+            } else
+#endif
+            {
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
+                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+            }
+            break;
+        case 0x17e: /* movd ea, xmm */
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
+                               offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+            } else
+#endif
+            {
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
+                                 offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+            }
+            break;
+        case 0x27e: /* movq xmm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            }
+            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+            break;
+        case 0x7f: /* movq ea, mm */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+            } else {
+                rm = (modrm & 7);
+                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
+                            offsetof(CPUX86State,fpregs[reg].mmx));
+            }
+            break;
+        case 0x011: /* movups */
+        case 0x111: /* movupd */
+        case 0x029: /* movaps */
+        case 0x129: /* movapd */
+        case 0x17f: /* movdqa ea, xmm */
+        case 0x27f: /* movdqu ea, xmm */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
+                            offsetof(CPUX86State,xmm_regs[reg]));
+            }
+            break;
+        case 0x211: /* movss ea, xmm */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+                gen_op_st_T0_A0(OT_LONG + s->mem_index);
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
+                            offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+            }
+            break;
+        case 0x311: /* movsd ea, xmm */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            }
+            break;
+        case 0x013: /* movlps */
+        case 0x113: /* movlpd */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                goto illegal_op;
+            }
+            break;
+        case 0x017: /* movhps */
+        case 0x117: /* movhpd */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+            } else {
+                goto illegal_op;
+            }
+            break;
+        case 0x71: /* shift mm, im */
+        case 0x72:
+        case 0x73:
+        case 0x171: /* shift xmm, im */
+        case 0x172:
+        case 0x173:
+            val = ldub_code(s->pc++);
+            if (is_xmm) {
+                gen_op_movl_T0_im(val);
+                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+                gen_op_movl_T0_0();
+                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1)));
+                op1_offset = offsetof(CPUX86State,xmm_t0);
+            } else {
+                gen_op_movl_T0_im(val);
+                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
+                gen_op_movl_T0_0();
+                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
+                op1_offset = offsetof(CPUX86State,mmx_t0);
+            }
+            sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1];
+            if (!sse_op2)
+                goto illegal_op;
+            if (is_xmm) {
+                rm = (modrm & 7) | REX_B(s);
+                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+            } else {
+                rm = (modrm & 7);
+                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+            }
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            break;
+        case 0x050: /* movmskps */
+            rm = (modrm & 7) | REX_B(s);
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                             offsetof(CPUX86State,xmm_regs[rm]));
+            gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0);
+            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+            gen_op_mov_reg_T0(OT_LONG, reg);
+            break;
+        case 0x150: /* movmskpd */
+            rm = (modrm & 7) | REX_B(s);
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                             offsetof(CPUX86State,xmm_regs[rm]));
+            gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0);
+            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+            gen_op_mov_reg_T0(OT_LONG, reg);
+            break;
+        case 0x02a: /* cvtpi2ps */
+        case 0x12a: /* cvtpi2pd */
+            gen_helper_enter_mmx();
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                op2_offset = offsetof(CPUX86State,mmx_t0);
+                gen_ldq_env_A0(s->mem_index, op2_offset);
+            } else {
+                rm = (modrm & 7);
+                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+            }
+            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            switch(b >> 8) {
+            case 0x0:
+                gen_helper_cvtpi2ps(cpu_ptr0, cpu_ptr1);
+                break;
+            default:
+            case 0x1:
+                gen_helper_cvtpi2pd(cpu_ptr0, cpu_ptr1);
+                break;
+            }
+            break;
+        case 0x22a: /* cvtsi2ss */
+        case 0x32a: /* cvtsi2sd */
+            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
+            if (ot == OT_LONG) {
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                ((void (*)(TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_tmp2_i32);
+            } else {
+                ((void (*)(TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_T[0]);
+            }
+            break;
+        case 0x02c: /* cvttps2pi */
+        case 0x12c: /* cvttpd2pi */
+        case 0x02d: /* cvtps2pi */
+        case 0x12d: /* cvtpd2pi */
+            gen_helper_enter_mmx();
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                op2_offset = offsetof(CPUX86State,xmm_t0);
+                gen_ldo_env_A0(s->mem_index, op2_offset);
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+            }
+            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            switch(b) {
+            case 0x02c:
+                gen_helper_cvttps2pi(cpu_ptr0, cpu_ptr1);
+                break;
+            case 0x12c:
+                gen_helper_cvttpd2pi(cpu_ptr0, cpu_ptr1);
+                break;
+            case 0x02d:
+                gen_helper_cvtps2pi(cpu_ptr0, cpu_ptr1);
+                break;
+            case 0x12d:
+                gen_helper_cvtpd2pi(cpu_ptr0, cpu_ptr1);
+                break;
+            }
+            break;
+        case 0x22c: /* cvttss2si */
+        case 0x32c: /* cvttsd2si */
+        case 0x22d: /* cvtss2si */
+        case 0x32d: /* cvtsd2si */
+            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                if ((b >> 8) & 1) {
+                    gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
+                } else {
+                    gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                    tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+                }
+                op2_offset = offsetof(CPUX86State,xmm_t0);
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+            }
+            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
+                                    (b & 1) * 4];
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+            if (ot == OT_LONG) {
+                ((void (*)(TCGv_i32, TCGv_ptr))sse_op2)(cpu_tmp2_i32, cpu_ptr0);
+                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+            } else {
+                ((void (*)(TCGv, TCGv_ptr))sse_op2)(cpu_T[0], cpu_ptr0);
+            }
+            gen_op_mov_reg_T0(ot, reg);
+            break;
+        case 0xc4: /* pinsrw */
+        case 0x1c4:
+            s->rip_offset = 1;
+            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+            val = ldub_code(s->pc++);
+            if (b1) {
+                val &= 7;
+                tcg_gen_st16_tl(cpu_T[0], cpu_env,
+                                offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
+            } else {
+                val &= 3;
+                tcg_gen_st16_tl(cpu_T[0], cpu_env,
+                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
+            }
+            break;
+        case 0xc5: /* pextrw */
+        case 0x1c5:
+            if (mod != 3)
+                goto illegal_op;
+            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+            val = ldub_code(s->pc++);
+            if (b1) {
+                val &= 7;
+                rm = (modrm & 7) | REX_B(s);
+                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
+                                 offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
+            } else {
+                val &= 3;
+                rm = (modrm & 7);
+                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
+                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
+            }
+            reg = ((modrm >> 3) & 7) | rex_r;
+            gen_op_mov_reg_T0(ot, reg);
+            break;
+        case 0x1d6: /* movq ea, xmm */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+            }
+            break;
+        case 0x2d6: /* movq2dq */
+            gen_helper_enter_mmx();
+            rm = (modrm & 7);
+            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                        offsetof(CPUX86State,fpregs[rm].mmx));
+            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+            break;
+        case 0x3d6: /* movdq2q */
+            gen_helper_enter_mmx();
+            rm = (modrm & 7) | REX_B(s);
+            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
+                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            break;
+        case 0xd7: /* pmovmskb */
+        case 0x1d7:
+            if (mod != 3)
+                goto illegal_op;
+            if (b1) {
+                rm = (modrm & 7) | REX_B(s);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
+                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_ptr0);
+            } else {
+                rm = (modrm & 7);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
+                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_ptr0);
+            }
+            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            gen_op_mov_reg_T0(OT_LONG, reg);
+            break;
+        case 0x138:
+            if (s->prefix & PREFIX_REPNZ)
+                goto crc32;
+        case 0x038:
+            b = modrm;
+            modrm = ldub_code(s->pc++);
+            rm = modrm & 7;
+            reg = ((modrm >> 3) & 7) | rex_r;
+            mod = (modrm >> 6) & 3;
+
+            sse_op2 = sse_op_table6[b].op[b1];
+            if (!sse_op2)
+                goto illegal_op;
+            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
+                goto illegal_op;
+
+            if (b1) {
+                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+                if (mod == 3) {
+                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
+                } else {
+                    op2_offset = offsetof(CPUX86State,xmm_t0);
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    switch (b) {
+                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
+                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
+                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
+                        gen_ldq_env_A0(s->mem_index, op2_offset +
+                                        offsetof(XMMReg, XMM_Q(0)));
+                        break;
+                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
+                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
+                        tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+                                          (s->mem_index >> 2) - 1);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
+                                        offsetof(XMMReg, XMM_L(0)));
+                        break;
+                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
+                        tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0,
+                                          (s->mem_index >> 2) - 1);
+                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
+                                        offsetof(XMMReg, XMM_W(0)));
+                        break;
+                    case 0x2a:            /* movntqda */
+                        gen_ldo_env_A0(s->mem_index, op1_offset);
+                        return;
+                    default:
+                        gen_ldo_env_A0(s->mem_index, op2_offset);
+                    }
+                }
+            } else {
+                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+                if (mod == 3) {
+                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+                } else {
+                    op2_offset = offsetof(CPUX86State,mmx_t0);
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_ldq_env_A0(s->mem_index, op2_offset);
+                }
+            }
+            if (sse_op2 == SSE_SPECIAL)
+                goto illegal_op;
+
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+
+            if (b == 0x17)
+                s->cc_op = CC_OP_EFLAGS;
+            break;
+        case 0x338: /* crc32 */
+        crc32:
+            b = modrm;
+            modrm = ldub_code(s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+
+            if (b != 0xf0 && b != 0xf1)
+                goto illegal_op;
+            if (!(s->cpuid_ext_features & CPUID_EXT_SSE42))
+                goto illegal_op;
+
+            if (b == 0xf0)
+                ot = OT_BYTE;
+            else if (b == 0xf1 && s->dflag != 2)
+                if (s->prefix & PREFIX_DATA)
+                    ot = OT_WORD;
+                else
+                    ot = OT_LONG;
+            else
+                ot = OT_QUAD;
+
+            gen_op_mov_TN_reg(OT_LONG, 0, reg);
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+            gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
+                             cpu_T[0], tcg_const_i32(8 << ot));
+
+            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+            gen_op_mov_reg_T0(ot, reg);
+            break;
+        case 0x03a:
+        case 0x13a:
+            b = modrm;
+            modrm = ldub_code(s->pc++);
+            rm = modrm & 7;
+            reg = ((modrm >> 3) & 7) | rex_r;
+            mod = (modrm >> 6) & 3;
+
+            sse_op2 = sse_op_table7[b].op[b1];
+            if (!sse_op2)
+                goto illegal_op;
+            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
+                goto illegal_op;
+
+            if (sse_op2 == SSE_SPECIAL) {
+                ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+                rm = (modrm & 7) | REX_B(s);
+                if (mod != 3)
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                reg = ((modrm >> 3) & 7) | rex_r;
+                val = ldub_code(s->pc++);
+                switch (b) {
+                case 0x14: /* pextrb */
+                    tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+                                            xmm_regs[reg].XMM_B(val & 15)));
+                    if (mod == 3)
+                        gen_op_mov_reg_T0(ot, rm);
+                    else
+                        tcg_gen_qemu_st8(cpu_T[0], cpu_A0,
+                                        (s->mem_index >> 2) - 1);
+                    break;
+                case 0x15: /* pextrw */
+                    tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+                                            xmm_regs[reg].XMM_W(val & 7)));
+                    if (mod == 3)
+                        gen_op_mov_reg_T0(ot, rm);
+                    else
+                        tcg_gen_qemu_st16(cpu_T[0], cpu_A0,
+                                        (s->mem_index >> 2) - 1);
+                    break;
+                case 0x16:
+                    if (ot == OT_LONG) { /* pextrd */
+                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                                        offsetof(CPUX86State,
+                                                xmm_regs[reg].XMM_L(val & 3)));
+                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                        if (mod == 3)
+                            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                        else
+                            tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
+                                            (s->mem_index >> 2) - 1);
+                    } else { /* pextrq */
+#ifdef TARGET_X86_64
+                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+                                        offsetof(CPUX86State,
+                                                xmm_regs[reg].XMM_Q(val & 1)));
+                        if (mod == 3)
+                            gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64);
+                        else
+                            tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+                                            (s->mem_index >> 2) - 1);
+#else
+                        goto illegal_op;
+#endif
+                    }
+                    break;
+                case 0x17: /* extractps */
+                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+                                            xmm_regs[reg].XMM_L(val & 3)));
+                    if (mod == 3)
+                        gen_op_mov_reg_T0(ot, rm);
+                    else
+                        tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
+                                        (s->mem_index >> 2) - 1);
+                    break;
+                case 0x20: /* pinsrb */
+                    if (mod == 3)
+                        gen_op_mov_TN_reg(OT_LONG, 0, rm);
+                    else
+                        tcg_gen_qemu_ld8u(cpu_tmp0, cpu_A0,
+                                        (s->mem_index >> 2) - 1);
+                    tcg_gen_st8_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State,
+                                            xmm_regs[reg].XMM_B(val & 15)));
+                    break;
+                case 0x21: /* insertps */
+                    if (mod == 3) {
+                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                                        offsetof(CPUX86State,xmm_regs[rm]
+                                                .XMM_L((val >> 6) & 3)));
+                    } else {
+                        tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+                                        (s->mem_index >> 2) - 1);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+                    }
+                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                                    offsetof(CPUX86State,xmm_regs[reg]
+                                            .XMM_L((val >> 4) & 3)));
+                    if ((val >> 0) & 1)
+                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+                                        cpu_env, offsetof(CPUX86State,
+                                                xmm_regs[reg].XMM_L(0)));
+                    if ((val >> 1) & 1)
+                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+                                        cpu_env, offsetof(CPUX86State,
+                                                xmm_regs[reg].XMM_L(1)));
+                    if ((val >> 2) & 1)
+                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+                                        cpu_env, offsetof(CPUX86State,
+                                                xmm_regs[reg].XMM_L(2)));
+                    if ((val >> 3) & 1)
+                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+                                        cpu_env, offsetof(CPUX86State,
+                                                xmm_regs[reg].XMM_L(3)));
+                    break;
+                case 0x22:
+                    if (ot == OT_LONG) { /* pinsrd */
+                        if (mod == 3)
+                            gen_op_mov_v_reg(ot, cpu_tmp0, rm);
+                        else
+                            tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+                                            (s->mem_index >> 2) - 1);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                                        offsetof(CPUX86State,
+                                                xmm_regs[reg].XMM_L(val & 3)));
+                    } else { /* pinsrq */
+#ifdef TARGET_X86_64
+                        if (mod == 3)
+                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
+                        else
+                            tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+                                            (s->mem_index >> 2) - 1);
+                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+                                        offsetof(CPUX86State,
+                                                xmm_regs[reg].XMM_Q(val & 1)));
+#else
+                        goto illegal_op;
+#endif
+                    }
+                    break;
+                }
+                return;
+            }
+
+            if (b1) {
+                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+                if (mod == 3) {
+                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
+                } else {
+                    op2_offset = offsetof(CPUX86State,xmm_t0);
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_ldo_env_A0(s->mem_index, op2_offset);
+                }
+            } else {
+                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+                if (mod == 3) {
+                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+                } else {
+                    op2_offset = offsetof(CPUX86State,mmx_t0);
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_ldq_env_A0(s->mem_index, op2_offset);
+                }
+            }
+            val = ldub_code(s->pc++);
+
+            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
+                s->cc_op = CC_OP_EFLAGS;
+
+                if (s->dflag == 2)
+                    /* The helper must use entire 64-bit gp registers */
+                    val |= 1 << 8;
+            }
+
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            break;
+        default:
+            goto illegal_op;
+        }
+    } else {
+        /* generic MMX or SSE operation */
+        switch(b) {
+        case 0x70: /* pshufx insn */
+        case 0xc6: /* pshufx insn */
+        case 0xc2: /* compare insns */
+            s->rip_offset = 1;
+            break;
+        default:
+            break;
+        }
+        if (is_xmm) {
+            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                op2_offset = offsetof(CPUX86State,xmm_t0);
+                if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
+                                b == 0xc2)) {
+                    /* specific case for SSE single instructions */
+                    if (b1 == 2) {
+                        /* 32 bit access */
+                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+                    } else {
+                        /* 64 bit access */
+                        gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0)));
+                    }
+                } else {
+                    gen_ldo_env_A0(s->mem_index, op2_offset);
+                }
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+            }
+        } else {
+            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                op2_offset = offsetof(CPUX86State,mmx_t0);
+                gen_ldq_env_A0(s->mem_index, op2_offset);
+            } else {
+                rm = (modrm & 7);
+                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+            }
+        }
+        switch(b) {
+        case 0x0f: /* 3DNow! data insns */
+            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
+                goto illegal_op;
+            val = ldub_code(s->pc++);
+            sse_op2 = sse_op_table5[val];
+            if (!sse_op2)
+                goto illegal_op;
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            break;
+        case 0x70: /* pshufx insn */
+        case 0xc6: /* pshufx insn */
+            val = ldub_code(s->pc++);
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            break;
+        case 0xc2:
+            /* compare insns */
+            val = ldub_code(s->pc++);
+            if (val >= 8)
+                goto illegal_op;
+            sse_op2 = sse_op_table4[val][b1];
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            break;
+        case 0xf7:
+            /* maskmov : we must prepare A0 */
+            if (mod != 3)
+                goto illegal_op;
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                gen_op_movq_A0_reg(R_EDI);
+            } else
+#endif
+            {
+                gen_op_movl_A0_reg(R_EDI);
+                if (s->aflag == 0)
+                    gen_op_andl_A0_ffff();
+            }
+            gen_add_A0_ds_seg(s);
+
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_ptr1, cpu_A0);
+            break;
+        default:
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            break;
+        }
+        if (b == 0x2e || b == 0x2f) {
+            s->cc_op = CC_OP_EFLAGS;
+        }
+    }
+}
+
+/* Convert one instruction. s->is_jmp is set if the translation must
+   be stopped. Returns the next pc value. */
+static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
+{
+    int b, prefixes, aflag, dflag;
+    int shift, ot;
+    int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val;
+    target_ulong next_eip, tval;
+    int rex_w, rex_r;
+
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+        tcg_gen_debug_insn_start(pc_start);
+    s->pc = pc_start;
+    prefixes = 0;
+    aflag = s->code32;
+    dflag = s->code32;
+    s->override = -1;
+    rex_w = -1;
+    rex_r = 0;
+#ifdef TARGET_X86_64
+    s->rex_x = 0;
+    s->rex_b = 0;
+    x86_64_hregs = 0;
+#endif
+    s->rip_offset = 0; /* for relative ip address */
+ next_byte:
+    b = ldub_code(s->pc);
+    s->pc++;
+    /* check prefixes */
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        switch (b) {
+        case 0xf3:
+            prefixes |= PREFIX_REPZ;
+            goto next_byte;
+        case 0xf2:
+            prefixes |= PREFIX_REPNZ;
+            goto next_byte;
+        case 0xf0:
+            prefixes |= PREFIX_LOCK;
+            goto next_byte;
+        case 0x2e:
+            s->override = R_CS;
+            goto next_byte;
+        case 0x36:
+            s->override = R_SS;
+            goto next_byte;
+        case 0x3e:
+            s->override = R_DS;
+            goto next_byte;
+        case 0x26:
+            s->override = R_ES;
+            goto next_byte;
+        case 0x64:
+            s->override = R_FS;
+            goto next_byte;
+        case 0x65:
+            s->override = R_GS;
+            goto next_byte;
+        case 0x66:
+            prefixes |= PREFIX_DATA;
+            goto next_byte;
+        case 0x67:
+            prefixes |= PREFIX_ADR;
+            goto next_byte;
+        case 0x40 ... 0x4f:
+            /* REX prefix */
+            rex_w = (b >> 3) & 1;
+            rex_r = (b & 0x4) << 1;
+            s->rex_x = (b & 0x2) << 2;
+            REX_B(s) = (b & 0x1) << 3;
+            x86_64_hregs = 1; /* select uniform byte register addressing */
+            goto next_byte;
+        }
+        if (rex_w == 1) {
+            /* 0x66 is ignored if rex.w is set */
+            dflag = 2;
+        } else {
+            if (prefixes & PREFIX_DATA)
+                dflag ^= 1;
+        }
+        if (!(prefixes & PREFIX_ADR))
+            aflag = 2;
+    } else
+#endif
+    {
+        switch (b) {
+        case 0xf3:
+            prefixes |= PREFIX_REPZ;
+            goto next_byte;
+        case 0xf2:
+            prefixes |= PREFIX_REPNZ;
+            goto next_byte;
+        case 0xf0:
+            prefixes |= PREFIX_LOCK;
+            goto next_byte;
+        case 0x2e:
+            s->override = R_CS;
+            goto next_byte;
+        case 0x36:
+            s->override = R_SS;
+            goto next_byte;
+        case 0x3e:
+            s->override = R_DS;
+            goto next_byte;
+        case 0x26:
+            s->override = R_ES;
+            goto next_byte;
+        case 0x64:
+            s->override = R_FS;
+            goto next_byte;
+        case 0x65:
+            s->override = R_GS;
+            goto next_byte;
+        case 0x66:
+            prefixes |= PREFIX_DATA;
+            goto next_byte;
+        case 0x67:
+            prefixes |= PREFIX_ADR;
+            goto next_byte;
+        }
+        if (prefixes & PREFIX_DATA)
+            dflag ^= 1;
+        if (prefixes & PREFIX_ADR)
+            aflag ^= 1;
+    }
+
+    s->prefix = prefixes;
+    s->aflag = aflag;
+    s->dflag = dflag;
+
+    /* lock generation */
+    if (prefixes & PREFIX_LOCK)
+        gen_helper_lock();
+
+    /* now check op code */
+ reswitch:
+    switch(b) {
+    case 0x0f:
+        /**************************/
+        /* extended op code */
+        b = ldub_code(s->pc++) | 0x100;
+        goto reswitch;
+
+        /**************************/
+        /* arith & logic */
+    case 0x00 ... 0x05:
+    case 0x08 ... 0x0d:
+    case 0x10 ... 0x15:
+    case 0x18 ... 0x1d:
+    case 0x20 ... 0x25:
+    case 0x28 ... 0x2d:
+    case 0x30 ... 0x35:
+    case 0x38 ... 0x3d:
+        {
+            int op, f, val;
+            op = (b >> 3) & 7;
+            f = (b >> 1) & 3;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag + OT_WORD;
+
+            switch(f) {
+            case 0: /* OP Ev, Gv */
+                modrm = ldub_code(s->pc++);
+                reg = ((modrm >> 3) & 7) | rex_r;
+                mod = (modrm >> 6) & 3;
+                rm = (modrm & 7) | REX_B(s);
+                if (mod != 3) {
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    opreg = OR_TMP0;
+                } else if (op == OP_XORL && rm == reg) {
+                xor_zero:
+                    /* xor reg, reg optimisation */
+                    gen_op_movl_T0_0();
+                    s->cc_op = CC_OP_LOGICB + ot;
+                    gen_op_mov_reg_T0(ot, reg);
+                    gen_op_update1_cc();
+                    break;
+                } else {
+                    opreg = rm;
+                }
+                gen_op_mov_TN_reg(ot, 1, reg);
+                gen_op(s, op, ot, opreg);
+                break;
+            case 1: /* OP Gv, Ev */
+                modrm = ldub_code(s->pc++);
+                mod = (modrm >> 6) & 3;
+                reg = ((modrm >> 3) & 7) | rex_r;
+                rm = (modrm & 7) | REX_B(s);
+                if (mod != 3) {
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_op_ld_T1_A0(ot + s->mem_index);
+                } else if (op == OP_XORL && rm == reg) {
+                    goto xor_zero;
+                } else {
+                    gen_op_mov_TN_reg(ot, 1, rm);
+                }
+                gen_op(s, op, ot, reg);
+                break;
+            case 2: /* OP A, Iv */
+                val = insn_get(s, ot);
+                gen_op_movl_T1_im(val);
+                gen_op(s, op, ot, OR_EAX);
+                break;
+            }
+        }
+        break;
+
+    case 0x82:
+        if (CODE64(s))
+            goto illegal_op;
+    case 0x80: /* GRP1 */
+    case 0x81:
+    case 0x83:
+        {
+            int val;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag + OT_WORD;
+
+            modrm = ldub_code(s->pc++);
+            mod = (modrm >> 6) & 3;
+            rm = (modrm & 7) | REX_B(s);
+            op = (modrm >> 3) & 7;
+
+            if (mod != 3) {
+                if (b == 0x83)
+                    s->rip_offset = 1;
+                else
+                    s->rip_offset = insn_const_size(ot);
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                opreg = OR_TMP0;
+            } else {
+                opreg = rm;
+            }
+
+            switch(b) {
+            default:
+            case 0x80:
+            case 0x81:
+            case 0x82:
+                val = insn_get(s, ot);
+                break;
+            case 0x83:
+                val = (int8_t)insn_get(s, OT_BYTE);
+                break;
+            }
+            gen_op_movl_T1_im(val);
+            gen_op(s, op, ot, opreg);
+        }
+        break;
+
+        /**************************/
+        /* inc, dec, and other misc arith */
+    case 0x40 ... 0x47: /* inc Gv */
+        ot = dflag ? OT_LONG : OT_WORD;
+        gen_inc(s, ot, OR_EAX + (b & 7), 1);
+        break;
+    case 0x48 ... 0x4f: /* dec Gv */
+        ot = dflag ? OT_LONG : OT_WORD;
+        gen_inc(s, ot, OR_EAX + (b & 7), -1);
+        break;
+    case 0xf6: /* GRP3 */
+    case 0xf7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        op = (modrm >> 3) & 7;
+        if (mod != 3) {
+            if (op == 0)
+                s->rip_offset = insn_const_size(ot);
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T0_A0(ot + s->mem_index);
+        } else {
+            gen_op_mov_TN_reg(ot, 0, rm);
+        }
+
+        switch(op) {
+        case 0: /* test */
+            val = insn_get(s, ot);
+            gen_op_movl_T1_im(val);
+            gen_op_testl_T0_T1_cc();
+            s->cc_op = CC_OP_LOGICB + ot;
+            break;
+        case 2: /* not */
+            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
+            if (mod != 3) {
+                gen_op_st_T0_A0(ot + s->mem_index);
+            } else {
+                gen_op_mov_reg_T0(ot, rm);
+            }
+            break;
+        case 3: /* neg */
+            tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
+            if (mod != 3) {
+                gen_op_st_T0_A0(ot + s->mem_index);
+            } else {
+                gen_op_mov_reg_T0(ot, rm);
+            }
+            gen_op_update_neg_cc();
+            s->cc_op = CC_OP_SUBB + ot;
+            break;
+        case 4: /* mul */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+                tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
+                s->cc_op = CC_OP_MULB;
+                break;
+            case OT_WORD:
+                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+                gen_op_mov_reg_T0(OT_WORD, R_EDX);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+                s->cc_op = CC_OP_MULW;
+                break;
+            default:
+            case OT_LONG:
+#ifdef TARGET_X86_64
+                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+                gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+#else
+                {
+                    TCGv_i64 t0, t1;
+                    t0 = tcg_temp_new_i64();
+                    t1 = tcg_temp_new_i64();
+                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_extu_i32_i64(t1, cpu_T[1]);
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+                }
+#endif
+                s->cc_op = CC_OP_MULL;
+                break;
+#ifdef TARGET_X86_64
+            case OT_QUAD:
+                gen_helper_mulq_EAX_T0(cpu_T[0]);
+                s->cc_op = CC_OP_MULQ;
+                break;
+#endif
+            }
+            break;
+        case 5: /* imul */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+                tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                s->cc_op = CC_OP_MULB;
+                break;
+            case OT_WORD:
+                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+                tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+                gen_op_mov_reg_T0(OT_WORD, R_EDX);
+                s->cc_op = CC_OP_MULW;
+                break;
+            default:
+            case OT_LONG:
+#ifdef TARGET_X86_64
+                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+                gen_op_mov_reg_T0(OT_LONG, R_EDX);
+#else
+                {
+                    TCGv_i64 t0, t1;
+                    t0 = tcg_temp_new_i64();
+                    t1 = tcg_temp_new_i64();
+                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                }
+#endif
+                s->cc_op = CC_OP_MULL;
+                break;
+#ifdef TARGET_X86_64
+            case OT_QUAD:
+                gen_helper_imulq_EAX_T0(cpu_T[0]);
+                s->cc_op = CC_OP_MULQ;
+                break;
+#endif
+            }
+            break;
+        case 6: /* div */
+            switch(ot) {
+            case OT_BYTE:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_divb_AL(cpu_T[0]);
+                break;
+            case OT_WORD:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_divw_AX(cpu_T[0]);
+                break;
+            default:
+            case OT_LONG:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_divl_EAX(cpu_T[0]);
+                break;
+#ifdef TARGET_X86_64
+            case OT_QUAD:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_divq_EAX(cpu_T[0]);
+                break;
+#endif
+            }
+            break;
+        case 7: /* idiv */
+            switch(ot) {
+            case OT_BYTE:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_idivb_AL(cpu_T[0]);
+                break;
+            case OT_WORD:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_idivw_AX(cpu_T[0]);
+                break;
+            default:
+            case OT_LONG:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_idivl_EAX(cpu_T[0]);
+                break;
+#ifdef TARGET_X86_64
+            case OT_QUAD:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_idivq_EAX(cpu_T[0]);
+                break;
+#endif
+            }
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+
+    case 0xfe: /* GRP4 */
+    case 0xff: /* GRP5 */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        op = (modrm >> 3) & 7;
+        if (op >= 2 && b == 0xfe) {
+            goto illegal_op;
+        }
+        if (CODE64(s)) {
+            if (op == 2 || op == 4) {
+                /* operand size for near calls and jumps is 64 bit */
+                ot = OT_QUAD;
+            } else if (op == 3 || op == 5) {
+                /* for far calls and far jumps, the operand is 16 or
+                   32 bit, even in long mode */
+                ot = dflag ? OT_LONG : OT_WORD;
+            } else if (op == 6) {
+                /* default push size is 64 bit */
+                ot = dflag ? OT_QUAD : OT_WORD;
+            }
+        }
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            if (op >= 2 && op != 3 && op != 5)
+                gen_op_ld_T0_A0(ot + s->mem_index);
+        } else {
+            gen_op_mov_TN_reg(ot, 0, rm);
+        }
+
+        switch(op) {
+        case 0: /* inc Ev */
+            if (mod != 3)
+                opreg = OR_TMP0;
+            else
+                opreg = rm;
+            gen_inc(s, ot, opreg, 1);
+            break;
+        case 1: /* dec Ev */
+            if (mod != 3)
+                opreg = OR_TMP0;
+            else
+                opreg = rm;
+            gen_inc(s, ot, opreg, -1);
+            break;
+        case 2: /* call Ev */
+            /* XXX: optimize if memory (no 'and' is necessary) */
+            if (s->dflag == 0)
+                gen_op_andl_T0_ffff();
+            next_eip = s->pc - s->cs_base;
+            gen_movtl_T1_im(next_eip);
+            gen_push_T1(s);
+            gen_op_jmp_T0();
+            gen_eob(s);
+            break;
+        case 3: /* lcall Ev */
+            gen_op_ld_T1_A0(ot + s->mem_index);
+            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+        do_lcall:
+            if (s->pe && !s->vm86) {
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_helper_lcall_protected(cpu_tmp2_i32, cpu_T[1],
+                                           tcg_const_i32(dflag), 
+                                           tcg_const_i32(s->pc - pc_start));
+            } else {
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_helper_lcall_real(cpu_tmp2_i32, cpu_T[1],
+                                      tcg_const_i32(dflag), 
+                                      tcg_const_i32(s->pc - s->cs_base));
+            }
+            gen_eob(s);
+            break;
+        case 4: /* jmp Ev */
+            if (s->dflag == 0)
+                gen_op_andl_T0_ffff();
+            gen_op_jmp_T0();
+            gen_eob(s);
+            break;
+        case 5: /* ljmp Ev */
+            gen_op_ld_T1_A0(ot + s->mem_index);
+            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+        do_ljmp:
+            if (s->pe && !s->vm86) {
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_helper_ljmp_protected(cpu_tmp2_i32, cpu_T[1],
+                                          tcg_const_i32(s->pc - pc_start));
+            } else {
+                gen_op_movl_seg_T0_vm(R_CS);
+                gen_op_movl_T0_T1();
+                gen_op_jmp_T0();
+            }
+            gen_eob(s);
+            break;
+        case 6: /* push Ev */
+            gen_push_T0(s);
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+
+    case 0x84: /* test Ev, Gv */
+    case 0x85:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        reg = ((modrm >> 3) & 7) | rex_r;
+
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_op_mov_TN_reg(ot, 1, reg);
+        gen_op_testl_T0_T1_cc();
+        s->cc_op = CC_OP_LOGICB + ot;
+        break;
+
+    case 0xa8: /* test eAX, Iv */
+    case 0xa9:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        val = insn_get(s, ot);
+
+        gen_op_mov_TN_reg(ot, 0, OR_EAX);
+        gen_op_movl_T1_im(val);
+        gen_op_testl_T0_T1_cc();
+        s->cc_op = CC_OP_LOGICB + ot;
+        break;
+
+    case 0x98: /* CWDE/CBW */
+#ifdef TARGET_X86_64
+        if (dflag == 2) {
+            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+            gen_op_mov_reg_T0(OT_QUAD, R_EAX);
+        } else
+#endif
+        if (dflag == 1) {
+            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
+            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+            gen_op_mov_reg_T0(OT_LONG, R_EAX);
+        } else {
+            gen_op_mov_TN_reg(OT_BYTE, 0, R_EAX);
+            tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+            gen_op_mov_reg_T0(OT_WORD, R_EAX);
+        }
+        break;
+    case 0x99: /* CDQ/CWD */
+#ifdef TARGET_X86_64
+        if (dflag == 2) {
+            gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
+            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63);
+            gen_op_mov_reg_T0(OT_QUAD, R_EDX);
+        } else
+#endif
+        if (dflag == 1) {
+            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31);
+            gen_op_mov_reg_T0(OT_LONG, R_EDX);
+        } else {
+            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
+            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15);
+            gen_op_mov_reg_T0(OT_WORD, R_EDX);
+        }
+        break;
+    case 0x1af: /* imul Gv, Ev */
+    case 0x69: /* imul Gv, Ev, I */
+    case 0x6b:
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        if (b == 0x69)
+            s->rip_offset = insn_const_size(ot);
+        else if (b == 0x6b)
+            s->rip_offset = 1;
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        if (b == 0x69) {
+            val = insn_get(s, ot);
+            gen_op_movl_T1_im(val);
+        } else if (b == 0x6b) {
+            val = (int8_t)insn_get(s, OT_BYTE);
+            gen_op_movl_T1_im(val);
+        } else {
+            gen_op_mov_TN_reg(ot, 1, reg);
+        }
+
+#ifdef TARGET_X86_64
+        if (ot == OT_QUAD) {
+            gen_helper_imulq_T0_T1(cpu_T[0], cpu_T[0], cpu_T[1]);
+        } else
+#endif
+        if (ot == OT_LONG) {
+#ifdef TARGET_X86_64
+                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+#else
+                {
+                    TCGv_i64 t0, t1;
+                    t0 = tcg_temp_new_i64();
+                    t1 = tcg_temp_new_i64();
+                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_i32(cpu_T[1], t0);
+                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
+                }
+#endif
+        } else {
+            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+            /* XXX: use 32 bit mul which could be faster */
+            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+        }
+        gen_op_mov_reg_T0(ot, reg);
+        s->cc_op = CC_OP_MULB + ot;
+        break;
+    case 0x1c0:
+    case 0x1c1: /* xadd Ev, Gv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        if (mod == 3) {
+            rm = (modrm & 7) | REX_B(s);
+            gen_op_mov_TN_reg(ot, 0, reg);
+            gen_op_mov_TN_reg(ot, 1, rm);
+            gen_op_addl_T0_T1();
+            gen_op_mov_reg_T1(ot, reg);
+            gen_op_mov_reg_T0(ot, rm);
+        } else {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_mov_TN_reg(ot, 0, reg);
+            gen_op_ld_T1_A0(ot + s->mem_index);
+            gen_op_addl_T0_T1();
+            gen_op_st_T0_A0(ot + s->mem_index);
+            gen_op_mov_reg_T1(ot, reg);
+        }
+        gen_op_update2_cc();
+        s->cc_op = CC_OP_ADDB + ot;
+        break;
+    case 0x1b0:
+    case 0x1b1: /* cmpxchg Ev, Gv */
+        {
+            int label1, label2;
+            TCGv t0, t1, t2, a0;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag + OT_WORD;
+            modrm = ldub_code(s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            mod = (modrm >> 6) & 3;
+            t0 = tcg_temp_local_new();
+            t1 = tcg_temp_local_new();
+            t2 = tcg_temp_local_new();
+            a0 = tcg_temp_local_new();
+            gen_op_mov_v_reg(ot, t1, reg);
+            if (mod == 3) {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_mov_v_reg(ot, t0, rm);
+            } else {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                tcg_gen_mov_tl(a0, cpu_A0);
+                gen_op_ld_v(ot + s->mem_index, t0, a0);
+                rm = 0; /* avoid warning */
+            }
+            label1 = gen_new_label();
+            tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
+            tcg_gen_sub_tl(t2, t2, t0);
+            gen_extu(ot, t2);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
+            if (mod == 3) {
+                label2 = gen_new_label();
+                gen_op_mov_reg_v(ot, R_EAX, t0);
+                tcg_gen_br(label2);
+                gen_set_label(label1);
+                gen_op_mov_reg_v(ot, rm, t1);
+                gen_set_label(label2);
+            } else {
+                tcg_gen_mov_tl(t1, t0);
+                gen_op_mov_reg_v(ot, R_EAX, t0);
+                gen_set_label(label1);
+                /* always store */
+                gen_op_st_v(ot + s->mem_index, t1, a0);
+            }
+            tcg_gen_mov_tl(cpu_cc_src, t0);
+            tcg_gen_mov_tl(cpu_cc_dst, t2);
+            s->cc_op = CC_OP_SUBB + ot;
+            tcg_temp_free(t0);
+            tcg_temp_free(t1);
+            tcg_temp_free(t2);
+            tcg_temp_free(a0);
+        }
+        break;
+    case 0x1c7: /* cmpxchg8b */
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if ((mod == 3) || ((modrm & 0x38) != 0x8))
+            goto illegal_op;
+#ifdef TARGET_X86_64
+        if (dflag == 2) {
+            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
+                goto illegal_op;
+            gen_jmp_im(pc_start - s->cs_base);
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_helper_cmpxchg16b(cpu_A0);
+        } else
+#endif        
+        {
+            if (!(s->cpuid_features & CPUID_CX8))
+                goto illegal_op;
+            gen_jmp_im(pc_start - s->cs_base);
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_helper_cmpxchg8b(cpu_A0);
+        }
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+
+        /**************************/
+        /* push/pop */
+    case 0x50 ... 0x57: /* push */
+        gen_op_mov_TN_reg(OT_LONG, 0, (b & 7) | REX_B(s));
+        gen_push_T0(s);
+        break;
+    case 0x58 ... 0x5f: /* pop */
+        if (CODE64(s)) {
+            ot = dflag ? OT_QUAD : OT_WORD;
+        } else {
+            ot = dflag + OT_WORD;
+        }
+        gen_pop_T0(s);
+        /* NOTE: order is important for pop %sp */
+        gen_pop_update(s);
+        gen_op_mov_reg_T0(ot, (b & 7) | REX_B(s));
+        break;
+    case 0x60: /* pusha */
+        if (CODE64(s))
+            goto illegal_op;
+        gen_pusha(s);
+        break;
+    case 0x61: /* popa */
+        if (CODE64(s))
+            goto illegal_op;
+        gen_popa(s);
+        break;
+    case 0x68: /* push Iv */
+    case 0x6a:
+        if (CODE64(s)) {
+            ot = dflag ? OT_QUAD : OT_WORD;
+        } else {
+            ot = dflag + OT_WORD;
+        }
+        if (b == 0x68)
+            val = insn_get(s, ot);
+        else
+            val = (int8_t)insn_get(s, OT_BYTE);
+        gen_op_movl_T0_im(val);
+        gen_push_T0(s);
+        break;
+    case 0x8f: /* pop Ev */
+        if (CODE64(s)) {
+            ot = dflag ? OT_QUAD : OT_WORD;
+        } else {
+            ot = dflag + OT_WORD;
+        }
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        gen_pop_T0(s);
+        if (mod == 3) {
+            /* NOTE: order is important for pop %sp */
+            gen_pop_update(s);
+            rm = (modrm & 7) | REX_B(s);
+            gen_op_mov_reg_T0(ot, rm);
+        } else {
+            /* NOTE: order is important too for MMU exceptions */
+            s->popl_esp_hack = 1 << ot;
+            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            s->popl_esp_hack = 0;
+            gen_pop_update(s);
+        }
+        break;
+    case 0xc8: /* enter */
+        {
+            int level;
+            val = lduw_code(s->pc);
+            s->pc += 2;
+            level = ldub_code(s->pc++);
+            gen_enter(s, val, level);
+        }
+        break;
+    case 0xc9: /* leave */
+        /* XXX: exception not precise (ESP is updated before potential exception) */
+        if (CODE64(s)) {
+            gen_op_mov_TN_reg(OT_QUAD, 0, R_EBP);
+            gen_op_mov_reg_T0(OT_QUAD, R_ESP);
+        } else if (s->ss32) {
+            gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+            gen_op_mov_reg_T0(OT_LONG, R_ESP);
+        } else {
+            gen_op_mov_TN_reg(OT_WORD, 0, R_EBP);
+            gen_op_mov_reg_T0(OT_WORD, R_ESP);
+        }
+        gen_pop_T0(s);
+        if (CODE64(s)) {
+            ot = dflag ? OT_QUAD : OT_WORD;
+        } else {
+            ot = dflag + OT_WORD;
+        }
+        gen_op_mov_reg_T0(ot, R_EBP);
+        gen_pop_update(s);
+        break;
+    case 0x06: /* push es */
+    case 0x0e: /* push cs */
+    case 0x16: /* push ss */
+    case 0x1e: /* push ds */
+        if (CODE64(s))
+            goto illegal_op;
+        gen_op_movl_T0_seg(b >> 3);
+        gen_push_T0(s);
+        break;
+    case 0x1a0: /* push fs */
+    case 0x1a8: /* push gs */
+        gen_op_movl_T0_seg((b >> 3) & 7);
+        gen_push_T0(s);
+        break;
+    case 0x07: /* pop es */
+    case 0x17: /* pop ss */
+    case 0x1f: /* pop ds */
+        if (CODE64(s))
+            goto illegal_op;
+        reg = b >> 3;
+        gen_pop_T0(s);
+        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
+        gen_pop_update(s);
+        if (reg == R_SS) {
+            /* if reg == SS, inhibit interrupts/trace. */
+            /* If several instructions disable interrupts, only the
+               _first_ does it */
+            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+                gen_helper_set_inhibit_irq();
+            s->tf = 0;
+        }
+        if (s->is_jmp) {
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+    case 0x1a1: /* pop fs */
+    case 0x1a9: /* pop gs */
+        gen_pop_T0(s);
+        gen_movl_seg_T0(s, (b >> 3) & 7, pc_start - s->cs_base);
+        gen_pop_update(s);
+        if (s->is_jmp) {
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+
+        /**************************/
+        /* mov */
+    case 0x88:
+    case 0x89: /* mov Gv, Ev */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+
+        /* generate a generic store */
+        gen_ldst_modrm(s, modrm, ot, reg, 1);
+        break;
+    case 0xc6:
+    case 0xc7: /* mov Ev, Iv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if (mod != 3) {
+            s->rip_offset = insn_const_size(ot);
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        }
+        val = insn_get(s, ot);
+        gen_op_movl_T0_im(val);
+        if (mod != 3)
+            gen_op_st_T0_A0(ot + s->mem_index);
+        else
+            gen_op_mov_reg_T0(ot, (modrm & 7) | REX_B(s));
+        break;
+    case 0x8a:
+    case 0x8b: /* mov Ev, Gv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = OT_WORD + dflag;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_op_mov_reg_T0(ot, reg);
+        break;
+    case 0x8e: /* mov seg, Gv */
+        modrm = ldub_code(s->pc++);
+        reg = (modrm >> 3) & 7;
+        if (reg >= 6 || reg == R_CS)
+            goto illegal_op;
+        gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
+        if (reg == R_SS) {
+            /* if reg == SS, inhibit interrupts/trace */
+            /* If several instructions disable interrupts, only the
+               _first_ does it */
+            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+                gen_helper_set_inhibit_irq();
+            s->tf = 0;
+        }
+        if (s->is_jmp) {
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+    case 0x8c: /* mov Gv, seg */
+        modrm = ldub_code(s->pc++);
+        reg = (modrm >> 3) & 7;
+        mod = (modrm >> 6) & 3;
+        if (reg >= 6)
+            goto illegal_op;
+        gen_op_movl_T0_seg(reg);
+        if (mod == 3)
+            ot = OT_WORD + dflag;
+        else
+            ot = OT_WORD;
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+        break;
+
+    case 0x1b6: /* movzbS Gv, Eb */
+    case 0x1b7: /* movzwS Gv, Ew */
+    case 0x1be: /* movsbS Gv, Eb */
+    case 0x1bf: /* movswS Gv, Ew */
+        {
+            int d_ot;
+            /* d_ot is the size of destination */
+            d_ot = dflag + OT_WORD;
+            /* ot is the size of source */
+            ot = (b & 1) + OT_BYTE;
+            modrm = ldub_code(s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            mod = (modrm >> 6) & 3;
+            rm = (modrm & 7) | REX_B(s);
+
+            if (mod == 3) {
+                gen_op_mov_TN_reg(ot, 0, rm);
+                switch(ot | (b & 8)) {
+                case OT_BYTE:
+                    tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+                    break;
+                case OT_BYTE | 8:
+                    tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+                    break;
+                case OT_WORD:
+                    tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+                    break;
+                default:
+                case OT_WORD | 8:
+                    tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+                    break;
+                }
+                gen_op_mov_reg_T0(d_ot, reg);
+            } else {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                if (b & 8) {
+                    gen_op_lds_T0_A0(ot + s->mem_index);
+                } else {
+                    gen_op_ldu_T0_A0(ot + s->mem_index);
+                }
+                gen_op_mov_reg_T0(d_ot, reg);
+            }
+        }
+        break;
+
+    case 0x8d: /* lea */
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        reg = ((modrm >> 3) & 7) | rex_r;
+        /* we must ensure that no segment is added */
+        s->override = -1;
+        val = s->addseg;
+        s->addseg = 0;
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        s->addseg = val;
+        gen_op_mov_reg_A0(ot - OT_WORD, reg);
+        break;
+
+    case 0xa0: /* mov EAX, Ov */
+    case 0xa1:
+    case 0xa2: /* mov Ov, EAX */
+    case 0xa3:
+        {
+            target_ulong offset_addr;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag + OT_WORD;
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                offset_addr = ldq_code(s->pc);
+                s->pc += 8;
+                gen_op_movq_A0_im(offset_addr);
+            } else
+#endif
+            {
+                if (s->aflag) {
+                    offset_addr = insn_get(s, OT_LONG);
+                } else {
+                    offset_addr = insn_get(s, OT_WORD);
+                }
+                gen_op_movl_A0_im(offset_addr);
+            }
+            gen_add_A0_ds_seg(s);
+            if ((b & 2) == 0) {
+                gen_op_ld_T0_A0(ot + s->mem_index);
+                gen_op_mov_reg_T0(ot, R_EAX);
+            } else {
+                gen_op_mov_TN_reg(ot, 0, R_EAX);
+                gen_op_st_T0_A0(ot + s->mem_index);
+            }
+        }
+        break;
+    case 0xd7: /* xlat */
+#ifdef TARGET_X86_64
+        if (s->aflag == 2) {
+            gen_op_movq_A0_reg(R_EBX);
+            gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
+            tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff);
+            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
+        } else
+#endif
+        {
+            gen_op_movl_A0_reg(R_EBX);
+            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+            tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff);
+            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
+            if (s->aflag == 0)
+                gen_op_andl_A0_ffff();
+            else
+                tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+        }
+        gen_add_A0_ds_seg(s);
+        gen_op_ldu_T0_A0(OT_BYTE + s->mem_index);
+        gen_op_mov_reg_T0(OT_BYTE, R_EAX);
+        break;
+    case 0xb0 ... 0xb7: /* mov R, Ib */
+        val = insn_get(s, OT_BYTE);
+        gen_op_movl_T0_im(val);
+        gen_op_mov_reg_T0(OT_BYTE, (b & 7) | REX_B(s));
+        break;
+    case 0xb8 ... 0xbf: /* mov R, Iv */
+#ifdef TARGET_X86_64
+        if (dflag == 2) {
+            uint64_t tmp;
+            /* 64 bit case */
+            tmp = ldq_code(s->pc);
+            s->pc += 8;
+            reg = (b & 7) | REX_B(s);
+            gen_movtl_T0_im(tmp);
+            gen_op_mov_reg_T0(OT_QUAD, reg);
+        } else
+#endif
+        {
+            ot = dflag ? OT_LONG : OT_WORD;
+            val = insn_get(s, ot);
+            reg = (b & 7) | REX_B(s);
+            gen_op_movl_T0_im(val);
+            gen_op_mov_reg_T0(ot, reg);
+        }
+        break;
+
+    case 0x91 ... 0x97: /* xchg R, EAX */
+        ot = dflag + OT_WORD;
+        reg = (b & 7) | REX_B(s);
+        rm = R_EAX;
+        goto do_xchg_reg;
+    case 0x86:
+    case 0x87: /* xchg Ev, Gv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        if (mod == 3) {
+            rm = (modrm & 7) | REX_B(s);
+        do_xchg_reg:
+            gen_op_mov_TN_reg(ot, 0, reg);
+            gen_op_mov_TN_reg(ot, 1, rm);
+            gen_op_mov_reg_T0(ot, rm);
+            gen_op_mov_reg_T1(ot, reg);
+        } else {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_mov_TN_reg(ot, 0, reg);
+            /* for xchg, lock is implicit */
+            if (!(prefixes & PREFIX_LOCK))
+                gen_helper_lock();
+            gen_op_ld_T1_A0(ot + s->mem_index);
+            gen_op_st_T0_A0(ot + s->mem_index);
+            if (!(prefixes & PREFIX_LOCK))
+                gen_helper_unlock();
+            gen_op_mov_reg_T1(ot, reg);
+        }
+        break;
+    case 0xc4: /* les Gv */
+        if (CODE64(s))
+            goto illegal_op;
+        op = R_ES;
+        goto do_lxx;
+    case 0xc5: /* lds Gv */
+        if (CODE64(s))
+            goto illegal_op;
+        op = R_DS;
+        goto do_lxx;
+    case 0x1b2: /* lss Gv */
+        op = R_SS;
+        goto do_lxx;
+    case 0x1b4: /* lfs Gv */
+        op = R_FS;
+        goto do_lxx;
+    case 0x1b5: /* lgs Gv */
+        op = R_GS;
+    do_lxx:
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_op_ld_T1_A0(ot + s->mem_index);
+        gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+        /* load the segment first to handle exceptions properly */
+        gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+        gen_movl_seg_T0(s, op, pc_start - s->cs_base);
+        /* then put the data */
+        gen_op_mov_reg_T1(ot, reg);
+        if (s->is_jmp) {
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+
+        /************************/
+        /* shifts */
+    case 0xc0:
+    case 0xc1:
+        /* shift Ev,Ib */
+        shift = 2;
+    grp2:
+        {
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag + OT_WORD;
+
+            modrm = ldub_code(s->pc++);
+            mod = (modrm >> 6) & 3;
+            op = (modrm >> 3) & 7;
+
+            if (mod != 3) {
+                if (shift == 2) {
+                    s->rip_offset = 1;
+                }
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                opreg = OR_TMP0;
+            } else {
+                opreg = (modrm & 7) | REX_B(s);
+            }
+
+            /* simpler op */
+            if (shift == 0) {
+                gen_shift(s, op, ot, opreg, OR_ECX);
+            } else {
+                if (shift == 2) {
+                    shift = ldub_code(s->pc++);
+                }
+                gen_shifti(s, op, ot, opreg, shift);
+            }
+        }
+        break;
+    case 0xd0:
+    case 0xd1:
+        /* shift Ev,1 */
+        shift = 1;
+        goto grp2;
+    case 0xd2:
+    case 0xd3:
+        /* shift Ev,cl */
+        shift = 0;
+        goto grp2;
+
+    case 0x1a4: /* shld imm */
+        op = 0;
+        shift = 1;
+        goto do_shiftd;
+    case 0x1a5: /* shld cl */
+        op = 0;
+        shift = 0;
+        goto do_shiftd;
+    case 0x1ac: /* shrd imm */
+        op = 1;
+        shift = 1;
+        goto do_shiftd;
+    case 0x1ad: /* shrd cl */
+        op = 1;
+        shift = 0;
+    do_shiftd:
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            opreg = OR_TMP0;
+        } else {
+            opreg = rm;
+        }
+        gen_op_mov_TN_reg(ot, 1, reg);
+
+        if (shift) {
+            val = ldub_code(s->pc++);
+            tcg_gen_movi_tl(cpu_T3, val);
+        } else {
+            tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUState, regs[R_ECX]));
+        }
+        gen_shiftd_rm_T1_T3(s, ot, opreg, op);
+        break;
+
+        /************************/
+        /* floats */
+    case 0xd8 ... 0xdf:
+        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
+            /* if CR0.EM or CR0.TS are set, generate an FPU exception */
+            /* XXX: what to do if illegal op ? */
+            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+            break;
+        }
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+        if (mod != 3) {
+            /* memory op */
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            switch(op) {
+            case 0x00 ... 0x07: /* fxxxs */
+            case 0x10 ... 0x17: /* fixxxl */
+            case 0x20 ... 0x27: /* fxxxl */
+            case 0x30 ... 0x37: /* fixxx */
+                {
+                    int op1;
+                    op1 = op & 7;
+
+                    switch(op >> 4) {
+                    case 0:
+                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_flds_FT0(cpu_tmp2_i32);
+                        break;
+                    case 1:
+                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_fildl_FT0(cpu_tmp2_i32);
+                        break;
+                    case 2:
+                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
+                                          (s->mem_index >> 2) - 1);
+                        gen_helper_fldl_FT0(cpu_tmp1_i64);
+                        break;
+                    case 3:
+                    default:
+                        gen_op_lds_T0_A0(OT_WORD + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_fildl_FT0(cpu_tmp2_i32);
+                        break;
+                    }
+
+                    gen_helper_fp_arith_ST0_FT0(op1);
+                    if (op1 == 3) {
+                        /* fcomp needs pop */
+                        gen_helper_fpop();
+                    }
+                }
+                break;
+            case 0x08: /* flds */
+            case 0x0a: /* fsts */
+            case 0x0b: /* fstps */
+            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
+            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
+            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
+                switch(op & 7) {
+                case 0:
+                    switch(op >> 4) {
+                    case 0:
+                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_flds_ST0(cpu_tmp2_i32);
+                        break;
+                    case 1:
+                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_fildl_ST0(cpu_tmp2_i32);
+                        break;
+                    case 2:
+                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
+                                          (s->mem_index >> 2) - 1);
+                        gen_helper_fldl_ST0(cpu_tmp1_i64);
+                        break;
+                    case 3:
+                    default:
+                        gen_op_lds_T0_A0(OT_WORD + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_fildl_ST0(cpu_tmp2_i32);
+                        break;
+                    }
+                    break;
+                case 1:
+                    /* XXX: the corresponding CPUID bit must be tested ! */
+                    switch(op >> 4) {
+                    case 1:
+                        gen_helper_fisttl_ST0(cpu_tmp2_i32);
+                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                        gen_op_st_T0_A0(OT_LONG + s->mem_index);
+                        break;
+                    case 2:
+                        gen_helper_fisttll_ST0(cpu_tmp1_i64);
+                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
+                                          (s->mem_index >> 2) - 1);
+                        break;
+                    case 3:
+                    default:
+                        gen_helper_fistt_ST0(cpu_tmp2_i32);
+                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                        gen_op_st_T0_A0(OT_WORD + s->mem_index);
+                        break;
+                    }
+                    gen_helper_fpop();
+                    break;
+                default:
+                    switch(op >> 4) {
+                    case 0:
+                        gen_helper_fsts_ST0(cpu_tmp2_i32);
+                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                        gen_op_st_T0_A0(OT_LONG + s->mem_index);
+                        break;
+                    case 1:
+                        gen_helper_fistl_ST0(cpu_tmp2_i32);
+                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                        gen_op_st_T0_A0(OT_LONG + s->mem_index);
+                        break;
+                    case 2:
+                        gen_helper_fstl_ST0(cpu_tmp1_i64);
+                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
+                                          (s->mem_index >> 2) - 1);
+                        break;
+                    case 3:
+                    default:
+                        gen_helper_fist_ST0(cpu_tmp2_i32);
+                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                        gen_op_st_T0_A0(OT_WORD + s->mem_index);
+                        break;
+                    }
+                    if ((op & 7) == 3)
+                        gen_helper_fpop();
+                    break;
+                }
+                break;
+            case 0x0c: /* fldenv mem */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_fldenv(
+                                   cpu_A0, tcg_const_i32(s->dflag));
+                break;
+            case 0x0d: /* fldcw mem */
+                gen_op_ld_T0_A0(OT_WORD + s->mem_index);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_helper_fldcw(cpu_tmp2_i32);
+                break;
+            case 0x0e: /* fnstenv mem */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_fstenv(cpu_A0, tcg_const_i32(s->dflag));
+                break;
+            case 0x0f: /* fnstcw mem */
+                gen_helper_fnstcw(cpu_tmp2_i32);
+                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                gen_op_st_T0_A0(OT_WORD + s->mem_index);
+                break;
+            case 0x1d: /* fldt mem */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_fldt_ST0(cpu_A0);
+                break;
+            case 0x1f: /* fstpt mem */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_fstt_ST0(cpu_A0);
+                gen_helper_fpop();
+                break;
+            case 0x2c: /* frstor mem */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_frstor(cpu_A0, tcg_const_i32(s->dflag));
+                break;
+            case 0x2e: /* fnsave mem */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_fsave(cpu_A0, tcg_const_i32(s->dflag));
+                break;
+            case 0x2f: /* fnstsw mem */
+                gen_helper_fnstsw(cpu_tmp2_i32);
+                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                gen_op_st_T0_A0(OT_WORD + s->mem_index);
+                break;
+            case 0x3c: /* fbld */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_fbld_ST0(cpu_A0);
+                break;
+            case 0x3e: /* fbstp */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_helper_fbst_ST0(cpu_A0);
+                gen_helper_fpop();
+                break;
+            case 0x3d: /* fildll */
+                tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
+                                  (s->mem_index >> 2) - 1);
+                gen_helper_fildll_ST0(cpu_tmp1_i64);
+                break;
+            case 0x3f: /* fistpll */
+                gen_helper_fistll_ST0(cpu_tmp1_i64);
+                tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
+                                  (s->mem_index >> 2) - 1);
+                gen_helper_fpop();
+                break;
+            default:
+                goto illegal_op;
+            }
+        } else {
+            /* register float ops */
+            opreg = rm;
+
+            switch(op) {
+            case 0x08: /* fld sti */
+                gen_helper_fpush();
+                gen_helper_fmov_ST0_STN(tcg_const_i32((opreg + 1) & 7));
+                break;
+            case 0x09: /* fxchg sti */
+            case 0x29: /* fxchg4 sti, undocumented op */
+            case 0x39: /* fxchg7 sti, undocumented op */
+                gen_helper_fxchg_ST0_STN(tcg_const_i32(opreg));
+                break;
+            case 0x0a: /* grp d9/2 */
+                switch(rm) {
+                case 0: /* fnop */
+                    /* check exceptions (FreeBSD FPU probe) */
+                    if (s->cc_op != CC_OP_DYNAMIC)
+                        gen_op_set_cc_op(s->cc_op);
+                    gen_jmp_im(pc_start - s->cs_base);
+                    gen_helper_fwait();
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x0c: /* grp d9/4 */
+                switch(rm) {
+                case 0: /* fchs */
+                    gen_helper_fchs_ST0();
+                    break;
+                case 1: /* fabs */
+                    gen_helper_fabs_ST0();
+                    break;
+                case 4: /* ftst */
+                    gen_helper_fldz_FT0();
+                    gen_helper_fcom_ST0_FT0();
+                    break;
+                case 5: /* fxam */
+                    gen_helper_fxam_ST0();
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x0d: /* grp d9/5 */
+                {
+                    switch(rm) {
+                    case 0:
+                        gen_helper_fpush();
+                        gen_helper_fld1_ST0();
+                        break;
+                    case 1:
+                        gen_helper_fpush();
+                        gen_helper_fldl2t_ST0();
+                        break;
+                    case 2:
+                        gen_helper_fpush();
+                        gen_helper_fldl2e_ST0();
+                        break;
+                    case 3:
+                        gen_helper_fpush();
+                        gen_helper_fldpi_ST0();
+                        break;
+                    case 4:
+                        gen_helper_fpush();
+                        gen_helper_fldlg2_ST0();
+                        break;
+                    case 5:
+                        gen_helper_fpush();
+                        gen_helper_fldln2_ST0();
+                        break;
+                    case 6:
+                        gen_helper_fpush();
+                        gen_helper_fldz_ST0();
+                        break;
+                    default:
+                        goto illegal_op;
+                    }
+                }
+                break;
+            case 0x0e: /* grp d9/6 */
+                switch(rm) {
+                case 0: /* f2xm1 */
+                    gen_helper_f2xm1();
+                    break;
+                case 1: /* fyl2x */
+                    gen_helper_fyl2x();
+                    break;
+                case 2: /* fptan */
+                    gen_helper_fptan();
+                    break;
+                case 3: /* fpatan */
+                    gen_helper_fpatan();
+                    break;
+                case 4: /* fxtract */
+                    gen_helper_fxtract();
+                    break;
+                case 5: /* fprem1 */
+                    gen_helper_fprem1();
+                    break;
+                case 6: /* fdecstp */
+                    gen_helper_fdecstp();
+                    break;
+                default:
+                case 7: /* fincstp */
+                    gen_helper_fincstp();
+                    break;
+                }
+                break;
+            case 0x0f: /* grp d9/7 */
+                switch(rm) {
+                case 0: /* fprem */
+                    gen_helper_fprem();
+                    break;
+                case 1: /* fyl2xp1 */
+                    gen_helper_fyl2xp1();
+                    break;
+                case 2: /* fsqrt */
+                    gen_helper_fsqrt();
+                    break;
+                case 3: /* fsincos */
+                    gen_helper_fsincos();
+                    break;
+                case 5: /* fscale */
+                    gen_helper_fscale();
+                    break;
+                case 4: /* frndint */
+                    gen_helper_frndint();
+                    break;
+                case 6: /* fsin */
+                    gen_helper_fsin();
+                    break;
+                default:
+                case 7: /* fcos */
+                    gen_helper_fcos();
+                    break;
+                }
+                break;
+            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
+            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
+            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
+                {
+                    int op1;
+
+                    op1 = op & 7;
+                    if (op >= 0x20) {
+                        gen_helper_fp_arith_STN_ST0(op1, opreg);
+                        if (op >= 0x30)
+                            gen_helper_fpop();
+                    } else {
+                        gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+                        gen_helper_fp_arith_ST0_FT0(op1);
+                    }
+                }
+                break;
+            case 0x02: /* fcom */
+            case 0x22: /* fcom2, undocumented op */
+                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+                gen_helper_fcom_ST0_FT0();
+                break;
+            case 0x03: /* fcomp */
+            case 0x23: /* fcomp3, undocumented op */
+            case 0x32: /* fcomp5, undocumented op */
+                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+                gen_helper_fcom_ST0_FT0();
+                gen_helper_fpop();
+                break;
+            case 0x15: /* da/5 */
+                switch(rm) {
+                case 1: /* fucompp */
+                    gen_helper_fmov_FT0_STN(tcg_const_i32(1));
+                    gen_helper_fucom_ST0_FT0();
+                    gen_helper_fpop();
+                    gen_helper_fpop();
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x1c:
+                switch(rm) {
+                case 0: /* feni (287 only, just do nop here) */
+                    break;
+                case 1: /* fdisi (287 only, just do nop here) */
+                    break;
+                case 2: /* fclex */
+                    gen_helper_fclex();
+                    break;
+                case 3: /* fninit */
+                    gen_helper_fninit();
+                    break;
+                case 4: /* fsetpm (287 only, just do nop here) */
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x1d: /* fucomi */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+                gen_helper_fucomi_ST0_FT0();
+                s->cc_op = CC_OP_EFLAGS;
+                break;
+            case 0x1e: /* fcomi */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+                gen_helper_fcomi_ST0_FT0();
+                s->cc_op = CC_OP_EFLAGS;
+                break;
+            case 0x28: /* ffree sti */
+                gen_helper_ffree_STN(tcg_const_i32(opreg));
+                break;
+            case 0x2a: /* fst sti */
+                gen_helper_fmov_STN_ST0(tcg_const_i32(opreg));
+                break;
+            case 0x2b: /* fstp sti */
+            case 0x0b: /* fstp1 sti, undocumented op */
+            case 0x3a: /* fstp8 sti, undocumented op */
+            case 0x3b: /* fstp9 sti, undocumented op */
+                gen_helper_fmov_STN_ST0(tcg_const_i32(opreg));
+                gen_helper_fpop();
+                break;
+            case 0x2c: /* fucom st(i) */
+                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+                gen_helper_fucom_ST0_FT0();
+                break;
+            case 0x2d: /* fucomp st(i) */
+                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+                gen_helper_fucom_ST0_FT0();
+                gen_helper_fpop();
+                break;
+            case 0x33: /* de/3 */
+                switch(rm) {
+                case 1: /* fcompp */
+                    gen_helper_fmov_FT0_STN(tcg_const_i32(1));
+                    gen_helper_fcom_ST0_FT0();
+                    gen_helper_fpop();
+                    gen_helper_fpop();
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x38: /* ffreep sti, undocumented op */
+                gen_helper_ffree_STN(tcg_const_i32(opreg));
+                gen_helper_fpop();
+                break;
+            case 0x3c: /* df/4 */
+                switch(rm) {
+                case 0:
+                    gen_helper_fnstsw(cpu_tmp2_i32);
+                    tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                    gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x3d: /* fucomip */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+                gen_helper_fucomi_ST0_FT0();
+                gen_helper_fpop();
+                s->cc_op = CC_OP_EFLAGS;
+                break;
+            case 0x3e: /* fcomip */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+                gen_helper_fcomi_ST0_FT0();
+                gen_helper_fpop();
+                s->cc_op = CC_OP_EFLAGS;
+                break;
+            case 0x10 ... 0x13: /* fcmovxx */
+            case 0x18 ... 0x1b:
+                {
+                    int op1, l1;
+                    static const uint8_t fcmov_cc[8] = {
+                        (JCC_B << 1),
+                        (JCC_Z << 1),
+                        (JCC_BE << 1),
+                        (JCC_P << 1),
+                    };
+                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
+                    l1 = gen_new_label();
+                    gen_jcc1(s, s->cc_op, op1, l1);
+                    gen_helper_fmov_ST0_STN(tcg_const_i32(opreg));
+                    gen_set_label(l1);
+                }
+                break;
+            default:
+                goto illegal_op;
+            }
+        }
+        break;
+        /************************/
+        /* string ops */
+
+    case 0xa4: /* movsS */
+    case 0xa5:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+
+        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+        } else {
+            gen_movs(s, ot);
+        }
+        break;
+
+    case 0xaa: /* stosS */
+    case 0xab:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+
+        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+        } else {
+            gen_stos(s, ot);
+        }
+        break;
+    case 0xac: /* lodsS */
+    case 0xad:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+        } else {
+            gen_lods(s, ot);
+        }
+        break;
+    case 0xae: /* scasS */
+    case 0xaf:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        if (prefixes & PREFIX_REPNZ) {
+            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+        } else if (prefixes & PREFIX_REPZ) {
+            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+        } else {
+            gen_scas(s, ot);
+            s->cc_op = CC_OP_SUBB + ot;
+        }
+        break;
+
+    case 0xa6: /* cmpsS */
+    case 0xa7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        if (prefixes & PREFIX_REPNZ) {
+            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+        } else if (prefixes & PREFIX_REPZ) {
+            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+        } else {
+            gen_cmps(s, ot);
+            s->cc_op = CC_OP_SUBB + ot;
+        }
+        break;
+    case 0x6c: /* insS */
+    case 0x6d:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+        gen_op_andl_T0_ffff();
+        gen_check_io(s, ot, pc_start - s->cs_base, 
+                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
+        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+        } else {
+            gen_ins(s, ot);
+            if (use_icount) {
+                gen_jmp(s, s->pc - s->cs_base);
+            }
+        }
+        break;
+    case 0x6e: /* outsS */
+    case 0x6f:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+        gen_op_andl_T0_ffff();
+        gen_check_io(s, ot, pc_start - s->cs_base,
+                     svm_is_rep(prefixes) | 4);
+        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+        } else {
+            gen_outs(s, ot);
+            if (use_icount) {
+                gen_jmp(s, s->pc - s->cs_base);
+            }
+        }
+        break;
+
+        /************************/
+        /* port I/O */
+
+    case 0xe4:
+    case 0xe5:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        val = ldub_code(s->pc++);
+        gen_op_movl_T0_im(val);
+        gen_check_io(s, ot, pc_start - s->cs_base,
+                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
+        if (use_icount)
+            gen_io_start();
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
+        gen_op_mov_reg_T1(ot, R_EAX);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
+        break;
+    case 0xe6:
+    case 0xe7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        val = ldub_code(s->pc++);
+        gen_op_movl_T0_im(val);
+        gen_check_io(s, ot, pc_start - s->cs_base,
+                     svm_is_rep(prefixes));
+        gen_op_mov_TN_reg(ot, 1, R_EAX);
+
+        if (use_icount)
+            gen_io_start();
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
+        break;
+    case 0xec:
+    case 0xed:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+        gen_op_andl_T0_ffff();
+        gen_check_io(s, ot, pc_start - s->cs_base,
+                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
+        if (use_icount)
+            gen_io_start();
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
+        gen_op_mov_reg_T1(ot, R_EAX);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
+        break;
+    case 0xee:
+    case 0xef:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+        gen_op_andl_T0_ffff();
+        gen_check_io(s, ot, pc_start - s->cs_base,
+                     svm_is_rep(prefixes));
+        gen_op_mov_TN_reg(ot, 1, R_EAX);
+
+        if (use_icount)
+            gen_io_start();
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
+        break;
+
+        /************************/
+        /* control */
+    case 0xc2: /* ret im */
+        val = ldsw_code(s->pc);
+        s->pc += 2;
+        gen_pop_T0(s);
+        if (CODE64(s) && s->dflag)
+            s->dflag = 2;
+        gen_stack_update(s, val + (2 << s->dflag));
+        if (s->dflag == 0)
+            gen_op_andl_T0_ffff();
+        gen_op_jmp_T0();
+        gen_eob(s);
+        break;
+    case 0xc3: /* ret */
+        gen_pop_T0(s);
+        gen_pop_update(s);
+        if (s->dflag == 0)
+            gen_op_andl_T0_ffff();
+        gen_op_jmp_T0();
+        gen_eob(s);
+        break;
+    case 0xca: /* lret im */
+        val = ldsw_code(s->pc);
+        s->pc += 2;
+    do_lret:
+        if (s->pe && !s->vm86) {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_lret_protected(tcg_const_i32(s->dflag),
+                                      tcg_const_i32(val));
+        } else {
+            gen_stack_A0(s);
+            /* pop offset */
+            gen_op_ld_T0_A0(1 + s->dflag + s->mem_index);
+            if (s->dflag == 0)
+                gen_op_andl_T0_ffff();
+            /* NOTE: keeping EIP updated is not a problem in case of
+               exception */
+            gen_op_jmp_T0();
+            /* pop selector */
+            gen_op_addl_A0_im(2 << s->dflag);
+            gen_op_ld_T0_A0(1 + s->dflag + s->mem_index);
+            gen_op_movl_seg_T0_vm(R_CS);
+            /* add stack offset */
+            gen_stack_update(s, val + (4 << s->dflag));
+        }
+        gen_eob(s);
+        break;
+    case 0xcb: /* lret */
+        val = 0;
+        goto do_lret;
+    case 0xcf: /* iret */
+        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
+        if (!s->pe) {
+            /* real mode */
+            gen_helper_iret_real(tcg_const_i32(s->dflag));
+            s->cc_op = CC_OP_EFLAGS;
+        } else if (s->vm86) {
+            if (s->iopl != 3) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                gen_helper_iret_real(tcg_const_i32(s->dflag));
+                s->cc_op = CC_OP_EFLAGS;
+            }
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_iret_protected(tcg_const_i32(s->dflag), 
+                                      tcg_const_i32(s->pc - s->cs_base));
+            s->cc_op = CC_OP_EFLAGS;
+        }
+        gen_eob(s);
+        break;
+    case 0xe8: /* call im */
+        {
+            if (dflag)
+                tval = (int32_t)insn_get(s, OT_LONG);
+            else
+                tval = (int16_t)insn_get(s, OT_WORD);
+            next_eip = s->pc - s->cs_base;
+            tval += next_eip;
+            if (s->dflag == 0)
+                tval &= 0xffff;
+            gen_movtl_T0_im(next_eip);
+            gen_push_T0(s);
+            gen_jmp(s, tval);
+        }
+        break;
+    case 0x9a: /* lcall im */
+        {
+            unsigned int selector, offset;
+
+            if (CODE64(s))
+                goto illegal_op;
+            ot = dflag ? OT_LONG : OT_WORD;
+            offset = insn_get(s, ot);
+            selector = insn_get(s, OT_WORD);
+
+            gen_op_movl_T0_im(selector);
+            gen_op_movl_T1_imu(offset);
+        }
+        goto do_lcall;
+    case 0xe9: /* jmp im */
+        if (dflag)
+            tval = (int32_t)insn_get(s, OT_LONG);
+        else
+            tval = (int16_t)insn_get(s, OT_WORD);
+        tval += s->pc - s->cs_base;
+        if (s->dflag == 0)
+            tval &= 0xffff;
+        else if(!CODE64(s))
+            tval &= 0xffffffff;
+        gen_jmp(s, tval);
+        break;
+    case 0xea: /* ljmp im */
+        {
+            unsigned int selector, offset;
+
+            if (CODE64(s))
+                goto illegal_op;
+            ot = dflag ? OT_LONG : OT_WORD;
+            offset = insn_get(s, ot);
+            selector = insn_get(s, OT_WORD);
+
+            gen_op_movl_T0_im(selector);
+            gen_op_movl_T1_imu(offset);
+        }
+        goto do_ljmp;
+    case 0xeb: /* jmp Jb */
+        tval = (int8_t)insn_get(s, OT_BYTE);
+        tval += s->pc - s->cs_base;
+        if (s->dflag == 0)
+            tval &= 0xffff;
+        gen_jmp(s, tval);
+        break;
+    case 0x70 ... 0x7f: /* jcc Jb */
+        tval = (int8_t)insn_get(s, OT_BYTE);
+        goto do_jcc;
+    case 0x180 ... 0x18f: /* jcc Jv */
+        if (dflag) {
+            tval = (int32_t)insn_get(s, OT_LONG);
+        } else {
+            tval = (int16_t)insn_get(s, OT_WORD);
+        }
+    do_jcc:
+        next_eip = s->pc - s->cs_base;
+        tval += next_eip;
+        if (s->dflag == 0)
+            tval &= 0xffff;
+        gen_jcc(s, b, tval, next_eip);
+        break;
+
+    case 0x190 ... 0x19f: /* setcc Gv */
+        modrm = ldub_code(s->pc++);
+        gen_setcc(s, b);
+        gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1);
+        break;
+    case 0x140 ... 0x14f: /* cmov Gv, Ev */
+        {
+            int l1;
+            TCGv t0;
+
+            ot = dflag + OT_WORD;
+            modrm = ldub_code(s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            mod = (modrm >> 6) & 3;
+            t0 = tcg_temp_local_new();
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_mov_v_reg(ot, t0, rm);
+            }
+#ifdef TARGET_X86_64
+            if (ot == OT_LONG) {
+                /* XXX: specific Intel behaviour ? */
+                l1 = gen_new_label();
+                gen_jcc1(s, s->cc_op, b ^ 1, l1);
+                tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+                gen_set_label(l1);
+                tcg_gen_movi_tl(cpu_tmp0, 0);
+                tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+            } else
+#endif
+            {
+                l1 = gen_new_label();
+                gen_jcc1(s, s->cc_op, b ^ 1, l1);
+                gen_op_mov_reg_v(ot, reg, t0);
+                gen_set_label(l1);
+            }
+            tcg_temp_free(t0);
+        }
+        break;
+
+        /************************/
+        /* flags */
+    case 0x9c: /* pushf */
+        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
+        if (s->vm86 && s->iopl != 3) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_helper_read_eflags(cpu_T[0]);
+            gen_push_T0(s);
+        }
+        break;
+    case 0x9d: /* popf */
+        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
+        if (s->vm86 && s->iopl != 3) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            gen_pop_T0(s);
+            if (s->cpl == 0) {
+                if (s->dflag) {
+                    gen_helper_write_eflags(cpu_T[0],
+                                       tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK)));
+                } else {
+                    gen_helper_write_eflags(cpu_T[0],
+                                       tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK) & 0xffff));
+                }
+            } else {
+                if (s->cpl <= s->iopl) {
+                    if (s->dflag) {
+                        gen_helper_write_eflags(cpu_T[0],
+                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK)));
+                    } else {
+                        gen_helper_write_eflags(cpu_T[0],
+                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK) & 0xffff));
+                    }
+                } else {
+                    if (s->dflag) {
+                        gen_helper_write_eflags(cpu_T[0],
+                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK)));
+                    } else {
+                        gen_helper_write_eflags(cpu_T[0],
+                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff));
+                    }
+                }
+            }
+            gen_pop_update(s);
+            s->cc_op = CC_OP_EFLAGS;
+            /* abort translation because TF flag may change */
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+    case 0x9e: /* sahf */
+        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
+            goto illegal_op;
+        gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_compute_eflags(cpu_cc_src);
+        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
+        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
+        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x9f: /* lahf */
+        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_compute_eflags(cpu_T[0]);
+        /* Note: gen_compute_eflags() only gives the condition codes */
+        tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
+        gen_op_mov_reg_T0(OT_BYTE, R_AH);
+        break;
+    case 0xf5: /* cmc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_compute_eflags(cpu_cc_src);
+        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xf8: /* clc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_compute_eflags(cpu_cc_src);
+        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xf9: /* stc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_compute_eflags(cpu_cc_src);
+        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xfc: /* cld */
+        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
+        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df));
+        break;
+    case 0xfd: /* std */
+        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
+        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df));
+        break;
+
+        /************************/
+        /* bit operations */
+    case 0x1ba: /* bt/bts/btr/btc Gv, im */
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        op = (modrm >> 3) & 7;
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        if (mod != 3) {
+            s->rip_offset = 1;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T0_A0(ot + s->mem_index);
+        } else {
+            gen_op_mov_TN_reg(ot, 0, rm);
+        }
+        /* load shift */
+        val = ldub_code(s->pc++);
+        gen_op_movl_T1_im(val);
+        if (op < 4)
+            goto illegal_op;
+        op -= 4;
+        goto bt_op;
+    case 0x1a3: /* bt Gv, Ev */
+        op = 0;
+        goto do_btx;
+    case 0x1ab: /* bts */
+        op = 1;
+        goto do_btx;
+    case 0x1b3: /* btr */
+        op = 2;
+        goto do_btx;
+    case 0x1bb: /* btc */
+        op = 3;
+    do_btx:
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        gen_op_mov_TN_reg(OT_LONG, 1, reg);
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            /* specific case: we need to add a displacement */
+            gen_exts(ot, cpu_T[1]);
+            tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
+            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
+            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+            gen_op_ld_T0_A0(ot + s->mem_index);
+        } else {
+            gen_op_mov_TN_reg(ot, 0, rm);
+        }
+    bt_op:
+        tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
+        switch(op) {
+        case 0:
+            tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]);
+            tcg_gen_movi_tl(cpu_cc_dst, 0);
+            break;
+        case 1:
+            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+            tcg_gen_movi_tl(cpu_tmp0, 1);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            break;
+        case 2:
+            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+            tcg_gen_movi_tl(cpu_tmp0, 1);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+            tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
+            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            break;
+        default:
+        case 3:
+            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+            tcg_gen_movi_tl(cpu_tmp0, 1);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+            tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            break;
+        }
+        s->cc_op = CC_OP_SARB + ot;
+        if (op != 0) {
+            if (mod != 3)
+                gen_op_st_T0_A0(ot + s->mem_index);
+            else
+                gen_op_mov_reg_T0(ot, rm);
+            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+            tcg_gen_movi_tl(cpu_cc_dst, 0);
+        }
+        break;
+    case 0x1bc: /* bsf */
+    case 0x1bd: /* bsr */
+        {
+            int label1;
+            TCGv t0;
+
+            ot = dflag + OT_WORD;
+            modrm = ldub_code(s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+            gen_extu(ot, cpu_T[0]);
+            label1 = gen_new_label();
+            tcg_gen_movi_tl(cpu_cc_dst, 0);
+            t0 = tcg_temp_local_new();
+            tcg_gen_mov_tl(t0, cpu_T[0]);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
+            if (b & 1) {
+                gen_helper_bsr(cpu_T[0], t0);
+            } else {
+                gen_helper_bsf(cpu_T[0], t0);
+            }
+            gen_op_mov_reg_T0(ot, reg);
+            tcg_gen_movi_tl(cpu_cc_dst, 1);
+            gen_set_label(label1);
+            tcg_gen_discard_tl(cpu_cc_src);
+            s->cc_op = CC_OP_LOGICB + ot;
+            tcg_temp_free(t0);
+        }
+        break;
+        /************************/
+        /* bcd */
+    case 0x27: /* daa */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_helper_daa();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x2f: /* das */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_helper_das();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x37: /* aaa */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_helper_aaa();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x3f: /* aas */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_helper_aas();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xd4: /* aam */
+        if (CODE64(s))
+            goto illegal_op;
+        val = ldub_code(s->pc++);
+        if (val == 0) {
+            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
+        } else {
+            gen_helper_aam(tcg_const_i32(val));
+            s->cc_op = CC_OP_LOGICB;
+        }
+        break;
+    case 0xd5: /* aad */
+        if (CODE64(s))
+            goto illegal_op;
+        val = ldub_code(s->pc++);
+        gen_helper_aad(tcg_const_i32(val));
+        s->cc_op = CC_OP_LOGICB;
+        break;
+        /************************/
+        /* misc */
+    case 0x90: /* nop */
+        /* XXX: xchg + rex handling */
+        /* XXX: correct lock test for all insn */
+        if (prefixes & PREFIX_LOCK)
+            goto illegal_op;
+        if (prefixes & PREFIX_REPZ) {
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_PAUSE);
+        }
+        break;
+    case 0x9b: /* fwait */
+        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
+            (HF_MP_MASK | HF_TS_MASK)) {
+            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_fwait();
+        }
+        break;
+    case 0xcc: /* int3 */
+        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
+        break;
+    case 0xcd: /* int N */
+        val = ldub_code(s->pc++);
+        if (s->vm86 && s->iopl != 3) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
+        }
+        break;
+    case 0xce: /* into */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_jmp_im(pc_start - s->cs_base);
+        gen_helper_into(tcg_const_i32(s->pc - pc_start));
+        break;
+#ifdef WANT_ICEBP
+    case 0xf1: /* icebp (undocumented, exits to external debugger) */
+        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
+#if 1
+        gen_debug(s, pc_start - s->cs_base);
+#else
+        /* start debug */
+        tb_flush(cpu_single_env);
+        cpu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
+#endif
+        break;
+#endif
+    case 0xfa: /* cli */
+        if (!s->vm86) {
+            if (s->cpl <= s->iopl) {
+                gen_helper_cli();
+            } else {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            }
+        } else {
+            if (s->iopl == 3) {
+                gen_helper_cli();
+            } else {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            }
+        }
+        break;
+    case 0xfb: /* sti */
+        if (!s->vm86) {
+            if (s->cpl <= s->iopl) {
+            gen_sti:
+                gen_helper_sti();
+                /* interrupts are enabled only after the insn following sti */
+                /* If several instructions disable interrupts, only the
+                   _first_ does it */
+                if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+                    gen_helper_set_inhibit_irq();
+                /* give a chance to handle pending irqs */
+                gen_jmp_im(s->pc - s->cs_base);
+                gen_eob(s);
+            } else {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            }
+        } else {
+            if (s->iopl == 3) {
+                goto gen_sti;
+            } else {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            }
+        }
+        break;
+    case 0x62: /* bound */
+        if (CODE64(s))
+            goto illegal_op;
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = (modrm >> 3) & 7;
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        gen_op_mov_TN_reg(ot, 0, reg);
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_jmp_im(pc_start - s->cs_base);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        if (ot == OT_WORD)
+            gen_helper_boundw(cpu_A0, cpu_tmp2_i32);
+        else
+            gen_helper_boundl(cpu_A0, cpu_tmp2_i32);
+        break;
+    case 0x1c8 ... 0x1cf: /* bswap reg */
+        reg = (b & 7) | REX_B(s);
+#ifdef TARGET_X86_64
+        if (dflag == 2) {
+            gen_op_mov_TN_reg(OT_QUAD, 0, reg);
+            tcg_gen_bswap64_i64(cpu_T[0], cpu_T[0]);
+            gen_op_mov_reg_T0(OT_QUAD, reg);
+        } else
+#endif
+        {
+            gen_op_mov_TN_reg(OT_LONG, 0, reg);
+            tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]);
+            gen_op_mov_reg_T0(OT_LONG, reg);
+        }
+        break;
+    case 0xd6: /* salc */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_compute_eflags_c(cpu_T[0]);
+        tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
+        gen_op_mov_reg_T0(OT_BYTE, R_EAX);
+        break;
+    case 0xe0: /* loopnz */
+    case 0xe1: /* loopz */
+    case 0xe2: /* loop */
+    case 0xe3: /* jecxz */
+        {
+            int l1, l2, l3;
+
+            tval = (int8_t)insn_get(s, OT_BYTE);
+            next_eip = s->pc - s->cs_base;
+            tval += next_eip;
+            if (s->dflag == 0)
+                tval &= 0xffff;
+
+            l1 = gen_new_label();
+            l2 = gen_new_label();
+            l3 = gen_new_label();
+            b &= 3;
+            switch(b) {
+            case 0: /* loopnz */
+            case 1: /* loopz */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_op_add_reg_im(s->aflag, R_ECX, -1);
+                gen_op_jz_ecx(s->aflag, l3);
+                gen_compute_eflags(cpu_tmp0);
+                tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
+                if (b == 0) {
+                    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
+                } else {
+                    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, l1);
+                }
+                break;
+            case 2: /* loop */
+                gen_op_add_reg_im(s->aflag, R_ECX, -1);
+                gen_op_jnz_ecx(s->aflag, l1);
+                break;
+            default:
+            case 3: /* jcxz */
+                gen_op_jz_ecx(s->aflag, l1);
+                break;
+            }
+
+            gen_set_label(l3);
+            gen_jmp_im(next_eip);
+            tcg_gen_br(l2);
+
+            gen_set_label(l1);
+            gen_jmp_im(tval);
+            gen_set_label(l2);
+            gen_eob(s);
+        }
+        break;
+    case 0x130: /* wrmsr */
+    case 0x132: /* rdmsr */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(pc_start - s->cs_base);
+            if (b & 2) {
+                gen_helper_rdmsr();
+            } else {
+                gen_helper_wrmsr();
+            }
+        }
+        break;
+    case 0x131: /* rdtsc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_jmp_im(pc_start - s->cs_base);
+        if (use_icount)
+            gen_io_start();
+        gen_helper_rdtsc();
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
+        break;
+    case 0x133: /* rdpmc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_jmp_im(pc_start - s->cs_base);
+        gen_helper_rdpmc();
+        break;
+    case 0x134: /* sysenter */
+        /* On Intel CPUs, SYSENTER is also valid in 64-bit code */
+        if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
+            goto illegal_op;
+        if (!s->pe) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC) {
+                gen_op_set_cc_op(s->cc_op);
+                s->cc_op = CC_OP_DYNAMIC;
+            }
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_sysenter();
+            gen_eob(s);
+        }
+        break;
+    case 0x135: /* sysexit */
+        /* On Intel CPUs, SYSEXIT is also valid in 64-bit code */
+        if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
+            goto illegal_op;
+        if (!s->pe) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC) {
+                gen_op_set_cc_op(s->cc_op);
+                s->cc_op = CC_OP_DYNAMIC;
+            }
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_sysexit(tcg_const_i32(dflag));
+            gen_eob(s);
+        }
+        break;
+#ifdef TARGET_X86_64
+    case 0x105: /* syscall */
+        /* XXX: is it usable in real mode ? */
+        if (s->cc_op != CC_OP_DYNAMIC) {
+            gen_op_set_cc_op(s->cc_op);
+            s->cc_op = CC_OP_DYNAMIC;
+        }
+        gen_jmp_im(pc_start - s->cs_base);
+        gen_helper_syscall(tcg_const_i32(s->pc - pc_start));
+        gen_eob(s);
+        break;
+    case 0x107: /* sysret */
+        if (!s->pe) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC) {
+                gen_op_set_cc_op(s->cc_op);
+                s->cc_op = CC_OP_DYNAMIC;
+            }
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_sysret(tcg_const_i32(s->dflag));
+            /* condition codes are modified only in long mode */
+            if (s->lma)
+                s->cc_op = CC_OP_EFLAGS;
+            gen_eob(s);
+        }
+        break;
+#endif
+    case 0x1a2: /* cpuid */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_jmp_im(pc_start - s->cs_base);
+        gen_helper_cpuid();
+        break;
+    case 0xf4: /* hlt */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_hlt(tcg_const_i32(s->pc - pc_start));
+            s->is_jmp = 3;
+        }
+        break;
+    case 0x100:
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        op = (modrm >> 3) & 7;
+        switch(op) {
+        case 0: /* sldt */
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
+            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
+            ot = OT_WORD;
+            if (mod == 3)
+                ot += s->dflag;
+            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            break;
+        case 2: /* lldt */
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
+                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_jmp_im(pc_start - s->cs_base);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_helper_lldt(cpu_tmp2_i32);
+            }
+            break;
+        case 1: /* str */
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
+            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
+            ot = OT_WORD;
+            if (mod == 3)
+                ot += s->dflag;
+            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            break;
+        case 3: /* ltr */
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
+                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_jmp_im(pc_start - s->cs_base);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_helper_ltr(cpu_tmp2_i32);
+            }
+            break;
+        case 4: /* verr */
+        case 5: /* verw */
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            if (op == 4)
+                gen_helper_verr(cpu_T[0]);
+            else
+                gen_helper_verw(cpu_T[0]);
+            s->cc_op = CC_OP_EFLAGS;
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+    case 0x101:
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        op = (modrm >> 3) & 7;
+        rm = modrm & 7;
+        switch(op) {
+        case 0: /* sgdt */
+            if (mod == 3)
+                goto illegal_op;
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
+            gen_op_st_T0_A0(OT_WORD + s->mem_index);
+            gen_add_A0_im(s, 2);
+            tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.base));
+            if (!s->dflag)
+                gen_op_andl_T0_im(0xffffff);
+            gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
+            break;
+        case 1:
+            if (mod == 3) {
+                switch (rm) {
+                case 0: /* monitor */
+                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
+                        s->cpl != 0)
+                        goto illegal_op;
+                    if (s->cc_op != CC_OP_DYNAMIC)
+                        gen_op_set_cc_op(s->cc_op);
+                    gen_jmp_im(pc_start - s->cs_base);
+#ifdef TARGET_X86_64
+                    if (s->aflag == 2) {
+                        gen_op_movq_A0_reg(R_EAX);
+                    } else
+#endif
+                    {
+                        gen_op_movl_A0_reg(R_EAX);
+                        if (s->aflag == 0)
+                            gen_op_andl_A0_ffff();
+                    }
+                    gen_add_A0_ds_seg(s);
+                    gen_helper_monitor(cpu_A0);
+                    break;
+                case 1: /* mwait */
+                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
+                        s->cpl != 0)
+                        goto illegal_op;
+                    if (s->cc_op != CC_OP_DYNAMIC) {
+                        gen_op_set_cc_op(s->cc_op);
+                        s->cc_op = CC_OP_DYNAMIC;
+                    }
+                    gen_jmp_im(pc_start - s->cs_base);
+                    gen_helper_mwait(tcg_const_i32(s->pc - pc_start));
+                    gen_eob(s);
+                    break;
+                default:
+                    goto illegal_op;
+                }
+            } else { /* sidt */
+                gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
+                gen_op_st_T0_A0(OT_WORD + s->mem_index);
+                gen_add_A0_im(s, 2);
+                tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.base));
+                if (!s->dflag)
+                    gen_op_andl_T0_im(0xffffff);
+                gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
+            }
+            break;
+        case 2: /* lgdt */
+        case 3: /* lidt */
+            if (mod == 3) {
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                switch(rm) {
+                case 0: /* VMRUN */
+                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                        break;
+                    } else {
+                        gen_helper_vmrun(tcg_const_i32(s->aflag),
+                                         tcg_const_i32(s->pc - pc_start));
+                        tcg_gen_exit_tb(0);
+                        s->is_jmp = 3;
+                    }
+                    break;
+                case 1: /* VMMCALL */
+                    if (!(s->flags & HF_SVME_MASK))
+                        goto illegal_op;
+                    gen_helper_vmmcall();
+                    break;
+                case 2: /* VMLOAD */
+                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                        break;
+                    } else {
+                        gen_helper_vmload(tcg_const_i32(s->aflag));
+                    }
+                    break;
+                case 3: /* VMSAVE */
+                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                        break;
+                    } else {
+                        gen_helper_vmsave(tcg_const_i32(s->aflag));
+                    }
+                    break;
+                case 4: /* STGI */
+                    if ((!(s->flags & HF_SVME_MASK) &&
+                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
+                        !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                        break;
+                    } else {
+                        gen_helper_stgi();
+                    }
+                    break;
+                case 5: /* CLGI */
+                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                        break;
+                    } else {
+                        gen_helper_clgi();
+                    }
+                    break;
+                case 6: /* SKINIT */
+                    if ((!(s->flags & HF_SVME_MASK) && 
+                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
+                        !s->pe)
+                        goto illegal_op;
+                    gen_helper_skinit();
+                    break;
+                case 7: /* INVLPGA */
+                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                        break;
+                    } else {
+                        gen_helper_invlpga(tcg_const_i32(s->aflag));
+                    }
+                    break;
+                default:
+                    goto illegal_op;
+                }
+            } else if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                gen_svm_check_intercept(s, pc_start,
+                                        op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_ld_T1_A0(OT_WORD + s->mem_index);
+                gen_add_A0_im(s, 2);
+                gen_op_ld_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
+                if (!s->dflag)
+                    gen_op_andl_T0_im(0xffffff);
+                if (op == 2) {
+                    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,gdt.base));
+                    tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,gdt.limit));
+                } else {
+                    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,idt.base));
+                    tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,idt.limit));
+                }
+            }
+            break;
+        case 4: /* smsw */
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
+#if defined TARGET_X86_64 && defined WORDS_BIGENDIAN
+            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]) + 4);
+#else
+            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
+#endif
+            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 1);
+            break;
+        case 6: /* lmsw */
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
+                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_helper_lmsw(cpu_T[0]);
+                gen_jmp_im(s->pc - s->cs_base);
+                gen_eob(s);
+            }
+            break;
+        case 7: /* invlpg */
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                if (mod == 3) {
+#ifdef TARGET_X86_64
+                    if (CODE64(s) && rm == 0) {
+                        /* swapgs */
+                        tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,segs[R_GS].base));
+                        tcg_gen_ld_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,kernelgsbase));
+                        tcg_gen_st_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,segs[R_GS].base));
+                        tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,kernelgsbase));
+                    } else
+#endif
+                    {
+                        goto illegal_op;
+                    }
+                } else {
+                    if (s->cc_op != CC_OP_DYNAMIC)
+                        gen_op_set_cc_op(s->cc_op);
+                    gen_jmp_im(pc_start - s->cs_base);
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_helper_invlpg(cpu_A0);
+                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_eob(s);
+                }
+            }
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+    case 0x108: /* invd */
+    case 0x109: /* wbinvd */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
+            /* nothing to do */
+        }
+        break;
+    case 0x63: /* arpl or movslS (x86_64) */
+#ifdef TARGET_X86_64
+        if (CODE64(s)) {
+            int d_ot;
+            /* d_ot is the size of destination */
+            d_ot = dflag + OT_WORD;
+
+            modrm = ldub_code(s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            mod = (modrm >> 6) & 3;
+            rm = (modrm & 7) | REX_B(s);
+
+            if (mod == 3) {
+                gen_op_mov_TN_reg(OT_LONG, 0, rm);
+                /* sign extend */
+                if (d_ot == OT_QUAD)
+                    tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+                gen_op_mov_reg_T0(d_ot, reg);
+            } else {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                if (d_ot == OT_QUAD) {
+                    gen_op_lds_T0_A0(OT_LONG + s->mem_index);
+                } else {
+                    gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                }
+                gen_op_mov_reg_T0(d_ot, reg);
+            }
+        } else
+#endif
+        {
+            int label1;
+            TCGv t0, t1, t2;
+
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            t0 = tcg_temp_local_new();
+            t1 = tcg_temp_local_new();
+            t2 = tcg_temp_local_new();
+            ot = OT_WORD;
+            modrm = ldub_code(s->pc++);
+            reg = (modrm >> 3) & 7;
+            mod = (modrm >> 6) & 3;
+            rm = modrm & 7;
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
+            } else {
+                gen_op_mov_v_reg(ot, t0, rm);
+            }
+            gen_op_mov_v_reg(ot, t1, reg);
+            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
+            tcg_gen_andi_tl(t1, t1, 3);
+            tcg_gen_movi_tl(t2, 0);
+            label1 = gen_new_label();
+            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
+            tcg_gen_andi_tl(t0, t0, ~3);
+            tcg_gen_or_tl(t0, t0, t1);
+            tcg_gen_movi_tl(t2, CC_Z);
+            gen_set_label(label1);
+            if (mod != 3) {
+                gen_op_st_v(ot + s->mem_index, t0, cpu_A0);
+            } else {
+                gen_op_mov_reg_v(ot, rm, t0);
+            }
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_compute_eflags(cpu_cc_src);
+            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
+            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
+            s->cc_op = CC_OP_EFLAGS;
+            tcg_temp_free(t0);
+            tcg_temp_free(t1);
+            tcg_temp_free(t2);
+        }
+        break;
+    case 0x102: /* lar */
+    case 0x103: /* lsl */
+        {
+            int label1;
+            TCGv t0;
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            ot = dflag ? OT_LONG : OT_WORD;
+            modrm = ldub_code(s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+            t0 = tcg_temp_local_new();
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            if (b == 0x102)
+                gen_helper_lar(t0, cpu_T[0]);
+            else
+                gen_helper_lsl(t0, cpu_T[0]);
+            tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
+            label1 = gen_new_label();
+            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+            gen_op_mov_reg_v(ot, reg, t0);
+            gen_set_label(label1);
+            s->cc_op = CC_OP_EFLAGS;
+            tcg_temp_free(t0);
+        }
+        break;
+    case 0x118:
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        op = (modrm >> 3) & 7;
+        switch(op) {
+        case 0: /* prefetchnta */
+        case 1: /* prefetchnt0 */
+        case 2: /* prefetchnt0 */
+        case 3: /* prefetchnt0 */
+            if (mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            /* nothing more to do */
+            break;
+        default: /* nop (multi byte) */
+            gen_nop_modrm(s, modrm);
+            break;
+        }
+        break;
+    case 0x119 ... 0x11f: /* nop (multi byte) */
+        modrm = ldub_code(s->pc++);
+        gen_nop_modrm(s, modrm);
+        break;
+    case 0x120: /* mov reg, crN */
+    case 0x122: /* mov crN, reg */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            modrm = ldub_code(s->pc++);
+            if ((modrm & 0xc0) != 0xc0)
+                goto illegal_op;
+            rm = (modrm & 7) | REX_B(s);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            if (CODE64(s))
+                ot = OT_QUAD;
+            else
+                ot = OT_LONG;
+            switch(reg) {
+            case 0:
+            case 2:
+            case 3:
+            case 4:
+            case 8:
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                if (b & 2) {
+                    gen_op_mov_TN_reg(ot, 0, rm);
+                    gen_helper_write_crN(tcg_const_i32(reg), cpu_T[0]);
+                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_eob(s);
+                } else {
+                    gen_helper_read_crN(cpu_T[0], tcg_const_i32(reg));
+                    gen_op_mov_reg_T0(ot, rm);
+                }
+                break;
+            default:
+                goto illegal_op;
+            }
+        }
+        break;
+    case 0x121: /* mov reg, drN */
+    case 0x123: /* mov drN, reg */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            modrm = ldub_code(s->pc++);
+            if ((modrm & 0xc0) != 0xc0)
+                goto illegal_op;
+            rm = (modrm & 7) | REX_B(s);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            if (CODE64(s))
+                ot = OT_QUAD;
+            else
+                ot = OT_LONG;
+            /* XXX: do it dynamically with CR4.DE bit */
+            if (reg == 4 || reg == 5 || reg >= 8)
+                goto illegal_op;
+            if (b & 2) {
+                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
+                gen_op_mov_TN_reg(ot, 0, rm);
+                gen_helper_movl_drN_T0(tcg_const_i32(reg), cpu_T[0]);
+                gen_jmp_im(s->pc - s->cs_base);
+                gen_eob(s);
+            } else {
+                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
+                tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,dr[reg]));
+                gen_op_mov_reg_T0(ot, rm);
+            }
+        }
+        break;
+    case 0x106: /* clts */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
+            gen_helper_clts();
+            /* abort block because static cpu state changed */
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
+    case 0x1c3: /* MOVNTI reg, mem */
+        if (!(s->cpuid_features & CPUID_SSE2))
+            goto illegal_op;
+        ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        reg = ((modrm >> 3) & 7) | rex_r;
+        /* generate a generic store */
+        gen_ldst_modrm(s, modrm, ot, reg, 1);
+        break;
+    case 0x1ae:
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        op = (modrm >> 3) & 7;
+        switch(op) {
+        case 0: /* fxsave */
+            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
+                (s->flags & HF_EM_MASK))
+                goto illegal_op;
+            if (s->flags & HF_TS_MASK) {
+                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+                break;
+            }
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_fxsave(cpu_A0, tcg_const_i32((s->dflag == 2)));
+            break;
+        case 1: /* fxrstor */
+            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
+                (s->flags & HF_EM_MASK))
+                goto illegal_op;
+            if (s->flags & HF_TS_MASK) {
+                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+                break;
+            }
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_fxrstor(cpu_A0, tcg_const_i32((s->dflag == 2)));
+            break;
+        case 2: /* ldmxcsr */
+        case 3: /* stmxcsr */
+            if (s->flags & HF_TS_MASK) {
+                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+                break;
+            }
+            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
+                mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            if (op == 2) {
+                gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
+            } else {
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
+                gen_op_st_T0_A0(OT_LONG + s->mem_index);
+            }
+            break;
+        case 5: /* lfence */
+        case 6: /* mfence */
+            if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE))
+                goto illegal_op;
+            break;
+        case 7: /* sfence / clflush */
+            if ((modrm & 0xc7) == 0xc0) {
+                /* sfence */
+                /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */
+                if (!(s->cpuid_features & CPUID_SSE))
+                    goto illegal_op;
+            } else {
+                /* clflush */
+                if (!(s->cpuid_features & CPUID_CLFLUSH))
+                    goto illegal_op;
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            }
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+    case 0x10d: /* 3DNow! prefetch(w) */
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        /* ignore for now */
+        break;
+    case 0x1aa: /* rsm */
+        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
+        if (!(s->flags & HF_SMM_MASK))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC) {
+            gen_op_set_cc_op(s->cc_op);
+            s->cc_op = CC_OP_DYNAMIC;
+        }
+        gen_jmp_im(s->pc - s->cs_base);
+        gen_helper_rsm();
+        gen_eob(s);
+        break;
+    case 0x1b8: /* SSE4.2 popcnt */
+        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
+             PREFIX_REPZ)
+            goto illegal_op;
+        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
+            goto illegal_op;
+
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7);
+
+        if (s->prefix & PREFIX_DATA)
+            ot = OT_WORD;
+        else if (s->dflag != 2)
+            ot = OT_LONG;
+        else
+            ot = OT_QUAD;
+
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_helper_popcnt(cpu_T[0], cpu_T[0], tcg_const_i32(ot));
+        gen_op_mov_reg_T0(ot, reg);
+
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x10e ... 0x10f:
+        /* 3DNow! instructions, ignore prefixes */
+        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
+    case 0x110 ... 0x117:
+    case 0x128 ... 0x12f:
+    case 0x138 ... 0x13a:
+    case 0x150 ... 0x177:
+    case 0x17c ... 0x17f:
+    case 0x1c2:
+    case 0x1c4 ... 0x1c6:
+    case 0x1d0 ... 0x1fe:
+        gen_sse(s, b, pc_start, rex_r);
+        break;
+    default:
+        goto illegal_op;
+    }
+    /* lock generation */
+    if (s->prefix & PREFIX_LOCK)
+        gen_helper_unlock();
+    return s->pc;
+ illegal_op:
+    if (s->prefix & PREFIX_LOCK)
+        gen_helper_unlock();
+    /* XXX: ensure that no lock was generated */
+    gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+    return s->pc;
+}
+
+void optimize_flags_init(void)
+{   /* Create the env/cc_op/cc_src/cc_dst/cc_tmp TCG globals and register the helpers. */
+#if TCG_TARGET_REG_BITS == 32
+    assert(sizeof(CCTable) == (1 << 3)); /* CCTable must be 8 bytes when TCG_TARGET_REG_BITS is 32 */
+#else
+    assert(sizeof(CCTable) == (1 << 4)); /* ... and 16 bytes otherwise */
+#endif
+    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env"); /* global named "env", tied to TCG_AREG0 */
+    cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0,
+                                       offsetof(CPUState, cc_op), "cc_op"); /* i32 global at CPUState.cc_op */
+    cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_src),
+                                    "cc_src"); /* global at CPUState.cc_src */
+    cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_dst),
+                                    "cc_dst"); /* global at CPUState.cc_dst */
+    cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp),
+                                    "cc_tmp"); /* global at CPUState.cc_tmp */
+
+    /* register helpers: helper.h is included here with GEN_HELPER defined as 2 */
+#define GEN_HELPER 2
+#include "helper.h"
+}
+
+/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
+   basic block 'tb', calling disas_insn() once per guest instruction.
+   If search_pc is non-zero, also record per-op PC, cc_op and icount. */
+static inline void gen_intermediate_code_internal(CPUState *env,
+                                                  TranslationBlock *tb,
+                                                  int search_pc)
+{
+    DisasContext dc1, *dc = &dc1;
+    target_ulong pc_ptr;
+    uint16_t *gen_opc_end;
+    CPUBreakpoint *bp;
+    int j, lj, cflags;
+    uint64_t flags;
+    target_ulong pc_start;
+    target_ulong cs_base;
+    int num_insns;
+    int max_insns;
+
+    /* decode the block's pc, cs_base and flags into the DisasContext */
+    pc_start = tb->pc;
+    cs_base = tb->cs_base;
+    flags = tb->flags;
+    cflags = tb->cflags; /* NOTE(review): assigned but never read; the code below uses tb->cflags directly */
+
+    dc->pe = (flags >> HF_PE_SHIFT) & 1;
+    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
+    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
+    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
+    dc->f_st = 0;
+    dc->vm86 = (flags >> VM_SHIFT) & 1;
+    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
+    dc->iopl = (flags >> IOPL_SHIFT) & 3;
+    dc->tf = (flags >> TF_SHIFT) & 1;
+    dc->singlestep_enabled = env->singlestep_enabled;
+    dc->cc_op = CC_OP_DYNAMIC; /* every block starts with no statically known cc_op */
+    dc->cs_base = cs_base;
+    dc->tb = tb;
+    dc->popl_esp_hack = 0;
+    /* select memory access functions: 0, or under HF_SOFTMMU_MASK 2*4 for cpl 3 and 1*4 otherwise */
+    dc->mem_index = 0;
+    if (flags & HF_SOFTMMU_MASK) {
+        if (dc->cpl == 3)
+            dc->mem_index = 2 * 4;
+        else
+            dc->mem_index = 1 * 4;
+    }
+    dc->cpuid_features = env->cpuid_features;
+    dc->cpuid_ext_features = env->cpuid_ext_features;
+    dc->cpuid_ext2_features = env->cpuid_ext2_features;
+    dc->cpuid_ext3_features = env->cpuid_ext3_features;
+#ifdef TARGET_X86_64
+    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
+    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
+#endif
+    dc->flags = flags;
+    dc->jmp_opt = !(dc->tf || env->singlestep_enabled ||
+                    (flags & HF_INHIBIT_IRQ_MASK)
+#ifndef CONFIG_SOFTMMU
+                    || (flags & HF_SOFTMMU_MASK)
+#endif
+                    ); /* jmp_opt is cleared as soon as any one of the conditions above holds */
+#if 0
+    /* check addseg logic */
+    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
+        printf("ERROR addseg\n");
+#endif
+
+    cpu_T[0] = tcg_temp_new(); /* the cpu_* working temporaries are re-created on every call */
+    cpu_T[1] = tcg_temp_new();
+    cpu_A0 = tcg_temp_new();
+    cpu_T3 = tcg_temp_new();
+
+    cpu_tmp0 = tcg_temp_new();
+    cpu_tmp1_i64 = tcg_temp_new_i64();
+    cpu_tmp2_i32 = tcg_temp_new_i32();
+    cpu_tmp3_i32 = tcg_temp_new_i32();
+    cpu_tmp4 = tcg_temp_new();
+    cpu_tmp5 = tcg_temp_new();
+    cpu_tmp6 = tcg_temp_new();
+    cpu_ptr0 = tcg_temp_new_ptr();
+    cpu_ptr1 = tcg_temp_new_ptr();
+
+    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; /* the loop below stops once gen_opc_ptr reaches this */
+
+    dc->is_jmp = DISAS_NEXT;
+    pc_ptr = pc_start;
+    lj = -1; /* index of the last gen_opc_* slot written in search_pc mode */
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0) /* no count given in cflags: fall back to the mask value as the limit */
+        max_insns = CF_COUNT_MASK;
+
+    gen_icount_start();
+    for(;;) {
+        if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
+            QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
+                if (bp->pc == pc_ptr &&
+                    !((bp->flags & BP_CPU) && (tb->flags & HF_RF_MASK))) {
+                    gen_debug(dc, pc_ptr - dc->cs_base);
+                    break; /* NOTE(review): presumably exits only the breakpoint scan, so disas_insn() below still runs - confirm */
+                }
+            }
+        }
+        if (search_pc) {
+            j = gen_opc_ptr - gen_opc_buf; /* op index at which this instruction starts */
+            if (lj < j) {
+                lj++;
+                while (lj < j) /* ops emitted since the previous start are not instruction starts */
+                    gen_opc_instr_start[lj++] = 0;
+            }
+            gen_opc_pc[lj] = pc_ptr;
+            gen_opc_cc_op[lj] = dc->cc_op;
+            gen_opc_instr_start[lj] = 1;
+            gen_opc_icount[lj] = num_insns;
+        }
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) /* next insn is the last allowed one of a CF_LAST_IO block */
+            gen_io_start();
+
+        pc_ptr = disas_insn(dc, pc_ptr); /* returns the address following the translated instruction */
+        num_insns++;
+        /* stop translation if indicated */
+        if (dc->is_jmp)
+            break;
+        /* if single step mode, we generate only one instruction and
+           generate an exception */
+        /* if irqs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
+           the flag and abort the translation to give the irqs a
+           chance to happen */
+        if (dc->tf || dc->singlestep_enabled ||
+            (flags & HF_INHIBIT_IRQ_MASK)) {
+            gen_jmp_im(pc_ptr - dc->cs_base);
+            gen_eob(dc);
+            break;
+        }
+        /* if too long translation, stop generation too */
+        if (gen_opc_ptr >= gen_opc_end ||
+            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
+            num_insns >= max_insns) {
+            gen_jmp_im(pc_ptr - dc->cs_base);
+            gen_eob(dc);
+            break;
+        }
+        if (singlestep) { /* 'singlestep' is not a local; when set, the block ends after one instruction */
+            gen_jmp_im(pc_ptr - dc->cs_base);
+            gen_eob(dc);
+            break;
+        }
+    }
+    if (tb->cflags & CF_LAST_IO)
+        gen_io_end();
+    gen_icount_end(tb, num_insns);
+    *gen_opc_ptr = INDEX_op_end; /* terminate the generated op stream */
+    /* also clear the instr_start flags of the ops emitted after the last instruction */
+    if (search_pc) {
+        j = gen_opc_ptr - gen_opc_buf;
+        lj++;
+        while (lj <= j)
+            gen_opc_instr_start[lj++] = 0;
+    }
+
+#ifdef DEBUG_DISAS
+    log_cpu_state_mask(CPU_LOG_TB_CPU, env, X86_DUMP_CCOP);
+    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
+        int disas_flags;
+        qemu_log("----------------\n");
+        qemu_log("IN: %s\n", lookup_symbol(pc_start));
+#ifdef TARGET_X86_64
+        if (dc->code64)
+            disas_flags = 2;
+        else
+#endif
+            disas_flags = !dc->code32; /* 1 when the block is not 32-bit code, 0 when it is */
+        log_target_disas(pc_start, pc_ptr - pc_start, disas_flags);
+        qemu_log("\n");
+    }
+#endif
+
+    if (!search_pc) { /* tb->size and tb->icount are only written on the non-search pass */
+        tb->size = pc_ptr - pc_start;
+        tb->icount = num_insns;
+    }
+}
+
+void gen_intermediate_code(CPUState *env, TranslationBlock *tb)
+{   /* Translate 'tb' without recording per-op PC information. */
+    gen_intermediate_code_internal(env, tb, 0); /* search_pc = 0 */
+}
+
+void gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb)
+{   /* Translate 'tb' and additionally record per-op PC information. */
+    gen_intermediate_code_internal(env, tb, 1); /* search_pc = 1 */
+}
+
+void gen_pc_load(CPUState *env, TranslationBlock *tb,
+                unsigned long searched_pc, int pc_pos, void *puc) /* searched_pc is only logged; puc is unused */
+{   /* Set env->eip, and env->cc_op when a static value exists, from the gen_opc_* entry at pc_pos. */
+    int cc_op;
+#ifdef DEBUG_DISAS
+    if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
+        int i;
+        qemu_log("RESTORE:\n");
+        for(i = 0;i <= pc_pos; i++) { /* log every recorded instruction start up to and including pc_pos */
+            if (gen_opc_instr_start[i]) {
+                qemu_log("0x%04x: " TARGET_FMT_lx "\n", i, gen_opc_pc[i]);
+            }
+        }
+        qemu_log("spc=0x%08lx pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
+                searched_pc, pc_pos, gen_opc_pc[pc_pos] - tb->cs_base,
+                (uint32_t)tb->cs_base);
+    }
+#endif
+    env->eip = gen_opc_pc[pc_pos] - tb->cs_base; /* eip is the recorded pc minus the block's cs_base */
+    cc_op = gen_opc_cc_op[pc_pos];
+    if (cc_op != CC_OP_DYNAMIC) /* a CC_OP_DYNAMIC entry leaves env->cc_op untouched */
+        env->cc_op = cc_op;
+}
diff --git a/user-events-ui.c b/user-events-ui.c
deleted file mode 100644
index 67fafd6..0000000
--- a/user-events-ui.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/* Copyright (C) 2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-#include "user-events.h"
-#include "android/utils/debug.h"
-#include "android/user-events-common.h"
-#include "console.h"
-#include <stdio.h>
-
-#include "android/looper.h"
-#include "android/async-utils.h"
-#include "android/core-connection.h"
-
-/* Descriptor for the user events client. */
-typedef struct ClientUserEvents {
-    /* Core connection instance for the user events client. */
-    CoreConnection* core_connection;
-
-    /* Socket for the client. */
-    int             sock;
-
-    /* Socket wrapper for sync I/O. */
-    SyncSocket*     sync_socket;
-} ClientUserEvents;
-
-/* One and only one user events client instance. */
-static ClientUserEvents _client_ue = { 0 };
-
-int
-clientue_create(SockAddress* console_socket)
-{
-    char* connect_message = NULL;
-    char switch_cmd[256];
-
-    // Connect to the framebuffer service.
-    _client_ue.core_connection = core_connection_create(console_socket);
-    if (_client_ue.core_connection == NULL) {
-        derror("User events client is unable to connect to the console: %s\n",
-               errno_str);
-        return -1;
-    }
-    if (core_connection_open(_client_ue.core_connection)) {
-        core_connection_free(_client_ue.core_connection);
-        _client_ue.core_connection = NULL;
-        derror("User events client is unable to open the console: %s\n",
-               errno_str);
-        return -1;
-    }
-    snprintf(switch_cmd, sizeof(switch_cmd), "user-events");
-    if (core_connection_switch_stream(_client_ue.core_connection, switch_cmd,
-                                      &connect_message)) {
-        derror("Unable to connect to the user events service: %s\n",
-               connect_message ? connect_message : "");
-        if (connect_message != NULL) {
-            free(connect_message);
-        }
-        core_connection_close(_client_ue.core_connection);
-        core_connection_free(_client_ue.core_connection);
-        _client_ue.core_connection = NULL;
-        return -1;
-    }
-
-    // Now that we're connected lets initialize the descriptor.
-    _client_ue.sock = core_connection_get_socket(_client_ue.core_connection);
-    _client_ue.sync_socket = syncsocket_init(_client_ue.sock);
-    if (connect_message != NULL) {
-        free(connect_message);
-    }
-
-    fprintf(stdout, "User events client is now attached to the core %s\n",
-            sock_address_to_string(console_socket));
-
-    return 0;
-}
-
-/* Sends an event to the core.
- * Parameters:
- *  ue - User events client instance.
- *  event - Event type. Must be one of the AUSER_EVENT_XXX.
- *  event_param - Event parameters.
- *  size - Byte size of the event parameters buffer.
- * Return:
- *  0 on success, or -1 on failure.
- */
-static int
-clientue_send(ClientUserEvents* ue,
-              uint8_t event,
-              const void* event_param,
-              size_t size)
-{
-    int res;
-    UserEventHeader header;
-
-    header.event_type = event;
-    res = syncsocket_start_write(ue->sync_socket);
-    if (!res) {
-        // Send event type first (event header)
-        res = syncsocket_write(ue->sync_socket, &header, sizeof(header), 500);
-        if (res > 0) {
-            // Send event param next.
-            res = syncsocket_write(ue->sync_socket, event_param, size, 500);
-        }
-        res = syncsocket_result(res);
-        syncsocket_stop_write(ue->sync_socket);
-    }
-    if (res < 0) {
-        derror("Unable to send user event: %s\n", errno_str);
-    }
-    return res;
-}
-
-void
-user_event_keycodes(int *kcodes, int count)
-{
-    int nn;
-    for (nn = 0; nn < count; nn++)
-        user_event_keycode(kcodes[nn]);
-}
-
-void
-user_event_keycode(int  kcode)
-{
-    UserEventKeycode    message;
-    message.keycode = kcode;
-    clientue_send(&_client_ue, AUSER_EVENT_KEYCODE, &message, sizeof(message));
-}
-
-void
-user_event_key(unsigned code, unsigned down)
-{
-    if(code == 0) {
-        return;
-    }
-    if (VERBOSE_CHECK(keys))
-        printf(">> KEY [0x%03x,%s]\n", (code & 0x1ff), down ? "down" : " up " );
-
-    user_event_keycode((code & 0x1ff) | (down ? 0x200 : 0));
-}
-
-
-void
-user_event_mouse(int dx, int dy, int dz, unsigned buttons_state)
-{
-    UserEventMouse    message;
-    message.dx = dx;
-    message.dy = dy;
-    message.dz = dz;
-    message.buttons_state = buttons_state;
-    clientue_send(&_client_ue, AUSER_EVENT_MOUSE, &message, sizeof(message));
-}
-
-void  user_event_register_generic(void* opaque, QEMUPutGenericEvent *callback)
-{
-}
-
-void
-user_event_generic(int type, int code, int value)
-{
-    UserEventGeneric    message;
-    message.type = type;
-    message.code = code;
-    message.value = value;
-    clientue_send(&_client_ue, AUSER_EVENT_GENERIC, &message, sizeof(message));
-}