exec.c: Use a QTAILQ to list CPUs.
This introduces CPUState, for now just a typedef of CPUOldState,
and changes the way CPUs are listed in QEMU: they are kept in a
QTAILQ instead of a hand-rolled linked list. The goal is to get
closer to upstream, while also making it possible to fully separate
CPUState from CPUArmState in future patches.
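As an illustration of the new idiom (a sketch only, not part of the
diff below), per-CPU loops now read as:

    CPUState *env;
    CPU_FOREACH(env) {   /* i.e. QTAILQ_FOREACH(env, &cpus, node) */
        /* visit every registered CPU, in cpu_index order */
    }
    /* first_cpu is now a macro for QTAILQ_FIRST(&cpus) */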
+ Move "current_cpu" to a thread-local variable on Linux, just like
upstream. Testing shows this doesn't affect performance.
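For reference, the thread-local plumbing amounts to the following
sketch of how the macros from the new qemu/tls.h expand (on
non-Linux hosts the variable falls back to a plain global):

    /* exec.c */
    DEFINE_TLS(CPUState *, current_cpu);   /* __thread CPUState *tls__current_cpu; */

    /* include/exec/cpu-all.h */
    DECLARE_TLS(CPUState *, current_cpu);
    #define current_cpu tls_var(current_cpu)   /* -> tls__current_cpu */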
Change-Id: Id07bbef4ba1584e607bec647d5117ac755c48ce6
diff --git a/cpus.c b/cpus.c
index 53ccd8a..38ecaf3 100644
--- a/cpus.c
+++ b/cpus.c
@@ -43,13 +43,13 @@
void hw_error(const char *fmt, ...)
{
va_list ap;
- CPUOldState *env;
+ CPUState *env;
va_start(ap, fmt);
fprintf(stderr, "qemu: hardware error: ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
- for(env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
fprintf(stderr, "CPU #%d:\n", env->cpu_index);
#ifdef TARGET_I386
cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
@@ -80,26 +80,21 @@
return 1;
}
-static int internal_cpu_has_work(CPUOldState *env)
-{
- if (env->stop)
- return 1;
- if (env->stopped)
- return 0;
- if (!env->halted)
- return 1;
- if (cpu_has_work(env))
- return 1;
- return 0;
-}
-
int tcg_has_work(void)
{
CPUOldState *env;
- for (env = first_cpu; env != NULL; env = env->next_cpu)
- if (internal_cpu_has_work(env))
+ CPU_FOREACH(env) {
+ if (env->stop)
return 1;
+        if (env->stopped)
+            continue;
+        if (!env->halted)
+            return 1;
+        if (cpu_has_work(env))
+            return 1;
+ }
return 0;
}
@@ -227,8 +222,8 @@
int ret = 0;
if (next_cpu == NULL)
- next_cpu = first_cpu;
- for (; next_cpu != NULL; next_cpu = next_cpu->next_cpu) {
+ next_cpu = QTAILQ_FIRST(&cpus);
+ for (; next_cpu != NULL; next_cpu = QTAILQ_NEXT(next_cpu, node)) {
CPUOldState *env = cur_cpu = next_cpu;
if (!vm_running)
diff --git a/exec.c b/exec.c
index fff1090..61ba9e3 100644
--- a/exec.c
+++ b/exec.c
@@ -40,6 +40,7 @@
#include "hw/qdev.h"
#include "hw/xen/xen.h"
#include "qemu/osdep.h"
+#include "qemu/tls.h"
#include "sysemu/kvm.h"
#include "exec/cputlb.h"
#include "exec/hax.h"
@@ -60,10 +61,9 @@
RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
#endif
-CPUArchState *first_cpu;
-/* current CPU in the current thread. It is only valid inside
- cpu_exec() */
-CPUArchState *cpu_single_env;
+struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
+DEFINE_TLS(CPUState *, current_cpu);
+
/* 0 = Do not count executed instructions.
1 = Precise instruction counting.
2 = Adaptive rate instruction counting. */
@@ -153,37 +153,32 @@
CPUArchState *qemu_get_cpu(int cpu)
{
- CPUArchState *env = first_cpu;
+ CPUState *env;
- while (env) {
+ CPU_FOREACH(env) {
if (env->cpu_index == cpu)
break;
- env = env->next_cpu;
}
-
return env;
}
void cpu_exec_init(CPUArchState *env)
{
- CPUArchState **penv;
- int cpu_index;
-
#if defined(CONFIG_USER_ONLY)
cpu_list_lock();
#endif
- env->next_cpu = NULL;
- penv = &first_cpu;
- cpu_index = 0;
- while (*penv != NULL) {
- penv = &(*penv)->next_cpu;
+ // Compute CPU index from list position.
+ int cpu_index = 0;
+ CPUState *env1;
+ CPU_FOREACH(env1) {
cpu_index++;
}
env->cpu_index = cpu_index;
+ QTAILQ_INSERT_TAIL(&cpus, env, node);
+
env->numa_node = 0;
QTAILQ_INIT(&env->breakpoints);
QTAILQ_INIT(&env->watchpoints);
- *penv = env;
#if defined(CONFIG_USER_ONLY)
cpu_list_unlock();
#endif
@@ -559,7 +554,7 @@
abort();
}
- for(env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
int mmu_idx;
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
for(i = 0; i < CPU_TLB_SIZE; i++)
@@ -775,7 +770,7 @@
/* since each CPU stores ram addresses in its TLB cache, we must
reset the modified entries */
/* XXX: slow ! */
- for(env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
tlb_flush(env, 1);
}
}
diff --git a/gdbstub.c b/gdbstub.c
index cea89dc..96ce5e5 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1363,7 +1363,7 @@
"<xi:include href=\"%s\"/>",
GDB_CORE_XML);
- for (r = first_cpu->gdb_regs; r; r = r->next) {
+ for (r = QTAILQ_FIRST(&cpus)->gdb_regs; r; r = r->next) {
pstrcat(target_xml, sizeof(target_xml), "<xi:include href=\"");
pstrcat(target_xml, sizeof(target_xml), r->xml);
pstrcat(target_xml, sizeof(target_xml), "\"/>");
@@ -1470,7 +1470,7 @@
switch (type) {
case GDB_BREAKPOINT_SW:
case GDB_BREAKPOINT_HW:
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
err = cpu_breakpoint_insert(env, addr, BP_GDB, NULL);
if (err)
break;
@@ -1480,7 +1480,7 @@
case GDB_WATCHPOINT_WRITE:
case GDB_WATCHPOINT_READ:
case GDB_WATCHPOINT_ACCESS:
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
err = cpu_watchpoint_insert(env, addr, len, xlat_gdb_type[type],
NULL);
if (err)
@@ -1504,7 +1504,7 @@
switch (type) {
case GDB_BREAKPOINT_SW:
case GDB_BREAKPOINT_HW:
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
err = cpu_breakpoint_remove(env, addr, BP_GDB);
if (err)
break;
@@ -1514,7 +1514,7 @@
case GDB_WATCHPOINT_WRITE:
case GDB_WATCHPOINT_READ:
case GDB_WATCHPOINT_ACCESS:
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
err = cpu_watchpoint_remove(env, addr, len, xlat_gdb_type[type]);
if (err)
break;
@@ -1535,7 +1535,7 @@
return;
}
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
cpu_breakpoint_remove_all(env, BP_GDB);
#ifndef CONFIG_USER_ONLY
cpu_watchpoint_remove_all(env, BP_GDB);
@@ -1581,7 +1581,7 @@
{
CPUOldState *env;
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
if (gdb_id(env) == thread_id) {
return env;
}
@@ -1834,14 +1834,14 @@
put_packet(s, "QC1");
break;
} else if (strcmp(p,"fThreadInfo") == 0) {
- s->query_cpu = first_cpu;
+ s->query_cpu = QTAILQ_FIRST(&cpus);
goto report_cpuinfo;
} else if (strcmp(p,"sThreadInfo") == 0) {
report_cpuinfo:
if (s->query_cpu) {
snprintf(buf, sizeof(buf), "m%x", gdb_id(s->query_cpu));
put_packet(s, buf);
- s->query_cpu = s->query_cpu->next_cpu;
+ s->query_cpu = QTAILQ_NEXT(s->query_cpu, node);
} else
put_packet(s, "l");
break;
@@ -2256,8 +2256,8 @@
qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val));
s = g_malloc0(sizeof(GDBState));
- s->c_cpu = first_cpu;
- s->g_cpu = first_cpu;
+ s->c_cpu = QTAILQ_FIRST(&cpus);
+ s->g_cpu = QTAILQ_FIRST(&cpus);
s->fd = fd;
gdb_has_xml = 0;
@@ -2434,8 +2434,8 @@
mon_chr = s->mon_chr;
memset(s, 0, sizeof(GDBState));
}
- s->c_cpu = first_cpu;
- s->g_cpu = first_cpu;
+ s->c_cpu = QTAILQ_FIRST(&cpus);
+ s->g_cpu = QTAILQ_FIRST(&cpus);
s->chr = chr;
s->state = chr ? RS_IDLE : RS_INACTIVE;
s->mon_chr = mon_chr;
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 71b9430..634d73c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -152,7 +152,7 @@
while (env) {
if (apic_accept_pic_intr(env))
apic_deliver_pic_intr(env, level);
- env = env->next_cpu;
+ env = QTAILQ_NEXT(env, node);
}
} else {
if (level)
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 91822d9..0b269e3 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -20,7 +20,9 @@
#define CPU_ALL_H
#include "qemu-common.h"
+#include "qemu/queue.h"
#include "qemu/thread.h"
+#include "qemu/tls.h"
#include "exec/cpu-common.h"
/* some important defines:
@@ -371,8 +373,20 @@
void QEMU_NORETURN cpu_abort(CPUArchState *env, const char *fmt, ...)
GCC_FMT_ATTR(2, 3);
-extern CPUArchState *first_cpu;
-extern CPUArchState *cpu_single_env;
+
+typedef CPUOldState CPUState;
+Q_TAILQ_HEAD(CPUTailQ, CPUState,);
+extern struct CPUTailQ cpus;
+#define CPU_NEXT(cpu) QTAILQ_NEXT(cpu, node)
+#define CPU_FOREACH(cpu) QTAILQ_FOREACH(cpu, &cpus, node)
+#define CPU_FOREACH_SAFE(cpu, next_cpu) \
+ QTAILQ_FOREACH_SAFE(cpu, &cpus, node, next_cpu)
+#define first_cpu QTAILQ_FIRST(&cpus)
+
+DECLARE_TLS(CPUState *, current_cpu);
+#define current_cpu tls_var(current_cpu)
+// TODO(digit): Remove this.
+#define cpu_single_env current_cpu
/* Flags for use in ENV->INTERRUPT_PENDING.
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index d8583ad..4ed8a24 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -191,7 +191,7 @@
jmp_buf jmp_env; \
int exception_index; \
\
- CPUOldState *next_cpu; /* next CPU sharing TB cache */ \
+    Q_TAILQ_ENTRY(CPUOldState,) node;   /* entry in the global list of CPUs */ \
int cpu_index; /* CPU index (informative) */ \
uint32_t host_tid; /* host thread ID */ \
int numa_node; /* NUMA node this cpu is belonging to */ \
diff --git a/include/qemu/tls.h b/include/qemu/tls.h
new file mode 100644
index 0000000..b92ea9d
--- /dev/null
+++ b/include/qemu/tls.h
@@ -0,0 +1,52 @@
+/*
+ * Abstraction layer for defining and using TLS variables
+ *
+ * Copyright (c) 2011 Red Hat, Inc
+ * Copyright (c) 2011 Linaro Limited
+ *
+ * Authors:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ * Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_TLS_H
+#define QEMU_TLS_H
+
+/* Per-thread variables. Note that we only have implementations
+ * which are really thread-local on Linux; the dummy implementations
+ * define plain global variables.
+ *
+ * This means that for the moment use should be restricted to
+ * per-VCPU variables, which are OK because:
+ * - the only -user mode supporting multiple VCPU threads is linux-user
+ * - TCG system mode is single-threaded regarding VCPUs
+ * - KVM system mode is multi-threaded but limited to Linux
+ *
+ * TODO: proper implementations via Win32 .tls sections and
+ * POSIX pthread_getspecific.
+ */
+#ifdef __linux__
+#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x)
+#define DEFINE_TLS(type, x) __thread __typeof__(type) tls__##x
+#define tls_var(x) tls__##x
+#else
+/* Dummy implementations which define plain global variables */
+#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x)
+#define DEFINE_TLS(type, x) __typeof__(type) tls__##x
+#define tls_var(x) tls__##x
+#endif
+
+#endif
diff --git a/kvm-all.c b/kvm-all.c
index 3ee3296..20fefac 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -206,10 +206,8 @@
{
CPUOldState *env;
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
- int ret;
-
- ret = kvm_arch_put_registers(env);
+ CPU_FOREACH(env) {
+ int ret = kvm_arch_put_registers(env);
if (ret)
return ret;
}
@@ -955,7 +953,7 @@
return err;
}
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
err = kvm_update_guest_debug(env, 0);
if (err)
return err;
@@ -992,7 +990,7 @@
return err;
}
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
err = kvm_update_guest_debug(env, 0);
if (err)
return err;
@@ -1009,7 +1007,7 @@
QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
/* Try harder to find a CPU that currently sees the breakpoint. */
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
break;
}
@@ -1017,8 +1015,9 @@
}
kvm_arch_remove_all_hw_breakpoints();
- for (env = first_cpu; env != NULL; env = env->next_cpu)
+ CPU_FOREACH(env) {
kvm_update_guest_debug(env, 0);
+ }
}
#else /* !KVM_CAP_SET_GUEST_DEBUG */
diff --git a/target-i386/hax-all.c b/target-i386/hax-all.c
index c41a874..f61cc25 100644
--- a/target-i386/hax-all.c
+++ b/target-i386/hax-all.c
@@ -1045,20 +1045,12 @@
*/
int hax_sync_vcpus(void)
{
- if (hax_enabled())
- {
- CPUX86State *env;
+ if (hax_enabled()) {
+ CPUState *cpu;
- env = first_cpu;
- if (!env)
- return 0;
-
- for (; env != NULL; env = env->next_cpu) {
- int ret;
-
- ret = hax_arch_set_registers(env);
- if (ret < 0)
- {
+ CPU_FOREACH(cpu) {
+ int ret = hax_arch_set_registers(cpu);
+ if (ret < 0) {
dprint("Failed to sync HAX vcpu context\n");
exit(1);
}
@@ -1070,14 +1062,12 @@
void hax_reset_vcpu_state(void *opaque)
{
- CPUX86State *env;
- for (env = first_cpu; env != NULL; env = env->next_cpu)
- {
- if (env->hax_vcpu)
- {
- env->hax_vcpu->emulation_state = HAX_EMULATE_STATE_INITIAL;
- env->hax_vcpu->tunnel->user_event_pending = 0;
- env->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
+ CPUState *cpu;
+ CPU_FOREACH(cpu) {
+ if (cpu->hax_vcpu) {
+ cpu->hax_vcpu->emulation_state = HAX_EMULATE_STATE_INITIAL;
+ cpu->hax_vcpu->tunnel->user_event_pending = 0;
+ cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
}
}
}
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index b878e5b..96825b2 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -4813,7 +4813,7 @@
EIP += next_eip_addend;
/* XXX: not complete but not completely erroneous */
- if (env->cpu_index != 0 || env->next_cpu != NULL) {
+ if (env->cpu_index != 0 || QTAILQ_NEXT(env, node) != NULL) {
/* more than one CPU: do not sleep because another CPU may
wake this one */
} else {
diff --git a/translate-all.c b/translate-all.c
index 0864a57..2777983 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -774,7 +774,7 @@
}
tcg_ctx.tb_ctx.nb_tbs = 0;
- for(env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
#ifdef CONFIG_ANDROID_MEMCHECK
int tb_to_clean;
for (tb_to_clean = 0; tb_to_clean < TB_JMP_CACHE_SIZE; tb_to_clean++) {
@@ -937,7 +937,7 @@
/* remove the TB from the hash list */
h = tb_jmp_cache_hash_func(tb->pc);
- for(env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
if (env->tb_jmp_cache[h] == tb) {
env->tb_jmp_cache[h] = NULL;
}
diff --git a/vl-android.c b/vl-android.c
index b71f24a..cd614c5 100644
--- a/vl-android.c
+++ b/vl-android.c
@@ -4080,7 +4080,7 @@
stralloc_reset(kernel_config);
}
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ CPU_FOREACH(env) {
for (i = 0; i < nb_numa_nodes; i++) {
if (node_cpumask[i] & (1 << env->cpu_index)) {
env->numa_node = i;