Merge branch 'x86/urgent' into x86/xen
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651f..d679cb2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -10,7 +10,7 @@
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
-CFLAGS_REMOVE_paravirt.o = -pg
+CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
endif
#
@@ -89,7 +89,7 @@
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b68e21f..6e38841 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -51,6 +51,8 @@
memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
(mincount - oldsize) * LDT_ENTRY_SIZE);
+ paravirt_alloc_ldt(newldt, mincount);
+
#ifdef CONFIG_X86_64
/* CHECKME: Do we really need this ? */
wmb();
@@ -73,6 +75,7 @@
#endif
}
if (oldsize) {
+ paravirt_free_ldt(oldldt, oldsize);
if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
else
@@ -84,10 +87,13 @@
static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
{
int err = alloc_ldt(new, old->size, 0);
+ int i;
if (err < 0)
return err;
- memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE);
+
+ for(i = 0; i < old->size; i++)
+ write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
return 0;
}
@@ -124,6 +130,7 @@
if (mm == current->active_mm)
clear_LDT();
#endif
+ paravirt_free_ldt(mm->context.ldt, mm->context.size);
if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
new file mode 100644
index 0000000..38d7f7f
--- /dev/null
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -0,0 +1,31 @@
+/*
+ * Split spinlock implementation out into its own file, so it can be
+ * compiled in a FTRACE-compatible way.
+ */
+#include <linux/spinlock.h>
+#include <linux/module.h>
+
+#include <asm/paravirt.h>
+
+struct pv_lock_ops pv_lock_ops = {
+#ifdef CONFIG_SMP
+ .spin_is_locked = __ticket_spin_is_locked,
+ .spin_is_contended = __ticket_spin_is_contended,
+
+ .spin_lock = __ticket_spin_lock,
+ .spin_trylock = __ticket_spin_trylock,
+ .spin_unlock = __ticket_spin_unlock,
+#endif
+};
+EXPORT_SYMBOL_GPL(pv_lock_ops);
+
+void __init paravirt_use_bytelocks(void)
+{
+#ifdef CONFIG_SMP
+ pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
+ pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
+ pv_lock_ops.spin_lock = __byte_spin_lock;
+ pv_lock_ops.spin_trylock = __byte_spin_trylock;
+ pv_lock_ops.spin_unlock = __byte_spin_unlock;
+#endif
+}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 94da4d5..7faea18 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,17 +268,6 @@
return __get_cpu_var(paravirt_lazy_mode);
}
-void __init paravirt_use_bytelocks(void)
-{
-#ifdef CONFIG_SMP
- pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
- pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
- pv_lock_ops.spin_lock = __byte_spin_lock;
- pv_lock_ops.spin_trylock = __byte_spin_trylock;
- pv_lock_ops.spin_unlock = __byte_spin_unlock;
-#endif
-}
-
struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
@@ -348,6 +337,10 @@
.write_ldt_entry = native_write_ldt_entry,
.write_gdt_entry = native_write_gdt_entry,
.write_idt_entry = native_write_idt_entry,
+
+ .alloc_ldt = paravirt_nop,
+ .free_ldt = paravirt_nop,
+
.load_sp0 = native_load_sp0,
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
@@ -461,18 +454,6 @@
.set_fixmap = native_set_fixmap,
};
-struct pv_lock_ops pv_lock_ops = {
-#ifdef CONFIG_SMP
- .spin_is_locked = __ticket_spin_is_locked,
- .spin_is_contended = __ticket_spin_is_contended,
-
- .spin_lock = __ticket_spin_lock,
- .spin_trylock = __ticket_spin_trylock,
- .spin_unlock = __ticket_spin_unlock,
-#endif
-};
-EXPORT_SYMBOL_GPL(pv_lock_ops);
-
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 59c1e53..9ee745f 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,11 @@
-obj-y := enlighten.o setup.o multicalls.o mmu.o \
+ifdef CONFIG_FTRACE
+# Do not profile debug and lowlevel utilities
+CFLAGS_REMOVE_spinlock.o = -pg
+CFLAGS_REMOVE_time.o = -pg
+CFLAGS_REMOVE_irq.o = -pg
+endif
+
+obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
time.o xen-asm_$(BITS).o grant-table.o suspend.o
-obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SMP) += smp.o spinlock.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9ff6e3c..cf8b3a9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -30,7 +30,6 @@
#include <xen/interface/xen.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
-#include <xen/interface/sched.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvc-console.h>
@@ -226,94 +225,6 @@
return HYPERVISOR_get_debugreg(reg);
}
-static unsigned long xen_save_fl(void)
-{
- struct vcpu_info *vcpu;
- unsigned long flags;
-
- vcpu = x86_read_percpu(xen_vcpu);
-
- /* flag has opposite sense of mask */
- flags = !vcpu->evtchn_upcall_mask;
-
- /* convert to IF type flag
- -0 -> 0x00000000
- -1 -> 0xffffffff
- */
- return (-flags) & X86_EFLAGS_IF;
-}
-
-static void xen_restore_fl(unsigned long flags)
-{
- struct vcpu_info *vcpu;
-
- /* convert from IF type flag */
- flags = !(flags & X86_EFLAGS_IF);
-
- /* There's a one instruction preempt window here. We need to
- make sure we're don't switch CPUs between getting the vcpu
- pointer and updating the mask. */
- preempt_disable();
- vcpu = x86_read_percpu(xen_vcpu);
- vcpu->evtchn_upcall_mask = flags;
- preempt_enable_no_resched();
-
- /* Doesn't matter if we get preempted here, because any
- pending event will get dealt with anyway. */
-
- if (flags == 0) {
- preempt_check_resched();
- barrier(); /* unmask then check (avoid races) */
- if (unlikely(vcpu->evtchn_upcall_pending))
- force_evtchn_callback();
- }
-}
-
-static void xen_irq_disable(void)
-{
- /* There's a one instruction preempt window here. We need to
- make sure we're don't switch CPUs between getting the vcpu
- pointer and updating the mask. */
- preempt_disable();
- x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
- preempt_enable_no_resched();
-}
-
-static void xen_irq_enable(void)
-{
- struct vcpu_info *vcpu;
-
- /* We don't need to worry about being preempted here, since
- either a) interrupts are disabled, so no preemption, or b)
- the caller is confused and is trying to re-enable interrupts
- on an indeterminate processor. */
-
- vcpu = x86_read_percpu(xen_vcpu);
- vcpu->evtchn_upcall_mask = 0;
-
- /* Doesn't matter if we get preempted here, because any
- pending event will get dealt with anyway. */
-
- barrier(); /* unmask then check (avoid races) */
- if (unlikely(vcpu->evtchn_upcall_pending))
- force_evtchn_callback();
-}
-
-static void xen_safe_halt(void)
-{
- /* Blocking includes an implicit local_irq_enable(). */
- if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
- BUG();
-}
-
-static void xen_halt(void)
-{
- if (irqs_disabled())
- HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
- else
- xen_safe_halt();
-}
-
static void xen_leave_lazy(void)
{
paravirt_leave_lazy(paravirt_get_lazy_mode());
@@ -325,6 +236,57 @@
return 0;
}
+/*
+ * If 'v' is a vmalloc mapping, then find the linear mapping of the
+ * page (if any) and also set its protections to match:
+ */
+static void set_aliased_prot(void *v, pgprot_t prot)
+{
+ int level;
+ pte_t *ptep;
+ pte_t pte;
+ unsigned long pfn;
+ struct page *page;
+
+ ptep = lookup_address((unsigned long)v, &level);
+ BUG_ON(ptep == NULL);
+
+ pfn = pte_pfn(*ptep);
+ page = pfn_to_page(pfn);
+
+ pte = pfn_pte(pfn, prot);
+
+ if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
+ BUG();
+
+ if (!PageHighMem(page)) {
+ void *av = __va(PFN_PHYS(pfn));
+
+ if (av != v)
+ if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
+ BUG();
+ } else
+ kmap_flush_unused();
+}
+
+static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+ const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
+ int i;
+
+ for(i = 0; i < entries; i += entries_per_page)
+ set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
+}
+
+static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+ const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
+ int i;
+
+ for(i = 0; i < entries; i += entries_per_page)
+ set_aliased_prot(ldt + i, PAGE_KERNEL);
+}
+
static void xen_set_ldt(const void *addr, unsigned entries)
{
struct mmuext_op *op;
@@ -426,7 +388,7 @@
const void *ptr)
{
unsigned long lp = (unsigned long)&dt[entrynum];
- xmaddr_t mach_lp = virt_to_machine(lp);
+ xmaddr_t mach_lp = arbitrary_virt_to_machine(lp);
u64 entry = *(u64 *)ptr;
preempt_disable();
@@ -559,7 +521,7 @@
}
static void xen_load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+ struct thread_struct *thread)
{
struct multicall_space mcs = xen_mc_entry(0);
MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
@@ -803,6 +765,19 @@
ret = -EFAULT;
break;
#endif
+
+ case MSR_STAR:
+ case MSR_CSTAR:
+ case MSR_LSTAR:
+ case MSR_SYSCALL_MASK:
+ case MSR_IA32_SYSENTER_CS:
+ case MSR_IA32_SYSENTER_ESP:
+ case MSR_IA32_SYSENTER_EIP:
+ /* Fast syscall setup is all done in hypercalls, so
+ these are all ignored. Stub them out here to stop
+ Xen console noise. */
+ break;
+
default:
ret = native_write_msr_safe(msr, low, high);
}
@@ -1220,6 +1195,9 @@
.load_gs_index = xen_load_gs_index,
#endif
+ .alloc_ldt = xen_alloc_ldt,
+ .free_ldt = xen_free_ldt,
+
.store_gdt = native_store_gdt,
.store_idt = native_store_idt,
.store_tr = xen_store_tr,
@@ -1241,36 +1219,6 @@
},
};
-static void __init __xen_init_IRQ(void)
-{
-#ifdef CONFIG_X86_64
- int i;
-
- /* Create identity vector->irq map */
- for(i = 0; i < NR_VECTORS; i++) {
- int cpu;
-
- for_each_possible_cpu(cpu)
- per_cpu(vector_irq, cpu)[i] = i;
- }
-#endif /* CONFIG_X86_64 */
-
- xen_init_IRQ();
-}
-
-static const struct pv_irq_ops xen_irq_ops __initdata = {
- .init_IRQ = __xen_init_IRQ,
- .save_fl = xen_save_fl,
- .restore_fl = xen_restore_fl,
- .irq_disable = xen_irq_disable,
- .irq_enable = xen_irq_enable,
- .safe_halt = xen_safe_halt,
- .halt = xen_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = xen_adjust_exception_frame,
-#endif
-};
-
static const struct pv_apic_ops xen_apic_ops __initdata = {
#ifdef CONFIG_X86_LOCAL_APIC
.apic_write = xen_apic_write,
@@ -1324,7 +1272,7 @@
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
.pte_val = xen_pte_val,
- .pte_flags = native_pte_val,
+ .pte_flags = native_pte_flags,
.pgd_val = xen_pgd_val,
.make_pte = xen_make_pte,
@@ -1673,10 +1621,11 @@
pv_init_ops = xen_init_ops;
pv_time_ops = xen_time_ops;
pv_cpu_ops = xen_cpu_ops;
- pv_irq_ops = xen_irq_ops;
pv_apic_ops = xen_apic_ops;
pv_mmu_ops = xen_mmu_ops;
+ xen_init_irq_ops();
+
if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
new file mode 100644
index 0000000..28b85ab
--- /dev/null
+++ b/arch/x86/xen/irq.c
@@ -0,0 +1,143 @@
+#include <linux/hardirq.h>
+
+#include <xen/interface/xen.h>
+#include <xen/interface/sched.h>
+#include <xen/interface/vcpu.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include "xen-ops.h"
+
+/*
+ * Force a proper event-channel callback from Xen after clearing the
+ * callback mask. We do this in a very simple manner, by making a call
+ * down into Xen. The pending flag will be checked by Xen on return.
+ */
+void xen_force_evtchn_callback(void)
+{
+ (void)HYPERVISOR_xen_version(0, NULL);
+}
+
+static void __init __xen_init_IRQ(void)
+{
+#ifdef CONFIG_X86_64
+ int i;
+
+ /* Create identity vector->irq map */
+ for(i = 0; i < NR_VECTORS; i++) {
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(vector_irq, cpu)[i] = i;
+ }
+#endif /* CONFIG_X86_64 */
+
+ xen_init_IRQ();
+}
+
+static unsigned long xen_save_fl(void)
+{
+ struct vcpu_info *vcpu;
+ unsigned long flags;
+
+ vcpu = x86_read_percpu(xen_vcpu);
+
+ /* flag has opposite sense of mask */
+ flags = !vcpu->evtchn_upcall_mask;
+
+ /* convert to IF type flag
+ -0 -> 0x00000000
+ -1 -> 0xffffffff
+ */
+ return (-flags) & X86_EFLAGS_IF;
+}
+
+static void xen_restore_fl(unsigned long flags)
+{
+ struct vcpu_info *vcpu;
+
+ /* convert from IF type flag */
+ flags = !(flags & X86_EFLAGS_IF);
+
+ /* There's a one instruction preempt window here. We need to
+ make sure we're don't switch CPUs between getting the vcpu
+ pointer and updating the mask. */
+ preempt_disable();
+ vcpu = x86_read_percpu(xen_vcpu);
+ vcpu->evtchn_upcall_mask = flags;
+ preempt_enable_no_resched();
+
+ /* Doesn't matter if we get preempted here, because any
+ pending event will get dealt with anyway. */
+
+ if (flags == 0) {
+ preempt_check_resched();
+ barrier(); /* unmask then check (avoid races) */
+ if (unlikely(vcpu->evtchn_upcall_pending))
+ xen_force_evtchn_callback();
+ }
+}
+
+static void xen_irq_disable(void)
+{
+ /* There's a one instruction preempt window here. We need to
+ make sure we're don't switch CPUs between getting the vcpu
+ pointer and updating the mask. */
+ preempt_disable();
+ x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
+ preempt_enable_no_resched();
+}
+
+static void xen_irq_enable(void)
+{
+ struct vcpu_info *vcpu;
+
+ /* We don't need to worry about being preempted here, since
+ either a) interrupts are disabled, so no preemption, or b)
+ the caller is confused and is trying to re-enable interrupts
+ on an indeterminate processor. */
+
+ vcpu = x86_read_percpu(xen_vcpu);
+ vcpu->evtchn_upcall_mask = 0;
+
+ /* Doesn't matter if we get preempted here, because any
+ pending event will get dealt with anyway. */
+
+ barrier(); /* unmask then check (avoid races) */
+ if (unlikely(vcpu->evtchn_upcall_pending))
+ xen_force_evtchn_callback();
+}
+
+static void xen_safe_halt(void)
+{
+ /* Blocking includes an implicit local_irq_enable(). */
+ if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
+ BUG();
+}
+
+static void xen_halt(void)
+{
+ if (irqs_disabled())
+ HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+ else
+ xen_safe_halt();
+}
+
+static const struct pv_irq_ops xen_irq_ops __initdata = {
+ .init_IRQ = __xen_init_IRQ,
+ .save_fl = xen_save_fl,
+ .restore_fl = xen_restore_fl,
+ .irq_disable = xen_irq_disable,
+ .irq_enable = xen_irq_enable,
+ .safe_halt = xen_safe_halt,
+ .halt = xen_halt,
+#ifdef CONFIG_X86_64
+ .adjust_exception_frame = xen_adjust_exception_frame,
+#endif
+};
+
+void __init xen_init_irq_ops()
+{
+ pv_irq_ops = xen_irq_ops;
+}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d8faf79..baca7f2 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -15,7 +15,6 @@
* This does not handle HOTPLUG_CPU yet.
*/
#include <linux/sched.h>
-#include <linux/kernel_stat.h>
#include <linux/err.h>
#include <linux/smp.h>
@@ -36,8 +35,6 @@
#include "xen-ops.h"
#include "mmu.h"
-static void __cpuinit xen_init_lock_cpu(int cpu);
-
cpumask_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq);
@@ -419,170 +416,6 @@
return IRQ_HANDLED;
}
-struct xen_spinlock {
- unsigned char lock; /* 0 -> free; 1 -> locked */
- unsigned short spinners; /* count of waiting cpus */
-};
-
-static int xen_spin_is_locked(struct raw_spinlock *lock)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
- return xl->lock != 0;
-}
-
-static int xen_spin_is_contended(struct raw_spinlock *lock)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
- /* Not strictly true; this is only the count of contended
- lock-takers entering the slow path. */
- return xl->spinners != 0;
-}
-
-static int xen_spin_trylock(struct raw_spinlock *lock)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
- u8 old = 1;
-
- asm("xchgb %b0,%1"
- : "+q" (old), "+m" (xl->lock) : : "memory");
-
- return old == 0;
-}
-
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
-
-static inline void spinning_lock(struct xen_spinlock *xl)
-{
- __get_cpu_var(lock_spinners) = xl;
- wmb(); /* set lock of interest before count */
- asm(LOCK_PREFIX " incw %0"
- : "+m" (xl->spinners) : : "memory");
-}
-
-static inline void unspinning_lock(struct xen_spinlock *xl)
-{
- asm(LOCK_PREFIX " decw %0"
- : "+m" (xl->spinners) : : "memory");
- wmb(); /* decrement count before clearing lock */
- __get_cpu_var(lock_spinners) = NULL;
-}
-
-static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
- int irq = __get_cpu_var(lock_kicker_irq);
- int ret;
-
- /* If kicker interrupts not initialized yet, just spin */
- if (irq == -1)
- return 0;
-
- /* announce we're spinning */
- spinning_lock(xl);
-
- /* clear pending */
- xen_clear_irq_pending(irq);
-
- /* check again make sure it didn't become free while
- we weren't looking */
- ret = xen_spin_trylock(lock);
- if (ret)
- goto out;
-
- /* block until irq becomes pending */
- xen_poll_irq(irq);
- kstat_this_cpu.irqs[irq]++;
-
-out:
- unspinning_lock(xl);
- return ret;
-}
-
-static void xen_spin_lock(struct raw_spinlock *lock)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
- int timeout;
- u8 oldval;
-
- do {
- timeout = 1 << 10;
-
- asm("1: xchgb %1,%0\n"
- " testb %1,%1\n"
- " jz 3f\n"
- "2: rep;nop\n"
- " cmpb $0,%0\n"
- " je 1b\n"
- " dec %2\n"
- " jnz 2b\n"
- "3:\n"
- : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
- : "1" (1)
- : "memory");
-
- } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
-}
-
-static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
-{
- int cpu;
-
- for_each_online_cpu(cpu) {
- /* XXX should mix up next cpu selection */
- if (per_cpu(lock_spinners, cpu) == xl) {
- xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
- break;
- }
- }
-}
-
-static void xen_spin_unlock(struct raw_spinlock *lock)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
- smp_wmb(); /* make sure no writes get moved after unlock */
- xl->lock = 0; /* release lock */
-
- /* make sure unlock happens before kick */
- barrier();
-
- if (unlikely(xl->spinners))
- xen_spin_unlock_slow(xl);
-}
-
-static __cpuinit void xen_init_lock_cpu(int cpu)
-{
- int irq;
- const char *name;
-
- name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
- irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
- cpu,
- xen_reschedule_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
- name,
- NULL);
-
- if (irq >= 0) {
- disable_irq(irq); /* make sure it's never delivered */
- per_cpu(lock_kicker_irq, cpu) = irq;
- }
-
- printk("cpu %d spinlock event irq %d\n", cpu, irq);
-}
-
-static void __init xen_init_spinlocks(void)
-{
- pv_lock_ops.spin_is_locked = xen_spin_is_locked;
- pv_lock_ops.spin_is_contended = xen_spin_is_contended;
- pv_lock_ops.spin_lock = xen_spin_lock;
- pv_lock_ops.spin_trylock = xen_spin_trylock;
- pv_lock_ops.spin_unlock = xen_spin_unlock;
-}
-
static const struct smp_ops xen_smp_ops __initdata = {
.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
.smp_prepare_cpus = xen_smp_prepare_cpus,
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
new file mode 100644
index 0000000..8dc4d31
--- /dev/null
+++ b/arch/x86/xen/spinlock.c
@@ -0,0 +1,183 @@
+/*
+ * Split spinlock implementation out into its own file, so it can be
+ * compiled in a FTRACE-compatible way.
+ */
+#include <linux/kernel_stat.h>
+#include <linux/spinlock.h>
+
+#include <asm/paravirt.h>
+
+#include <xen/interface/xen.h>
+#include <xen/events.h>
+
+#include "xen-ops.h"
+
+struct xen_spinlock {
+ unsigned char lock; /* 0 -> free; 1 -> locked */
+ unsigned short spinners; /* count of waiting cpus */
+};
+
+static int xen_spin_is_locked(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+ return xl->lock != 0;
+}
+
+static int xen_spin_is_contended(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+ /* Not strictly true; this is only the count of contended
+ lock-takers entering the slow path. */
+ return xl->spinners != 0;
+}
+
+static int xen_spin_trylock(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+ u8 old = 1;
+
+ asm("xchgb %b0,%1"
+ : "+q" (old), "+m" (xl->lock) : : "memory");
+
+ return old == 0;
+}
+
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
+
+static inline void spinning_lock(struct xen_spinlock *xl)
+{
+ __get_cpu_var(lock_spinners) = xl;
+ wmb(); /* set lock of interest before count */
+ asm(LOCK_PREFIX " incw %0"
+ : "+m" (xl->spinners) : : "memory");
+}
+
+static inline void unspinning_lock(struct xen_spinlock *xl)
+{
+ asm(LOCK_PREFIX " decw %0"
+ : "+m" (xl->spinners) : : "memory");
+ wmb(); /* decrement count before clearing lock */
+ __get_cpu_var(lock_spinners) = NULL;
+}
+
+static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+ int irq = __get_cpu_var(lock_kicker_irq);
+ int ret;
+
+ /* If kicker interrupts not initialized yet, just spin */
+ if (irq == -1)
+ return 0;
+
+ /* announce we're spinning */
+ spinning_lock(xl);
+
+ /* clear pending */
+ xen_clear_irq_pending(irq);
+
+ /* check again make sure it didn't become free while
+ we weren't looking */
+ ret = xen_spin_trylock(lock);
+ if (ret)
+ goto out;
+
+ /* block until irq becomes pending */
+ xen_poll_irq(irq);
+ kstat_this_cpu.irqs[irq]++;
+
+out:
+ unspinning_lock(xl);
+ return ret;
+}
+
+static void xen_spin_lock(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+ int timeout;
+ u8 oldval;
+
+ do {
+ timeout = 1 << 10;
+
+ asm("1: xchgb %1,%0\n"
+ " testb %1,%1\n"
+ " jz 3f\n"
+ "2: rep;nop\n"
+ " cmpb $0,%0\n"
+ " je 1b\n"
+ " dec %2\n"
+ " jnz 2b\n"
+ "3:\n"
+ : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
+ : "1" (1)
+ : "memory");
+
+ } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
+}
+
+static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ /* XXX should mix up next cpu selection */
+ if (per_cpu(lock_spinners, cpu) == xl) {
+ xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+ break;
+ }
+ }
+}
+
+static void xen_spin_unlock(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+ smp_wmb(); /* make sure no writes get moved after unlock */
+ xl->lock = 0; /* release lock */
+
+ /* make sure unlock happens before kick */
+ barrier();
+
+ if (unlikely(xl->spinners))
+ xen_spin_unlock_slow(xl);
+}
+
+static irqreturn_t dummy_handler(int irq, void *dev_id)
+{
+ BUG();
+ return IRQ_HANDLED;
+}
+
+void __cpuinit xen_init_lock_cpu(int cpu)
+{
+ int irq;
+ const char *name;
+
+ name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
+ irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
+ cpu,
+ dummy_handler,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ name,
+ NULL);
+
+ if (irq >= 0) {
+ disable_irq(irq); /* make sure it's never delivered */
+ per_cpu(lock_kicker_irq, cpu) = irq;
+ }
+
+ printk("cpu %d spinlock event irq %d\n", cpu, irq);
+}
+
+void __init xen_init_spinlocks(void)
+{
+ pv_lock_ops.spin_is_locked = xen_spin_is_locked;
+ pv_lock_ops.spin_is_contended = xen_spin_is_contended;
+ pv_lock_ops.spin_lock = xen_spin_lock;
+ pv_lock_ops.spin_trylock = xen_spin_trylock;
+ pv_lock_ops.spin_unlock = xen_spin_unlock;
+}
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 2497a30..42786f5 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -298,7 +298,7 @@
push %eax
push %ecx
push %edx
- call force_evtchn_callback
+ call xen_force_evtchn_callback
pop %edx
pop %ecx
pop %eax
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 7f58304..3b9bda4 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -122,7 +122,7 @@
push %r9
push %r10
push %r11
- call force_evtchn_callback
+ call xen_force_evtchn_callback
pop %r11
pop %r10
pop %r9
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index dd3c231..3c70ebc 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -31,6 +31,7 @@
void __init xen_build_dynamic_phys_to_machine(void);
+void xen_init_irq_ops(void);
void xen_setup_timer(int cpu);
void xen_setup_cpu_clockevents(void);
unsigned long xen_tsc_khz(void);
@@ -50,6 +51,9 @@
#ifdef CONFIG_SMP
void xen_smp_init(void);
+void __init xen_init_spinlocks(void);
+__cpuinit void xen_init_lock_cpu(int cpu);
+
extern cpumask_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index d4427cb..fff987b 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -60,7 +60,7 @@
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
-#define BALLOON_CLASS_NAME "memory"
+#define BALLOON_CLASS_NAME "xen_memory"
struct balloon_stats {
/* We aim for 'current allocation' == 'target allocation'. */
@@ -226,9 +226,8 @@
}
set_xen_guest_handle(reservation.extent_start, frame_list);
- reservation.nr_extents = nr_pages;
- rc = HYPERVISOR_memory_op(
- XENMEM_populate_physmap, &reservation);
+ reservation.nr_extents = nr_pages;
+ rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
if (rc < nr_pages) {
if (rc > 0) {
int ret;
@@ -236,7 +235,7 @@
/* We hit the Xen hard limit: reprobe. */
reservation.nr_extents = rc;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &reservation);
+ &reservation);
BUG_ON(ret != rc);
}
if (rc >= 0)
@@ -464,136 +463,13 @@
module_exit(balloon_exit);
-static void balloon_update_driver_allowance(long delta)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&balloon_lock, flags);
- balloon_stats.driver_pages += delta;
- spin_unlock_irqrestore(&balloon_lock, flags);
-}
-
-static int dealloc_pte_fn(
- pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
-{
- unsigned long mfn = pte_mfn(*pte);
- int ret;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- set_xen_guest_handle(reservation.extent_start, &mfn);
- set_pte_at(&init_mm, addr, pte, __pte_ma(0ull));
- set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
- BUG_ON(ret != 1);
- return 0;
-}
-
-static struct page **alloc_empty_pages_and_pagevec(int nr_pages)
-{
- unsigned long vaddr, flags;
- struct page *page, **pagevec;
- int i, ret;
-
- pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
- if (pagevec == NULL)
- return NULL;
-
- for (i = 0; i < nr_pages; i++) {
- page = pagevec[i] = alloc_page(GFP_KERNEL);
- if (page == NULL)
- goto err;
-
- vaddr = (unsigned long)page_address(page);
-
- scrub_page(page);
-
- spin_lock_irqsave(&balloon_lock, flags);
-
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- unsigned long gmfn = page_to_pfn(page);
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- set_xen_guest_handle(reservation.extent_start, &gmfn);
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &reservation);
- if (ret == 1)
- ret = 0; /* success */
- } else {
- ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
- dealloc_pte_fn, NULL);
- }
-
- if (ret != 0) {
- spin_unlock_irqrestore(&balloon_lock, flags);
- __free_page(page);
- goto err;
- }
-
- totalram_pages = --balloon_stats.current_pages;
-
- spin_unlock_irqrestore(&balloon_lock, flags);
- }
-
- out:
- schedule_work(&balloon_worker);
- flush_tlb_all();
- return pagevec;
-
- err:
- spin_lock_irqsave(&balloon_lock, flags);
- while (--i >= 0)
- balloon_append(pagevec[i]);
- spin_unlock_irqrestore(&balloon_lock, flags);
- kfree(pagevec);
- pagevec = NULL;
- goto out;
-}
-
-static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
-{
- unsigned long flags;
- int i;
-
- if (pagevec == NULL)
- return;
-
- spin_lock_irqsave(&balloon_lock, flags);
- for (i = 0; i < nr_pages; i++) {
- BUG_ON(page_count(pagevec[i]) != 1);
- balloon_append(pagevec[i]);
- }
- spin_unlock_irqrestore(&balloon_lock, flags);
-
- kfree(pagevec);
-
- schedule_work(&balloon_worker);
-}
-
-static void balloon_release_driver_page(struct page *page)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&balloon_lock, flags);
- balloon_append(page);
- balloon_stats.driver_pages--;
- spin_unlock_irqrestore(&balloon_lock, flags);
-
- schedule_work(&balloon_worker);
-}
-
-
-#define BALLOON_SHOW(name, format, args...) \
- static ssize_t show_##name(struct sys_device *dev, \
- char *buf) \
- { \
- return sprintf(buf, format, ##args); \
- } \
+#define BALLOON_SHOW(name, format, args...) \
+ static ssize_t show_##name(struct sys_device *dev, \
+ struct sysdev_attribute *attr, \
+ char *buf) \
+ { \
+ return sprintf(buf, format, ##args); \
+ } \
static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
@@ -604,7 +480,8 @@
(balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
-static ssize_t show_target_kb(struct sys_device *dev, char *buf)
+static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
}
@@ -614,19 +491,14 @@
const char *buf,
size_t count)
{
- char memstring[64], *endchar;
+ char *endchar;
unsigned long long target_bytes;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (count <= 1)
- return -EBADMSG; /* runt */
- if (count > sizeof(memstring))
- return -EFBIG; /* too long */
- strcpy(memstring, buf);
+ target_bytes = memparse(buf, &endchar);
- target_bytes = memparse(memstring, &endchar);
balloon_set_new_target(target_bytes >> PAGE_SHIFT);
return count;
@@ -694,20 +566,4 @@
return error;
}
-static void unregister_balloon(struct sys_device *sysdev)
-{
- int i;
-
- sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
- for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
- sysdev_remove_file(sysdev, balloon_attrs[i]);
- sysdev_unregister(sysdev);
- sysdev_class_unregister(&balloon_sysdev_class);
-}
-
-static void balloon_sysfs_exit(void)
-{
- unregister_balloon(&balloon_sysdev);
-}
-
MODULE_LICENSE("GPL");
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 0e0c285..a083703 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -84,17 +84,6 @@
/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn) ((chn) != 0)
-/*
- * Force a proper event-channel callback from Xen after clearing the
- * callback mask. We do this in a very simple manner, by making a call
- * down into Xen. The pending flag will be checked by Xen on return.
- */
-void force_evtchn_callback(void)
-{
- (void)HYPERVISOR_xen_version(0, NULL);
-}
-EXPORT_SYMBOL_GPL(force_evtchn_callback);
-
static struct irq_chip xen_dynamic_chip;
/* Constructor for packed IRQ information. */
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index a44c4dc..06f786f 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -24,6 +24,11 @@
desc->d = info->seg_32bit;
desc->g = info->limit_in_pages;
desc->base2 = (info->base_addr & 0xff000000) >> 24;
+ /*
+ * Don't allow setting of the lm bit. It is useless anyway
+ * because 64bit system calls require __USER_CS:
+ */
+ desc->l = 0;
}
extern struct desc_ptr idt_descr;
@@ -97,7 +102,15 @@
native_write_gdt_entry(dt, entry, desc, type)
#define write_idt_entry(dt, entry, g) \
native_write_idt_entry(dt, entry, g)
-#endif
+
+static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+}
+
+static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+}
+#endif /* CONFIG_PARAVIRT */
static inline void native_write_idt_entry(gate_desc *idt, int entry,
const gate_desc *gate)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index fbbde93..db9b064 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -124,6 +124,9 @@
int entrynum, const void *desc, int size);
void (*write_idt_entry)(gate_desc *,
int entrynum, const gate_desc *gate);
+ void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
+ void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
+
void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
void (*set_iopl_mask)(unsigned mask);
@@ -824,6 +827,16 @@
(aux) = __aux; \
} while (0)
+static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+ PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
+}
+
+static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+ PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries);
+}
+
static inline void load_TR_desc(void)
{
PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index fdbbf72..7889c2f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -176,7 +176,7 @@
extern int get_option(char **str, int *pint);
extern char *get_options(const char *str, int nints, int *ints);
-extern unsigned long long memparse(char *ptr, char **retptr);
+extern unsigned long long memparse(const char *ptr, char **retptr);
extern int core_kernel_text(unsigned long addr);
extern int __kernel_text_address(unsigned long addr);
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 5ba8a94..f5f3ad8 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -126,7 +126,7 @@
* megabyte, or one gigabyte, respectively.
*/
-unsigned long long memparse(char *ptr, char **retptr)
+unsigned long long memparse(const char *ptr, char **retptr)
{
char *endptr; /* local pointer to end of parsed string */