| /* |
| * Split spinlock implementation out into its own file, so it can be |
| * compiled in a FTRACE-compatible way. |
| */ |
| #include <linux/kernel_stat.h> |
| #include <linux/spinlock.h> |
| #include <linux/debugfs.h> |
| #include <linux/log2.h> |
| #include <linux/gfp.h> |
| #include <linux/slab.h> |
| |
| #include <asm/paravirt.h> |
| |
| #include <xen/interface/xen.h> |
| #include <xen/events.h> |
| |
| #include "xen-ops.h" |
| #include "debugfs.h" |
| |
| #ifdef CONFIG_XEN_DEBUG_FS |
| static struct xen_spinlock_stats |
| { |
| u64 taken; |
| u32 taken_slow; |
| u32 taken_slow_nested; |
| u32 taken_slow_pickup; |
| u32 taken_slow_spurious; |
| u32 taken_slow_irqenable; |
| |
| u64 released; |
| u32 released_slow; |
| u32 released_slow_kicked; |
| |
| #define HISTO_BUCKETS 30 |
| u32 histo_spin_total[HISTO_BUCKETS+1]; |
| u32 histo_spin_spinning[HISTO_BUCKETS+1]; |
| u32 histo_spin_blocked[HISTO_BUCKETS+1]; |
| |
| u64 time_total; |
| u64 time_spinning; |
| u64 time_blocked; |
| } spinlock_stats; |
| |
| static u8 zero_stats; |
| |
| static unsigned lock_timeout = 1 << 10; |
| #define TIMEOUT lock_timeout |
| |
| static inline void check_zero(void) |
| { |
| if (unlikely(zero_stats)) { |
| memset(&spinlock_stats, 0, sizeof(spinlock_stats)); |
| zero_stats = 0; |
| } |
| } |
| |
/*
 * Add @val to the spinlock_stats field named @elem, applying any pending
 * reset request first.
 */
#define ADD_STATS(elem, val)				\
	do {						\
		check_zero();				\
		spinlock_stats.elem += (val);		\
	} while (0)
| |
| static inline u64 spin_time_start(void) |
| { |
| return xen_clocksource_read(); |
| } |
| |
| static void __spin_time_accum(u64 delta, u32 *array) |
| { |
| unsigned index = ilog2(delta); |
| |
| check_zero(); |
| |
| if (index < HISTO_BUCKETS) |
| array[index]++; |
| else |
| array[HISTO_BUCKETS]++; |
| } |
| |
| static inline void spin_time_accum_spinning(u64 start) |
| { |
| u32 delta = xen_clocksource_read() - start; |
| |
| __spin_time_accum(delta, spinlock_stats.histo_spin_spinning); |
| spinlock_stats.time_spinning += delta; |
| } |
| |
| static inline void spin_time_accum_total(u64 start) |
| { |
| u32 delta = xen_clocksource_read() - start; |
| |
| __spin_time_accum(delta, spinlock_stats.histo_spin_total); |
| spinlock_stats.time_total += delta; |
| } |
| |
| static inline void spin_time_accum_blocked(u64 start) |
| { |
| u32 delta = xen_clocksource_read() - start; |
| |
| __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); |
| spinlock_stats.time_blocked += delta; |
| } |
| #else /* !CONFIG_XEN_DEBUG_FS */ |
/*
 * Statistics compiled out: the spin budget is a constant and ADD_STATS()
 * only evaluates its value argument (so side effects are kept) and
 * discards the result.
 */
#define TIMEOUT			(1 << 10)
#define ADD_STATS(elem, val)	do { (void)(val); } while (0)
| |
| static inline u64 spin_time_start(void) |
| { |
| return 0; |
| } |
| |
| static inline void spin_time_accum_total(u64 start) |
| { |
| } |
| static inline void spin_time_accum_spinning(u64 start) |
| { |
| } |
| static inline void spin_time_accum_blocked(u64 start) |
| { |
| } |
| #endif /* CONFIG_XEN_DEBUG_FS */ |
| |
| /* |
| * Size struct xen_spinlock so it's the same as arch_spinlock_t. |
| */ |
| #if NR_CPUS < 256 |
| typedef u8 xen_spinners_t; |
| # define inc_spinners(xl) \ |
| asm(LOCK_PREFIX " incb %0" : "+m" ((xl)->spinners) : : "memory"); |
| # define dec_spinners(xl) \ |
| asm(LOCK_PREFIX " decb %0" : "+m" ((xl)->spinners) : : "memory"); |
| #else |
| typedef u16 xen_spinners_t; |
| # define inc_spinners(xl) \ |
| asm(LOCK_PREFIX " incw %0" : "+m" ((xl)->spinners) : : "memory"); |
| # define dec_spinners(xl) \ |
| asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); |
| #endif |
| |
| struct xen_spinlock { |
| unsigned char lock; /* 0 -> free; 1 -> locked */ |
| xen_spinners_t spinners; /* count of waiting cpus */ |
| }; |
| |
| static int xen_spin_is_locked(struct arch_spinlock *lock) |
| { |
| struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
| |
| return xl->lock != 0; |
| } |
| |
| static int xen_spin_is_contended(struct arch_spinlock *lock) |
| { |
| struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
| |
| /* Not strictly true; this is only the count of contended |
| lock-takers entering the slow path. */ |
| return xl->spinners != 0; |
| } |
| |
| static int xen_spin_trylock(struct arch_spinlock *lock) |
| { |
| struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
| u8 old = 1; |
| |
| asm("xchgb %b0,%1" |
| : "+q" (old), "+m" (xl->lock) : : "memory"); |
| |
| return old == 0; |
| } |
| |
| static DEFINE_PER_CPU(char *, irq_name); |
| static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; |
| static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); |
| |
| /* |
| * Mark a cpu as interested in a lock. Returns the CPU's previous |
| * lock of interest, in case we got preempted by an interrupt. |
| */ |
| static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) |
| { |
| struct xen_spinlock *prev; |
| |
| prev = __this_cpu_read(lock_spinners); |
| __this_cpu_write(lock_spinners, xl); |
| |
| wmb(); /* set lock of interest before count */ |
| |
| inc_spinners(xl); |
| |
| return prev; |
| } |
| |
| /* |
| * Mark a cpu as no longer interested in a lock. Restores previous |
| * lock of interest (NULL for none). |
| */ |
| static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) |
| { |
| dec_spinners(xl); |
| wmb(); /* decrement count before restoring lock */ |
| __this_cpu_write(lock_spinners, prev); |
| } |
| |
| static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) |
| { |
| struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
| struct xen_spinlock *prev; |
| int irq = __this_cpu_read(lock_kicker_irq); |
| int ret; |
| u64 start; |
| |
| /* If kicker interrupts not initialized yet, just spin */ |
| if (irq == -1) |
| return 0; |
| |
| start = spin_time_start(); |
| |
| /* announce we're spinning */ |
| prev = spinning_lock(xl); |
| |
| ADD_STATS(taken_slow, 1); |
| ADD_STATS(taken_slow_nested, prev != NULL); |
| |
| do { |
| unsigned long flags; |
| |
| /* clear pending */ |
| xen_clear_irq_pending(irq); |
| |
| /* check again make sure it didn't become free while |
| we weren't looking */ |
| ret = xen_spin_trylock(lock); |
| if (ret) { |
| ADD_STATS(taken_slow_pickup, 1); |
| |
| /* |
| * If we interrupted another spinlock while it |
| * was blocking, make sure it doesn't block |
| * without rechecking the lock. |
| */ |
| if (prev != NULL) |
| xen_set_irq_pending(irq); |
| goto out; |
| } |
| |
| flags = arch_local_save_flags(); |
| if (irq_enable) { |
| ADD_STATS(taken_slow_irqenable, 1); |
| raw_local_irq_enable(); |
| } |
| |
| /* |
| * Block until irq becomes pending. If we're |
| * interrupted at this point (after the trylock but |
| * before entering the block), then the nested lock |
| * handler guarantees that the irq will be left |
| * pending if there's any chance the lock became free; |
| * xen_poll_irq() returns immediately if the irq is |
| * pending. |
| */ |
| xen_poll_irq(irq); |
| |
| raw_local_irq_restore(flags); |
| |
| ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); |
| } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ |
| |
| kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); |
| |
| out: |
| unspinning_lock(xl, prev); |
| spin_time_accum_blocked(start); |
| |
| return ret; |
| } |
| |
| static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable) |
| { |
| struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
| unsigned timeout; |
| u8 oldval; |
| u64 start_spin; |
| |
| ADD_STATS(taken, 1); |
| |
| start_spin = spin_time_start(); |
| |
| do { |
| u64 start_spin_fast = spin_time_start(); |
| |
| timeout = TIMEOUT; |
| |
| asm("1: xchgb %1,%0\n" |
| " testb %1,%1\n" |
| " jz 3f\n" |
| "2: rep;nop\n" |
| " cmpb $0,%0\n" |
| " je 1b\n" |
| " dec %2\n" |
| " jnz 2b\n" |
| "3:\n" |
| : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) |
| : "1" (1) |
| : "memory"); |
| |
| spin_time_accum_spinning(start_spin_fast); |
| |
| } while (unlikely(oldval != 0 && |
| (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable)))); |
| |
| spin_time_accum_total(start_spin); |
| } |
| |
| static void xen_spin_lock(struct arch_spinlock *lock) |
| { |
| __xen_spin_lock(lock, false); |
| } |
| |
| static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags) |
| { |
| __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); |
| } |
| |
| static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) |
| { |
| int cpu; |
| |
| ADD_STATS(released_slow, 1); |
| |
| for_each_online_cpu(cpu) { |
| /* XXX should mix up next cpu selection */ |
| if (per_cpu(lock_spinners, cpu) == xl) { |
| ADD_STATS(released_slow_kicked, 1); |
| xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); |
| } |
| } |
| } |
| |
| static void xen_spin_unlock(struct arch_spinlock *lock) |
| { |
| struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
| |
| ADD_STATS(released, 1); |
| |
| smp_wmb(); /* make sure no writes get moved after unlock */ |
| xl->lock = 0; /* release lock */ |
| |
| /* |
| * Make sure unlock happens before checking for waiting |
| * spinners. We need a strong barrier to enforce the |
| * write-read ordering to different memory locations, as the |
| * CPU makes no implied guarantees about their ordering. |
| */ |
| mb(); |
| |
| if (unlikely(xl->spinners)) |
| xen_spin_unlock_slow(xl); |
| } |
| |
| static irqreturn_t dummy_handler(int irq, void *dev_id) |
| { |
| BUG(); |
| return IRQ_HANDLED; |
| } |
| |
| void __cpuinit xen_init_lock_cpu(int cpu) |
| { |
| int irq; |
| char *name; |
| |
| WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n", |
| cpu, per_cpu(lock_kicker_irq, cpu)); |
| |
| /* |
| * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23 |
| * (xen: disable PV spinlocks on HVM) |
| */ |
| if (xen_hvm_domain()) |
| return; |
| |
| name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); |
| irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, |
| cpu, |
| dummy_handler, |
| IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, |
| name, |
| NULL); |
| |
| if (irq >= 0) { |
| disable_irq(irq); /* make sure it's never delivered */ |
| per_cpu(lock_kicker_irq, cpu) = irq; |
| per_cpu(irq_name, cpu) = name; |
| } |
| |
| printk("cpu %d spinlock event irq %d\n", cpu, irq); |
| } |
| |
| void xen_uninit_lock_cpu(int cpu) |
| { |
| /* |
| * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23 |
| * (xen: disable PV spinlocks on HVM) |
| */ |
| if (xen_hvm_domain()) |
| return; |
| |
| unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); |
| per_cpu(lock_kicker_irq, cpu) = -1; |
| kfree(per_cpu(irq_name, cpu)); |
| per_cpu(irq_name, cpu) = NULL; |
| } |
| |
| void __init xen_init_spinlocks(void) |
| { |
| /* |
| * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23 |
| * (xen: disable PV spinlocks on HVM) |
| */ |
| if (xen_hvm_domain()) |
| return; |
| |
| BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); |
| |
| pv_lock_ops.spin_is_locked = xen_spin_is_locked; |
| pv_lock_ops.spin_is_contended = xen_spin_is_contended; |
| pv_lock_ops.spin_lock = xen_spin_lock; |
| pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; |
| pv_lock_ops.spin_trylock = xen_spin_trylock; |
| pv_lock_ops.spin_unlock = xen_spin_unlock; |
| } |
| |
| #ifdef CONFIG_XEN_DEBUG_FS |
| |
| static struct dentry *d_spin_debug; |
| |
| static int __init xen_spinlock_debugfs(void) |
| { |
| struct dentry *d_xen = xen_init_debugfs(); |
| |
| if (d_xen == NULL) |
| return -ENOMEM; |
| |
| d_spin_debug = debugfs_create_dir("spinlocks", d_xen); |
| |
| debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); |
| |
| debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout); |
| |
| debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken); |
| debugfs_create_u32("taken_slow", 0444, d_spin_debug, |
| &spinlock_stats.taken_slow); |
| debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug, |
| &spinlock_stats.taken_slow_nested); |
| debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, |
| &spinlock_stats.taken_slow_pickup); |
| debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, |
| &spinlock_stats.taken_slow_spurious); |
| debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug, |
| &spinlock_stats.taken_slow_irqenable); |
| |
| debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released); |
| debugfs_create_u32("released_slow", 0444, d_spin_debug, |
| &spinlock_stats.released_slow); |
| debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, |
| &spinlock_stats.released_slow_kicked); |
| |
| debugfs_create_u64("time_spinning", 0444, d_spin_debug, |
| &spinlock_stats.time_spinning); |
| debugfs_create_u64("time_blocked", 0444, d_spin_debug, |
| &spinlock_stats.time_blocked); |
| debugfs_create_u64("time_total", 0444, d_spin_debug, |
| &spinlock_stats.time_total); |
| |
| debugfs_create_u32_array("histo_total", 0444, d_spin_debug, |
| spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); |
| debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, |
| spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); |
| debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, |
| spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); |
| |
| return 0; |
| } |
| fs_initcall(xen_spinlock_debugfs); |
| |
| #endif /* CONFIG_XEN_DEBUG_FS */ |