| #ifndef _ASM_X86_BARRIER_H |
| #define _ASM_X86_BARRIER_H |
| |
| #include <asm/alternative.h> |
| #include <asm/nops.h> |
| |
| /* |
| * Force strict CPU ordering. |
| * And yes, this is required on UP too when we're talking |
| * to devices. |
| */ |
| |
| #ifdef CONFIG_X86_32 |
| /* |
| * 32-bit: each barrier hands alternative() a locked add on the top of |
| * the stack together with the matching fence instruction, keyed on |
| * X86_FEATURE_XMM2 (mfence, lfence) or X86_FEATURE_XMM (sfence). |
| * See <asm/alternative.h> for how the choice between the two is made. |
| * |
| * Some non-Intel clones support out of order store, so wmb() is not |
| * a nop for these. |
| */ |
| #define mb()  alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) |
| #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) |
| #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) |
| #else /* !CONFIG_X86_32 */ |
| /* 64-bit: the fence instructions are emitted unconditionally. */ |
| #define mb()  asm volatile("mfence" ::: "memory") |
| #define rmb() asm volatile("lfence" ::: "memory") |
| #define wmb() asm volatile("sfence" ::: "memory") |
| #endif /* CONFIG_X86_32 */ |
| |
| /** |
| * read_barrier_depends - Flush all pending reads that subsequent reads |
| * depend on. |
| * |
| * No data-dependent reads from memory-like regions are ever reordered |
| * over this barrier. All reads preceding this primitive are guaranteed |
| * to access memory (but not necessarily other CPUs' caches) before any |
| * reads following this primitive that depend on the data returned by |
| * any of the preceding reads. This primitive is much lighter weight than |
| * rmb() on most CPUs, and is never heavier weight than rmb(). |
| * |
| * These ordering constraints are respected by both the local CPU |
| * and the compiler. |
| * |
| * Ordering is not guaranteed by anything other than these primitives, |
| * not even by data dependencies. See the documentation for |
| * memory_barrier() for examples and URLs to more information. |
| * |
| * For example, the following code would force ordering (the initial |
| * value of "a" is zero, "b" is one, and "p" is "&a"): |
| * |
| * <programlisting> |
| *      CPU 0                           CPU 1 |
| * |
| *      b = 2; |
| *      memory_barrier(); |
| *      p = &b;                         q = p; |
| *                                      read_barrier_depends(); |
| *                                      d = *q; |
| * </programlisting> |
| * |
| * because the read of "*q" depends on the read of "p" and these |
| * two reads are separated by a read_barrier_depends(). However, |
| * the following code, with the same initial values for "a" and "b": |
| * |
| * <programlisting> |
| *      CPU 0                           CPU 1 |
| * |
| *      a = 2; |
| *      memory_barrier(); |
| *      b = 3;                          y = b; |
| *                                      read_barrier_depends(); |
| *                                      x = a; |
| * </programlisting> |
| * |
| * does not enforce ordering, since there is no data dependency between |
| * the read of "a" and the read of "b". Therefore, on some CPUs, such |
| * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() |
| * in cases like this where there are no data dependencies. |
| * |
| * In this header the primitive expands to an empty statement. |
| **/ |
| |
| #define read_barrier_depends() do { } while (0) |
| |
| #ifdef CONFIG_SMP |
| #define smp_mb() mb() |
| #ifdef CONFIG_X86_PPRO_FENCE |
| # define smp_rmb() rmb() |
| #else |
| # define smp_rmb() barrier() |
| #endif |
| #ifdef CONFIG_X86_OOSTORE |
| # define smp_wmb() wmb() |
| #else |
| # define smp_wmb() barrier() |
| #endif |
| #define smp_read_barrier_depends() read_barrier_depends() |
| #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) |
| #else /* !SMP */ |
| #define smp_mb() barrier() |
| #define smp_rmb() barrier() |
| #define smp_wmb() barrier() |
| #define smp_read_barrier_depends() do { } while (0) |
| #define set_mb(var, value) do { var = value; barrier(); } while (0) |
| #endif /* SMP */ |
| |
| #if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) |
| |
| /* |
| * For either of these options x86 doesn't have a strong TSO memory |
| * model and we should fall back to full barriers. |
| */ |
| |
| /* |
| * smp_store_release - issue smp_mb(), then store @v to *@p. |
| * @p: pointer to the destination; *@p must satisfy |
| *     compiletime_assert_atomic_type() |
| * @v: value to store |
| * |
| * @p is parenthesized in the expansion so that pointer expressions such |
| * as "base + 1" or "c ? a : b" are dereferenced as a whole. |
| */ |
| #define smp_store_release(p, v) \ |
| do { \ |
| compiletime_assert_atomic_type(*(p)); \ |
| smp_mb(); \ |
| ACCESS_ONCE(*(p)) = (v); \ |
| } while (0) |
| |
| /* |
| * smp_load_acquire - load *@p, then issue smp_mb(). |
| * @p: pointer to the source; *@p must satisfy |
| *     compiletime_assert_atomic_type() |
| * |
| * Evaluates to the loaded value. @p is parenthesized in the expansion so |
| * that compound pointer expressions are dereferenced as a whole. |
| */ |
| #define smp_load_acquire(p) \ |
| ({ \ |
| typeof(*(p)) ___p1 = ACCESS_ONCE(*(p)); \ |
| compiletime_assert_atomic_type(*(p)); \ |
| smp_mb(); \ |
| ___p1; \ |
| }) |
| |
| #else /* regular x86 TSO memory ordering */ |
| |
| /* |
| * smp_store_release - issue barrier(), then store @v to *@p. |
| * @p: pointer to the destination; *@p must satisfy |
| *     compiletime_assert_atomic_type() |
| * @v: value to store |
| * |
| * @p is parenthesized in the expansion so that pointer expressions such |
| * as "base + 1" or "c ? a : b" are dereferenced as a whole. |
| */ |
| #define smp_store_release(p, v) \ |
| do { \ |
| compiletime_assert_atomic_type(*(p)); \ |
| barrier(); \ |
| ACCESS_ONCE(*(p)) = (v); \ |
| } while (0) |
| |
| /* |
| * smp_load_acquire - load *@p, then issue barrier(). |
| * @p: pointer to the source; *@p must satisfy |
| *     compiletime_assert_atomic_type() |
| * |
| * Evaluates to the loaded value. @p is parenthesized in the expansion so |
| * that compound pointer expressions are dereferenced as a whole. |
| */ |
| #define smp_load_acquire(p) \ |
| ({ \ |
| typeof(*(p)) ___p1 = ACCESS_ONCE(*(p)); \ |
| compiletime_assert_atomic_type(*(p)); \ |
| barrier(); \ |
| ___p1; \ |
| }) |
| |
| #endif |
| |
| /* |
| * Stop RDTSC speculation. This is needed when you need to use RDTSC |
| * (or get_cycles or vread that possibly accesses the TSC) in a defined |
| * code region. |
| * |
| * Two separate alternative() sites are emitted back to back, each with |
| * ASM_NOP3 as its first argument: one names "mfence" under |
| * X86_FEATURE_MFENCE_RDTSC, the other "lfence" under |
| * X86_FEATURE_LFENCE_RDTSC. |
| * |
| * NOTE(review): presumably a given CPU sets at most one of the two |
| * feature bits, so at most one fence actually executes - confirm |
| * against the code that sets these bits. |
| * |
| * (Could use an alternative three way for this if there was one.) |
| */ |
| static __always_inline void rdtsc_barrier(void) |
| { |
| alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); |
| alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); |
| } |
| |
| #endif /* _ASM_X86_BARRIER_H */ |