/* arch/x86/include/asm/barrier.h - kernel/msm-4.9 - Gitiles */

 #ifndef _ASM_X86_BARRIER_H
 #define _ASM_X86_BARRIER_H

 #include <asm/alternative.h>
 #include <asm/nops.h>

 /*
  * Force strict CPU ordering.
  * And yes, this is required on UP too when we're talking
  * to devices.
  */

 #ifdef CONFIG_X86_32
 /*
  * Some non-Intel clones support out of order store. wmb() ceases to be a
  * nop for these.
  *
  * NOTE(review): in this version wmb() is never a nop in either branch;
  * the sentence above looks like a leftover from an older definition.
  *
  * CONFIG_X86_32: each barrier is an alternative() site.  The first string
  * is a locked add of zero to the word at the top of the stack, the second
  * is the dedicated fence instruction, and the third argument names the
  * CPU feature bit (X86_FEATURE_XMM2 for mfence/lfence, X86_FEATURE_XMM
  * for sfence).  Presumably alternative() from <asm/alternative.h> keeps
  * the locked add unless the CPU has the feature, in which case the fence
  * is patched in -- confirm against that header.
  */
 #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
 #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
 #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
 #else
 /*
  * CONFIG_X86_32 not set: emit mfence/lfence/sfence unconditionally.  The
  * "memory" clobber tells the compiler that the asm may touch memory, so
  * the statement also acts as a compiler-level barrier.
  */
 #define mb() 	asm volatile("mfence":::"memory")
 #define rmb()	asm volatile("lfence":::"memory")
 #define wmb()	asm volatile("sfence" ::: "memory")
 #endif

 /**
  * read_barrier_depends - Flush all pending reads that subsequent reads
  * depend on.
  *
  * No data-dependent reads from memory-like regions are ever reordered
  * over this barrier.  All reads preceding this primitive are guaranteed
  * to access memory (but not necessarily other CPUs' caches) before any
  * reads following this primitive that depend on the data returned by
  * any of the preceding reads.  This primitive is much lighter weight than
  * rmb() on most CPUs, and is never heavier weight than
  * rmb().
  *
  * These ordering constraints are respected by both the local CPU
  * and the compiler.
  *
  * Ordering is not guaranteed by anything other than these primitives,
  * not even by data dependencies.  See the documentation for
  * memory_barrier() for examples and URLs to more information.
  *
  * For example, the following code would force ordering (the initial
  * value of "a" is zero, "b" is one, and "p" is "&a"):
  *
  * <programlisting>
  *	CPU 0				CPU 1
  *
  *	b = 2;
  *	memory_barrier();
  *	p = &b;				q = p;
  *					read_barrier_depends();
  *					d = *q;
  * </programlisting>
  *
  * because the read of "*q" depends on the read of "p" and these
  * two reads are separated by a read_barrier_depends().  However,
  * the following code, with the same initial values for "a" and "b":
  *
  * <programlisting>
  *	CPU 0				CPU 1
  *
  *	a = 2;
  *	memory_barrier();
  *	b = 3;				y = b;
  *					read_barrier_depends();
  *					x = a;
  * </programlisting>
  *
  * does not enforce ordering, since there is no data dependency between
  * the read of "a" and the read of "b".  Therefore, on some CPUs, such
  * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
  * in cases like this where there are no data dependencies.
  **/

 /*
  * In this header the primitive expands to an empty statement: no
  * instruction and no compiler barrier is emitted.
  */
 #define read_barrier_depends()	do { } while (0)

 /*
  * SMP-conditional barriers.
  *
  * smp_wmb() is a plain compiler barrier() in every configuration.
  * smp_rmb() is too, unless both CONFIG_SMP and CONFIG_X86_PPRO_FENCE are
  * set, in which case it becomes the real rmb().
  */
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_PPRO_FENCE)
 # define smp_rmb()	rmb()
 #else
 # define smp_rmb()	barrier()
 #endif
 #define smp_wmb()	barrier()

 #ifdef CONFIG_SMP
 /* SMP: the full barrier is mb(), and set_mb() stores through xchg(). */
 #define smp_mb()	mb()
 #define smp_read_barrier_depends()	read_barrier_depends()
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
 #else /* !SMP */
 /* UP: compiler barriers only; set_mb() is a plain store plus barrier(). */
 #define smp_mb()	barrier()
 #define smp_read_barrier_depends()	do { } while (0)
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
 #endif /* SMP */

 #if defined(CONFIG_X86_PPRO_FENCE)

 /*
  * With CONFIG_X86_PPRO_FENCE x86 doesn't have a strong TSO memory
  * model and we should fall back to full barriers.
  */

 /*
  * smp_store_release(p, v): check the type of *p at compile time, issue
  * smp_mb(), then store v to *p through ACCESS_ONCE().  The barrier comes
  * before the store.
  */
 #define smp_store_release(p, v)						\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	ACCESS_ONCE(*p) = (v);						\
 } while (0)

 /*
  * smp_load_acquire(p): load *p once through ACCESS_ONCE() into a
  * temporary of the pointee type, issue smp_mb(), and yield the loaded
  * value.  The barrier comes after the load.
  */
 #define smp_load_acquire(p)						\
 ({									\
 	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
 })

 #else /* regular x86 TSO memory ordering */

 /*
  * Same shape as the variants above, but only a compiler barrier() is
  * placed between the access and the surrounding code.
  */
 #define smp_store_release(p, v)						\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	ACCESS_ONCE(*p) = (v);						\
 } while (0)

 #define smp_load_acquire(p)						\
 ({									\
 	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
 })

 #endif

 /*
  * Stop RDTSC speculation. This is needed when you need to use RDTSC
  * (or get_cycles or vread that possibly accesses the TSC) in a defined
  * code region.
  *
  * (Could use an alternative three way for this if there was one.)
  *
  * Two independent alternative() sites are emitted back to back, each
  * defaulting to ASM_NOP3: the first is keyed on X86_FEATURE_MFENCE_RDTSC
  * with "mfence" as the replacement, the second on
  * X86_FEATURE_LFENCE_RDTSC with "lfence".  Presumably a CPU sets at most
  * one of the two feature bits, so only one fence is ever patched in --
  * TODO confirm where the bits are set.
  */
 static __always_inline void rdtsc_barrier(void)
 {
 	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
 	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
 }

 #endif /* _ASM_X86_BARRIER_H */
	#ifndef _ASM_X86_BARRIER_H
	#define _ASM_X86_BARRIER_H

	#include <asm/alternative.h>
	#include <asm/nops.h>

	/*
	* Force strict CPU ordering.
	* And yes, this is required on UP too when we're talking
	* to devices.
	*/

	#ifdef CONFIG_X86_32
	/*
	* Some non-Intel clones support out of order store. wmb() ceases to be a
	* nop for these.
	*
	* NOTE(review): in this version wmb() is never a nop in either branch;
	* the sentence above looks like a leftover from an older definition.
	*
	* CONFIG_X86_32: each barrier is an alternative() site. The first string
	* is a locked add of zero to the word at the top of the stack, the second
	* is the dedicated fence instruction, and the third argument names the
	* CPU feature bit (X86_FEATURE_XMM2 for mfence/lfence, X86_FEATURE_XMM
	* for sfence). Presumably alternative() from <asm/alternative.h> keeps
	* the locked add unless the CPU has the feature, in which case the fence
	* is patched in -- confirm against that header.
	*/
	#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
	#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
	#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
	#else
	/*
	* CONFIG_X86_32 not set: emit mfence/lfence/sfence unconditionally. The
	* "memory" clobber tells the compiler that the asm may touch memory, so
	* the statement also acts as a compiler-level barrier.
	*/
	#define mb() asm volatile("mfence":::"memory")
	#define rmb() asm volatile("lfence":::"memory")
	#define wmb() asm volatile("sfence" ::: "memory")
	#endif

	/**
	* read_barrier_depends - Flush all pending reads that subsequent reads
	* depend on.
	*
	* No data-dependent reads from memory-like regions are ever reordered
	* over this barrier. All reads preceding this primitive are guaranteed
	* to access memory (but not necessarily other CPUs' caches) before any
	* reads following this primitive that depend on the data returned by
	* any of the preceding reads. This primitive is much lighter weight than
	* rmb() on most CPUs, and is never heavier weight than
	* rmb().
	*
	* These ordering constraints are respected by both the local CPU
	* and the compiler.
	*
	* Ordering is not guaranteed by anything other than these primitives,
	* not even by data dependencies. See the documentation for
	* memory_barrier() for examples and URLs to more information.
	*
	* For example, the following code would force ordering (the initial
	* value of "a" is zero, "b" is one, and "p" is "&a"):
	*
	* <programlisting>
	*	CPU 0				CPU 1
	*
	*	b = 2;
	*	memory_barrier();
	*	p = &b;				q = p;
	*					read_barrier_depends();
	*					d = *q;
	* </programlisting>
	*
	* because the read of "*q" depends on the read of "p" and these
	* two reads are separated by a read_barrier_depends(). However,
	* the following code, with the same initial values for "a" and "b":
	*
	* <programlisting>
	*	CPU 0				CPU 1
	*
	*	a = 2;
	*	memory_barrier();
	*	b = 3;				y = b;
	*					read_barrier_depends();
	*					x = a;
	* </programlisting>
	*
	* does not enforce ordering, since there is no data dependency between
	* the read of "a" and the read of "b". Therefore, on some CPUs, such
	* as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
	* in cases like this where there are no data dependencies.
	**/

	/*
	* In this header the primitive expands to an empty statement: no
	* instruction and no compiler barrier is emitted.
	*/
	#define read_barrier_depends() do { } while (0)

	/*
	* SMP-conditional barriers.
	*
	* smp_wmb() is a plain compiler barrier() in every configuration.
	* smp_rmb() is too, unless both CONFIG_SMP and CONFIG_X86_PPRO_FENCE are
	* set, in which case it becomes the real rmb().
	*/
	#if defined(CONFIG_SMP) && defined(CONFIG_X86_PPRO_FENCE)
	# define smp_rmb() rmb()
	#else
	# define smp_rmb() barrier()
	#endif
	#define smp_wmb() barrier()

	#ifdef CONFIG_SMP
	/* SMP: the full barrier is mb(), and set_mb() stores through xchg(). */
	#define smp_mb() mb()
	#define smp_read_barrier_depends() read_barrier_depends()
	#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
	#else /* !SMP */
	/* UP: compiler barriers only; set_mb() is a plain store plus barrier(). */
	#define smp_mb() barrier()
	#define smp_read_barrier_depends() do { } while (0)
	#define set_mb(var, value) do { var = value; barrier(); } while (0)
	#endif /* SMP */

	#if defined(CONFIG_X86_PPRO_FENCE)

	/*
	* With CONFIG_X86_PPRO_FENCE x86 doesn't have a strong TSO memory
	* model and we should fall back to full barriers.
	*/

	/*
	* smp_store_release(p, v): check the type of *p at compile time, issue
	* smp_mb(), then store v to *p through ACCESS_ONCE(). The barrier comes
	* before the store.
	*/
	#define smp_store_release(p, v) \
	do { \
		compiletime_assert_atomic_type(*p); \
		smp_mb(); \
		ACCESS_ONCE(*p) = (v); \
	} while (0)

	/*
	* smp_load_acquire(p): load *p once through ACCESS_ONCE(), issue
	* smp_mb(), and yield the loaded value. The barrier comes after the load.
	*
	* The temporary must have the pointee type and be loaded from *p;
	* copying p itself would hand the caller the address, not the value.
	*/
	#define smp_load_acquire(p) \
	({ \
		typeof(*p) ___p1 = ACCESS_ONCE(*p); \
		compiletime_assert_atomic_type(*p); \
		smp_mb(); \
		___p1; \
	})

	#else /* regular x86 TSO memory ordering */

	/*
	* Same contract as the variants above, but only a compiler barrier() is
	* placed between the access and the surrounding code.
	*/
	#define smp_store_release(p, v) \
	do { \
		compiletime_assert_atomic_type(*p); \
		barrier(); \
		ACCESS_ONCE(*p) = (v); \
	} while (0)

	/* Loads *p (not p) into a temporary of the pointee type; see above. */
	#define smp_load_acquire(p) \
	({ \
		typeof(*p) ___p1 = ACCESS_ONCE(*p); \
		compiletime_assert_atomic_type(*p); \
		barrier(); \
		___p1; \
	})

	#endif

	/*
	* Stop RDTSC speculation. This is needed when you need to use RDTSC
	* (or get_cycles or vread that possibly accesses the TSC) in a defined
	* code region.
	*
	* (Could use an alternative three way for this if there was one.)
	*
	* Two independent alternative() sites are emitted back to back, each
	* defaulting to ASM_NOP3: the first is keyed on X86_FEATURE_MFENCE_RDTSC
	* with "mfence" as the replacement, the second on
	* X86_FEATURE_LFENCE_RDTSC with "lfence". Presumably a CPU sets at most
	* one of the two feature bits, so only one fence is ever patched in --
	* TODO confirm where the bits are set.
	*/
	static __always_inline void rdtsc_barrier(void)
	{
	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
	}

	#endif /* _ASM_X86_BARRIER_H */