Blame - base/atomicops_internals_arm_gcc.h - platform/external/libchrome

blob: e654afa7d270ee7ef60aed48cc152b7d3e19fb1b [file] [log] [blame]

digit@chromium.org	9de6497	2013-06-10 23:00:06 +0900	[diff] [blame]	1	// Copyright 2013 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	// This file is an internal atomic implementation, use base/atomicops.h instead.
				6	//
				7	// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.
				8
				9	#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
				10	#define BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
				11
ctruta@blackberry.com	57a7a05	2014-02-06 21:27:37 +0900	[diff] [blame]	12	#if defined(OS_QNX)
				13	#include <sys/cpuinline.h>
				14	#endif
				15
digit@chromium.org	9de6497	2013-06-10 23:00:06 +0900	[diff] [blame]	16	namespace base {
				17	namespace subtle {
				18
				19	// Memory barriers on ARM are funky, but the kernel is here to help:
				20	//
				21	// * ARMv5 didn't support SMP, there is no memory barrier instruction at
				22	// all on this architecture, or when targeting its machine code.
				23	//
				24	// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
				25	// writing a random value to a very specific coprocessor register.
				26	//
				27	// * On ARMv7, the "dmb" instruction is used to perform a full memory
				28	// barrier (though writing to the co-processor will still work).
				29	// However, on single core devices (e.g. Nexus One, or Nexus S),
				30	// this instruction will take up to 200 ns, which is huge, even though
				31	// it's completely un-needed on these devices.
				32	//
				33	// * There is no easy way to determine at runtime if the device is
				34	// single or multi-core. However, the kernel provides a useful helper
				35	// function at a fixed memory address (0xffff0fa0), which will always
				36	// perform a memory barrier in the most efficient way. I.e. on single
				37	// core devices, this is an empty function that exits immediately.
				38	// On multi-core devices, it implements a full memory barrier.
				39	//
				40	// * This source could be compiled to ARMv5 machine code that runs on a
				41	// multi-core ARMv6 or ARMv7 device. In this case, memory barriers
				42	// are needed for correct execution. Always call the kernel helper, even
				43	// when targeting ARMv5TE.
				44	//
				45
				46	inline void MemoryBarrier() {
ctruta@blackberry.com	57a7a05	2014-02-06 21:27:37 +0900	[diff] [blame]	47	#if defined(OS_LINUX) \|\| defined(OS_ANDROID)
				48	// Note: This is a function call, which is also an implicit compiler barrier.
digit@chromium.org	9de6497	2013-06-10 23:00:06 +0900	[diff] [blame]	49	typedef void (*KernelMemoryBarrierFunc)();
				50	((KernelMemoryBarrierFunc)0xffff0fa0)();
ctruta@blackberry.com	57a7a05	2014-02-06 21:27:37 +0900	[diff] [blame]	51	#elif defined(OS_QNX)
				52	__cpu_membarrier();
				53	#else
				54	#error MemoryBarrier() is not implemented on this platform.
				55	#endif
digit@chromium.org	9de6497	2013-06-10 23:00:06 +0900	[diff] [blame]	56	}
				57
				58	// An ARM toolchain would only define one of these depending on which
				59	// variant of the target architecture is being used. This tests against
				60	// any known ARMv6 or ARMv7 variant, where it is possible to directly
				61	// use ldrex/strex instructions to implement fast atomic operations.
				62	#if defined(__ARM_ARCH_7__) \|\| defined(__ARM_ARCH_7A__) \|\| \
				63	defined(__ARM_ARCH_7R__) \|\| defined(__ARM_ARCH_7M__) \|\| \
				64	defined(__ARM_ARCH_6__) \|\| defined(__ARM_ARCH_6J__) \|\| \
				65	defined(__ARM_ARCH_6K__) \|\| defined(__ARM_ARCH_6Z__) \|\| \
namnguyen@chromium.org	5c4f9d3	2014-05-09 09:07:07 +0900	[diff] [blame]	66	defined(__ARM_ARCH_6ZK__) \|\| defined(__ARM_ARCH_6T2__)
digit@chromium.org	9de6497	2013-06-10 23:00:06 +0900	[diff] [blame]	67
				68	inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
				69	Atomic32 old_value,
				70	Atomic32 new_value) {
				71	Atomic32 prev_value;
				72	int reloop;
				73	do {
				74	// The following is equivalent to:
				75	//
				76	// prev_value = LDREX(ptr)
				77	// reloop = 0
				78	// if (prev_value != old_value)
				79	// reloop = STREX(ptr, new_value)
				80	__asm__ __volatile__(" ldrex %0, [%3]\n"
				81	" mov %1, #0\n"
digit@chromium.org	efaa34f	2013-07-11 12:52:52 +0900	[diff] [blame]	82	" cmp %0, %4\n"
digit@chromium.org	9de6497	2013-06-10 23:00:06 +0900	[diff] [blame]	83	#ifdef __thumb2__
				84	" it eq\n"
				85	#endif
				86	" strexeq %1, %5, [%3]\n"
				87	: "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
				88	: "r"(ptr), "r"(old_value), "r"(new_value)
				89	: "cc", "memory");
				90	} while (reloop != 0);
				91	return prev_value;
				92	}
				93
				94	inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
				95	Atomic32 old_value,
				96	Atomic32 new_value) {
				97	Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
				98	MemoryBarrier();
				99	return result;
				100	}
				101
				102	inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
				103	Atomic32 old_value,
				104	Atomic32 new_value) {
				105	MemoryBarrier();
				106	return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
				107	}
				108
				109	inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
				110	Atomic32 increment) {
				111	Atomic32 value;
				112	int reloop;
				113	do {
				114	// Equivalent to:
				115	//
				116	// value = LDREX(ptr)
				117	// value += increment
				118	// reloop = STREX(ptr, value)
				119	//
				120	__asm__ __volatile__(" ldrex %0, [%3]\n"
				121	" add %0, %0, %4\n"
				122	" strex %1, %0, [%3]\n"
				123	: "=&r"(value), "=&r"(reloop), "+m"(*ptr)
				124	: "r"(ptr), "r"(increment)
				125	: "cc", "memory");
				126	} while (reloop);
				127	return value;
				128	}
				129
				130	inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
				131	Atomic32 increment) {
				132	// TODO(digit): Investigate if it's possible to implement this with
				133	// a single MemoryBarrier() operation between the LDREX and STREX.
				134	// See http://crbug.com/246514
				135	MemoryBarrier();
				136	Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
				137	MemoryBarrier();
				138	return result;
				139	}
				140
				141	inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
				142	Atomic32 new_value) {
				143	Atomic32 old_value;
				144	int reloop;
				145	do {
				146	// old_value = LDREX(ptr)
digit@chromium.org	efaa34f	2013-07-11 12:52:52 +0900	[diff] [blame]	147	// reloop = STREX(ptr, new_value)
digit@chromium.org	9de6497	2013-06-10 23:00:06 +0900	[diff] [blame]	148	__asm__ __volatile__(" ldrex %0, [%3]\n"
				149	" strex %1, %4, [%3]\n"
				150	: "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
				151	: "r"(ptr), "r"(new_value)
				152	: "cc", "memory");
				153	} while (reloop != 0);
				154	return old_value;
				155	}
				156
				157	// This tests against any known ARMv5 variant.
				158	#elif defined(__ARM_ARCH_5__) \|\| defined(__ARM_ARCH_5T__) \|\| \
				159	defined(__ARM_ARCH_5TE__) \|\| defined(__ARM_ARCH_5TEJ__)
				160
				161	// The kernel also provides a helper function to perform an atomic
				162	// compare-and-swap operation at the hard-wired address 0xffff0fc0.
				163	// On ARMv5, this is implemented by a special code path that the kernel
				164	// detects and treats specially when thread pre-emption happens.
				165	// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
				166	//
				167	// Note that this always performs a full memory barrier, so there is no
				168	// need to add calls to MemoryBarrier() before or after it. It also
				169	// returns 0 on success, and non-zero on failure.
				170	//
				171	// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
				172	// use newer kernel revisions, so this should not be a concern.
				173	namespace {
				174
				175	inline int LinuxKernelCmpxchg(Atomic32 old_value,
				176	Atomic32 new_value,
				177	volatile Atomic32* ptr) {
				178	typedef int (KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32);
				179	return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
				180	}
				181
				182	} // namespace
				183
				184	inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
				185	Atomic32 old_value,
				186	Atomic32 new_value) {
				187	Atomic32 prev_value;
				188	for (;;) {
				189	prev_value = *ptr;
				190	if (prev_value != old_value)
				191	return prev_value;
				192	if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
				193	return old_value;
				194	}
				195	}
				196
				197	inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
				198	Atomic32 new_value) {
				199	Atomic32 old_value;
				200	do {
				201	old_value = *ptr;
				202	} while (LinuxKernelCmpxchg(old_value, new_value, ptr));
				203	return old_value;
				204	}
				205
				206	inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
				207	Atomic32 increment) {
				208	return Barrier_AtomicIncrement(ptr, increment);
				209	}
				210
				211	inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
				212	Atomic32 increment) {
				213	for (;;) {
				214	// Atomic exchange the old value with an incremented one.
				215	Atomic32 old_value = *ptr;
				216	Atomic32 new_value = old_value + increment;
				217	if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
				218	// The exchange took place as expected.
				219	return new_value;
				220	}
				221	// Otherwise, *ptr changed mid-loop and we need to retry.
				222	}
				223	}
				224
				225	inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
				226	Atomic32 old_value,
				227	Atomic32 new_value) {
				228	Atomic32 prev_value;
				229	for (;;) {
				230	prev_value = *ptr;
				231	if (prev_value != old_value) {
				232	// Always ensure acquire semantics.
				233	MemoryBarrier();
				234	return prev_value;
				235	}
				236	if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
				237	return old_value;
				238	}
				239	}
				240
				241	inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
				242	Atomic32 old_value,
				243	Atomic32 new_value) {
digit@chromium.org	efaa34f	2013-07-11 12:52:52 +0900	[diff] [blame]	244	// This could be implemented as:
				245	// MemoryBarrier();
				246	// return NoBarrier_CompareAndSwap();
				247	//
				248	// But would use 3 barriers per succesful CAS. To save performance,
				249	// use Acquire_CompareAndSwap(). Its implementation guarantees that:
				250	// - A succesful swap uses only 2 barriers (in the kernel helper).
				251	// - An early return due to (prev_value != old_value) performs
				252	// a memory barrier with no store, which is equivalent to the
				253	// generic implementation above.
				254	return Acquire_CompareAndSwap(ptr, old_value, new_value);
digit@chromium.org	9de6497	2013-06-10 23:00:06 +0900	[diff] [blame]	255	}
				256
				257	#else
				258	# error "Your CPU's ARM architecture is not supported yet"
				259	#endif
				260
				261	// NOTE: Atomicity of the following load and store operations is only
				262	// guaranteed in case of 32-bit alignment of |ptr| values.
				263
				264	inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
				265	*ptr = value;
				266	}
				267
				268	inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
				269	*ptr = value;
				270	MemoryBarrier();
				271	}
				272
				273	inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
				274	MemoryBarrier();
				275	*ptr = value;
				276	}
				277
				278	inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }
				279
				280	inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
				281	Atomic32 value = *ptr;
				282	MemoryBarrier();
				283	return value;
				284	}
				285
				286	inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
				287	MemoryBarrier();
				288	return *ptr;
				289	}
				290
				291	} // namespace base::subtle
				292	} // namespace base
				293
				294	#endif // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_