Blame - src/base/atomicops_internals_arm_gcc.h - fp2-dev/platform/external/v8

blob: 6c8b27ea24ec69512e0339ccc17f6dc366e0fd90 [file] [log] [blame]

Ben Murdoch	b8a8cc1	2014-11-26 15:28:44 +0000	[diff] [blame]	1	// Copyright 2010 the V8 project authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	// This file is an internal atomic implementation, use atomicops.h instead.
				6	//
				7	// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.
				8
				9	#ifndef V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
				10	#define V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
				11
				12	#if defined(__QNXNTO__)
				13	#include <sys/cpuinline.h>
				14	#endif
				15
				16	namespace v8 {
				17	namespace base {
				18
				19	// Memory barriers on ARM are funky, but the kernel is here to help:
				20	//
				21	// * ARMv5 didn't support SMP, there is no memory barrier instruction at
				22	// all on this architecture, or when targeting its machine code.
				23	//
				24	// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
				25	// writing a random value to a very specific coprocessor register.
				26	//
				27	// * On ARMv7, the "dmb" instruction is used to perform a full memory
				28	// barrier (though writing to the co-processor will still work).
				29	// However, on single core devices (e.g. Nexus One, or Nexus S),
				30	// this instruction will take up to 200 ns, which is huge, even though
				31	// it's completely un-needed on these devices.
				32	//
				33	// * There is no easy way to determine at runtime if the device is
				34	// single or multi-core. However, the kernel provides a useful helper
				35	// function at a fixed memory address (0xffff0fa0), which will always
				36	// perform a memory barrier in the most efficient way. I.e. on single
				37	// core devices, this is an empty function that exits immediately.
				38	// On multi-core devices, it implements a full memory barrier.
				39	//
				40	// * This source could be compiled to ARMv5 machine code that runs on a
				41	// multi-core ARMv6 or ARMv7 device. In this case, memory barriers
				42	// are needed for correct execution. Always call the kernel helper, even
				43	// when targeting ARMv5TE.
				44	//
				45
				46	inline void MemoryBarrier() {
				47	#if defined(__linux__) \|\| defined(__ANDROID__)
				48	// Note: This is a function call, which is also an implicit compiler barrier.
				49	typedef void (*KernelMemoryBarrierFunc)();
				50	((KernelMemoryBarrierFunc)0xffff0fa0)();
				51	#elif defined(__QNXNTO__)
				52	__cpu_membarrier();
				53	#else
				54	#error MemoryBarrier() is not implemented on this platform.
				55	#endif
				56	}
				57
				58	// An ARM toolchain would only define one of these depending on which
				59	// variant of the target architecture is being used. This tests against
				60	// any known ARMv6 or ARMv7 variant, where it is possible to directly
				61	// use ldrex/strex instructions to implement fast atomic operations.
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame^]	62	#if defined(__ARM_ARCH_8A__) \|\| \
				63	defined(__ARM_ARCH_7__) \|\| defined(__ARM_ARCH_7A__) \|\| \
Ben Murdoch	b8a8cc1	2014-11-26 15:28:44 +0000	[diff] [blame]	64	defined(__ARM_ARCH_7R__) \|\| defined(__ARM_ARCH_7M__) \|\| \
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame^]	65	defined(__ARM_ARCH_6__) \|\| defined(__ARM_ARCH_6J__) \|\| \
Ben Murdoch	b8a8cc1	2014-11-26 15:28:44 +0000	[diff] [blame]	66	defined(__ARM_ARCH_6K__) \|\| defined(__ARM_ARCH_6Z__) \|\| \
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame^]	67	defined(__ARM_ARCH_6ZK__) \|\| defined(__ARM_ARCH_6T2__)
Ben Murdoch	b8a8cc1	2014-11-26 15:28:44 +0000	[diff] [blame]	68
				69	inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
				70	Atomic32 old_value,
				71	Atomic32 new_value) {
				72	Atomic32 prev_value;
				73	int reloop;
				74	do {
				75	// The following is equivalent to:
				76	//
				77	// prev_value = LDREX(ptr)
				78	// reloop = 0
				79	// if (prev_value != old_value)
				80	// reloop = STREX(ptr, new_value)
				81	__asm__ __volatile__(" ldrex %0, [%3]\n"
				82	" mov %1, #0\n"
				83	" cmp %0, %4\n"
				84	#ifdef __thumb2__
				85	" it eq\n"
				86	#endif
				87	" strexeq %1, %5, [%3]\n"
				88	: "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
				89	: "r"(ptr), "r"(old_value), "r"(new_value)
				90	: "cc", "memory");
				91	} while (reloop != 0);
				92	return prev_value;
				93	}
				94
				95	inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
				96	Atomic32 old_value,
				97	Atomic32 new_value) {
				98	Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
				99	MemoryBarrier();
				100	return result;
				101	}
				102
				103	inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
				104	Atomic32 old_value,
				105	Atomic32 new_value) {
				106	MemoryBarrier();
				107	return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
				108	}
				109
				110	inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
				111	Atomic32 increment) {
				112	Atomic32 value;
				113	int reloop;
				114	do {
				115	// Equivalent to:
				116	//
				117	// value = LDREX(ptr)
				118	// value += increment
				119	// reloop = STREX(ptr, value)
				120	//
				121	__asm__ __volatile__(" ldrex %0, [%3]\n"
				122	" add %0, %0, %4\n"
				123	" strex %1, %0, [%3]\n"
				124	: "=&r"(value), "=&r"(reloop), "+m"(*ptr)
				125	: "r"(ptr), "r"(increment)
				126	: "cc", "memory");
				127	} while (reloop);
				128	return value;
				129	}
				130
				131	inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
				132	Atomic32 increment) {
				133	// TODO(digit): Investigate if it's possible to implement this with
				134	// a single MemoryBarrier() operation between the LDREX and STREX.
				135	// See http://crbug.com/246514
				136	MemoryBarrier();
				137	Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
				138	MemoryBarrier();
				139	return result;
				140	}
				141
				142	inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
				143	Atomic32 new_value) {
				144	Atomic32 old_value;
				145	int reloop;
				146	do {
				147	// old_value = LDREX(ptr)
				148	// reloop = STREX(ptr, new_value)
				149	__asm__ __volatile__(" ldrex %0, [%3]\n"
				150	" strex %1, %4, [%3]\n"
				151	: "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
				152	: "r"(ptr), "r"(new_value)
				153	: "cc", "memory");
				154	} while (reloop != 0);
				155	return old_value;
				156	}
				157
				158	// This tests against any known ARMv5 variant.
				159	#elif defined(__ARM_ARCH_5__) \|\| defined(__ARM_ARCH_5T__) \|\| \
				160	defined(__ARM_ARCH_5TE__) \|\| defined(__ARM_ARCH_5TEJ__)
				161
				162	// The kernel also provides a helper function to perform an atomic
				163	// compare-and-swap operation at the hard-wired address 0xffff0fc0.
				164	// On ARMv5, this is implemented by a special code path that the kernel
				165	// detects and treats specially when thread pre-emption happens.
				166	// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
				167	//
				168	// Note that this always perform a full memory barrier, there is no
				169	// need to add calls MemoryBarrier() before or after it. It also
				170	// returns 0 on success, and 1 on exit.
				171	//
				172	// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
				173	// use newer kernel revisions, so this should not be a concern.
				174	namespace {
				175
				176	inline int LinuxKernelCmpxchg(Atomic32 old_value,
				177	Atomic32 new_value,
				178	volatile Atomic32* ptr) {
				179	typedef int (KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32);
				180	return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
				181	}
				182
				183	} // namespace
				184
				185	inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
				186	Atomic32 old_value,
				187	Atomic32 new_value) {
				188	Atomic32 prev_value;
				189	for (;;) {
				190	prev_value = *ptr;
				191	if (prev_value != old_value)
				192	return prev_value;
				193	if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
				194	return old_value;
				195	}
				196	}
				197
				198	inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
				199	Atomic32 new_value) {
				200	Atomic32 old_value;
				201	do {
				202	old_value = *ptr;
				203	} while (LinuxKernelCmpxchg(old_value, new_value, ptr));
				204	return old_value;
				205	}
				206
				207	inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
				208	Atomic32 increment) {
				209	return Barrier_AtomicIncrement(ptr, increment);
				210	}
				211
				212	inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
				213	Atomic32 increment) {
				214	for (;;) {
				215	// Atomic exchange the old value with an incremented one.
				216	Atomic32 old_value = *ptr;
				217	Atomic32 new_value = old_value + increment;
				218	if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
				219	// The exchange took place as expected.
				220	return new_value;
				221	}
				222	// Otherwise, *ptr changed mid-loop and we need to retry.
				223	}
				224	}
				225
				226	inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
				227	Atomic32 old_value,
				228	Atomic32 new_value) {
				229	Atomic32 prev_value;
				230	for (;;) {
				231	prev_value = *ptr;
				232	if (prev_value != old_value) {
				233	// Always ensure acquire semantics.
				234	MemoryBarrier();
				235	return prev_value;
				236	}
				237	if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
				238	return old_value;
				239	}
				240	}
				241
				242	inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
				243	Atomic32 old_value,
				244	Atomic32 new_value) {
				245	// This could be implemented as:
				246	// MemoryBarrier();
				247	// return NoBarrier_CompareAndSwap();
				248	//
				249	// But would use 3 barriers per succesful CAS. To save performance,
				250	// use Acquire_CompareAndSwap(). Its implementation guarantees that:
				251	// - A succesful swap uses only 2 barriers (in the kernel helper).
				252	// - An early return due to (prev_value != old_value) performs
				253	// a memory barrier with no store, which is equivalent to the
				254	// generic implementation above.
				255	return Acquire_CompareAndSwap(ptr, old_value, new_value);
				256	}
				257
				258	#else
				259	# error "Your CPU's ARM architecture is not supported yet"
				260	#endif
				261
				262	// NOTE: Atomicity of the following load and store operations is only
				263	// guaranteed in case of 32-bit alignement of \|ptr\| values.
				264
				265	inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
				266	*ptr = value;
				267	}
				268
				269	inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
				270	*ptr = value;
				271	MemoryBarrier();
				272	}
				273
				274	inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
				275	MemoryBarrier();
				276	*ptr = value;
				277	}
				278
				279	inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }
				280
				281	inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
				282	Atomic32 value = *ptr;
				283	MemoryBarrier();
				284	return value;
				285	}
				286
				287	inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
				288	MemoryBarrier();
				289	return *ptr;
				290	}
				291
				292	// Byte accessors.
				293
				294	inline void NoBarrier_Store(volatile Atomic8* ptr, Atomic8 value) {
				295	*ptr = value;
				296	}
				297
				298	inline Atomic8 NoBarrier_Load(volatile const Atomic8* ptr) { return *ptr; }
				299
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame^]	300	} // namespace base
				301	} // namespace v8
Ben Murdoch	b8a8cc1	2014-11-26 15:28:44 +0000	[diff] [blame]	302
				303	#endif // V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_