Blame - src/base/atomicops_internals_arm_gcc.h - fp2-dev/platform/external/v8

blob: 8d049e04b48b7ebf7e58f902617218eca1e33730 [file] [log] [blame]

Ben Murdoch	b8a8cc1	2014-11-26 15:28:44 +0000	[diff] [blame]	1	// Copyright 2010 the V8 project authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	// This file is an internal atomic implementation, use atomicops.h instead.
				6	//
				7	// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.
				8
				9	#ifndef V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
				10	#define V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
				11
				12	#if defined(__QNXNTO__)
				13	#include <sys/cpuinline.h>
				14	#endif
				15
				16	namespace v8 {
				17	namespace base {
				18
				19	// Memory barriers on ARM are funky, but the kernel is here to help:
				20	//
				21	// * ARMv5 didn't support SMP, there is no memory barrier instruction at
				22	// all on this architecture, or when targeting its machine code.
				23	//
				24	// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
				25	// writing a random value to a very specific coprocessor register.
				26	//
				27	// * On ARMv7, the "dmb" instruction is used to perform a full memory
				28	// barrier (though writing to the co-processor will still work).
				29	// However, on single core devices (e.g. Nexus One, or Nexus S),
				30	// this instruction will take up to 200 ns, which is huge, even though
				31	// it's completely un-needed on these devices.
				32	//
				33	// * There is no easy way to determine at runtime if the device is
				34	// single or multi-core. However, the kernel provides a useful helper
				35	// function at a fixed memory address (0xffff0fa0), which will always
				36	// perform a memory barrier in the most efficient way. I.e. on single
				37	// core devices, this is an empty function that exits immediately.
				38	// On multi-core devices, it implements a full memory barrier.
				39	//
				40	// * This source could be compiled to ARMv5 machine code that runs on a
				41	// multi-core ARMv6 or ARMv7 device. In this case, memory barriers
				42	// are needed for correct execution. Always call the kernel helper, even
				43	// when targeting ARMv5TE.
				44	//
				45
				// Issues a memory barrier using the mechanism selected at compile time:
				// the kernel helper at the fixed address 0xffff0fa0 on Android,
				// __cpu_membarrier() on QNX, and the GCC __sync_synchronize() builtin
				// on every other target.
				inline void MemoryBarrier() {
				#if defined(__ANDROID__)
				  // Going through a function call also acts as an implicit compiler
				  // barrier.
				  typedef void (*BarrierHelper)();
				  BarrierHelper kernel_barrier = reinterpret_cast<BarrierHelper>(0xffff0fa0);
				  kernel_barrier();
				#elif defined(__QNXNTO__)
				  __cpu_membarrier();
				#else
				  // No platform-specific helper available: use the GCC builtin.
				  __sync_synchronize();
				#endif
				}
				58
				59	// An ARM toolchain would only define one of these depending on which
				60	// variant of the target architecture is being used. This tests against
				61	// any known ARMv6, ARMv7 or ARMv8-A variant, where it is possible to
				62	// directly use ldrex/strex instructions to implement fast atomic operations.
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	63	#if defined(__ARM_ARCH_8A__) || \
				64	defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
Ben Murdoch	b8a8cc1	2014-11-26 15:28:44 +0000	[diff] [blame]	65	defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	66	defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
Ben Murdoch	b8a8cc1	2014-11-26 15:28:44 +0000	[diff] [blame]	67	defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	68	defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)
Ben Murdoch	b8a8cc1	2014-11-26 15:28:44 +0000	[diff] [blame]	69
				70	inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
				71	Atomic32 old_value,
				72	Atomic32 new_value) {
				73	Atomic32 prev_value;
				74	int reloop;
				75	do {
				76	// The following is equivalent to:
				77	//
				78	// prev_value = LDREX(ptr)
				79	// reloop = 0
				80	// if (prev_value != old_value)
				81	// reloop = STREX(ptr, new_value)
				82	__asm__ __volatile__(" ldrex %0, [%3]\n"
				83	" mov %1, #0\n"
				84	" cmp %0, %4\n"
				85	#ifdef __thumb2__
				86	" it eq\n"
				87	#endif
				88	" strexeq %1, %5, [%3]\n"
				89	: "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
				90	: "r"(ptr), "r"(old_value), "r"(new_value)
				91	: "cc", "memory");
				92	} while (reloop != 0);
				93	return prev_value;
				94	}
				95
				96	inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
				97	Atomic32 old_value,
				98	Atomic32 new_value) {
				99	Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
				100	MemoryBarrier();
				101	return result;
				102	}
				103
				104	inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
				105	Atomic32 old_value,
				106	Atomic32 new_value) {
				107	MemoryBarrier();
				108	return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
				109	}
				110
				111	inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
				112	Atomic32 increment) {
				113	Atomic32 value;
				114	int reloop;
				115	do {
				116	// Equivalent to:
				117	//
				118	// value = LDREX(ptr)
				119	// value += increment
				120	// reloop = STREX(ptr, value)
				121	//
				122	__asm__ __volatile__(" ldrex %0, [%3]\n"
				123	" add %0, %0, %4\n"
				124	" strex %1, %0, [%3]\n"
				125	: "=&r"(value), "=&r"(reloop), "+m"(*ptr)
				126	: "r"(ptr), "r"(increment)
				127	: "cc", "memory");
				128	} while (reloop);
				129	return value;
				130	}
				131
				132	inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
				133	Atomic32 increment) {
				134	// TODO(digit): Investigate if it's possible to implement this with
				135	// a single MemoryBarrier() operation between the LDREX and STREX.
				136	// See http://crbug.com/246514
				137	MemoryBarrier();
				138	Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
				139	MemoryBarrier();
				140	return result;
				141	}
				142
				143	inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
				144	Atomic32 new_value) {
				145	Atomic32 old_value;
				146	int reloop;
				147	do {
				148	// old_value = LDREX(ptr)
				149	// reloop = STREX(ptr, new_value)
				150	__asm__ __volatile__(" ldrex %0, [%3]\n"
				151	" strex %1, %4, [%3]\n"
				152	: "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
				153	: "r"(ptr), "r"(new_value)
				154	: "cc", "memory");
				155	} while (reloop != 0);
				156	return old_value;
				157	}
				158
				159	// This tests against any known ARMv5 variant.
				160	#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
				161	defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
				162
				163	// The kernel also provides a helper function to perform an atomic
				164	// compare-and-swap operation at the hard-wired address 0xffff0fc0.
				165	// On ARMv5, this is implemented by a special code path that the kernel
				166	// detects and treats specially when thread pre-emption happens.
				167	// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
				168	//
				169	// Note that this always performs a full memory barrier, so there is no
				170	// need to add calls to MemoryBarrier() before or after it. It also
				171	// returns 0 on success, and a non-zero value on failure.
				172	//
				173	// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
				174	// use newer kernel revisions, so this should not be a concern.
				175	namespace {
				176
				177	inline int LinuxKernelCmpxchg(Atomic32 old_value,
				178	Atomic32 new_value,
				179	volatile Atomic32* ptr) {
				180	typedef int (KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32);
				181	return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
				182	}
				183
				184	} // namespace
				185
				186	inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
				187	Atomic32 old_value,
				188	Atomic32 new_value) {
				189	Atomic32 prev_value;
				190	for (;;) {
				191	prev_value = *ptr;
				192	if (prev_value != old_value)
				193	return prev_value;
				194	if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
				195	return old_value;
				196	}
				197	}
				198
				199	inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
				200	Atomic32 new_value) {
				201	Atomic32 old_value;
				202	do {
				203	old_value = *ptr;
				204	} while (LinuxKernelCmpxchg(old_value, new_value, ptr));
				205	return old_value;
				206	}
				207
				208	inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
				209	Atomic32 increment) {
				210	return Barrier_AtomicIncrement(ptr, increment);
				211	}
				212
				213	inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
				214	Atomic32 increment) {
				215	for (;;) {
				216	// Atomic exchange the old value with an incremented one.
				217	Atomic32 old_value = *ptr;
				218	Atomic32 new_value = old_value + increment;
				219	if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
				220	// The exchange took place as expected.
				221	return new_value;
				222	}
				223	// Otherwise, *ptr changed mid-loop and we need to retry.
				224	}
				225	}
				226
				227	inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
				228	Atomic32 old_value,
				229	Atomic32 new_value) {
				230	Atomic32 prev_value;
				231	for (;;) {
				232	prev_value = *ptr;
				233	if (prev_value != old_value) {
				234	// Always ensure acquire semantics.
				235	MemoryBarrier();
				236	return prev_value;
				237	}
				238	if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
				239	return old_value;
				240	}
				241	}
				242
				243	inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
				244	Atomic32 old_value,
				245	Atomic32 new_value) {
				246	// This could be implemented as:
				247	// MemoryBarrier();
				248	// return NoBarrier_CompareAndSwap();
				249	//
				250	// But would use 3 barriers per succesful CAS. To save performance,
				251	// use Acquire_CompareAndSwap(). Its implementation guarantees that:
				252	// - A succesful swap uses only 2 barriers (in the kernel helper).
				253	// - An early return due to (prev_value != old_value) performs
				254	// a memory barrier with no store, which is equivalent to the
				255	// generic implementation above.
				256	return Acquire_CompareAndSwap(ptr, old_value, new_value);
				257	}
				258
				259	#else
				260	# error "Your CPU's ARM architecture is not supported yet"
				261	#endif
				262
				263	// NOTE: Atomicity of the following load and store operations is only
				264	// guaranteed in case of 32-bit alignment of |ptr| values.
				265
				266	inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
				267	*ptr = value;
				268	}
				269
				270	inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
				271	*ptr = value;
				272	MemoryBarrier();
				273	}
				274
				275	inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
				276	MemoryBarrier();
				277	*ptr = value;
				278	}
				279
				280	inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }
				281
				282	inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
				283	Atomic32 value = *ptr;
				284	MemoryBarrier();
				285	return value;
				286	}
				287
				288	inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
				289	MemoryBarrier();
				290	return *ptr;
				291	}
				292
				293	// Byte accessors.
				294
				295	inline void NoBarrier_Store(volatile Atomic8* ptr, Atomic8 value) {
				296	*ptr = value;
				297	}
				298
				299	inline Atomic8 NoBarrier_Load(volatile const Atomic8* ptr) { return *ptr; }
				300
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	301	} // namespace base
				302	} // namespace v8
Ben Murdoch	b8a8cc1	2014-11-26 15:28:44 +0000	[diff] [blame]	303
				304	#endif // V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_