Blame - arch/tile/lib/atomic_asm_32.S - kernel/msm-4.9

blob: 30638042691ddba9ed1fee52d8c086da2502dbd5 [file] [log] [blame]

Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	1	/*
				2	* Copyright 2010 Tilera Corporation. All Rights Reserved.
				3	*
				4	* This program is free software; you can redistribute it and/or
				5	* modify it under the terms of the GNU General Public License
				6	* as published by the Free Software Foundation, version 2.
				7	*
				8	* This program is distributed in the hope that it will be useful, but
				9	* WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
				11	* NON INFRINGEMENT. See the GNU General Public License for
				12	* more details.
				13	*
				14	* Support routines for atomic operations. Each function takes:
				15	*
				16	* r0: address to manipulate
Chris Metcalf	5fb682b	2011-02-28 15:58:39 -0500	[diff] [blame]	17	* r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	18	* r2: new value to write, or for cmpxchg/xchg_add_unless, value to compare against
				19	* r3: (cmpxchg/xchg_add_unless) new value to write or add;
				20	* (atomic64 ops) high word of value to write or compare against
				21	* r4/r5: (64_cmpxchg/64_xchg_add_unless) new value to write or add
				22	*
				23	* The 32-bit routines return a "struct __get_user" so that the futex code
				24	* has an opportunity to return -EFAULT to the user if needed.
				25	* The 64-bit routines just return a "long long" with the value,
				26	* since they are only used from kernel space and don't expect to fault.
				27	* Support for 16-bit ops is included in the framework but we don't provide
				28	* any (x86_64 has an atomic_inc_short(), so we might want to some day).
				29	*
				30	* Note that the caller is advised to issue a suitable L1 or L2
				31	* prefetch on the address being manipulated to avoid extra stalls.
				32	* In addition, the hot path is on two icache lines, and we start with
				33	* a jump to the second line to make sure they are both in cache so
				34	* that we never stall waiting on icache fill while holding the lock.
				35	* (This doesn't work out with most 64-bit ops, since they consume
				36	* too many bundles, so may take an extra i-cache stall.)
				37	*
				38	* These routines set the INTERRUPT_CRITICAL_SECTION bit, just
				39	* like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
				40	* the code, just page faults.
				41	*
				42	* If the load or store faults in a way that can be directly fixed in
				43	* the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
				44	* directly, return to the instruction that faulted, and retry it.
				45	*
				46	* If the load or store faults in a way that potentially requires us
				47	* to release the atomic lock, then retry (e.g. a migrating PTE), we
				48	* reset the PC in do_page_fault_ics() to the "tns" instruction so
				49	* that on return we will reacquire the lock and restart the op. We
				50	* are somewhat overloading the exception_table_entry notion by doing
				51	* this, since those entries are not normally used for migrating PTEs.
				52	*
				53	* If the main page fault handler discovers a bad address, it will see
				54	* the PC pointing to the "tns" instruction (due to the earlier
				55	* exception_table_entry processing in do_page_fault_ics), and
				56	* re-reset the PC to the fault handler, atomic_bad_address(), which
				57	* effectively takes over from the atomic op and can either return a
				58	* bad "struct __get_user" (for user addresses) or can just panic (for
				59	* bad kernel addresses).
				60	*
				61	* Note that if the value we would store is the same as what we
Chris Metcalf	df29ccb	2011-05-02 15:13:13 -0400	[diff] [blame]	62	* loaded, we bypass the store. Other platforms with true atomics can
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	63	* make the guarantee that a non-atomic __clear_bit(), for example,
				64	* can safely race with an atomic test_and_set_bit(); this example is
				65	* from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do
				66	* that on Tile since the "atomic" op is really just a
				67	* read/modify/write, and can race with the non-atomic
				68	* read/modify/write. However, if we can short-circuit the write when
				69	* it is not needed, in the atomic case, we avoid the race.
				70	*/
				71
				72	#include <linux/linkage.h>
Chris Metcalf	d52104b	2011-10-05 17:09:29 -0400	[diff] [blame]	73	#include <asm/atomic_32.h>
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	74	#include <asm/page.h>
				75	#include <asm/processor.h>
				76
				77	.section .text.atomic,"ax"
					/*
					 * Start-of-region marker for the atomic routines; paired with
					 * __end_atomic_asm_code at the bottom of this file.
					 * NOTE(review): presumably used to recognise a PC inside this
					 * code (e.g. by the page-fault path) -- confirm against callers.
					 */
				78	ENTRY(__start_atomic_asm_code)
				79
				80	.macro atomic_op, name, bitwidth, body
					/*
					 * atomic_op: emit one lock-protected read/modify/write routine,
					 * named "__atomic" followed by the name argument, in .text.atomic.
					 *
					 *   name     - suffix appended to "__atomic" to form the symbol
					 *   bitwidth - 16, 32 or 64: selects lh/sh vs. lw/sw, the extra
					 *              high-word load/store (64), and the __ex_table
					 *              entries (32 only)
					 *   body     - instructions placed between the load and the
					 *              store; must leave the value to store in r24
					 *              (and r25 for 64-bit) and may branch to 3f to
					 *              return without storing
					 *
					 * Register roles once the lock is held:
					 *   r22/r23  old value loaded from memory (low/high word)
					 *   r24/r25  new value to store (low/high word)
					 *   r26/r27  scratch for the "is the value unchanged?" compares
					 *   r28      address of the second word (r0 + 4)
					 * Before the lock is held, r21 receives the tns result and
					 * r22/r23/r25/r26 carry the backoff state.
					 *
					 * NOTE(review): the 32-bit fixup entries at the end restart the
					 * routine from its entry point, so body should presumably leave
					 * the incoming argument registers untouched -- confirm.
					 */
					/* 64-byte alignment; see the file header's remark about keeping the hot path on two icache lines. */
				81	.align 64
				82	STD_ENTRY_SECTION(__atomic\name, .text.atomic)
					/* r24 = 1 is the value written to INTERRUPT_CRITICAL_SECTION at label 4. */
				83	{
				84	movei r24, 1
				85	j 4f /* branch to second cache line */
				86	}
					/* 1: reached with the lock held (or directly on !SMP): load the old value. */
				87	1: {
				88	.ifc \bitwidth,16
				89	lh r22, r0
				90	.else
				91	lw r22, r0
Chris Metcalf	2db0982	2010-06-25 17:02:40 -0400	[diff] [blame]	92	addi r28, r0, 4
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	93	.endif
				94	}
					/* 64-bit only: load the high word through r28. */
				95	.ifc \bitwidth,64
Chris Metcalf	2db0982	2010-06-25 17:02:40 -0400	[diff] [blame]	96	lw r23, r28
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	97	.endif
				98	\body /* set r24, and r25 if 64-bit */
					/* r26/r27 = 1 when the low/high word to store equals what was loaded (r27 is only consulted for 64-bit). */
				99	{
				100	seq r26, r22, r24
				101	seq r27, r23, r25
				102	}
					/* 64-bit only: if the high words differ we must store, whatever the low-word compare says. */
				103	.ifc \bitwidth,64
				104	bbnst r27, 2f
				105	.endif
				106	bbs r26, 3f /* skip write-back if it's the same value */
					/* 2: write the new value back to memory. */
				107	2: {
				108	.ifc \bitwidth,16
				109	sh r0, r24
				110	.else
				111	sw r0, r24
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	112	.endif
				113	}
					/* 64-bit only: store the high word. */
				114	.ifc \bitwidth,64
Chris Metcalf	2db0982	2010-06-25 17:02:40 -0400	[diff] [blame]	115	sw r28, r25
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	116	.endif
					/* Memory fence; presumably orders the store(s) above ahead of the lock release below. */
				117	mf
					/*
					 * 3: common exit.  Return the old value in r0 (r1 = old high word
					 * for 64-bit, zero otherwise) and release the lock by storing zero
					 * through ATOMIC_LOCK_REG_NAME.
					 */
				118	3: {
				119	move r0, r22
				120	.ifc \bitwidth,64
				121	move r1, r23
				122	.else
				123	move r1, zero
				124	.endif
				125	sw ATOMIC_LOCK_REG_NAME, zero
				126	}
					/* Leave the interrupt critical section and return to the caller. */
				127	mtspr INTERRUPT_CRITICAL_SECTION, zero
				128	jrp lr
					/* 4: target of the entry jump: keep the lock pointer (r1) in ATOMIC_LOCK_REG_NAME and enter the critical section (r24 == 1). */
				129	4: {
				130	move ATOMIC_LOCK_REG_NAME, r1
				131	mtspr INTERRUPT_CRITICAL_SECTION, r24
				132	}
				133	#ifndef CONFIG_SMP
				134	j 1b /* no atomic locks */
				135	#else
					/* First attempt on the lock: r21 receives the tns result, and zero means acquired (see the bzt below). */
				136	{
				137	tns r21, ATOMIC_LOCK_REG_NAME
				138	moveli r23, 2048 /* maximum backoff time in cycles */
				139	}
				140	{
				141	bzt r21, 1b /* branch if lock acquired */
				142	moveli r25, 32 /* starting backoff time in cycles */
				143	}
					/* 5: lock is contended; leave the critical section while backing off. */
				144	5: mtspr INTERRUPT_CRITICAL_SECTION, zero
				145	mfspr r26, CYCLE_LOW /* get start point for this backoff */
					/* 6: spin while (now - start) < r25. */
				146	6: mfspr r22, CYCLE_LOW /* test to see if we've backed off enough */
				147	sub r22, r22, r26
				148	slt r22, r22, r25
				149	bbst r22, 6b
					/* Re-enter the critical section before retrying the tns. */
				150	{
				151	mtspr INTERRUPT_CRITICAL_SECTION, r24
				152	shli r25, r25, 1 /* double the backoff; retry the tns */
				153	}
				154	{
				155	tns r21, ATOMIC_LOCK_REG_NAME
				156	slt r26, r23, r25 /* is the proposed backoff too big? */
				157	}
					/* If the doubled backoff exceeds the maximum (r26 != 0), clamp r25 to r23. */
				158	{
				159	bzt r21, 1b /* branch if lock acquired */
				160	mvnz r25, r26, r23
				161	}
				162	j 5b
				163	#endif
				164	STD_ENDPROC(__atomic\name)
					/*
					 * 32-bit routines only: exception-table entries.  The pairs appear
					 * to be (faulting PC, fixup PC): a fault at the load (1b) or the
					 * store (2b) resumes at the routine entry, and a fault attributed
					 * to the entry resumes at __atomic_bad_address.  The file header
					 * describes how do_page_fault_ics() uses them.
					 */
				165	.ifc \bitwidth,32
				166	.pushsection __ex_table,"a"
				167	.word 1b, __atomic\name
				168	.word 2b, __atomic\name
				169	.word __atomic\name, __atomic_bad_address
				170	.popsection
				171	.endif
				172	.endm
				173
				174	atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" /* r2 = expected, r3 = new; return without storing unless old == expected */
					/* Unconditionally replace the old value with r2. */
				175	atomic_op _xchg, 32, "move r24, r2"
					/* new = old + r2. */
				176	atomic_op _xchg_add, 32, "add r24, r22, r2"
					/* new = old + r3, unless old == r2, in which case return without storing. */
				177	atomic_op _xchg_add_unless, 32, \
				178	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
					/* new = old | r2. */
				179	atomic_op _or, 32, "or r24, r22, r2"
					/*
					 * new = old & ~r2.  The complement is built in scratch r26 (which
					 * the macro overwrites right after the body) rather than in r2:
					 * the __ex_table fixups restart a faulting load/store from the
					 * routine entry, and inverting the argument register in place
					 * would make the retried pass compute old & r2 instead.
					 */
				180	atomic_op _andn, 32, "nor r26, r2, zero; and r24, r22, r26"
					/* new = old ^ r2. */
				181	atomic_op _xor, 32, "xor r24, r22, r2"
				182
				183	atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
				184	{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" /* r2/r3 = expected, r4/r5 = new; return without storing if either word differs */
					/* Unconditionally replace the old 64-bit value with r2/r3. */
				185	atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
					/* new = old + r2/r3; slt_u recovers the carry out of the low word. */
				186	atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
				187	slt_u r26, r24, r22; add r25, r25, r26"
					/*
					 * new = old + r4/r5, unless old == r2/r3.  The add must be skipped
					 * only when BOTH words match the "unless" value, so the two
					 * "differs" flags are OR-ed and tested once; branching on each half
					 * separately would also skip the add whenever just one word
					 * happened to match (e.g. old = 0x1_0000_0000, unless = 0).
					 */
				188	atomic_op 64_xchg_add_unless, 64, \
				189	"{ sne r26, r22, r2; sne r27, r23, r3 }; \
				190	{ or r26, r26, r27; add r24, r22, r4 }; \
				191	{ bbns r26, 3f; add r25, r23, r5 }; \
				192	slt_u r26, r24, r22; add r25, r25, r26"
				193
				194	jrp lr /* happy backtracer */
					/* The jrp above sits after the last routine, each of which returns on its own; per its comment it exists for the backtracer's benefit. */
				195
					/* End-of-region marker matching __start_atomic_asm_code. */
				196	ENTRY(__end_atomic_asm_code)