blob: 5a5514b77e783d5ebe0dc7455c7ed4e17f592a84 [file] [log] [blame]
/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 *
 * Support routines for atomic operations.  Each function takes:
 *
 * r0: address to manipulate
 * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG_NAME)
 * r2: new value to write, or for cmpxchg/add_unless, value to compare against
 * r3: (cmpxchg/xchg_add_unless) new value to write or add;
 *     (atomic64 ops) high word of value to write
 * r4/r5: (cmpxchg64/add_unless64) new value to write or add
 *
 * The 32-bit routines return a "struct __get_user" so that the futex code
 * has an opportunity to return -EFAULT to the user if needed.
 * The 64-bit routines just return a "long long" with the value,
 * since they are only used from kernel space and don't expect to fault.
 * Support for 16-bit ops is included in the framework but we don't provide
 * any (x86_64 has an atomic_inc_short(), so we might want to some day).
 *
 * Note that the caller is advised to issue a suitable L1 or L2
 * prefetch on the address being manipulated to avoid extra stalls.
 * In addition, the hot path is on two icache lines, and we start with
 * a jump to the second line to make sure they are both in cache so
 * that we never stall waiting on icache fill while holding the lock.
 * (This doesn't work out with most 64-bit ops, since they consume
 * too many bundles, so they may take an extra i-cache stall.)
 *
 * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
 * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
 * the code, just page faults.
 *
 * If the load or store faults in a way that can be directly fixed in
 * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
 * directly, return to the instruction that faulted, and retry it.
 *
 * If the load or store faults in a way that potentially requires us
 * to release the atomic lock, then retry (e.g. a migrating PTE), we
 * reset the PC in do_page_fault_ics() to the "tns" instruction so
 * that on return we will reacquire the lock and restart the op.  We
 * are somewhat overloading the exception_table_entry notion by doing
 * this, since those entries are not normally used for migrating PTEs.
 *
 * If the main page fault handler discovers a bad address, it will see
 * the PC pointing to the "tns" instruction (due to the earlier
 * exception_table_entry processing in do_page_fault_ics), and
 * re-reset the PC to the fault handler, atomic_bad_address(), which
 * effectively takes over from the atomic op and can either return a
 * bad "struct __get_user" (for user addresses) or can just panic (for
 * bad kernel addresses).
 *
 * Note that if the value we would store is the same as what we
 * loaded, we bypass the store.  Other platforms with true atomics can
 * make the guarantee that a non-atomic __clear_bit(), for example,
 * can safely race with an atomic test_and_set_bit(); this example is
 * from bit_spinlock.h in slub_lock() / slub_unlock().  We can't do
 * that on Tile since the "atomic" op is really just a
 * read/modify/write, and can race with the non-atomic
 * read/modify/write.  However, if we can short-circuit the write when
 * it is not needed, in the atomic case, we avoid the race.
 */
71
72#include <linux/linkage.h>
73#include <asm/atomic.h>
74#include <asm/page.h>
75#include <asm/processor.h>
76
77 .section .text.atomic,"ax"
78ENTRY(__start_atomic_asm_code)
79
/*
 * atomic_op name, bitwidth, body
 *
 * Emit the routine __atomic<name> into .text.atomic, aligned to 64 bytes.
 *
 *   name      suffix appended to "__atomic" to form the symbol
 *   bitwidth  16, 32 or 64: selects lh/sh versus lw/sw and, for 64,
 *             the additional load/store of the high word at r0 + 4
 *   body      instructions that compute the new value from the old one;
 *             they may branch to local label 3 to return the old value
 *             without storing anything
 *
 * Register usage in the generated routine:
 *   r0        in: address to operate on; out: old value (low word)
 *   r1        in: pointer to the lock; out: old high word (64-bit ops)
 *             or zero (all other widths)
 *   r21       result of "tns" on the lock; zero means the lock is ours
 *   r22/r23   old value read from memory (r23 only for 64-bit ops)
 *   r24/r25   new value produced by body (r25 only for 64-bit ops);
 *             r24 is also preloaded with 1 to set
 *             INTERRUPT_CRITICAL_SECTION before the lock is taken
 *   r26/r27   scratch for the "is the new value identical?" comparison
 *   r28       address of the high word (r0 + 4)
 *   While spinning for the lock, r23 holds the maximum backoff, r25 the
 *   current backoff, r26 the cycle count at the start of the wait and
 *   r22 the elapsed-time test; all are reloaded before they are used
 *   for data.
 *
 * For 32-bit ops, __ex_table entries send a fault at the load (label 1)
 * or at the store (label 2) back to the routine's entry point, and a
 * fault attributed to the entry point to __atomic_bad_address.  Because
 * a store fault re-runs body from scratch, body should leave its input
 * registers unmodified.
 *
 * The instruction order and bundle packing are deliberate; see the
 * "branch to second cache line" jump at the top of the routine.
 */
	.macro	atomic_op, name, bitwidth, body
	.align	64
STD_ENTRY_SECTION(__atomic\name, .text.atomic)
	{
	 movei	r24, 1
	 j	4f		/* branch to second cache line */
	}

	/* Lock held: read the current value. */
1:	{
	 .ifc \bitwidth,16
	 lh	r22, r0
	 .else
	 lw	r22, r0
	 addi	r28, r0, 4	/* r28 = address of the high word */
	 .endif
	}
	.ifc \bitwidth,64
	lw	r23, r28
	.endif
	\body /* set r24, and r25 if 64-bit */
	{
	 seq	r26, r22, r24	/* low word unchanged? */
	 seq	r27, r23, r25	/* high word unchanged? (only tested for 64-bit) */
	}
	.ifc \bitwidth,64
	bbnst	r27, 2f		/* high word differs: must store */
	.endif
	bbs	r26, 3f		/* skip write-back if it's the same value */

	/* Write the new value back and fence it. */
2:	{
	 .ifc \bitwidth,16
	 sh	r0, r24
	 .else
	 sw	r0, r24
	 .endif
	}
	.ifc \bitwidth,64
	sw	r28, r25
	.endif
	mf

	/* Return the old value in r0/r1 and drop the lock. */
3:	{
	 move	r0, r22
	 .ifc \bitwidth,64
	 move	r1, r23
	 .else
	 move	r1, zero
	 .endif
	 sw	ATOMIC_LOCK_REG_NAME, zero
	}
	mtspr	INTERRUPT_CRITICAL_SECTION, zero
	jrp	lr

	/* Second cache line: enter the critical section and take the lock. */
4:	{
	 move	ATOMIC_LOCK_REG_NAME, r1
	 mtspr	INTERRUPT_CRITICAL_SECTION, r24
	}
#ifndef CONFIG_SMP
	j	1b		/* no atomic locks */
#else
	{
	 tns	r21, ATOMIC_LOCK_REG_NAME
	 moveli	r23, 2048	/* maximum backoff time in cycles */
	}
	{
	 bzt	r21, 1b		/* branch if lock acquired */
	 moveli	r25, 32		/* starting backoff time in cycles */
	}

	/* Contended: leave the critical section while we wait. */
5:	mtspr	INTERRUPT_CRITICAL_SECTION, zero
	mfspr	r26, CYCLE_LOW	/* get start point for this backoff */
6:	mfspr	r22, CYCLE_LOW	/* test to see if we've backed off enough */
	sub	r22, r22, r26
	slt	r22, r22, r25
	bbst	r22, 6b		/* elapsed < backoff: keep waiting */
	{
	 mtspr	INTERRUPT_CRITICAL_SECTION, r24
	 shli	r25, r25, 1	/* double the backoff; retry the tns */
	}
	{
	 tns	r21, ATOMIC_LOCK_REG_NAME
	 slt	r26, r23, r25	/* is the proposed backoff too big? */
	}
	{
	 bzt	r21, 1b		/* branch if lock acquired */
	 mvnz	r25, r26, r23	/* if so, clamp it to the maximum */
	}
	j	5b
#endif
	STD_ENDPROC(__atomic\name)
	.ifc \bitwidth,32
	.pushsection __ex_table,"a"
	.word	1b, __atomic\name
	.word	2b, __atomic\name
	.word	__atomic\name, __atomic_bad_address
	.popsection
	.endif
	.endm
173
/*
 * 32-bit operations.  In each body r22 is the value just loaded from
 * memory, r2 (and r3 where used) are the caller's operands, and r24
 * receives the value to store.  Branching to 3f returns the old value
 * without storing.
 */

/* Store r3 only if the current value equals r2. */
atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"

/* Unconditionally replace the value with r2. */
atomic_op _xchg, 32, "move r24, r2"

/* Add r2 to the value. */
atomic_op _xchg_add, 32, "add r24, r22, r2"

/* Add r3 to the value unless the value equals r2. */
atomic_op _xchg_add_unless, 32, \
	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"

/* OR r2 into the value. */
atomic_op _or, 32, "or r24, r22, r2"

/*
 * Clear the bits of r2 in the value.  The inverted mask goes into the
 * scratch register r26 rather than back into r2: a fault on the store
 * restarts the routine from its entry point, and the body must then
 * see the caller's original r2 again.
 */
atomic_op _andn, 32, "nor r26, r2, zero; and r24, r22, r26"

/* XOR r2 into the value. */
atomic_op _xor, 32, "xor r24, r22, r2"
182
/*
 * 64-bit operations.  In each body r22/r23 are the low/high words just
 * loaded from memory and r24/r25 receive the low/high words to store.
 * Branching to 3f returns the old value without storing.
 */

/* Store r4/r5 only if both words of the current value equal r2/r3. */
atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
	{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"

/* Unconditionally replace the value with r2/r3. */
atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"

/* Add r2/r3 to the value, propagating the carry out of the low word. */
atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
	slt_u r26, r24, r22; add r25, r25, r26"

/*
 * Add r4/r5 to the value unless the value equals r2/r3.  The add is
 * skipped only when BOTH words match: the two "differs" flags are
 * OR-ed together before the branch, so a value that shares just one
 * word with r2/r3 is still updated.
 */
atomic_op 64_xchg_add_unless, 64, \
	"{ sne r26, r22, r2; sne r27, r23, r3 }; \
	{ or r26, r26, r27; add r24, r22, r4 }; \
	{ bbns r26, 3f; add r25, r23, r5 }; \
	slt_u r26, r24, r22; add r25, r25, r26"
193
194 jrp lr /* happy backtracer */
195
196ENTRY(__end_atomic_asm_code)