Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public License |
| 6 | * as published by the Free Software Foundation, version 2. |
| 7 | * |
| 8 | * This program is distributed in the hope that it will be useful, but |
| 9 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or |
| 11 | * NON INFRINGEMENT. See the GNU General Public License for |
| 12 | * more details. |
| 13 | * |
| 14 | * Support routines for atomic operations. Each function takes: |
| 15 | * |
| 16 | * r0: address to manipulate |
Chris Metcalf | 5fb682b | 2011-02-28 15:58:39 -0500 | [diff] [blame^] | 17 | * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG) |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 18 | * r2: new value to write, or for cmpxchg/add_unless, value to compare against |
| 19 | * r3: (cmpxchg/xchg_add_unless) new value to write or add; |
| 20 | * (atomic64 ops) high word of value to write |
| 21 | * r4/r5: (cmpxchg64/add_unless64) new value to write or add |
| 22 | * |
| 23 | * The 32-bit routines return a "struct __get_user" so that the futex code |
| 24 | * has an opportunity to return -EFAULT to the user if needed. |
| 25 | * The 64-bit routines just return a "long long" with the value, |
| 26 | * since they are only used from kernel space and don't expect to fault. |
| 27 | * Support for 16-bit ops is included in the framework but we don't provide |
| 28 | * any (x86_64 has an atomic_inc_short(), so we might want to some day). |
| 29 | * |
| 30 | * Note that the caller is advised to issue a suitable L1 or L2 |
| 31 | * prefetch on the address being manipulated to avoid extra stalls. |
| 32 | * In addition, the hot path is on two icache lines, and we start with |
| 33 | * a jump to the second line to make sure they are both in cache so |
| 34 | * that we never stall waiting on icache fill while holding the lock. |
| 35 | * (This doesn't work out with most 64-bit ops, since they consume |
| 36 | * too many bundles, so they may take an extra i-cache stall.) |
| 37 | * |
| 38 | * These routines set the INTERRUPT_CRITICAL_SECTION bit, just |
| 39 | * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt |
| 40 | * the code, just page faults. |
| 41 | * |
| 42 | * If the load or store faults in a way that can be directly fixed in |
| 43 | * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it |
| 44 | * directly, return to the instruction that faulted, and retry it. |
| 45 | * |
| 46 | * If the load or store faults in a way that potentially requires us |
| 47 | * to release the atomic lock, then retry (e.g. a migrating PTE), we |
| 48 | * reset the PC in do_page_fault_ics() to the "tns" instruction so |
| 49 | * that on return we will reacquire the lock and restart the op. We |
| 50 | * are somewhat overloading the exception_table_entry notion by doing |
| 51 | * this, since those entries are not normally used for migrating PTEs. |
| 52 | * |
| 53 | * If the main page fault handler discovers a bad address, it will see |
| 54 | * the PC pointing to the "tns" instruction (due to the earlier |
| 55 | * exception_table_entry processing in do_page_fault_ics), and |
| 56 | * re-reset the PC to the fault handler, atomic_bad_address(), which |
| 57 | * effectively takes over from the atomic op and can either return a |
| 58 | * bad "struct __get_user" (for user addresses) or can just panic (for |
| 59 | * bad kernel addresses). |
| 60 | * |
| 61 | * Note that if the value we would store is the same as what we |
| 62 | * loaded, we bypass the store. Other platforms with true atomics can |
| 63 | * make the guarantee that a non-atomic __clear_bit(), for example, |
| 64 | * can safely race with an atomic test_and_set_bit(); this example is |
| 65 | * from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do |
| 66 | * that on Tile since the "atomic" op is really just a |
| 67 | * read/modify/write, and can race with the non-atomic |
| 68 | * read/modify/write. However, if we can short-circuit the write when |
| 69 | * it is not needed, in the atomic case, we avoid the race. |
| 70 | */ |
| 71 | |
| 72 | #include <linux/linkage.h> |
| 73 | #include <asm/atomic.h> |
| 74 | #include <asm/page.h> |
| 75 | #include <asm/processor.h> |
| 76 | |
/*
 * Everything generated below is emitted into the .text.atomic section.
 * __start_atomic_asm_code labels the first address of that code.
 * NOTE(review): presumably paired with __end_atomic_asm_code so other code
 * can test whether a PC lies inside these routines -- confirm against the
 * users of these two symbols.
 */
| 77 | .section .text.atomic,"ax" |
| 78 | ENTRY(__start_atomic_asm_code) |
| 79 | |
/*
 * atomic_op name, bitwidth, body
 *
 * Emits one routine, __atomic\name, into .text.atomic on a 64-byte boundary.
 *
 *   name      suffix appended to "__atomic" to form the routine's symbol
 *   bitwidth  16, 32 or 64; selects lh/sh vs. lw/sw, the extra high-word
 *             load/store for 64, and (32 only) the exception table entries
 *   body      instructions run after the load; they see the old value in
 *             r22 (high word in r23 for 64-bit) and must leave the value
 *             to store in r24 (and r25 for 64-bit)
 *
 * Registers as used below:
 *   r0        address operated on; overwritten with the old value on return
 *   r1        lock pointer on entry (copied to ATOMIC_LOCK_REG_NAME);
 *             on return holds the old high word (64-bit) or zero
 *   r21       result of tns on the lock
 *   r22/r23   old value low/high; r22, r23, r25 and r26 also serve as
 *             scratch in the SMP backoff loop, before the load at 1:
 *   r24/r25   new value low/high; r24 starts as 1, the value written to
 *             INTERRUPT_CRITICAL_SECTION
 *   r26/r27   comparison results
 *   r28       r0 + 4, address of the high word
 */
| 80 | .macro atomic_op, name, bitwidth, body |
| 81 | .align 64 |
| 82 | STD_ENTRY_SECTION(__atomic\name, .text.atomic) |
/* Entry: r24 = 1 for the mtspr at 4:, then continue there. */
| 83 | { |
| 84 | movei r24, 1 |
| 85 | j 4f /* branch to second cache line */ |
| 86 | } |
/*
 * 1: reached once the lock is held (or directly from 4: when !CONFIG_SMP).
 * Load the old value; for non-16-bit widths also form the high-word
 * address in r28, which is only dereferenced for 64-bit.
 */
| 87 | 1: { |
| 88 | .ifc \bitwidth,16 |
| 89 | lh r22, r0 |
| 90 | .else |
| 91 | lw r22, r0 |
Chris Metcalf | 2db0982 | 2010-06-25 17:02:40 -0400 | [diff] [blame] | 92 | addi r28, r0, 4 |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 93 | .endif |
| 94 | } |
| 95 | .ifc \bitwidth,64 |
Chris Metcalf | 2db0982 | 2010-06-25 17:02:40 -0400 | [diff] [blame] | 96 | lw r23, r28 |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 97 | .endif |
| 98 | \body /* set r24, and r25 if 64-bit */ |
/* Compare old and new values; r27 is only consulted for 64-bit. */
| 99 | { |
| 100 | seq r26, r22, r24 |
| 101 | seq r27, r23, r25 |
| 102 | } |
/* 64-bit: differing high words force the store whatever the low words say. */
| 103 | .ifc \bitwidth,64 |
| 104 | bbnst r27, 2f |
| 105 | .endif |
| 106 | bbs r26, 3f /* skip write-back if it's the same value */ |
/* 2: write the new value back (low word, then high word for 64-bit). */
| 107 | 2: { |
| 108 | .ifc \bitwidth,16 |
| 109 | sh r0, r24 |
| 110 | .else |
| 111 | sw r0, r24 |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 112 | .endif |
| 113 | } |
| 114 | .ifc \bitwidth,64 |
Chris Metcalf | 2db0982 | 2010-06-25 17:02:40 -0400 | [diff] [blame] | 115 | sw r28, r25 |
Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 116 | .endif |
/* mf is issued after the store(s) and before the lock word is cleared. */
| 117 | mf |
/*
 * 3: common exit. In one bundle: return the old value in r0 (r1 = old high
 * word, or zero for narrower widths) and store zero to the lock word.
 */
| 118 | 3: { |
| 119 | move r0, r22 |
| 120 | .ifc \bitwidth,64 |
| 121 | move r1, r23 |
| 122 | .else |
| 123 | move r1, zero |
| 124 | .endif |
| 125 | sw ATOMIC_LOCK_REG_NAME, zero |
| 126 | } |
/* Clear INTERRUPT_CRITICAL_SECTION and return to the caller. */
| 127 | mtspr INTERRUPT_CRITICAL_SECTION, zero |
| 128 | jrp lr |
/*
 * 4: continuation of the entry bundle. Latch the lock pointer and set
 * INTERRUPT_CRITICAL_SECTION (r24 == 1) before touching the lock.
 */
| 129 | 4: { |
| 130 | move ATOMIC_LOCK_REG_NAME, r1 |
| 131 | mtspr INTERRUPT_CRITICAL_SECTION, r24 |
| 132 | } |
| 133 | #ifndef CONFIG_SMP |
| 134 | j 1b /* no atomic locks */ |
| 135 | #else |
/* First attempt on the lock; r23/r25 are primed with the backoff bounds. */
| 136 | { |
| 137 | tns r21, ATOMIC_LOCK_REG_NAME |
| 138 | moveli r23, 2048 /* maximum backoff time in cycles */ |
| 139 | } |
| 140 | { |
| 141 | bzt r21, 1b /* branch if lock acquired */ |
| 142 | moveli r25, 32 /* starting backoff time in cycles */ |
| 143 | } |
/*
 * 5: lock was busy. INTERRUPT_CRITICAL_SECTION is cleared while spinning
 * and set again (r24 == 1) just before the next tns. The inner loop at 6:
 * waits until r25 cycles have elapsed according to CYCLE_LOW.
 */
| 144 | 5: mtspr INTERRUPT_CRITICAL_SECTION, zero |
| 145 | mfspr r26, CYCLE_LOW /* get start point for this backoff */ |
| 146 | 6: mfspr r22, CYCLE_LOW /* test to see if we've backed off enough */ |
| 147 | sub r22, r22, r26 |
| 148 | slt r22, r22, r25 |
| 149 | bbst r22, 6b |
| 150 | { |
| 151 | mtspr INTERRUPT_CRITICAL_SECTION, r24 |
| 152 | shli r25, r25, 1 /* double the backoff; retry the tns */ |
| 153 | } |
| 154 | { |
| 155 | tns r21, ATOMIC_LOCK_REG_NAME |
| 156 | slt r26, r23, r25 /* is the proposed backoff too big? */ |
| 157 | } |
/* If so (r26 != 0), the next backoff is clamped to the maximum in r23. */
| 158 | { |
| 159 | bzt r21, 1b /* branch if lock acquired */ |
| 160 | mvnz r25, r26, r23 |
| 161 | } |
| 162 | j 5b |
| 163 | #endif |
| 164 | STD_ENDPROC(__atomic\name) |
/*
 * 32-bit routines only: exception table entries. A fault on the load (1:)
 * or the store (2:) is paired with the routine's entry point, so the whole
 * operation restarts from the top; the entry point itself is paired with
 * __atomic_bad_address.
 */
| 165 | .ifc \bitwidth,32 |
| 166 | .pushsection __ex_table,"a" |
| 167 | .word 1b, __atomic\name |
| 168 | .word 2b, __atomic\name |
| 169 | .word __atomic\name, __atomic_bad_address |
| 170 | .popsection |
| 171 | .endif |
| 172 | .endm |
| 173 | |
/*
 * 32-bit operations. Each body sees the loaded value in r22 and leaves the
 * value to store in r24; branching to 3f returns the old value without
 * storing anything.
 *
 *   _cmpxchg          store r3 if old == r2
 *   _xchg             store r2
 *   _xchg_add         store old + r2
 *   _xchg_add_unless  store old + r3 unless old == r2
 *   _or, _andn, _xor  store old | r2, old & ~r2, old ^ r2
 *
 * A body must leave its argument registers intact: the exception table
 * sends a faulting load or store back to the routine's entry point, so the
 * body can run more than once for a single call. _andn therefore builds
 * the inverted mask in r27, which the macro overwrites right after the
 * body anyway, rather than inverting r2 in place (a restart after a
 * faulting store would otherwise invert the mask a second time).
 */
| 174 | atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" |
| 175 | atomic_op _xchg, 32, "move r24, r2" |
| 176 | atomic_op _xchg_add, 32, "add r24, r22, r2" |
| 177 | atomic_op _xchg_add_unless, 32, \ |
| 178 | "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" |
| 179 | atomic_op _or, 32, "or r24, r22, r2" |
| 180 | atomic_op _andn, 32, "nor r27, r2, zero; and r24, r22, r27" |
| 181 | atomic_op _xor, 32, "xor r24, r22, r2" |
| 182 | |
/*
 * 64-bit operations. Each body sees the loaded value in r23:r22 (high:low)
 * and leaves the value to store in r25:r24; branching to 3f returns the
 * old value without storing anything.
 *
 *   64_cmpxchg          store r5:r4 if old == r3:r2 (both halves must match)
 *   64_xchg             store r3:r2
 *   64_xchg_add         store old + r3:r2
 *   64_xchg_add_unless  store old + r5:r4 unless old == r3:r2
 *
 * In the add variants, slt_u yields the carry out of the low-word add
 * (new low < old low), which is then added into the high word.
 *
 * 64_xchg_add_unless must skip the add only when BOTH halves equal the
 * compare value, so the two per-half "not equal" results are OR-ed and
 * tested once; testing each half separately would skip the add whenever
 * just one half happened to match.
 */
| 183 | atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \ |
| 184 | { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" |
| 185 | atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }" |
| 186 | atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \ |
| 187 | slt_u r26, r24, r22; add r25, r25, r26" |
| 188 | atomic_op 64_xchg_add_unless, 64, \ |
| 189 | "{ sne r26, r22, r2; sne r27, r23, r3 }; or r26, r26, r27; \ |
| 190 | { bbns r26, 3f; add r24, r22, r4 }; \ |
| 191 | add r25, r23, r5; \ |
| 192 | slt_u r26, r24, r22; add r25, r25, r26" |
| 193 | |
/*
 * Trailing return placed after the last generated routine. Every routine
 * above ends in an unconditional jump, so nothing falls through to this
 * instruction; per the original note it exists for the backtracer's
 * benefit. NOTE(review): the exact backtracer requirement is not visible
 * in this file -- confirm before removing.
 *
 * __end_atomic_asm_code labels the address just past the atomic code.
 */
| 194 | jrp lr /* happy backtracer */ |
| 195 | |
| 196 | ENTRY(__end_atomic_asm_code) |