Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* rwsem.c: R/W semaphores: contention handling functions |
| 2 | * |
| 3 | * Written by David Howells (dhowells@redhat.com). |
| 4 | * Derived from arch/i386/kernel/semaphore.c |
Alex Shi | ce6711f | 2013-02-05 21:11:55 +0800 | [diff] [blame] | 5 | * |
| 6 | * Writer lock-stealing by Alex Shi <alex.shi@intel.com> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 7 | */ |
| 8 | #include <linux/rwsem.h> |
| 9 | #include <linux/sched.h> |
| 10 | #include <linux/init.h> |
Paul Gortmaker | 8bc3bcc | 2011-11-16 21:29:17 -0500 | [diff] [blame] | 11 | #include <linux/export.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 12 | |
Ingo Molnar | 4ea2176 | 2006-07-03 00:24:53 -0700 | [diff] [blame] | 13 | /* |
| 14 | * Initialize an rwsem: |
| 15 | */ |
| 16 | void __init_rwsem(struct rw_semaphore *sem, const char *name, |
| 17 | struct lock_class_key *key) |
| 18 | { |
| 19 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 20 | /* |
| 21 | * Make sure we are not reinitializing a held semaphore: |
| 22 | */ |
| 23 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); |
Peter Zijlstra | 4dfbb9d | 2006-10-11 01:45:14 -0400 | [diff] [blame] | 24 | lockdep_init_map(&sem->dep_map, name, key, 0); |
Ingo Molnar | 4ea2176 | 2006-07-03 00:24:53 -0700 | [diff] [blame] | 25 | #endif |
| 26 | sem->count = RWSEM_UNLOCKED_VALUE; |
Thomas Gleixner | ddb6c9b | 2010-02-24 09:54:54 +0100 | [diff] [blame] | 27 | raw_spin_lock_init(&sem->wait_lock); |
Ingo Molnar | 4ea2176 | 2006-07-03 00:24:53 -0700 | [diff] [blame] | 28 | INIT_LIST_HEAD(&sem->wait_list); |
| 29 | } |
| 30 | |
| 31 | EXPORT_SYMBOL(__init_rwsem); |
| 32 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 33 | struct rwsem_waiter { |
| 34 | struct list_head list; |
| 35 | struct task_struct *task; |
| 36 | unsigned int flags; |
| 37 | #define RWSEM_WAITING_FOR_READ 0x00000001 |
| 38 | #define RWSEM_WAITING_FOR_WRITE 0x00000002 |
| 39 | }; |
| 40 | |
/*
 * Wake types understood by __rwsem_do_wake().
 *
 * RWSEM_WAKE_NO_ACTIVE and RWSEM_WAKE_READ_OWNED both report an rwsem count
 * value the caller has already observed. They may only be used if the caller
 * has kept the spinlock held ever since that observation was made.
 */
#define RWSEM_WAKE_ANY		0 /* Wake whatever's at head of wait list */
#define RWSEM_WAKE_NO_ACTIVE	1 /* rwsem was observed with no active thread */
#define RWSEM_WAKE_READ_OWNED	2 /* rwsem was observed to be read owned */
| 48 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 49 | /* |
| 50 | * handle the lock release when processes blocked on it that can now run |
| 51 | * - if we come here from up_xxxx(), then: |
| 52 | * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) |
| 53 | * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) |
Michel Lespinasse | 345af7b | 2010-08-09 17:21:15 -0700 | [diff] [blame] | 54 | * - there must be someone on the queue |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 55 | * - the spinlock must be held by the caller |
| 56 | * - woken process blocks are discarded from the list after having task zeroed |
| 57 | * - writers are only woken if downgrading is false |
| 58 | */ |
Michel Lespinasse | 70bdc6e | 2010-08-09 17:21:17 -0700 | [diff] [blame] | 59 | static struct rw_semaphore * |
| 60 | __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 61 | { |
| 62 | struct rwsem_waiter *waiter; |
| 63 | struct task_struct *tsk; |
| 64 | struct list_head *next; |
Alex Shi | ce6711f | 2013-02-05 21:11:55 +0800 | [diff] [blame] | 65 | signed long woken, loop, adjustment; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 66 | |
Michel Lespinasse | 345af7b | 2010-08-09 17:21:15 -0700 | [diff] [blame] | 67 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); |
| 68 | if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) |
| 69 | goto readers_only; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 70 | |
Michel Lespinasse | 70bdc6e | 2010-08-09 17:21:17 -0700 | [diff] [blame] | 71 | if (wake_type == RWSEM_WAKE_READ_OWNED) |
Michel Lespinasse | 424acaa | 2010-08-09 17:21:19 -0700 | [diff] [blame] | 72 | /* Another active reader was observed, so wakeup is not |
| 73 | * likely to succeed. Save the atomic op. |
| 74 | */ |
Michel Lespinasse | 345af7b | 2010-08-09 17:21:15 -0700 | [diff] [blame] | 75 | goto out; |
| 76 | |
Alex Shi | ce6711f | 2013-02-05 21:11:55 +0800 | [diff] [blame] | 77 | /* Wake up the writing waiter and let the task grab the sem: */ |
| 78 | wake_up_process(waiter->task); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 79 | goto out; |
| 80 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 81 | readers_only: |
Michel Lespinasse | 70bdc6e | 2010-08-09 17:21:17 -0700 | [diff] [blame] | 82 | /* If we come here from up_xxxx(), another thread might have reached |
| 83 | * rwsem_down_failed_common() before we acquired the spinlock and |
| 84 | * woken up a waiter, making it now active. We prefer to check for |
| 85 | * this first in order to not spend too much time with the spinlock |
| 86 | * held if we're not going to be able to wake up readers in the end. |
| 87 | * |
| 88 | * Note that we do not need to update the rwsem count: any writer |
| 89 | * trying to acquire rwsem will run rwsem_down_write_failed() due |
| 90 | * to the waiting threads and block trying to acquire the spinlock. |
| 91 | * |
| 92 | * We use a dummy atomic update in order to acquire the cache line |
| 93 | * exclusively since we expect to succeed and run the final rwsem |
| 94 | * count adjustment pretty soon. |
| 95 | */ |
| 96 | if (wake_type == RWSEM_WAKE_ANY && |
Michel Lespinasse | 424acaa | 2010-08-09 17:21:19 -0700 | [diff] [blame] | 97 | rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) |
| 98 | /* Someone grabbed the sem for write already */ |
Michel Lespinasse | 70bdc6e | 2010-08-09 17:21:17 -0700 | [diff] [blame] | 99 | goto out; |
Michel Lespinasse | 345af7b | 2010-08-09 17:21:15 -0700 | [diff] [blame] | 100 | |
Michel Lespinasse | 345af7b | 2010-08-09 17:21:15 -0700 | [diff] [blame] | 101 | /* Grant an infinite number of read locks to the readers at the front |
| 102 | * of the queue. Note we increment the 'active part' of the count by |
| 103 | * the number of readers before waking any processes up. |
| 104 | */ |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 105 | woken = 0; |
| 106 | do { |
| 107 | woken++; |
| 108 | |
| 109 | if (waiter->list.next == &sem->wait_list) |
| 110 | break; |
| 111 | |
| 112 | waiter = list_entry(waiter->list.next, |
| 113 | struct rwsem_waiter, list); |
| 114 | |
| 115 | } while (waiter->flags & RWSEM_WAITING_FOR_READ); |
| 116 | |
Michel Lespinasse | fd41b33 | 2010-08-09 17:21:18 -0700 | [diff] [blame] | 117 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS; |
| 118 | if (waiter->flags & RWSEM_WAITING_FOR_READ) |
| 119 | /* hit end of list above */ |
| 120 | adjustment -= RWSEM_WAITING_BIAS; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 121 | |
Michel Lespinasse | fd41b33 | 2010-08-09 17:21:18 -0700 | [diff] [blame] | 122 | rwsem_atomic_add(adjustment, sem); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 123 | |
| 124 | next = sem->wait_list.next; |
Michel Lespinasse | fd41b33 | 2010-08-09 17:21:18 -0700 | [diff] [blame] | 125 | for (loop = woken; loop > 0; loop--) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 126 | waiter = list_entry(next, struct rwsem_waiter, list); |
| 127 | next = waiter->list.next; |
| 128 | tsk = waiter->task; |
akpm@osdl.org | d59dd46 | 2005-05-01 08:58:47 -0700 | [diff] [blame] | 129 | smp_mb(); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 130 | waiter->task = NULL; |
| 131 | wake_up_process(tsk); |
| 132 | put_task_struct(tsk); |
| 133 | } |
| 134 | |
| 135 | sem->wait_list.next = next; |
| 136 | next->prev = &sem->wait_list; |
| 137 | |
| 138 | out: |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 139 | return sem; |
Alex Shi | ce6711f | 2013-02-05 21:11:55 +0800 | [diff] [blame] | 140 | } |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 141 | |
Alex Shi | ce6711f | 2013-02-05 21:11:55 +0800 | [diff] [blame] | 142 | /* Try to get write sem, caller holds sem->wait_lock: */ |
| 143 | static int try_get_writer_sem(struct rw_semaphore *sem, |
| 144 | struct rwsem_waiter *waiter) |
| 145 | { |
| 146 | struct rwsem_waiter *fwaiter; |
| 147 | long oldcount, adjustment; |
| 148 | |
| 149 | /* only steal when first waiter is writing */ |
| 150 | fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); |
| 151 | if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE)) |
| 152 | return 0; |
| 153 | |
| 154 | adjustment = RWSEM_ACTIVE_WRITE_BIAS; |
| 155 | /* Only one waiter in the queue: */ |
| 156 | if (fwaiter == waiter && waiter->list.next == &sem->wait_list) |
| 157 | adjustment -= RWSEM_WAITING_BIAS; |
| 158 | |
| 159 | try_again_write: |
| 160 | oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; |
| 161 | if (!(oldcount & RWSEM_ACTIVE_MASK)) { |
| 162 | /* No active lock: */ |
| 163 | struct task_struct *tsk = waiter->task; |
| 164 | |
| 165 | list_del(&waiter->list); |
| 166 | smp_mb(); |
| 167 | put_task_struct(tsk); |
| 168 | tsk->state = TASK_RUNNING; |
| 169 | return 1; |
| 170 | } |
| 171 | /* some one grabbed the sem already */ |
Michel Lespinasse | fd41b33 | 2010-08-09 17:21:18 -0700 | [diff] [blame] | 172 | if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) |
Alex Shi | ce6711f | 2013-02-05 21:11:55 +0800 | [diff] [blame] | 173 | return 0; |
Michel Lespinasse | 345af7b | 2010-08-09 17:21:15 -0700 | [diff] [blame] | 174 | goto try_again_write; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 175 | } |
| 176 | |
| 177 | /* |
| 178 | * wait for a lock to be granted |
| 179 | */ |
Livio Soares | c7af77b | 2007-12-18 15:21:13 +0100 | [diff] [blame] | 180 | static struct rw_semaphore __sched * |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 181 | rwsem_down_failed_common(struct rw_semaphore *sem, |
Michel Lespinasse | a8618a0 | 2010-08-09 17:21:20 -0700 | [diff] [blame] | 182 | unsigned int flags, signed long adjustment) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 183 | { |
Michel Lespinasse | a8618a0 | 2010-08-09 17:21:20 -0700 | [diff] [blame] | 184 | struct rwsem_waiter waiter; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 185 | struct task_struct *tsk = current; |
| 186 | signed long count; |
| 187 | |
| 188 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
| 189 | |
| 190 | /* set up my own style of waitqueue */ |
Thomas Gleixner | ddb6c9b | 2010-02-24 09:54:54 +0100 | [diff] [blame] | 191 | raw_spin_lock_irq(&sem->wait_lock); |
Michel Lespinasse | a8618a0 | 2010-08-09 17:21:20 -0700 | [diff] [blame] | 192 | waiter.task = tsk; |
| 193 | waiter.flags = flags; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 194 | get_task_struct(tsk); |
| 195 | |
Michel Lespinasse | fd41b33 | 2010-08-09 17:21:18 -0700 | [diff] [blame] | 196 | if (list_empty(&sem->wait_list)) |
| 197 | adjustment += RWSEM_WAITING_BIAS; |
Michel Lespinasse | a8618a0 | 2010-08-09 17:21:20 -0700 | [diff] [blame] | 198 | list_add_tail(&waiter.list, &sem->wait_list); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 199 | |
Michel Lespinasse | 70bdc6e | 2010-08-09 17:21:17 -0700 | [diff] [blame] | 200 | /* we're now waiting on the lock, but no longer actively locking */ |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 201 | count = rwsem_atomic_update(adjustment, sem); |
| 202 | |
Michel Lespinasse | 424acaa | 2010-08-09 17:21:19 -0700 | [diff] [blame] | 203 | /* If there are no active locks, wake the front queued process(es) up. |
| 204 | * |
| 205 | * Alternatively, if we're called from a failed down_write(), there |
| 206 | * were already threads queued before us and there are no active |
| 207 | * writers, the lock must be read owned; so we try to wake any read |
| 208 | * locks that were queued ahead of us. */ |
| 209 | if (count == RWSEM_WAITING_BIAS) |
Michel Lespinasse | 70bdc6e | 2010-08-09 17:21:17 -0700 | [diff] [blame] | 210 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); |
Michel Lespinasse | 424acaa | 2010-08-09 17:21:19 -0700 | [diff] [blame] | 211 | else if (count > RWSEM_WAITING_BIAS && |
| 212 | adjustment == -RWSEM_ACTIVE_WRITE_BIAS) |
| 213 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 214 | |
Thomas Gleixner | ddb6c9b | 2010-02-24 09:54:54 +0100 | [diff] [blame] | 215 | raw_spin_unlock_irq(&sem->wait_lock); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 216 | |
| 217 | /* wait to be given the lock */ |
| 218 | for (;;) { |
Michel Lespinasse | a8618a0 | 2010-08-09 17:21:20 -0700 | [diff] [blame] | 219 | if (!waiter.task) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 220 | break; |
Alex Shi | ce6711f | 2013-02-05 21:11:55 +0800 | [diff] [blame] | 221 | |
| 222 | raw_spin_lock_irq(&sem->wait_lock); |
| 223 | /* Try to get the writer sem, may steal from the head writer: */ |
| 224 | if (flags == RWSEM_WAITING_FOR_WRITE) |
| 225 | if (try_get_writer_sem(sem, &waiter)) { |
| 226 | raw_spin_unlock_irq(&sem->wait_lock); |
| 227 | return sem; |
| 228 | } |
| 229 | raw_spin_unlock_irq(&sem->wait_lock); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 230 | schedule(); |
| 231 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
| 232 | } |
| 233 | |
| 234 | tsk->state = TASK_RUNNING; |
| 235 | |
| 236 | return sem; |
| 237 | } |
| 238 | |
| 239 | /* |
| 240 | * wait for the read lock to be granted |
| 241 | */ |
Thomas Gleixner | d123375 | 2011-01-26 21:32:01 +0100 | [diff] [blame] | 242 | struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 243 | { |
Michel Lespinasse | a8618a0 | 2010-08-09 17:21:20 -0700 | [diff] [blame] | 244 | return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, |
| 245 | -RWSEM_ACTIVE_READ_BIAS); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 246 | } |
| 247 | |
| 248 | /* |
| 249 | * wait for the write lock to be granted |
| 250 | */ |
Thomas Gleixner | d123375 | 2011-01-26 21:32:01 +0100 | [diff] [blame] | 251 | struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 252 | { |
Michel Lespinasse | a8618a0 | 2010-08-09 17:21:20 -0700 | [diff] [blame] | 253 | return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, |
| 254 | -RWSEM_ACTIVE_WRITE_BIAS); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 255 | } |
| 256 | |
| 257 | /* |
| 258 | * handle waking up a waiter on the semaphore |
| 259 | * - up_read/up_write has decremented the active part of count if we come here |
| 260 | */ |
Thomas Gleixner | d123375 | 2011-01-26 21:32:01 +0100 | [diff] [blame] | 261 | struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 262 | { |
| 263 | unsigned long flags; |
| 264 | |
Thomas Gleixner | ddb6c9b | 2010-02-24 09:54:54 +0100 | [diff] [blame] | 265 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 266 | |
| 267 | /* do nothing if list empty */ |
| 268 | if (!list_empty(&sem->wait_list)) |
Michel Lespinasse | 70bdc6e | 2010-08-09 17:21:17 -0700 | [diff] [blame] | 269 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 270 | |
Thomas Gleixner | ddb6c9b | 2010-02-24 09:54:54 +0100 | [diff] [blame] | 271 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 272 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 273 | return sem; |
| 274 | } |
| 275 | |
| 276 | /* |
| 277 | * downgrade a write lock into a read lock |
| 278 | * - caller incremented waiting part of count and discovered it still negative |
| 279 | * - just wake up any readers at the front of the queue |
| 280 | */ |
Thomas Gleixner | d123375 | 2011-01-26 21:32:01 +0100 | [diff] [blame] | 281 | struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 282 | { |
| 283 | unsigned long flags; |
| 284 | |
Thomas Gleixner | ddb6c9b | 2010-02-24 09:54:54 +0100 | [diff] [blame] | 285 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 286 | |
| 287 | /* do nothing if list empty */ |
| 288 | if (!list_empty(&sem->wait_list)) |
Michel Lespinasse | 70bdc6e | 2010-08-09 17:21:17 -0700 | [diff] [blame] | 289 | sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 290 | |
Thomas Gleixner | ddb6c9b | 2010-02-24 09:54:54 +0100 | [diff] [blame] | 291 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 292 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 293 | return sem; |
| 294 | } |
| 295 | |
/* Slow-path entry points exported from this file. */
EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);