/*
 * Ldisc rw semaphore
 *
 * The ldisc semaphore is semantically a rw_semaphore, but it enforces
 * an alternate policy, namely:
 * 1) Supports lock wait timeouts
 * 2) Write waiter has priority
 * 3) Downgrading is not supported
 *
 * Implementation notes:
 * 1) Upper half of semaphore count is a wait count (differs from rwsem
 *    in that rwsem normalizes the upper half to the wait bias)
 * 2) Lacks overflow checking
 *
 * The generic counting was copied and modified from include/asm-generic/rwsem.h
 * by Paul Mackerras <paulus@samba.org>.
 *
 * The scheduling policy was copied and modified from lib/rwsem.c
 * Written by David Howells (dhowells@redhat.com).
 *
 * This implementation incorporates the write lock stealing work of
 * Michel Lespinasse <walken@google.com>.
 *
 * Copyright (C) 2013 Peter Hurley <peter@hurleysoftware.com>
 *
 * This file may be redistributed under the terms of the GNU General Public
 * License v2.
 */
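
/*
 * Usage sketch (illustrative only; 'my_ldsem' and the timeout value are
 * hypothetical, not part of this file): a reader acquires the semaphore
 * with a bounded wait and must check the return value, since the lock is
 * NOT held on timeout:
 *
 *	if (ldsem_down_read(&my_ldsem, 5 * HZ)) {
 *		// ... read-side critical section ...
 *		ldsem_up_read(&my_ldsem);
 *	} else {
 *		// timed out: the semaphore was not acquired
 *	}
 */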

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/tty.h>
#include <linux/sched.h>


#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __acq(l, s, t, r, c, n, i)		\
				lock_acquire(&(l)->dep_map, s, t, r, c, n, i)
# define __rel(l, n, i)				\
				lock_release(&(l)->dep_map, n, i)
# ifdef CONFIG_PROVE_LOCKING
#  define lockdep_acquire(l, s, t, i)		__acq(l, s, t, 0, 2, NULL, i)
#  define lockdep_acquire_nest(l, s, t, n, i)	__acq(l, s, t, 0, 2, n, i)
#  define lockdep_acquire_read(l, s, t, i)	__acq(l, s, t, 1, 2, NULL, i)
#  define lockdep_release(l, n, i)		__rel(l, n, i)
# else
#  define lockdep_acquire(l, s, t, i)		__acq(l, s, t, 0, 1, NULL, i)
#  define lockdep_acquire_nest(l, s, t, n, i)	__acq(l, s, t, 0, 1, n, i)
#  define lockdep_acquire_read(l, s, t, i)	__acq(l, s, t, 1, 1, NULL, i)
#  define lockdep_release(l, n, i)		__rel(l, n, i)
# endif
#else
# define lockdep_acquire(l, s, t, i)		do { } while (0)
# define lockdep_acquire_nest(l, s, t, n, i)	do { } while (0)
# define lockdep_acquire_read(l, s, t, i)	do { } while (0)
# define lockdep_release(l, n, i)		do { } while (0)
#endif

#ifdef CONFIG_LOCK_STAT
# define lock_stat(_lock, stat)		lock_##stat(&(_lock)->dep_map, _RET_IP_)
#else
# define lock_stat(_lock, stat)		do { } while (0)
#endif


#if BITS_PER_LONG == 64
# define LDSEM_ACTIVE_MASK	0xffffffffL
#else
# define LDSEM_ACTIVE_MASK	0x0000ffffL
#endif

#define LDSEM_UNLOCKED		0L
#define LDSEM_ACTIVE_BIAS	1L
#define LDSEM_WAIT_BIAS		(-LDSEM_ACTIVE_MASK-1)
#define LDSEM_READ_BIAS		LDSEM_ACTIVE_BIAS
#define LDSEM_WRITE_BIAS	(LDSEM_WAIT_BIAS + LDSEM_ACTIVE_BIAS)

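/*
 * Worked illustration of the count layout on a 64-bit build (example
 * values only): each active lock adds LDSEM_ACTIVE_BIAS to the lower
 * half, each waiter adds LDSEM_WAIT_BIAS to the upper half:
 *
 *	0x0000000000000000	unlocked, no waiters
 *	0x0000000000000003	three active readers, no waiters
 *	0xffffffff00000001	one active lock + one waiter -- also the
 *				value produced by LDSEM_WRITE_BIAS alone,
 *				i.e. write-locked with no other waiters
 *	0xfffffffe00000001	one active lock + two waiters
 */
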
struct ldsem_waiter {
	struct list_head list;
	struct task_struct *task;
};

static inline long ldsem_atomic_update(long delta, struct ld_semaphore *sem)
{
	return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
}

static inline int ldsem_cmpxchg(long *old, long new, struct ld_semaphore *sem)
{
	long tmp = *old;
	*old = atomic_long_cmpxchg(&sem->count, *old, new);
	return *old == tmp;
}
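
/*
 * Illustrative sketch (not called anywhere): ldsem_cmpxchg() stores the
 * value actually observed back into *old when the compare-exchange fails,
 * so callers can retry without re-reading sem->count, e.g.:
 *
 *	long count = sem->count;
 *	while (count >= 0) {
 *		if (ldsem_cmpxchg(&count, count + LDSEM_READ_BIAS, sem))
 *			break;	// count was unchanged; new value stored
 *	}			// else count now holds the latest value
 */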

/*
 * Initialize an ldsem:
 */
void __init_ldsem(struct ld_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	sem->count = LDSEM_UNLOCKED;
	sem->wait_readers = 0;
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->read_wait);
	INIT_LIST_HEAD(&sem->write_wait);
}

static void __ldsem_wake_readers(struct ld_semaphore *sem)
{
	struct ldsem_waiter *waiter, *next;
	struct task_struct *tsk;
	long adjust, count;

	/* Try to grant read locks to all readers on the read wait list.
	 * Note the 'active part' of the count is incremented by
	 * the number of readers before waking any processes up.
	 */
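	/*
	 * Worked example (illustrative, 64-bit): with two waiting readers
	 * and no active locks, count == 0xfffffffe00000000 and
	 * adjust == 2 * (1 + 0x100000000) == 0x0000000200000002; the
	 * update below yields count == 2 (two active readers, no waiters).
	 */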
	adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS);
	count = ldsem_atomic_update(adjust, sem);
	do {
		if (count > 0)
			break;
		if (ldsem_cmpxchg(&count, count - adjust, sem))
			return;
	} while (1);

	list_for_each_entry_safe(waiter, next, &sem->read_wait, list) {
		tsk = waiter->task;
		/* order the lock grant above before clearing ->task;
		 * pairs with the ->task check in down_read_failed()
		 */
		smp_mb();
		waiter->task = NULL;
		wake_up_process(tsk);
		put_task_struct(tsk);
	}
	INIT_LIST_HEAD(&sem->read_wait);
	sem->wait_readers = 0;
}

static inline int writer_trylock(struct ld_semaphore *sem)
{
	/* only wake this writer if the active part of the count can be
	 * transitioned from 0 -> 1
	 */
	long count = ldsem_atomic_update(LDSEM_ACTIVE_BIAS, sem);
	do {
		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS)
			return 1;
		if (ldsem_cmpxchg(&count, count - LDSEM_ACTIVE_BIAS, sem))
			return 0;
	} while (1);
}
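
/*
 * Worked example (illustrative, 64-bit): a lone waiting writer sees
 * count == 0xffffffff00000000; adding LDSEM_ACTIVE_BIAS gives
 * 0xffffffff00000001, whose active part is 1, so the trylock succeeds.
 * The writer keeps its wait bias, which together with the active bias it
 * just added equals LDSEM_WRITE_BIAS.  Had a reader still been active,
 * the active part would have been 2 and the cmpxchg loop would back the
 * bias out again.
 */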

static void __ldsem_wake_writer(struct ld_semaphore *sem)
{
	struct ldsem_waiter *waiter;

	waiter = list_entry(sem->write_wait.next, struct ldsem_waiter, list);
	wake_up_process(waiter->task);
}

/*
 * handle the lock release when there are processes blocked on it that can
 * now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is negative (and will still be so)
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 */
static void __ldsem_wake(struct ld_semaphore *sem)
{
	if (!list_empty(&sem->write_wait))
		__ldsem_wake_writer(sem);
	else if (!list_empty(&sem->read_wait))
		__ldsem_wake_readers(sem);
}

static void ldsem_wake(struct ld_semaphore *sem)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&sem->wait_lock, flags);
	__ldsem_wake(sem);
	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}

/*
 * wait for the read lock to be granted
 */
static struct ld_semaphore __sched *
down_read_failed(struct ld_semaphore *sem, long count, long timeout)
{
	struct ldsem_waiter waiter;
	struct task_struct *tsk = current;
	long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS;

	/* set up my own style of waitqueue */
	raw_spin_lock_irq(&sem->wait_lock);

	/* Try to reverse the lock attempt, but if the count has changed
	 * so that reversing fails, check if there are no longer any
	 * waiters (count > 0); if so, this reader already holds an
	 * active bias, so early-out with the lock granted */
	do {
		if (ldsem_cmpxchg(&count, count + adjust, sem))
			break;
		if (count > 0) {
			raw_spin_unlock_irq(&sem->wait_lock);
			return sem;
		}
	} while (1);

	list_add_tail(&waiter.list, &sem->read_wait);
	sem->wait_readers++;

	waiter.task = tsk;
	get_task_struct(tsk);

	/* if there are no active locks, wake the new lock owner(s) */
	if ((count & LDSEM_ACTIVE_MASK) == 0)
		__ldsem_wake(sem);

	raw_spin_unlock_irq(&sem->wait_lock);

	/* wait to be given the lock */
	for (;;) {
		set_task_state(tsk, TASK_UNINTERRUPTIBLE);

		if (!waiter.task)
			break;
		if (!timeout)
			break;
		timeout = schedule_timeout(timeout);
	}

	__set_task_state(tsk, TASK_RUNNING);

	if (!timeout) {
		/* lock timed out but check if this task was just
		 * granted lock ownership - if so, pretend there
		 * was no timeout; otherwise, clean up the lock wait */
		raw_spin_lock_irq(&sem->wait_lock);
		if (waiter.task) {
			ldsem_atomic_update(-LDSEM_WAIT_BIAS, sem);
			list_del(&waiter.list);
			raw_spin_unlock_irq(&sem->wait_lock);
			put_task_struct(waiter.task);
			return NULL;
		}
		raw_spin_unlock_irq(&sem->wait_lock);
	}

	return sem;
}

/*
 * wait for the write lock to be granted
 */
static struct ld_semaphore __sched *
down_write_failed(struct ld_semaphore *sem, long count, long timeout)
{
	struct ldsem_waiter waiter;
	struct task_struct *tsk = current;
	long adjust = -LDSEM_ACTIVE_BIAS;
	int locked = 0;

	/* set up my own style of waitqueue */
	raw_spin_lock_irq(&sem->wait_lock);

	/* Try to reverse the lock attempt, but if the count has changed
	 * so that reversing fails, check if the lock is now owned,
	 * and early-out if so */
	do {
		if (ldsem_cmpxchg(&count, count + adjust, sem))
			break;
		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) {
			raw_spin_unlock_irq(&sem->wait_lock);
			return sem;
		}
	} while (1);

	list_add_tail(&waiter.list, &sem->write_wait);

	waiter.task = tsk;

	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
	for (;;) {
		if (!timeout)
			break;
		raw_spin_unlock_irq(&sem->wait_lock);
		timeout = schedule_timeout(timeout);
		raw_spin_lock_irq(&sem->wait_lock);
		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
		locked = writer_trylock(sem);
		if (locked)
			break;
	}

	if (!locked)
		ldsem_atomic_update(-LDSEM_WAIT_BIAS, sem);
	list_del(&waiter.list);

	/* if this writer timed out, wake any readers that gave way to it;
	 * otherwise they could sleep forever with the lock unowned */
	if (!locked && list_empty(&sem->write_wait))
		__ldsem_wake_readers(sem);

	raw_spin_unlock_irq(&sem->wait_lock);

	__set_task_state(tsk, TASK_RUNNING);

	/* lock wait may have timed out */
	if (!locked)
		return NULL;
	return sem;
}


static inline int __ldsem_down_read_nested(struct ld_semaphore *sem,
					   int subclass, long timeout)
{
	long count;

	lockdep_acquire_read(sem, subclass, 0, _RET_IP_);

	count = ldsem_atomic_update(LDSEM_READ_BIAS, sem);
	if (count <= 0) {
		lock_stat(sem, contended);
		if (!down_read_failed(sem, count, timeout)) {
			lockdep_release(sem, 1, _RET_IP_);
			return 0;
		}
	}
	lock_stat(sem, acquired);
	return 1;
}

static inline int __ldsem_down_write_nested(struct ld_semaphore *sem,
					    int subclass, long timeout)
{
	long count;

	lockdep_acquire(sem, subclass, 0, _RET_IP_);

	count = ldsem_atomic_update(LDSEM_WRITE_BIAS, sem);
	if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) {
		lock_stat(sem, contended);
		if (!down_write_failed(sem, count, timeout)) {
			lockdep_release(sem, 1, _RET_IP_);
			return 0;
		}
	}
	lock_stat(sem, acquired);
	return 1;
}


/*
 * lock for reading -- returns 1 if successful, 0 if timed out
 */
int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout)
{
	might_sleep();
	return __ldsem_down_read_nested(sem, 0, timeout);
}

/*
 * trylock for reading -- returns 1 if successful, 0 if contention
 */
int ldsem_down_read_trylock(struct ld_semaphore *sem)
{
	long count = sem->count;

	while (count >= 0) {
		if (ldsem_cmpxchg(&count, count + LDSEM_READ_BIAS, sem)) {
			lockdep_acquire_read(sem, 0, 1, _RET_IP_);
			lock_stat(sem, acquired);
			return 1;
		}
	}
	return 0;
}

/*
 * lock for writing -- returns 1 if successful, 0 if timed out
 */
int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout)
{
	might_sleep();
	return __ldsem_down_write_nested(sem, 0, timeout);
}
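
/*
 * Usage sketch (illustrative only; 'my_ldsem' and the timeout are
 * hypothetical): writer-side acquisition mirrors the read side, and the
 * return value must be checked because a timeout leaves the lock unheld:
 *
 *	if (ldsem_down_write(&my_ldsem, 5 * HZ)) {
 *		// ... exclusive critical section ...
 *		ldsem_up_write(&my_ldsem);
 *	}
 */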

/*
 * trylock for writing -- returns 1 if successful, 0 if contention
 */
int ldsem_down_write_trylock(struct ld_semaphore *sem)
{
	long count = sem->count;

	while ((count & LDSEM_ACTIVE_MASK) == 0) {
		if (ldsem_cmpxchg(&count, count + LDSEM_WRITE_BIAS, sem)) {
			lockdep_acquire(sem, 0, 1, _RET_IP_);
			lock_stat(sem, acquired);
			return 1;
		}
	}
	return 0;
}
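
/*
 * Usage sketch (illustrative only; 'my_ldsem' is hypothetical): the
 * trylock variants never sleep, so they are usable where blocking is not
 * an option; again the return value must be checked:
 *
 *	if (ldsem_down_write_trylock(&my_ldsem)) {
 *		// ... exclusive critical section ...
 *		ldsem_up_write(&my_ldsem);
 *	}
 */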

/*
 * release a read lock
 */
void ldsem_up_read(struct ld_semaphore *sem)
{
	long count;

	lockdep_release(sem, 1, _RET_IP_);

	count = ldsem_atomic_update(-LDSEM_READ_BIAS, sem);
	/* wake waiters if this was the last active lock */
	if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0)
		ldsem_wake(sem);
}

/*
 * release a write lock
 */
void ldsem_up_write(struct ld_semaphore *sem)
{
	long count;

	lockdep_release(sem, 1, _RET_IP_);

	count = ldsem_atomic_update(-LDSEM_WRITE_BIAS, sem);
	/* a negative count means there are waiters to wake */
	if (count < 0)
		ldsem_wake(sem);
}


#ifdef CONFIG_DEBUG_LOCK_ALLOC

int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout)
{
	might_sleep();
	return __ldsem_down_read_nested(sem, subclass, timeout);
}

int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
			    long timeout)
{
	might_sleep();
	return __ldsem_down_write_nested(sem, subclass, timeout);
}

#endif