blob: 00039924ea9dde4ff64f7510a44202d1a1330be0 [file] [log] [blame]
Kent Overstreetcafe5632013-03-23 16:11:31 -07001#ifndef _LINUX_CLOSURE_H
2#define _LINUX_CLOSURE_H
3
4#include <linux/llist.h>
5#include <linux/sched.h>
6#include <linux/workqueue.h>
7
8/*
9 * Closure is perhaps the most overused and abused term in computer science, but
10 * since I've been unable to come up with anything better you're stuck with it
11 * again.
12 *
13 * What are closures?
14 *
15 * They embed a refcount. The basic idea is they count "things that are in
16 * progress" - in flight bios, some other thread that's doing something else -
17 * anything you might want to wait on.
18 *
19 * The refcount may be manipulated with closure_get() and closure_put().
20 * closure_put() is where many of the interesting things happen, when it causes
21 * the refcount to go to 0.
22 *
23 * Closures can be used to wait on things both synchronously and asynchronously,
24 * and synchronous and asynchronous use can be mixed without restriction. To
25 * wait synchronously, use closure_sync() - you will sleep until your closure's
26 * refcount hits 1.
27 *
28 * To wait asynchronously, use
29 * continue_at(cl, next_function, workqueue);
30 *
31 * passing it, as you might expect, the function to run when nothing is pending
32 * and the workqueue to run that function out of.
33 *
34 * continue_at() also, critically, is a macro that returns the calling function.
35 * There's good reason for this.
36 *
37 * To use closures asynchronously and safely, a closure must always have a
38 * refcount, owned by the thread running it, while it is running. Otherwise, suppose
39 * you submit some bios and wish to have a function run when they all complete:
40 *
41 * foo_endio(struct bio *bio, int error)
42 * {
43 * closure_put(cl);
44 * }
45 *
46 * closure_init(cl);
47 *
48 * do_stuff();
49 * closure_get(cl);
50 * bio1->bi_endio = foo_endio;
51 * bio_submit(bio1);
52 *
53 * do_more_stuff();
54 * closure_get(cl);
55 * bio2->bi_endio = foo_endio;
56 * bio_submit(bio2);
57 *
58 * continue_at(cl, complete_some_read, system_wq);
59 *
60 * If closure's refcount started at 0, complete_some_read() could run before the
61 * second bio was submitted - which is almost always not what you want! More
62 * importantly, it wouldn't be possible to say whether the original thread or
63 * complete_some_read()'s thread owned the closure - and whatever state it was
64 * associated with!
65 *
66 * So, closure_init() initializes a closure's refcount to 1 - and when a
67 * closure_fn is run, the refcount will be reset to 1 first.
68 *
69 * Then, the rule is - if you got the refcount with closure_get(), release it
70 * with closure_put() (e.g., in a bio->bi_endio function). If you have a refcount
71 * on a closure because you called closure_init() or you were run out of a
72 * closure - _always_ use continue_at(). Doing so consistently will help
73 * eliminate an entire class of particularly pernicious races.
74 *
75 * For a closure to wait on an arbitrary event, we need to introduce waitlists:
76 *
77 * struct closure_waitlist list;
78 * closure_wait_event(list, cl, condition);
79 * closure_wake_up(wait_list);
80 *
81 * These work analogously to wait_event() and wake_up() - except that instead of
82 * operating on the current thread (for wait_event()) and lists of threads, they
83 * operate on an explicit closure and lists of closures.
84 *
85 * Because it's a closure we can now wait either synchronously or
86 * asynchronously. closure_wait_event() returns the current value of the
87 * condition, and if it returned false continue_at() or closure_sync() can be
88 * used to wait for it to become true.
89 *
90 * It's useful for waiting on things when you can't sleep in the context in
91 * which you must check the condition (perhaps a spinlock held, or you might be
92 * beneath generic_make_request() - in which case you can't sleep on IO).
93 *
94 * closure_wait_event() will wait either synchronously or asynchronously,
95 * depending on whether the closure is in blocking mode or not. You can pick a
96 * mode explicitly with closure_wait_event_sync() and
97 * closure_wait_event_async(), which do just what you might expect.
98 *
99 * Lastly, you might have a wait list dedicated to a specific event, and have no
100 * need for specifying the condition - you just want to wait until someone runs
101 * closure_wake_up() on the appropriate wait list. In that case, just use
102 * closure_wait(). It will return either true or false, depending on whether the
103 * closure was already on a wait list or not - a closure can only be on one wait
104 * list at a time.
105 *
106 * Parents:
107 *
108 * closure_init() takes two arguments - it takes the closure to initialize, and
109 * a (possibly null) parent.
110 *
111 * If parent is non null, the new closure will have a refcount for its lifetime;
112 * a closure is considered to be "finished" when its refcount hits 0 and the
113 * function to run is null. Hence
114 *
115 * continue_at(cl, NULL, NULL);
116 *
117 * returns up the (spaghetti) stack of closures, precisely like normal return
118 * returns up the C stack. continue_at() with non null fn is better thought of
119 * as doing a tail call.
120 *
121 * All this implies that a closure should typically be embedded in a particular
122 * struct (which its refcount will normally control the lifetime of), and that
123 * struct can very much be thought of as a stack frame.
124 *
125 * Locking:
126 *
127 * Closures are based on work items but they can be thought of as more like
128 * threads - in that like threads and unlike work items they have a well
129 * defined lifetime; they are created (with closure_init()) and eventually
130 * complete after a continue_at(cl, NULL, NULL).
131 *
132 * Suppose you've got some larger structure with a closure embedded in it that's
133 * used for periodically doing garbage collection. You only want one garbage
134 * collection happening at a time, so the natural thing to do is protect it with
135 * a lock. However, it's difficult to use a lock protecting a closure correctly
136 * because the unlock should come after the last continue_at() (additionally, if
137 * you're using the closure asynchronously a mutex won't work since a mutex has
138 * to be unlocked by the same process that locked it).
139 *
140 * So to make it less error prone and more efficient, we also have the ability
141 * to use closures as locks:
142 *
143 * closure_init_unlocked();
144 * closure_trylock();
145 *
146 * That's all we need for trylock() - the last closure_put() implicitly unlocks
147 * it for you. But for closure_lock(), we also need a wait list:
148 *
149 * struct closure_with_waitlist frobnicator_cl;
150 *
151 * closure_init_unlocked(&frobnicator_cl);
152 * closure_lock(&frobnicator_cl);
153 *
154 * A closure_with_waitlist embeds a closure and a wait list - much like struct
155 * delayed_work embeds a work item and a timer_list. The important thing is, use
156 * it exactly like you would a regular closure and closure_put() will magically
157 * handle everything for you.
158 *
159 * We've got closures that embed timers, too. They're called, appropriately
160 * enough:
161 * struct closure_with_timer;
162 *
163 * This gives you access to closure_delay(). It takes a refcount for a specified
164 * number of jiffies - you could then call closure_sync() (for a slightly
165 * convoluted version of msleep()) or continue_at() - which gives you the same
166 * effect as using a delayed work item, except you can reuse the work_struct
167 * already embedded in struct closure.
168 *
169 * Lastly, there's struct closure_with_waitlist_and_timer. It does what you
170 * probably expect, if you happen to need the features of both. (You don't
171 * really want to know how all this is implemented, but if I've done my job
172 * right you shouldn't have to care).
173 */
174
/* Forward declaration so closure_fn can refer to the type. */
struct closure;

/* Type of the function a closure runs; it receives the closure itself. */
typedef void closure_fn(struct closure *cl);
177
178struct closure_waitlist {
179 struct llist_head list;
180};
181
/*
 * Identifies which container struct a closure is embedded in. The enumerator
 * names must be TYPE_<struct tag>, because __CLOSURE_TYPE() builds them by
 * token pasting.
 */
enum closure_type {
	TYPE_closure,
	TYPE_closure_with_waitlist,
	TYPE_closure_with_timer,
	TYPE_closure_with_waitlist_and_timer,
	MAX_CLOSURE_TYPE = TYPE_closure_with_waitlist_and_timer,
};
189
/*
 * Flag bits kept in closure->remaining, above the reference count. The flags
 * sit on every other bit starting at CLOSURE_BITS_START; the bit directly
 * above each flag is left unused (see CLOSURE_GUARD_MASK).
 */
enum closure_state {
	/*
	 * CLOSURE_BLOCKING: Makes closure_wait_event() block instead of
	 * waiting asynchronously.
	 *
	 * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set
	 * by the thread that owns the closure, and cleared by the thread
	 * that's waking up the closure.
	 *
	 * CLOSURE_SLEEPING: Must be set before a thread uses a closure to
	 * sleep - indicates that cl->task is valid and closure_put() may wake
	 * it up. Only set or cleared by the thread that owns the closure.
	 *
	 * CLOSURE_TIMER: Analogous to CLOSURE_WAITING, indicates that a
	 * closure has an outstanding timer. Must be set by the thread that
	 * owns the closure, and cleared by the timer function when the timer
	 * goes off.
	 *
	 * The rest are for debugging and don't affect behaviour:
	 *
	 * CLOSURE_RUNNING: Set when a closure is running (i.e. by
	 * closure_init() and when closure_put() runs the next function), and
	 * must be cleared before remaining hits 0. Primarily to help guard
	 * against incorrect usage and accidentally transferring references.
	 * continue_at() and closure_return() clear it for you; if you're
	 * doing something unusual you can use closure_set_dead() which also
	 * helps annotate where references are being transferred.
	 *
	 * CLOSURE_STACK: Sanity check - remaining should never hit 0 on a
	 * closure with this flag set.
	 *
	 * CLOSURE_DESTRUCTOR: NOTE(review): not described by the original
	 * author; it is set by closure_return_with_destructor() - confirm
	 * exact semantics against closure_put().
	 */

	CLOSURE_BITS_START	= 1 << 19,
	CLOSURE_DESTRUCTOR	= 1 << 19,
	CLOSURE_BLOCKING	= 1 << 21,
	CLOSURE_WAITING		= 1 << 23,
	CLOSURE_SLEEPING	= 1 << 25,
	CLOSURE_TIMER		= 1 << 27,
	CLOSURE_RUNNING		= 1 << 29,
	CLOSURE_STACK		= 1 << 31,
};
230
/*
 * Mask of the unused "guard" bit directly above each closure_state flag.
 * NOTE(review): presumably a set guard bit indicates that a flag was added or
 * subtracted twice - confirm against the checks in closure_put().
 *
 * The flag union includes CLOSURE_STACK (the sign bit), so the shift is done
 * in unsigned arithmetic: left-shifting a negative int is undefined. The
 * guard bit for CLOSURE_STACK falls outside 32 bits and is dropped.
 */
#define CLOSURE_GUARD_MASK						\
	((unsigned) (CLOSURE_DESTRUCTOR|CLOSURE_BLOCKING|CLOSURE_WAITING| \
		     CLOSURE_SLEEPING|CLOSURE_TIMER|CLOSURE_RUNNING|	\
		     CLOSURE_STACK) << 1)

/* Bits of closure->remaining below the flags: the reference count itself. */
#define CLOSURE_REMAINING_MASK		(CLOSURE_BITS_START - 1)

/* Initial value of closure->remaining: one reference, marked as running. */
#define CLOSURE_REMAINING_INITIALIZER	(1|CLOSURE_RUNNING)
237
238struct closure {
239 union {
240 struct {
241 struct workqueue_struct *wq;
242 struct task_struct *task;
243 struct llist_node list;
244 closure_fn *fn;
245 };
246 struct work_struct work;
247 };
248
249 struct closure *parent;
250
251 atomic_t remaining;
252
253 enum closure_type type;
254
255#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
256#define CLOSURE_MAGIC_DEAD 0xc054dead
257#define CLOSURE_MAGIC_ALIVE 0xc054a11e
258
259 unsigned magic;
260 struct list_head all;
261 unsigned long ip;
262 unsigned long waiting_on;
263#endif
264};
265
266struct closure_with_waitlist {
267 struct closure cl;
268 struct closure_waitlist wait;
269};
270
271struct closure_with_timer {
272 struct closure cl;
273 struct timer_list timer;
274};
275
276struct closure_with_waitlist_and_timer {
277 struct closure cl;
278 struct closure_waitlist wait;
279 struct timer_list timer;
280};
281
/*
 * Final arm of the __closure_type() conditional chain; only selected when the
 * argument is none of the known closure structs.
 */
unsigned invalid_closure_type(void);

/*
 * One link of the chain: yields TYPE_<_t> if @cl has type struct <_t>, and
 * otherwise falls through to whatever follows the trailing ':'.
 */
#define __CLOSURE_TYPE(cl, _t)						\
	__builtin_types_compatible_p(typeof(cl), struct _t) ? TYPE_ ## _t :

/*
 * Map an object (not a pointer) of one of the closure structs to its
 * enum closure_type value.
 */
#define __closure_type(cl)						\
(									\
	__CLOSURE_TYPE(cl, closure)					\
	__CLOSURE_TYPE(cl, closure_with_waitlist)			\
	__CLOSURE_TYPE(cl, closure_with_timer)				\
	__CLOSURE_TYPE(cl, closure_with_waitlist_and_timer)		\
	invalid_closure_type()						\
)
296
297void closure_sub(struct closure *cl, int v);
298void closure_put(struct closure *cl);
299void closure_queue(struct closure *cl);
300void __closure_wake_up(struct closure_waitlist *list);
301bool closure_wait(struct closure_waitlist *list, struct closure *cl);
302void closure_sync(struct closure *cl);
303
304bool closure_trylock(struct closure *cl, struct closure *parent);
305void __closure_lock(struct closure *cl, struct closure *parent,
306 struct closure_waitlist *wait_list);
307
308void do_closure_timer_init(struct closure *cl);
309bool __closure_delay(struct closure *cl, unsigned long delay,
310 struct timer_list *timer);
311void __closure_flush(struct closure *cl, struct timer_list *timer);
312void __closure_flush_sync(struct closure *cl, struct timer_list *timer);
313
/*
 * Debug hooks: real functions when CONFIG_BCACHE_CLOSURES_DEBUG is set, empty
 * inline stubs otherwise so callers need no #ifdefs.
 */
#ifndef CONFIG_BCACHE_CLOSURES_DEBUG

static inline void closure_debug_init(void) {}
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}

#else

void closure_debug_init(void);
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);

#endif
327
/*
 * Record the current instruction pointer in @cl. Does nothing unless
 * CONFIG_BCACHE_CLOSURES_DEBUG is enabled.
 */
static inline void closure_set_ip(struct closure *cl)
{
#if defined(CONFIG_BCACHE_CLOSURES_DEBUG)
	cl->ip = _THIS_IP_;
#endif
}
334
/*
 * Record the caller's return address in @cl. Does nothing unless
 * CONFIG_BCACHE_CLOSURES_DEBUG is enabled.
 */
static inline void closure_set_ret_ip(struct closure *cl)
{
#if defined(CONFIG_BCACHE_CLOSURES_DEBUG)
	cl->ip = _RET_IP_;
#endif
}
341
342static inline void closure_get(struct closure *cl)
343{
344#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
345 BUG_ON((atomic_inc_return(&cl->remaining) &
346 CLOSURE_REMAINING_MASK) <= 1);
347#else
348 atomic_inc(&cl->remaining);
349#endif
350}
351
352static inline void closure_set_stopped(struct closure *cl)
353{
354 atomic_sub(CLOSURE_RUNNING, &cl->remaining);
355}
356
357static inline bool closure_is_stopped(struct closure *cl)
358{
359 return !(atomic_read(&cl->remaining) & CLOSURE_RUNNING);
360}
361
362static inline bool closure_is_unlocked(struct closure *cl)
363{
364 return atomic_read(&cl->remaining) == -1;
365}
366
367static inline void do_closure_init(struct closure *cl, struct closure *parent,
368 bool running)
369{
370 switch (cl->type) {
371 case TYPE_closure_with_timer:
372 case TYPE_closure_with_waitlist_and_timer:
373 do_closure_timer_init(cl);
374 default:
375 break;
376 }
377
378 cl->parent = parent;
379 if (parent)
380 closure_get(parent);
381
382 if (running) {
383 closure_debug_create(cl);
384 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
385 } else
386 atomic_set(&cl->remaining, -1);
387
388 closure_set_ip(cl);
389}
390
/*
 * Hack to get at the embedded closure if there is one, by doing an unsafe cast:
 * the result of __closure_type() is thrown away, it's used merely for type
 * checking.
 *
 * @cl is a pointer to a struct closure or to one of the closure_with_*
 * structs, all of which have the closure as their first member. The argument
 * is parenthesized so that expressions such as "p + 1" are dereferenced and
 * cast as a whole.
 */
#define __to_internal_closure(cl)				\
({								\
	BUILD_BUG_ON(__closure_type(*(cl)) > MAX_CLOSURE_TYPE);	\
	(struct closure *) (cl);				\
})
401
/*
 * Record which container struct @cl points at in the embedded closure's type
 * field, then run the common initialisation.
 */
#define closure_init_type(cl, parent, running)			\
do {								\
	struct closure *_closure = __to_internal_closure(cl);	\
								\
	_closure->type = __closure_type(*(cl));			\
	do_closure_init(_closure, parent, running);		\
} while (0)
408
/**
 * __closure_init() - Initialize a closure without zeroing it first
 * @cl:		closure to initialize
 * @parent:	parent of the new closure; may be NULL
 *
 * May be used instead of closure_init() when the memory has already been
 * zeroed. The closure starts out running.
 */
#define __closure_init(cl, parent)				\
	closure_init_type(cl, parent, true)
416
/**
 * closure_init() - Zero and initialize a closure, setting the refcount to 1
 * @cl:		closure to initialize
 * @parent:	parent of the new closure. cl will take a refcount on it for
 *		its lifetime; may be NULL.
 */
#define closure_init(cl, parent)				\
do {								\
	memset((cl), 0, sizeof(*(cl)));				\
	closure_init_type(cl, parent, true);			\
} while (0)
428
429static inline void closure_init_stack(struct closure *cl)
430{
431 memset(cl, 0, sizeof(struct closure));
432 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|
433 CLOSURE_BLOCKING|CLOSURE_STACK);
434}
435
/**
 * closure_init_unlocked() - Zero and initialize a closure, leaving it unlocked
 * @cl:		closure to initialize
 *
 * For closures that will be used as locks. No parent is set, and the closure
 * may not be used until after a closure_lock() or closure_trylock().
 */
#define closure_init_unlocked(cl)				\
do {								\
	memset((cl), 0, sizeof(*(cl)));				\
	closure_init_type(cl, NULL, false);			\
} while (0)
448
/**
 * closure_lock() - lock and initialize a closure.
 * @cl:		the closure to lock
 * @parent:	the new parent for this closure
 *
 * @cl must be one of the closure types that embeds a wait list (the macro
 * uses its ->wait member), since otherwise there would be nothing to sleep on
 * under contention.
 *
 * @parent means the same as in closure_init(): if non null, the closure takes
 * a reference on @parent which is released when the closure is unlocked.
 */
#define closure_lock(cl, parent)				\
	__closure_lock(__to_internal_closure(cl), parent, &(cl)->wait)
463
/**
 * closure_delay() - delay some number of jiffies
 * @cl:		the closure that will sleep; must embed a timer (->timer)
 * @delay:	the delay in jiffies
 *
 * Takes a refcount on @cl which is released after @delay jiffies. Follow it
 * with continue_at() to run a function after the delay, or with closure_sync()
 * for a convoluted version of msleep().
 */
#define closure_delay(cl, delay)				\
	__closure_delay(__to_internal_closure(cl), delay, &(cl)->timer)
475
/*
 * Wrappers that hand the embedded closure and its timer to __closure_flush()
 * and __closure_flush_sync(); @cl must be a closure type with a ->timer.
 * NOTE(review): the flush semantics are defined out of line - confirm there.
 */
#define closure_flush(cl)					\
	__closure_flush(__to_internal_closure(cl), &(cl)->timer)

#define closure_flush_sync(cl)					\
	__closure_flush_sync(__to_internal_closure(cl), &(cl)->timer)
481
482static inline void __closure_end_sleep(struct closure *cl)
483{
484 __set_current_state(TASK_RUNNING);
485
486 if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING)
487 atomic_sub(CLOSURE_SLEEPING, &cl->remaining);
488}
489
490static inline void __closure_start_sleep(struct closure *cl)
491{
492 closure_set_ip(cl);
493 cl->task = current;
494 set_current_state(TASK_UNINTERRUPTIBLE);
495
496 if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING))
497 atomic_add(CLOSURE_SLEEPING, &cl->remaining);
498}
499
500/**
501 * closure_blocking() - returns true if the closure is in blocking mode.
502 *
503 * If a closure is in blocking mode, closure_wait_event() will sleep until the
504 * condition is true instead of waiting asynchronously.
505 */
506static inline bool closure_blocking(struct closure *cl)
507{
508 return atomic_read(&cl->remaining) & CLOSURE_BLOCKING;
509}
510
511/**
512 * set_closure_blocking() - put a closure in blocking mode.
513 *
514 * If a closure is in blocking mode, closure_wait_event() will sleep until the
515 * condition is true instead of waiting asynchronously.
516 *
517 * Not thread safe - can only be called by the thread running the closure.
518 */
519static inline void set_closure_blocking(struct closure *cl)
520{
521 if (!closure_blocking(cl))
522 atomic_add(CLOSURE_BLOCKING, &cl->remaining);
523}
524
525/*
526 * Not thread safe - can only be called by the thread running the closure.
527 */
528static inline void clear_closure_blocking(struct closure *cl)
529{
530 if (closure_blocking(cl))
531 atomic_sub(CLOSURE_BLOCKING, &cl->remaining);
532}
533
/**
 * closure_wake_up() - wake up all closures on a wait list.
 * @list:	the wait list to wake
 *
 * Issues a full memory barrier and then calls __closure_wake_up().
 */
static inline void closure_wake_up(struct closure_waitlist *list)
{
	smp_mb();
	__closure_wake_up(list);
}
542
/*
 * Wait on an event, synchronously or asynchronously - analogous to wait_event()
 * but for closures. Evaluates to the last value of @condition.
 *
 * @list:	wait list to wait on
 * @cl:		closure doing the waiting
 * @condition:	expression to wait for; may be evaluated several times
 * @_block:	true to sleep until @condition holds, false to return at once
 *
 * The loop is oddly structured so as to avoid a race; we must check the
 * condition again after we've added ourself to the waitlist. closure_wait()
 * returns false if we were already on the waitlist, so we only schedule or
 * break when it returns false. If it returns true we have just been added, and
 * we loop again to recheck the condition.
 *
 * The __closure_wake_up() is necessary because we may race with the event
 * becoming true; i.e. we see event false -> wait -> recheck condition, but the
 * thread that made the event true may have called closure_wake_up() before we
 * added ourself to the wait list.
 *
 * We have to call closure_sync() at the end instead of just
 * __closure_end_sleep() because a different thread might've called
 * closure_wake_up() before us and gotten preempted before they dropped the
 * refcount on our closure. If this was a stack allocated closure, that would be
 * bad.
 *
 * The macro's own locals carry a __cwe_ prefix so that a @condition mentioning
 * a caller variable called "ret" or "block" is not silently captured.
 */
#define __closure_wait_event(list, cl, condition, _block)		\
({									\
	bool __cwe_block = (_block);					\
	typeof(condition) __cwe_ret;					\
									\
	while (1) {							\
		__cwe_ret = (condition);				\
		if (__cwe_ret) {					\
			__closure_wake_up(list);			\
			if (__cwe_block)				\
				closure_sync(cl);			\
									\
			break;						\
		}							\
									\
		if (__cwe_block)					\
			__closure_start_sleep(cl);			\
									\
		if (!closure_wait(list, cl)) {				\
			if (!__cwe_block)				\
				break;					\
									\
			schedule();					\
		}							\
	}								\
									\
	__cwe_ret;							\
})
592
/**
 * closure_wait_event() - wait on a condition, synchronously or asynchronously.
 * @list:	the wait list to wait on
 * @cl:		the closure that is doing the waiting
 * @condition:	a C expression for the event to wait for
 *
 * In blocking mode (see closure_blocking()), sleeps until @condition
 * evaluates to true - exactly like wait_event().
 *
 * Otherwise waits asynchronously: if @condition is currently false, @cl is put
 * onto @list and the macro returns. @list then owns a refcount on @cl;
 * closure_sync() or continue_at() may be used later to wait for another thread
 * to wake up @list, which drops that refcount.
 *
 * Returns the value of @condition; @cl will be on @list iff @condition was
 * false.
 *
 * closure_wake_up(@list) must be called after changing any variable that could
 * cause @condition to become true.
 */
#define closure_wait_event(list, cl, condition)				\
	__closure_wait_event(list, cl, condition, closure_blocking(cl))

/* As closure_wait_event(), but never sleeps regardless of the closure's mode. */
#define closure_wait_event_async(list, cl, condition)			\
	__closure_wait_event(list, cl, condition, false)

/* As closure_wait_event(), but always sleeps until @condition is true. */
#define closure_wait_event_sync(list, cl, condition)			\
	__closure_wait_event(list, cl, condition, true)
621
622static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
623 struct workqueue_struct *wq)
624{
625 BUG_ON(object_is_on_stack(cl));
626 closure_set_ip(cl);
627 cl->fn = fn;
628 cl->wq = wq;
629 /* between atomic_dec() in closure_put() */
630 smp_mb__before_atomic_dec();
631}
632
/*
 * Arrange for @_fn to run out of @_wq once nothing is pending on @_cl, then
 * return from the calling function (which must return void). Subtracting
 * CLOSURE_RUNNING + 1 clears the running flag and drops the caller's
 * reference in a single step.
 */
#define continue_at(_cl, _fn, _wq)					\
do {									\
	set_closure_fn(_cl, _fn, _wq);					\
	closure_sub(_cl, CLOSURE_RUNNING + 1);				\
	return;								\
} while (0)

/* Finish the closure: continue_at() with no function to run. */
#define closure_return(_cl)	continue_at(_cl, NULL, NULL)
641
/*
 * Like continue_at(), but hands @_cl to closure_queue() directly instead of
 * going through closure_sub(), so the refcount is left untouched. Returns
 * from the calling function, which must return void.
 *
 * closure_queue() is passed the macro argument @_cl; naming a variable "cl"
 * here would silently pick up whatever "cl" is in scope at the call site.
 */
#define continue_at_nobarrier(_cl, _fn, _wq)				\
do {									\
	set_closure_fn(_cl, _fn, _wq);					\
	closure_queue(_cl);						\
	return;								\
} while (0)
648
/*
 * Finish the closure like closure_return(), but register @_destructor as the
 * closure's function and set CLOSURE_DESTRUCTOR: the single subtraction
 * clears CLOSURE_RUNNING, drops the caller's reference and, through the
 * negated term, adds the destructor flag. Returns from the calling function.
 */
#define closure_return_with_destructor(_cl, _destructor)		\
do {									\
	set_closure_fn(_cl, _destructor, NULL);				\
	closure_sub(_cl, CLOSURE_RUNNING + 1 - CLOSURE_DESTRUCTOR);	\
	return;								\
} while (0)
655
656static inline void closure_call(struct closure *cl, closure_fn fn,
657 struct workqueue_struct *wq,
658 struct closure *parent)
659{
660 closure_init(cl, parent);
661 continue_at_nobarrier(cl, fn, wq);
662}
663
664static inline void closure_trylock_call(struct closure *cl, closure_fn fn,
665 struct workqueue_struct *wq,
666 struct closure *parent)
667{
668 if (closure_trylock(cl, parent))
669 continue_at_nobarrier(cl, fn, wq);
670}
671
672#endif /* _LINUX_CLOSURE_H */