| /* |
| * Copyright (C) 2007 Oracle. All rights reserved. |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public |
| * License v2 as published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public |
| * License along with this program; if not, write to the |
| * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| * Boston, MA 021110-1307, USA. |
| */ |
| |
| #include <linux/kthread.h> |
| #include <linux/list.h> |
| #include <linux/spinlock.h> |
| #include <linux/freezer.h> |
| #include "async-thread.h" |
| |
| #define WORK_QUEUED_BIT 0 |
| #define WORK_DONE_BIT 1 |
| #define WORK_ORDER_DONE_BIT 2 |
| #define WORK_HIGH_PRIO_BIT 3 |
| |
| /* |
| * container for the kthread task pointer and the list of pending work |
| * One of these is allocated per thread. |
| */ |
| struct btrfs_worker_thread { |
| /* pool we belong to */ |
| struct btrfs_workers *workers; |
| |
| /* list of struct btrfs_work that are waiting for service */ |
| struct list_head pending; |
| struct list_head prio_pending; |
| |
| /* list of worker threads from struct btrfs_workers */ |
| struct list_head worker_list; |
| |
| /* kthread */ |
| struct task_struct *task; |
| |
| /* number of things on the pending list */ |
| atomic_t num_pending; |
| |
| unsigned long sequence; |
| |
| /* protects the pending list. */ |
| spinlock_t lock; |
| |
| /* set to non-zero when this thread is already awake and kicking */ |
| int working; |
| |
| /* are we currently idle */ |
| int idle; |
| }; |
| |
| /* |
| * helper function to move a thread onto the idle list after it |
| * has finished some requests. |
| */ |
| static void check_idle_worker(struct btrfs_worker_thread *worker) |
| { |
| if (!worker->idle && atomic_read(&worker->num_pending) < |
| worker->workers->idle_thresh / 2) { |
| unsigned long flags; |
| spin_lock_irqsave(&worker->workers->lock, flags); |
| worker->idle = 1; |
| list_move(&worker->worker_list, &worker->workers->idle_list); |
| spin_unlock_irqrestore(&worker->workers->lock, flags); |
| } |
| } |
| |
| /* |
| * helper function to move a thread off the idle list after new |
| * pending work is added. |
| */ |
| static void check_busy_worker(struct btrfs_worker_thread *worker) |
| { |
| if (worker->idle && atomic_read(&worker->num_pending) >= |
| worker->workers->idle_thresh) { |
| unsigned long flags; |
| spin_lock_irqsave(&worker->workers->lock, flags); |
| worker->idle = 0; |
| list_move_tail(&worker->worker_list, |
| &worker->workers->worker_list); |
| spin_unlock_irqrestore(&worker->workers->lock, flags); |
| } |
| } |
| |
| static noinline int run_ordered_completions(struct btrfs_workers *workers, |
| struct btrfs_work *work) |
| { |
| unsigned long flags; |
| |
| if (!workers->ordered) |
| return 0; |
| |
| set_bit(WORK_DONE_BIT, &work->flags); |
| |
| spin_lock_irqsave(&workers->lock, flags); |
| |
| while (1) { |
| if (!list_empty(&workers->prio_order_list)) { |
| work = list_entry(workers->prio_order_list.next, |
| struct btrfs_work, order_list); |
| } else if (!list_empty(&workers->order_list)) { |
| work = list_entry(workers->order_list.next, |
| struct btrfs_work, order_list); |
| } else { |
| break; |
| } |
| if (!test_bit(WORK_DONE_BIT, &work->flags)) |
| break; |
| |
| /* we are going to call the ordered done function, but |
| * we leave the work item on the list as a barrier so |
| * that later work items that are done don't have their |
| * functions called before this one returns |
| */ |
| if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) |
| break; |
| |
| spin_unlock_irqrestore(&workers->lock, flags); |
| |
| work->ordered_func(work); |
| |
| /* now take the lock again and call the freeing code */ |
| spin_lock_irqsave(&workers->lock, flags); |
| list_del(&work->order_list); |
| work->ordered_free(work); |
| } |
| |
| spin_unlock_irqrestore(&workers->lock, flags); |
| return 0; |
| } |
| |
| /* |
| * main loop for servicing work items |
| */ |
| static int worker_loop(void *arg) |
| { |
| struct btrfs_worker_thread *worker = arg; |
| struct list_head *cur; |
| struct btrfs_work *work; |
| do { |
| spin_lock_irq(&worker->lock); |
| again_locked: |
| while (1) { |
| if (!list_empty(&worker->prio_pending)) |
| cur = worker->prio_pending.next; |
| else if (!list_empty(&worker->pending)) |
| cur = worker->pending.next; |
| else |
| break; |
| |
| work = list_entry(cur, struct btrfs_work, list); |
| list_del(&work->list); |
| clear_bit(WORK_QUEUED_BIT, &work->flags); |
| |
| work->worker = worker; |
| spin_unlock_irq(&worker->lock); |
| |
| work->func(work); |
| |
| atomic_dec(&worker->num_pending); |
| /* |
| * unless this is an ordered work queue, |
| * 'work' was probably freed by func above. |
| */ |
| run_ordered_completions(worker->workers, work); |
| |
| spin_lock_irq(&worker->lock); |
| check_idle_worker(worker); |
| } |
| if (freezing(current)) { |
| worker->working = 0; |
| spin_unlock_irq(&worker->lock); |
| refrigerator(); |
| } else { |
| spin_unlock_irq(&worker->lock); |
| if (!kthread_should_stop()) { |
| cpu_relax(); |
| /* |
| * we've dropped the lock, did someone else |
| * jump_in? |
| */ |
| smp_mb(); |
| if (!list_empty(&worker->pending) || |
| !list_empty(&worker->prio_pending)) |
| continue; |
| |
| /* |
| * this short schedule allows more work to |
| * come in without the queue functions |
| * needing to go through wake_up_process() |
| * |
| * worker->working is still 1, so nobody |
| * is going to try and wake us up |
| */ |
| schedule_timeout(1); |
| smp_mb(); |
| if (!list_empty(&worker->pending) || |
| !list_empty(&worker->prio_pending)) |
| continue; |
| |
| if (kthread_should_stop()) |
| break; |
| |
| /* still no more work?, sleep for real */ |
| spin_lock_irq(&worker->lock); |
| set_current_state(TASK_INTERRUPTIBLE); |
| if (!list_empty(&worker->pending) || |
| !list_empty(&worker->prio_pending)) |
| goto again_locked; |
| |
| /* |
| * this makes sure we get a wakeup when someone |
| * adds something new to the queue |
| */ |
| worker->working = 0; |
| spin_unlock_irq(&worker->lock); |
| |
| if (!kthread_should_stop()) |
| schedule(); |
| } |
| __set_current_state(TASK_RUNNING); |
| } |
| } while (!kthread_should_stop()); |
| return 0; |
| } |
| |
| /* |
| * this will wait for all the worker threads to shutdown |
| */ |
| int btrfs_stop_workers(struct btrfs_workers *workers) |
| { |
| struct list_head *cur; |
| struct btrfs_worker_thread *worker; |
| |
| list_splice_init(&workers->idle_list, &workers->worker_list); |
| while (!list_empty(&workers->worker_list)) { |
| cur = workers->worker_list.next; |
| worker = list_entry(cur, struct btrfs_worker_thread, |
| worker_list); |
| kthread_stop(worker->task); |
| list_del(&worker->worker_list); |
| kfree(worker); |
| } |
| return 0; |
| } |
| |
| /* |
| * simple init on struct btrfs_workers |
| */ |
| void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) |
| { |
| workers->num_workers = 0; |
| INIT_LIST_HEAD(&workers->worker_list); |
| INIT_LIST_HEAD(&workers->idle_list); |
| INIT_LIST_HEAD(&workers->order_list); |
| INIT_LIST_HEAD(&workers->prio_order_list); |
| spin_lock_init(&workers->lock); |
| workers->max_workers = max; |
| workers->idle_thresh = 32; |
| workers->name = name; |
| workers->ordered = 0; |
| } |
| |
| /* |
| * starts new worker threads. This does not enforce the max worker |
| * count in case you need to temporarily go past it. |
| */ |
| int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) |
| { |
| struct btrfs_worker_thread *worker; |
| int ret = 0; |
| int i; |
| |
| for (i = 0; i < num_workers; i++) { |
| worker = kzalloc(sizeof(*worker), GFP_NOFS); |
| if (!worker) { |
| ret = -ENOMEM; |
| goto fail; |
| } |
| |
| INIT_LIST_HEAD(&worker->pending); |
| INIT_LIST_HEAD(&worker->prio_pending); |
| INIT_LIST_HEAD(&worker->worker_list); |
| spin_lock_init(&worker->lock); |
| atomic_set(&worker->num_pending, 0); |
| worker->workers = workers; |
| worker->task = kthread_run(worker_loop, worker, |
| "btrfs-%s-%d", workers->name, |
| workers->num_workers + i); |
| if (IS_ERR(worker->task)) { |
| ret = PTR_ERR(worker->task); |
| kfree(worker); |
| goto fail; |
| } |
| |
| spin_lock_irq(&workers->lock); |
| list_add_tail(&worker->worker_list, &workers->idle_list); |
| worker->idle = 1; |
| workers->num_workers++; |
| spin_unlock_irq(&workers->lock); |
| } |
| return 0; |
| fail: |
| btrfs_stop_workers(workers); |
| return ret; |
| } |
| |
| /* |
| * run through the list and find a worker thread that doesn't have a lot |
| * to do right now. This can return null if we aren't yet at the thread |
| * count limit and all of the threads are busy. |
| */ |
| static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) |
| { |
| struct btrfs_worker_thread *worker; |
| struct list_head *next; |
| int enforce_min = workers->num_workers < workers->max_workers; |
| |
| /* |
| * if we find an idle thread, don't move it to the end of the |
| * idle list. This improves the chance that the next submission |
| * will reuse the same thread, and maybe catch it while it is still |
| * working |
| */ |
| if (!list_empty(&workers->idle_list)) { |
| next = workers->idle_list.next; |
| worker = list_entry(next, struct btrfs_worker_thread, |
| worker_list); |
| return worker; |
| } |
| if (enforce_min || list_empty(&workers->worker_list)) |
| return NULL; |
| |
| /* |
| * if we pick a busy task, move the task to the end of the list. |
| * hopefully this will keep things somewhat evenly balanced. |
| * Do the move in batches based on the sequence number. This groups |
| * requests submitted at roughly the same time onto the same worker. |
| */ |
| next = workers->worker_list.next; |
| worker = list_entry(next, struct btrfs_worker_thread, worker_list); |
| atomic_inc(&worker->num_pending); |
| worker->sequence++; |
| |
| if (worker->sequence % workers->idle_thresh == 0) |
| list_move_tail(next, &workers->worker_list); |
| return worker; |
| } |
| |
| /* |
| * selects a worker thread to take the next job. This will either find |
| * an idle worker, start a new worker up to the max count, or just return |
| * one of the existing busy workers. |
| */ |
| static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) |
| { |
| struct btrfs_worker_thread *worker; |
| unsigned long flags; |
| |
| again: |
| spin_lock_irqsave(&workers->lock, flags); |
| worker = next_worker(workers); |
| spin_unlock_irqrestore(&workers->lock, flags); |
| |
| if (!worker) { |
| spin_lock_irqsave(&workers->lock, flags); |
| if (workers->num_workers >= workers->max_workers) { |
| struct list_head *fallback = NULL; |
| /* |
| * we have failed to find any workers, just |
| * return the force one |
| */ |
| if (!list_empty(&workers->worker_list)) |
| fallback = workers->worker_list.next; |
| if (!list_empty(&workers->idle_list)) |
| fallback = workers->idle_list.next; |
| BUG_ON(!fallback); |
| worker = list_entry(fallback, |
| struct btrfs_worker_thread, worker_list); |
| spin_unlock_irqrestore(&workers->lock, flags); |
| } else { |
| spin_unlock_irqrestore(&workers->lock, flags); |
| /* we're below the limit, start another worker */ |
| btrfs_start_workers(workers, 1); |
| goto again; |
| } |
| } |
| return worker; |
| } |
| |
| /* |
| * btrfs_requeue_work just puts the work item back on the tail of the list |
| * it was taken from. It is intended for use with long running work functions |
| * that make some progress and want to give the cpu up for others. |
| */ |
| int btrfs_requeue_work(struct btrfs_work *work) |
| { |
| struct btrfs_worker_thread *worker = work->worker; |
| unsigned long flags; |
| int wake = 0; |
| |
| if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) |
| goto out; |
| |
| spin_lock_irqsave(&worker->lock, flags); |
| if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) |
| list_add_tail(&work->list, &worker->prio_pending); |
| else |
| list_add_tail(&work->list, &worker->pending); |
| atomic_inc(&worker->num_pending); |
| |
| /* by definition we're busy, take ourselves off the idle |
| * list |
| */ |
| if (worker->idle) { |
| spin_lock_irqsave(&worker->workers->lock, flags); |
| worker->idle = 0; |
| list_move_tail(&worker->worker_list, |
| &worker->workers->worker_list); |
| spin_unlock_irqrestore(&worker->workers->lock, flags); |
| } |
| if (!worker->working) { |
| wake = 1; |
| worker->working = 1; |
| } |
| |
| spin_unlock_irqrestore(&worker->lock, flags); |
| if (wake) |
| wake_up_process(worker->task); |
| out: |
| |
| return 0; |
| } |
| |
| void btrfs_set_work_high_prio(struct btrfs_work *work) |
| { |
| set_bit(WORK_HIGH_PRIO_BIT, &work->flags); |
| } |
| |
| /* |
| * places a struct btrfs_work into the pending queue of one of the kthreads |
| */ |
| int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) |
| { |
| struct btrfs_worker_thread *worker; |
| unsigned long flags; |
| int wake = 0; |
| |
| /* don't requeue something already on a list */ |
| if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) |
| goto out; |
| |
| worker = find_worker(workers); |
| if (workers->ordered) { |
| spin_lock_irqsave(&workers->lock, flags); |
| if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { |
| list_add_tail(&work->order_list, |
| &workers->prio_order_list); |
| } else { |
| list_add_tail(&work->order_list, &workers->order_list); |
| } |
| spin_unlock_irqrestore(&workers->lock, flags); |
| } else { |
| INIT_LIST_HEAD(&work->order_list); |
| } |
| |
| spin_lock_irqsave(&worker->lock, flags); |
| |
| if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) |
| list_add_tail(&work->list, &worker->prio_pending); |
| else |
| list_add_tail(&work->list, &worker->pending); |
| atomic_inc(&worker->num_pending); |
| check_busy_worker(worker); |
| |
| /* |
| * avoid calling into wake_up_process if this thread has already |
| * been kicked |
| */ |
| if (!worker->working) |
| wake = 1; |
| worker->working = 1; |
| |
| spin_unlock_irqrestore(&worker->lock, flags); |
| |
| if (wake) |
| wake_up_process(worker->task); |
| out: |
| return 0; |
| } |