/* sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * SPU scheduler, based on Linux thread priority.  For now use
 * a simple "cooperative" yield model with no preemption.  SPU
 * scheduling will eventually be preemptive: when a thread with
 * a higher static priority gets ready to run, an active SPU
 * context will be preempted and returned to the waitq.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/config.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include "spufs.h"

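/*
 * One wait queue per priority level, plus a bitmap of levels that have
 * blocked waiters.  The bitmap is sized for MAX_PRIO+1 bits: bit MAX_PRIO
 * is kept permanently set (see spu_sched_init) so that
 * sched_find_first_bit() returns MAX_PRIO when no thread is waiting.
 */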
#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
struct spu_prio_array {
	atomic_t nr_blocked;
	unsigned long bitmap[SPU_BITMAP_SIZE];
	wait_queue_head_t waitq[MAX_PRIO];
};

/* spu_runqueue - This is the main runqueue data structure for SPUs. */
struct spu_runqueue {
	struct semaphore sem;
	unsigned long nr_active;
	unsigned long nr_idle;
	unsigned long nr_switches;
	struct list_head active_list;
	struct list_head idle_list;
	struct spu_prio_array prio;
};

static struct spu_runqueue *spu_runqueues = NULL;

static inline struct spu_runqueue *spu_rq(void)
{
	/* Future: make this a per-NODE array,
	 * and use cpu_to_node(smp_processor_id())
	 */
	return spu_runqueues;
}

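/*
 * List helpers for the idle and active SPU lists.  They do no locking of
 * their own; apart from the single-threaded init and exit paths, callers
 * serialize through rq->sem.
 */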
static inline struct spu *del_idle(struct spu_runqueue *rq)
{
	struct spu *spu;

	BUG_ON(rq->nr_idle <= 0);
	BUG_ON(list_empty(&rq->idle_list));
	/* Future: Move SPU out of low-power SRI state. */
	spu = list_entry(rq->idle_list.next, struct spu, sched_list);
	list_del_init(&spu->sched_list);
	rq->nr_idle--;
	return spu;
}

static inline void del_active(struct spu_runqueue *rq, struct spu *spu)
{
	BUG_ON(rq->nr_active <= 0);
	BUG_ON(list_empty(&rq->active_list));
	list_del_init(&spu->sched_list);
	rq->nr_active--;
}

static inline void add_idle(struct spu_runqueue *rq, struct spu *spu)
{
	/* Future: Put SPU into low-power SRI state. */
	list_add_tail(&spu->sched_list, &rq->idle_list);
	rq->nr_idle++;
}

static inline void add_active(struct spu_runqueue *rq, struct spu *spu)
{
	rq->nr_active++;
	rq->nr_switches++;
	list_add_tail(&spu->sched_list, &rq->active_list);
}

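/*
 * prio_wakeup() wakes the single highest-priority blocked waiter when an
 * idle SPU is available; prio_wait() blocks the current thread on the wait
 * queue for its priority level until such a wakeup (or a signal) arrives.
 * Both run with rq->sem held; prio_wait() drops it across schedule() and
 * reacquires it before returning.
 */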
static void prio_wakeup(struct spu_runqueue *rq)
{
	if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) {
		int best = sched_find_first_bit(rq->prio.bitmap);
		if (best < MAX_PRIO) {
			wait_queue_head_t *wq = &rq->prio.waitq[best];
			wake_up_interruptible_nr(wq, 1);
		}
	}
}

static void prio_wait(struct spu_runqueue *rq, u64 flags)
{
	int prio = current->prio;
	wait_queue_head_t *wq = &rq->prio.waitq[prio];
	DEFINE_WAIT(wait);

	__set_bit(prio, rq->prio.bitmap);
	atomic_inc(&rq->prio.nr_blocked);
	prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
	if (!signal_pending(current)) {
		up(&rq->sem);
		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
			 current->pid, current->prio);
		schedule();
		down(&rq->sem);
	}
	finish_wait(wq, &wait);
	atomic_dec(&rq->prio.nr_blocked);
	if (!waitqueue_active(wq))
		__clear_bit(prio, rq->prio.bitmap);
}

static inline int is_best_prio(struct spu_runqueue *rq)
{
	int best_prio;

	best_prio = sched_find_first_bit(rq->prio.bitmap);
	return (current->prio < best_prio) ? 1 : 0;
}

static inline void mm_needs_global_tlbie(struct mm_struct *mm)
{
	/* Global TLBIE broadcast required with SPEs. */
#if (NR_CPUS > 1)
	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS);
#else
	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */
#endif
}

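/*
 * bind_context() loads a saved context onto a physical SPU and restores its
 * state; unbind_context() is the inverse, saving the SPU state back into the
 * context save area and marking the context SPU_STATE_SAVED.  Callers hold
 * ctx->state_sema for writing; rq->sem is never held across the
 * save/restore (see the lock-order comment further down).
 */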
static inline void bind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid,
		 spu->number);
	spu->ctx = ctx;
	spu->flags = 0;
	ctx->spu = spu;
	ctx->ops = &spu_hw_ops;
	spu->pid = current->pid;
	spu->prio = current->prio;
	spu->mm = ctx->owner;
	mm_needs_global_tlbie(spu->mm);
	spu->ibox_callback = spufs_ibox_callback;
	spu->wbox_callback = spufs_wbox_callback;
	mb();
	spu_restore(&ctx->csa, spu);
}

static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__,
		 spu->pid, spu->number);
	spu_save(&ctx->csa, spu);
	ctx->state = SPU_STATE_SAVED;
	spu->ibox_callback = NULL;
	spu->wbox_callback = NULL;
	spu->mm = NULL;
	spu->pid = 0;
	spu->prio = MAX_PRIO;
	ctx->ops = &spu_backing_ops;
	ctx->spu = NULL;
	spu->ctx = NULL;
}

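/*
 * Scan the active list for a lower-priority context that is not currently
 * inside spu_run() (its state_sema can be taken for writing) and evict it.
 * On success the freed SPU is returned and rq->sem has been dropped; on
 * failure NULL is returned with rq->sem still held.
 */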
static struct spu *preempt_active(struct spu_runqueue *rq)
{
	struct list_head *p;
	struct spu_context *ctx;
	struct spu *spu;

	/* Future: implement real preemption.  For now just
	 * boot a lower priority ctx that is in "detached"
	 * state, i.e. on a processor but not currently in
	 * spu_run().
	 */
	list_for_each(p, &rq->active_list) {
		spu = list_entry(p, struct spu, sched_list);
		if (current->prio < spu->prio) {
			ctx = spu->ctx;
			if (down_write_trylock(&ctx->state_sema)) {
				if (ctx->state != SPU_STATE_RUNNABLE) {
					up_write(&ctx->state_sema);
					continue;
				}
				pr_debug("%s: booting pid=%d from SPU %d\n",
					 __FUNCTION__, spu->pid, spu->number);
				del_active(rq, spu);
				up(&rq->sem);
				unbind_context(spu, ctx);
				up_write(&ctx->state_sema);
				return spu;
			}
		}
	}
	return NULL;
}

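/*
 * Claim an idle SPU for the current thread.  If none is idle and this thread
 * has the best priority, try to steal one from a lower-priority context via
 * preempt_active(); otherwise sleep in prio_wait() until an SPU is released.
 * Returns NULL if interrupted by a signal.
 */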
static struct spu *get_idle_spu(u64 flags)
{
	struct spu_runqueue *rq;
	struct spu *spu = NULL;

	rq = spu_rq();
	down(&rq->sem);
	for (;;) {
		if (rq->nr_idle > 0) {
			if (is_best_prio(rq)) {
				/* Fall through. */
				spu = del_idle(rq);
				break;
			} else {
				prio_wakeup(rq);
				up(&rq->sem);
				yield();
				if (signal_pending(current)) {
					return NULL;
				}
				rq = spu_rq();
				down(&rq->sem);
				continue;
			}
		} else {
			if (is_best_prio(rq)) {
				if ((spu = preempt_active(rq)) != NULL)
					return spu;
			}
			prio_wait(rq, flags);
			if (signal_pending(current)) {
				prio_wakeup(rq);
				spu = NULL;
				break;
			}
			continue;
		}
	}
	up(&rq->sem);
	return spu;
}

static void put_idle_spu(struct spu *spu)
{
	struct spu_runqueue *rq = spu->rq;

	down(&rq->sem);
	add_idle(rq, spu);
	prio_wakeup(rq);
	up(&rq->sem);
}

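/*
 * get_active_spu() removes an SPU from the active list if it is still there
 * and returns whether it was found; put_active_spu() adds it back.  The
 * "found" result tells spu_deactivate() whether the SPU must be returned to
 * the idle list.
 */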
static int get_active_spu(struct spu *spu)
{
	struct spu_runqueue *rq = spu->rq;
	struct list_head *p;
	struct spu *tmp;
	int rc = 0;

	down(&rq->sem);
	list_for_each(p, &rq->active_list) {
		tmp = list_entry(p, struct spu, sched_list);
		if (tmp == spu) {
			del_active(rq, spu);
			rc = 1;
			break;
		}
	}
	up(&rq->sem);
	return rc;
}

static void put_active_spu(struct spu *spu)
{
	struct spu_runqueue *rq = spu->rq;

	down(&rq->sem);
	add_active(rq, spu);
	up(&rq->sem);
}

/* Lock order:
 * spu_activate() & spu_deactivate() require the
 * caller to have down_write(&ctx->state_sema).
 *
 * The rq->sem is briefly held (inside or outside a
 * given ctx lock) for list management, but is never
 * held during save/restore.
 */

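/*
 * spu_activate - find an idle SPU for @ctx and bind the context to it.
 * Called with ctx->state_sema held for writing.  Returns 0 on success,
 * -ERESTARTSYS if interrupted by a signal while waiting for an SPU, or
 * -EAGAIN if no SPU could be obtained for another reason.
 *
 * Illustrative caller pattern (a sketch only; the real callers live
 * elsewhere in spufs):
 *
 *	down_write(&ctx->state_sema);
 *	ret = spu_activate(ctx, 0);
 *	up_write(&ctx->state_sema);
 */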
int spu_activate(struct spu_context *ctx, u64 flags)
{
	struct spu *spu;

	if (ctx->spu)
		return 0;
	spu = get_idle_spu(flags);
	if (!spu)
		return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
	bind_context(spu, ctx);
	put_active_spu(spu);
	return 0;
}

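/*
 * spu_deactivate - unbind @ctx from its SPU, if any, saving the context
 * state and returning the SPU to the idle list when it was still on the
 * active list.  Called with ctx->state_sema held for writing.
 */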
void spu_deactivate(struct spu_context *ctx)
{
	struct spu *spu;
	int needs_idle;

	spu = ctx->spu;
	if (!spu)
		return;
	needs_idle = get_active_spu(spu);
	unbind_context(spu, ctx);
	if (needs_idle)
		put_idle_spu(spu);
}

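/*
 * spu_yield - voluntarily give up the SPU if a thread of equal or higher
 * priority is waiting for one.  This implements the cooperative model
 * described at the top of the file; it is a no-op when the context lock
 * cannot be taken without blocking.
 */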
void spu_yield(struct spu_context *ctx)
{
	struct spu *spu;

	if (!down_write_trylock(&ctx->state_sema))
		return;
	spu = ctx->spu;
	if ((ctx->state == SPU_STATE_RUNNABLE) &&
	    (sched_find_first_bit(spu->rq->prio.bitmap) <= current->prio)) {
		pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number);
		spu_deactivate(ctx);
		ctx->state = SPU_STATE_SAVED;
	}
	up_write(&ctx->state_sema);
}

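/*
 * spu_sched_init - allocate the global runqueue, claim every physical SPU
 * via spu_alloc() and place it on the idle list.  Returns 0 on success and
 * 1 if the runqueue cannot be allocated or no SPUs are available.
 */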
int __init spu_sched_init(void)
{
	struct spu_runqueue *rq;
	struct spu *spu;
	int i;

	rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL);
	if (!rq) {
		printk(KERN_WARNING "%s: Unable to allocate runqueues.\n",
		       __FUNCTION__);
		return 1;
	}
	memset(rq, 0, sizeof(struct spu_runqueue));
	init_MUTEX(&rq->sem);
	INIT_LIST_HEAD(&rq->active_list);
	INIT_LIST_HEAD(&rq->idle_list);
	rq->nr_active = 0;
	rq->nr_idle = 0;
	rq->nr_switches = 0;
	atomic_set(&rq->prio.nr_blocked, 0);
	for (i = 0; i < MAX_PRIO; i++) {
		init_waitqueue_head(&rq->prio.waitq[i]);
		__clear_bit(i, rq->prio.bitmap);
	}
	__set_bit(MAX_PRIO, rq->prio.bitmap);
	for (;;) {
		spu = spu_alloc();
		if (!spu)
			break;
		pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number);
		add_idle(rq, spu);
		spu->rq = rq;
	}
	if (!rq->nr_idle) {
		printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__);
		kfree(rq);
		return 1;
	}
	return 0;
}

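/*
 * spu_sched_exit - return all idle SPUs to the low-level allocator and free
 * the runqueue.  Any SPU still on the active list at this point is not
 * reclaimed here.
 */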
void __exit spu_sched_exit(void)
{
	struct spu_runqueue *rq = spu_rq();
	struct spu *spu;

	if (!rq) {
		printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__);
		return;
	}
	while (rq->nr_idle > 0) {
		spu = del_idle(rq);
		if (!spu)
			break;
		spu_free(spu);
	}
	kfree(rq);
}