/* sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * 2006-03-31	NUMA domains added.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/numa.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/pid_namespace.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include <asm/spu_priv1.h>
#include "spufs.h"

struct spu_prio_array {
        DECLARE_BITMAP(bitmap, MAX_PRIO);
        struct list_head runq[MAX_PRIO];
        spinlock_t runq_lock;
        struct list_head active_list[MAX_NUMNODES];
        struct mutex active_mutex[MAX_NUMNODES];
        int nr_active[MAX_NUMNODES];
        int nr_waiting;
};

static unsigned long spu_avenrun[3];
static struct spu_prio_array *spu_prio;
static struct task_struct *spusched_task;
static struct timer_list spusched_timer;

/*
 * Priority of a normal, non-rt, non-niced process (aka nice level 0).
 */
#define NORMAL_PRIO		120

/*
 * Frequency of the spu scheduler tick.  By default we do one SPU scheduler
 * tick for every 10 CPU scheduler ticks.
 */
#define SPUSCHED_TICK		(10)

/*
 * These are the 'tuning knobs' of the scheduler:
 *
 * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is
 * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs.
 */
#define MIN_SPU_TIMESLICE	max(5 * HZ / (1000 * SPUSCHED_TICK), 1)
#define DEF_SPU_TIMESLICE	(100 * HZ / (1000 * SPUSCHED_TICK))

#define MAX_USER_PRIO		(MAX_PRIO - MAX_RT_PRIO)
#define SCALE_PRIO(x, prio) \
	max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE)

/*
 * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values:
 * [800ms ... 100ms ... 5ms]
 *
 * The higher a thread's priority, the bigger timeslices
 * it gets during one round of execution.  But even the lowest
 * priority thread gets MIN_SPU_TIMESLICE worth of execution time.
 */
void spu_set_timeslice(struct spu_context *ctx)
{
        if (ctx->prio < NORMAL_PRIO)
                ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio);
        else
                ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio);
}
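
/*
 * Worked example, assuming HZ == 1000 (so one spu scheduler tick spans
 * SPUSCHED_TICK == 10 jiffies, i.e. 10 msecs; other HZ values give
 * slightly different numbers):
 *
 *	DEF_SPU_TIMESLICE = 100 * 1000 / (1000 * 10) = 10 ticks (~100 msecs)
 *	MIN_SPU_TIMESLICE = max(5 * 1000 / (1000 * 10), 1) = 1 tick (~10 msecs)
 *
 *	nice -20 (prio 100): max(40 * (140 - 100) / 20, 1) = 80 ticks (~800 msecs)
 *	nice   0 (prio 120): max(10 * (140 - 120) / 20, 1) = 10 ticks (~100 msecs)
 *	nice  19 (prio 139): max(10 * (140 - 139) / 20, 1) =  1 tick (clamped
 *			     to MIN_SPU_TIMESLICE)
 */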

/*
 * Update scheduling information from the owning thread.
 */
void __spu_update_sched_info(struct spu_context *ctx)
{
        /*
         * 32-bit assignments are atomic on powerpc, and we don't care about
         * memory ordering here because retrieving the controlling thread is
         * by definition racy.
         */
        ctx->tid = current->pid;

        /*
         * We do our own priority calculations, so we normally want
         * ->static_prio to start with.  Unfortunately this field
         * contains junk for threads with a realtime scheduling
         * policy so we have to look at ->prio in this case.
         */
        if (rt_prio(current->prio))
                ctx->prio = current->prio;
        else
                ctx->prio = current->static_prio;
        ctx->policy = current->policy;

        /*
         * A lot of places that don't hold active_mutex poke into
         * cpus_allowed, including grab_runnable_context which
         * already holds the runq_lock.  So abuse runq_lock
         * to protect this field as well.
         */
        spin_lock(&spu_prio->runq_lock);
        ctx->cpus_allowed = current->cpus_allowed;
        spin_unlock(&spu_prio->runq_lock);
}

void spu_update_sched_info(struct spu_context *ctx)
{
        int node = ctx->spu->node;

        mutex_lock(&spu_prio->active_mutex[node]);
        __spu_update_sched_info(ctx);
        mutex_unlock(&spu_prio->active_mutex[node]);
}

static int __node_allowed(struct spu_context *ctx, int node)
{
        if (nr_cpus_node(node)) {
                cpumask_t mask = node_to_cpumask(node);

                if (cpus_intersects(mask, ctx->cpus_allowed))
                        return 1;
        }

        return 0;
}

static int node_allowed(struct spu_context *ctx, int node)
{
        int rval;

        spin_lock(&spu_prio->runq_lock);
        rval = __node_allowed(ctx, node);
        spin_unlock(&spu_prio->runq_lock);

        return rval;
}

/**
 * spu_add_to_active_list - add spu to active list
 * @spu:	spu to add to the active list
 */
static void spu_add_to_active_list(struct spu *spu)
{
        int node = spu->node;

        mutex_lock(&spu_prio->active_mutex[node]);
        spu_prio->nr_active[node]++;
        list_add_tail(&spu->list, &spu_prio->active_list[node]);
        mutex_unlock(&spu_prio->active_mutex[node]);
}

static void __spu_remove_from_active_list(struct spu *spu)
{
        list_del_init(&spu->list);
        spu_prio->nr_active[spu->node]--;
}

/**
 * spu_remove_from_active_list - remove spu from active list
 * @spu:	spu to remove from the active list
 */
static void spu_remove_from_active_list(struct spu *spu)
{
        int node = spu->node;

        mutex_lock(&spu_prio->active_mutex[node]);
        __spu_remove_from_active_list(spu);
        mutex_unlock(&spu_prio->active_mutex[node]);
}

static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);

static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
{
        blocking_notifier_call_chain(&spu_switch_notifier,
                                     ctx ? ctx->object_id : 0, spu);
}

int spu_switch_event_register(struct notifier_block *n)
{
        return blocking_notifier_chain_register(&spu_switch_notifier, n);
}

int spu_switch_event_unregister(struct notifier_block *n)
{
        return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
}

/**
 * spu_bind_context - bind spu context to physical spu
 * @spu:	physical spu to bind to
 * @ctx:	context to bind
 */
static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
{
        pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
                 spu->number, spu->node);
        spu->ctx = ctx;
        spu->flags = 0;
        ctx->spu = spu;
        ctx->ops = &spu_hw_ops;
        spu->pid = current->pid;
        spu_associate_mm(spu, ctx->owner);
        spu->ibox_callback = spufs_ibox_callback;
        spu->wbox_callback = spufs_wbox_callback;
        spu->stop_callback = spufs_stop_callback;
        spu->mfc_callback = spufs_mfc_callback;
        spu->dma_callback = spufs_dma_callback;
        mb();
        spu_unmap_mappings(ctx);
        spu_restore(&ctx->csa, spu);
        spu->timestamp = jiffies;
        spu_cpu_affinity_set(spu, raw_smp_processor_id());
        spu_switch_notify(spu, ctx);
        ctx->state = SPU_STATE_RUNNABLE;
}

/**
 * spu_unbind_context - unbind spu context from physical spu
 * @spu:	physical spu to unbind from
 * @ctx:	context to unbind
 */
static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
{
        pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
                 spu->pid, spu->number, spu->node);

        spu_switch_notify(spu, NULL);
        spu_unmap_mappings(ctx);
        spu_save(&ctx->csa, spu);
        spu->timestamp = jiffies;
        ctx->state = SPU_STATE_SAVED;
        spu->ibox_callback = NULL;
        spu->wbox_callback = NULL;
        spu->stop_callback = NULL;
        spu->mfc_callback = NULL;
        spu->dma_callback = NULL;
        spu_associate_mm(spu, NULL);
        spu->pid = 0;
        ctx->ops = &spu_backing_ops;
        ctx->spu = NULL;
        spu->flags = 0;
        spu->ctx = NULL;
}

/**
 * __spu_add_to_rq - add a context to the runqueue
 * @ctx:	context to add
 */
static void __spu_add_to_rq(struct spu_context *ctx)
{
        int prio = ctx->prio;

        spu_prio->nr_waiting++;
        list_add_tail(&ctx->rq, &spu_prio->runq[prio]);
        set_bit(prio, spu_prio->bitmap);
}

static void __spu_del_from_rq(struct spu_context *ctx)
{
        int prio = ctx->prio;

        if (!list_empty(&ctx->rq)) {
                list_del_init(&ctx->rq);
                spu_prio->nr_waiting--;
        }
        if (list_empty(&spu_prio->runq[prio]))
                clear_bit(prio, spu_prio->bitmap);
}
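
/*
 * The runqueue is an O(1)-style priority array: one list per priority
 * level plus a bitmap of the non-empty levels.  __spu_add_to_rq and
 * __spu_del_from_rq keep the bitmap in sync so that
 * grab_runnable_context() can locate the highest priority waiter with a
 * single sched_find_first_bit() instead of scanning all MAX_PRIO lists.
 */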

static void spu_prio_wait(struct spu_context *ctx)
{
        DEFINE_WAIT(wait);

        spin_lock(&spu_prio->runq_lock);
        prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
        if (!signal_pending(current)) {
                __spu_add_to_rq(ctx);
                spin_unlock(&spu_prio->runq_lock);
                mutex_unlock(&ctx->state_mutex);
                schedule();
                mutex_lock(&ctx->state_mutex);
                spin_lock(&spu_prio->runq_lock);
                __spu_del_from_rq(ctx);
        }
        spin_unlock(&spu_prio->runq_lock);
        __set_current_state(TASK_RUNNING);
        remove_wait_queue(&ctx->stop_wq, &wait);
}
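
/*
 * Note on the sleep protocol in spu_prio_wait() above: the context
 * enqueues itself on the runqueue and sleeps with ctx->state_mutex
 * dropped.  It is woken either by a signal or by whoever frees an spu
 * and picks it via grab_runnable_context(), which already dequeues it
 * under runq_lock; the __spu_del_from_rq() on wakeup is then a no-op
 * and only cleans up after a signal.
 */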

static struct spu *spu_get_idle(struct spu_context *ctx)
{
        struct spu *spu = NULL;
        int node = cpu_to_node(raw_smp_processor_id());
        int n;

        for (n = 0; n < MAX_NUMNODES; n++, node++) {
                node = (node < MAX_NUMNODES) ? node : 0;
                if (!node_allowed(ctx, node))
                        continue;
                spu = spu_alloc_node(node);
                if (spu)
                        break;
        }
        return spu;
}

/**
 * find_victim - find a lower priority context to preempt
 * @ctx:	candidate context for running
 *
 * Returns the freed physical spu to run the new context on.
 */
static struct spu *find_victim(struct spu_context *ctx)
{
        struct spu_context *victim = NULL;
        struct spu *spu;
        int node, n;

        /*
         * Look for a possible preemption candidate on the local node first.
         * If there is no candidate look at the other nodes.  This isn't
         * exactly fair, but so far the whole spu scheduler tries to keep
         * a strong node affinity.  We might want to fine-tune this in
         * the future.
         */
 restart:
        node = cpu_to_node(raw_smp_processor_id());
        for (n = 0; n < MAX_NUMNODES; n++, node++) {
                node = (node < MAX_NUMNODES) ? node : 0;
                if (!node_allowed(ctx, node))
                        continue;

                mutex_lock(&spu_prio->active_mutex[node]);
                list_for_each_entry(spu, &spu_prio->active_list[node], list) {
                        struct spu_context *tmp = spu->ctx;

                        if (tmp->prio > ctx->prio &&
                            (!victim || tmp->prio > victim->prio))
                                victim = spu->ctx;
                }
                mutex_unlock(&spu_prio->active_mutex[node]);

                if (victim) {
                        /*
                         * This nests ctx->state_mutex, but we always lock
                         * higher priority contexts before lower priority
                         * ones, so this is safe until we introduce
                         * priority inheritance schemes.
                         */
                        if (!mutex_trylock(&victim->state_mutex)) {
                                victim = NULL;
                                goto restart;
                        }

                        spu = victim->spu;
                        if (!spu) {
                                /*
                                 * This race can happen because we've dropped
                                 * the active list mutex.  Not a problem, just
                                 * restart the search.
                                 */
                                mutex_unlock(&victim->state_mutex);
                                victim = NULL;
                                goto restart;
                        }
                        spu_remove_from_active_list(spu);
                        spu_unbind_context(spu, victim);
                        mutex_unlock(&victim->state_mutex);
                        /*
                         * We need to break out of the wait loop in spu_run
                         * manually to ensure this context gets put on the
                         * runqueue again ASAP.
                         */
                        wake_up(&victim->stop_wq);
                        return spu;
                }
        }

        return NULL;
}
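
/*
 * Reminder for the priority comparisons in find_victim() above and
 * grab_runnable_context() below: as in the CPU scheduler, a larger
 * ->prio value means a *lower* priority, so "tmp->prio > ctx->prio"
 * selects contexts of strictly lower priority than @ctx.
 */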

/**
 * spu_activate - find a free spu for a context and execute it
 * @ctx:	spu context to schedule
 * @flags:	flags (currently ignored)
 *
 * Tries to find a free spu to run @ctx.  If no free spu is available
 * add the context to the runqueue so it gets woken up once an spu
 * is available.
 */
int spu_activate(struct spu_context *ctx, unsigned long flags)
{
        if (ctx->spu)
                return 0;

        do {
                struct spu *spu;

                spu = spu_get_idle(ctx);
                /*
                 * If this is a realtime thread we try to get it running by
                 * preempting a lower priority thread.
                 */
                if (!spu && rt_prio(ctx->prio))
                        spu = find_victim(ctx);
                if (spu) {
                        spu_bind_context(spu, ctx);
                        spu_add_to_active_list(spu);
                        return 0;
                }

                spu_prio_wait(ctx);
        } while (!signal_pending(current));

        return -ERESTARTSYS;
}

/**
 * grab_runnable_context - try to find a runnable context
 *
 * Remove the highest priority context on the runqueue and return it
 * to the caller.  Returns %NULL if no runnable context was found.
 */
static struct spu_context *grab_runnable_context(int prio, int node)
{
        struct spu_context *ctx;
        int best;

        spin_lock(&spu_prio->runq_lock);
        best = sched_find_first_bit(spu_prio->bitmap);
        while (best < prio) {
                struct list_head *rq = &spu_prio->runq[best];

                list_for_each_entry(ctx, rq, rq) {
                        /* XXX(hch): check for affinity here as well */
                        if (__node_allowed(ctx, node)) {
                                __spu_del_from_rq(ctx);
                                goto found;
                        }
                }
                best++;
        }
        ctx = NULL;
 found:
        spin_unlock(&spu_prio->runq_lock);
        return ctx;
}
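
/*
 * Note the meaning of the @prio cutoff above: only contexts whose
 * priority value is strictly below @prio (i.e. of higher priority) are
 * taken.  spu_deactivate() and spu_yield() pass MAX_PRIO (via
 * __spu_deactivate()) to accept any waiter, while spusched_tick()
 * passes ctx->prio + 1 so an expired context is only replaced by a
 * waiter of equal or higher priority.
 */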

static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
{
        struct spu *spu = ctx->spu;
        struct spu_context *new = NULL;

        if (spu) {
                new = grab_runnable_context(max_prio, spu->node);
                if (new || force) {
                        spu_remove_from_active_list(spu);
                        spu_unbind_context(spu, ctx);
                        spu_free(spu);
                        if (new)
                                wake_up(&new->stop_wq);
                }
        }

        return new != NULL;
}

/**
 * spu_deactivate - unbind a context from its physical spu
 * @ctx:	spu context to unbind
 *
 * Unbind @ctx from the physical spu it is running on and schedule
 * the highest priority context to run on the freed physical spu.
 */
void spu_deactivate(struct spu_context *ctx)
{
        /*
         * We must never reach this for a nosched context,
         * but handle the case gracefully instead of panicking.
         */
        if (ctx->flags & SPU_CREATE_NOSCHED) {
                WARN_ON(1);
                return;
        }

        __spu_deactivate(ctx, 1, MAX_PRIO);
}

/**
 * spu_yield - yield a physical spu if others are waiting
 * @ctx:	spu context to yield
 *
 * Check if there is a higher priority context waiting and if yes
 * unbind @ctx from the physical spu and schedule the highest
 * priority context to run on the freed physical spu instead.
 */
void spu_yield(struct spu_context *ctx)
{
        if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
                mutex_lock(&ctx->state_mutex);
                __spu_deactivate(ctx, 0, MAX_PRIO);
                mutex_unlock(&ctx->state_mutex);
        }
}

static void spusched_tick(struct spu_context *ctx)
{
        if (ctx->flags & SPU_CREATE_NOSCHED)
                return;
        if (ctx->policy == SCHED_FIFO)
                return;

        if (--ctx->time_slice)
                return;

        /*
         * Unfortunately active_mutex ranks outside of state_mutex, so
         * we have to trylock here.  If we fail give the context another
         * tick and try again.
         */
        if (mutex_trylock(&ctx->state_mutex)) {
                struct spu *spu = ctx->spu;
                struct spu_context *new;

                new = grab_runnable_context(ctx->prio + 1, spu->node);
                if (new) {
                        __spu_remove_from_active_list(spu);
                        spu_unbind_context(spu, ctx);
                        spu_free(spu);
                        wake_up(&new->stop_wq);
                        /*
                         * We need to break out of the wait loop in
                         * spu_run manually to ensure this context
                         * gets put on the runqueue again ASAP.
                         */
                        wake_up(&ctx->stop_wq);
                }
                spu_set_timeslice(ctx);
                mutex_unlock(&ctx->state_mutex);
        } else {
                ctx->time_slice++;
        }
}

/**
 * count_active_contexts - count nr of active contexts
 *
 * Return the number of contexts currently running or waiting to run.
 *
 * Note that we don't take runq_lock / active_mutex here.  Reading
 * a single 32bit value is atomic on powerpc, and we don't care
 * about memory ordering issues here.
 */
static unsigned long count_active_contexts(void)
{
        int nr_active = 0, node;

        for (node = 0; node < MAX_NUMNODES; node++)
                nr_active += spu_prio->nr_active[node];
        nr_active += spu_prio->nr_waiting;

        return nr_active;
}

/**
 * spu_calc_load - given tick count, update the avenrun load estimates.
 * @ticks:	tick count
 *
 * No locking against reading these values from userspace, as for
 * the CPU loadavg code.
 */
static void spu_calc_load(unsigned long ticks)
{
        unsigned long active_tasks; /* fixed-point */
        static int count = LOAD_FREQ;

        count -= ticks;

        if (unlikely(count < 0)) {
                active_tasks = count_active_contexts() * FIXED_1;
                do {
                        CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks);
                        CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks);
                        CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks);
                        count += LOAD_FREQ;
                } while (count < 0);
        }
}
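
/*
 * CALC_LOAD is the same fixed-point exponential average the CPU loadavg
 * code uses (see <linux/sched.h>), roughly:
 *
 *	load = load * exp / FIXED_1 + n * (FIXED_1 - exp) / FIXED_1
 *
 * with FIXED_1 == 1 << FSHIFT and exp chosen so that old samples decay
 * with 1, 5 and 15 minute time constants.  The update runs every
 * LOAD_FREQ ticks (about every 5 seconds); the do/while loop catches up
 * if more than one interval has elapsed since the last update.
 */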

static void spusched_wake(unsigned long data)
{
        mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
        wake_up_process(spusched_task);
        spu_calc_load(SPUSCHED_TICK);
}
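
/*
 * The scheduler tick is timer driven, but the per-context work is done
 * from the spusched kthread below rather than from the timer itself:
 * spusched_thread() takes active_mutex and spusched_tick() takes
 * ctx->state_mutex, and mutexes cannot be acquired from timer context.
 * The timer therefore only rearms itself, updates the load average and
 * wakes the kthread.
 */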

static int spusched_thread(void *unused)
{
        struct spu *spu, *next;
        int node;

        setup_timer(&spusched_timer, spusched_wake, 0);
        __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);

        while (!kthread_should_stop()) {
                set_current_state(TASK_INTERRUPTIBLE);
                schedule();
                for (node = 0; node < MAX_NUMNODES; node++) {
                        mutex_lock(&spu_prio->active_mutex[node]);
                        list_for_each_entry_safe(spu, next,
                                                 &spu_prio->active_list[node],
                                                 list)
                                spusched_tick(spu->ctx);
                        mutex_unlock(&spu_prio->active_mutex[node]);
                }
        }

        del_timer_sync(&spusched_timer);
        return 0;
}

#define LOAD_INT(x) ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
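
/*
 * LOAD_INT/LOAD_FRAC split a fixed-point load value into its integer
 * part and a two-digit fraction for display.  With the kernel's FSHIFT
 * of 11 (FIXED_1 == 2048), a raw value of 3072 prints as "1.50":
 * 3072 >> 11 == 1 and ((3072 & 2047) * 100) >> 11 == 50.  The
 * FIXED_1/200 added in show_spu_loadavg() below rounds to the nearest
 * hundredth.
 */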

static int show_spu_loadavg(struct seq_file *s, void *private)
{
        int a, b, c;

        a = spu_avenrun[0] + (FIXED_1/200);
        b = spu_avenrun[1] + (FIXED_1/200);
        c = spu_avenrun[2] + (FIXED_1/200);

        /*
         * Note that last_pid doesn't really make much sense for the
         * SPU loadavg (it even seems very odd on the CPU side..),
         * but we include it here to have a 100% compatible interface.
         */
        seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
                LOAD_INT(a), LOAD_FRAC(a),
                LOAD_INT(b), LOAD_FRAC(b),
                LOAD_INT(c), LOAD_FRAC(c),
                count_active_contexts(),
                atomic_read(&nr_spu_contexts),
                current->nsproxy->pid_ns->last_pid);
        return 0;
}

static int spu_loadavg_open(struct inode *inode, struct file *file)
{
        return single_open(file, show_spu_loadavg, NULL);
}

static const struct file_operations spu_loadavg_fops = {
        .open		= spu_loadavg_open,
        .read		= seq_read,
        .llseek		= seq_lseek,
        .release	= single_release,
};

int __init spu_sched_init(void)
{
        struct proc_dir_entry *entry;
        int err = -ENOMEM, i;

        spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
        if (!spu_prio)
                goto out;

        for (i = 0; i < MAX_PRIO; i++) {
                INIT_LIST_HEAD(&spu_prio->runq[i]);
                __clear_bit(i, spu_prio->bitmap);
        }
        __set_bit(MAX_PRIO, spu_prio->bitmap);
        for (i = 0; i < MAX_NUMNODES; i++) {
                mutex_init(&spu_prio->active_mutex[i]);
                INIT_LIST_HEAD(&spu_prio->active_list[i]);
        }
        spin_lock_init(&spu_prio->runq_lock);

        spusched_task = kthread_run(spusched_thread, NULL, "spusched");
        if (IS_ERR(spusched_task)) {
                err = PTR_ERR(spusched_task);
                goto out_free_spu_prio;
        }

        entry = create_proc_entry("spu_loadavg", 0, NULL);
        if (!entry)
                goto out_stop_kthread;
        entry->proc_fops = &spu_loadavg_fops;

        pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n",
                        SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE);
        return 0;

 out_stop_kthread:
        kthread_stop(spusched_task);
 out_free_spu_prio:
        kfree(spu_prio);
 out:
        return err;
}

void __exit spu_sched_exit(void)
{
        struct spu *spu, *tmp;
        int node;

        remove_proc_entry("spu_loadavg", NULL);

        kthread_stop(spusched_task);

        for (node = 0; node < MAX_NUMNODES; node++) {
                mutex_lock(&spu_prio->active_mutex[node]);
                list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
                                         list) {
                        list_del_init(&spu->list);
                        spu_free(spu);
                }
                mutex_unlock(&spu_prio->active_mutex[node]);
        }
        kfree(spu_prio);
}