[POWERPC] spu sched: forced preemption at execution

If we start a spu context with realtime priority, we want it to run
immediately and not wait until some other lower priority thread has
finished.  Try to find a suitable victim and use its spu in this
case.
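
To illustrate the intended use (a sketch, not part of this patch; only
sched_setscheduler() is standard API here, the spufs calls are elided):
a thread that switches itself to a realtime policy before creating its
context gets ctx->rt_priority inherited from current->rt_priority, and
spu_activate() can then evict a lower priority context for it.

	#include <sched.h>
	#include <stdio.h>

	int main(void)
	{
		struct sched_param param = { .sched_priority = 50 };

		/* Become SCHED_FIFO; a spu context created after this
		 * inherits rt_priority == 50 and may preempt a lower
		 * priority context instead of waiting for a free spu. */
		if (sched_setscheduler(0, SCHED_FIFO, &param) < 0) {
			perror("sched_setscheduler");
			return 1;
		}
		/* ... spu_create() / spu_run() as usual ... */
		return 0;
	}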

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
index 056a8ad..d581f4e 100644
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -53,6 +53,7 @@
 	ctx->owner = get_task_mm(current);
 	if (gang)
 		spu_gang_add_ctx(gang, ctx);
+	ctx->rt_priority = current->rt_priority;
 	ctx->prio = current->prio;
 	goto out;
 out_free:
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index eb06a03..814f65e 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -282,6 +282,74 @@
 }
 
 /**
+ * find_victim - find a lower priority context to preempt
+ * @ctx:	candidate context for running
+ *
+ * Returns the freed physical spu to run the new context on, or NULL
+ * if no suitable victim was found.
+ */
+static struct spu *find_victim(struct spu_context *ctx)
+{
+	struct spu_context *victim = NULL;
+	struct spu *spu;
+	int node, n;
+
+	/*
+	 * Look for a possible preemption candidate on the local node first.
+	 * If there is no candidate, look at the other nodes.  This isn't
+	 * exactly fair, but so far the whole spu scheduler tries to keep
+	 * a strong node affinity.  We might want to fine-tune this in
+	 * the future.
+	 */
+ restart:
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(node))
+			continue;
+
+		mutex_lock(&spu_prio->active_mutex[node]);
+		list_for_each_entry(spu, &spu_prio->active_list[node], list) {
+			struct spu_context *tmp = spu->ctx;
+
+			if (tmp->rt_priority < ctx->rt_priority &&
+			    (!victim || tmp->rt_priority < victim->rt_priority))
+				victim = tmp;
+		}
+		mutex_unlock(&spu_prio->active_mutex[node]);
+
+		if (victim) {
+			/*
+			 * This nests ctx->state_mutex, but we always lock
+			 * higher priority contexts before lower priority
+			 * ones, so this is safe until we introduce
+			 * priority inheritance schemes.
+			 */
+			if (!mutex_trylock(&victim->state_mutex)) {
+				victim = NULL;
+				goto restart;
+			}
+
+			spu = victim->spu;
+			if (!spu) {
+				/*
+				 * This race can happen because we've dropped
+				 * the active list mutex.  Not a problem, just
+				 * restart the search.
+				 */
+				mutex_unlock(&victim->state_mutex);
+				victim = NULL;
+				goto restart;
+			}
+			spu_unbind_context(spu, victim);
+			mutex_unlock(&victim->state_mutex);
+			return spu;
+		}
+	}
+
+	return NULL;
+}
+
+/**
  * spu_activate - find a free spu for a context and execute it
  * @ctx:	spu context to schedule
  * @flags:	flags (currently ignored)
@@ -300,6 +368,12 @@
 		struct spu *spu;
 
 		spu = spu_get_idle(ctx);
+		/*
+		 * If this is a realtime thread we try to get it running by
+		 * preempting a lower priority thread.
+		 */
+		if (!spu && ctx->rt_priority)
+			spu = find_victim(ctx);
 		if (spu) {
 			spu_bind_context(spu, ctx);
 			return 0;
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 421f591..85b182d 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -83,6 +83,7 @@
 	/* scheduler fields */
  	struct list_head rq;
 	unsigned long sched_flags;
+	unsigned long rt_priority;
 	int prio;
 };
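
For reference on the comparison in find_victim(): the kernel keeps
rt_priority at 0 for non-realtime tasks and in the range 1..99 for
SCHED_FIFO and SCHED_RR, with larger values meaning higher priority,
so a victim must have a strictly smaller rt_priority than the new
context.  A hypothetical helper (not part of this patch) making that
test explicit:

	/* may "high" preempt "low"?  (illustration only) */
	static inline int spu_may_preempt(struct spu_context *high,
					  struct spu_context *low)
	{
		return low->rt_priority < high->rt_priority;
	}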