Merge branch 'spufs' of master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6 into for-2.6.22
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index eba7a26..8086eb1 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -36,6 +36,8 @@
 #include <asm/xmon.h>
 
 const struct spu_management_ops *spu_management_ops;
+EXPORT_SYMBOL_GPL(spu_management_ops);
+
 const struct spu_priv1_ops *spu_priv1_ops;
 
 static struct list_head spu_list[MAX_NUMNODES];
@@ -290,7 +292,6 @@
 
 	return stat ? IRQ_HANDLED : IRQ_NONE;
 }
-EXPORT_SYMBOL_GPL(spu_irq_class_1_bottom);
 
 static irqreturn_t
 spu_irq_class_2(int irq, void *data)
@@ -431,10 +432,11 @@
 		spu = list_entry(spu_list[node].next, struct spu, list);
 		list_del_init(&spu->list);
 		pr_debug("Got SPU %d %d\n", spu->number, spu->node);
-		spu_init_channels(spu);
 	}
 	mutex_unlock(&spu_mutex);
 
+	if (spu)
+		spu_init_channels(spu);
 	return spu;
 }
 EXPORT_SYMBOL_GPL(spu_alloc_node);
@@ -461,108 +463,6 @@
 }
 EXPORT_SYMBOL_GPL(spu_free);
 
-static int spu_handle_mm_fault(struct spu *spu)
-{
-	struct mm_struct *mm = spu->mm;
-	struct vm_area_struct *vma;
-	u64 ea, dsisr, is_write;
-	int ret;
-
-	ea = spu->dar;
-	dsisr = spu->dsisr;
-#if 0
-	if (!IS_VALID_EA(ea)) {
-		return -EFAULT;
-	}
-#endif /* XXX */
-	if (mm == NULL) {
-		return -EFAULT;
-	}
-	if (mm->pgd == NULL) {
-		return -EFAULT;
-	}
-
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, ea);
-	if (!vma)
-		goto bad_area;
-	if (vma->vm_start <= ea)
-		goto good_area;
-	if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto bad_area;
-#if 0
-	if (expand_stack(vma, ea))
-		goto bad_area;
-#endif /* XXX */
-good_area:
-	is_write = dsisr & MFC_DSISR_ACCESS_PUT;
-	if (is_write) {
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
-	} else {
-		if (dsisr & MFC_DSISR_ACCESS_DENIED)
-			goto bad_area;
-		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
-			goto bad_area;
-	}
-	ret = 0;
-	switch (handle_mm_fault(mm, vma, ea, is_write)) {
-	case VM_FAULT_MINOR:
-		current->min_flt++;
-		break;
-	case VM_FAULT_MAJOR:
-		current->maj_flt++;
-		break;
-	case VM_FAULT_SIGBUS:
-		ret = -EFAULT;
-		goto bad_area;
-	case VM_FAULT_OOM:
-		ret = -ENOMEM;
-		goto bad_area;
-	default:
-		BUG();
-	}
-	up_read(&mm->mmap_sem);
-	return ret;
-
-bad_area:
-	up_read(&mm->mmap_sem);
-	return -EFAULT;
-}
-
-int spu_irq_class_1_bottom(struct spu *spu)
-{
-	u64 ea, dsisr, access, error = 0UL;
-	int ret = 0;
-
-	ea = spu->dar;
-	dsisr = spu->dsisr;
-	if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) {
-		u64 flags;
-
-		access = (_PAGE_PRESENT | _PAGE_USER);
-		access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
-		local_irq_save(flags);
-		if (hash_page(ea, access, 0x300) != 0)
-			error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-		local_irq_restore(flags);
-	}
-	if (error & CLASS1_ENABLE_STORAGE_FAULT_INTR) {
-		if ((ret = spu_handle_mm_fault(spu)) != 0)
-			error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-		else
-			error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR;
-	}
-	spu->dar = 0UL;
-	spu->dsisr = 0UL;
-	if (!error) {
-		spu_restart_dma(spu);
-	} else {
-		spu->dma_callback(spu, SPE_EVENT_SPE_DATA_STORAGE);
-	}
-	return ret;
-}
-
 struct sysdev_class spu_sysdev_class = {
 	set_kset_name("spu")
 };
@@ -636,12 +536,6 @@
 	return 0;
 }
 
-static void spu_destroy_sysdev(struct spu *spu)
-{
-	sysfs_remove_device_from_node(&spu->sysdev, spu->node);
-	sysdev_unregister(&spu->sysdev);
-}
-
 static int __init create_spu(void *data)
 {
 	struct spu *spu;
@@ -693,58 +587,37 @@
 	return ret;
 }
 
-static void destroy_spu(struct spu *spu)
-{
-	list_del_init(&spu->list);
-	list_del_init(&spu->full_list);
-
-	spu_destroy_sysdev(spu);
-	spu_free_irqs(spu);
-	spu_destroy_spu(spu);
-	kfree(spu);
-}
-
-static void cleanup_spu_base(void)
-{
-	struct spu *spu, *tmp;
-	int node;
-
-	mutex_lock(&spu_mutex);
-	for (node = 0; node < MAX_NUMNODES; node++) {
-		list_for_each_entry_safe(spu, tmp, &spu_list[node], list)
-			destroy_spu(spu);
-	}
-	mutex_unlock(&spu_mutex);
-	sysdev_class_unregister(&spu_sysdev_class);
-}
-module_exit(cleanup_spu_base);
-
 static int __init init_spu_base(void)
 {
-	int i, ret;
+	int i, ret = 0;
+
+	for (i = 0; i < MAX_NUMNODES; i++)
+		INIT_LIST_HEAD(&spu_list[i]);
 
 	if (!spu_management_ops)
-		return 0;
+		goto out;
 
 	/* create sysdev class for spus */
 	ret = sysdev_class_register(&spu_sysdev_class);
 	if (ret)
-		return ret;
-
-	for (i = 0; i < MAX_NUMNODES; i++)
-		INIT_LIST_HEAD(&spu_list[i]);
+		goto out;
 
 	ret = spu_enumerate_spus(create_spu);
 
 	if (ret) {
 		printk(KERN_WARNING "%s: Error initializing spus\n",
 			__FUNCTION__);
-		cleanup_spu_base();
-		return ret;
+		goto out_unregister_sysdev_class;
 	}
 
 	xmon_register_spus(&spu_full_list);
 
+	return 0;
+
+ out_unregister_sysdev_class:
+	sysdev_class_unregister(&spu_sysdev_class);
+ out:
 	return ret;
 }
 module_init(init_spu_base);
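The reworked init_spu_base() above drops the cleanup_spu_base()/module_exit() path and instead unwinds registration failures with the usual kernel goto idiom: every setup step that can fail jumps to a label that undoes exactly the steps that already succeeded, in reverse order. A minimal userspace sketch of that unwind pattern; all names here are hypothetical and not part of the patch:

#include <stdio.h>

/* Hypothetical setup/teardown pairs, standing in for
 * sysdev_class_register()/spu_enumerate_spus() and friends. */
static int register_a(void) { puts("register a"); return 0; }
static void unregister_a(void) { puts("unregister a"); }
static int register_b(void) { puts("register b"); return -1; /* simulated failure */ }

static int init_example(void)
{
	int ret;

	ret = register_a();
	if (ret)
		goto out;

	ret = register_b();
	if (ret)
		goto out_unregister_a;	/* undo only what already succeeded */

	return 0;

out_unregister_a:
	unregister_a();
out:
	return ret;
}

int main(void)
{
	return init_example() ? 1 : 0;
}
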
diff --git a/arch/powerpc/platforms/cell/spu_coredump.c b/arch/powerpc/platforms/cell/spu_coredump.c
index 6915b41..4fd37ff 100644
--- a/arch/powerpc/platforms/cell/spu_coredump.c
+++ b/arch/powerpc/platforms/cell/spu_coredump.c
@@ -26,19 +26,18 @@
 
 #include <asm/spu.h>
 
-static struct spu_coredump_calls spu_coredump_calls;
+static struct spu_coredump_calls *spu_coredump_calls;
 static DEFINE_MUTEX(spu_coredump_mutex);
 
 int arch_notes_size(void)
 {
 	long ret;
-	struct module *owner = spu_coredump_calls.owner;
 
 	ret = -ENOSYS;
 	mutex_lock(&spu_coredump_mutex);
-	if (owner && try_module_get(owner)) {
-		ret = spu_coredump_calls.arch_notes_size();
-		module_put(owner);
+	if (spu_coredump_calls && try_module_get(spu_coredump_calls->owner)) {
+		ret = spu_coredump_calls->arch_notes_size();
+		module_put(spu_coredump_calls->owner);
 	}
 	mutex_unlock(&spu_coredump_mutex);
 	return ret;
@@ -46,36 +45,35 @@
 
 void arch_write_notes(struct file *file)
 {
-	struct module *owner = spu_coredump_calls.owner;
-
 	mutex_lock(&spu_coredump_mutex);
-	if (owner && try_module_get(owner)) {
-		spu_coredump_calls.arch_write_notes(file);
-		module_put(owner);
+	if (spu_coredump_calls && try_module_get(spu_coredump_calls->owner)) {
+		spu_coredump_calls->arch_write_notes(file);
+		module_put(spu_coredump_calls->owner);
 	}
 	mutex_unlock(&spu_coredump_mutex);
 }
 
 int register_arch_coredump_calls(struct spu_coredump_calls *calls)
 {
-	if (spu_coredump_calls.owner)
-		return -EBUSY;
+	int ret = 0;
 
 	mutex_lock(&spu_coredump_mutex);
-	spu_coredump_calls.arch_notes_size = calls->arch_notes_size;
-	spu_coredump_calls.arch_write_notes = calls->arch_write_notes;
-	spu_coredump_calls.owner = calls->owner;
+	if (spu_coredump_calls)
+		ret = -EBUSY;
+	else
+		spu_coredump_calls = calls;
 	mutex_unlock(&spu_coredump_mutex);
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(register_arch_coredump_calls);
 
 void unregister_arch_coredump_calls(struct spu_coredump_calls *calls)
 {
-	BUG_ON(spu_coredump_calls.owner != calls->owner);
+	BUG_ON(spu_coredump_calls != calls);
 
 	mutex_lock(&spu_coredump_mutex);
-	spu_coredump_calls.owner = NULL;
+	spu_coredump_calls = NULL;
 	mutex_unlock(&spu_coredump_mutex);
 }
 EXPORT_SYMBOL_GPL(unregister_arch_coredump_calls);
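spu_coredump.c now stores a pointer to the caller's spu_coredump_calls instead of copying its members field by field, so registration, the busy check, and unregistration each become a single pointer update under spu_coredump_mutex, and try_module_get(spu_coredump_calls->owner) pins the provider for the duration of a call. A minimal userspace sketch of the pointer-registration pattern, with a pthread mutex in place of the kernel mutex and hypothetical names:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

/* Hypothetical callback table, standing in for spu_coredump_calls. */
struct dump_calls {
	int (*notes_size)(void);
};

static struct dump_calls *registered_calls;
static pthread_mutex_t calls_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Register by pointer: the busy check and the publish are one
 * pointer test and one pointer store under the mutex. */
static int register_calls(struct dump_calls *calls)
{
	int ret = 0;

	pthread_mutex_lock(&calls_mutex);
	if (registered_calls)
		ret = -EBUSY;
	else
		registered_calls = calls;
	pthread_mutex_unlock(&calls_mutex);
	return ret;
}

static void unregister_calls(struct dump_calls *calls)
{
	pthread_mutex_lock(&calls_mutex);
	if (registered_calls == calls)
		registered_calls = NULL;
	pthread_mutex_unlock(&calls_mutex);
}

static int my_notes_size(void)
{
	return 42;
}

int main(void)
{
	struct dump_calls calls = { .notes_size = my_notes_size };

	if (register_calls(&calls) == 0) {
		printf("notes size: %d\n", registered_calls->notes_size());
		unregister_calls(&calls);
	}
	return 0;
}
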
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
index 472217d..2cd89c1 100644
--- a/arch/powerpc/platforms/cell/spufs/Makefile
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -1,4 +1,4 @@
-obj-y += switch.o
+obj-y += switch.o fault.o
 
 obj-$(CONFIG_SPU_FS) += spufs.o
 spufs-y += inode.o file.o context.o syscalls.o coredump.o
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
index 1898f0d..3322528 100644
--- a/arch/powerpc/platforms/cell/spufs/backing_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -350,6 +350,11 @@
 	return ret;
 }
 
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+	/* nothing to do here */
+}
+
 struct spu_context_ops spu_backing_ops = {
 	.mbox_read = spu_backing_mbox_read,
 	.mbox_stat_read = spu_backing_mbox_stat_read,
@@ -376,4 +381,5 @@
 	.read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
 	.get_mfc_free_elements = spu_backing_get_mfc_free_elements,
 	.send_mfc_command = spu_backing_send_mfc_command,
+	.restart_dma = spu_backing_restart_dma,
 };
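The new restart_dma hook is routed through the existing spu_context_ops indirection: spu_backing_ops covers a saved context, where there is no MFC to kick and the hook is deliberately empty, while spu_hw_ops further below pokes the real restart register unless a context switch is pending. A small sketch of the two-ops-table shape, in plain C with hypothetical names:

#include <stdio.h>

struct ctx;

/* One ops table per context state, as with spu_hw_ops/spu_backing_ops. */
struct ctx_ops {
	void (*restart_dma)(struct ctx *c);
};

static void hw_restart_dma(struct ctx *c)
{
	(void)c;
	puts("poke the MFC restart register");	/* the out_be64() case */
}

static void backing_restart_dma(struct ctx *c)
{
	(void)c;	/* nothing to do: a saved context has no MFC to kick */
}

static const struct ctx_ops hw_ops = { .restart_dma = hw_restart_dma };
static const struct ctx_ops backing_ops = { .restart_dma = backing_restart_dma };

struct ctx {
	const struct ctx_ops *ops;
};

int main(void)
{
	struct ctx c = { .ops = &backing_ops };

	c.ops->restart_dma(&c);		/* no-op while saved */
	c.ops = &hw_ops;		/* context loaded onto an SPU */
	c.ops->restart_dma(&c);
	return 0;
}
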
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
index 04ad2e3..a87d9ca 100644
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -41,9 +41,10 @@
 		goto out_free;
 	}
 	spin_lock_init(&ctx->mmio_lock);
+	spin_lock_init(&ctx->mapping_lock);
 	kref_init(&ctx->kref);
 	mutex_init(&ctx->state_mutex);
-	init_MUTEX(&ctx->run_sema);
+	mutex_init(&ctx->run_mutex);
 	init_waitqueue_head(&ctx->ibox_wq);
 	init_waitqueue_head(&ctx->wbox_wq);
 	init_waitqueue_head(&ctx->stop_wq);
@@ -51,6 +52,7 @@
 	ctx->state = SPU_STATE_SAVED;
 	ctx->ops = &spu_backing_ops;
 	ctx->owner = get_task_mm(current);
+	INIT_LIST_HEAD(&ctx->rq);
 	if (gang)
 		spu_gang_add_ctx(gang, ctx);
 	ctx->rt_priority = current->rt_priority;
@@ -75,6 +77,7 @@
 	spu_fini_csa(&ctx->csa);
 	if (ctx->gang)
 		spu_gang_remove_ctx(ctx->gang, ctx);
+	BUG_ON(!list_empty(&ctx->rq));
 	kfree(ctx);
 }
 
@@ -119,46 +122,6 @@
 }
 
 /**
- * spu_acquire_exclusive - lock spu contex and protect against userspace access
- * @ctx:	spu contex to lock
- *
- * Note:
- *	Returns 0 and with the context locked on success
- *	Returns negative error and with the context _unlocked_ on failure.
- */
-int spu_acquire_exclusive(struct spu_context *ctx)
-{
-	int ret = -EINVAL;
-
-	spu_acquire(ctx);
-	/*
-	 * Context is about to be freed, so we can't acquire it anymore.
-	 */
-	if (!ctx->owner)
-		goto out_unlock;
-
-	if (ctx->state == SPU_STATE_SAVED) {
-		ret = spu_activate(ctx, 0);
-		if (ret)
-			goto out_unlock;
-	} else {
-		/*
-		 * We need to exclude userspace access to the context.
-		 *
-		 * To protect against memory access we invalidate all ptes
-		 * and make sure the pagefault handlers block on the mutex.
-		 */
-		spu_unmap_mappings(ctx);
-	}
-
-	return 0;
-
- out_unlock:
-	spu_release(ctx);
-	return ret;
-}
-
-/**
  * spu_acquire_runnable - lock spu context and make sure it is in runnable state
  * @ctx:	spu context to lock
  *
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 725e195..5d9ad5a 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -169,12 +169,12 @@
 	struct spu_context *ctx;
 	loff_t pos = 0;
 	int sz, dfd, rc, total = 0;
-	const int bufsz = 4096;
+	const int bufsz = PAGE_SIZE;
 	char *name;
 	char fullname[80], *buf;
 	struct elf_note en;
 
-	buf = kmalloc(bufsz, GFP_KERNEL);
+	buf = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!buf)
 		return;
 
@@ -187,9 +187,8 @@
 		sz = spufs_coredump_read[i].size;
 
 	ctx = ctx_info->ctx;
-	if (!ctx) {
-		return;
-	}
+	if (!ctx)
+		goto out;
 
 	sprintf(fullname, "SPU/%d/%s", dfd, name);
 	en.n_namesz = strlen(fullname) + 1;
@@ -197,23 +196,25 @@
 	en.n_type = NT_SPU;
 
 	if (!spufs_dump_write(file, &en, sizeof(en)))
-		return;
+		goto out;
 	if (!spufs_dump_write(file, fullname, en.n_namesz))
-		return;
+		goto out;
 	if (!spufs_dump_seek(file, roundup((unsigned long)file->f_pos, 4)))
-		return;
+		goto out;
 
 	do {
 		rc = do_coredump_read(i, ctx, buf, bufsz, &pos);
 		if (rc > 0) {
 			if (!spufs_dump_write(file, buf, rc))
-				return;
+				goto out;
 			total += rc;
 		}
 	} while (rc == bufsz && total < sz);
 
 	spufs_dump_seek(file, roundup((unsigned long)file->f_pos
 						- total + sz, 4));
+out:
+	free_page((unsigned long)buf);
 }
 
 static void spufs_arch_write_notes(struct file *file)
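The note-writing helper patched above emits a standard ELF note record: a header giving the name and descriptor sizes, the NUL-terminated name, then the descriptor data, with the spufs_dump_seek(roundup(...)) calls padding each part to a 4-byte boundary. A standalone sketch of that layout, writing one note to a file; the name, data, and n_type value are made up for the demo:

#include <elf.h>
#include <stdio.h>

/* Note layout: header, NUL-terminated name, then the descriptor,
 * each padded to a 4-byte boundary. */
int main(void)
{
	const char name[] = "SPU/2/regs";
	const char desc[] = "register dump bytes";
	Elf64_Nhdr en = {
		.n_namesz = sizeof(name),	/* strlen + 1, as in the patch */
		.n_descsz = sizeof(desc),
		.n_type = 0x100,		/* stand-in for NT_SPU */
	};
	static const char pad[4];		/* zeroed padding bytes */
	FILE *f = fopen("note.bin", "wb");

	if (!f)
		return 1;
	fwrite(&en, sizeof(en), 1, f);
	fwrite(name, en.n_namesz, 1, f);
	fwrite(pad, (4 - en.n_namesz % 4) % 4, 1, f);	/* align the name */
	fwrite(desc, en.n_descsz, 1, f);
	fwrite(pad, (4 - en.n_descsz % 4) % 4, 1, f);	/* align the data */
	fclose(f);
	return 0;
}
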
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
new file mode 100644
index 0000000..0f75c07
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -0,0 +1,211 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/*
+ * This ought to be kept in sync with the powerpc specific do_page_fault
+ * function. Currently there are a few corner cases that, fortunately, we
+ * have not had to handle.
+ */
+static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr)
+{
+	struct vm_area_struct *vma;
+	unsigned long is_write;
+	int ret;
+
+#if 0
+	if (!IS_VALID_EA(ea)) {
+		return -EFAULT;
+	}
+#endif /* XXX */
+	if (mm == NULL) {
+		return -EFAULT;
+	}
+	if (mm->pgd == NULL) {
+		return -EFAULT;
+	}
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, ea);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= ea)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, ea))
+		goto bad_area;
+good_area:
+	is_write = dsisr & MFC_DSISR_ACCESS_PUT;
+	if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	} else {
+		if (dsisr & MFC_DSISR_ACCESS_DENIED)
+			goto bad_area;
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+	ret = 0;
+	switch (handle_mm_fault(mm, vma, ea, is_write)) {
+	case VM_FAULT_MINOR:
+		current->min_flt++;
+		break;
+	case VM_FAULT_MAJOR:
+		current->maj_flt++;
+		break;
+	case VM_FAULT_SIGBUS:
+		ret = -EFAULT;
+		goto bad_area;
+	case VM_FAULT_OOM:
+		ret = -ENOMEM;
+		goto bad_area;
+	default:
+		BUG();
+	}
+	up_read(&mm->mmap_sem);
+	return ret;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+	return -EFAULT;
+}
+
+static void spufs_handle_dma_error(struct spu_context *ctx,
+				unsigned long ea, int type)
+{
+	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+		ctx->event_return |= type;
+		wake_up_all(&ctx->stop_wq);
+	} else {
+		siginfo_t info;
+		memset(&info, 0, sizeof(info));
+
+		switch (type) {
+		case SPE_EVENT_INVALID_DMA:
+			info.si_signo = SIGBUS;
+			info.si_code = BUS_OBJERR;
+			break;
+		case SPE_EVENT_SPE_DATA_STORAGE:
+			info.si_signo = SIGBUS;
+			info.si_addr = (void __user *)ea;
+			info.si_code = BUS_ADRERR;
+			break;
+		case SPE_EVENT_DMA_ALIGNMENT:
+			info.si_signo = SIGBUS;
+			/* DAR isn't set for an alignment fault :( */
+			info.si_code = BUS_ADRALN;
+			break;
+		case SPE_EVENT_SPE_ERROR:
+			info.si_signo = SIGILL;
+			info.si_addr = (void __user *)(unsigned long)
+				ctx->ops->npc_read(ctx) - 4;
+			info.si_code = ILL_ILLOPC;
+			break;
+		}
+		if (info.si_signo)
+			force_sig_info(info.si_signo, &info, current);
+	}
+}
+
+void spufs_dma_callback(struct spu *spu, int type)
+{
+	spufs_handle_dma_error(spu->ctx, spu->dar, type);
+}
+EXPORT_SYMBOL_GPL(spufs_dma_callback);
+
+/*
+ * Bottom half handler for page faults. We can't do this from
+ * interrupt context, since we might need to sleep.
+ * We also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ *       in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+	u64 ea, dsisr, access;
+	unsigned long flags;
+	int ret;
+
+	/*
+	 * dar and dsisr get passed from the registers
+	 * to the spu_context, to this function, but not
+	 * back to the spu if it gets scheduled again.
+	 *
+	 * If we don't handle the fault for a saved context
+	 * in time, we can still expect to get the same fault
+	 * again immediately after the context restore.
+	 */
+	if (ctx->state == SPU_STATE_RUNNABLE) {
+		ea = ctx->spu->dar;
+		dsisr = ctx->spu->dsisr;
+		ctx->spu->dar = ctx->spu->dsisr = 0;
+	} else {
+		ea = ctx->csa.priv1.mfc_dar_RW;
+		dsisr = ctx->csa.priv1.mfc_dsisr_RW;
+		ctx->csa.priv1.mfc_dar_RW = 0;
+		ctx->csa.priv1.mfc_dsisr_RW = 0;
+	}
+
+	if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+		return 0;
+
+	pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
+		dsisr, ctx->state);
+
+	/* we must not hold the lock when entering spu_handle_mm_fault */
+	spu_release(ctx);
+
+	access = (_PAGE_PRESENT | _PAGE_USER);
+	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+	local_irq_save(flags);
+	ret = hash_page(ea, access, 0x300);
+	local_irq_restore(flags);
+
+	/* hashing failed, so try the actual fault handler */
+	if (ret)
+		ret = spu_handle_mm_fault(current->mm, ea, dsisr);
+
+	spu_acquire(ctx);
+	/*
+	 * If we handled the fault successfully and are in runnable
+	 * state, restart the DMA.
+	 * In case of unhandled error report the problem to user space.
+	 */
+	if (!ret) {
+		if (ctx->spu)
+			ctx->ops->restart_dma(ctx);
+	} else
+		spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(spufs_handle_class1);
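spufs_handle_class1() is entered and left with the context mutex held, but drops it around spu_handle_mm_fault(), which may sleep while paging in the backing store; after reacquiring it, the function re-checks the context state (ctx->spu may have changed) before restarting the DMA. A minimal pthread sketch of that drop-and-revalidate pattern; the names are hypothetical, not the spufs API:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Hypothetical context, standing in for struct spu_context. */
struct ctx {
	pthread_mutex_t lock;
	int runnable;		/* may change while the lock is dropped */
};

/* Stands in for spu_handle_mm_fault(): may sleep, so it must not
 * run under the context lock. */
static int slow_fault_work(void)
{
	usleep(1000);
	return 0;
}

/* Called with c->lock held; returns with it held again, mirroring
 * the locking dance in spufs_handle_class1(). */
static int handle_fault(struct ctx *c)
{
	int ret;

	/* ...snapshot the ea/dsisr equivalents while still locked... */
	pthread_mutex_unlock(&c->lock);
	ret = slow_fault_work();
	pthread_mutex_lock(&c->lock);

	/* State may have changed while unlocked: re-check before acting. */
	if (!ret && c->runnable)
		puts("restart the DMA");
	return ret;
}

int main(void)
{
	struct ctx c = { PTHREAD_MUTEX_INITIALIZER, 1 };
	int ret;

	pthread_mutex_lock(&c.lock);
	ret = handle_fault(&c);
	pthread_mutex_unlock(&c.lock);
	return ret;
}
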
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 505266a..d010b24 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -44,9 +44,25 @@
 {
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->local_store = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->local_store = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static int
+spufs_mem_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->local_store = NULL;
+	spin_unlock(&ctx->mapping_lock);
 	return 0;
 }
 
@@ -149,6 +165,7 @@
 
 static const struct file_operations spufs_mem_fops = {
 	.open	 = spufs_mem_open,
+	.release = spufs_mem_release,
 	.read    = spufs_mem_read,
 	.write   = spufs_mem_write,
 	.llseek  = generic_file_llseek,
@@ -238,16 +255,33 @@
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
 
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->cntl = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->cntl = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return simple_attr_open(inode, file, spufs_cntl_get,
 					spufs_cntl_set, "0x%08lx");
 }
 
+static int
+spufs_cntl_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	simple_attr_close(inode, file);
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->cntl = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 static const struct file_operations spufs_cntl_fops = {
 	.open = spufs_cntl_open,
-	.release = simple_attr_close,
+	.release = spufs_cntl_release,
 	.read = simple_attr_read,
 	.write = simple_attr_write,
 	.mmap = spufs_cntl_mmap,
@@ -723,12 +757,28 @@
 {
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->signal1 = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->signal1 = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_signal1_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->signal1 = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
 			size_t len, loff_t *pos)
 {
@@ -821,6 +871,7 @@
 
 static const struct file_operations spufs_signal1_fops = {
 	.open = spufs_signal1_open,
+	.release = spufs_signal1_release,
 	.read = spufs_signal1_read,
 	.write = spufs_signal1_write,
 	.mmap = spufs_signal1_mmap,
@@ -830,12 +881,28 @@
 {
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->signal2 = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->signal2 = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_signal2_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->signal2 = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
 			size_t len, loff_t *pos)
 {
@@ -932,6 +999,7 @@
 
 static const struct file_operations spufs_signal2_fops = {
 	.open = spufs_signal2_open,
+	.release = spufs_signal2_release,
 	.read = spufs_signal2_read,
 	.write = spufs_signal2_write,
 	.mmap = spufs_signal2_mmap,
@@ -1031,13 +1099,30 @@
 	struct spu_context *ctx = i->i_ctx;
 
 	file->private_data = i->i_ctx;
-	ctx->mss = inode->i_mapping;
-	smp_wmb();
+
+	spin_lock(&ctx->mapping_lock);
+	if (!i->i_openers++)
+		ctx->mss = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_mss_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mss = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 static const struct file_operations spufs_mss_fops = {
 	.open	 = spufs_mss_open,
+	.release = spufs_mss_release,
 	.mmap	 = spufs_mss_mmap,
 };
 
@@ -1072,14 +1157,30 @@
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
 
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = i->i_ctx;
-	ctx->psmap = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->psmap = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_psmap_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->psmap = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 static const struct file_operations spufs_psmap_fops = {
 	.open	 = spufs_psmap_open,
+	.release = spufs_psmap_release,
 	.mmap	 = spufs_psmap_mmap,
 };
 
@@ -1126,12 +1227,27 @@
 	if (atomic_read(&inode->i_count) != 1)
 		return -EBUSY;
 
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->mfc = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->mfc = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_mfc_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mfc = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 /* interrupt-level mfc callback function. */
 void spufs_mfc_callback(struct spu *spu)
 {
@@ -1313,7 +1429,10 @@
 	if (ret)
 		goto out;
 
-	spu_acquire_runnable(ctx, 0);
+	ret = spu_acquire_runnable(ctx, 0);
+	if (ret)
+		goto out;
+
 	if (file->f_flags & O_NONBLOCK) {
 		ret = ctx->ops->send_mfc_command(ctx, &cmd);
 	} else {
@@ -1399,6 +1518,7 @@
 
 static const struct file_operations spufs_mfc_fops = {
 	.open	 = spufs_mfc_open,
+	.release = spufs_mfc_release,
 	.read	 = spufs_mfc_read,
 	.write	 = spufs_mfc_write,
 	.poll	 = spufs_mfc_poll,
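Each open/release pair added to file.c follows one shape: under ctx->mapping_lock, the first opener publishes inode->i_mapping into the context and only the last closer clears it again, replacing the old unconditional assignment plus smp_wmb(). The counting pattern as a minimal userspace sketch, with a pthread mutex standing in for the spinlock and hypothetical names:

#include <pthread.h>
#include <stdio.h>

/* Hypothetical per-file state, standing in for spufs_inode_info
 * and the mapping pointers in spu_context. */
static pthread_mutex_t mapping_lock = PTHREAD_MUTEX_INITIALIZER;
static int openers;
static void *mapping;		/* stands in for ctx->local_store etc. */

static void file_open(void *inode_mapping)
{
	pthread_mutex_lock(&mapping_lock);
	if (!openers++)		/* first opener publishes the mapping */
		mapping = inode_mapping;
	pthread_mutex_unlock(&mapping_lock);
}

static void file_release(void)
{
	pthread_mutex_lock(&mapping_lock);
	if (!--openers)		/* last closer tears it down */
		mapping = NULL;
	pthread_mutex_unlock(&mapping_lock);
}

int main(void)
{
	int dummy;

	file_open(&dummy);
	file_open(&dummy);	/* second open: mapping already set */
	file_release();
	file_release();		/* last close clears it */
	printf("mapping %s\n", mapping ? "set" : "cleared");
	return 0;
}
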
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c
index ae42e03..428875c 100644
--- a/arch/powerpc/platforms/cell/spufs/hw_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -296,6 +296,14 @@
 	}
 }
 
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+	struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+	if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
 struct spu_context_ops spu_hw_ops = {
 	.mbox_read = spu_hw_mbox_read,
 	.mbox_stat_read = spu_hw_mbox_stat_read,
@@ -320,4 +328,5 @@
 	.read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
 	.get_mfc_free_elements = spu_hw_get_mfc_free_elements,
 	.send_mfc_command = spu_hw_send_mfc_command,
+	.restart_dma = spu_hw_restart_dma,
 };
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index e3f4ee9..13e4f70 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -36,6 +36,7 @@
 #include <asm/prom.h>
 #include <asm/semaphore.h>
 #include <asm/spu.h>
+#include <asm/spu_priv1.h>
 #include <asm/uaccess.h>
 
 #include "spufs.h"
@@ -54,6 +55,7 @@
 
 	ei->i_gang = NULL;
 	ei->i_ctx = NULL;
+	ei->i_openers = 0;
 
 	return &ei->vfs_inode;
 }
@@ -520,13 +522,14 @@
 
 /* File system initialization */
 enum {
-	Opt_uid, Opt_gid, Opt_err,
+	Opt_uid, Opt_gid, Opt_mode, Opt_err,
 };
 
 static match_table_t spufs_tokens = {
-	{ Opt_uid, "uid=%d" },
-	{ Opt_gid, "gid=%d" },
-	{ Opt_err, NULL  },
+	{ Opt_uid,  "uid=%d" },
+	{ Opt_gid,  "gid=%d" },
+	{ Opt_mode, "mode=%o" },
+	{ Opt_err,   NULL  },
 };
 
 static int
@@ -553,6 +556,11 @@
 				return 0;
 			root->i_gid = option;
 			break;
+		case Opt_mode:
+			if (match_octal(&args[0], &option))
+				return 0;
+			root->i_mode = option | S_IFDIR;
+			break;
 		default:
 			return 0;
 		}
@@ -560,6 +568,11 @@
 	return 1;
 }
 
+static void spufs_exit_isolated_loader(void)
+{
+	kfree(isolated_loader);
+}
+
 static void
 spufs_init_isolated_loader(void)
 {
@@ -653,6 +666,10 @@
 {
 	int ret;
 
+	ret = -ENODEV;
+	if (!spu_management_ops)
+		goto out;
+
 	ret = -ENOMEM;
 	spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
 			sizeof(struct spufs_inode_info), 0,
@@ -660,25 +677,29 @@
 
 	if (!spufs_inode_cache)
 		goto out;
-	if (spu_sched_init() != 0) {
-		kmem_cache_destroy(spufs_inode_cache);
-		goto out;
-	}
-	ret = register_filesystem(&spufs_type);
+	ret = spu_sched_init();
 	if (ret)
 		goto out_cache;
+	ret = register_filesystem(&spufs_type);
+	if (ret)
+		goto out_sched;
 	ret = register_spu_syscalls(&spufs_calls);
 	if (ret)
 		goto out_fs;
 	ret = register_arch_coredump_calls(&spufs_coredump_calls);
 	if (ret)
-		goto out_fs;
+		goto out_syscalls;
 
 	spufs_init_isolated_loader();
 
 	return 0;
+
+out_syscalls:
+	unregister_spu_syscalls(&spufs_calls);
 out_fs:
 	unregister_filesystem(&spufs_type);
+out_sched:
+	spu_sched_exit();
 out_cache:
 	kmem_cache_destroy(spufs_inode_cache);
 out:
@@ -689,6 +710,7 @@
 static void __exit spufs_exit(void)
 {
 	spu_sched_exit();
+	spufs_exit_isolated_loader();
 	unregister_arch_coredump_calls(&spufs_coredump_calls);
 	unregister_spu_syscalls(&spufs_calls);
 	unregister_filesystem(&spufs_type);
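The new mode=%o mount option is parsed as octal by match_octal() and OR'd with S_IFDIR into the root inode's i_mode. A tiny standalone sketch of the octal parsing, using sscanf's %o conversion in place of match_octal(), with a hypothetical option string:

#include <stdio.h>

/* match_octal() stand-in: %o makes sscanf parse the value as octal. */
int main(void)
{
	const char *opt = "mode=0775";	/* hypothetical mount option */
	unsigned int mode;

	if (sscanf(opt, "mode=%o", &mode) == 1)
		printf("root i_mode bits: %04o\n", mode);	/* 0775 */
	return 0;
}
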
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index f95a611..5762660 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -18,27 +18,6 @@
 	wake_up_all(&ctx->stop_wq);
 }
 
-void spufs_dma_callback(struct spu *spu, int type)
-{
-	struct spu_context *ctx = spu->ctx;
-
-	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
-		ctx->event_return |= type;
-		wake_up_all(&ctx->stop_wq);
-	} else {
-		switch (type) {
-		case SPE_EVENT_DMA_ALIGNMENT:
-		case SPE_EVENT_SPE_DATA_STORAGE:
-		case SPE_EVENT_INVALID_DMA:
-			force_sig(SIGBUS, /* info, */ current);
-			break;
-		case SPE_EVENT_SPE_ERROR:
-			force_sig(SIGILL, /* info */ current);
-			break;
-		}
-	}
-}
-
 static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
 {
 	struct spu *spu;
@@ -63,13 +42,18 @@
 	const u32 status_loading = SPU_STATUS_RUNNING
 		| SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS;
 
+	ret = -ENODEV;
 	if (!isolated_loader)
-		return -ENODEV;
-
-	ret = spu_acquire_exclusive(ctx);
-	if (ret)
 		goto out;
 
+	/*
+	 * We need to exclude userspace access to the context.
+	 *
+	 * To protect against memory access we invalidate all ptes
+	 * and make sure the pagefault handlers block on the mutex.
+	 */
+	spu_unmap_mappings(ctx);
+
 	mfc_cntl = &ctx->spu->priv2->mfc_control_RW;
 
 	/* purge the MFC DMA queue to ensure no spurious accesses before we
@@ -82,7 +66,7 @@
 			printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n",
 					__FUNCTION__);
 			ret = -EIO;
-			goto out_unlock;
+			goto out;
 		}
 		cond_resched();
 	}
@@ -119,12 +103,15 @@
 		pr_debug("%s: isolated LOAD failed\n", __FUNCTION__);
 		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
 		ret = -EACCES;
+		goto out_drop_priv;
+	}
 
-	} else if (!(status & SPU_STATUS_ISOLATED_STATE)) {
+	if (!(status & SPU_STATUS_ISOLATED_STATE)) {
 		/* This isn't allowed by the CBEA, but check anyway */
 		pr_debug("%s: SPU fell out of isolated mode?\n", __FUNCTION__);
 		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP);
 		ret = -EINVAL;
+		goto out_drop_priv;
 	}
 
 out_drop_priv:
@@ -132,30 +119,19 @@
 	sr1 |= MFC_STATE1_PROBLEM_STATE_MASK;
 	spu_mfc_sr1_set(ctx->spu, sr1);
 
-out_unlock:
-	spu_release(ctx);
 out:
 	return ret;
 }
 
-static inline int spu_run_init(struct spu_context *ctx, u32 * npc)
+static int spu_run_init(struct spu_context *ctx, u32 * npc)
 {
-	int ret;
-	unsigned long runcntl = SPU_RUNCNTL_RUNNABLE;
-
-	ret = spu_acquire_runnable(ctx, 0);
-	if (ret)
-		return ret;
-
 	if (ctx->flags & SPU_CREATE_ISOLATE) {
+		unsigned long runcntl;
+
 		if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) {
-			/* Need to release ctx, because spu_setup_isolated will
-			 * acquire it exclusively.
-			 */
-			spu_release(ctx);
-			ret = spu_setup_isolated(ctx);
-			if (!ret)
-				ret = spu_acquire_runnable(ctx, 0);
+			int ret = spu_setup_isolated(ctx);
+			if (ret)
+				return ret;
 		}
 
 		/* if userspace has set the runcntrl register (eg, to issue an
@@ -164,16 +140,17 @@
 			(SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
 		if (runcntl == 0)
 			runcntl = SPU_RUNCNTL_RUNNABLE;
+		ctx->ops->runcntl_write(ctx, runcntl);
 	} else {
 		spu_start_tick(ctx);
 		ctx->ops->npc_write(ctx, *npc);
+		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
 	}
 
-	ctx->ops->runcntl_write(ctx, runcntl);
-	return ret;
+	return 0;
 }
 
-static inline int spu_run_fini(struct spu_context *ctx, u32 * npc,
+static int spu_run_fini(struct spu_context *ctx, u32 * npc,
 			       u32 * status)
 {
 	int ret = 0;
@@ -189,19 +166,27 @@
 	return ret;
 }
 
-static inline int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc,
+static int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc,
 				         u32 *status)
 {
 	int ret;
 
-	if ((ret = spu_run_fini(ctx, npc, status)) != 0)
+	ret = spu_run_fini(ctx, npc, status);
+	if (ret)
 		return ret;
-	if (*status & (SPU_STATUS_STOPPED_BY_STOP |
-		       SPU_STATUS_STOPPED_BY_HALT)) {
+
+	if (*status & (SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_STOPPED_BY_HALT))
 		return *status;
-	}
-	if ((ret = spu_run_init(ctx, npc)) != 0)
+
+	ret = spu_acquire_runnable(ctx, 0);
+	if (ret)
 		return ret;
+
+	ret = spu_run_init(ctx, npc);
+	if (ret) {
+		spu_release(ctx);
+		return ret;
+	}
 	return 0;
 }
 
@@ -253,17 +238,17 @@
 {
 	struct spu_syscall_block s;
 	u32 ls_pointer, npc;
-	char *ls;
+	void __iomem *ls;
 	long spu_ret;
 	int ret;
 
 	/* get syscall block from local store */
-	npc = ctx->ops->npc_read(ctx);
-	ls = ctx->ops->get_ls(ctx);
-	ls_pointer = *(u32*)(ls + npc);
+	npc = ctx->ops->npc_read(ctx) & ~3;
+	ls = (void __iomem *)ctx->ops->get_ls(ctx);
+	ls_pointer = in_be32(ls + npc);
 	if (ls_pointer > (LS_SIZE - sizeof(s)))
 		return -EFAULT;
-	memcpy(&s, ls + ls_pointer, sizeof (s));
+	memcpy_fromio(&s, ls + ls_pointer, sizeof(s));
 
 	/* do actual syscall without pinning the spu */
 	ret = 0;
@@ -283,7 +268,7 @@
 	}
 
 	/* write result, jump over indirect pointer */
-	memcpy(ls + ls_pointer, &spu_ret, sizeof (spu_ret));
+	memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret));
 	ctx->ops->npc_write(ctx, npc);
 	ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
 	return ret;
@@ -292,11 +277,8 @@
 static inline int spu_process_events(struct spu_context *ctx)
 {
 	struct spu *spu = ctx->spu;
-	u64 pte_fault = MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED;
 	int ret = 0;
 
-	if (spu->dsisr & pte_fault)
-		ret = spu_irq_class_1_bottom(spu);
 	if (spu->class_0_pending)
 		ret = spu_irq_class_0_bottom(spu);
 	if (!ret && signal_pending(current))
@@ -310,14 +292,21 @@
 	int ret;
 	u32 status;
 
-	if (down_interruptible(&ctx->run_sema))
+	if (mutex_lock_interruptible(&ctx->run_mutex))
 		return -ERESTARTSYS;
 
 	ctx->ops->master_start(ctx);
 	ctx->event_return = 0;
-	ret = spu_run_init(ctx, npc);
+
+	ret = spu_acquire_runnable(ctx, 0);
 	if (ret)
+		return ret;
+
+	ret = spu_run_init(ctx, npc);
+	if (ret) {
+		spu_release(ctx);
 		goto out;
+	}
 
 	do {
 		ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
@@ -330,6 +319,10 @@
 				break;
 			status &= ~SPU_STATUS_STOPPED_BY_STOP;
 		}
+		ret = spufs_handle_class1(ctx);
+		if (ret)
+			break;
+
 		if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) {
 			ret = spu_reacquire_runnable(ctx, npc, &status);
 			if (ret) {
@@ -363,6 +356,6 @@
 
 out:
 	*event = ctx->event_return;
-	up(&ctx->run_sema);
+	mutex_unlock(&ctx->run_mutex);
 	return ret;
 }
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index c956158..91030b8 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -71,14 +71,27 @@
 
 void spu_start_tick(struct spu_context *ctx)
 {
-	if (ctx->policy == SCHED_RR)
+	if (ctx->policy == SCHED_RR) {
+		/*
+		 * Make sure the exiting bit is cleared.
+		 */
+		clear_bit(SPU_SCHED_EXITING, &ctx->sched_flags);
+		mb();
 		queue_delayed_work(spu_sched_wq, &ctx->sched_work, SPU_TIMESLICE);
+	}
 }
 
 void spu_stop_tick(struct spu_context *ctx)
 {
-	if (ctx->policy == SCHED_RR)
+	if (ctx->policy == SCHED_RR) {
+		/*
+		 * While the work can rearm itself normally, setting this
+		 * flag makes sure it does not rearm itself anymore.
+		 */
+		set_bit(SPU_SCHED_EXITING, &ctx->sched_flags);
+		mb();
 		cancel_delayed_work(&ctx->sched_work);
+	}
 }
 
 void spu_sched_tick(struct work_struct *work)
@@ -86,7 +99,15 @@
 	struct spu_context *ctx =
 		container_of(work, struct spu_context, sched_work.work);
 	struct spu *spu;
-	int rearm = 1;
+	int preempted = 0;
+
+	/*
+	 * If this context is being stopped, avoid rescheduling from the
+	 * scheduler tick because we would block on the state_mutex.
+	 * The caller will yield the spu later on anyway.
+	 */
+	if (test_bit(SPU_SCHED_EXITING, &ctx->sched_flags))
+		return;
 
 	mutex_lock(&ctx->state_mutex);
 	spu = ctx->spu;
@@ -94,12 +115,19 @@
 		int best = sched_find_first_bit(spu_prio->bitmap);
 		if (best <= ctx->prio) {
 			spu_deactivate(ctx);
-			rearm = 0;
+			preempted = 1;
 		}
 	}
 	mutex_unlock(&ctx->state_mutex);
 
-	if (rearm)
+	if (preempted) {
+		/*
+		 * We need to break out of the wait loop in spu_run manually
+		 * to ensure this context gets put on the runqueue again
+		 * ASAP.
+		 */
+		wake_up(&ctx->stop_wq);
+	} else
 		spu_start_tick(ctx);
 }
 
@@ -208,58 +236,40 @@
  * spu_add_to_rq - add a context to the runqueue
  * @ctx:       context to add
  */
-static void spu_add_to_rq(struct spu_context *ctx)
+static void __spu_add_to_rq(struct spu_context *ctx)
 {
-	spin_lock(&spu_prio->runq_lock);
-	list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
-	set_bit(ctx->prio, spu_prio->bitmap);
-	spin_unlock(&spu_prio->runq_lock);
+	int prio = ctx->prio;
+
+	list_add_tail(&ctx->rq, &spu_prio->runq[prio]);
+	set_bit(prio, spu_prio->bitmap);
 }
 
-/**
- * spu_del_from_rq - remove a context from the runqueue
- * @ctx:       context to remove
- */
-static void spu_del_from_rq(struct spu_context *ctx)
+static void __spu_del_from_rq(struct spu_context *ctx)
 {
-	spin_lock(&spu_prio->runq_lock);
-	list_del_init(&ctx->rq);
-	if (list_empty(&spu_prio->runq[ctx->prio]))
-		clear_bit(ctx->prio, spu_prio->bitmap);
-	spin_unlock(&spu_prio->runq_lock);
-}
+	int prio = ctx->prio;
 
-/**
- * spu_grab_context - remove one context from the runqueue
- * @prio:      priority of the context to be removed
- *
- * This function removes one context from the runqueue for priority @prio.
- * If there is more than one context with the given priority the first
- * task on the runqueue will be taken.
- *
- * Returns the spu_context it just removed.
- *
- * Must be called with spu_prio->runq_lock held.
- */
-static struct spu_context *spu_grab_context(int prio)
-{
-	struct list_head *rq = &spu_prio->runq[prio];
-
-	if (list_empty(rq))
-		return NULL;
-	return list_entry(rq->next, struct spu_context, rq);
+	if (!list_empty(&ctx->rq))
+		list_del_init(&ctx->rq);
+	if (list_empty(&spu_prio->runq[prio]))
+		clear_bit(prio, spu_prio->bitmap);
 }
 
 static void spu_prio_wait(struct spu_context *ctx)
 {
 	DEFINE_WAIT(wait);
 
+	spin_lock(&spu_prio->runq_lock);
 	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
 	if (!signal_pending(current)) {
+		__spu_add_to_rq(ctx);
+		spin_unlock(&spu_prio->runq_lock);
 		mutex_unlock(&ctx->state_mutex);
 		schedule();
 		mutex_lock(&ctx->state_mutex);
+		spin_lock(&spu_prio->runq_lock);
+		__spu_del_from_rq(ctx);
 	}
+	spin_unlock(&spu_prio->runq_lock);
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&ctx->stop_wq, &wait);
 }
@@ -280,9 +290,14 @@
 	spin_lock(&spu_prio->runq_lock);
 	best = sched_find_first_bit(spu_prio->bitmap);
 	if (best < MAX_PRIO) {
-		struct spu_context *ctx = spu_grab_context(best);
-		if (ctx)
-			wake_up(&ctx->stop_wq);
+		struct list_head *rq = &spu_prio->runq[best];
+		struct spu_context *ctx;
+
+		BUG_ON(list_empty(rq));
+
+		ctx = list_entry(rq->next, struct spu_context, rq);
+		__spu_del_from_rq(ctx);
+		wake_up(&ctx->stop_wq);
 	}
 	spin_unlock(&spu_prio->runq_lock);
 }
@@ -365,6 +380,12 @@
 			}
 			spu_unbind_context(spu, victim);
 			mutex_unlock(&victim->state_mutex);
+			/*
+			 * We need to break out of the wait loop in spu_run
+			 * manually to ensure this context gets put on the
+			 * runqueue again ASAP.
+			 */
+			wake_up(&victim->stop_wq);
 			return spu;
 		}
 	}
@@ -377,7 +398,7 @@
  * @ctx:	spu context to schedule
  * @flags:	flags (currently ignored)
  *
- * Tries to find a free spu to run @ctx.  If no free spu is availble
+ * Tries to find a free spu to run @ctx.  If no free spu is available,
  * add the context to the runqueue so it gets woken up once an spu
  * is available.
  */
@@ -402,9 +423,7 @@
 			return 0;
 		}
 
-		spu_add_to_rq(ctx);
 		spu_prio_wait(ctx);
-		spu_del_from_rq(ctx);
 	} while (!signal_pending(current));
 
 	return -ERESTARTSYS;
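spu_prio_wait() now enqueues the context on the run queue under spu_prio->runq_lock only after prepare_to_wait_exclusive(), and dequeues it under the same lock after waking, so a context can never be found on the run queue before it is ready to be woken. A pthread sketch of the underlying rule, that a waiter must become visible to wakers under the same lock the wakers take; the names are hypothetical:

#include <pthread.h>
#include <stdio.h>

/* A waiter must become visible to wakers under the same lock the
 * wakers take, or a wakeup can be lost between enqueue and sleep. */
static pthread_mutex_t runq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t runq_wait = PTHREAD_COND_INITIALIZER;
static int queued;	/* stands in for being on spu_prio->runq */
static int woken;

static void *waiter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&runq_lock);
	queued = 1;			/* __spu_add_to_rq() equivalent */
	while (!woken)
		pthread_cond_wait(&runq_wait, &runq_lock);
	queued = 0;			/* __spu_del_from_rq() equivalent */
	pthread_mutex_unlock(&runq_lock);
	return NULL;
}

static void waker(void)
{
	pthread_mutex_lock(&runq_lock);
	if (queued) {			/* wakers only see queued waiters */
		woken = 1;
		pthread_cond_signal(&runq_wait);
	}
	pthread_mutex_unlock(&runq_lock);
}

int main(void)
{
	pthread_t t;
	int done = 0;

	pthread_create(&t, NULL, waiter, NULL);
	while (!done) {			/* retry until the waiter is queued */
		waker();
		pthread_mutex_lock(&runq_lock);
		done = woken;
		pthread_mutex_unlock(&runq_lock);
	}
	pthread_join(t, NULL);
	puts("woken without a lost wakeup");
	return 0;
}
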
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 5c4e47d..0a947fd 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -41,7 +41,7 @@
 
 /* ctx->sched_flags */
 enum {
-	SPU_SCHED_WAKE = 0, /* currently unused */
+	SPU_SCHED_EXITING = 0,
 };
 
 struct spu_context {
@@ -50,16 +50,17 @@
 	spinlock_t mmio_lock;		  /* protects mmio access */
 	struct address_space *local_store; /* local store mapping.  */
 	struct address_space *mfc;	   /* 'mfc' area mappings. */
-	struct address_space *cntl; 	   /* 'control' area mappings. */
-	struct address_space *signal1; 	   /* 'signal1' area mappings. */
-	struct address_space *signal2; 	   /* 'signal2' area mappings. */
-	struct address_space *mss; 	   /* 'mss' area mappings. */
-	struct address_space *psmap; 	   /* 'psmap' area mappings. */
+	struct address_space *cntl;	   /* 'control' area mappings. */
+	struct address_space *signal1;	   /* 'signal1' area mappings. */
+	struct address_space *signal2;	   /* 'signal2' area mappings. */
+	struct address_space *mss;	   /* 'mss' area mappings. */
+	struct address_space *psmap;	   /* 'psmap' area mappings. */
+	spinlock_t mapping_lock;
 	u64 object_id;		   /* user space pointer for oprofile */
 
 	enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state;
 	struct mutex state_mutex;
-	struct semaphore run_sema;
+	struct mutex run_mutex;
 
 	struct mm_struct *owner;
 
@@ -140,6 +141,7 @@
 			       struct spu_dma_info * info);
 	void (*proxydma_info_read) (struct spu_context * ctx,
 				    struct spu_proxydma_info * info);
+	void (*restart_dma)(struct spu_context *ctx);
 };
 
 extern struct spu_context_ops spu_hw_ops;
@@ -149,6 +151,7 @@
 	struct spu_context *i_ctx;
 	struct spu_gang *i_gang;
 	struct inode vfs_inode;
+	int i_openers;
 };
 #define SPUFS_I(inode) \
 	container_of(inode, struct spufs_inode_info, vfs_inode)
@@ -170,6 +173,9 @@
 void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
 void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
 
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+
 /* context management */
 static inline void spu_acquire(struct spu_context *ctx)
 {
@@ -190,7 +196,6 @@
 void spu_forget(struct spu_context *ctx);
 int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags);
 void spu_acquire_saved(struct spu_context *ctx);
-int spu_acquire_exclusive(struct spu_context *ctx);
 
 int spu_activate(struct spu_context *ctx, unsigned long flags);
 void spu_deactivate(struct spu_context *ctx);
@@ -218,14 +223,13 @@
 		prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE);	\
 		if (condition)						\
 			break;						\
-		if (!signal_pending(current)) {				\
-			spu_release(ctx);				\
-			schedule();					\
-			spu_acquire(ctx);				\
-			continue;					\
+		if (signal_pending(current)) {				\
+			__ret = -ERESTARTSYS;				\
+			break;						\
 		}							\
-		__ret = -ERESTARTSYS;					\
-		break;							\
+		spu_release(ctx);					\
+		schedule();						\
+		spu_acquire(ctx);					\
 	}								\
 	finish_wait(&(wq), &__wait);					\
 	__ret;								\
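The restructured spufs_wait() puts the common case on the fall-through path: test the condition, bail out with -ERESTARTSYS on a pending signal, otherwise release the context, schedule, and reacquire before retesting. The control flow as a compilable userspace sketch, using a GNU statement expression as the kernel macro does; the demo only exercises the already-satisfied case, so the loop terminates:

#include <stdio.h>

/* Userspace sketch of the spufs_wait() control flow. */
#define sketch_wait(cond, signalled)				\
({								\
	int __ret = 0;						\
	for (;;) {						\
		if (cond)					\
			break;					\
		if (signalled) {				\
			__ret = -1;	/* -ERESTARTSYS */	\
			break;					\
		}						\
		/* spu_release(); schedule(); spu_acquire(); */	\
	}							\
	__ret;							\
})

int main(void)
{
	int done = 1, sig = 0;

	printf("ret=%d\n", sketch_wait(done, sig));	/* prints ret=0 */
	return 0;
}
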
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
index fd91c73..8347c4a 100644
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -2084,6 +2084,10 @@
 	int rc;
 
 	acquire_spu_lock(spu);	        /* Step 1.     */
+	prev->dar = spu->dar;
+	prev->dsisr = spu->dsisr;
+	spu->dar = 0;
+	spu->dsisr = 0;
 	rc = __do_spu_save(prev, spu);	/* Steps 2-53. */
 	release_spu_lock(spu);
 	if (rc != 0 && rc != 2 && rc != 6) {
@@ -2109,9 +2113,9 @@
 
 	acquire_spu_lock(spu);
 	harvest(NULL, spu);
-	spu->dar = 0;
-	spu->dsisr = 0;
 	spu->slb_replace = 0;
+	new->dar = 0;
+	new->dsisr = 0;
 	spu->class_0_pending = 0;
 	rc = __do_spu_restore(new, spu);
 	release_spu_lock(spu);
diff --git a/include/asm-powerpc/mmu.h b/include/asm-powerpc/mmu.h
index 200055a..e22fd88 100644
--- a/include/asm-powerpc/mmu.h
+++ b/include/asm-powerpc/mmu.h
@@ -234,6 +234,7 @@
 			   unsigned long vsid, pte_t *ptep, unsigned long trap,
 			   unsigned int local);
 struct mm_struct;
+extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
 extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
 			  unsigned long ea, unsigned long vsid, int local,
 			  unsigned long trap);
diff --git a/include/asm-powerpc/spu_csa.h b/include/asm-powerpc/spu_csa.h
index 8aad061..02e56a6 100644
--- a/include/asm-powerpc/spu_csa.h
+++ b/include/asm-powerpc/spu_csa.h
@@ -242,6 +242,7 @@
 	u64 spu_chnldata_RW[32];
 	u32 spu_mailbox_data[4];
 	u32 pu_mailbox_data[1];
+	u64 dar, dsisr;
 	unsigned long suspend_time;
 	spinlock_t register_lock;
 };