IB/hfi1: Consistently call ops->remove outside spinlock
The ops->remove() callback was called by hfi1_mmu_unregister() with a
NULL mm argument while holding a spinlock. In the case of sdma_rb_remove()
this caused it to pass current->mm to hfi1_release_user_pages()
This had 2 problems. First this would attempt to acquire the mmap_sem
under a spin lock. Second the use of current->mm is not always guaranteed
to be the proper mm when the fd is being closed.
Rather than depend on this implicit behavior we move all calls to
ops->remove outside of the spinlock. This also allows the correct
mm to be used in the remove callback without fear of deadlock.
Because the MMU notifier is not guaranteed to hold mm->mmap_sem, but
usually does, we must delay all remove callbacks until out of the notifier,
when the callbacks can take the mmap_sem if they need to.
Code comments were added to clarify what the expectations are for the
users of the mmu rb tree.
Suggested-by: Jim Foraker <foraker1@llnl.gov>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 3d76222..751aa22 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -310,8 +310,7 @@
static int sdma_rb_insert(void *, struct mmu_rb_node *);
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
void *arg2, bool *stop);
-static void sdma_rb_remove(void *, struct mmu_rb_node *,
- struct mm_struct *);
+static void sdma_rb_remove(void *, struct mmu_rb_node *, struct mm_struct *);
static int sdma_rb_invalidate(void *, struct mmu_rb_node *);
static struct mmu_rb_ops sdma_rb_ops = {
@@ -446,7 +445,8 @@
cq->nentries = hfi1_sdma_comp_ring_size;
fd->cq = cq;
- ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, &pq->handler);
+ ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
+ &pq->handler);
if (ret) {
dd_dev_err(dd, "Failed to register with MMU %d", ret);
goto done;
@@ -1651,20 +1651,8 @@
atomic_sub(node->npages, &node->pq->n_locked);
- /*
- * If mm is set, we are being called by the MMU notifier and we
- * should not pass a mm_struct to unpin_vector_page(). This is to
- * prevent a deadlock when hfi1_release_user_pages() attempts to
- * take the mmap_sem, which the MMU notifier has already taken.
- */
- unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0,
- node->npages);
- /*
- * If called by the MMU notifier, we have to adjust the pinned
- * page count ourselves.
- */
- if (mm)
- mm->pinned_vm -= node->npages;
+ unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages);
+
kfree(node);
}