IB/ehca: Fix sync between completion handler and destroy cq
This patch fixes two issues reported by Roland Dreier and Christoph Hellwig:
- Mismatched sync/locking between completion handler and destroy cq: we
introduced a counter nr_events per cq to track the number of irq events
seen. This counter is incremented when an event queue entry is seen
and decremented after the completion handler has been called, regardless
of whether the scaling code is active or not. Note that nr_callbacks tracks
the number of events assigned to a cpu, so the two counters can potentially
diverge.
The sync between running completion handler and destroy cq is done
by using the global spin lock ehca_cq_idr_lock.
- Replace yield() with wait_event(), which waits for the counter above to become zero.
Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 6ebfa27..e2cdc1a 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -146,6 +146,7 @@
spin_lock_init(&my_cq->spinlock);
spin_lock_init(&my_cq->cb_lock);
spin_lock_init(&my_cq->task_lock);
+ init_waitqueue_head(&my_cq->wait_completion);
my_cq->ownpid = current->tgid;
cq = &my_cq->ib_cq;
@@ -302,6 +303,16 @@
return cq;
}
+static int get_cq_nr_events(struct ehca_cq *my_cq) /* returns a snapshot of my_cq->nr_events */
+{
+ int ret;
+ unsigned long flags; /* irq state saved by spin_lock_irqsave, restored on unlock */
+ spin_lock_irqsave(&ehca_cq_idr_lock, flags); /* nr_events is read under the global ehca_cq_idr_lock */
+ ret = my_cq->nr_events;
+ spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ return ret; /* value as of the locked read; callers needing a stable value must recheck under the lock */
+}
+
int ehca_destroy_cq(struct ib_cq *cq)
{
u64 h_ret;
@@ -329,10 +340,11 @@
}
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- while (my_cq->nr_callbacks) {
+ while (my_cq->nr_events) {
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
- yield();
+ wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq));
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ /* recheck nr_events to assure no cqe has just arrived */
}
idr_remove(&ehca_cq_idr, my_cq->token);