IB/ehca: Fix sync between completion handler and destroy cq

This patch fixes two issues reported by Roland Dreier and Christoph Hellwig:

- Mismatched sync/locking between completion handler and destroy cq We
  introduced a counter nr_events per cq to track number of irq events
  seen. This counter is incremented when an event queue entry is seen
  and decremented after completion handler has been called regardless
  if scaling code is active or not. Note that nr_callbacks tracks
  number of events assigned to a cpu and both counters can potentially
  diverge.

  The sync between running completion handler and destroy cq is done
  by using the global spin lock ehca_cq_idr_lock.

- Replace yield by wait_event on the counter above to become zero.

Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 6ebfa27..e2cdc1a 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -146,6 +146,7 @@
 	spin_lock_init(&my_cq->spinlock);
 	spin_lock_init(&my_cq->cb_lock);
 	spin_lock_init(&my_cq->task_lock);
+	init_waitqueue_head(&my_cq->wait_completion);
 	my_cq->ownpid = current->tgid;
 
 	cq = &my_cq->ib_cq;
@@ -302,6 +303,16 @@
 	return cq;
 }
 
+static int get_cq_nr_events(struct ehca_cq *my_cq)
+{
+	int ret;
+	unsigned long flags;
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	ret = my_cq->nr_events;
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+	return ret;
+}
+
 int ehca_destroy_cq(struct ib_cq *cq)
 {
 	u64 h_ret;
@@ -329,10 +340,11 @@
 	}
 
 	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-	while (my_cq->nr_callbacks) {
+	while (my_cq->nr_events) {
 		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-		yield();
+		wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq));
 		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		/* recheck nr_events to assure no cqe has just arrived */
 	}
 
 	idr_remove(&ehca_cq_idr, my_cq->token);