IB/qib: Optimize CQ callbacks
The current workqueue implemention has the following performance
deficiencies on QDR HCAs:
- The CQ call backs tend to run on the CPUs processing the
receive queues
- The single thread queue isn't optimal for multiple HCAs
This patch adds a dedicated per HCA bound thread to process CQ callbacks.
Reviewed-by: Ramkrishna Vepa <ramkrishna.vepa@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 2ee82e6..3a78b92c 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1,7 +1,7 @@
#ifndef _QIB_KERNEL_H
#define _QIB_KERNEL_H
/*
- * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -51,6 +51,7 @@
#include <linux/completion.h>
#include <linux/kref.h>
#include <linux/sched.h>
+#include <linux/kthread.h>
#include "qib_common.h"
#include "qib_verbs.h"
@@ -1090,6 +1091,8 @@
u16 psxmitwait_check_rate;
/* high volume overflow errors defered to tasklet */
struct tasklet_struct error_tasklet;
+ /* per device cq worker */
+ struct kthread_worker *worker;
int assigned_node_id; /* NUMA node closest to HCA */
};