IB/mthca: Write FW commands through doorbell page

This patch is checks whether the HCA supports posting FW commands
through a doorbell page (user access region 0, or "UAR0").  If this is
supported, the driver maps UAR0 and uses it for FW commands. This can
be controlled by the value of a writable module parameter
fw_cmd_doorbell.  When the parameter is 0, the commands are posted
through HCR using the old method; otherwise if HCA is capable commands
go through UAR0.

This use of UAR0 to post commands eliminates the need for polling the
"go" bit prior to posting a new command. Since reading from a PCI
device is much more expensive then issuing a posted write, it is
expected that issuing FW commands this way will provide better CPU
utilization.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 890c060..d1e7ecb 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -182,25 +182,58 @@
 	u8                status;
 };
 
+static int fw_cmd_doorbell = 1;
+module_param(fw_cmd_doorbell, int, 0644);
+MODULE_PARM_DESC(fw_cmd_doorbell, "post FW commands through doorbell page if nonzero "
+		 "(and supported by FW)");
+
 static inline int go_bit(struct mthca_dev *dev)
 {
 	return readl(dev->hcr + HCR_STATUS_OFFSET) &
 		swab32(1 << HCR_GO_BIT);
 }
 
-static int mthca_cmd_post(struct mthca_dev *dev,
-			  u64 in_param,
-			  u64 out_param,
-			  u32 in_modifier,
-			  u8 op_modifier,
-			  u16 op,
-			  u16 token,
-			  int event)
+static void mthca_cmd_post_dbell(struct mthca_dev *dev,
+				 u64 in_param,
+				 u64 out_param,
+				 u32 in_modifier,
+				 u8 op_modifier,
+				 u16 op,
+				 u16 token)
 {
-	int err = 0;
+	void __iomem *ptr = dev->cmd.dbell_map;
+	u16 *offs = dev->cmd.dbell_offsets;
 
-	mutex_lock(&dev->cmd.hcr_mutex);
+	__raw_writel((__force u32) cpu_to_be32(in_param >> 32),           ptr + offs[0]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful),  ptr + offs[1]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32(in_modifier),              ptr + offs[2]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32(out_param >> 32),          ptr + offs[3]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), ptr + offs[4]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32(token << 16),              ptr + offs[5]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT)                |
+					       (1 << HCA_E_BIT)                 |
+					       (op_modifier << HCR_OPMOD_SHIFT) |
+					        op),                      ptr + offs[6]);
+	wmb();
+	__raw_writel((__force u32) 0,                                     ptr + offs[7]);
+	wmb();
+}
 
+static int mthca_cmd_post_hcr(struct mthca_dev *dev,
+			      u64 in_param,
+			      u64 out_param,
+			      u32 in_modifier,
+			      u8 op_modifier,
+			      u16 op,
+			      u16 token,
+			      int event)
+{
 	if (event) {
 		unsigned long end = jiffies + GO_BIT_TIMEOUT;
 
@@ -210,10 +243,8 @@
 		}
 	}
 
-	if (go_bit(dev)) {
-		err = -EAGAIN;
-		goto out;
-	}
+	if (go_bit(dev))
+		return -EAGAIN;
 
 	/*
 	 * We use writel (instead of something like memcpy_toio)
@@ -236,7 +267,29 @@
 					       (op_modifier << HCR_OPMOD_SHIFT) |
 					       op),                       dev->hcr + 6 * 4);
 
-out:
+	return 0;
+}
+
+static int mthca_cmd_post(struct mthca_dev *dev,
+			  u64 in_param,
+			  u64 out_param,
+			  u32 in_modifier,
+			  u8 op_modifier,
+			  u16 op,
+			  u16 token,
+			  int event)
+{
+	int err = 0;
+
+	mutex_lock(&dev->cmd.hcr_mutex);
+
+	if (event && dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS && fw_cmd_doorbell)
+		mthca_cmd_post_dbell(dev, in_param, out_param, in_modifier,
+					   op_modifier, op, token);
+	else
+		err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier,
+					 op_modifier, op, token, event);
+
 	mutex_unlock(&dev->cmd.hcr_mutex);
 	return err;
 }
@@ -386,7 +439,7 @@
 			 unsigned long timeout,
 			 u8 *status)
 {
-	if (dev->cmd.use_events)
+	if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
 		return mthca_cmd_wait(dev, in_param, &out_param, 0,
 				      in_modifier, op_modifier, op,
 				      timeout, status);
@@ -423,7 +476,7 @@
 			 unsigned long timeout,
 			 u8 *status)
 {
-	if (dev->cmd.use_events)
+	if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
 		return mthca_cmd_wait(dev, in_param, out_param, 1,
 				      in_modifier, op_modifier, op,
 				      timeout, status);
@@ -437,7 +490,7 @@
 {
 	mutex_init(&dev->cmd.hcr_mutex);
 	sema_init(&dev->cmd.poll_sem, 1);
-	dev->cmd.use_events = 0;
+	dev->cmd.flags = 0;
 
 	dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE,
 			   MTHCA_HCR_SIZE);
@@ -461,6 +514,8 @@
 {
 	pci_pool_destroy(dev->cmd.pool);
 	iounmap(dev->hcr);
+	if (dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS)
+		iounmap(dev->cmd.dbell_map);
 }
 
 /*
@@ -498,7 +553,8 @@
 		; /* nothing */
 	--dev->cmd.token_mask;
 
-	dev->cmd.use_events = 1;
+	dev->cmd.flags |= MTHCA_CMD_USE_EVENTS;
+
 	down(&dev->cmd.poll_sem);
 
 	return 0;
@@ -511,7 +567,7 @@
 {
 	int i;
 
-	dev->cmd.use_events = 0;
+	dev->cmd.flags &= ~MTHCA_CMD_USE_EVENTS;
 
 	for (i = 0; i < dev->cmd.max_cmds; ++i)
 		down(&dev->cmd.event_sem);
@@ -661,12 +717,41 @@
 	return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status);
 }
 
+static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base)
+{
+	unsigned long addr;
+	u16 max_off = 0;
+	int i;
+
+	for (i = 0; i < 8; ++i)
+		max_off = max(max_off, dev->cmd.dbell_offsets[i]);
+
+	if ((base & PAGE_MASK) != ((base + max_off) & PAGE_MASK)) {
+		mthca_warn(dev, "Firmware doorbell region at 0x%016llx, "
+			   "length 0x%x crosses a page boundary\n",
+			   (unsigned long long) base, max_off);
+		return;
+	}
+
+	addr = pci_resource_start(dev->pdev, 2) +
+		((pci_resource_len(dev->pdev, 2) - 1) & base);
+	dev->cmd.dbell_map = ioremap(addr, max_off + sizeof(u32));
+	if (!dev->cmd.dbell_map)
+		return;
+
+	dev->cmd.flags |= MTHCA_CMD_POST_DOORBELLS;
+	mthca_dbg(dev, "Mapped doorbell page for posting FW commands\n");
+}
+
 int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
 {
 	struct mthca_mailbox *mailbox;
 	u32 *outbox;
+	u64 base;
+	u32 tmp;
 	int err = 0;
 	u8 lg;
+	int i;
 
 #define QUERY_FW_OUT_SIZE             0x100
 #define QUERY_FW_VER_OFFSET            0x00
@@ -674,6 +759,10 @@
 #define QUERY_FW_ERR_START_OFFSET      0x30
 #define QUERY_FW_ERR_SIZE_OFFSET       0x38
 
+#define QUERY_FW_CMD_DB_EN_OFFSET      0x10
+#define QUERY_FW_CMD_DB_OFFSET         0x50
+#define QUERY_FW_CMD_DB_BASE           0x60
+
 #define QUERY_FW_START_OFFSET          0x20
 #define QUERY_FW_END_OFFSET            0x28
 
@@ -702,16 +791,29 @@
 		((dev->fw_ver & 0xffff0000ull) >> 16) |
 		((dev->fw_ver & 0x0000ffffull) << 16);
 
+	mthca_dbg(dev, "FW version %012llx, max commands %d\n",
+		  (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
+
 	MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
 	dev->cmd.max_cmds = 1 << lg;
 	MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET);
 	MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
 
-	mthca_dbg(dev, "FW version %012llx, max commands %d\n",
-		  (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
 	mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n",
 		  (unsigned long long) dev->catas_err.addr, dev->catas_err.size);
 
+	MTHCA_GET(tmp, outbox, QUERY_FW_CMD_DB_EN_OFFSET);
+	if (tmp & 0x1) {
+		mthca_dbg(dev, "FW supports commands through doorbells\n");
+
+		MTHCA_GET(base, outbox, QUERY_FW_CMD_DB_BASE);
+		for (i = 0; i < MTHCA_CMD_NUM_DBELL_DWORDS; ++i)
+			MTHCA_GET(dev->cmd.dbell_offsets[i], outbox,
+				  QUERY_FW_CMD_DB_OFFSET + (i << 1));
+
+		mthca_setup_cmd_doorbells(dev, base);
+	}
+
 	if (mthca_is_memfree(dev)) {
 		MTHCA_GET(dev->fw.arbel.fw_pages,       outbox, QUERY_FW_SIZE_OFFSET);
 		MTHCA_GET(dev->fw.arbel.clr_int_base,   outbox, QUERY_FW_CLR_INT_BASE_OFFSET);