IB/mlx4: Add IPoIB checksum offload support

ConnectX devices support checksum generation and verification of TCP
and UDP packets for UD IPoIB messages.  This patch checks if the HCA
supports this and sets the IB_DEVICE_UD_IP_CSUM capability flag if it
does.  It implements support for handling the IB_SEND_IP_CSUM send
flag and setting the csum_ok field in receive work completions.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Ali Ayub <ali@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 7360bba..d2e32b0 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -297,6 +297,20 @@
 	wc->vendor_err = cqe->vendor_err_syndrome;
 }
 
+static int mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum)
+{
+	return ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4	|
+				      MLX4_CQE_IPOIB_STATUS_IPV4F	|
+				      MLX4_CQE_IPOIB_STATUS_IPV4OPT	|
+				      MLX4_CQE_IPOIB_STATUS_IPV6	|
+				      MLX4_CQE_IPOIB_STATUS_IPOK)) ==
+		cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4	|
+			    MLX4_CQE_IPOIB_STATUS_IPOK))		&&
+		(status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_UDP	|
+				      MLX4_CQE_IPOIB_STATUS_TCP))	&&
+		checksum == cpu_to_be16(0xffff);
+}
+
 static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 			    struct mlx4_ib_qp **cur_qp,
 			    struct ib_wc *wc)
@@ -434,6 +448,8 @@
 		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
 		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
 		wc->pkey_index     = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
+		wc->csum_ok	   = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,
+							   cqe->checksum);
 	}
 
 	return 0;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index d551201..6ea4746 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -99,6 +99,8 @@
 		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
 		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
+	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
+		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
 
 	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 		0xffffff;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index ac965ab..31b2b5b 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1436,6 +1436,9 @@
 			 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
 			(wr->send_flags & IB_SEND_SOLICITED ?
 			 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
+			((wr->send_flags & IB_SEND_IP_CSUM) ?
+			 cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
+				     MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
 			qp->sq_signal_bits;
 
 		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 61dc495..f494c3e 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -696,6 +696,10 @@
 	/* Check port for UD address vector: */
 	*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
 
+	/* Enable IPoIB checksumming if we can: */
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
+		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3);
+
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
 	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h
index 0181e0a..1243eba 100644
--- a/include/linux/mlx4/cq.h
+++ b/include/linux/mlx4/cq.h
@@ -45,11 +45,11 @@
 	u8			sl;
 	u8			reserved1;
 	__be16			rlid;
-	u32			reserved2;
+	__be32			ipoib_status;
 	__be32			byte_cnt;
 	__be16			wqe_index;
 	__be16			checksum;
-	u8			reserved3[3];
+	u8			reserved2[3];
 	u8			owner_sr_opcode;
 };
 
@@ -85,6 +85,16 @@
 	MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR		= 0x22,
 };
 
+enum {
+	MLX4_CQE_IPOIB_STATUS_IPV4			= 1 << 22,
+	MLX4_CQE_IPOIB_STATUS_IPV4F			= 1 << 23,
+	MLX4_CQE_IPOIB_STATUS_IPV6			= 1 << 24,
+	MLX4_CQE_IPOIB_STATUS_IPV4OPT			= 1 << 25,
+	MLX4_CQE_IPOIB_STATUS_TCP			= 1 << 26,
+	MLX4_CQE_IPOIB_STATUS_UDP			= 1 << 27,
+	MLX4_CQE_IPOIB_STATUS_IPOK			= 1 << 28,
+};
+
 static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd,
 			       void __iomem *uar_page,
 			       spinlock_t *doorbell_lock)
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 09a2230..31f9eb3 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -158,10 +158,12 @@
 #define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232)
 
 enum {
-	MLX4_WQE_CTRL_NEC	= 1 << 29,
-	MLX4_WQE_CTRL_FENCE	= 1 << 6,
-	MLX4_WQE_CTRL_CQ_UPDATE	= 3 << 2,
-	MLX4_WQE_CTRL_SOLICITED	= 1 << 1,
+	MLX4_WQE_CTRL_NEC		= 1 << 29,
+	MLX4_WQE_CTRL_FENCE		= 1 << 6,
+	MLX4_WQE_CTRL_CQ_UPDATE		= 3 << 2,
+	MLX4_WQE_CTRL_SOLICITED		= 1 << 1,
+	MLX4_WQE_CTRL_IP_CSUM		= 1 << 4,
+	MLX4_WQE_CTRL_TCP_UDP_CSUM	= 1 << 5,
 };
 
 struct mlx4_wqe_ctrl_seg {