IB/mlx5: Modify MAD reading counters method to use counter registers

Modify mlx5_ib_process_mad to use PPCNT and query_vport commands
instead of MAD_IFC, as MAD_IFC is deprecated on new firmware
versions (and doesn't support RoCE anyway).

Traffic counters exist in both 32-bit and 64-bit forms.
Declaring support of extended coutners results in traffic counters
to be read in their 64-bit form only via the query_vport command.
Error counters exist only in 32-bit form and read via PPCNT command.

This commit also adds counters support in RoCE.

Signed-off-by: Meny Yossefi <menyy@mellanox.com>
Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index b84d13a..41d8a00 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -31,8 +31,10 @@
  */
 
 #include <linux/mlx5/cmd.h>
+#include <linux/mlx5/vport.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
+#include <rdma/ib_pma.h>
 #include "mlx5_ib.h"
 
 enum {
@@ -57,20 +59,12 @@
 	return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port);
 }
 
-int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-			const struct ib_mad_hdr *in, size_t in_mad_size,
-			struct ib_mad_hdr *out, size_t *out_mad_size,
-			u16 *out_mad_pkey_index)
+static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		       const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		       const struct ib_mad *in_mad, struct ib_mad *out_mad)
 {
 	u16 slid;
 	int err;
-	const struct ib_mad *in_mad = (const struct ib_mad *)in;
-	struct ib_mad *out_mad = (struct ib_mad *)out;
-
-	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-			 *out_mad_size != sizeof(*out_mad)))
-		return IB_MAD_RESULT_FAILURE;
 
 	slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
 
@@ -117,6 +111,156 @@
 	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 }
 
+static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
+			       void *out)
+{
+#define MLX5_SUM_CNT(p, cntr1, cntr2)	\
+	(MLX5_GET64(query_vport_counter_out, p, cntr1) + \
+	MLX5_GET64(query_vport_counter_out, p, cntr2))
+
+	pma_cnt_ext->port_xmit_data =
+		cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets,
+					 transmitted_ib_multicast.octets) >> 2);
+	pma_cnt_ext->port_xmit_data =
+		cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets,
+					 received_ib_multicast.octets) >> 2);
+	pma_cnt_ext->port_xmit_packets =
+		cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.packets,
+					 transmitted_ib_multicast.packets));
+	pma_cnt_ext->port_rcv_packets =
+		cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.packets,
+					 received_ib_multicast.packets));
+	pma_cnt_ext->port_unicast_xmit_packets =
+		MLX5_GET64_BE(query_vport_counter_out,
+			      out, transmitted_ib_unicast.packets);
+	pma_cnt_ext->port_unicast_rcv_packets =
+		MLX5_GET64_BE(query_vport_counter_out,
+			      out, received_ib_unicast.packets);
+	pma_cnt_ext->port_multicast_xmit_packets =
+		MLX5_GET64_BE(query_vport_counter_out,
+			      out, transmitted_ib_multicast.packets);
+	pma_cnt_ext->port_multicast_rcv_packets =
+		MLX5_GET64_BE(query_vport_counter_out,
+			      out, received_ib_multicast.packets);
+}
+
+static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
+			   void *out)
+{
+	/* Traffic counters will be reported in
+	 * their 64bit form via ib_pma_portcounters_ext by default.
+	 */
+	void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out,
+				     counter_set);
+
+#define MLX5_ASSIGN_PMA_CNTR(counter_var, counter_name)	{		\
+	counter_var = MLX5_GET_BE(typeof(counter_var),			\
+				  ib_port_cntrs_grp_data_layout,	\
+				  out_pma, counter_name);		\
+	}
+
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->symbol_error_counter,
+			     symbol_error_counter);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_error_recovery_counter,
+			     link_error_recovery_counter);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_downed_counter,
+			     link_downed_counter);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_errors,
+			     port_rcv_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_remphys_errors,
+			     port_rcv_remote_physical_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_switch_relay_errors,
+			     port_rcv_switch_relay_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_discards,
+			     port_xmit_discards);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors,
+			     port_xmit_constraint_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors,
+			     port_rcv_constraint_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors,
+			     link_overrun_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->vl15_dropped,
+			     vl_15_dropped);
+}
+
+static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
+			   const struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	int err;
+	void *out_cnt;
+
+	/* Decalring support of extended counters */
+	if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
+		struct ib_class_port_info cpi = {};
+
+		cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+		memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
+		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+	}
+
+	if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) {
+		struct ib_pma_portcounters_ext *pma_cnt_ext =
+			(struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+		int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+
+		out_cnt = mlx5_vzalloc(sz);
+		if (!out_cnt)
+			return IB_MAD_RESULT_FAILURE;
+
+		err = mlx5_core_query_vport_counter(dev->mdev, 0,
+						    port_num, out_cnt, sz);
+		if (!err)
+			pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
+	} else {
+		struct ib_pma_portcounters *pma_cnt =
+			(struct ib_pma_portcounters *)(out_mad->data + 40);
+		int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+		out_cnt = mlx5_vzalloc(sz);
+		if (!out_cnt)
+			return IB_MAD_RESULT_FAILURE;
+
+		err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num,
+					       out_cnt, sz);
+		if (!err)
+			pma_cnt_assign(pma_cnt, out_cnt);
+		}
+
+	kvfree(out_cnt);
+	if (err)
+		return IB_MAD_RESULT_FAILURE;
+
+	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+			const struct ib_mad_hdr *in, size_t in_mad_size,
+			struct ib_mad_hdr *out, size_t *out_mad_size,
+			u16 *out_mad_pkey_index)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	const struct ib_mad *in_mad = (const struct ib_mad *)in;
+	struct ib_mad *out_mad = (struct ib_mad *)out;
+
+	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
+			 *out_mad_size != sizeof(*out_mad)))
+		return IB_MAD_RESULT_FAILURE;
+
+	memset(out_mad->data, 0, sizeof(out_mad->data));
+
+	if (MLX5_CAP_GEN(mdev, vport_counters) &&
+	    in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
+	    in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
+		return process_pma_cmd(ibdev, port_num, in_mad, out_mad);
+	} else {
+		return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
+				   in_mad, out_mad);
+	}
+}
+
 int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
 {
 	struct ib_smp *in_mad  = NULL;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 99e2edc..12079fd 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -105,6 +105,29 @@
 	___t; \
 })
 
+/* Big endian getters */
+#define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\
+	__mlx5_64_off(typ, fld)))
+
+#define MLX5_GET_BE(type_t, typ, p, fld) ({				  \
+		type_t tmp;						  \
+		switch (sizeof(tmp)) {					  \
+		case sizeof(u8):					  \
+			tmp = (__force type_t)MLX5_GET(typ, p, fld);	  \
+			break;						  \
+		case sizeof(u16):					  \
+			tmp = (__force type_t)cpu_to_be16(MLX5_GET(typ, p, fld)); \
+			break;						  \
+		case sizeof(u32):					  \
+			tmp = (__force type_t)cpu_to_be32(MLX5_GET(typ, p, fld)); \
+			break;						  \
+		case sizeof(u64):					  \
+			tmp = (__force type_t)MLX5_GET64_BE(typ, p, fld); \
+			break;						  \
+			}						  \
+		tmp;							  \
+		})
+
 enum {
 	MLX5_MAX_COMMANDS		= 32,
 	MLX5_CMD_DATA_BLOCK_SIZE	= 512,