Merge branch 'tc_flower_offload'

Amir Vadai says:

====================
sched,mlx5: Offloaded TC flower filter statistics

This patchset introduces counters support for offloaded cls_flower filters.
When the user calls 'tc show -s ..', fl_dump is called.
Before fl_dump() returns the statistics, it calls the NIC driver (using a new
ndo_setup_tc() command - TC_CLSFLOWER_STATS) to read the hardware counters and
update the statistics accordingly. A new TC action op was added (stats_update())
to be used by the NIC driver to update the statistics.

Patchset was applied and tested over commit ed7cbbc ("udp: Resolve NULL pointer
dereference over flow-based vxlan device")
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index b531d4f..9ea7b58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -2,7 +2,7 @@
 
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o   \
-		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o
+		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o fs_counters.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
 		en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index eb926e1..dcd2df6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -294,6 +294,7 @@
 	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
 	case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
 	case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
+	case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -395,6 +396,8 @@
 	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -406,178 +409,142 @@
 
 const char *mlx5_command_str(int command)
 {
+#define MLX5_COMMAND_STR_CASE(__cmd) case MLX5_CMD_OP_ ## __cmd: return #__cmd
+
 	switch (command) {
-	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
-		return "QUERY_HCA_VPORT_CONTEXT";
-
-	case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
-		return "MODIFY_HCA_VPORT_CONTEXT";
-
-	case MLX5_CMD_OP_QUERY_HCA_CAP:
-		return "QUERY_HCA_CAP";
-
-	case MLX5_CMD_OP_SET_HCA_CAP:
-		return "SET_HCA_CAP";
-
-	case MLX5_CMD_OP_QUERY_ADAPTER:
-		return "QUERY_ADAPTER";
-
-	case MLX5_CMD_OP_INIT_HCA:
-		return "INIT_HCA";
-
-	case MLX5_CMD_OP_TEARDOWN_HCA:
-		return "TEARDOWN_HCA";
-
-	case MLX5_CMD_OP_ENABLE_HCA:
-		return "MLX5_CMD_OP_ENABLE_HCA";
-
-	case MLX5_CMD_OP_DISABLE_HCA:
-		return "MLX5_CMD_OP_DISABLE_HCA";
-
-	case MLX5_CMD_OP_QUERY_PAGES:
-		return "QUERY_PAGES";
-
-	case MLX5_CMD_OP_MANAGE_PAGES:
-		return "MANAGE_PAGES";
-
-	case MLX5_CMD_OP_CREATE_MKEY:
-		return "CREATE_MKEY";
-
-	case MLX5_CMD_OP_QUERY_MKEY:
-		return "QUERY_MKEY";
-
-	case MLX5_CMD_OP_DESTROY_MKEY:
-		return "DESTROY_MKEY";
-
-	case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
-		return "QUERY_SPECIAL_CONTEXTS";
-
-	case MLX5_CMD_OP_CREATE_EQ:
-		return "CREATE_EQ";
-
-	case MLX5_CMD_OP_DESTROY_EQ:
-		return "DESTROY_EQ";
-
-	case MLX5_CMD_OP_QUERY_EQ:
-		return "QUERY_EQ";
-
-	case MLX5_CMD_OP_CREATE_CQ:
-		return "CREATE_CQ";
-
-	case MLX5_CMD_OP_DESTROY_CQ:
-		return "DESTROY_CQ";
-
-	case MLX5_CMD_OP_QUERY_CQ:
-		return "QUERY_CQ";
-
-	case MLX5_CMD_OP_MODIFY_CQ:
-		return "MODIFY_CQ";
-
-	case MLX5_CMD_OP_CREATE_QP:
-		return "CREATE_QP";
-
-	case MLX5_CMD_OP_DESTROY_QP:
-		return "DESTROY_QP";
-
-	case MLX5_CMD_OP_RST2INIT_QP:
-		return "RST2INIT_QP";
-
-	case MLX5_CMD_OP_INIT2RTR_QP:
-		return "INIT2RTR_QP";
-
-	case MLX5_CMD_OP_RTR2RTS_QP:
-		return "RTR2RTS_QP";
-
-	case MLX5_CMD_OP_RTS2RTS_QP:
-		return "RTS2RTS_QP";
-
-	case MLX5_CMD_OP_SQERR2RTS_QP:
-		return "SQERR2RTS_QP";
-
-	case MLX5_CMD_OP_2ERR_QP:
-		return "2ERR_QP";
-
-	case MLX5_CMD_OP_2RST_QP:
-		return "2RST_QP";
-
-	case MLX5_CMD_OP_QUERY_QP:
-		return "QUERY_QP";
-
-	case MLX5_CMD_OP_MAD_IFC:
-		return "MAD_IFC";
-
-	case MLX5_CMD_OP_INIT2INIT_QP:
-		return "INIT2INIT_QP";
-
-	case MLX5_CMD_OP_CREATE_PSV:
-		return "CREATE_PSV";
-
-	case MLX5_CMD_OP_DESTROY_PSV:
-		return "DESTROY_PSV";
-
-	case MLX5_CMD_OP_CREATE_SRQ:
-		return "CREATE_SRQ";
-
-	case MLX5_CMD_OP_DESTROY_SRQ:
-		return "DESTROY_SRQ";
-
-	case MLX5_CMD_OP_QUERY_SRQ:
-		return "QUERY_SRQ";
-
-	case MLX5_CMD_OP_ARM_RQ:
-		return "ARM_RQ";
-
-	case MLX5_CMD_OP_CREATE_XRC_SRQ:
-		return "CREATE_XRC_SRQ";
-
-	case MLX5_CMD_OP_DESTROY_XRC_SRQ:
-		return "DESTROY_XRC_SRQ";
-
-	case MLX5_CMD_OP_QUERY_XRC_SRQ:
-		return "QUERY_XRC_SRQ";
-
-	case MLX5_CMD_OP_ARM_XRC_SRQ:
-		return "ARM_XRC_SRQ";
-
-	case MLX5_CMD_OP_ALLOC_PD:
-		return "ALLOC_PD";
-
-	case MLX5_CMD_OP_DEALLOC_PD:
-		return "DEALLOC_PD";
-
-	case MLX5_CMD_OP_ALLOC_UAR:
-		return "ALLOC_UAR";
-
-	case MLX5_CMD_OP_DEALLOC_UAR:
-		return "DEALLOC_UAR";
-
-	case MLX5_CMD_OP_ATTACH_TO_MCG:
-		return "ATTACH_TO_MCG";
-
-	case MLX5_CMD_OP_DETTACH_FROM_MCG:
-		return "DETTACH_FROM_MCG";
-
-	case MLX5_CMD_OP_ALLOC_XRCD:
-		return "ALLOC_XRCD";
-
-	case MLX5_CMD_OP_DEALLOC_XRCD:
-		return "DEALLOC_XRCD";
-
-	case MLX5_CMD_OP_ACCESS_REG:
-		return "MLX5_CMD_OP_ACCESS_REG";
-
-	case MLX5_CMD_OP_SET_WOL_ROL:
-		return "SET_WOL_ROL";
-
-	case MLX5_CMD_OP_QUERY_WOL_ROL:
-		return "QUERY_WOL_ROL";
-
-	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
-		return "ADD_VXLAN_UDP_DPORT";
-
-	case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT:
-		return "DELETE_VXLAN_UDP_DPORT";
-
+	MLX5_COMMAND_STR_CASE(QUERY_HCA_CAP);
+	MLX5_COMMAND_STR_CASE(QUERY_ADAPTER);
+	MLX5_COMMAND_STR_CASE(INIT_HCA);
+	MLX5_COMMAND_STR_CASE(TEARDOWN_HCA);
+	MLX5_COMMAND_STR_CASE(ENABLE_HCA);
+	MLX5_COMMAND_STR_CASE(DISABLE_HCA);
+	MLX5_COMMAND_STR_CASE(QUERY_PAGES);
+	MLX5_COMMAND_STR_CASE(MANAGE_PAGES);
+	MLX5_COMMAND_STR_CASE(SET_HCA_CAP);
+	MLX5_COMMAND_STR_CASE(QUERY_ISSI);
+	MLX5_COMMAND_STR_CASE(SET_ISSI);
+	MLX5_COMMAND_STR_CASE(CREATE_MKEY);
+	MLX5_COMMAND_STR_CASE(QUERY_MKEY);
+	MLX5_COMMAND_STR_CASE(DESTROY_MKEY);
+	MLX5_COMMAND_STR_CASE(QUERY_SPECIAL_CONTEXTS);
+	MLX5_COMMAND_STR_CASE(PAGE_FAULT_RESUME);
+	MLX5_COMMAND_STR_CASE(CREATE_EQ);
+	MLX5_COMMAND_STR_CASE(DESTROY_EQ);
+	MLX5_COMMAND_STR_CASE(QUERY_EQ);
+	MLX5_COMMAND_STR_CASE(GEN_EQE);
+	MLX5_COMMAND_STR_CASE(CREATE_CQ);
+	MLX5_COMMAND_STR_CASE(DESTROY_CQ);
+	MLX5_COMMAND_STR_CASE(QUERY_CQ);
+	MLX5_COMMAND_STR_CASE(MODIFY_CQ);
+	MLX5_COMMAND_STR_CASE(CREATE_QP);
+	MLX5_COMMAND_STR_CASE(DESTROY_QP);
+	MLX5_COMMAND_STR_CASE(RST2INIT_QP);
+	MLX5_COMMAND_STR_CASE(INIT2RTR_QP);
+	MLX5_COMMAND_STR_CASE(RTR2RTS_QP);
+	MLX5_COMMAND_STR_CASE(RTS2RTS_QP);
+	MLX5_COMMAND_STR_CASE(SQERR2RTS_QP);
+	MLX5_COMMAND_STR_CASE(2ERR_QP);
+	MLX5_COMMAND_STR_CASE(2RST_QP);
+	MLX5_COMMAND_STR_CASE(QUERY_QP);
+	MLX5_COMMAND_STR_CASE(SQD_RTS_QP);
+	MLX5_COMMAND_STR_CASE(INIT2INIT_QP);
+	MLX5_COMMAND_STR_CASE(CREATE_PSV);
+	MLX5_COMMAND_STR_CASE(DESTROY_PSV);
+	MLX5_COMMAND_STR_CASE(CREATE_SRQ);
+	MLX5_COMMAND_STR_CASE(DESTROY_SRQ);
+	MLX5_COMMAND_STR_CASE(QUERY_SRQ);
+	MLX5_COMMAND_STR_CASE(ARM_RQ);
+	MLX5_COMMAND_STR_CASE(CREATE_XRC_SRQ);
+	MLX5_COMMAND_STR_CASE(DESTROY_XRC_SRQ);
+	MLX5_COMMAND_STR_CASE(QUERY_XRC_SRQ);
+	MLX5_COMMAND_STR_CASE(ARM_XRC_SRQ);
+	MLX5_COMMAND_STR_CASE(CREATE_DCT);
+	MLX5_COMMAND_STR_CASE(DESTROY_DCT);
+	MLX5_COMMAND_STR_CASE(DRAIN_DCT);
+	MLX5_COMMAND_STR_CASE(QUERY_DCT);
+	MLX5_COMMAND_STR_CASE(ARM_DCT_FOR_KEY_VIOLATION);
+	MLX5_COMMAND_STR_CASE(QUERY_VPORT_STATE);
+	MLX5_COMMAND_STR_CASE(MODIFY_VPORT_STATE);
+	MLX5_COMMAND_STR_CASE(QUERY_ESW_VPORT_CONTEXT);
+	MLX5_COMMAND_STR_CASE(MODIFY_ESW_VPORT_CONTEXT);
+	MLX5_COMMAND_STR_CASE(QUERY_NIC_VPORT_CONTEXT);
+	MLX5_COMMAND_STR_CASE(MODIFY_NIC_VPORT_CONTEXT);
+	MLX5_COMMAND_STR_CASE(QUERY_ROCE_ADDRESS);
+	MLX5_COMMAND_STR_CASE(SET_ROCE_ADDRESS);
+	MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_CONTEXT);
+	MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT);
+	MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID);
+	MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY);
+	MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER);
+	MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
+	MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
+	MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+	MLX5_COMMAND_STR_CASE(ALLOC_PD);
+	MLX5_COMMAND_STR_CASE(DEALLOC_PD);
+	MLX5_COMMAND_STR_CASE(ALLOC_UAR);
+	MLX5_COMMAND_STR_CASE(DEALLOC_UAR);
+	MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION);
+	MLX5_COMMAND_STR_CASE(ACCESS_REG);
+	MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG);
+	MLX5_COMMAND_STR_CASE(DETTACH_FROM_MCG);
+	MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG);
+	MLX5_COMMAND_STR_CASE(MAD_IFC);
+	MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX);
+	MLX5_COMMAND_STR_CASE(SET_MAD_DEMUX);
+	MLX5_COMMAND_STR_CASE(NOP);
+	MLX5_COMMAND_STR_CASE(ALLOC_XRCD);
+	MLX5_COMMAND_STR_CASE(DEALLOC_XRCD);
+	MLX5_COMMAND_STR_CASE(ALLOC_TRANSPORT_DOMAIN);
+	MLX5_COMMAND_STR_CASE(DEALLOC_TRANSPORT_DOMAIN);
+	MLX5_COMMAND_STR_CASE(QUERY_CONG_STATUS);
+	MLX5_COMMAND_STR_CASE(MODIFY_CONG_STATUS);
+	MLX5_COMMAND_STR_CASE(QUERY_CONG_PARAMS);
+	MLX5_COMMAND_STR_CASE(MODIFY_CONG_PARAMS);
+	MLX5_COMMAND_STR_CASE(QUERY_CONG_STATISTICS);
+	MLX5_COMMAND_STR_CASE(ADD_VXLAN_UDP_DPORT);
+	MLX5_COMMAND_STR_CASE(DELETE_VXLAN_UDP_DPORT);
+	MLX5_COMMAND_STR_CASE(SET_L2_TABLE_ENTRY);
+	MLX5_COMMAND_STR_CASE(QUERY_L2_TABLE_ENTRY);
+	MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY);
+	MLX5_COMMAND_STR_CASE(SET_WOL_ROL);
+	MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL);
+	MLX5_COMMAND_STR_CASE(CREATE_TIR);
+	MLX5_COMMAND_STR_CASE(MODIFY_TIR);
+	MLX5_COMMAND_STR_CASE(DESTROY_TIR);
+	MLX5_COMMAND_STR_CASE(QUERY_TIR);
+	MLX5_COMMAND_STR_CASE(CREATE_SQ);
+	MLX5_COMMAND_STR_CASE(MODIFY_SQ);
+	MLX5_COMMAND_STR_CASE(DESTROY_SQ);
+	MLX5_COMMAND_STR_CASE(QUERY_SQ);
+	MLX5_COMMAND_STR_CASE(CREATE_RQ);
+	MLX5_COMMAND_STR_CASE(MODIFY_RQ);
+	MLX5_COMMAND_STR_CASE(DESTROY_RQ);
+	MLX5_COMMAND_STR_CASE(QUERY_RQ);
+	MLX5_COMMAND_STR_CASE(CREATE_RMP);
+	MLX5_COMMAND_STR_CASE(MODIFY_RMP);
+	MLX5_COMMAND_STR_CASE(DESTROY_RMP);
+	MLX5_COMMAND_STR_CASE(QUERY_RMP);
+	MLX5_COMMAND_STR_CASE(CREATE_TIS);
+	MLX5_COMMAND_STR_CASE(MODIFY_TIS);
+	MLX5_COMMAND_STR_CASE(DESTROY_TIS);
+	MLX5_COMMAND_STR_CASE(QUERY_TIS);
+	MLX5_COMMAND_STR_CASE(CREATE_RQT);
+	MLX5_COMMAND_STR_CASE(MODIFY_RQT);
+	MLX5_COMMAND_STR_CASE(DESTROY_RQT);
+	MLX5_COMMAND_STR_CASE(QUERY_RQT);
+	MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ROOT);
+	MLX5_COMMAND_STR_CASE(CREATE_FLOW_TABLE);
+	MLX5_COMMAND_STR_CASE(DESTROY_FLOW_TABLE);
+	MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE);
+	MLX5_COMMAND_STR_CASE(CREATE_FLOW_GROUP);
+	MLX5_COMMAND_STR_CASE(DESTROY_FLOW_GROUP);
+	MLX5_COMMAND_STR_CASE(QUERY_FLOW_GROUP);
+	MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ENTRY);
+	MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE_ENTRY);
+	MLX5_COMMAND_STR_CASE(DELETE_FLOW_TABLE_ENTRY);
+	MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER);
+	MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
+	MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0804070..fd43929 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2154,6 +2154,8 @@
 			return mlx5e_configure_flower(priv, proto, tc->cls_flower);
 		case TC_CLSFLOWER_DESTROY:
 			return mlx5e_delete_flower(priv, tc->cls_flower);
+		case TC_CLSFLOWER_STATS:
+			return mlx5e_stats_flower(priv, tc->cls_flower);
 		}
 	default:
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index ef017c0..704c3d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -53,13 +53,24 @@
 						u32 *match_c, u32 *match_v,
 						u32 action, u32 flow_tag)
 {
-	struct mlx5_flow_destination dest = {
-		.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
-		{.ft = priv->fs.vlan.ft.t},
-	};
+	struct mlx5_core_dev *dev = priv->mdev;
+	struct mlx5_flow_destination dest = { 0 };
+	struct mlx5_fc *counter = NULL;
 	struct mlx5_flow_rule *rule;
 	bool table_created = false;
 
+	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+		dest.ft = priv->fs.vlan.ft.t;
+	} else {
+		counter = mlx5_fc_create(dev, true);
+		if (IS_ERR(counter))
+			return ERR_CAST(counter);
+
+		dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+		dest.counter = counter;
+	}
+
 	if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
 		priv->fs.tc.t =
 			mlx5_create_auto_grouped_flow_table(priv->fs.ns,
@@ -70,7 +81,8 @@
 		if (IS_ERR(priv->fs.tc.t)) {
 			netdev_err(priv->netdev,
 				   "Failed to create tc offload table\n");
-			return ERR_CAST(priv->fs.tc.t);
+			rule = ERR_CAST(priv->fs.tc.t);
+			goto err_create_ft;
 		}
 
 		table_created = true;
@@ -79,12 +91,20 @@
 	rule = mlx5_add_flow_rule(priv->fs.tc.t, MLX5_MATCH_OUTER_HEADERS,
 				  match_c, match_v,
 				  action, flow_tag,
-				  action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST ? &dest : NULL);
+				  &dest);
 
-	if (IS_ERR(rule) && table_created) {
+	if (IS_ERR(rule))
+		goto err_add_rule;
+
+	return rule;
+
+err_add_rule:
+	if (table_created) {
 		mlx5_destroy_flow_table(priv->fs.tc.t);
 		priv->fs.tc.t = NULL;
 	}
+err_create_ft:
+	mlx5_fc_destroy(dev, counter);
 
 	return rule;
 }
@@ -92,8 +112,14 @@
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 			      struct mlx5_flow_rule *rule)
 {
+	struct mlx5_fc *counter = NULL;
+
+	counter = mlx5_flow_rule_counter(rule);
+
 	mlx5_del_flow_rule(rule);
 
+	mlx5_fc_destroy(priv->mdev, counter);
+
 	if (!mlx5e_tc_num_filters(priv)) {
 		mlx5_destroy_flow_table(priv->fs.tc.t);
 		priv->fs.tc.t = NULL;
@@ -286,6 +312,9 @@
 
 		if (is_tcf_gact_shot(a)) {
 			*action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+			if (MLX5_CAP_FLOWTABLE(priv->mdev,
+					       flow_table_properties_nic_receive.flow_counter))
+				*action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
 			continue;
 		}
 
@@ -394,6 +423,34 @@
 	return 0;
 }
 
+int mlx5e_stats_flower(struct mlx5e_priv *priv,
+		       struct tc_cls_flower_offload *f)
+{
+	struct mlx5e_tc_table *tc = &priv->fs.tc;
+	struct mlx5e_tc_flow *flow;
+	struct tc_action *a;
+	struct mlx5_fc *counter;
+	u64 bytes;
+	u64 packets;
+	u64 lastuse;
+
+	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
+				      tc->ht_params);
+	if (!flow)
+		return -EINVAL;
+
+	counter = mlx5_flow_rule_counter(flow->rule);
+	if (!counter)
+		return 0;
+
+	mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+
+	tc_for_each_action(a, f->exts)
+		tcf_action_stats_update(a, bytes, packets, lastuse);
+
+	return 0;
+}
+
 static const struct rhashtable_params mlx5e_tc_flow_ht_params = {
 	.head_offset = offsetof(struct mlx5e_tc_flow, node),
 	.key_offset = offsetof(struct mlx5e_tc_flow, cookie),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index a4f17b9..34bf903 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -43,6 +43,9 @@
 int mlx5e_delete_flower(struct mlx5e_priv *priv,
 			struct tc_cls_flower_offload *f);
 
+int mlx5e_stats_flower(struct mlx5e_priv *priv,
+		       struct tc_cls_flower_offload *f);
+
 static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
 {
 	return atomic_read(&priv->fs.tc.ht.nelems);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 9797768..a5bb6b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -241,17 +241,20 @@
 	MLX5_SET(flow_context, in_flow_context, group_id, group_id);
 	MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag);
 	MLX5_SET(flow_context, in_flow_context, action, fte->action);
-	MLX5_SET(flow_context, in_flow_context, destination_list_size,
-		 fte->dests_size);
 	in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
 				      match_value);
 	memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
 
+	in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
 	if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
-		in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+		int list_size = 0;
+
 		list_for_each_entry(dst, &fte->node.children, node.list) {
 			unsigned int id;
 
+			if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+				continue;
+
 			MLX5_SET(dest_format_struct, in_dests, destination_type,
 				 dst->dest_attr.type);
 			if (dst->dest_attr.type ==
@@ -262,8 +265,31 @@
 			}
 			MLX5_SET(dest_format_struct, in_dests, destination_id, id);
 			in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+			list_size++;
 		}
+
+		MLX5_SET(flow_context, in_flow_context, destination_list_size,
+			 list_size);
 	}
+
+	if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+		int list_size = 0;
+
+		list_for_each_entry(dst, &fte->node.children, node.list) {
+			if (dst->dest_attr.type !=
+			    MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+				continue;
+
+			MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
+				 dst->dest_attr.counter->id);
+			in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+			list_size++;
+		}
+
+		MLX5_SET(flow_context, in_flow_context, flow_counter_list_size,
+			 list_size);
+	}
+
 	memset(out, 0, sizeof(out));
 	err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
 					 sizeof(out));
@@ -283,18 +309,16 @@
 int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
 			struct mlx5_flow_table *ft,
 			unsigned group_id,
+			int modify_mask,
 			struct fs_fte *fte)
 {
 	int opmod;
-	int modify_mask;
 	int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev,
 						flow_table_properties_nic_receive.
 						flow_modify_en);
 	if (!atomic_mod_cap)
 		return -ENOTSUPP;
 	opmod = 1;
-	modify_mask = 1 <<
-		MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST;
 
 	return	mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte);
 }
@@ -323,3 +347,69 @@
 
 	return err;
 }
+
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id)
+{
+	u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)];
+	u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)];
+	int err;
+
+	memset(in, 0, sizeof(in));
+	memset(out, 0, sizeof(out));
+
+	MLX5_SET(alloc_flow_counter_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
+
+	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+					 sizeof(out));
+	if (err)
+		return err;
+
+	*id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+
+	return 0;
+}
+
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id)
+{
+	u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)];
+	u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)];
+
+	memset(in, 0, sizeof(in));
+	memset(out, 0, sizeof(out));
+
+	MLX5_SET(dealloc_flow_counter_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+	MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+					  sizeof(out));
+}
+
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
+		      u64 *packets, u64 *bytes)
+{
+	u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+		MLX5_ST_SZ_BYTES(traffic_counter)];
+	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
+	void *stats;
+	int err = 0;
+
+	memset(in, 0, sizeof(in));
+	memset(out, 0, sizeof(out));
+
+	MLX5_SET(query_flow_counter_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+	MLX5_SET(query_flow_counter_in, in, flow_counter_id, id);
+
+	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics);
+	*packets = MLX5_GET64(traffic_counter, stats, packets);
+	*bytes = MLX5_GET64(traffic_counter, stats, octets);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index c97b4a0..fc4f7b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -62,6 +62,7 @@
 int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
 			struct mlx5_flow_table *ft,
 			unsigned group_id,
+			int modify_mask,
 			struct fs_fte *fte);
 
 int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
@@ -70,4 +71,9 @@
 
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
 			    struct mlx5_flow_table *ft);
+
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id);
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id);
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
+		      u64 *packets, u64 *bytes);
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 659a698..8b5f0b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -344,6 +344,7 @@
 	struct mlx5_flow_group *fg;
 	struct fs_fte *fte;
 	u32	*match_value;
+	int modify_mask;
 	struct mlx5_core_dev *dev = get_dev(node);
 	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
 	int err;
@@ -367,8 +368,11 @@
 	}
 	if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
 	    --fte->dests_size) {
+		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST),
 		err = mlx5_cmd_update_fte(dev, ft,
-					  fg->id, fte);
+					  fg->id,
+					  modify_mask,
+					  fte);
 		if (err)
 			pr_warn("%s can't del rule fg id=%d fte_index=%d\n",
 				__func__, fg->id, fte->index);
@@ -615,6 +619,7 @@
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *fg;
 	struct fs_fte *fte;
+	int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
 	int err = 0;
 
 	fs_get_obj(fte, rule->node.parent);
@@ -626,7 +631,9 @@
 
 	memcpy(&rule->dest_attr, dest, sizeof(*dest));
 	err = mlx5_cmd_update_fte(get_dev(&ft->node),
-				  ft, fg->id, fte);
+				  ft, fg->id,
+				  modify_mask,
+				  fte);
 	unlock_ref_node(&fte->node);
 
 	return err;
@@ -877,6 +884,7 @@
 {
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_rule *rule;
+	int modify_mask = 0;
 	int err;
 
 	rule = alloc_rule(dest);
@@ -892,14 +900,20 @@
 		list_add(&rule->node.list, &fte->node.children);
 	else
 		list_add_tail(&rule->node.list, &fte->node.children);
-	if (dest)
+	if (dest) {
 		fte->dests_size++;
+
+		modify_mask |= dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ?
+			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS) :
+			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+	}
+
 	if (fte->dests_size == 1 || !dest)
 		err = mlx5_cmd_create_fte(get_dev(&ft->node),
 					  ft, fg->id, fte);
 	else
 		err = mlx5_cmd_update_fte(get_dev(&ft->node),
-					  ft, fg->id, fte);
+					  ft, fg->id, modify_mask, fte);
 	if (err)
 		goto free_rule;
 
@@ -1092,10 +1106,40 @@
 	return rule;
 }
 
+struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule)
+{
+	struct mlx5_flow_rule *dst;
+	struct fs_fte *fte;
+
+	fs_get_obj(fte, rule->node.parent);
+
+	fs_for_each_dst(dst, fte) {
+		if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+			return dst->dest_attr.counter;
+	}
+
+	return NULL;
+}
+
+static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
+{
+	if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT))
+		return !counter;
+
+	if (!counter)
+		return false;
+
+	/* Hardware support counter for a drop action only */
+	return action == (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT);
+}
+
 static bool dest_is_valid(struct mlx5_flow_destination *dest,
 			  u32 action,
 			  struct mlx5_flow_table *ft)
 {
+	if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
+		return counter_is_valid(dest->counter, action);
+
 	if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
 		return true;
 
@@ -1727,6 +1771,7 @@
 	cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);
 	cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns);
 	cleanup_single_prio_root_ns(dev, dev->priv.esw_ingress_root_ns);
+	mlx5_cleanup_fc_stats(dev);
 }
 
 static int init_fdb_root_ns(struct mlx5_core_dev *dev)
@@ -1783,10 +1828,14 @@
 {
 	int err = 0;
 
+	err = mlx5_init_fc_stats(dev);
+	if (err)
+		return err;
+
 	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
 		err = init_root_ns(dev);
 		if (err)
-			return err;
+			goto err;
 	}
 	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
 		err = init_fdb_root_ns(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 8e76cc5..aa41a73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -96,6 +96,28 @@
 	struct list_head		fwd_rules;
 };
 
+struct mlx5_fc_cache {
+	u64 packets;
+	u64 bytes;
+	u64 lastuse;
+};
+
+struct mlx5_fc {
+	struct list_head list;
+
+	/* last{packets,bytes} members are used when calculating the delta since
+	 * last reading
+	 */
+	u64 lastpackets;
+	u64 lastbytes;
+
+	u16 id;
+	bool deleted;
+	bool aging;
+
+	struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
 /* Type of children is mlx5_flow_rule */
 struct fs_fte {
 	struct fs_node			node;
@@ -105,6 +127,7 @@
 	u32				index;
 	u32				action;
 	enum fs_fte_status		status;
+	struct mlx5_fc			*counter;
 };
 
 /* Type of children is mlx5_flow_table/namespace */
@@ -146,6 +169,9 @@
 	struct mutex			chain_lock;
 };
 
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev);
+
 int mlx5_init_fs(struct mlx5_core_dev *dev);
 void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
new file mode 100644
index 0000000..164dc37
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+
+#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
+
+/* locking scheme:
+ *
+ * It is the responsibility of the user to prevent concurrent calls or bad
+ * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference
+ * to struct mlx5_fc.
+ * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a
+ * dump (access to struct mlx5_fc) after a counter is destroyed.
+ *
+ * access to counter list:
+ * - create (user context)
+ *   - mlx5_fc_create() only adds to an addlist to be used by
+ *     mlx5_fc_stats_query_work(). addlist is protected by a spinlock.
+ *   - spawn thread to do the actual destroy
+ *
+ * - destroy (user context)
+ *   - mark a counter as deleted
+ *   - spawn thread to do the actual del
+ *
+ * - dump (user context)
+ *   user should not call dump after destroy
+ *
+ * - query (single thread workqueue context)
+ *   destroy/dump - no conflict (see destroy)
+ *   query/dump - packets and bytes might be inconsistent (since update is not
+ *                atomic)
+ *   query/create - no conflict (see create)
+ *   since every create/destroy spawn the work, only after necessary time has
+ *   elapsed, the thread will actually query the hardware.
+ */
+
+static void mlx5_fc_stats_work(struct work_struct *work)
+{
+	struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
+						 priv.fc_stats.work.work);
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	unsigned long now = jiffies;
+	struct mlx5_fc *counter;
+	struct mlx5_fc *tmp;
+	int err = 0;
+
+	spin_lock(&fc_stats->addlist_lock);
+
+	list_splice_tail_init(&fc_stats->addlist, &fc_stats->list);
+
+	if (!list_empty(&fc_stats->list))
+		queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD);
+
+	spin_unlock(&fc_stats->addlist_lock);
+
+	list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) {
+		struct mlx5_fc_cache *c = &counter->cache;
+		u64 packets;
+		u64 bytes;
+
+		if (counter->deleted) {
+			list_del(&counter->list);
+
+			mlx5_cmd_fc_free(dev, counter->id);
+
+			kfree(counter);
+			continue;
+		}
+
+		if (time_before(now, fc_stats->next_query))
+			continue;
+
+		err = mlx5_cmd_fc_query(dev, counter->id, &packets, &bytes);
+		if (err) {
+			pr_err("Error querying stats for counter id %d\n",
+			       counter->id);
+			continue;
+		}
+
+		if (packets == c->packets)
+			continue;
+
+		c->lastuse = jiffies;
+		c->packets = packets;
+		c->bytes   = bytes;
+	}
+
+	if (time_after_eq(now, fc_stats->next_query))
+		fc_stats->next_query = now + MLX5_FC_STATS_PERIOD;
+}
+
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	struct mlx5_fc *counter;
+	int err;
+
+	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+	if (!counter)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx5_cmd_fc_alloc(dev, &counter->id);
+	if (err)
+		goto err_out;
+
+	if (aging) {
+		counter->aging = true;
+
+		spin_lock(&fc_stats->addlist_lock);
+		list_add(&counter->list, &fc_stats->addlist);
+		spin_unlock(&fc_stats->addlist_lock);
+
+		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+	}
+
+	return counter;
+
+err_out:
+	kfree(counter);
+
+	return ERR_PTR(err);
+}
+
+void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+	if (!counter)
+		return;
+
+	if (counter->aging) {
+		counter->deleted = true;
+		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+		return;
+	}
+
+	mlx5_cmd_fc_free(dev, counter->id);
+	kfree(counter);
+}
+
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+	INIT_LIST_HEAD(&fc_stats->list);
+	INIT_LIST_HEAD(&fc_stats->addlist);
+	spin_lock_init(&fc_stats->addlist_lock);
+
+	fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
+	if (!fc_stats->wq)
+		return -ENOMEM;
+
+	INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
+
+	return 0;
+}
+
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	struct mlx5_fc *counter;
+	struct mlx5_fc *tmp;
+
+	cancel_delayed_work_sync(&dev->priv.fc_stats.work);
+	destroy_workqueue(dev->priv.fc_stats.wq);
+	dev->priv.fc_stats.wq = NULL;
+
+	list_splice_tail_init(&fc_stats->addlist, &fc_stats->list);
+
+	list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) {
+		list_del(&counter->list);
+
+		mlx5_cmd_fc_free(dev, counter->id);
+
+		kfree(counter);
+	}
+}
+
+void mlx5_fc_query_cached(struct mlx5_fc *counter,
+			  u64 *bytes, u64 *packets, u64 *lastuse)
+{
+	struct mlx5_fc_cache c;
+
+	c = counter->cache;
+
+	*bytes = c.bytes - counter->lastbytes;
+	*packets = c.packets - counter->lastpackets;
+	*lastuse = c.lastuse;
+
+	counter->lastbytes = c.bytes;
+	counter->lastpackets = c.packets;
+}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 9613143..07b504f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -41,6 +41,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/radix-tree.h>
+#include <linux/workqueue.h>
 
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/doorbell.h>
@@ -457,6 +458,17 @@
 	char name[MLX5_MAX_IRQ_NAME];
 };
 
+struct mlx5_fc_stats {
+	struct list_head list;
+	struct list_head addlist;
+	/* protect addlist add/splice operations */
+	spinlock_t addlist_lock;
+
+	struct workqueue_struct *wq;
+	struct delayed_work work;
+	unsigned long next_query;
+};
+
 struct mlx5_eswitch;
 
 struct mlx5_priv {
@@ -520,6 +532,8 @@
 	struct mlx5_flow_root_namespace *fdb_root_ns;
 	struct mlx5_flow_root_namespace *esw_egress_root_ns;
 	struct mlx5_flow_root_namespace *esw_ingress_root_ns;
+
+	struct mlx5_fc_stats		fc_stats;
 };
 
 enum mlx5_device_state {
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 6467569..4b7a107 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -73,6 +73,7 @@
 		u32			tir_num;
 		struct mlx5_flow_table	*ft;
 		u32			vport_num;
+		struct mlx5_fc		*counter;
 	};
 };
 
@@ -125,4 +126,10 @@
 int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 				 struct mlx5_flow_destination *dest);
 
+struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule);
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
+void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
+void mlx5_fc_query_cached(struct mlx5_fc *counter,
+			  u64 *bytes, u64 *packets, u64 *lastuse);
+
 #endif
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 4ce4ea4..9a05cd7 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -202,6 +202,9 @@
 	MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY          = 0x936,
 	MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY        = 0x937,
 	MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY       = 0x938,
+	MLX5_CMD_OP_ALLOC_FLOW_COUNTER            = 0x939,
+	MLX5_CMD_OP_DEALLOC_FLOW_COUNTER          = 0x93a,
+	MLX5_CMD_OP_QUERY_FLOW_COUNTER            = 0x93b,
 	MLX5_CMD_OP_MODIFY_FLOW_TABLE             = 0x93c
 };
 
@@ -265,7 +268,8 @@
 
 struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         ft_support[0x1];
-	u8         reserved_at_1[0x2];
+	u8         reserved_at_1[0x1];
+	u8         flow_counter[0x1];
 	u8	   flow_modify_en[0x1];
 	u8         modify_root[0x1];
 	u8         identified_miss_table_mode[0x1];
@@ -932,6 +936,8 @@
 	MLX5_FLOW_DESTINATION_TYPE_VPORT        = 0x0,
 	MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE   = 0x1,
 	MLX5_FLOW_DESTINATION_TYPE_TIR          = 0x2,
+
+	MLX5_FLOW_DESTINATION_TYPE_COUNTER      = 0x100,
 };
 
 struct mlx5_ifc_dest_format_struct_bits {
@@ -941,6 +947,19 @@
 	u8         reserved_at_20[0x20];
 };
 
+struct mlx5_ifc_flow_counter_list_bits {
+	u8         reserved_at_0[0x10];
+	u8         flow_counter_id[0x10];
+
+	u8         reserved_at_20[0x20];
+};
+
+union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits {
+	struct mlx5_ifc_dest_format_struct_bits dest_format_struct;
+	struct mlx5_ifc_flow_counter_list_bits flow_counter_list;
+	u8         reserved_at_0[0x40];
+};
+
 struct mlx5_ifc_fte_match_param_bits {
 	struct mlx5_ifc_fte_match_set_lyr_2_4_bits outer_headers;
 
@@ -2006,6 +2025,7 @@
 	MLX5_FLOW_CONTEXT_ACTION_ALLOW     = 0x1,
 	MLX5_FLOW_CONTEXT_ACTION_DROP      = 0x2,
 	MLX5_FLOW_CONTEXT_ACTION_FWD_DEST  = 0x4,
+	MLX5_FLOW_CONTEXT_ACTION_COUNT     = 0x8,
 };
 
 struct mlx5_ifc_flow_context_bits {
@@ -2022,13 +2042,16 @@
 	u8         reserved_at_80[0x8];
 	u8         destination_list_size[0x18];
 
-	u8         reserved_at_a0[0x160];
+	u8         reserved_at_a0[0x8];
+	u8         flow_counter_list_size[0x18];
+
+	u8         reserved_at_c0[0x140];
 
 	struct mlx5_ifc_fte_match_param_bits match_value;
 
 	u8         reserved_at_1200[0x600];
 
-	struct mlx5_ifc_dest_format_struct_bits destination[0];
+	union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[0];
 };
 
 enum {
@@ -3937,6 +3960,34 @@
 	u8         reserved_at_e0[0x120];
 };
 
+struct mlx5_ifc_query_flow_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+
+	struct mlx5_ifc_traffic_counter_bits flow_statistics[0];
+};
+
+struct mlx5_ifc_query_flow_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x80];
+
+	u8         clear[0x1];
+	u8         reserved_at_c1[0xf];
+	u8         num_of_counters[0x10];
+
+	u8         reserved_at_e0[0x10];
+	u8         flow_counter_id[0x10];
+};
+
 struct mlx5_ifc_query_esw_vport_context_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
@@ -5510,6 +5561,28 @@
 	u8         reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_dealloc_flow_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_dealloc_flow_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x10];
+	u8         flow_counter_id[0x10];
+
+	u8         reserved_at_60[0x20];
+};
+
 struct mlx5_ifc_create_xrc_srq_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
@@ -6237,6 +6310,28 @@
 	u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_alloc_flow_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x10];
+	u8         flow_counter_id[0x10];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_alloc_flow_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x40];
+};
+
 struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 03e322b..2cd9e9b 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -106,6 +106,7 @@
 			int bind);
 	int     (*walk)(struct net *, struct sk_buff *,
 			struct netlink_callback *, int, struct tc_action *);
+	void	(*stats_update)(struct tc_action *, u64, u32, u64);
 };
 
 struct tc_action_net {
@@ -178,10 +179,21 @@
 
 #define tc_for_each_action(_a, _exts) \
 	list_for_each_entry(a, &(_exts)->actions, list)
+
+static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
+					   u64 packets, u64 lastuse)
+{
+	if (!a->ops->stats_update)
+		return;
+
+	a->ops->stats_update(a, bytes, packets, lastuse);
+}
+
 #else /* CONFIG_NET_CLS_ACT */
 
 #define tc_no_actions(_exts) true
 #define tc_for_each_action(_a, _exts) while (0)
+#define tcf_action_stats_update(a, bytes, packets, lastuse)
 
 #endif /* CONFIG_NET_CLS_ACT */
 #endif
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 8b48938..0f7efa8 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -426,6 +426,7 @@
 enum tc_fl_command {
 	TC_CLSFLOWER_REPLACE,
 	TC_CLSFLOWER_DESTROY,
+	TC_CLSFLOWER_STATS,
 };
 
 struct tc_cls_flower_offload {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 46e55f0..a1fd76c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -527,11 +527,27 @@
 	return q->flags & TCQ_F_CPUSTATS;
 }
 
+static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
+				  __u64 bytes, __u32 packets)
+{
+	bstats->bytes += bytes;
+	bstats->packets += packets;
+}
+
 static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
 				 const struct sk_buff *skb)
 {
-	bstats->bytes += qdisc_pkt_len(skb);
-	bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
+	_bstats_update(bstats,
+		       qdisc_pkt_len(skb),
+		       skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
+}
+
+static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
+				      __u64 bytes, __u32 packets)
+{
+	u64_stats_update_begin(&bstats->syncp);
+	_bstats_update(&bstats->bstats, bytes, packets);
+	u64_stats_update_end(&bstats->syncp);
 }
 
 static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 1a6e09f..ec5cc84 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -148,6 +148,20 @@
 	return action;
 }
 
+static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+				  u64 lastuse)
+{
+	struct tcf_gact *gact = a->priv;
+	int action = READ_ONCE(gact->tcf_action);
+	struct tcf_t *tm = &gact->tcf_tm;
+
+	_bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, packets);
+	if (action == TC_ACT_SHOT)
+		this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
+
+	tm->lastuse = lastuse;
+}
+
 static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	unsigned char *b = skb_tail_pointer(skb);
@@ -207,6 +221,7 @@
 	.type		=	TCA_ACT_GACT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_gact,
+	.stats_update	=	tcf_gact_stats_update,
 	.dump		=	tcf_gact_dump,
 	.init		=	tcf_gact_init,
 	.walk		=	tcf_gact_walker,
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 2181ffc..730aaca 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -210,6 +210,25 @@
 	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
 }
 
+static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
+{
+	struct net_device *dev = tp->q->dev_queue->dev;
+	struct tc_cls_flower_offload offload = {0};
+	struct tc_to_netdev tc;
+
+	if (!tc_should_offload(dev, 0))
+		return;
+
+	offload.command = TC_CLSFLOWER_STATS;
+	offload.cookie = (unsigned long)f;
+	offload.exts = &f->exts;
+
+	tc.type = TC_SETUP_CLSFLOWER;
+	tc.cls_flower = &offload;
+
+	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+}
+
 static bool fl_destroy(struct tcf_proto *tp, bool force)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -662,6 +681,8 @@
 			goto nla_put_failure;
 	}
 
+	fl_hw_update_stats(tp, f);
+
 	if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
 			    mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
 			    sizeof(key->eth.dst)) ||