i40e: add DCB and DCBNL support

This patch adds capability to configure DCB on i40e network
interfaces using Intel XL710 adapter firmware APIs.

By default all VSIs are only enabled for the default traffic
class enabled by firmware for any given PF. The driver would
query the firmware for the traffic classes that are enabled for
the port and reconfigure the LAN VSI to match to the port traffic
class settings. All other VSIs are only enabled for the default
traffic class settings for now.

The driver registers and listens to firmware events that may
require change in the DCB settings. It may reconfigure the VSI
settings based on these events.

This patch exposes IEEE DCBNL interfaces for the i40e driver to
allow any application to query the DCB settings on the adapter.

Signed-off-by: Neerav Parikh <Neerav.Parikh@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-By: Jack Morgan<jack.morgan@intel.com>
Signed-off-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 0dd578f..3a9ede6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -55,6 +55,7 @@
 static void i40e_determine_queue_usage(struct i40e_pf *pf);
 static int i40e_setup_pf_filter_control(struct i40e_pf *pf);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf);
+static int i40e_veb_get_bw_info(struct i40e_veb *veb);
 
 /* i40e_pci_tbl - PCI Device ID Table
  *
@@ -1272,6 +1273,7 @@
 	u8 offset;
 	u16 qmap;
 	int i;
+	u16 num_tc_qps = 0;
 
 	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
 	offset = 0;
@@ -1293,6 +1295,9 @@
 
 	vsi->tc_config.numtc = numtc;
 	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
+	/* Number of queues per enabled TC */
+	num_tc_qps = rounddown_pow_of_two(vsi->alloc_queue_pairs/numtc);
+	num_tc_qps = min_t(int, num_tc_qps, I40E_MAX_QUEUES_PER_TC);
 
 	/* Setup queue offset/count for all TCs for given VSI */
 	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
@@ -1300,30 +1305,25 @@
 		if (vsi->tc_config.enabled_tc & (1 << i)) { /* TC is enabled */
 			int pow, num_qps;
 
-			vsi->tc_config.tc_info[i].qoffset = offset;
 			switch (vsi->type) {
 			case I40E_VSI_MAIN:
-				if (i == 0)
-					qcount = pf->rss_size;
-				else
-					qcount = pf->num_tc_qps;
-				vsi->tc_config.tc_info[i].qcount = qcount;
+				qcount = min_t(int, pf->rss_size, num_tc_qps);
 				break;
 			case I40E_VSI_FDIR:
 			case I40E_VSI_SRIOV:
 			case I40E_VSI_VMDQ2:
 			default:
-				qcount = vsi->alloc_queue_pairs;
-				vsi->tc_config.tc_info[i].qcount = qcount;
+				qcount = num_tc_qps;
 				WARN_ON(i != 0);
 				break;
 			}
+			vsi->tc_config.tc_info[i].qoffset = offset;
+			vsi->tc_config.tc_info[i].qcount = qcount;
 
 			/* find the power-of-2 of the number of queue pairs */
-			num_qps = vsi->tc_config.tc_info[i].qcount;
+			num_qps = qcount;
 			pow = 0;
-			while (num_qps &&
-			      ((1 << pow) < vsi->tc_config.tc_info[i].qcount)) {
+			while (num_qps && ((1 << pow) < qcount)) {
 				pow++;
 				num_qps >>= 1;
 			}
@@ -1333,7 +1333,7 @@
 			    (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
 			    (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
 
-			offset += vsi->tc_config.tc_info[i].qcount;
+			offset += qcount;
 		} else {
 			/* TC is not enabled so set the offset to
 			 * default queue and allocate one queue
@@ -2162,6 +2162,7 @@
 
 	/* initialize XPS */
 	if (ring->q_vector && ring->netdev &&
+	    vsi->tc_config.numtc <= 1 &&
 	    !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state))
 		netif_set_xps_queue(ring->netdev,
 				    &ring->q_vector->affinity_mask,
@@ -3888,6 +3889,149 @@
 }
 
 /**
+ * i40e_veb_config_tc - Configure TCs for given VEB
+ * @veb: given VEB
+ * @enabled_tc: TC bitmap
+ *
+ * Configures given TC bitmap for VEB (switching) element
+ **/
+int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
+{
+	struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0};
+	struct i40e_pf *pf = veb->pf;
+	int ret = 0;
+	int i;
+
+	/* No TCs or already enabled TCs just return */
+	if (!enabled_tc || veb->enabled_tc == enabled_tc)
+		return ret;
+
+	bw_data.tc_valid_bits = enabled_tc;
+	/* bw_data.absolute_credits is not set (relative) */
+
+	/* Enable ETS TCs with equal BW Share for now */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (enabled_tc & (1 << i))
+			bw_data.tc_bw_share_credits[i] = 1;
+	}
+
+	ret = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid,
+						   &bw_data, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "veb bw config failed, aq_err=%d\n",
+			 pf->hw.aq.asq_last_status);
+		goto out;
+	}
+
+	/* Update the BW information */
+	ret = i40e_veb_get_bw_info(veb);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Failed getting veb bw config, aq_err=%d\n",
+			 pf->hw.aq.asq_last_status);
+	}
+
+out:
+	return ret;
+}
+
+#ifdef CONFIG_I40E_DCB
+/**
+ * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs
+ * @pf: PF struct
+ *
+ * Reconfigure VEB/VSIs on a given PF; it is assumed that
+ * the caller would've quiesce all the VSIs before calling
+ * this function
+ **/
+static void i40e_dcb_reconfigure(struct i40e_pf *pf)
+{
+	u8 tc_map = 0;
+	int ret;
+	u8 v;
+
+	/* Enable the TCs available on PF to all VEBs */
+	tc_map = i40e_pf_get_tc_map(pf);
+	for (v = 0; v < I40E_MAX_VEB; v++) {
+		if (!pf->veb[v])
+			continue;
+		ret = i40e_veb_config_tc(pf->veb[v], tc_map);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Failed configuring TC for VEB seid=%d\n",
+				 pf->veb[v]->seid);
+			/* Will try to configure as many components */
+		}
+	}
+
+	/* Update each VSI */
+	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+		if (!pf->vsi[v])
+			continue;
+
+		/* - Enable all TCs for the LAN VSI
+		 * - For all others keep them at TC0 for now
+		 */
+		if (v == pf->lan_vsi)
+			tc_map = i40e_pf_get_tc_map(pf);
+		else
+			tc_map = i40e_pf_get_default_tc(pf);
+
+		ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Failed configuring TC for VSI seid=%d\n",
+				 pf->vsi[v]->seid);
+			/* Will try to configure as many components */
+		} else {
+			if (pf->vsi[v]->netdev)
+				i40e_dcbnl_set_all(pf->vsi[v]);
+		}
+	}
+}
+
+/**
+ * i40e_init_pf_dcb - Initialize DCB configuration
+ * @pf: PF being configured
+ *
+ * Query the current DCB configuration and cache it
+ * in the hardware structure
+ **/
+static int i40e_init_pf_dcb(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	int err = 0;
+
+	if (pf->hw.func_caps.npar_enable)
+		goto out;
+
+	/* Get the initial DCB configuration */
+	err = i40e_init_dcb(hw);
+	if (!err) {
+		/* Device/Function is not DCBX capable */
+		if ((!hw->func_caps.dcb) ||
+		    (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
+			dev_info(&pf->pdev->dev,
+				 "DCBX offload is not supported or is disabled for this PF.\n");
+
+			if (pf->flags & I40E_FLAG_MFP_ENABLED)
+				goto out;
+
+		} else {
+			/* When status is not DISABLED then DCBX in FW */
+			pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
+				       DCB_CAP_DCBX_VER_IEEE;
+			pf->flags |= I40E_FLAG_DCB_ENABLED;
+		}
+	}
+
+out:
+	return err;
+}
+#endif /* CONFIG_I40E_DCB */
+
+/**
  * i40e_up_complete - Finish the last steps of bringing up a connection
  * @vsi: the VSI being configured
  **/
@@ -4249,6 +4393,130 @@
 	}
 }
 
+#ifdef CONFIG_I40E_DCB
+/**
+ * i40e_dcb_need_reconfig - Check if DCB needs reconfig
+ * @pf: board private structure
+ * @old_cfg: current DCB config
+ * @new_cfg: new DCB config
+ **/
+bool i40e_dcb_need_reconfig(struct i40e_pf *pf,
+			    struct i40e_dcbx_config *old_cfg,
+			    struct i40e_dcbx_config *new_cfg)
+{
+	bool need_reconfig = false;
+
+	/* Check if ETS configuration has changed */
+	if (memcmp(&new_cfg->etscfg,
+		   &old_cfg->etscfg,
+		   sizeof(new_cfg->etscfg))) {
+		/* If Priority Table has changed reconfig is needed */
+		if (memcmp(&new_cfg->etscfg.prioritytable,
+			   &old_cfg->etscfg.prioritytable,
+			   sizeof(new_cfg->etscfg.prioritytable))) {
+			need_reconfig = true;
+			dev_info(&pf->pdev->dev, "ETS UP2TC changed.\n");
+		}
+
+		if (memcmp(&new_cfg->etscfg.tcbwtable,
+			   &old_cfg->etscfg.tcbwtable,
+			   sizeof(new_cfg->etscfg.tcbwtable)))
+			dev_info(&pf->pdev->dev, "ETS TC BW Table changed.\n");
+
+		if (memcmp(&new_cfg->etscfg.tsatable,
+			   &old_cfg->etscfg.tsatable,
+			   sizeof(new_cfg->etscfg.tsatable)))
+			dev_info(&pf->pdev->dev, "ETS TSA Table changed.\n");
+	}
+
+	/* Check if PFC configuration has changed */
+	if (memcmp(&new_cfg->pfc,
+		   &old_cfg->pfc,
+		   sizeof(new_cfg->pfc))) {
+		need_reconfig = true;
+		dev_info(&pf->pdev->dev, "PFC config change detected.\n");
+	}
+
+	/* Check if APP Table has changed */
+	if (memcmp(&new_cfg->app,
+		   &old_cfg->app,
+		   sizeof(new_cfg->app)))
+		need_reconfig = true;
+		dev_info(&pf->pdev->dev, "APP Table change detected.\n");
+
+	return need_reconfig;
+}
+
+/**
+ * i40e_handle_lldp_event - Handle LLDP Change MIB event
+ * @pf: board private structure
+ * @e: event info posted on ARQ
+ **/
+static int i40e_handle_lldp_event(struct i40e_pf *pf,
+				  struct i40e_arq_event_info *e)
+{
+	struct i40e_aqc_lldp_get_mib *mib =
+		(struct i40e_aqc_lldp_get_mib *)&e->desc.params.raw;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_dcbx_config *dcbx_cfg = &hw->local_dcbx_config;
+	struct i40e_dcbx_config tmp_dcbx_cfg;
+	bool need_reconfig = false;
+	int ret = 0;
+	u8 type;
+
+	/* Ignore if event is not for Nearest Bridge */
+	type = ((mib->type >> I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT)
+		& I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
+	if (type != I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE)
+		return ret;
+
+	/* Check MIB Type and return if event for Remote MIB update */
+	type = mib->type & I40E_AQ_LLDP_MIB_TYPE_MASK;
+	if (type == I40E_AQ_LLDP_MIB_REMOTE) {
+		/* Update the remote cached instance and return */
+		ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_REMOTE,
+				I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE,
+				&hw->remote_dcbx_config);
+		goto exit;
+	}
+
+	/* Convert/store the DCBX data from LLDPDU temporarily */
+	memset(&tmp_dcbx_cfg, 0, sizeof(tmp_dcbx_cfg));
+	ret = i40e_lldp_to_dcb_config(e->msg_buf, &tmp_dcbx_cfg);
+	if (ret) {
+		/* Error in LLDPDU parsing return */
+		dev_info(&pf->pdev->dev, "Failed parsing LLDPDU from event buffer\n");
+		goto exit;
+	}
+
+	/* No change detected in DCBX configs */
+	if (!memcmp(&tmp_dcbx_cfg, dcbx_cfg, sizeof(tmp_dcbx_cfg))) {
+		dev_info(&pf->pdev->dev, "No change detected in DCBX configuration.\n");
+		goto exit;
+	}
+
+	need_reconfig = i40e_dcb_need_reconfig(pf, dcbx_cfg, &tmp_dcbx_cfg);
+
+	i40e_dcbnl_flush_apps(pf, &tmp_dcbx_cfg);
+
+	/* Overwrite the new configuration */
+	*dcbx_cfg = tmp_dcbx_cfg;
+
+	if (!need_reconfig)
+		goto exit;
+
+	/* Reconfiguration needed quiesce all VSIs */
+	i40e_pf_quiesce_all_vsi(pf);
+
+	/* Changes in configuration update VEB/VSI */
+	i40e_dcb_reconfigure(pf);
+
+	i40e_pf_unquiesce_all_vsi(pf);
+exit:
+	return ret;
+}
+#endif /* CONFIG_I40E_DCB */
+
 /**
  * i40e_do_reset_safe - Protected reset path for userland calls.
  * @pf: board private structure
@@ -4635,6 +4903,11 @@
 			break;
 		case i40e_aqc_opc_lldp_update_mib:
 			dev_info(&pf->pdev->dev, "ARQ: Update LLDP MIB event received\n");
+#ifdef CONFIG_I40E_DCB
+			rtnl_lock();
+			ret = i40e_handle_lldp_event(pf, &event);
+			rtnl_unlock();
+#endif /* CONFIG_I40E_DCB */
 			break;
 		case i40e_aqc_opc_event_lan_overflow:
 			dev_info(&pf->pdev->dev, "ARQ LAN queue overflow event received\n");
@@ -4983,6 +5256,14 @@
 		goto end_core_reset;
 	}
 
+#ifdef CONFIG_I40E_DCB
+	ret = i40e_init_pf_dcb(pf);
+	if (ret) {
+		dev_info(&pf->pdev->dev, "init_pf_dcb failed: %d\n", ret);
+		goto end_core_reset;
+	}
+#endif /* CONFIG_I40E_DCB */
+
 	/* do basic switch setup */
 	ret = i40e_setup_pf_switch(pf, reinit);
 	if (ret)
@@ -5971,11 +6252,6 @@
 		dev_info(&pf->pdev->dev, "MFP mode Enabled\n");
 	}
 
-	if (pf->hw.func_caps.dcb)
-		pf->num_tc_qps = I40E_DEFAULT_QUEUES_PER_TC;
-	else
-		pf->num_tc_qps = 0;
-
 	/* FW/NVM is not yet fixed in this regard */
 	if ((pf->hw.func_caps.fd_filters_guaranteed > 0) ||
 	    (pf->hw.func_caps.fd_filters_best_effort > 0)) {
@@ -6779,6 +7055,10 @@
 			goto err_netdev;
 		vsi->netdev_registered = true;
 		netif_carrier_off(vsi->netdev);
+#ifdef CONFIG_I40E_DCB
+		/* Setup DCB netlink interface */
+		i40e_dcbnl_setup(vsi);
+#endif /* CONFIG_I40E_DCB */
 		/* fall through */
 
 	case I40E_VSI_FDIR:
@@ -7450,7 +7730,6 @@
 	int queues_left;
 
 	pf->num_lan_qps = 0;
-	pf->num_tc_qps = rounddown_pow_of_two(pf->num_tc_qps);
 
 	/* Find the max queues to be put into basic use.  We'll always be
 	 * using TC0, whether or not DCB is running, and TC0 will get the
@@ -7718,6 +7997,14 @@
 
 	pci_set_drvdata(pdev, pf);
 	pci_save_state(pdev);
+#ifdef CONFIG_I40E_DCB
+	err = i40e_init_pf_dcb(pf);
+	if (err) {
+		dev_info(&pdev->dev, "init_pf_dcb failed: %d\n", err);
+		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+		goto err_init_dcb;
+	}
+#endif /* CONFIG_I40E_DCB */
 
 	/* set up periodic task facility */
 	setup_timer(&pf->service_timer, i40e_service_timer, (unsigned long)pf);
@@ -7831,6 +8118,9 @@
 err_switch_setup:
 	i40e_reset_interrupt_capability(pf);
 	del_timer_sync(&pf->service_timer);
+#ifdef CONFIG_I40E_DCB
+err_init_dcb:
+#endif /* CONFIG_I40E_DCB */
 err_mac_addr:
 err_configure_lan_hmc:
 	(void)i40e_shutdown_lan_hmc(hw);