ixgbe: this patch adds support for DCB to the kernel and ixgbe driver

This adds support for Data Center Bridging (DCB) features in the ixgbe
driver and adds an rtnetlink interface for configuring DCB to the
kernel.  The DCB feature support included are Priority Grouping (PG) -
which allows bandwidth guarantees to be allocated to groups to traffic
based on the 802.1q priority, and Priority Based Flow Control (PFC) -
which introduces a new MAC control PAUSE frame which works at
granularity of the 802.1p priority instead of the link (IEEE 802.3x).

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/Kconfig b/net/Kconfig
index 4e2e40b..c7d01c3 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -194,6 +194,7 @@
 source "net/econet/Kconfig"
 source "net/wanrouter/Kconfig"
 source "net/sched/Kconfig"
+source "net/dcb/Kconfig"
 
 menu "Network testing"
 
diff --git a/net/Makefile b/net/Makefile
index 27d1f10..83b0646 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -57,6 +57,9 @@
 obj-$(CONFIG_IUCV)		+= iucv/
 obj-$(CONFIG_RFKILL)		+= rfkill/
 obj-$(CONFIG_NET_9P)		+= 9p/
+ifeq ($(CONFIG_DCBNL),y)
+obj-$(CONFIG_DCB)		+= dcb/
+endif
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig
new file mode 100644
index 0000000..bdf3880
--- /dev/null
+++ b/net/dcb/Kconfig
@@ -0,0 +1,12 @@
+config DCB
+        tristate "Data Center Bridging support"
+
+config DCBNL
+	bool "Data Center Bridging netlink interface support"
+	depends on DCB
+	default n
+	---help---
+	  This option turns on the netlink interface
+	  (dcbnl) for Data Center Bridging capable devices.
+
+	  If unsure, say N.
diff --git a/net/dcb/Makefile b/net/dcb/Makefile
new file mode 100644
index 0000000..9930f4c
--- /dev/null
+++ b/net/dcb/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DCB) += dcbnl.o
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
new file mode 100644
index 0000000..516e8be
--- /dev/null
+++ b/net/dcb/dcbnl.c
@@ -0,0 +1,704 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu <lucy.liu@intel.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
+#include <net/rtnetlink.h>
+#include <linux/dcbnl.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+
+/**
+ * Data Center Bridging (DCB) is a collection of Ethernet enhancements
+ * intended to allow network traffic with differing requirements
+ * (highly reliable, no drops vs. best effort vs. low latency) to operate
+ * and co-exist on Ethernet.  Current DCB features are:
+ *
+ * Enhanced Transmission Selection (aka Priority Grouping [PG]) - provides a
+ *   framework for assigning bandwidth guarantees to traffic classes.
+ *
+ * Priority-based Flow Control (PFC) - provides a flow control mechanism which
+ *   can work independently for each 802.1p priority.
+ *
+ * Congestion Notification - provides a mechanism for end-to-end congestion
+ *   control for protocols which do not have built-in congestion management.
+ *
+ * More information about the emerging standards for these Ethernet features
+ * can be found at: http://www.ieee802.org/1/pages/dcbridges.html
+ *
+ * This file implements an rtnetlink interface to allow configuration of DCB
+ * features for capable devices.
+ */
+
+MODULE_AUTHOR("Lucy Liu, <lucy.liu@intel.com>");
+MODULE_DESCRIPTION("Data Center Bridging generic netlink interface");
+MODULE_LICENSE("GPL");
+
+/**************** DCB attribute policies *************************************/
+
+/* DCB netlink attributes policy */
+static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
+	[DCB_ATTR_IFNAME]    = {.type = NLA_STRING, .len = IFNAMSIZ - 1},
+	[DCB_ATTR_STATE]     = {.type = NLA_U8},
+	[DCB_ATTR_PFC_CFG]   = {.type = NLA_NESTED},
+	[DCB_ATTR_PG_CFG]    = {.type = NLA_NESTED},
+	[DCB_ATTR_SET_ALL]   = {.type = NLA_U8},
+	[DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG},
+};
+
+/* DCB priority flow control to User Priority nested attributes */
+static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = {
+	[DCB_PFC_UP_ATTR_0]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_1]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_2]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_3]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_4]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_5]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_6]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_7]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG},
+};
+
+/* DCB priority grouping nested attributes */
+static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = {
+	[DCB_PG_ATTR_TC_0]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_1]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_2]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_3]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_4]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_5]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_6]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_7]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_ALL]    = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_BW_ID_0]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_1]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_2]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_3]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_4]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_5]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_6]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_7]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_ALL] = {.type = NLA_FLAG},
+};
+
+/* DCB traffic class nested attributes. */
+static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = {
+	[DCB_TC_ATTR_PARAM_PGID]            = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_UP_MAPPING]      = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_STRICT_PRIO]     = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_BW_PCT]          = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_ALL]             = {.type = NLA_FLAG},
+};
+
+
+/* standard netlink reply call */
+static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
+                       u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct dcbmsg *dcb;
+	struct nlmsghdr *nlh;
+	int ret = -EINVAL;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		return ret;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, event, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = cmd;
+	dcb->dcb_pad = 0;
+
+	ret = nla_put_u8(dcbnl_skb, attr, value);
+	if (ret)
+		goto err;
+
+	/* end the message, assign the nlmsg_len. */
+	nlmsg_end(dcbnl_skb, nlh);
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err;
+
+	return 0;
+nlmsg_failure:
+err:
+	kfree(dcbnl_skb);
+	return ret;
+}
+
+static int dcbnl_getstate(struct net_device *netdev, struct nlattr **tb,
+                          u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+
+	/* if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->getstate) */
+	if (!netdev->dcbnl_ops->getstate)
+		return ret;
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->getstate(netdev), RTM_GETDCB,
+	                  DCB_CMD_GSTATE, DCB_ATTR_STATE, pid, seq, flags);
+
+	return ret;
+}
+
+static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb,
+                           u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1], *nest;
+	u8 value;
+	int ret = -EINVAL;
+	int i;
+	int getall = 0;
+
+	if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->getpfccfg)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
+	                       tb[DCB_ATTR_PFC_CFG],
+	                       dcbnl_pfc_up_nest);
+	if (ret)
+		goto err_out;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_PFC_GCFG;
+
+	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PFC_CFG);
+	if (!nest)
+		goto err;
+
+	if (data[DCB_PFC_UP_ATTR_ALL])
+		getall = 1;
+
+	for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
+		if (!getall && !data[i])
+			continue;
+
+		netdev->dcbnl_ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0,
+		                             &value);
+		ret = nla_put_u8(dcbnl_skb, i, value);
+
+		if (ret) {
+			nla_nest_cancel(dcbnl_skb, nest);
+			goto err;
+		}
+	}
+	nla_nest_end(dcbnl_skb, nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err;
+
+	return 0;
+nlmsg_failure:
+err:
+	kfree(dcbnl_skb);
+err_out:
+	return -EINVAL;
+}
+
+static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb,
+                                u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	u8 perm_addr[MAX_ADDR_LEN];
+	int ret = -EINVAL;
+
+	if (!netdev->dcbnl_ops->getpermhwaddr)
+		return ret;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_GPERM_HWADDR;
+
+	netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr);
+
+	ret = nla_put(dcbnl_skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr),
+	              perm_addr);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err;
+
+	return 0;
+
+nlmsg_failure:
+err:
+	kfree(dcbnl_skb);
+err_out:
+	return -EINVAL;
+}
+
+static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags, int dir)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *pg_nest, *param_nest, *data;
+	struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
+	struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
+	u8 prio, pgid, tc_pct, up_map;
+	int ret  = -EINVAL;
+	int getall = 0;
+	int i;
+
+	if (!tb[DCB_ATTR_PG_CFG] ||
+	    !netdev->dcbnl_ops->getpgtccfgtx ||
+	    !netdev->dcbnl_ops->getpgtccfgrx ||
+	    !netdev->dcbnl_ops->getpgbwgcfgtx ||
+	    !netdev->dcbnl_ops->getpgbwgcfgrx)
+		return ret;
+
+	ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
+	                       tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+
+	if (ret)
+		goto err_out;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = (dir) ? DCB_CMD_PGRX_GCFG : DCB_CMD_PGTX_GCFG;
+
+	pg_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PG_CFG);
+	if (!pg_nest)
+		goto err;
+
+	if (pg_tb[DCB_PG_ATTR_TC_ALL])
+		getall = 1;
+
+	for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
+		if (!getall && !pg_tb[i])
+			continue;
+
+		if (pg_tb[DCB_PG_ATTR_TC_ALL])
+			data = pg_tb[DCB_PG_ATTR_TC_ALL];
+		else
+			data = pg_tb[i];
+		ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
+				       data, dcbnl_tc_param_nest);
+		if (ret)
+			goto err_pg;
+
+		param_nest = nla_nest_start(dcbnl_skb, i);
+		if (!param_nest)
+			goto err_pg;
+
+		pgid = DCB_ATTR_VALUE_UNDEFINED;
+		prio = DCB_ATTR_VALUE_UNDEFINED;
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+		up_map = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->getpgtccfgrx(netdev,
+						i - DCB_PG_ATTR_TC_0, &prio,
+						&pgid, &tc_pct, &up_map);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->getpgtccfgtx(netdev,
+						i - DCB_PG_ATTR_TC_0, &prio,
+						&pgid, &tc_pct, &up_map);
+		}
+
+		if (param_tb[DCB_TC_ATTR_PARAM_PGID] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb,
+			                 DCB_TC_ATTR_PARAM_PGID, pgid);
+			if (ret)
+				goto err_param;
+		}
+		if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb,
+			                 DCB_TC_ATTR_PARAM_UP_MAPPING, up_map);
+			if (ret)
+				goto err_param;
+		}
+		if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb,
+			                 DCB_TC_ATTR_PARAM_STRICT_PRIO, prio);
+			if (ret)
+				goto err_param;
+		}
+		if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_BW_PCT,
+			                 tc_pct);
+			if (ret)
+				goto err_param;
+		}
+		nla_nest_end(dcbnl_skb, param_nest);
+	}
+
+	if (pg_tb[DCB_PG_ATTR_BW_ID_ALL])
+		getall = 1;
+	else
+		getall = 0;
+
+	for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
+		if (!getall && !pg_tb[i])
+			continue;
+
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->getpgbwgcfgrx(netdev,
+					i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->getpgbwgcfgtx(netdev,
+					i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+		}
+		ret = nla_put_u8(dcbnl_skb, i, tc_pct);
+
+		if (ret)
+			goto err_pg;
+	}
+
+	nla_nest_end(dcbnl_skb, pg_nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err_param:
+	nla_nest_cancel(dcbnl_skb, param_nest);
+err_pg:
+	nla_nest_cancel(dcbnl_skb, pg_nest);
+nlmsg_failure:
+err:
+	kfree(dcbnl_skb);
+err_out:
+	ret  = -EINVAL;
+	return ret;
+}
+
+static int dcbnl_pgtx_getcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 0);
+}
+
+static int dcbnl_pgrx_getcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 1);
+}
+
+static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb,
+                          u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->setstate)
+		return ret;
+
+	value = nla_get_u8(tb[DCB_ATTR_STATE]);
+
+	netdev->dcbnl_ops->setstate(netdev, value);
+
+	ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE,
+	                  pid, seq, flags);
+
+	return ret;
+}
+
+static int dcbnl_setpfccfg(struct net_device *netdev, struct nlattr **tb,
+                           u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1];
+	int i;
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->setpfccfg)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
+	                       tb[DCB_ATTR_PFC_CFG],
+	                       dcbnl_pfc_up_nest);
+	if (ret)
+		goto err;
+
+	for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
+		if (data[i] == NULL)
+			continue;
+		value = nla_get_u8(data[i]);
+		netdev->dcbnl_ops->setpfccfg(netdev,
+			data[i]->nla_type - DCB_PFC_UP_ATTR_0, value);
+	}
+
+	ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SCFG, DCB_ATTR_PFC_CFG,
+	                  pid, seq, flags);
+err:
+	return ret;
+}
+
+static int dcbnl_setall(struct net_device *netdev, struct nlattr **tb,
+                        u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+
+	if (!tb[DCB_ATTR_SET_ALL] || !netdev->dcbnl_ops->setall)
+		return ret;
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->setall(netdev), RTM_SETDCB,
+	                  DCB_CMD_SET_ALL, DCB_ATTR_SET_ALL, pid, seq, flags);
+
+	return ret;
+}
+
+static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags, int dir)
+{
+	struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
+	struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
+	int ret = -EINVAL;
+	int i;
+	u8 pgid;
+	u8 up_map;
+	u8 prio;
+	u8 tc_pct;
+
+	if (!tb[DCB_ATTR_PG_CFG] ||
+	    !netdev->dcbnl_ops->setpgtccfgtx ||
+	    !netdev->dcbnl_ops->setpgtccfgrx ||
+	    !netdev->dcbnl_ops->setpgbwgcfgtx ||
+	    !netdev->dcbnl_ops->setpgbwgcfgrx)
+		return ret;
+
+	ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
+	                       tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+	if (ret)
+		goto err;
+
+	for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
+		if (!pg_tb[i])
+			continue;
+
+		ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
+		                       pg_tb[i], dcbnl_tc_param_nest);
+		if (ret)
+			goto err;
+
+		pgid = DCB_ATTR_VALUE_UNDEFINED;
+		prio = DCB_ATTR_VALUE_UNDEFINED;
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+		up_map = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO])
+			prio =
+			    nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]);
+
+		if (param_tb[DCB_TC_ATTR_PARAM_PGID])
+			pgid = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_PGID]);
+
+		if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT])
+			tc_pct = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_BW_PCT]);
+
+		if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING])
+			up_map =
+			     nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]);
+
+		/* dir: Tx = 0, Rx = 1 */
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->setpgtccfgrx(netdev,
+				i - DCB_PG_ATTR_TC_0,
+				prio, pgid, tc_pct, up_map);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->setpgtccfgtx(netdev,
+				i - DCB_PG_ATTR_TC_0,
+				prio, pgid, tc_pct, up_map);
+		}
+	}
+
+	for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
+		if (!pg_tb[i])
+			continue;
+
+		tc_pct = nla_get_u8(pg_tb[i]);
+
+		/* dir: Tx = 0, Rx = 1 */
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->setpgbwgcfgrx(netdev,
+					 i - DCB_PG_ATTR_BW_ID_0, tc_pct);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->setpgbwgcfgtx(netdev,
+					 i - DCB_PG_ATTR_BW_ID_0, tc_pct);
+		}
+	}
+
+	ret = dcbnl_reply(0, RTM_SETDCB,
+			  (dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG),
+			  DCB_ATTR_PG_CFG, pid, seq, flags);
+
+err:
+	return ret;
+}
+
+static int dcbnl_pgtx_setcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 0);
+}
+
+static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 1);
+}
+
+static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct net_device *netdev;
+	struct dcbmsg  *dcb = (struct dcbmsg *)NLMSG_DATA(nlh);
+	struct nlattr *tb[DCB_ATTR_MAX + 1];
+	u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+	int ret = -EINVAL;
+
+	if (net != &init_net)
+		return -EINVAL;
+
+	ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
+			  dcbnl_rtnl_policy);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[DCB_ATTR_IFNAME])
+		return -EINVAL;
+
+	netdev = dev_get_by_name(&init_net, nla_data(tb[DCB_ATTR_IFNAME]));
+	if (!netdev)
+		return -EINVAL;
+
+	if (!netdev->dcbnl_ops)
+		goto errout;
+
+	switch (dcb->cmd) {
+	case DCB_CMD_GSTATE:
+		ret = dcbnl_getstate(netdev, tb, pid, nlh->nlmsg_seq,
+		                     nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PFC_GCFG:
+		ret = dcbnl_getpfccfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                      nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_GPERM_HWADDR:
+		ret = dcbnl_getperm_hwaddr(netdev, tb, pid, nlh->nlmsg_seq,
+		                           nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PGTX_GCFG:
+		ret = dcbnl_pgtx_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                        nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PGRX_GCFG:
+		ret = dcbnl_pgrx_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                        nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_SSTATE:
+		ret = dcbnl_setstate(netdev, tb, pid, nlh->nlmsg_seq,
+		                     nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PFC_SCFG:
+		ret = dcbnl_setpfccfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                      nlh->nlmsg_flags);
+		goto out;
+
+	case DCB_CMD_SET_ALL:
+		ret = dcbnl_setall(netdev, tb, pid, nlh->nlmsg_seq,
+		                   nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PGTX_SCFG:
+		ret = dcbnl_pgtx_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                        nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PGRX_SCFG:
+		ret = dcbnl_pgrx_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                        nlh->nlmsg_flags);
+		goto out;
+	default:
+		goto errout;
+	}
+errout:
+	ret = -EINVAL;
+out:
+	dev_put(netdev);
+	return ret;
+}
+
+static int __init dcbnl_init(void)
+{
+	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL);
+	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL);
+
+	return 0;
+}
+module_init(dcbnl_init);
+
+static void __exit dcbnl_exit(void)
+{
+	rtnl_unregister(PF_UNSPEC, RTM_GETDCB);
+	rtnl_unregister(PF_UNSPEC, RTM_SETDCB);
+}
+module_exit(dcbnl_exit);
+
+