Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next Daniel Borkmann says: ==================== pull-request: bpf-next 2018-07-15 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Various different arm32 JIT improvements in order to optimize code emission and make the JIT code itself more robust, from Russell. 2) Support simultaneous driver and offloaded XDP in order to allow for advanced use-cases where some work is offloaded to the NIC and some to the host. Also add ability for bpftool to load programs and maps beyond just the cgroup case, from Jakub. 3) Add BPF JIT support in nfp for multiplication as well as division. For the latter in particular, it uses the reciprocal algorithm to emulate it, from Jiong. 4) Add BTF pretty print functionality to bpftool in plain and JSON output format, from Okash. 5) Add build and installation to the BPF helper man page into bpftool, from Quentin. 6) Add a TCP BPF callback for listening sockets which is triggered right after the socket transitions to TCP_LISTEN state, from Andrey. 7) Add a new cgroup tree command to bpftool which iterates over the whole cgroup tree and prints all attached programs, from Roman. 8) Improve xdp_redirect_cpu sample to support parsing of double VLAN tagged packets, from Jesper. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>

commit: 2aa4a3378ad077d02131a23d22641ae8ae44cb28 [log] [tgz]
author: David S. Miller <davem@davemloft.net> Sat Jul 14 18:47:44 2018 -0700
committer: David S. Miller <davem@davemloft.net> Sat Jul 14 18:47:44 2018 -0700
tree: da31b9ef7436fb938c874d549d95a0ab1aac1b84
parent: f5c64e566c521456e9c34e3a7208dabab0eaf18b [diff]
parent: 13f7432bdd8eb2eb55eed2af1469d37c0f667617 [diff]
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index f4c042b..77c37fb 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt

@@ -1834,6 +1834,16 @@
 
 	By default the stable secret is unset.
 
+addr_gen_mode - INTEGER
+	Defines how link-local and autoconf addresses are generated.
+
+	0: generate address based on EUI64 (default)
+	1: do no generate a link-local address, use EUI64 for addresses generated
+	   from autoconf
+	2: generate stable privacy addresses, using the secret from
+	   stable_secret (RFC7217)
+	3: generate stable privacy addresses, using a random secret if unset
+
 drop_unicast_in_l2_multicast - BOOLEAN
 	Drop any unicast IPv6 packets that are received in link-layer
 	multicast (or broadcast) frames.

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index e718b8c..eeb7d31 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c

@@ -19,6 +19,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/list.h>
@@ -239,7 +240,7 @@ void cn_del_callback(struct cb_id *id)
 }
 EXPORT_SYMBOL_GPL(cn_del_callback);
 
-static int cn_proc_show(struct seq_file *m, void *v)
+static int __maybe_unused cn_proc_show(struct seq_file *m, void *v)
 {
 	struct cn_queue_dev *dev = cdev.cbdev;
 	struct cn_callback_entry *cbq;

diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index 2bb6f03..0997e16 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c

@@ -1673,7 +1673,7 @@ static void chtls_timewait(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->rcv_nxt++;
-	tp->rx_opt.ts_recent_stamp = get_seconds();
+	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 	tp->srtt_us = 0;
 	tcp_time_wait(sk, TCP_TIME_WAIT, 0);
 }

diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 5d65582..616fc9b 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c

@@ -423,7 +423,7 @@ static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
 
 static u16 hfi1_vnic_select_queue(struct net_device *netdev,
 				  struct sk_buff *skb,
-				  void *accel_priv,
+				  struct net_device *sb_dev,
 				  select_queue_fallback_t fallback)
 {
 	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
index 0c8aec6..6155878 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c

@@ -95,7 +95,7 @@ static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb,
 }
 
 static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
-				 void *accel_priv,
+				 struct net_device *sb_dev,
 				 select_queue_fallback_t fallback)
 {
 	struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
@@ -107,7 +107,7 @@ static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
 	mdata->entropy = opa_vnic_calc_entropy(skb);
 	mdata->vl = opa_vnic_get_vl(adapter, skb);
 	rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
-					       accel_priv, fallback);
+					       sb_dev, fallback);
 	skb_pull(skb, sizeof(*mdata));
 	return rc;
 }

diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 6e0c281..ef5560b 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c

@@ -9,6 +9,7 @@
  *
  */
 
+#include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -1321,7 +1322,7 @@ static inline void capinc_tty_exit(void) { }
  * /proc/capi/capi20:
  *  minor applid nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt
  */
-static int capi20_proc_show(struct seq_file *m, void *v)
+static int __maybe_unused capi20_proc_show(struct seq_file *m, void *v)
 {
 	struct capidev *cdev;
 	struct list_head *l;
@@ -1344,7 +1345,7 @@ static int capi20_proc_show(struct seq_file *m, void *v)
  * /proc/capi/capi20ncci:
  *  applid ncci
  */
-static int capi20ncci_proc_show(struct seq_file *m, void *v)
+static int __maybe_unused capi20ncci_proc_show(struct seq_file *m, void *v)
 {
 	struct capidev *cdev;
 	struct capincci *np;

diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index ee510f9..e8949f3 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c

@@ -9,6 +9,7 @@
  *
  */
 
+#include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -2451,7 +2452,7 @@ lower_callback(struct notifier_block *nb, unsigned long val, void *v)
  * /proc/capi/capidrv:
  * nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt
  */
-static int capidrv_proc_show(struct seq_file *m, void *v)
+static int __maybe_unused capidrv_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "%lu %lu %lu %lu\n",
 		   global.ap.nrecvctlpkt,

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 63e3844..9a2ea3c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c

@@ -4094,7 +4094,8 @@ static inline int bond_slave_override(struct bonding *bond,
 
 
 static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	/* This helper function exists to help dev_pick_tx get the correct
 	 * destination queue.  Using a helper function skips a call to

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index f2af87d..c673ac2d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c

@@ -2213,7 +2213,8 @@ static void ena_netpoll(struct net_device *netdev)
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
 static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	u16 qid;
 	/* we suspect that this is good for in--kernel network services that
@@ -2223,7 +2224,7 @@ static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
 	if (skb_rx_queue_recorded(skb))
 		qid = skb_get_rx_queue(skb);
 	else
-		qid = fallback(dev, skb);
+		qid = fallback(dev, skb, NULL);
 
 	return qid;
 }

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index d5fca2e..eb890c4 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c

@@ -2107,7 +2107,7 @@ static const struct ethtool_ops bcm_sysport_ethtool_ops = {
 };
 
 static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
-				    void *accel_priv,
+				    struct net_device *sb_dev,
 				    select_queue_fallback_t fallback)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
@@ -2116,7 +2116,7 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
 	unsigned int q, port;
 
 	if (!netdev_uses_dsa(dev))
-		return fallback(dev, skb);
+		return fallback(dev, skb, NULL);
 
 	/* DSA tagging layer will have configured the correct queue */
 	q = BRCM_TAG_GET_QUEUE(queue);
@@ -2124,7 +2124,7 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
 	tx_ring = priv->ring_map[q + port * priv->per_port_num_tx_queues];
 
 	if (unlikely(!tx_ring))
-		return fallback(dev, skb);
+		return fallback(dev, skb, NULL);
 
 	return tx_ring->index;
 }

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index af7b5a4..5a727d4 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c

@@ -1910,7 +1910,8 @@ void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
 }
 
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback)
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 
@@ -1932,7 +1933,8 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	/* select a non-FCoE queue */
-	return fallback(dev, skb) % (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
+	return fallback(dev, skb, NULL) %
+	       (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
 }
 
 void bnx2x_set_num_queues(struct bnx2x *bp)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index a8ce5c5..0e508e5 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h

@@ -497,7 +497,8 @@ int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
 
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback);
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback);
 
 static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
 					struct bnx2x_fastpath *fp,

diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 2369b2f..d83233a 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c

@@ -4088,7 +4088,7 @@ static void cnic_cm_free_mem(struct cnic_dev *dev)
 {
 	struct cnic_local *cp = dev->cnic_priv;
 
-	kfree(cp->csk_tbl);
+	kvfree(cp->csk_tbl);
 	cp->csk_tbl = NULL;
 	cnic_free_id_tbl(&cp->csk_port_tbl);
 }
@@ -4098,8 +4098,8 @@ static int cnic_cm_alloc_mem(struct cnic_dev *dev)
 	struct cnic_local *cp = dev->cnic_priv;
 	u32 port_id;
 
-	cp->csk_tbl = kcalloc(MAX_CM_SK_TBL_SZ, sizeof(struct cnic_sock),
-			      GFP_KERNEL);
+	cp->csk_tbl = kvcalloc(MAX_CM_SK_TBL_SZ, sizeof(struct cnic_sock),
+			       GFP_KERNEL);
 	if (!cp->csk_tbl)
 		return -ENOMEM;
 

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 7cb4e75..a60d5af 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c

@@ -684,7 +684,7 @@ static void lio_sync_octeon_time(struct work_struct *work)
 	lt = (struct lio_time *)sc->virtdptr;
 
 	/* Get time of the day */
-	getnstimeofday64(&ts);
+	ktime_get_real_ts64(&ts);
 	lt->sec = ts.tv_sec;
 	lt->nsec = ts.tv_nsec;
 	octeon_swap_8B_data((u64 *)lt, (sizeof(struct lio_time)) / 8);
@@ -3299,7 +3299,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 {
 	struct lio *lio = NULL;
 	struct net_device *netdev;
-	u8 mac[6], i, j, *fw_ver;
+	u8 mac[6], i, j, *fw_ver, *micro_ver;
+	unsigned long micro;
+	u32 cur_ver;
 	struct octeon_soft_command *sc;
 	struct liquidio_if_cfg_context *ctx;
 	struct liquidio_if_cfg_resp *resp;
@@ -3429,6 +3431,14 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 				 fw_ver);
 		}
 
+		/* extract micro version field; point past '<maj>.<min>.' */
+		micro_ver = fw_ver + strlen(LIQUIDIO_BASE_VERSION) + 1;
+		if (kstrtoul(micro_ver, 10, &micro) != 0)
+			micro = 0;
+		octeon_dev->fw_info.ver.maj = LIQUIDIO_BASE_MAJOR_VERSION;
+		octeon_dev->fw_info.ver.min = LIQUIDIO_BASE_MINOR_VERSION;
+		octeon_dev->fw_info.ver.rev = micro;
+
 		octeon_swap_8B_data((u64 *)(&resp->cfg_info),
 				    (sizeof(struct liquidio_if_cfg_info)) >> 3);
 
@@ -3671,7 +3681,19 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 			OCTEON_CN2350_25GB_SUBSYS_ID ||
 		    octeon_dev->subsystem_id ==
 			OCTEON_CN2360_25GB_SUBSYS_ID) {
-			liquidio_get_speed(lio);
+			cur_ver = OCT_FW_VER(octeon_dev->fw_info.ver.maj,
+					     octeon_dev->fw_info.ver.min,
+					     octeon_dev->fw_info.ver.rev);
+
+			/* speed control unsupported in f/w older than 1.7.2 */
+			if (cur_ver < OCT_FW_VER(1, 7, 2)) {
+				dev_info(&octeon_dev->pci_dev->dev,
+					 "speed setting not supported by f/w.");
+				octeon_dev->speed_setting = 25;
+				octeon_dev->no_speed_setting = 1;
+			} else {
+				liquidio_get_speed(lio);
+			}
 
 			if (octeon_dev->speed_setting == 0) {
 				octeon_dev->speed_setting = 25;

diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
index 7f97ae4..0cc2338 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c

@@ -902,7 +902,7 @@ int octeon_download_firmware(struct octeon_device *oct, const u8 *data,
 	 *
 	 * Octeon always uses UTC time. so timezone information is not sent.
 	 */
-	getnstimeofday64(&ts);
+	ktime_get_real_ts64(&ts);
 	ret = snprintf(boottime, MAX_BOOTTIME_SIZE,
 		       " time_sec=%lld time_nsec=%ld",
 		       (s64)ts.tv_sec, ts.tv_nsec);

diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 94a4ed88d..d99ca6b 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h

@@ -288,8 +288,17 @@ struct oct_fw_info {
 	 */
 	u32 app_mode;
 	char   liquidio_firmware_version[32];
+	/* Fields extracted from legacy string 'liquidio_firmware_version' */
+	struct {
+		u8  maj;
+		u8  min;
+		u8  rev;
+	} ver;
 };
 
+#define OCT_FW_VER(maj, min, rev) \
+	(((u32)(maj) << 16) | ((u32)(min) << 8) | ((u32)(rev)))
+
 /* wrappers around work structs */
 struct cavium_wk {
 	struct delayed_work work;

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 516c883..511606f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c

@@ -2962,6 +2962,169 @@ static const struct file_operations chcr_stats_debugfs_fops = {
         .llseek  = seq_lseek,
         .release = single_release,
 };
+
+#define PRINT_ADAP_STATS(string, value) \
+	seq_printf(seq, "%-25s %-20llu\n", (string), \
+		   (unsigned long long)(value))
+
+#define PRINT_CH_STATS(string, value) \
+do { \
+	seq_printf(seq, "%-25s ", (string)); \
+	for (i = 0; i < adap->params.arch.nchan; i++) \
+		seq_printf(seq, "%-20llu ", \
+			   (unsigned long long)stats.value[i]); \
+	seq_printf(seq, "\n"); \
+} while (0)
+
+#define PRINT_CH_STATS2(string, value) \
+do { \
+	seq_printf(seq, "%-25s ", (string)); \
+	for (i = 0; i < adap->params.arch.nchan; i++) \
+		seq_printf(seq, "%-20llu ", \
+			   (unsigned long long)stats[i].value); \
+	seq_printf(seq, "\n"); \
+} while (0)
+
+static void show_tcp_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_tcp_stats v4, v6;
+
+	spin_lock(&adap->stats_lock);
+	t4_tp_get_tcp_stats(adap, &v4, &v6, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_ADAP_STATS("tcp_ipv4_out_rsts:", v4.tcp_out_rsts);
+	PRINT_ADAP_STATS("tcp_ipv4_in_segs:", v4.tcp_in_segs);
+	PRINT_ADAP_STATS("tcp_ipv4_out_segs:", v4.tcp_out_segs);
+	PRINT_ADAP_STATS("tcp_ipv4_retrans_segs:", v4.tcp_retrans_segs);
+	PRINT_ADAP_STATS("tcp_ipv6_out_rsts:", v6.tcp_out_rsts);
+	PRINT_ADAP_STATS("tcp_ipv6_in_segs:", v6.tcp_in_segs);
+	PRINT_ADAP_STATS("tcp_ipv6_out_segs:", v6.tcp_out_segs);
+	PRINT_ADAP_STATS("tcp_ipv6_retrans_segs:", v6.tcp_retrans_segs);
+}
+
+static void show_ddp_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_usm_stats stats;
+
+	spin_lock(&adap->stats_lock);
+	t4_get_usm_stats(adap, &stats, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_ADAP_STATS("usm_ddp_frames:", stats.frames);
+	PRINT_ADAP_STATS("usm_ddp_octets:", stats.octets);
+	PRINT_ADAP_STATS("usm_ddp_drops:", stats.drops);
+}
+
+static void show_rdma_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_rdma_stats stats;
+
+	spin_lock(&adap->stats_lock);
+	t4_tp_get_rdma_stats(adap, &stats, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_ADAP_STATS("rdma_no_rqe_mod_defer:", stats.rqe_dfr_mod);
+	PRINT_ADAP_STATS("rdma_no_rqe_pkt_defer:", stats.rqe_dfr_pkt);
+}
+
+static void show_tp_err_adapter_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_err_stats stats;
+
+	spin_lock(&adap->stats_lock);
+	t4_tp_get_err_stats(adap, &stats, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_ADAP_STATS("tp_err_ofld_no_neigh:", stats.ofld_no_neigh);
+	PRINT_ADAP_STATS("tp_err_ofld_cong_defer:", stats.ofld_cong_defer);
+}
+
+static void show_cpl_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_cpl_stats stats;
+	u8 i;
+
+	spin_lock(&adap->stats_lock);
+	t4_tp_get_cpl_stats(adap, &stats, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_CH_STATS("tp_cpl_requests:", req);
+	PRINT_CH_STATS("tp_cpl_responses:", rsp);
+}
+
+static void show_tp_err_channel_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_err_stats stats;
+	u8 i;
+
+	spin_lock(&adap->stats_lock);
+	t4_tp_get_err_stats(adap, &stats, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_CH_STATS("tp_mac_in_errs:", mac_in_errs);
+	PRINT_CH_STATS("tp_hdr_in_errs:", hdr_in_errs);
+	PRINT_CH_STATS("tp_tcp_in_errs:", tcp_in_errs);
+	PRINT_CH_STATS("tp_tcp6_in_errs:", tcp6_in_errs);
+	PRINT_CH_STATS("tp_tnl_cong_drops:", tnl_cong_drops);
+	PRINT_CH_STATS("tp_tnl_tx_drops:", tnl_tx_drops);
+	PRINT_CH_STATS("tp_ofld_vlan_drops:", ofld_vlan_drops);
+	PRINT_CH_STATS("tp_ofld_chan_drops:", ofld_chan_drops);
+}
+
+static void show_fcoe_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_fcoe_stats stats[NCHAN];
+	u8 i;
+
+	spin_lock(&adap->stats_lock);
+	for (i = 0; i < adap->params.arch.nchan; i++)
+		t4_get_fcoe_stats(adap, i, &stats[i], false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_CH_STATS2("fcoe_octets_ddp", octets_ddp);
+	PRINT_CH_STATS2("fcoe_frames_ddp", frames_ddp);
+	PRINT_CH_STATS2("fcoe_frames_drop", frames_drop);
+}
+
+#undef PRINT_CH_STATS2
+#undef PRINT_CH_STATS
+#undef PRINT_ADAP_STATS
+
+static int tp_stats_show(struct seq_file *seq, void *v)
+{
+	struct adapter *adap = seq->private;
+
+	seq_puts(seq, "\n--------Adapter Stats--------\n");
+	show_tcp_stats(seq);
+	show_ddp_stats(seq);
+	show_rdma_stats(seq);
+	show_tp_err_adapter_stats(seq);
+
+	seq_puts(seq, "\n-------- Channel Stats --------\n");
+	if (adap->params.arch.nchan == NCHAN)
+		seq_printf(seq, "%-25s %-20s %-20s %-20s %-20s\n",
+			   " ", "channel 0", "channel 1",
+			   "channel 2", "channel 3");
+	else
+		seq_printf(seq, "%-25s %-20s %-20s\n",
+			   " ", "channel 0", "channel 1");
+	show_cpl_stats(seq);
+	show_tp_err_channel_stats(seq);
+	show_fcoe_stats(seq);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_DEBUGFS_FILE(tp_stats);
+
 /* Add an array of Debug FS files.
  */
 void add_debugfs_files(struct adapter *adap,
@@ -3038,6 +3201,7 @@ int t4_setup_debugfs(struct adapter *adap)
 		{ "blocked_fl", &blocked_fl_fops, 0600, 0 },
 		{ "meminfo", &meminfo_fops, 0400, 0 },
 		{ "crypto", &chcr_stats_debugfs_fops, 0400, 0 },
+		{ "tp_stats", &tp_stats_debugfs_fops, 0400, 0 },
 	};
 
 	/* Debug FS nodes common to all T5 and later adapters.

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index ddb8b9e..a14a290 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c

@@ -115,42 +115,10 @@ static char adapter_stats_strings[][ETH_GSTRING_LEN] = {
 	"db_drop                ",
 	"db_full                ",
 	"db_empty               ",
-	"tcp_ipv4_out_rsts      ",
-	"tcp_ipv4_in_segs       ",
-	"tcp_ipv4_out_segs      ",
-	"tcp_ipv4_retrans_segs  ",
-	"tcp_ipv6_out_rsts      ",
-	"tcp_ipv6_in_segs       ",
-	"tcp_ipv6_out_segs      ",
-	"tcp_ipv6_retrans_segs  ",
-	"usm_ddp_frames         ",
-	"usm_ddp_octets         ",
-	"usm_ddp_drops          ",
-	"rdma_no_rqe_mod_defer  ",
-	"rdma_no_rqe_pkt_defer  ",
-	"tp_err_ofld_no_neigh   ",
-	"tp_err_ofld_cong_defer ",
 	"write_coal_success     ",
 	"write_coal_fail        ",
 };
 
-static char channel_stats_strings[][ETH_GSTRING_LEN] = {
-	"--------Channel--------- ",
-	"tp_cpl_requests        ",
-	"tp_cpl_responses       ",
-	"tp_mac_in_errs         ",
-	"tp_hdr_in_errs         ",
-	"tp_tcp_in_errs         ",
-	"tp_tcp6_in_errs        ",
-	"tp_tnl_cong_drops      ",
-	"tp_tnl_tx_drops        ",
-	"tp_ofld_vlan_drops     ",
-	"tp_ofld_chan_drops     ",
-	"fcoe_octets_ddp        ",
-	"fcoe_frames_ddp        ",
-	"fcoe_frames_drop       ",
-};
-
 static char loopback_stats_strings[][ETH_GSTRING_LEN] = {
 	"-------Loopback----------- ",
 	"octets_ok              ",
@@ -187,7 +155,6 @@ static int get_sset_count(struct net_device *dev, int sset)
 	case ETH_SS_STATS:
 		return ARRAY_SIZE(stats_strings) +
 		       ARRAY_SIZE(adapter_stats_strings) +
-		       ARRAY_SIZE(channel_stats_strings) +
 		       ARRAY_SIZE(loopback_stats_strings);
 	case ETH_SS_PRIV_FLAGS:
 		return ARRAY_SIZE(cxgb4_priv_flags_strings);
@@ -252,9 +219,6 @@ static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
 		memcpy(data, adapter_stats_strings,
 		       sizeof(adapter_stats_strings));
 		data += sizeof(adapter_stats_strings);
-		memcpy(data, channel_stats_strings,
-		       sizeof(channel_stats_strings));
-		data += sizeof(channel_stats_strings);
 		memcpy(data, loopback_stats_strings,
 		       sizeof(loopback_stats_strings));
 	} else if (stringset == ETH_SS_PRIV_FLAGS) {
@@ -280,41 +244,10 @@ struct adapter_stats {
 	u64 db_drop;
 	u64 db_full;
 	u64 db_empty;
-	u64 tcp_v4_out_rsts;
-	u64 tcp_v4_in_segs;
-	u64 tcp_v4_out_segs;
-	u64 tcp_v4_retrans_segs;
-	u64 tcp_v6_out_rsts;
-	u64 tcp_v6_in_segs;
-	u64 tcp_v6_out_segs;
-	u64 tcp_v6_retrans_segs;
-	u64 frames;
-	u64 octets;
-	u64 drops;
-	u64 rqe_dfr_mod;
-	u64 rqe_dfr_pkt;
-	u64 ofld_no_neigh;
-	u64 ofld_cong_defer;
 	u64 wc_success;
 	u64 wc_fail;
 };
 
-struct channel_stats {
-	u64 cpl_req;
-	u64 cpl_rsp;
-	u64 mac_in_errs;
-	u64 hdr_in_errs;
-	u64 tcp_in_errs;
-	u64 tcp6_in_errs;
-	u64 tnl_cong_drops;
-	u64 tnl_tx_drops;
-	u64 ofld_vlan_drops;
-	u64 ofld_chan_drops;
-	u64 octets_ddp;
-	u64 frames_ddp;
-	u64 frames_drop;
-};
-
 static void collect_sge_port_stats(const struct adapter *adap,
 				   const struct port_info *p,
 				   struct queue_port_stats *s)
@@ -337,45 +270,14 @@ static void collect_sge_port_stats(const struct adapter *adap,
 
 static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s)
 {
-	struct tp_tcp_stats v4, v6;
-	struct tp_rdma_stats rdma_stats;
-	struct tp_err_stats err_stats;
-	struct tp_usm_stats usm_stats;
 	u64 val1, val2;
 
 	memset(s, 0, sizeof(*s));
 
-	spin_lock(&adap->stats_lock);
-	t4_tp_get_tcp_stats(adap, &v4, &v6, false);
-	t4_tp_get_rdma_stats(adap, &rdma_stats, false);
-	t4_get_usm_stats(adap, &usm_stats, false);
-	t4_tp_get_err_stats(adap, &err_stats, false);
-	spin_unlock(&adap->stats_lock);
-
 	s->db_drop = adap->db_stats.db_drop;
 	s->db_full = adap->db_stats.db_full;
 	s->db_empty = adap->db_stats.db_empty;
 
-	s->tcp_v4_out_rsts = v4.tcp_out_rsts;
-	s->tcp_v4_in_segs = v4.tcp_in_segs;
-	s->tcp_v4_out_segs = v4.tcp_out_segs;
-	s->tcp_v4_retrans_segs = v4.tcp_retrans_segs;
-	s->tcp_v6_out_rsts = v6.tcp_out_rsts;
-	s->tcp_v6_in_segs = v6.tcp_in_segs;
-	s->tcp_v6_out_segs = v6.tcp_out_segs;
-	s->tcp_v6_retrans_segs = v6.tcp_retrans_segs;
-
-	if (is_offload(adap)) {
-		s->frames = usm_stats.frames;
-		s->octets = usm_stats.octets;
-		s->drops = usm_stats.drops;
-		s->rqe_dfr_mod = rdma_stats.rqe_dfr_mod;
-		s->rqe_dfr_pkt = rdma_stats.rqe_dfr_pkt;
-	}
-
-	s->ofld_no_neigh = err_stats.ofld_no_neigh;
-	s->ofld_cong_defer = err_stats.ofld_cong_defer;
-
 	if (!is_t4(adap->params.chip)) {
 		int v;
 
@@ -389,36 +291,6 @@ static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s)
 	}
 }
 
-static void collect_channel_stats(struct adapter *adap, struct channel_stats *s,
-				  u8 i)
-{
-	struct tp_cpl_stats cpl_stats;
-	struct tp_err_stats err_stats;
-	struct tp_fcoe_stats fcoe_stats;
-
-	memset(s, 0, sizeof(*s));
-
-	spin_lock(&adap->stats_lock);
-	t4_tp_get_cpl_stats(adap, &cpl_stats, false);
-	t4_tp_get_err_stats(adap, &err_stats, false);
-	t4_get_fcoe_stats(adap, i, &fcoe_stats, false);
-	spin_unlock(&adap->stats_lock);
-
-	s->cpl_req = cpl_stats.req[i];
-	s->cpl_rsp = cpl_stats.rsp[i];
-	s->mac_in_errs = err_stats.mac_in_errs[i];
-	s->hdr_in_errs = err_stats.hdr_in_errs[i];
-	s->tcp_in_errs = err_stats.tcp_in_errs[i];
-	s->tcp6_in_errs = err_stats.tcp6_in_errs[i];
-	s->tnl_cong_drops = err_stats.tnl_cong_drops[i];
-	s->tnl_tx_drops = err_stats.tnl_tx_drops[i];
-	s->ofld_vlan_drops = err_stats.ofld_vlan_drops[i];
-	s->ofld_chan_drops = err_stats.ofld_chan_drops[i];
-	s->octets_ddp = fcoe_stats.octets_ddp;
-	s->frames_ddp = fcoe_stats.frames_ddp;
-	s->frames_drop = fcoe_stats.frames_drop;
-}
-
 static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
 		      u64 *data)
 {
@@ -439,11 +311,6 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
 	data += sizeof(struct adapter_stats) / sizeof(u64);
 
 	*data++ = (u64)pi->port_id;
-	collect_channel_stats(adapter, (struct channel_stats *)data,
-			      pi->port_id);
-	data += sizeof(struct channel_stats) / sizeof(u64);
-
-	*data++ = (u64)pi->port_id;
 	memset(&s, 0, sizeof(s));
 	t4_get_lb_stats(adapter, pi->port_id, &s);
 

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 0d91716..40cf8dc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c

@@ -930,7 +930,8 @@ static int setup_sge_queues(struct adapter *adap)
 }
 
 static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	int txq;
 
@@ -972,7 +973,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
 		return txq;
 	}
 
-	return fallback(dev, skb) % dev->real_num_tx_queues;
+	return fallback(dev, skb, NULL) % dev->real_num_tx_queues;
 }
 
 static int closest_timer(const struct sge *s, int time)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index ebb46c4..6807bc3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c

@@ -3412,7 +3412,9 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 	c.iqsize = htons(iq->size);
 	c.iqaddr = cpu_to_be64(iq->phys_addr);
 	if (cong >= 0)
-		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F);
+		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F |
+				FW_IQ_CMD_IQTYPE_V(cong ? FW_IQ_IQTYPE_NIC
+							:  FW_IQ_IQTYPE_OFLD));
 
 	if (fl) {
 		enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index f1967cf..5dc6c41 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h

@@ -1472,6 +1472,12 @@ enum fw_iq_type {
 	FW_IQ_TYPE_NO_FL_INT_CAP
 };
 
+enum fw_iq_iqtype {
+	FW_IQ_IQTYPE_OTHER,
+	FW_IQ_IQTYPE_NIC,
+	FW_IQ_IQTYPE_OFLD,
+};
+
 struct fw_iq_cmd {
 	__be32 op_to_vfn;
 	__be32 alloc_to_len16;
@@ -1586,6 +1592,12 @@ struct fw_iq_cmd {
 #define FW_IQ_CMD_IQFLINTISCSIC_S	26
 #define FW_IQ_CMD_IQFLINTISCSIC_V(x)	((x) << FW_IQ_CMD_IQFLINTISCSIC_S)
 
+#define FW_IQ_CMD_IQTYPE_S		24
+#define FW_IQ_CMD_IQTYPE_M		0x3
+#define FW_IQ_CMD_IQTYPE_V(x)		((x) << FW_IQ_CMD_IQTYPE_S)
+#define FW_IQ_CMD_IQTYPE_G(x)		\
+	(((x) >> FW_IQ_CMD_IQTYPE_S) & FW_IQ_CMD_IQTYPE_M)
+
 #define FW_IQ_CMD_FL0CNGCHMAP_S		20
 #define FW_IQ_CMD_FL0CNGCHMAP_V(x)	((x) << FW_IQ_CMD_FL0CNGCHMAP_S)
 

diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index ce1f04f..1c9ad36 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c

@@ -46,6 +46,11 @@
 #define DRV_NAME		"gmac-gemini"
 #define DRV_VERSION		"1.0"
 
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
 #define HSIZE_8			0x00
 #define HSIZE_16		0x01
 #define HSIZE_32		0x02
@@ -146,6 +151,7 @@ struct gemini_ethernet {
 	void __iomem *base;
 	struct gemini_ethernet_port *port0;
 	struct gemini_ethernet_port *port1;
+	bool initialized;
 
 	spinlock_t	irq_lock; /* Locks IRQ-related registers */
 	unsigned int	freeq_order;
@@ -300,23 +306,26 @@ static void gmac_speed_set(struct net_device *netdev)
 		status.bits.speed = GMAC_SPEED_1000;
 		if (phydev->interface == PHY_INTERFACE_MODE_RGMII)
 			status.bits.mii_rmii = GMAC_PHY_RGMII_1000;
-		netdev_info(netdev, "connect to RGMII @ 1Gbit\n");
+		netdev_dbg(netdev, "connect %s to RGMII @ 1Gbit\n",
+			   phydev_name(phydev));
 		break;
 	case 100:
 		status.bits.speed = GMAC_SPEED_100;
 		if (phydev->interface == PHY_INTERFACE_MODE_RGMII)
 			status.bits.mii_rmii = GMAC_PHY_RGMII_100_10;
-		netdev_info(netdev, "connect to RGMII @ 100 Mbit\n");
+		netdev_dbg(netdev, "connect %s to RGMII @ 100 Mbit\n",
+			   phydev_name(phydev));
 		break;
 	case 10:
 		status.bits.speed = GMAC_SPEED_10;
 		if (phydev->interface == PHY_INTERFACE_MODE_RGMII)
 			status.bits.mii_rmii = GMAC_PHY_RGMII_100_10;
-		netdev_info(netdev, "connect to RGMII @ 10 Mbit\n");
+		netdev_dbg(netdev, "connect %s to RGMII @ 10 Mbit\n",
+			   phydev_name(phydev));
 		break;
 	default:
-		netdev_warn(netdev, "Not supported PHY speed (%d)\n",
-			    phydev->speed);
+		netdev_warn(netdev, "Unsupported PHY speed (%d) on %s\n",
+			    phydev->speed, phydev_name(phydev));
 	}
 
 	if (phydev->duplex == DUPLEX_FULL) {
@@ -363,12 +372,6 @@ static int gmac_setup_phy(struct net_device *netdev)
 		return -ENODEV;
 	netdev->phydev = phy;
 
-	netdev_info(netdev, "connected to PHY \"%s\"\n",
-		    phydev_name(phy));
-	phy_attached_print(phy, "phy_id=0x%.8lx, phy_mode=%s\n",
-			   (unsigned long)phy->phy_id,
-			   phy_modes(phy->interface));
-
 	phy->supported &= PHY_GBIT_FEATURES;
 	phy->supported |= SUPPORTED_Asym_Pause | SUPPORTED_Pause;
 	phy->advertising = phy->supported;
@@ -376,19 +379,19 @@ static int gmac_setup_phy(struct net_device *netdev)
 	/* set PHY interface type */
 	switch (phy->interface) {
 	case PHY_INTERFACE_MODE_MII:
-		netdev_info(netdev, "set GMAC0 to GMII mode, GMAC1 disabled\n");
+		netdev_dbg(netdev,
+			   "MII: set GMAC0 to GMII mode, GMAC1 disabled\n");
 		status.bits.mii_rmii = GMAC_PHY_MII;
-		netdev_info(netdev, "connect to MII\n");
 		break;
 	case PHY_INTERFACE_MODE_GMII:
-		netdev_info(netdev, "set GMAC0 to GMII mode, GMAC1 disabled\n");
+		netdev_dbg(netdev,
+			   "GMII: set GMAC0 to GMII mode, GMAC1 disabled\n");
 		status.bits.mii_rmii = GMAC_PHY_GMII;
-		netdev_info(netdev, "connect to GMII\n");
 		break;
 	case PHY_INTERFACE_MODE_RGMII:
-		dev_info(dev, "set GMAC0 and GMAC1 to MII/RGMII mode\n");
+		netdev_dbg(netdev,
+			   "RGMII: set GMAC0 and GMAC1 to MII/RGMII mode\n");
 		status.bits.mii_rmii = GMAC_PHY_RGMII_100_10;
-		netdev_info(netdev, "connect to RGMII\n");
 		break;
 	default:
 		netdev_err(netdev, "Unsupported MII interface\n");
@@ -398,29 +401,63 @@ static int gmac_setup_phy(struct net_device *netdev)
 	}
 	writel(status.bits32, port->gmac_base + GMAC_STATUS);
 
+	if (netif_msg_link(port))
+		phy_attached_info(phy);
+
 	return 0;
 }
 
-static int gmac_pick_rx_max_len(int max_l3_len)
+/* The maximum frame length is not logically enumerated in the
+ * hardware, so we do a table lookup to find the applicable max
+ * frame length.
+ */
+struct gmac_max_framelen {
+	unsigned int max_l3_len;
+	u8 val;
+};
+
+static const struct gmac_max_framelen gmac_maxlens[] = {
+	{
+		.max_l3_len = 1518,
+		.val = CONFIG0_MAXLEN_1518,
+	},
+	{
+		.max_l3_len = 1522,
+		.val = CONFIG0_MAXLEN_1522,
+	},
+	{
+		.max_l3_len = 1536,
+		.val = CONFIG0_MAXLEN_1536,
+	},
+	{
+		.max_l3_len = 1542,
+		.val = CONFIG0_MAXLEN_1542,
+	},
+	{
+		.max_l3_len = 9212,
+		.val = CONFIG0_MAXLEN_9k,
+	},
+	{
+		.max_l3_len = 10236,
+		.val = CONFIG0_MAXLEN_10k,
+	},
+};
+
+static int gmac_pick_rx_max_len(unsigned int max_l3_len)
 {
-	/* index = CONFIG_MAXLEN_XXX values */
-	static const int max_len[8] = {
-		1536, 1518, 1522, 1542,
-		9212, 10236, 1518, 1518
-	};
-	int i, n = 5;
+	const struct gmac_max_framelen *maxlen;
+	int maxtot;
+	int i;
 
-	max_l3_len += ETH_HLEN + VLAN_HLEN;
+	maxtot = max_l3_len + ETH_HLEN + VLAN_HLEN;
 
-	if (max_l3_len > max_len[n])
-		return -1;
-
-	for (i = 0; i < 5; i++) {
-		if (max_len[i] >= max_l3_len && max_len[i] < max_len[n])
-			n = i;
+	for (i = 0; i < ARRAY_SIZE(gmac_maxlens); i++) {
+		maxlen = &gmac_maxlens[i];
+		if (maxtot <= maxlen->max_l3_len)
+			return maxlen->val;
 	}
 
-	return n;
+	return -1;
 }
 
 static int gmac_init(struct net_device *netdev)
@@ -1276,8 +1313,8 @@ static void gmac_enable_irq(struct net_device *netdev, int enable)
 	unsigned long flags;
 	u32 val, mask;
 
-	netdev_info(netdev, "%s device %d %s\n", __func__,
-		    netdev->dev_id, enable ? "enable" : "disable");
+	netdev_dbg(netdev, "%s device %d %s\n", __func__,
+		   netdev->dev_id, enable ? "enable" : "disable");
 	spin_lock_irqsave(&geth->irq_lock, flags);
 
 	mask = GMAC0_IRQ0_2 << (netdev->dev_id * 2);
@@ -1753,7 +1790,10 @@ static int gmac_open(struct net_device *netdev)
 	phy_start(netdev->phydev);
 
 	err = geth_resize_freeq(port);
-	if (err) {
+	/* It's fine if it's just busy, the other port has set up
+	 * the freeq in that case.
+	 */
+	if (err && (err != -EBUSY)) {
 		netdev_err(netdev, "could not resize freeq\n");
 		goto err_stop_phy;
 	}
@@ -1782,7 +1822,7 @@ static int gmac_open(struct net_device *netdev)
 		     HRTIMER_MODE_REL);
 	port->rx_coalesce_timer.function = &gmac_coalesce_delay_expired;
 
-	netdev_info(netdev, "opened\n");
+	netdev_dbg(netdev, "opened\n");
 
 	return 0;
 
@@ -2264,6 +2304,14 @@ static void gemini_port_remove(struct gemini_ethernet_port *port)
 
 static void gemini_ethernet_init(struct gemini_ethernet *geth)
 {
+	/* Only do this once both ports are online */
+	if (geth->initialized)
+		return;
+	if (geth->port0 && geth->port1)
+		geth->initialized = true;
+	else
+		return;
+
 	writel(0, geth->base + GLOBAL_INTERRUPT_ENABLE_0_REG);
 	writel(0, geth->base + GLOBAL_INTERRUPT_ENABLE_1_REG);
 	writel(0, geth->base + GLOBAL_INTERRUPT_ENABLE_2_REG);
@@ -2354,6 +2402,7 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
 	port->id = id;
 	port->geth = geth;
 	port->dev = dev;
+	port->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 
 	/* DMA memory */
 	dmares = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -2410,6 +2459,10 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
 		geth->port0 = port;
 	else
 		geth->port1 = port;
+
+	/* This will just be done once both ports are up and reset */
+	gemini_ethernet_init(geth);
+
 	platform_set_drvdata(pdev, port);
 
 	/* Set up and register the netdev */
@@ -2423,6 +2476,11 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
 
 	netdev->hw_features = GMAC_OFFLOAD_FEATURES;
 	netdev->features |= GMAC_OFFLOAD_FEATURES | NETIF_F_GRO;
+	/* We can handle jumbo frames up to 10236 bytes so, let's accept
+	 * payloads of 10236 bytes minus VLAN and ethernet header
+	 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = 10236 - VLAN_ETH_HLEN;
 
 	port->freeq_refill = 0;
 	netif_napi_add(netdev, &port->napi, gmac_napi_poll,
@@ -2527,7 +2585,6 @@ static int gemini_ethernet_probe(struct platform_device *pdev)
 
 	spin_lock_init(&geth->irq_lock);
 	spin_lock_init(&geth->freeq_lock);
-	gemini_ethernet_init(geth);
 
 	/* The children will use this */
 	platform_set_drvdata(pdev, geth);
@@ -2540,8 +2597,8 @@ static int gemini_ethernet_remove(struct platform_device *pdev)
 {
 	struct gemini_ethernet *geth = platform_get_drvdata(pdev);
 
-	gemini_ethernet_init(geth);
 	geth_cleanup_freeq(geth);
+	geth->initialized = false;
 
 	return 0;
 }

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 382891f..7005949 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h

@@ -185,34 +185,13 @@ static inline void queue_tail_inc(struct be_queue_info *q)
 
 struct be_eq_obj {
 	struct be_queue_info q;
-	char desc[32];
 
-	/* Adaptive interrupt coalescing (AIC) info */
-	bool enable_aic;
-	u32 min_eqd;		/* in usecs */
-	u32 max_eqd;		/* in usecs */
-	u32 eqd;		/* configured val when aic is off */
-	u32 cur_eqd;		/* in usecs */
-
+	struct be_adapter *adapter;
+	struct napi_struct napi;
 	u8 idx;			/* array index */
 	u8 msix_idx;
 	u16 spurious_intr;
-	struct napi_struct napi;
-	struct be_adapter *adapter;
 	cpumask_var_t  affinity_mask;
-
-#ifdef CONFIG_NET_RX_BUSY_POLL
-#define BE_EQ_IDLE		0
-#define BE_EQ_NAPI		1	/* napi owns this EQ */
-#define BE_EQ_POLL		2	/* poll owns this EQ */
-#define BE_EQ_LOCKED		(BE_EQ_NAPI | BE_EQ_POLL)
-#define BE_EQ_NAPI_YIELD	4	/* napi yielded this EQ */
-#define BE_EQ_POLL_YIELD	8	/* poll yielded this EQ */
-#define BE_EQ_YIELD		(BE_EQ_NAPI_YIELD | BE_EQ_POLL_YIELD)
-#define BE_EQ_USER_PEND		(BE_EQ_POLL | BE_EQ_POLL_YIELD)
-	unsigned int state;
-	spinlock_t lock;	/* lock to serialize napi and busy-poll */
-#endif  /* CONFIG_NET_RX_BUSY_POLL */
 } ____cacheline_aligned_in_smp;
 
 struct be_aic_obj {		/* Adaptive interrupt coalescing (AIC) info */
@@ -238,7 +217,6 @@ struct be_tx_stats {
 	u64 tx_vxlan_offload_pkts;
 	u64 tx_reqs;
 	u64 tx_compl;
-	ulong tx_jiffies;
 	u32 tx_stops;
 	u32 tx_drv_drops;	/* pkts dropped by driver */
 	/* the error counters are described in be_ethtool.c */
@@ -261,9 +239,9 @@ struct be_tx_compl_info {
 
 struct be_tx_obj {
 	u32 db_offset;
+	struct be_tx_compl_info txcp;
 	struct be_queue_info q;
 	struct be_queue_info cq;
-	struct be_tx_compl_info txcp;
 	/* Remember the skbs that were transmitted */
 	struct sk_buff *sent_skb_list[TX_Q_LEN];
 	struct be_tx_stats stats;
@@ -458,10 +436,10 @@ struct be_port_resources {
 #define be_is_os2bmc_enabled(adapter) (adapter->flags & BE_FLAGS_OS2BMC)
 
 struct rss_info {
-	u64 rss_flags;
 	u8 rsstable[RSS_INDIR_TABLE_LEN];
 	u8 rss_queue[RSS_INDIR_TABLE_LEN];
 	u8 rss_hkey[RSS_HASH_KEY_LEN];
+	u64 rss_flags;
 };
 
 #define BE_INVALID_DIE_TEMP	0xFF
@@ -544,11 +522,13 @@ enum {
 };
 
 struct be_error_recovery {
-	/* Lancer error recovery variables */
-	u8 recovery_retries;
+	union {
+		u8 recovery_retries;	/* used for Lancer		*/
+		u8 recovery_state;	/* used for BEx and Skyhawk	*/
+	};
 
 	/* BEx/Skyhawk error recovery variables */
-	u8 recovery_state;
+	bool recovery_supported;
 	u16 ue_to_reset_time;		/* Time after UE, to soft reset
 					 * the chip - PF0 only
 					 */
@@ -556,7 +536,6 @@ struct be_error_recovery {
 					 * of SLIPORT_SEMAPHORE reg
 					 */
 	u16 last_err_code;
-	bool recovery_supported;
 	unsigned long probe_time;
 	unsigned long last_recovery_time;
 

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 8f75500..05e4c0b 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c

@@ -3403,9 +3403,11 @@ static int be_msix_register(struct be_adapter *adapter)
 	int status, i, vec;
 
 	for_all_evt_queues(adapter, eqo, i) {
-		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
+		char irq_name[IFNAMSIZ+4];
+
+		snprintf(irq_name, sizeof(irq_name), "%s-q%d", netdev->name, i);
 		vec = be_msix_vec_get(adapter, eqo);
-		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
+		status = request_irq(vec, be_msix, 0, irq_name, eqo);
 		if (status)
 			goto err_msix;
 

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index ef9ef70..948b3e0 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c

@@ -2022,7 +2022,8 @@ static void hns_nic_get_stats64(struct net_device *ndev,
 
 static u16
 hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb,
-		     void *accel_priv, select_queue_fallback_t fallback)
+		     struct net_device *sb_dev,
+		     select_queue_fallback_t fallback)
 {
 	struct ethhdr *eth_hdr = (struct ethhdr *)skb->data;
 	struct hns_nic_priv *priv = netdev_priv(ndev);
@@ -2032,7 +2033,7 @@ hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb,
 	    is_multicast_ether_addr(eth_hdr->h_dest))
 		return 0;
 	else
-		return fallback(ndev, skb);
+		return fallback(ndev, skb, NULL);
 }
 
 static const struct net_device_ops hns_nic_netdev_ops = {

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index f73c9df..c211450 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c

@@ -239,7 +239,28 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev)
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 	struct hnae3_knic_private_info *kinfo = &h->kinfo;
 	unsigned int queue_size = kinfo->rss_size * kinfo->num_tc;
-	int ret;
+	int i, ret;
+
+	if (kinfo->num_tc <= 1) {
+		netdev_reset_tc(netdev);
+	} else {
+		ret = netdev_set_num_tc(netdev, kinfo->num_tc);
+		if (ret) {
+			netdev_err(netdev,
+				   "netdev_set_num_tc fail, ret=%d!\n", ret);
+			return ret;
+		}
+
+		for (i = 0; i < HNAE3_MAX_TC; i++) {
+			if (!kinfo->tc_info[i].enable)
+				continue;
+
+			netdev_set_tc_queue(netdev,
+					    kinfo->tc_info[i].tc,
+					    kinfo->tc_info[i].tqp_count,
+					    kinfo->tc_info[i].tqp_offset);
+		}
+	}
 
 	ret = netif_set_real_num_tx_queues(netdev, queue_size);
 	if (ret) {
@@ -312,7 +333,9 @@ static int hns3_nic_net_up(struct net_device *netdev)
 static int hns3_nic_net_open(struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	int ret;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hnae3_knic_private_info *kinfo;
+	int i, ret;
 
 	netif_carrier_off(netdev);
 
@@ -327,6 +350,12 @@ static int hns3_nic_net_open(struct net_device *netdev)
 		return ret;
 	}
 
+	kinfo = &h->kinfo;
+	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
+		netdev_set_prio_tc_map(netdev, i,
+				       kinfo->prio_tc[i]);
+	}
+
 	priv->ae_handle->last_reset_time = jiffies;
 	return 0;
 }
@@ -762,16 +791,14 @@ static int hns3_set_l3l4_type_csum(struct sk_buff *skb, u8 ol4_proto,
 		 */
 		if (skb_is_gso(skb))
 			hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L3CS_B, 1);
-
-		hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
 	} else if (l3.v6->version == 6) {
 		hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_M,
 				HNS3_TXD_L3T_S, HNS3_L3T_IPV6);
-		hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
 	}
 
 	switch (l4_proto) {
 	case IPPROTO_TCP:
+		hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
 		hnae3_set_field(*type_cs_vlan_tso,
 				HNS3_TXD_L4T_M,
 				HNS3_TXD_L4T_S,
@@ -781,12 +808,14 @@ static int hns3_set_l3l4_type_csum(struct sk_buff *skb, u8 ol4_proto,
 		if (hns3_tunnel_csum_bug(skb))
 			break;
 
+		hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
 		hnae3_set_field(*type_cs_vlan_tso,
 				HNS3_TXD_L4T_M,
 				HNS3_TXD_L4T_S,
 				HNS3_L4T_UDP);
 		break;
 	case IPPROTO_SCTP:
+		hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
 		hnae3_set_field(*type_cs_vlan_tso,
 				HNS3_TXD_L4T_M,
 				HNS3_TXD_L4T_S,
@@ -1307,7 +1336,6 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data)
 	u16 mode = mqprio_qopt->mode;
 	u8 hw = mqprio_qopt->qopt.hw;
 	bool if_running;
-	unsigned int i;
 	int ret;
 
 	if (!((hw == TC_MQPRIO_HW_OFFLOAD_TCS &&
@@ -1331,24 +1359,6 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data)
 	if (ret)
 		goto out;
 
-	if (tc <= 1) {
-		netdev_reset_tc(netdev);
-	} else {
-		ret = netdev_set_num_tc(netdev, tc);
-		if (ret)
-			goto out;
-
-		for (i = 0; i < HNAE3_MAX_TC; i++) {
-			if (!kinfo->tc_info[i].enable)
-				continue;
-
-			netdev_set_tc_queue(netdev,
-					    kinfo->tc_info[i].tc,
-					    kinfo->tc_info[i].tqp_count,
-					    kinfo->tc_info[i].tqp_offset);
-		}
-	}
-
 	ret = hns3_nic_set_real_num_queue(netdev);
 
 out:
@@ -3202,7 +3212,6 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
 	struct net_device *ndev = kinfo->netdev;
 	bool if_running;
 	int ret;
-	u8 i;
 
 	if (tc > HNAE3_MAX_TC)
 		return -EINVAL;
@@ -3212,10 +3221,6 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
 
 	if_running = netif_running(ndev);
 
-	ret = netdev_set_num_tc(ndev, tc);
-	if (ret)
-		return ret;
-
 	if (if_running) {
 		(void)hns3_nic_net_stop(ndev);
 		msleep(100);
@@ -3226,27 +3231,6 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
 	if (ret)
 		goto err_out;
 
-	if (tc <= 1) {
-		netdev_reset_tc(ndev);
-		goto out;
-	}
-
-	for (i = 0; i < HNAE3_MAX_TC; i++) {
-		struct hnae3_tc_info *tc_info = &kinfo->tc_info[i];
-
-		if (tc_info->enable)
-			netdev_set_tc_queue(ndev,
-					    tc_info->tc,
-					    tc_info->tqp_count,
-					    tc_info->tqp_offset);
-	}
-
-	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
-		netdev_set_prio_tc_map(ndev, i,
-				       kinfo->prio_tc[i]);
-	}
-
-out:
 	ret = hns3_nic_set_real_num_queue(ndev);
 
 err_out:

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 40c0425..11620e0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c

@@ -201,7 +201,9 @@ static u32 hns3_lb_check_rx_ring(struct hns3_nic_priv *priv, u32 budget)
 		rx_group = &ring->tqp_vector->rx_group;
 		pre_rx_pkt = rx_group->total_packets;
 
+		preempt_disable();
 		hns3_clean_rx_ring(ring, budget, hns3_lb_check_skb_data);
+		preempt_enable();
 
 		rcv_good_pkt_total += (rx_group->total_packets - pre_rx_pkt);
 		rx_group->total_packets = pre_rx_pkt;

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 82cf12a..eca4b23 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c

@@ -206,7 +206,8 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
 
 	spin_lock_bh(&hw->cmq.csq.lock);
 
-	if (num > hclge_ring_space(&hw->cmq.csq)) {
+	if (num > hclge_ring_space(&hw->cmq.csq) ||
+	    test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) {
 		spin_unlock_bh(&hw->cmq.csq.lock);
 		return -EBUSY;
 	}
@@ -346,6 +347,7 @@ int hclge_cmd_init(struct hclge_dev *hdev)
 	spin_lock_init(&hdev->hw.cmq.crq.lock);
 
 	hclge_cmd_init_regs(&hdev->hw);
+	clear_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 
 	ret = hclge_cmd_query_firmware_version(&hdev->hw, &version);
 	if (ret) {

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index d9aaa76..656c3e6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h

@@ -571,7 +571,8 @@ struct hclge_config_auto_neg_cmd {
 
 struct hclge_config_max_frm_size_cmd {
 	__le16  max_frm_size;
-	u8      rsv[22];
+	u8      min_frm_size;
+	u8      rsv[21];
 };
 
 enum hclge_mac_vlan_tbl_opcode {

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 6fffc69..8bbf4e5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c

@@ -1834,8 +1834,6 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev,
 	return 0;
 }
 
-#define HCLGE_PRIV_ENABLE(a) ((a) > 0 ? 1 : 0)
-
 static int hclge_rx_priv_wl_config(struct hclge_dev *hdev,
 				   struct hclge_pkt_buf_alloc *buf_alloc)
 {
@@ -1863,13 +1861,11 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev,
 			req->tc_wl[j].high =
 				cpu_to_le16(priv->wl.high >> HCLGE_BUF_UNIT_S);
 			req->tc_wl[j].high |=
-				cpu_to_le16(HCLGE_PRIV_ENABLE(priv->wl.high) <<
-					    HCLGE_RX_PRIV_EN_B);
+				cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
 			req->tc_wl[j].low =
 				cpu_to_le16(priv->wl.low >> HCLGE_BUF_UNIT_S);
 			req->tc_wl[j].low |=
-				cpu_to_le16(HCLGE_PRIV_ENABLE(priv->wl.low) <<
-					    HCLGE_RX_PRIV_EN_B);
+				 cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
 		}
 	}
 
@@ -1911,13 +1907,11 @@ static int hclge_common_thrd_config(struct hclge_dev *hdev,
 			req->com_thrd[j].high =
 				cpu_to_le16(tc->high >> HCLGE_BUF_UNIT_S);
 			req->com_thrd[j].high |=
-				cpu_to_le16(HCLGE_PRIV_ENABLE(tc->high) <<
-					    HCLGE_RX_PRIV_EN_B);
+				 cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
 			req->com_thrd[j].low =
 				cpu_to_le16(tc->low >> HCLGE_BUF_UNIT_S);
 			req->com_thrd[j].low |=
-				cpu_to_le16(HCLGE_PRIV_ENABLE(tc->low) <<
-					    HCLGE_RX_PRIV_EN_B);
+				 cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
 		}
 	}
 
@@ -1943,14 +1937,10 @@ static int hclge_common_wl_config(struct hclge_dev *hdev,
 
 	req = (struct hclge_rx_com_wl *)desc.data;
 	req->com_wl.high = cpu_to_le16(buf->self.high >> HCLGE_BUF_UNIT_S);
-	req->com_wl.high |=
-		cpu_to_le16(HCLGE_PRIV_ENABLE(buf->self.high) <<
-			    HCLGE_RX_PRIV_EN_B);
+	req->com_wl.high |=  cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
 
 	req->com_wl.low = cpu_to_le16(buf->self.low >> HCLGE_BUF_UNIT_S);
-	req->com_wl.low |=
-		cpu_to_le16(HCLGE_PRIV_ENABLE(buf->self.low) <<
-			    HCLGE_RX_PRIV_EN_B);
+	req->com_wl.low |=  cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
@@ -2517,12 +2507,14 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 
 	/* check for vector0 reset event sources */
 	if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & rst_src_reg) {
+		set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 		set_bit(HNAE3_GLOBAL_RESET, &hdev->reset_pending);
 		*clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B);
 		return HCLGE_VECTOR0_EVENT_RST;
 	}
 
 	if (BIT(HCLGE_VECTOR0_CORERESET_INT_B) & rst_src_reg) {
+		set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 		set_bit(HNAE3_CORE_RESET, &hdev->reset_pending);
 		*clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B);
 		return HCLGE_VECTOR0_EVENT_RST;
@@ -2815,8 +2807,6 @@ static void hclge_clear_reset_cause(struct hclge_dev *hdev)
 		clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B);
 		break;
 	default:
-		dev_warn(&hdev->pdev->dev, "Unsupported reset event to clear:%d",
-			 hdev->reset_type);
 		break;
 	}
 
@@ -4997,6 +4987,7 @@ static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mtu)
 
 	req = (struct hclge_config_max_frm_size_cmd *)desc.data;
 	req->max_frm_size = cpu_to_le16(max_frm_size);
+	req->min_frm_size = HCLGE_MAC_MIN_FRAME;
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 71d38b8..20abe82 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h

@@ -128,6 +128,7 @@ enum HCLGE_DEV_STATE {
 	HCLGE_STATE_MBX_SERVICE_SCHED,
 	HCLGE_STATE_MBX_HANDLING,
 	HCLGE_STATE_STATISTICS_UPDATING,
+	HCLGE_STATE_CMD_DISABLE,
 	HCLGE_STATE_MAX
 };
 

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 82bc30f..e2acf3b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c

@@ -1223,6 +1223,10 @@ static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
 		tx_en = true;
 		rx_en = true;
 		break;
+	case HCLGE_FC_PFC:
+		tx_en = false;
+		rx_en = false;
+		break;
 	default:
 		tx_en = true;
 		rx_en = true;
@@ -1240,8 +1244,9 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
 	if (ret)
 		return ret;
 
-	if (hdev->tm_info.fc_mode != HCLGE_FC_PFC)
-		return hclge_mac_pause_setup_hw(hdev);
+	ret = hclge_mac_pause_setup_hw(hdev);
+	if (ret)
+		return ret;
 
 	/* Only DCB-supported dev supports qset back pressure and pfc cmd */
 	if (!hnae3_dev_dcb_supported(hdev))

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 5a86532..d1f16f0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c

@@ -658,8 +658,17 @@ static int hclgevf_unmap_ring_from_vector(
 static int hclgevf_put_vector(struct hnae3_handle *handle, int vector)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	int vector_id;
 
-	hclgevf_free_vector(hdev, vector);
+	vector_id = hclgevf_get_vector_index(hdev, vector);
+	if (vector_id < 0) {
+		dev_err(&handle->pdev->dev,
+			"hclgevf_put_vector get vector index fail. ret =%d\n",
+			vector_id);
+		return vector_id;
+	}
+
+	hclgevf_free_vector(hdev, vector_id);
 
 	return 0;
 }

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index 173ca27..e9d5a4f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c

@@ -208,7 +208,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 
 			/* tail the async message in arq */
 			msg_q = hdev->arq.msg_q[hdev->arq.tail];
-			memcpy(&msg_q[0], req->msg, HCLGE_MBX_MAX_ARQ_MSG_SIZE);
+			memcpy(&msg_q[0], req->msg,
+			       HCLGE_MBX_MAX_ARQ_MSG_SIZE * sizeof(u16));
 			hclge_mbx_tail_ptr_move_arq(hdev->arq);
 			hdev->arq.count++;
 

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 3862fea..5a6600f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

@@ -5275,6 +5275,8 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
 static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
 			     struct ixgbe_fwd_adapter *accel)
 {
+	u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
+	int num_tc = netdev_get_num_tc(adapter->netdev);
 	struct net_device *vdev = accel->netdev;
 	int i, baseq, err;
 
@@ -5286,6 +5288,11 @@ static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
 	accel->rx_base_queue = baseq;
 	accel->tx_base_queue = baseq;
 
+	/* record configuration for macvlan interface in vdev */
+	for (i = 0; i < num_tc; i++)
+		netdev_bind_sb_channel_queue(adapter->netdev, vdev,
+					     i, rss_i, baseq + (rss_i * i));
+
 	for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
 		adapter->rx_ring[baseq + i]->netdev = vdev;
 
@@ -5310,6 +5317,10 @@ static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
 
 	netdev_err(vdev, "L2FW offload disabled due to L2 filter error\n");
 
+	/* unbind the queues and drop the subordinate channel config */
+	netdev_unbind_sb_channel(adapter->netdev, vdev);
+	netdev_set_sb_channel(vdev, 0);
+
 	clear_bit(accel->pool, adapter->fwd_bitmask);
 	kfree(accel);
 
@@ -8197,26 +8208,26 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
 					      input, common, ring->queue_index);
 }
 
+#ifdef IXGBE_FCOE
 static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
-			      void *accel_priv, select_queue_fallback_t fallback)
+			      struct net_device *sb_dev,
+			      select_queue_fallback_t fallback)
 {
-	struct ixgbe_fwd_adapter *fwd_adapter = accel_priv;
 	struct ixgbe_adapter *adapter;
-	int txq;
-#ifdef IXGBE_FCOE
 	struct ixgbe_ring_feature *f;
-#endif
+	int txq;
 
-	if (fwd_adapter) {
-		adapter = netdev_priv(dev);
-		txq = reciprocal_scale(skb_get_hash(skb),
-				       adapter->num_rx_queues_per_pool);
+	if (sb_dev) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		struct net_device *vdev = sb_dev;
 
-		return txq + fwd_adapter->tx_base_queue;
+		txq = vdev->tc_to_txq[tc].offset;
+		txq += reciprocal_scale(skb_get_hash(skb),
+					vdev->tc_to_txq[tc].count);
+
+		return txq;
 	}
 
-#ifdef IXGBE_FCOE
-
 	/*
 	 * only execute the code below if protocol is FCoE
 	 * or FIP and we have FCoE enabled on the adapter
@@ -8226,11 +8237,11 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 	case htons(ETH_P_FIP):
 		adapter = netdev_priv(dev);
 
-		if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
+		if (!sb_dev && (adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
 			break;
 		/* fall through */
 	default:
-		return fallback(dev, skb);
+		return fallback(dev, skb, sb_dev);
 	}
 
 	f = &adapter->ring_feature[RING_F_FCOE];
@@ -8242,11 +8253,9 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 		txq -= f->indices;
 
 	return txq + f->offset;
-#else
-	return fallback(dev, skb);
-#endif
 }
 
+#endif
 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 			       struct xdp_frame *xdpf)
 {
@@ -8766,6 +8775,11 @@ static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data)
 	/* if we cannot find a free pool then disable the offload */
 	netdev_err(vdev, "L2FW offload disabled due to lack of queue resources\n");
 	macvlan_release_l2fw_offload(vdev);
+
+	/* unbind the queues and drop the subordinate channel config */
+	netdev_unbind_sb_channel(adapter->netdev, vdev);
+	netdev_set_sb_channel(vdev, 0);
+
 	kfree(accel);
 
 	return 0;
@@ -9769,6 +9783,13 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
 	if (!macvlan_supports_dest_filter(vdev))
 		return ERR_PTR(-EMEDIUMTYPE);
 
+	/* We need to lock down the macvlan to be a single queue device so that
+	 * we can reuse the tc_to_txq field in the macvlan netdev to represent
+	 * the queue mapping to our netdev.
+	 */
+	if (netif_is_multiqueue(vdev))
+		return ERR_PTR(-ERANGE);
+
 	pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
 	if (pool == adapter->num_rx_pools) {
 		u16 used_pools = adapter->num_vfs + adapter->num_rx_pools;
@@ -9825,6 +9846,7 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
 		return ERR_PTR(-ENOMEM);
 
 	set_bit(pool, adapter->fwd_bitmask);
+	netdev_set_sb_channel(vdev, pool);
 	accel->pool = pool;
 	accel->netdev = vdev;
 
@@ -9866,6 +9888,10 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
 		ring->netdev = NULL;
 	}
 
+	/* unbind the queues and drop the subordinate channel config */
+	netdev_unbind_sb_channel(pdev, accel->netdev);
+	netdev_set_sb_channel(accel->netdev, 0);
+
 	clear_bit(accel->pool, adapter->fwd_bitmask);
 	kfree(accel);
 }
@@ -10025,7 +10051,6 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_open		= ixgbe_open,
 	.ndo_stop		= ixgbe_close,
 	.ndo_start_xmit		= ixgbe_xmit_frame,
-	.ndo_select_queue	= ixgbe_select_queue,
 	.ndo_set_rx_mode	= ixgbe_set_rx_mode,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= ixgbe_set_mac,
@@ -10048,6 +10073,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_poll_controller	= ixgbe_netpoll,
 #endif
 #ifdef IXGBE_FCOE
+	.ndo_select_queue	= ixgbe_select_queue,
 	.ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get,
 	.ndo_fcoe_ddp_target = ixgbe_fcoe_ddp_target,
 	.ndo_fcoe_ddp_done = ixgbe_fcoe_ddp_put,

diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index afc8100..7a637b5 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c

@@ -563,14 +563,6 @@ ltq_etop_set_multicast_list(struct net_device *dev)
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static u16
-ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb,
-		      void *accel_priv, select_queue_fallback_t fallback)
-{
-	/* we are currently only using the first queue */
-	return 0;
-}
-
 static int
 ltq_etop_init(struct net_device *dev)
 {
@@ -641,7 +633,7 @@ static const struct net_device_ops ltq_eth_netdev_ops = {
 	.ndo_set_mac_address = ltq_etop_set_mac_address,
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_set_rx_mode = ltq_etop_set_multicast_list,
-	.ndo_select_queue = ltq_etop_select_queue,
+	.ndo_select_queue = dev_pick_tx_zero,
 	.ndo_init = ltq_etop_init,
 	.ndo_tx_timeout = ltq_etop_tx_timeout,
 };

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 81a66cc..749d972 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h

@@ -12,6 +12,7 @@
 #ifndef _MVPP2_H_
 #define _MVPP2_H_
 
+#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/phy.h>
@@ -72,9 +73,9 @@
 #define     MVPP22_RSS_INDEX_TABLE_ENTRY(idx)	(idx)
 #define     MVPP22_RSS_INDEX_TABLE(idx)		((idx) << 8)
 #define     MVPP22_RSS_INDEX_QUEUE(idx)		((idx) << 16)
-#define MVPP22_RSS_TABLE_ENTRY			0x1508
-#define MVPP22_RSS_TABLE			0x1510
+#define MVPP22_RXQ2RSS_TABLE			0x1504
 #define     MVPP22_RSS_TABLE_POINTER(p)		(p)
+#define MVPP22_RSS_TABLE_ENTRY			0x1508
 #define MVPP22_RSS_WIDTH			0x150c
 
 /* Classifier Registers */
@@ -86,11 +87,28 @@
 #define     MVPP2_CLS_LKP_INDEX_WAY_OFFS	6
 #define MVPP2_CLS_LKP_TBL_REG			0x1818
 #define     MVPP2_CLS_LKP_TBL_RXQ_MASK		0xff
+#define     MVPP2_CLS_LKP_FLOW_PTR(flow)	((flow) << 16)
 #define     MVPP2_CLS_LKP_TBL_LOOKUP_EN_MASK	BIT(25)
 #define MVPP2_CLS_FLOW_INDEX_REG		0x1820
 #define MVPP2_CLS_FLOW_TBL0_REG			0x1824
+#define     MVPP2_CLS_FLOW_TBL0_LAST		BIT(0)
+#define     MVPP2_CLS_FLOW_TBL0_ENG_MASK	0x7
+#define     MVPP2_CLS_FLOW_TBL0_OFFS		1
+#define     MVPP2_CLS_FLOW_TBL0_ENG(x)		((x) << 1)
+#define     MVPP2_CLS_FLOW_TBL0_PORT_ID_MASK	0xff
+#define     MVPP2_CLS_FLOW_TBL0_PORT_ID(port)	((port) << 4)
+#define     MVPP2_CLS_FLOW_TBL0_PORT_ID_SEL	BIT(23)
 #define MVPP2_CLS_FLOW_TBL1_REG			0x1828
+#define     MVPP2_CLS_FLOW_TBL1_N_FIELDS_MASK	0x7
+#define     MVPP2_CLS_FLOW_TBL1_N_FIELDS(x)	(x)
+#define     MVPP2_CLS_FLOW_TBL1_PRIO_MASK	0x3f
+#define     MVPP2_CLS_FLOW_TBL1_PRIO(x)		((x) << 9)
+#define     MVPP2_CLS_FLOW_TBL1_SEQ_MASK	0x7
+#define     MVPP2_CLS_FLOW_TBL1_SEQ(x)		((x) << 15)
 #define MVPP2_CLS_FLOW_TBL2_REG			0x182c
+#define     MVPP2_CLS_FLOW_TBL2_FLD_MASK	0x3f
+#define     MVPP2_CLS_FLOW_TBL2_FLD_OFFS(n)	((n) * 6)
+#define     MVPP2_CLS_FLOW_TBL2_FLD(n, x)	((x) << ((n) * 6))
 #define MVPP2_CLS_OVERSIZE_RXQ_LOW_REG(port)	(0x1980 + ((port) * 4))
 #define     MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS	3
 #define     MVPP2_CLS_OVERSIZE_RXQ_LOW_MASK	0x7
@@ -98,6 +116,29 @@
 #define MVPP2_CLS_SWFWD_PCTRL_REG		0x19d0
 #define     MVPP2_CLS_SWFWD_PCTRL_MASK(port)	(1 << (port))
 
+/* Classifier C2 engine Registers */
+#define MVPP22_CLS_C2_TCAM_IDX			0x1b00
+#define MVPP22_CLS_C2_TCAM_DATA0		0x1b10
+#define MVPP22_CLS_C2_TCAM_DATA1		0x1b14
+#define MVPP22_CLS_C2_TCAM_DATA2		0x1b18
+#define MVPP22_CLS_C2_TCAM_DATA3		0x1b1c
+#define MVPP22_CLS_C2_TCAM_DATA4		0x1b20
+#define     MVPP22_CLS_C2_PORT_ID(port)		((port) << 8)
+#define MVPP22_CLS_C2_ACT			0x1b60
+#define     MVPP22_CLS_C2_ACT_RSS_EN(act)	(((act) & 0x3) << 19)
+#define     MVPP22_CLS_C2_ACT_FWD(act)		(((act) & 0x7) << 13)
+#define     MVPP22_CLS_C2_ACT_QHIGH(act)	(((act) & 0x3) << 11)
+#define     MVPP22_CLS_C2_ACT_QLOW(act)		(((act) & 0x3) << 9)
+#define MVPP22_CLS_C2_ATTR0			0x1b64
+#define     MVPP22_CLS_C2_ATTR0_QHIGH(qh)	(((qh) & 0x1f) << 24)
+#define     MVPP22_CLS_C2_ATTR0_QHIGH_MASK	0x1f
+#define     MVPP22_CLS_C2_ATTR0_QLOW(ql)	(((ql) & 0x7) << 21)
+#define     MVPP22_CLS_C2_ATTR0_QLOW_MASK	0x7
+#define MVPP22_CLS_C2_ATTR1			0x1b68
+#define MVPP22_CLS_C2_ATTR2			0x1b6c
+#define     MVPP22_CLS_C2_ATTR2_RSS_EN		BIT(30)
+#define MVPP22_CLS_C2_ATTR3			0x1b70
+
 /* Descriptor Manager Top Registers */
 #define MVPP2_RXQ_NUM_REG			0x2040
 #define MVPP2_RXQ_DESC_ADDR_REG			0x2044
@@ -499,7 +540,7 @@
 #define MVPP2_MAX_SKB_DESCS		(MVPP2_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
 
 /* Dfault number of RXQs in use */
-#define MVPP2_DEFAULT_RXQ		4
+#define MVPP2_DEFAULT_RXQ		1
 
 /* Max number of Rx descriptors */
 #define MVPP2_MAX_RXD_MAX		1024
@@ -556,6 +597,9 @@
 #define MVPP2_BIT_TO_WORD(bit)		((bit) / 32)
 #define MVPP2_BIT_IN_WORD(bit)		((bit) % 32)
 
+/* RSS constants */
+#define MVPP22_RSS_TABLE_ENTRIES	32
+
 /* IPv6 max L3 address size */
 #define MVPP2_MAX_L3_ADDR_SIZE		16
 
@@ -797,6 +841,9 @@ struct mvpp2_port {
 	bool has_tx_irqs;
 
 	u32 tx_time_coal;
+
+	/* RSS indirection table */
+	u32 indir[MVPP22_RSS_TABLE_ENTRIES];
 };
 
 /* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
index 8581d5b..723d0ba 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c

@@ -12,6 +12,328 @@
 
 #include "mvpp2.h"
 #include "mvpp2_cls.h"
+#include "mvpp2_prs.h"
+
+#define MVPP2_DEF_FLOW(_type, _id, _opts, _ri, _ri_mask)	\
+{								\
+	.flow_type = _type,					\
+	.flow_id = _id,						\
+	.supported_hash_opts = _opts,				\
+	.prs_ri = {						\
+		.ri = _ri,					\
+		.ri_mask = _ri_mask				\
+	}							\
+}
+
+static struct mvpp2_cls_flow cls_flows[MVPP2_N_FLOWS] = {
+	/* TCP over IPv4 flows, Not fragmented, no vlan tag */
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG,
+		       MVPP22_CLS_HEK_IP4_5T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+		       MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG,
+		       MVPP22_CLS_HEK_IP4_5T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+		       MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG,
+		       MVPP22_CLS_HEK_IP4_5T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+		       MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	/* TCP over IPv4 flows, Not fragmented, with vlan tag */
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK),
+
+	/* TCP over IPv4 flows, fragmented, no vlan tag */
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+		       MVPP22_CLS_HEK_IP4_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+		       MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+		       MVPP22_CLS_HEK_IP4_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+		       MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+		       MVPP22_CLS_HEK_IP4_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+		       MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	/* TCP over IPv4 flows, fragmented, with vlan tag */
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK),
+
+	/* UDP over IPv4 flows, Not fragmented, no vlan tag */
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_UNTAG,
+		       MVPP22_CLS_HEK_IP4_5T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+		       MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_UNTAG,
+		       MVPP22_CLS_HEK_IP4_5T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+		       MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_UNTAG,
+		       MVPP22_CLS_HEK_IP4_5T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+		       MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	/* UDP over IPv4 flows, Not fragmented, with vlan tag */
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK),
+
+	/* UDP over IPv4 flows, fragmented, no vlan tag */
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+		       MVPP22_CLS_HEK_IP4_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+		       MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+		       MVPP22_CLS_HEK_IP4_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+		       MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+		       MVPP22_CLS_HEK_IP4_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+		       MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	/* UDP over IPv4 flows, fragmented, with vlan tag */
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK),
+
+	/* TCP over IPv6 flows, not fragmented, no vlan tag */
+	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_UNTAG,
+		       MVPP22_CLS_HEK_IP6_5T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
+		       MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_UNTAG,
+		       MVPP22_CLS_HEK_IP6_5T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
+		       MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	/* TCP over IPv6 flows, not fragmented, with vlan tag */
+	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_TAG,
+		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_TAG,
+		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK),
+
+	/* TCP over IPv6 flows, fragmented, no vlan tag */
+	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_UNTAG,
+		       MVPP22_CLS_HEK_IP6_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
+		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_UNTAG,
+		       MVPP22_CLS_HEK_IP6_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
+		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	/* TCP over IPv6 flows, fragmented, with vlan tag */
+	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_IP_FRAG_TRUE |
+		       MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_IP_FRAG_TRUE |
+		       MVPP2_PRS_RI_L4_TCP,
+		       MVPP2_PRS_IP_MASK),
+
+	/* UDP over IPv6 flows, not fragmented, no vlan tag */
+	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_UNTAG,
+		       MVPP22_CLS_HEK_IP6_5T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
+		       MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_UNTAG,
+		       MVPP22_CLS_HEK_IP6_5T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
+		       MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	/* UDP over IPv6 flows, not fragmented, with vlan tag */
+	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_TAG,
+		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_TAG,
+		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK),
+
+	/* UDP over IPv6 flows, fragmented, no vlan tag */
+	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_UNTAG,
+		       MVPP22_CLS_HEK_IP6_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
+		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_UNTAG,
+		       MVPP22_CLS_HEK_IP6_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
+		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+	/* UDP over IPv6 flows, fragmented, with vlan tag */
+	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_IP_FRAG_TRUE |
+		       MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK),
+
+	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_IP_FRAG_TRUE |
+		       MVPP2_PRS_RI_L4_UDP,
+		       MVPP2_PRS_IP_MASK),
+
+	/* IPv4 flows, no vlan tag */
+	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_UNTAG,
+		       MVPP22_CLS_HEK_IP4_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4,
+		       MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
+	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_UNTAG,
+		       MVPP22_CLS_HEK_IP4_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT,
+		       MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
+	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_UNTAG,
+		       MVPP22_CLS_HEK_IP4_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER,
+		       MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
+
+	/* IPv4 flows, with vlan tag */
+	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4,
+		       MVPP2_PRS_RI_L3_PROTO_MASK),
+	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4_OPT,
+		       MVPP2_PRS_RI_L3_PROTO_MASK),
+	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP4_OTHER,
+		       MVPP2_PRS_RI_L3_PROTO_MASK),
+
+	/* IPv6 flows, no vlan tag */
+	MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_UNTAG,
+		       MVPP22_CLS_HEK_IP6_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6,
+		       MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
+	MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_UNTAG,
+		       MVPP22_CLS_HEK_IP6_2T,
+		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6,
+		       MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
+
+	/* IPv6 flows, with vlan tag */
+	MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP6,
+		       MVPP2_PRS_RI_L3_PROTO_MASK),
+	MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+		       MVPP2_PRS_RI_L3_IP6,
+		       MVPP2_PRS_RI_L3_PROTO_MASK),
+
+	/* Non IP flow, no vlan tag */
+	MVPP2_DEF_FLOW(ETHER_FLOW, MVPP2_FL_NON_IP_UNTAG,
+		       0,
+		       MVPP2_PRS_RI_VLAN_NONE,
+		       MVPP2_PRS_RI_VLAN_MASK),
+	/* Non IP flow, with vlan tag */
+	MVPP2_DEF_FLOW(ETHER_FLOW, MVPP2_FL_NON_IP_TAG,
+		       MVPP22_CLS_HEK_OPT_VLAN,
+		       0, 0),
+};
+
+static void mvpp2_cls_flow_read(struct mvpp2 *priv, int index,
+				struct mvpp2_cls_flow_entry *fe)
+{
+	fe->index = index;
+	mvpp2_write(priv, MVPP2_CLS_FLOW_INDEX_REG, index);
+	fe->data[0] = mvpp2_read(priv, MVPP2_CLS_FLOW_TBL0_REG);
+	fe->data[1] = mvpp2_read(priv, MVPP2_CLS_FLOW_TBL1_REG);
+	fe->data[2] = mvpp2_read(priv, MVPP2_CLS_FLOW_TBL2_REG);
+}
 
 /* Update classification flow table registers */
 static void mvpp2_cls_flow_write(struct mvpp2 *priv,
@@ -34,6 +356,433 @@ static void mvpp2_cls_lookup_write(struct mvpp2 *priv,
 	mvpp2_write(priv, MVPP2_CLS_LKP_TBL_REG, le->data);
 }
 
+/* Operations on flow entry */
+static int mvpp2_cls_flow_hek_num_get(struct mvpp2_cls_flow_entry *fe)
+{
+	return fe->data[1] & MVPP2_CLS_FLOW_TBL1_N_FIELDS_MASK;
+}
+
+static void mvpp2_cls_flow_hek_num_set(struct mvpp2_cls_flow_entry *fe,
+				       int num_of_fields)
+{
+	fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_N_FIELDS_MASK;
+	fe->data[1] |= MVPP2_CLS_FLOW_TBL1_N_FIELDS(num_of_fields);
+}
+
+static int mvpp2_cls_flow_hek_get(struct mvpp2_cls_flow_entry *fe,
+				  int field_index)
+{
+	return (fe->data[2] >> MVPP2_CLS_FLOW_TBL2_FLD_OFFS(field_index)) &
+		MVPP2_CLS_FLOW_TBL2_FLD_MASK;
+}
+
+static void mvpp2_cls_flow_hek_set(struct mvpp2_cls_flow_entry *fe,
+				   int field_index, int field_id)
+{
+	fe->data[2] &= ~MVPP2_CLS_FLOW_TBL2_FLD(field_index,
+						MVPP2_CLS_FLOW_TBL2_FLD_MASK);
+	fe->data[2] |= MVPP2_CLS_FLOW_TBL2_FLD(field_index, field_id);
+}
+
+static void mvpp2_cls_flow_eng_set(struct mvpp2_cls_flow_entry *fe,
+				   int engine)
+{
+	fe->data[0] &= ~MVPP2_CLS_FLOW_TBL0_ENG(MVPP2_CLS_FLOW_TBL0_ENG_MASK);
+	fe->data[0] |= MVPP2_CLS_FLOW_TBL0_ENG(engine);
+}
+
+static void mvpp2_cls_flow_port_id_sel(struct mvpp2_cls_flow_entry *fe,
+				       bool from_packet)
+{
+	if (from_packet)
+		fe->data[0] |= MVPP2_CLS_FLOW_TBL0_PORT_ID_SEL;
+	else
+		fe->data[0] &= ~MVPP2_CLS_FLOW_TBL0_PORT_ID_SEL;
+}
+
+static void mvpp2_cls_flow_seq_set(struct mvpp2_cls_flow_entry *fe, u32 seq)
+{
+	fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_SEQ(MVPP2_CLS_FLOW_TBL1_SEQ_MASK);
+	fe->data[1] |= MVPP2_CLS_FLOW_TBL1_SEQ(seq);
+}
+
+static void mvpp2_cls_flow_last_set(struct mvpp2_cls_flow_entry *fe,
+				    bool is_last)
+{
+	fe->data[0] &= ~MVPP2_CLS_FLOW_TBL0_LAST;
+	fe->data[0] |= !!is_last;
+}
+
+static void mvpp2_cls_flow_pri_set(struct mvpp2_cls_flow_entry *fe, int prio)
+{
+	fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_PRIO(MVPP2_CLS_FLOW_TBL1_PRIO_MASK);
+	fe->data[1] |= MVPP2_CLS_FLOW_TBL1_PRIO(prio);
+}
+
+static void mvpp2_cls_flow_port_add(struct mvpp2_cls_flow_entry *fe,
+				    u32 port)
+{
+	fe->data[0] |= MVPP2_CLS_FLOW_TBL0_PORT_ID(port);
+}
+
+/* Initialize the parser entry for the given flow */
+static void mvpp2_cls_flow_prs_init(struct mvpp2 *priv,
+				    struct mvpp2_cls_flow *flow)
+{
+	mvpp2_prs_add_flow(priv, flow->flow_id, flow->prs_ri.ri,
+			   flow->prs_ri.ri_mask);
+}
+
+/* Initialize the Lookup Id table entry for the given flow */
+static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv,
+				    struct mvpp2_cls_flow *flow)
+{
+	struct mvpp2_cls_lookup_entry le;
+
+	le.way = 0;
+	le.lkpid = flow->flow_id;
+
+	/* The default RxQ for this port is set in the C2 lookup */
+	le.data = 0;
+
+	/* We point on the first lookup in the sequence for the flow, that is
+	 * the C2 lookup.
+	 */
+	le.data |= MVPP2_CLS_LKP_FLOW_PTR(MVPP2_FLOW_C2_ENTRY(flow->flow_id));
+
+	/* CLS is always enabled, RSS is enabled/disabled in C2 lookup */
+	le.data |= MVPP2_CLS_LKP_TBL_LOOKUP_EN_MASK;
+
+	mvpp2_cls_lookup_write(priv, &le);
+}
+
+/* Initialize the flow table entries for the given flow */
+static void mvpp2_cls_flow_init(struct mvpp2 *priv, struct mvpp2_cls_flow *flow)
+{
+	struct mvpp2_cls_flow_entry fe;
+	int i;
+
+	/* C2 lookup */
+	memset(&fe, 0, sizeof(fe));
+	fe.index = MVPP2_FLOW_C2_ENTRY(flow->flow_id);
+
+	mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C2);
+	mvpp2_cls_flow_port_id_sel(&fe, true);
+	mvpp2_cls_flow_last_set(&fe, 0);
+	mvpp2_cls_flow_pri_set(&fe, 0);
+	mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_FIRST1);
+
+	/* Add all ports */
+	for (i = 0; i < MVPP2_MAX_PORTS; i++)
+		mvpp2_cls_flow_port_add(&fe, BIT(i));
+
+	mvpp2_cls_flow_write(priv, &fe);
+
+	/* C3Hx lookups */
+	for (i = 0; i < MVPP2_MAX_PORTS; i++) {
+		memset(&fe, 0, sizeof(fe));
+		fe.index = MVPP2_PORT_FLOW_HASH_ENTRY(i, flow->flow_id);
+
+		mvpp2_cls_flow_port_id_sel(&fe, true);
+		mvpp2_cls_flow_pri_set(&fe, i + 1);
+		mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_MIDDLE);
+		mvpp2_cls_flow_port_add(&fe, BIT(i));
+
+		mvpp2_cls_flow_write(priv, &fe);
+	}
+
+	/* Update the last entry */
+	mvpp2_cls_flow_last_set(&fe, 1);
+	mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_LAST);
+
+	mvpp2_cls_flow_write(priv, &fe);
+}
+
+/* Adds a field to the Header Extracted Key generation parameters*/
+static int mvpp2_flow_add_hek_field(struct mvpp2_cls_flow_entry *fe,
+				    u32 field_id)
+{
+	int nb_fields = mvpp2_cls_flow_hek_num_get(fe);
+
+	if (nb_fields == MVPP2_FLOW_N_FIELDS)
+		return -EINVAL;
+
+	mvpp2_cls_flow_hek_set(fe, nb_fields, field_id);
+
+	mvpp2_cls_flow_hek_num_set(fe, nb_fields + 1);
+
+	return 0;
+}
+
+static int mvpp2_flow_set_hek_fields(struct mvpp2_cls_flow_entry *fe,
+				     unsigned long hash_opts)
+{
+	u32 field_id;
+	int i;
+
+	/* Clear old fields */
+	mvpp2_cls_flow_hek_num_set(fe, 0);
+	fe->data[2] = 0;
+
+	for_each_set_bit(i, &hash_opts, MVPP22_CLS_HEK_N_FIELDS) {
+		switch (BIT(i)) {
+		case MVPP22_CLS_HEK_OPT_VLAN:
+			field_id = MVPP22_CLS_FIELD_VLAN;
+			break;
+		case MVPP22_CLS_HEK_OPT_IP4SA:
+			field_id = MVPP22_CLS_FIELD_IP4SA;
+			break;
+		case MVPP22_CLS_HEK_OPT_IP4DA:
+			field_id = MVPP22_CLS_FIELD_IP4DA;
+			break;
+		case MVPP22_CLS_HEK_OPT_IP6SA:
+			field_id = MVPP22_CLS_FIELD_IP6SA;
+			break;
+		case MVPP22_CLS_HEK_OPT_IP6DA:
+			field_id = MVPP22_CLS_FIELD_IP6DA;
+			break;
+		case MVPP22_CLS_HEK_OPT_L4SIP:
+			field_id = MVPP22_CLS_FIELD_L4SIP;
+			break;
+		case MVPP22_CLS_HEK_OPT_L4DIP:
+			field_id = MVPP22_CLS_FIELD_L4DIP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		if (mvpp2_flow_add_hek_field(fe, field_id))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow)
+{
+	if (flow >= MVPP2_N_FLOWS)
+		return NULL;
+
+	return &cls_flows[flow];
+}
+
+/* Set the hash generation options for the given traffic flow.
+ * One traffic flow (in the ethtool sense) has multiple classification flows,
+ * to handle specific cases such as fragmentation, or the presence of a
+ * VLAN / DSA Tag.
+ *
+ * Each of these individual flows has different constraints, for example we
+ * can't hash fragmented packets on L4 data (else we would risk having packet
+ * re-ordering), so each classification flows masks the options with their
+ * supported ones.
+ *
+ */
+static int mvpp2_port_rss_hash_opts_set(struct mvpp2_port *port, int flow_type,
+					u16 requested_opts)
+{
+	struct mvpp2_cls_flow_entry fe;
+	struct mvpp2_cls_flow *flow;
+	int i, engine, flow_index;
+	u16 hash_opts;
+
+	for (i = 0; i < MVPP2_N_FLOWS; i++) {
+		flow = mvpp2_cls_flow_get(i);
+		if (!flow)
+			return -EINVAL;
+
+		if (flow->flow_type != flow_type)
+			continue;
+
+		flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id,
+							flow->flow_id);
+
+		mvpp2_cls_flow_read(port->priv, flow_index, &fe);
+
+		hash_opts = flow->supported_hash_opts & requested_opts;
+
+		/* Use C3HB engine to access L4 infos. This adds L4 infos to the
+		 * hash parameters
+		 */
+		if (hash_opts & MVPP22_CLS_HEK_L4_OPTS)
+			engine = MVPP22_CLS_ENGINE_C3HB;
+		else
+			engine = MVPP22_CLS_ENGINE_C3HA;
+
+		if (mvpp2_flow_set_hek_fields(&fe, hash_opts))
+			return -EINVAL;
+
+		mvpp2_cls_flow_eng_set(&fe, engine);
+
+		mvpp2_cls_flow_write(port->priv, &fe);
+	}
+
+	return 0;
+}
+
+u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe)
+{
+	u16 hash_opts = 0;
+	int n_fields, i, field;
+
+	n_fields = mvpp2_cls_flow_hek_num_get(fe);
+
+	for (i = 0; i < n_fields; i++) {
+		field = mvpp2_cls_flow_hek_get(fe, i);
+
+		switch (field) {
+		case MVPP22_CLS_FIELD_MAC_DA:
+			hash_opts |= MVPP22_CLS_HEK_OPT_MAC_DA;
+			break;
+		case MVPP22_CLS_FIELD_VLAN:
+			hash_opts |= MVPP22_CLS_HEK_OPT_VLAN;
+			break;
+		case MVPP22_CLS_FIELD_L3_PROTO:
+			hash_opts |= MVPP22_CLS_HEK_OPT_L3_PROTO;
+			break;
+		case MVPP22_CLS_FIELD_IP4SA:
+			hash_opts |= MVPP22_CLS_HEK_OPT_IP4SA;
+			break;
+		case MVPP22_CLS_FIELD_IP4DA:
+			hash_opts |= MVPP22_CLS_HEK_OPT_IP4DA;
+			break;
+		case MVPP22_CLS_FIELD_IP6SA:
+			hash_opts |= MVPP22_CLS_HEK_OPT_IP6SA;
+			break;
+		case MVPP22_CLS_FIELD_IP6DA:
+			hash_opts |= MVPP22_CLS_HEK_OPT_IP6DA;
+			break;
+		case MVPP22_CLS_FIELD_L4SIP:
+			hash_opts |= MVPP22_CLS_HEK_OPT_L4SIP;
+			break;
+		case MVPP22_CLS_FIELD_L4DIP:
+			hash_opts |= MVPP22_CLS_HEK_OPT_L4DIP;
+			break;
+		default:
+			break;
+		}
+	}
+	return hash_opts;
+}
+
+/* Returns the hash opts for this flow. There are several classifier flows
+ * for one traffic flow, this returns an aggregation of all configurations.
+ */
+static u16 mvpp2_port_rss_hash_opts_get(struct mvpp2_port *port, int flow_type)
+{
+	struct mvpp2_cls_flow_entry fe;
+	struct mvpp2_cls_flow *flow;
+	int i, flow_index;
+	u16 hash_opts = 0;
+
+	for (i = 0; i < MVPP2_N_FLOWS; i++) {
+		flow = mvpp2_cls_flow_get(i);
+		if (!flow)
+			return 0;
+
+		if (flow->flow_type != flow_type)
+			continue;
+
+		flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id,
+							flow->flow_id);
+
+		mvpp2_cls_flow_read(port->priv, flow_index, &fe);
+
+		hash_opts |= mvpp2_flow_get_hek_fields(&fe);
+	}
+
+	return hash_opts;
+}
+
+static void mvpp2_cls_port_init_flows(struct mvpp2 *priv)
+{
+	struct mvpp2_cls_flow *flow;
+	int i;
+
+	for (i = 0; i < MVPP2_N_FLOWS; i++) {
+		flow = mvpp2_cls_flow_get(i);
+		if (!flow)
+			break;
+
+		mvpp2_cls_flow_prs_init(priv, flow);
+		mvpp2_cls_flow_lkp_init(priv, flow);
+		mvpp2_cls_flow_init(priv, flow);
+	}
+}
+
+static void mvpp2_cls_c2_write(struct mvpp2 *priv,
+			       struct mvpp2_cls_c2_entry *c2)
+{
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, c2->index);
+
+	/* Write TCAM */
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA0, c2->tcam[0]);
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA1, c2->tcam[1]);
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA2, c2->tcam[2]);
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA3, c2->tcam[3]);
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA4, c2->tcam[4]);
+
+	mvpp2_write(priv, MVPP22_CLS_C2_ACT, c2->act);
+
+	mvpp2_write(priv, MVPP22_CLS_C2_ATTR0, c2->attr[0]);
+	mvpp2_write(priv, MVPP22_CLS_C2_ATTR1, c2->attr[1]);
+	mvpp2_write(priv, MVPP22_CLS_C2_ATTR2, c2->attr[2]);
+	mvpp2_write(priv, MVPP22_CLS_C2_ATTR3, c2->attr[3]);
+}
+
+static void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
+			      struct mvpp2_cls_c2_entry *c2)
+{
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, index);
+
+	c2->index = index;
+
+	c2->tcam[0] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA0);
+	c2->tcam[1] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA1);
+	c2->tcam[2] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA2);
+	c2->tcam[3] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA3);
+	c2->tcam[4] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA4);
+
+	c2->act = mvpp2_read(priv, MVPP22_CLS_C2_ACT);
+
+	c2->attr[0] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR0);
+	c2->attr[1] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR1);
+	c2->attr[2] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR2);
+	c2->attr[3] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR3);
+}
+
+static void mvpp2_port_c2_cls_init(struct mvpp2_port *port)
+{
+	struct mvpp2_cls_c2_entry c2;
+	u8 qh, ql, pmap;
+
+	memset(&c2, 0, sizeof(c2));
+
+	c2.index = MVPP22_CLS_C2_RSS_ENTRY(port->id);
+
+	pmap = BIT(port->id);
+	c2.tcam[4] = MVPP22_CLS_C2_PORT_ID(pmap);
+	c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_PORT_ID(pmap));
+
+	/* Update RSS status after matching this entry */
+	c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK);
+
+	/* Mark packet as "forwarded to software", needed for RSS */
+	c2.act |= MVPP22_CLS_C2_ACT_FWD(MVPP22_C2_FWD_SW_LOCK);
+
+	/* Configure the default rx queue : Update Queue Low and Queue High, but
+	 * don't lock, since the rx queue selection might be overridden by RSS
+	 */
+	c2.act |= MVPP22_CLS_C2_ACT_QHIGH(MVPP22_C2_UPD) |
+		   MVPP22_CLS_C2_ACT_QLOW(MVPP22_C2_UPD);
+
+	qh = (port->first_rxq >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+	ql = port->first_rxq & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+	c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+		      MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
+	mvpp2_cls_c2_write(port->priv, &c2);
+}
+
 /* Classifier default initialization */
 void mvpp2_cls_init(struct mvpp2 *priv)
 {
@@ -61,6 +810,8 @@ void mvpp2_cls_init(struct mvpp2 *priv)
 		le.way = 1;
 		mvpp2_cls_lookup_write(priv, &le);
 	}
+
+	mvpp2_cls_port_init_flows(priv);
 }
 
 void mvpp2_cls_port_config(struct mvpp2_port *port)
@@ -89,6 +840,40 @@ void mvpp2_cls_port_config(struct mvpp2_port *port)
 
 	/* Update lookup ID table entry */
 	mvpp2_cls_lookup_write(port->priv, &le);
+
+	mvpp2_port_c2_cls_init(port);
+}
+
+static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port)
+{
+	struct mvpp2_cls_c2_entry c2;
+
+	mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+
+	c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
+
+	mvpp2_cls_c2_write(port->priv, &c2);
+}
+
+static void mvpp2_rss_port_c2_disable(struct mvpp2_port *port)
+{
+	struct mvpp2_cls_c2_entry c2;
+
+	mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+
+	c2.attr[2] &= ~MVPP22_CLS_C2_ATTR2_RSS_EN;
+
+	mvpp2_cls_c2_write(port->priv, &c2);
+}
+
+void mvpp22_rss_enable(struct mvpp2_port *port)
+{
+	mvpp2_rss_port_c2_enable(port);
+}
+
+void mvpp22_rss_disable(struct mvpp2_port *port)
+{
+	mvpp2_rss_port_c2_disable(port);
 }
 
 /* Set CPU queue number for oversize packets */
@@ -107,7 +892,116 @@ void mvpp2_cls_oversize_rxq_set(struct mvpp2_port *port)
 	mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
 }
 
-void mvpp22_init_rss(struct mvpp2_port *port)
+static inline u32 mvpp22_rxfh_indir(struct mvpp2_port *port, u32 rxq)
+{
+	int nrxqs, cpu, cpus = num_possible_cpus();
+
+	/* Number of RXQs per CPU */
+	nrxqs = port->nrxqs / cpus;
+
+	/* CPU that will handle this rx queue */
+	cpu = rxq / nrxqs;
+
+	if (!cpu_online(cpu))
+		return port->first_rxq;
+
+	/* Indirection to better distribute the paquets on the CPUs when
+	 * configuring the RSS queues.
+	 */
+	return port->first_rxq + ((rxq * nrxqs + rxq / cpus) % port->nrxqs);
+}
+
+void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table)
+{
+	struct mvpp2 *priv = port->priv;
+	int i;
+
+	for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) {
+		u32 sel = MVPP22_RSS_INDEX_TABLE(table) |
+			  MVPP22_RSS_INDEX_TABLE_ENTRY(i);
+		mvpp2_write(priv, MVPP22_RSS_INDEX, sel);
+
+		mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY,
+			    mvpp22_rxfh_indir(port, port->indir[i]));
+	}
+}
+
+int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info)
+{
+	u16 hash_opts = 0;
+
+	switch (info->flow_type) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+		if (info->data & RXH_L4_B_0_1)
+			hash_opts |= MVPP22_CLS_HEK_OPT_L4SIP;
+		if (info->data & RXH_L4_B_2_3)
+			hash_opts |= MVPP22_CLS_HEK_OPT_L4DIP;
+		/* Fallthrough */
+	case IPV4_FLOW:
+	case IPV6_FLOW:
+		if (info->data & RXH_L2DA)
+			hash_opts |= MVPP22_CLS_HEK_OPT_MAC_DA;
+		if (info->data & RXH_VLAN)
+			hash_opts |= MVPP22_CLS_HEK_OPT_VLAN;
+		if (info->data & RXH_L3_PROTO)
+			hash_opts |= MVPP22_CLS_HEK_OPT_L3_PROTO;
+		if (info->data & RXH_IP_SRC)
+			hash_opts |= (MVPP22_CLS_HEK_OPT_IP4SA |
+				     MVPP22_CLS_HEK_OPT_IP6SA);
+		if (info->data & RXH_IP_DST)
+			hash_opts |= (MVPP22_CLS_HEK_OPT_IP4DA |
+				     MVPP22_CLS_HEK_OPT_IP6DA);
+		break;
+	default: return -EOPNOTSUPP;
+	}
+
+	return mvpp2_port_rss_hash_opts_set(port, info->flow_type, hash_opts);
+}
+
+int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info)
+{
+	unsigned long hash_opts;
+	int i;
+
+	hash_opts = mvpp2_port_rss_hash_opts_get(port, info->flow_type);
+	info->data = 0;
+
+	for_each_set_bit(i, &hash_opts, MVPP22_CLS_HEK_N_FIELDS) {
+		switch (BIT(i)) {
+		case MVPP22_CLS_HEK_OPT_MAC_DA:
+			info->data |= RXH_L2DA;
+			break;
+		case MVPP22_CLS_HEK_OPT_VLAN:
+			info->data |= RXH_VLAN;
+			break;
+		case MVPP22_CLS_HEK_OPT_L3_PROTO:
+			info->data |= RXH_L3_PROTO;
+			break;
+		case MVPP22_CLS_HEK_OPT_IP4SA:
+		case MVPP22_CLS_HEK_OPT_IP6SA:
+			info->data |= RXH_IP_SRC;
+			break;
+		case MVPP22_CLS_HEK_OPT_IP4DA:
+		case MVPP22_CLS_HEK_OPT_IP6DA:
+			info->data |= RXH_IP_DST;
+			break;
+		case MVPP22_CLS_HEK_OPT_L4SIP:
+			info->data |= RXH_L4_B_0_1;
+			break;
+		case MVPP22_CLS_HEK_OPT_L4DIP:
+			info->data |= RXH_L4_B_2_3;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+void mvpp22_rss_port_init(struct mvpp2_port *port)
 {
 	struct mvpp2 *priv = port->priv;
 	int i;
@@ -115,27 +1009,30 @@ void mvpp22_init_rss(struct mvpp2_port *port)
 	/* Set the table width: replace the whole classifier Rx queue number
 	 * with the ones configured in RSS table entries.
 	 */
-	mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(0));
+	mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(port->id));
 	mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
 
-	/* Loop through the classifier Rx Queues and map them to a RSS table.
-	 * Map them all to the first table (0) by default.
+	/* The default RxQ is used as a key to select the RSS table to use.
+	 * We use one RSS table per port.
 	 */
-	for (i = 0; i < MVPP2_CLS_RX_QUEUES; i++) {
-		mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(i));
-		mvpp2_write(priv, MVPP22_RSS_TABLE,
-			    MVPP22_RSS_TABLE_POINTER(0));
-	}
+	mvpp2_write(priv, MVPP22_RSS_INDEX,
+		    MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
+	mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
+		    MVPP22_RSS_TABLE_POINTER(port->id));
 
 	/* Configure the first table to evenly distribute the packets across
-	 * real Rx Queues. The table entries map a hash to an port Rx Queue.
+	 * real Rx Queues. The table entries map a hash to a port Rx Queue.
 	 */
-	for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) {
-		u32 sel = MVPP22_RSS_INDEX_TABLE(0) |
-			  MVPP22_RSS_INDEX_TABLE_ENTRY(i);
-		mvpp2_write(priv, MVPP22_RSS_INDEX, sel);
+	for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++)
+		port->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
 
-		mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY, i % port->nrxqs);
-	}
+	mvpp22_rss_fill_table(port, port->id);
 
+	/* Configure default flows */
+	mvpp2_port_rss_hash_opts_set(port, IPV4_FLOW, MVPP22_CLS_HEK_IP4_2T);
+	mvpp2_port_rss_hash_opts_set(port, IPV6_FLOW, MVPP22_CLS_HEK_IP6_2T);
+	mvpp2_port_rss_hash_opts_set(port, TCP_V4_FLOW, MVPP22_CLS_HEK_IP4_5T);
+	mvpp2_port_rss_hash_opts_set(port, TCP_V6_FLOW, MVPP22_CLS_HEK_IP6_5T);
+	mvpp2_port_rss_hash_opts_set(port, UDP_V4_FLOW, MVPP22_CLS_HEK_IP4_5T);
+	mvpp2_port_rss_hash_opts_set(port, UDP_V6_FLOW, MVPP22_CLS_HEK_IP6_5T);
 }

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
index 8e1d7f9..151d791 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h

@@ -13,15 +13,178 @@
 #ifndef _MVPP2_CLS_H_
 #define _MVPP2_CLS_H_
 
+#include "mvpp2.h"
+#include "mvpp2_prs.h"
+
 /* Classifier constants */
 #define MVPP2_CLS_FLOWS_TBL_SIZE	512
 #define MVPP2_CLS_FLOWS_TBL_DATA_WORDS	3
 #define MVPP2_CLS_LKP_TBL_SIZE		64
 #define MVPP2_CLS_RX_QUEUES		256
 
-/* RSS constants */
-#define MVPP22_RSS_TABLE_ENTRIES	32
+/* Classifier flow constants */
 
+#define MVPP2_FLOW_N_FIELDS		4
+
+enum mvpp2_cls_engine {
+	MVPP22_CLS_ENGINE_C2 = 1,
+	MVPP22_CLS_ENGINE_C3A,
+	MVPP22_CLS_ENGINE_C3B,
+	MVPP22_CLS_ENGINE_C4,
+	MVPP22_CLS_ENGINE_C3HA = 6,
+	MVPP22_CLS_ENGINE_C3HB = 7,
+};
+
+#define MVPP22_CLS_HEK_OPT_MAC_DA	BIT(0)
+#define MVPP22_CLS_HEK_OPT_VLAN		BIT(1)
+#define MVPP22_CLS_HEK_OPT_L3_PROTO	BIT(2)
+#define MVPP22_CLS_HEK_OPT_IP4SA	BIT(3)
+#define MVPP22_CLS_HEK_OPT_IP4DA	BIT(4)
+#define MVPP22_CLS_HEK_OPT_IP6SA	BIT(5)
+#define MVPP22_CLS_HEK_OPT_IP6DA	BIT(6)
+#define MVPP22_CLS_HEK_OPT_L4SIP	BIT(7)
+#define MVPP22_CLS_HEK_OPT_L4DIP	BIT(8)
+#define MVPP22_CLS_HEK_N_FIELDS		9
+
+#define MVPP22_CLS_HEK_L4_OPTS	(MVPP22_CLS_HEK_OPT_L4SIP | \
+				 MVPP22_CLS_HEK_OPT_L4DIP)
+
+#define MVPP22_CLS_HEK_IP4_2T	(MVPP22_CLS_HEK_OPT_IP4SA | \
+				 MVPP22_CLS_HEK_OPT_IP4DA)
+
+#define MVPP22_CLS_HEK_IP6_2T	(MVPP22_CLS_HEK_OPT_IP6SA | \
+				 MVPP22_CLS_HEK_OPT_IP6DA)
+
+/* The fifth tuple in "5T" is the L4_Info field */
+#define MVPP22_CLS_HEK_IP4_5T	(MVPP22_CLS_HEK_IP4_2T | \
+				 MVPP22_CLS_HEK_L4_OPTS)
+
+#define MVPP22_CLS_HEK_IP6_5T	(MVPP22_CLS_HEK_IP6_2T | \
+				 MVPP22_CLS_HEK_L4_OPTS)
+
+enum mvpp2_cls_field_id {
+	MVPP22_CLS_FIELD_MAC_DA = 0x03,
+	MVPP22_CLS_FIELD_VLAN = 0x06,
+	MVPP22_CLS_FIELD_L3_PROTO = 0x0f,
+	MVPP22_CLS_FIELD_IP4SA = 0x10,
+	MVPP22_CLS_FIELD_IP4DA = 0x11,
+	MVPP22_CLS_FIELD_IP6SA = 0x17,
+	MVPP22_CLS_FIELD_IP6DA = 0x1a,
+	MVPP22_CLS_FIELD_L4SIP = 0x1d,
+	MVPP22_CLS_FIELD_L4DIP = 0x1e,
+};
+
+enum mvpp2_cls_flow_seq {
+	MVPP2_CLS_FLOW_SEQ_NORMAL = 0,
+	MVPP2_CLS_FLOW_SEQ_FIRST1,
+	MVPP2_CLS_FLOW_SEQ_FIRST2,
+	MVPP2_CLS_FLOW_SEQ_LAST,
+	MVPP2_CLS_FLOW_SEQ_MIDDLE
+};
+
+/* Classifier C2 engine constants */
+#define MVPP22_CLS_C2_TCAM_EN(data)		((data) << 16)
+
+enum mvpp22_cls_c2_action {
+	MVPP22_C2_NO_UPD = 0,
+	MVPP22_C2_NO_UPD_LOCK,
+	MVPP22_C2_UPD,
+	MVPP22_C2_UPD_LOCK,
+};
+
+enum mvpp22_cls_c2_fwd_action {
+	MVPP22_C2_FWD_NO_UPD = 0,
+	MVPP22_C2_FWD_NO_UPD_LOCK,
+	MVPP22_C2_FWD_SW,
+	MVPP22_C2_FWD_SW_LOCK,
+	MVPP22_C2_FWD_HW,
+	MVPP22_C2_FWD_HW_LOCK,
+	MVPP22_C2_FWD_HW_LOW_LAT,
+	MVPP22_C2_FWD_HW_LOW_LAT_LOCK,
+};
+
+#define MVPP2_CLS_C2_TCAM_WORDS			5
+#define MVPP2_CLS_C2_ATTR_WORDS			5
+
+struct mvpp2_cls_c2_entry {
+	u32 index;
+	u32 tcam[MVPP2_CLS_C2_TCAM_WORDS];
+	u32 act;
+	u32 attr[MVPP2_CLS_C2_ATTR_WORDS];
+};
+
+/* Classifier C2 engine entries */
+#define MVPP22_CLS_C2_RSS_ENTRY(port)	(port)
+#define MVPP22_CLS_C2_N_ENTRIES		MVPP2_MAX_PORTS
+
+/* RSS flow entries in the flow table. We have 2 entries per port for RSS.
+ *
+ * The first performs a lookup using the C2 TCAM engine, to tag the
+ * packet for software forwarding (needed for RSS), enable or disable RSS, and
+ * assign the default rx queue.
+ *
+ * The second configures the hash generation, by specifying which fields of the
+ * packet header are used to generate the hash, and specifies the relevant hash
+ * engine to use.
+ */
+#define MVPP22_RSS_FLOW_C2_OFFS		0
+#define MVPP22_RSS_FLOW_HASH_OFFS	1
+#define MVPP22_RSS_FLOW_SIZE		(MVPP22_RSS_FLOW_HASH_OFFS + 1)
+
+#define MVPP22_RSS_FLOW_C2(port)	((port) * MVPP22_RSS_FLOW_SIZE + \
+					 MVPP22_RSS_FLOW_C2_OFFS)
+#define MVPP22_RSS_FLOW_HASH(port)	((port) * MVPP22_RSS_FLOW_SIZE + \
+					 MVPP22_RSS_FLOW_HASH_OFFS)
+#define MVPP22_RSS_FLOW_FIRST(port)	MVPP22_RSS_FLOW_C2(port)
+
+/* Packet flow ID */
+enum mvpp2_prs_flow {
+	MVPP2_FL_START = 8,
+	MVPP2_FL_IP4_TCP_NF_UNTAG = MVPP2_FL_START,
+	MVPP2_FL_IP4_UDP_NF_UNTAG,
+	MVPP2_FL_IP4_TCP_NF_TAG,
+	MVPP2_FL_IP4_UDP_NF_TAG,
+	MVPP2_FL_IP6_TCP_NF_UNTAG,
+	MVPP2_FL_IP6_UDP_NF_UNTAG,
+	MVPP2_FL_IP6_TCP_NF_TAG,
+	MVPP2_FL_IP6_UDP_NF_TAG,
+	MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+	MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+	MVPP2_FL_IP4_TCP_FRAG_TAG,
+	MVPP2_FL_IP4_UDP_FRAG_TAG,
+	MVPP2_FL_IP6_TCP_FRAG_UNTAG,
+	MVPP2_FL_IP6_UDP_FRAG_UNTAG,
+	MVPP2_FL_IP6_TCP_FRAG_TAG,
+	MVPP2_FL_IP6_UDP_FRAG_TAG,
+	MVPP2_FL_IP4_UNTAG, /* non-TCP, non-UDP, same for below */
+	MVPP2_FL_IP4_TAG,
+	MVPP2_FL_IP6_UNTAG,
+	MVPP2_FL_IP6_TAG,
+	MVPP2_FL_NON_IP_UNTAG,
+	MVPP2_FL_NON_IP_TAG,
+	MVPP2_FL_LAST,
+};
+
+struct mvpp2_cls_flow {
+	/* The L2-L4 traffic flow type */
+	int flow_type;
+
+	/* The first id in the flow table for this flow */
+	u16 flow_id;
+
+	/* The supported HEK fields for this flow */
+	u16 supported_hash_opts;
+
+	/* The Header Parser result_info that matches this flow */
+	struct mvpp2_prs_result_info prs_ri;
+};
+
+#define MVPP2_N_FLOWS	52
+
+#define MVPP2_ENTRIES_PER_FLOW			(MVPP2_MAX_PORTS + 1)
+#define MVPP2_FLOW_C2_ENTRY(id)			((id) * MVPP2_ENTRIES_PER_FLOW)
+#define MVPP2_PORT_FLOW_HASH_ENTRY(port, id)	((id) * MVPP2_ENTRIES_PER_FLOW + \
+						(port) + 1)
 struct mvpp2_cls_flow_entry {
 	u32 index;
 	u32 data[MVPP2_CLS_FLOWS_TBL_DATA_WORDS];
@@ -33,7 +196,15 @@ struct mvpp2_cls_lookup_entry {
 	u32 data;
 };
 
-void mvpp22_init_rss(struct mvpp2_port *port);
+void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table);
+
+void mvpp22_rss_port_init(struct mvpp2_port *port);
+
+void mvpp22_rss_enable(struct mvpp2_port *port);
+void mvpp22_rss_disable(struct mvpp2_port *port);
+
+int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info);
+int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info);
 
 void mvpp2_cls_init(struct mvpp2 *priv);
 

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 88f3da1..2283be1 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c

@@ -66,7 +66,7 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
 #define MVPP2_QDIST_SINGLE_MODE	0
 #define MVPP2_QDIST_MULTI_MODE	1
 
-static int queue_mode = MVPP2_QDIST_SINGLE_MODE;
+static int queue_mode = MVPP2_QDIST_MULTI_MODE;
 
 module_param(queue_mode, int, 0444);
 MODULE_PARM_DESC(queue_mode, "Set queue_mode (single=0, multi=1)");
@@ -3276,6 +3276,11 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port)
 	}
 }
 
+static bool mvpp22_rss_is_supported(void)
+{
+	return queue_mode == MVPP2_QDIST_MULTI_MODE;
+}
+
 static int mvpp2_open(struct net_device *dev)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
@@ -3368,9 +3373,6 @@ static int mvpp2_open(struct net_device *dev)
 
 	mvpp2_start_dev(port);
 
-	if (priv->hw_version == MVPP22)
-		mvpp22_init_rss(port);
-
 	/* Start hardware statistics gathering */
 	queue_delayed_work(priv->stats_queue, &port->stats_work,
 			   MVPP2_MIB_COUNTERS_STATS_DELAY);
@@ -3629,6 +3631,13 @@ static int mvpp2_set_features(struct net_device *dev,
 		}
 	}
 
+	if (changed & NETIF_F_RXHASH) {
+		if (features & NETIF_F_RXHASH)
+			mvpp22_rss_enable(port);
+		else
+			mvpp22_rss_disable(port);
+	}
+
 	return 0;
 }
 
@@ -3816,6 +3825,94 @@ static int mvpp2_ethtool_set_link_ksettings(struct net_device *dev,
 	return phylink_ethtool_ksettings_set(port->phylink, cmd);
 }
 
+static int mvpp2_ethtool_get_rxnfc(struct net_device *dev,
+				   struct ethtool_rxnfc *info, u32 *rules)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	int ret = 0;
+
+	if (!mvpp22_rss_is_supported())
+		return -EOPNOTSUPP;
+
+	switch (info->cmd) {
+	case ETHTOOL_GRXFH:
+		ret = mvpp2_ethtool_rxfh_get(port, info);
+		break;
+	case ETHTOOL_GRXRINGS:
+		info->data = port->nrxqs;
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	return ret;
+}
+
+static int mvpp2_ethtool_set_rxnfc(struct net_device *dev,
+				   struct ethtool_rxnfc *info)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	int ret = 0;
+
+	if (!mvpp22_rss_is_supported())
+		return -EOPNOTSUPP;
+
+	switch (info->cmd) {
+	case ETHTOOL_SRXFH:
+		ret = mvpp2_ethtool_rxfh_set(port, info);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return ret;
+}
+
+static u32 mvpp2_ethtool_get_rxfh_indir_size(struct net_device *dev)
+{
+	return mvpp22_rss_is_supported() ? MVPP22_RSS_TABLE_ENTRIES : 0;
+}
+
+static int mvpp2_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
+				  u8 *hfunc)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!mvpp22_rss_is_supported())
+		return -EOPNOTSUPP;
+
+	if (indir)
+		memcpy(indir, port->indir,
+		       ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_CRC32;
+
+	return 0;
+}
+
+static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
+				  const u8 *key, const u8 hfunc)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!mvpp22_rss_is_supported())
+		return -EOPNOTSUPP;
+
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32)
+		return -EOPNOTSUPP;
+
+	if (key)
+		return -EOPNOTSUPP;
+
+	if (indir) {
+		memcpy(port->indir, indir,
+		       ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
+		mvpp22_rss_fill_table(port, port->id);
+	}
+
+	return 0;
+}
+
 /* Device ops */
 
 static const struct net_device_ops mvpp2_netdev_ops = {
@@ -3847,6 +3944,12 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
 	.set_pauseparam		= mvpp2_ethtool_set_pause_param,
 	.get_link_ksettings	= mvpp2_ethtool_get_link_ksettings,
 	.set_link_ksettings	= mvpp2_ethtool_set_link_ksettings,
+	.get_rxnfc		= mvpp2_ethtool_get_rxnfc,
+	.set_rxnfc		= mvpp2_ethtool_set_rxnfc,
+	.get_rxfh_indir_size	= mvpp2_ethtool_get_rxfh_indir_size,
+	.get_rxfh		= mvpp2_ethtool_get_rxfh,
+	.set_rxfh		= mvpp2_ethtool_set_rxfh,
+
 };
 
 /* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
@@ -3988,8 +4091,8 @@ static int mvpp2_port_init(struct mvpp2_port *port)
 	    MVPP2_MAX_PORTS * priv->max_port_rxqs)
 		return -EINVAL;
 
-	if (port->nrxqs % 4 || (port->nrxqs > priv->max_port_rxqs) ||
-	    (port->ntxqs > MVPP2_MAX_TXQ))
+	if (port->nrxqs % MVPP2_DEFAULT_RXQ ||
+	    port->nrxqs > priv->max_port_rxqs || port->ntxqs > MVPP2_MAX_TXQ)
 		return -EINVAL;
 
 	/* Disable port */
@@ -4078,6 +4181,9 @@ static int mvpp2_port_init(struct mvpp2_port *port)
 	mvpp2_cls_oversize_rxq_set(port);
 	mvpp2_cls_port_config(port);
 
+	if (mvpp22_rss_is_supported())
+		mvpp22_rss_port_init(port);
+
 	/* Provide an initial Rx packet size */
 	port->pkt_size = MVPP2_RX_PKT_SIZE(port->dev->mtu);
 
@@ -4684,6 +4790,9 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
 			    NETIF_F_HW_VLAN_CTAG_FILTER;
 
+	if (mvpp22_rss_is_supported())
+		dev->hw_features |= NETIF_F_RXHASH;
+
 	if (port->pool_long->id == MVPP2_BM_JUMBO && port->id != 0) {
 		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
 		dev->hw_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
@@ -5014,6 +5123,12 @@ static int mvpp2_probe(struct platform_device *pdev)
 			(unsigned long)of_device_get_match_data(&pdev->dev);
 	}
 
+	/* multi queue mode isn't supported on PPV2.1, fallback to single
+	 * mode
+	 */
+	if (priv->hw_version == MVPP21)
+		queue_mode = MVPP2_QDIST_SINGLE_MODE;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(base))

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
index a882c14..acf9f78d 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c

@@ -2409,6 +2409,41 @@ int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type)
 	return 0;
 }
 
+int mvpp2_prs_add_flow(struct mvpp2 *priv, int flow, u32 ri, u32 ri_mask)
+{
+	struct mvpp2_prs_entry pe;
+	u8 *ri_byte, *ri_byte_mask;
+	int tid, i;
+
+	memset(&pe, 0, sizeof(pe));
+
+	tid = mvpp2_prs_tcam_first_free(priv,
+					MVPP2_PE_LAST_FREE_TID,
+					MVPP2_PE_FIRST_FREE_TID);
+	if (tid < 0)
+		return tid;
+
+	pe.index = tid;
+
+	ri_byte = (u8 *)&ri;
+	ri_byte_mask = (u8 *)&ri_mask;
+
+	mvpp2_prs_sram_ai_update(&pe, flow, MVPP2_PRS_FLOW_ID_MASK);
+	mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_DONE_BIT, 1);
+
+	for (i = 0; i < 4; i++) {
+		mvpp2_prs_tcam_data_byte_set(&pe, i, ri_byte[i],
+					     ri_byte_mask[i]);
+	}
+
+	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_FLOWS);
+	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+	mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+	mvpp2_prs_hw_write(priv, &pe);
+
+	return 0;
+}
+
 /* Set prs flow for the port */
 int mvpp2_prs_def_flow(struct mvpp2_port *port)
 {

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h
index a7c8d08..368e90b 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h

@@ -9,14 +9,15 @@
  * License version 2. This program is licensed "as is" without any
  * warranty of any kind, whether express or implied.
  */
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
-
-#include "mvpp2.h"
-
 #ifndef _MVPP2_PRS_H_
 #define _MVPP2_PRS_H_
 
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+
+#include "mvpp2.h"
+
 /* Parser constants */
 #define MVPP2_PRS_TCAM_SRAM_SIZE	256
 #define MVPP2_PRS_TCAM_WORDS		6
@@ -223,6 +224,10 @@
 #define MVPP2_PRS_RI_UDF7_IP6_LITE		BIT(29)
 #define MVPP2_PRS_RI_DROP_MASK			0x80000000
 
+#define MVPP2_PRS_IP_MASK			(MVPP2_PRS_RI_L3_PROTO_MASK | \
+						MVPP2_PRS_RI_IP_FRAG_MASK | \
+						MVPP2_PRS_RI_L4_PROTO_MASK)
+
 /* Sram additional info bits assignment */
 #define MVPP2_PRS_IPV4_DIP_AI_BIT		BIT(0)
 #define MVPP2_PRS_IPV6_NO_EXT_AI_BIT		BIT(0)
@@ -270,6 +275,11 @@ struct mvpp2_prs_entry {
 	u32 sram[MVPP2_PRS_SRAM_WORDS];
 };
 
+struct mvpp2_prs_result_info {
+	u32 ri;
+	u32 ri_mask;
+};
+
 struct mvpp2_prs_shadow {
 	bool valid;
 	bool finish;
@@ -291,6 +301,8 @@ int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da, bool add);
 
 int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type);
 
+int mvpp2_prs_add_flow(struct mvpp2 *priv, int flow, u32 ri, u32 ri_mask);
+
 int mvpp2_prs_def_flow(struct mvpp2_port *port);
 
 void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port);

diff --git a/drivers/net/ethernet/mellanox/mlx4/Makefile b/drivers/net/ethernet/mellanox/mlx4/Makefile
index 16b10d0..3f40077 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx4/Makefile

@@ -3,7 +3,7 @@
 
 mlx4_core-y :=	alloc.o catas.o cmd.o cq.o eq.o fw.o fw_qos.o icm.o intf.o \
 		main.o mcg.o mr.o pd.o port.o profile.o qp.o reset.o sense.o \
-		srq.o resource_tracker.o
+		srq.o resource_tracker.o crdump.o
 
 obj-$(CONFIG_MLX4_EN)               += mlx4_en.o
 

diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index 8afe4b5..c81d15b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c

@@ -178,10 +178,12 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
 
 	dev = persist->dev;
 	mlx4_err(dev, "device is going to be reset\n");
-	if (mlx4_is_slave(dev))
+	if (mlx4_is_slave(dev)) {
 		err = mlx4_reset_slave(dev);
-	else
+	} else {
+		mlx4_crdump_collect(dev);
 		err = mlx4_reset_master(dev);
+	}
 
 	if (!err) {
 		mlx4_err(dev, "device was reset successfully\n");

diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
new file mode 100644
index 0000000..88316c7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c

@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx4.h"
+
+#define BAD_ACCESS			0xBADACCE5
+#define HEALTH_BUFFER_SIZE		0x40
+#define CR_ENABLE_BIT			swab32(BIT(6))
+#define CR_ENABLE_BIT_OFFSET		0xF3F04
+#define MAX_NUM_OF_DUMPS_TO_STORE	(8)
+
+static const char *region_cr_space_str = "cr-space";
+static const char *region_fw_health_str = "fw-health";
+
+/* Set to true in case cr enable bit was set to true before crdump */
+static bool crdump_enbale_bit_set;
+
+static void crdump_enable_crspace_access(struct mlx4_dev *dev,
+					 u8 __iomem *cr_space)
+{
+	/* Get current enable bit value */
+	crdump_enbale_bit_set =
+		readl(cr_space + CR_ENABLE_BIT_OFFSET) & CR_ENABLE_BIT;
+
+	/* Enable FW CR filter (set bit6 to 0) */
+	if (crdump_enbale_bit_set)
+		writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) & ~CR_ENABLE_BIT,
+		       cr_space + CR_ENABLE_BIT_OFFSET);
+
+	/* Enable block volatile crspace accesses */
+	writel(swab32(1), cr_space + dev->caps.health_buffer_addrs +
+	       HEALTH_BUFFER_SIZE);
+}
+
+static void crdump_disable_crspace_access(struct mlx4_dev *dev,
+					  u8 __iomem *cr_space)
+{
+	/* Disable block volatile crspace accesses */
+	writel(0, cr_space + dev->caps.health_buffer_addrs +
+	       HEALTH_BUFFER_SIZE);
+
+	/* Restore FW CR filter value (set bit6 to original value) */
+	if (crdump_enbale_bit_set)
+		writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) | CR_ENABLE_BIT,
+		       cr_space + CR_ENABLE_BIT_OFFSET);
+}
+
+static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev,
+					u8 __iomem *cr_space,
+					u32 id)
+{
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+	struct pci_dev *pdev = dev->persist->pdev;
+	unsigned long cr_res_size;
+	u8 *crspace_data;
+	int offset;
+	int err;
+
+	if (!crdump->region_crspace) {
+		mlx4_err(dev, "crdump: cr-space region is NULL\n");
+		return;
+	}
+
+	/* Try to collect CR space */
+	cr_res_size = pci_resource_len(pdev, 0);
+	crspace_data = kvmalloc(cr_res_size, GFP_KERNEL);
+	if (crspace_data) {
+		for (offset = 0; offset < cr_res_size; offset += 4)
+			*(u32 *)(crspace_data + offset) =
+					readl(cr_space + offset);
+
+		err = devlink_region_snapshot_create(crdump->region_crspace,
+						     cr_res_size, crspace_data,
+						     id, &kvfree);
+		if (err) {
+			kvfree(crspace_data);
+			mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
+				  region_cr_space_str, id, err);
+		} else {
+			mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
+				  id, region_cr_space_str);
+		}
+	} else {
+		mlx4_err(dev, "crdump: Failed to allocate crspace buffer\n");
+	}
+}
+
+static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev,
+					  u8 __iomem *cr_space,
+					  u32 id)
+{
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+	u8 *health_data;
+	int offset;
+	int err;
+
+	if (!crdump->region_fw_health) {
+		mlx4_err(dev, "crdump: fw-health region is NULL\n");
+		return;
+	}
+
+	/* Try to collect health buffer */
+	health_data = kvmalloc(HEALTH_BUFFER_SIZE, GFP_KERNEL);
+	if (health_data) {
+		u8 __iomem *health_buf_start =
+				cr_space + dev->caps.health_buffer_addrs;
+
+		for (offset = 0; offset < HEALTH_BUFFER_SIZE; offset += 4)
+			*(u32 *)(health_data + offset) =
+					readl(health_buf_start + offset);
+
+		err = devlink_region_snapshot_create(crdump->region_fw_health,
+						     HEALTH_BUFFER_SIZE,
+						     health_data,
+						     id, &kvfree);
+		if (err) {
+			kvfree(health_data);
+			mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
+				  region_fw_health_str, id, err);
+		} else {
+			mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
+				  id, region_fw_health_str);
+		}
+	} else {
+		mlx4_err(dev, "crdump: Failed to allocate health buffer\n");
+	}
+}
+
+int mlx4_crdump_collect(struct mlx4_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+	struct pci_dev *pdev = dev->persist->pdev;
+	unsigned long cr_res_size;
+	u8 __iomem *cr_space;
+	u32 id;
+
+	if (!dev->caps.health_buffer_addrs) {
+		mlx4_info(dev, "crdump: FW doesn't support health buffer access, skipping\n");
+		return 0;
+	}
+
+	if (!crdump->snapshot_enable) {
+		mlx4_info(dev, "crdump: devlink snapshot disabled, skipping\n");
+		return 0;
+	}
+
+	cr_res_size = pci_resource_len(pdev, 0);
+
+	cr_space = ioremap(pci_resource_start(pdev, 0), cr_res_size);
+	if (!cr_space) {
+		mlx4_err(dev, "crdump: Failed to map pci cr region\n");
+		return -ENODEV;
+	}
+
+	crdump_enable_crspace_access(dev, cr_space);
+
+	/* Get the available snapshot ID for the dumps */
+	id = devlink_region_shapshot_id_get(devlink);
+
+	/* Try to capture dumps */
+	mlx4_crdump_collect_crspace(dev, cr_space, id);
+	mlx4_crdump_collect_fw_health(dev, cr_space, id);
+
+	crdump_disable_crspace_access(dev, cr_space);
+
+	iounmap(cr_space);
+	return 0;
+}
+
+int mlx4_crdump_init(struct mlx4_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+	struct pci_dev *pdev = dev->persist->pdev;
+
+	crdump->snapshot_enable = false;
+
+	/* Create cr-space region */
+	crdump->region_crspace =
+		devlink_region_create(devlink,
+				      region_cr_space_str,
+				      MAX_NUM_OF_DUMPS_TO_STORE,
+				      pci_resource_len(pdev, 0));
+	if (IS_ERR(crdump->region_crspace))
+		mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
+			  region_cr_space_str,
+			  PTR_ERR(crdump->region_crspace));
+
+	/* Create fw-health region */
+	crdump->region_fw_health =
+		devlink_region_create(devlink,
+				      region_fw_health_str,
+				      MAX_NUM_OF_DUMPS_TO_STORE,
+				      HEALTH_BUFFER_SIZE);
+	if (IS_ERR(crdump->region_fw_health))
+		mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
+			  region_fw_health_str,
+			  PTR_ERR(crdump->region_fw_health));
+
+	return 0;
+}
+
+void mlx4_crdump_end(struct mlx4_dev *dev)
+{
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+
+	devlink_region_destroy(crdump->region_fw_health);
+	devlink_region_destroy(crdump->region_crspace);
+}

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 0227786..1857ee0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c

@@ -688,15 +688,16 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
 }
 
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-			 void *accel_priv, select_queue_fallback_t fallback)
+			 struct net_device *sb_dev,
+			 select_queue_fallback_t fallback)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	u16 rings_p_up = priv->num_tx_rings_p_up;
 
 	if (netdev_get_num_tc(dev))
-		return fallback(dev, skb);
+		return fallback(dev, skb, NULL);
 
-	return fallback(dev, skb) % rings_p_up;
+	return fallback(dev, skb, NULL) % rings_p_up;
 }
 
 static void mlx4_bf_copy(void __iomem *dst, const void *src,

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 46dcbfb..babcfd9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c

@@ -825,7 +825,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET	0xcc
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET	0xd0
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET	0xd2
-
+#define QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET	0xe4
 
 	dev_cap->flags2 = 0;
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -1082,6 +1082,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev_cap->rl_caps.min_unit = size >> 14;
 	}
 
+	MLX4_GET(dev_cap->health_buffer_addrs, outbox,
+		 QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET);
+
 	MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
 	if (field32 & (1 << 16))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index cd6399c..650ae08 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h

@@ -128,6 +128,7 @@ struct mlx4_dev_cap {
 	u32 dmfs_high_rate_qpn_base;
 	u32 dmfs_high_rate_qpn_range;
 	struct mlx4_rate_limit_caps rl_caps;
+	u32 health_buffer_addrs;
 	struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
 	bool wol_port[MLX4_MAX_PORTS + 1];
 };

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index c42eddf..2d979a6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c

@@ -191,6 +191,26 @@ static int mlx4_devlink_ierr_reset_set(struct devlink *devlink, u32 id,
 	return 0;
 }
 
+static int mlx4_devlink_crdump_snapshot_get(struct devlink *devlink, u32 id,
+					    struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx4_priv *priv = devlink_priv(devlink);
+	struct mlx4_dev *dev = &priv->dev;
+
+	ctx->val.vbool = dev->persist->crdump.snapshot_enable;
+	return 0;
+}
+
+static int mlx4_devlink_crdump_snapshot_set(struct devlink *devlink, u32 id,
+					    struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx4_priv *priv = devlink_priv(devlink);
+	struct mlx4_dev *dev = &priv->dev;
+
+	dev->persist->crdump.snapshot_enable = ctx->val.vbool;
+	return 0;
+}
+
 static int
 mlx4_devlink_max_macs_validate(struct devlink *devlink, u32 id,
 			       union devlink_param_value val,
@@ -224,6 +244,11 @@ static const struct devlink_param mlx4_devlink_params[] = {
 	DEVLINK_PARAM_GENERIC(MAX_MACS,
 			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 			      NULL, NULL, mlx4_devlink_max_macs_validate),
+	DEVLINK_PARAM_GENERIC(REGION_SNAPSHOT,
+			      BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
+			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+			      mlx4_devlink_crdump_snapshot_get,
+			      mlx4_devlink_crdump_snapshot_set, NULL),
 	DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
 			     "enable_64b_cqe_eqe", DEVLINK_PARAM_TYPE_BOOL,
 			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
@@ -270,6 +295,11 @@ static void mlx4_devlink_set_params_init_values(struct devlink *devlink)
 	mlx4_devlink_set_init_value(devlink,
 				    MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
 				    value);
+
+	value.vbool = false;
+	mlx4_devlink_set_init_value(devlink,
+				    DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+				    value);
 }
 
 static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
@@ -523,6 +553,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
 	dev->caps.wol_port[1]          = dev_cap->wol_port[1];
 	dev->caps.wol_port[2]          = dev_cap->wol_port[2];
+	dev->caps.health_buffer_addrs  = dev_cap->health_buffer_addrs;
 
 	/* Save uar page shift */
 	if (!mlx4_is_slave(dev)) {
@@ -3806,10 +3837,14 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
 		}
 	}
 
-	err = mlx4_catas_init(&priv->dev);
+	err = mlx4_crdump_init(&priv->dev);
 	if (err)
 		goto err_release_regions;
 
+	err = mlx4_catas_init(&priv->dev);
+	if (err)
+		goto err_crdump;
+
 	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
 	if (err)
 		goto err_catas;
@@ -3819,6 +3854,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
 err_catas:
 	mlx4_catas_end(&priv->dev);
 
+err_crdump:
+	mlx4_crdump_end(&priv->dev);
+
 err_release_regions:
 	pci_release_regions(pdev);
 
@@ -3854,6 +3892,9 @@ static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port,
 
 static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
 {
+	struct mlx4_priv *priv = devlink_priv(devlink);
+	struct mlx4_dev *dev = &priv->dev;
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
 	union devlink_param_value saved_value;
 	int err;
 
@@ -3881,6 +3922,14 @@ static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
 						 &saved_value);
 	if (!err)
 		enable_4k_uar = saved_value.vbool;
+	err = devlink_param_driverinit_value_get(devlink,
+						 DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+						 &saved_value);
+	if (!err && crdump->snapshot_enable != saved_value.vbool) {
+		crdump->snapshot_enable = saved_value.vbool;
+		devlink_param_value_changed(devlink,
+					    DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT);
+	}
 }
 
 static int mlx4_devlink_reload(struct devlink *devlink,
@@ -4080,6 +4129,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 	else
 		mlx4_info(dev, "%s: interface is down\n", __func__);
 	mlx4_catas_end(dev);
+	mlx4_crdump_end(dev);
 	if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
 		mlx4_warn(dev, "Disabling SR-IOV\n");
 		pci_disable_sriov(pdev);

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 2ebaa3b..6e01609 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h

@@ -1042,6 +1042,8 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev);
 void mlx4_stop_catas_poll(struct mlx4_dev *dev);
 int mlx4_catas_init(struct mlx4_dev *dev);
 void mlx4_catas_end(struct mlx4_dev *dev);
+int mlx4_crdump_init(struct mlx4_dev *dev);
+void mlx4_crdump_end(struct mlx4_dev *dev);
 int mlx4_restart_one(struct pci_dev *pdev, bool reload,
 		     struct devlink *devlink);
 int mlx4_register_device(struct mlx4_dev *dev);
@@ -1228,6 +1230,8 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
 void mlx4_enter_error_state(struct mlx4_dev_persistent *persist);
 int mlx4_comm_internal_err(u32 slave_read);
 
+int mlx4_crdump_collect(struct mlx4_dev *dev);
+
 int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
 		    enum mlx4_port_type *type);
 void mlx4_do_sense_ports(struct mlx4_dev *dev,

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index ace6545..c3228b8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

@@ -699,7 +699,8 @@ void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq);
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-			 void *accel_priv, select_queue_fallback_t fallback);
+			 struct net_device *sb_dev,
+			 select_queue_fallback_t fallback);
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 			       struct mlx4_en_rx_alloc *frame,

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index e2b7586..e1b237c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h

@@ -865,7 +865,8 @@ struct mlx5e_profile {
 void mlx5e_build_ptys2ethtool_map(void);
 
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback);
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback);
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 			  struct mlx5e_tx_wqe *wqe, u16 pi);

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e4a9a07..712b976 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c

@@ -349,7 +349,8 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 {
 	int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
 
-	rq->mpwqe.info = kvzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
+	rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
+						  sizeof(*rq->mpwqe.info)),
 				       GFP_KERNEL, cpu_to_node(c->cpu));
 	if (!rq->mpwqe.info)
 		return -ENOMEM;
@@ -969,7 +970,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
 {
 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 
-	sq->db.di = kvzalloc_node(sizeof(*sq->db.di) * wq_sz,
+	sq->db.di = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.di)),
 				  GFP_KERNEL, numa);
 	if (!sq->db.di) {
 		mlx5e_free_xdpsq_db(sq);
@@ -1028,7 +1029,8 @@ static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
 {
 	u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 
-	sq->db.ico_wqe = kvzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz,
+	sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz,
+						  sizeof(*sq->db.ico_wqe)),
 				       GFP_KERNEL, numa);
 	if (!sq->db.ico_wqe)
 		return -ENOMEM;
@@ -1083,9 +1085,11 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 	int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
 
-	sq->db.dma_fifo = kvzalloc_node(df_sz * sizeof(*sq->db.dma_fifo),
+	sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
+						   sizeof(*sq->db.dma_fifo)),
 					GFP_KERNEL, numa);
-	sq->db.wqe_info = kvzalloc_node(wq_sz * sizeof(*sq->db.wqe_info),
+	sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
+						   sizeof(*sq->db.wqe_info)),
 					GFP_KERNEL, numa);
 	if (!sq->db.dma_fifo || !sq->db.wqe_info) {
 		mlx5e_free_txqsq_db(sq);

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index f0739da..9106ea4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

@@ -111,10 +111,11 @@ static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb
 #endif
 
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback)
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
-	int channel_ix = fallback(dev, skb);
+	int channel_ix = fallback(dev, skb, NULL);
 	u16 num_channels;
 	int up = 0;
 

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 0cadcab..981e621 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile

@@ -15,11 +15,16 @@
 obj-$(CONFIG_MLXSW_SPECTRUM)	+= mlxsw_spectrum.o
 mlxsw_spectrum-objs		:= spectrum.o spectrum_buffers.o \
 				   spectrum_switchdev.o spectrum_router.o \
-				   spectrum_kvdl.o spectrum_acl_tcam.o \
-				   spectrum_acl.o spectrum_flower.o \
-				   spectrum_cnt.o spectrum_fid.o \
-				   spectrum_ipip.o spectrum_acl_flex_actions.o \
-				   spectrum_mr.o spectrum_mr_tcam.o \
+				   spectrum1_kvdl.o spectrum_kvdl.o \
+				   spectrum_acl_tcam.o spectrum_acl_ctcam.o \
+				   spectrum1_acl_tcam.o \
+				   spectrum_acl.o \
+				   spectrum_flower.o spectrum_cnt.o \
+				   spectrum_fid.o spectrum_ipip.o \
+				   spectrum_acl_flex_actions.o \
+				   spectrum_acl_flex_keys.o \
+				   spectrum1_mr_tcam.o \
+				   spectrum_mr_tcam.o spectrum_mr.o \
 				   spectrum_qdisc.o spectrum_span.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)	+= spectrum_dcb.o
 mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index 3c0d882..72a6a8a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c

@@ -351,9 +351,24 @@ struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa)
 	block->first_set = mlxsw_afa_set_create(true);
 	if (!block->first_set)
 		goto err_first_set_create;
-	block->cur_set = block->first_set;
+
+	/* In case user instructs to have dummy first set, we leave it
+	 * empty here and create another, real, set right away.
+	 */
+	if (mlxsw_afa->ops->dummy_first_set) {
+		block->cur_set = mlxsw_afa_set_create(false);
+		if (!block->cur_set)
+			goto err_second_set_create;
+		block->cur_set->prev = block->first_set;
+		block->first_set->next = block->cur_set;
+	} else {
+		block->cur_set = block->first_set;
+	}
+
 	return block;
 
+err_second_set_create:
+	mlxsw_afa_set_destroy(block->first_set);
 err_first_set_create:
 	kfree(block);
 	return NULL;
@@ -415,11 +430,16 @@ char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block)
 }
 EXPORT_SYMBOL(mlxsw_afa_block_first_set);
 
-u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block)
+u32 mlxsw_afa_block_first_kvdl_index(struct mlxsw_afa_block *block)
 {
-	return block->first_set->kvdl_index;
+	/* First set is never in KVD linear. So the first set
+	 * with valid KVD linear index is always the second one.
+	 */
+	if (WARN_ON(!block->first_set->next))
+		return 0;
+	return block->first_set->next->kvdl_index;
 }
-EXPORT_SYMBOL(mlxsw_afa_block_first_set_kvdl_index);
+EXPORT_SYMBOL(mlxsw_afa_block_first_kvdl_index);
 
 int mlxsw_afa_block_continue(struct mlxsw_afa_block *block)
 {

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index 3a155d1..c18249a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h

@@ -54,6 +54,7 @@ struct mlxsw_afa_ops {
 			  bool ingress, int *p_span_id);
 	void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
 			   bool ingress);
+	bool dummy_first_set;
 };
 
 struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set,
@@ -64,7 +65,7 @@ struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa);
 void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_commit(struct mlxsw_afa_block *block);
 char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block);
-u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block);
+u32 mlxsw_afa_block_first_kvdl_index(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
 int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block);

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
index b32a009..bf64521 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c

@@ -43,6 +43,7 @@
 struct mlxsw_afk {
 	struct list_head key_info_list;
 	unsigned int max_blocks;
+	const struct mlxsw_afk_ops *ops;
 	const struct mlxsw_afk_block *blocks;
 	unsigned int blocks_count;
 };
@@ -69,8 +70,7 @@ static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk)
 }
 
 struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
-				   const struct mlxsw_afk_block *blocks,
-				   unsigned int blocks_count)
+				   const struct mlxsw_afk_ops *ops)
 {
 	struct mlxsw_afk *mlxsw_afk;
 
@@ -79,8 +79,9 @@ struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
 		return NULL;
 	INIT_LIST_HEAD(&mlxsw_afk->key_info_list);
 	mlxsw_afk->max_blocks = max_blocks;
-	mlxsw_afk->blocks = blocks;
-	mlxsw_afk->blocks_count = blocks_count;
+	mlxsw_afk->ops = ops;
+	mlxsw_afk->blocks = ops->blocks;
+	mlxsw_afk->blocks_count = ops->blocks_count;
 	WARN_ON(!mlxsw_afk_blocks_check(mlxsw_afk));
 	return mlxsw_afk;
 }
@@ -415,45 +416,8 @@ void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values,
 }
 EXPORT_SYMBOL(mlxsw_afk_values_add_buf);
 
-static void mlxsw_afk_encode_u32(const struct mlxsw_item *storage_item,
-				 const struct mlxsw_item *output_item,
-				 char *storage, char *output_indexed)
-{
-	u32 value;
-
-	value = __mlxsw_item_get32(storage, storage_item, 0);
-	__mlxsw_item_set32(output_indexed, output_item, 0, value);
-}
-
-static void mlxsw_afk_encode_buf(const struct mlxsw_item *storage_item,
-				 const struct mlxsw_item *output_item,
-				 char *storage, char *output_indexed)
-{
-	char *storage_data = __mlxsw_item_data(storage, storage_item, 0);
-	char *output_data = __mlxsw_item_data(output_indexed, output_item, 0);
-	size_t len = output_item->size.bytes;
-
-	memcpy(output_data, storage_data, len);
-}
-
-#define MLXSW_AFK_KEY_BLOCK_SIZE 16
-
-static void mlxsw_afk_encode_one(const struct mlxsw_afk_element_inst *elinst,
-				 int block_index, char *storage, char *output)
-{
-	char *output_indexed = output + block_index * MLXSW_AFK_KEY_BLOCK_SIZE;
-	const struct mlxsw_item *storage_item = &elinst->info->item;
-	const struct mlxsw_item *output_item = &elinst->item;
-
-	if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32)
-		mlxsw_afk_encode_u32(storage_item, output_item,
-				     storage, output_indexed);
-	else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF)
-		mlxsw_afk_encode_buf(storage_item, output_item,
-				     storage, output_indexed);
-}
-
-void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info,
+void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk,
+		      struct mlxsw_afk_key_info *key_info,
 		      struct mlxsw_afk_element_values *values,
 		      char *key, char *mask)
 {
@@ -466,10 +430,10 @@ void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info,
 						       &block_index);
 		if (!elinst)
 			continue;
-		mlxsw_afk_encode_one(elinst, block_index,
-				     values->storage.key, key);
-		mlxsw_afk_encode_one(elinst, block_index,
-				     values->storage.mask, mask);
+		mlxsw_afk->ops->encode_one(elinst, block_index,
+					   values->storage.key, key);
+		mlxsw_afk->ops->encode_one(elinst, block_index,
+					   values->storage.mask, mask);
 	}
 }
 EXPORT_SYMBOL(mlxsw_afk_encode);

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
index 122506d..441636c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h

@@ -42,16 +42,20 @@
 
 enum mlxsw_afk_element {
 	MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
-	MLXSW_AFK_ELEMENT_DMAC,
-	MLXSW_AFK_ELEMENT_SMAC,
+	MLXSW_AFK_ELEMENT_DMAC_32_47,
+	MLXSW_AFK_ELEMENT_DMAC_0_31,
+	MLXSW_AFK_ELEMENT_SMAC_32_47,
+	MLXSW_AFK_ELEMENT_SMAC_0_31,
 	MLXSW_AFK_ELEMENT_ETHERTYPE,
 	MLXSW_AFK_ELEMENT_IP_PROTO,
-	MLXSW_AFK_ELEMENT_SRC_IP4,
-	MLXSW_AFK_ELEMENT_DST_IP4,
-	MLXSW_AFK_ELEMENT_SRC_IP6_HI,
-	MLXSW_AFK_ELEMENT_SRC_IP6_LO,
-	MLXSW_AFK_ELEMENT_DST_IP6_HI,
-	MLXSW_AFK_ELEMENT_DST_IP6_LO,
+	MLXSW_AFK_ELEMENT_SRC_IP_96_127,
+	MLXSW_AFK_ELEMENT_SRC_IP_64_95,
+	MLXSW_AFK_ELEMENT_SRC_IP_32_63,
+	MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+	MLXSW_AFK_ELEMENT_DST_IP_96_127,
+	MLXSW_AFK_ELEMENT_DST_IP_64_95,
+	MLXSW_AFK_ELEMENT_DST_IP_32_63,
+	MLXSW_AFK_ELEMENT_DST_IP_0_31,
 	MLXSW_AFK_ELEMENT_DST_L4_PORT,
 	MLXSW_AFK_ELEMENT_SRC_L4_PORT,
 	MLXSW_AFK_ELEMENT_VID,
@@ -99,9 +103,11 @@ struct mlxsw_afk_element_info {
  * define an internal storage geometry.
  */
 static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
-	MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16),
-	MLXSW_AFK_ELEMENT_INFO_BUF(DMAC, 0x04, 6),
-	MLXSW_AFK_ELEMENT_INFO_BUF(SMAC, 0x0A, 6),
+	MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 8),
+	MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2),
+	MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4),
+	MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2),
+	MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_0_31, 0x0C, 4),
 	MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16),
 	MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8),
 	MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12),
@@ -112,12 +118,14 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
 	MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8),
 	MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2),
 	MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6),
-	MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x20, 0, 32),
-	MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x24, 0, 32),
-	MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x20, 8),
-	MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_LO, 0x28, 8),
-	MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_HI, 0x30, 8),
-	MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_LO, 0x38, 8),
+	MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4),
+	MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4),
+	MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4),
+	MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_0_31, 0x2C, 4),
+	MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_96_127, 0x30, 4),
+	MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4),
+	MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4),
+	MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4),
 };
 
 #define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40
@@ -208,9 +216,15 @@ mlxsw_afk_element_usage_subset(struct mlxsw_afk_element_usage *elusage_small,
 
 struct mlxsw_afk;
 
+struct mlxsw_afk_ops {
+	const struct mlxsw_afk_block *blocks;
+	unsigned int blocks_count;
+	void (*encode_one)(const struct mlxsw_afk_element_inst *elinst,
+			   int block_index, char *storage, char *output);
+};
+
 struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
-				   const struct mlxsw_afk_block *blocks,
-				   unsigned int blocks_count);
+				   const struct mlxsw_afk_ops *ops);
 void mlxsw_afk_destroy(struct mlxsw_afk *mlxsw_afk);
 
 struct mlxsw_afk_key_info;
@@ -243,7 +257,8 @@ void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values,
 			      enum mlxsw_afk_element element,
 			      const char *key_value, const char *mask_value,
 			      unsigned int len);
-void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info,
+void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk,
+		      struct mlxsw_afk_key_info *key_info,
 		      struct mlxsw_afk_element_values *values,
 		      char *key, char *mask);
 

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 1877d9f..6f98a43 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h

@@ -2132,14 +2132,18 @@ MLXSW_ITEM32(reg, ptar, op, 0x00, 28, 4);
 
 /* reg_ptar_action_set_type
  * Type of action set to be used on this region.
- * For Spectrum, this is always type 2 - "flexible"
+ * For Spectrum and Spectrum-2, this is always type 2 - "flexible"
  * Access: WO
  */
 MLXSW_ITEM32(reg, ptar, action_set_type, 0x00, 16, 8);
 
+enum mlxsw_reg_ptar_key_type {
+	MLXSW_REG_PTAR_KEY_TYPE_FLEX = 0x50, /* Spetrum */
+	MLXSW_REG_PTAR_KEY_TYPE_FLEX2 = 0x51, /* Spectrum-2 */
+};
+
 /* reg_ptar_key_type
  * TCAM key type for the region.
- * For Spectrum, this is always type 0x50 - "FLEX_KEY"
  * Access: WO
  */
 MLXSW_ITEM32(reg, ptar, key_type, 0x00, 0, 8);
@@ -2182,13 +2186,14 @@ MLXSW_ITEM8_INDEXED(reg, ptar, flexible_key_id, 0x20, 0, 8,
 		    MLXSW_REG_PTAR_KEY_ID_LEN, 0x00, false);
 
 static inline void mlxsw_reg_ptar_pack(char *payload, enum mlxsw_reg_ptar_op op,
+				       enum mlxsw_reg_ptar_key_type key_type,
 				       u16 region_size, u16 region_id,
 				       const char *tcam_region_info)
 {
 	MLXSW_REG_ZERO(ptar, payload);
 	mlxsw_reg_ptar_op_set(payload, op);
 	mlxsw_reg_ptar_action_set_type_set(payload, 2); /* "flexible" */
-	mlxsw_reg_ptar_key_type_set(payload, 0x50); /* "FLEX_KEY" */
+	mlxsw_reg_ptar_key_type_set(payload, key_type);
 	mlxsw_reg_ptar_region_size_set(payload, region_size);
 	mlxsw_reg_ptar_region_id_set(payload, region_id);
 	mlxsw_reg_ptar_tcam_region_info_memcpy_to(payload, tcam_region_info);
@@ -2397,6 +2402,15 @@ MLXSW_ITEM32(reg, ptce2, op, 0x00, 20, 3);
  */
 MLXSW_ITEM32(reg, ptce2, offset, 0x00, 0, 16);
 
+/* reg_ptce2_priority
+ * Priority of the rule, higher values win. The range is 1..cap_kvd_size-1.
+ * Note: priority does not have to be unique per rule.
+ * Within a region, higher priority should have lower offset (no limitation
+ * between regions in a multi-region).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce2, priority, 0x04, 0, 24);
+
 /* reg_ptce2_tcam_region_info
  * Opaque object that represents the TCAM region.
  * Access: Index
@@ -2432,12 +2446,13 @@ MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0,
 static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid,
 					enum mlxsw_reg_ptce2_op op,
 					const char *tcam_region_info,
-					u16 offset)
+					u16 offset, u32 priority)
 {
 	MLXSW_REG_ZERO(ptce2, payload);
 	mlxsw_reg_ptce2_v_set(payload, valid);
 	mlxsw_reg_ptce2_op_set(payload, op);
 	mlxsw_reg_ptce2_offset_set(payload, offset);
+	mlxsw_reg_ptce2_priority_set(payload, priority);
 	mlxsw_reg_ptce2_tcam_region_info_memcpy_to(payload, tcam_region_info);
 }
 
@@ -4338,6 +4353,20 @@ MLXSW_ITEM32(reg, ritr, if_swid, 0x08, 24, 8);
  */
 MLXSW_ITEM_BUF(reg, ritr, if_mac, 0x12, 6);
 
+/* reg_ritr_if_vrrp_id_ipv6
+ * VRRP ID for IPv6
+ * Note: Reserved for RIF types other than VLAN, FID and Sub-port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, if_vrrp_id_ipv6, 0x1C, 8, 8);
+
+/* reg_ritr_if_vrrp_id_ipv4
+ * VRRP ID for IPv4
+ * Note: Reserved for RIF types other than VLAN, FID and Sub-port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, if_vrrp_id_ipv4, 0x1C, 0, 8);
+
 /* VLAN Interface */
 
 /* reg_ritr_vlan_if_vid

diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index fd9299c..f672a7b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h

@@ -42,6 +42,8 @@ enum mlxsw_res_id {
 	MLXSW_RES_ID_KVD_SIZE,
 	MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE,
 	MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE,
+	MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE,
+	MLXSW_RES_ID_MAX_KVD_ACTION_SETS,
 	MLXSW_RES_ID_MAX_TRAP_GROUPS,
 	MLXSW_RES_ID_CQE_V0,
 	MLXSW_RES_ID_CQE_V1,
@@ -83,6 +85,8 @@ static u16 mlxsw_res_ids[] = {
 	[MLXSW_RES_ID_KVD_SIZE] = 0x1001,
 	[MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002,
 	[MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003,
+	[MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE] = 0x1005,
+	[MLXSW_RES_ID_MAX_KVD_ACTION_SETS] = 0x1007,
 	[MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201,
 	[MLXSW_RES_ID_CQE_V0] = 0x2210,
 	[MLXSW_RES_ID_CQE_V1] = 0x2211,

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 5243736..6ec0f91 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c

@@ -74,15 +74,22 @@
 #include "spectrum_span.h"
 #include "../mlxfw/mlxfw.h"
 
-#define MLXSW_FWREV_MAJOR 13
-#define MLXSW_FWREV_MINOR 1620
-#define MLXSW_FWREV_SUBMINOR 192
-#define MLXSW_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
+#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
 
-#define MLXSW_SP_FW_FILENAME \
-	"mellanox/mlxsw_spectrum-" __stringify(MLXSW_FWREV_MAJOR) \
-	"." __stringify(MLXSW_FWREV_MINOR) \
-	"." __stringify(MLXSW_FWREV_SUBMINOR) ".mfa2"
+#define MLXSW_SP1_FWREV_MAJOR 13
+#define MLXSW_SP1_FWREV_MINOR 1620
+#define MLXSW_SP1_FWREV_SUBMINOR 192
+
+static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
+	.major = MLXSW_SP1_FWREV_MAJOR,
+	.minor = MLXSW_SP1_FWREV_MINOR,
+	.subminor = MLXSW_SP1_FWREV_SUBMINOR,
+};
+
+#define MLXSW_SP1_FW_FILENAME \
+	"mellanox/mlxsw_spectrum-" __stringify(MLXSW_SP1_FWREV_MAJOR) \
+	"." __stringify(MLXSW_SP1_FWREV_MINOR) \
+	"." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2"
 
 static const char mlxsw_sp_driver_name[] = "mlxsw_spectrum";
 static const char mlxsw_sp_driver_version[] = "1.0";
@@ -338,29 +345,35 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
 {
 	const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev;
+	const struct mlxsw_fw_rev *req_rev = mlxsw_sp->req_rev;
+	const char *fw_filename = mlxsw_sp->fw_filename;
 	const struct firmware *firmware;
 	int err;
 
+	/* Don't check if driver does not require it */
+	if (!req_rev || !fw_filename)
+		return 0;
+
 	/* Validate driver & FW are compatible */
-	if (rev->major != MLXSW_FWREV_MAJOR) {
+	if (rev->major != req_rev->major) {
 		WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n",
-		     rev->major, MLXSW_FWREV_MAJOR);
+		     rev->major, req_rev->major);
 		return -EINVAL;
 	}
-	if (MLXSW_FWREV_MINOR_TO_BRANCH(rev->minor) ==
-	    MLXSW_FWREV_MINOR_TO_BRANCH(MLXSW_FWREV_MINOR))
+	if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) ==
+	    MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor))
 		return 0;
 
 	dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n",
 		 rev->major, rev->minor, rev->subminor);
 	dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n",
-		 MLXSW_SP_FW_FILENAME);
+		 fw_filename);
 
-	err = request_firmware_direct(&firmware, MLXSW_SP_FW_FILENAME,
+	err = request_firmware_direct(&firmware, fw_filename,
 				      mlxsw_sp->bus_info->dev);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Could not request firmware file %s\n",
-			MLXSW_SP_FW_FILENAME);
+			fw_filename);
 		return err;
 	}
 
@@ -3373,6 +3386,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
 	/* PKT Sample trap */
 	MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
 		  false, SP_IP2ME, DISCARD),
@@ -3621,6 +3636,14 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	int err;
 
+	mlxsw_sp->req_rev = &mlxsw_sp1_fw_rev;
+	mlxsw_sp->fw_filename = MLXSW_SP1_FW_FILENAME;
+	mlxsw_sp->kvdl_ops = &mlxsw_sp1_kvdl_ops;
+	mlxsw_sp->afa_ops = &mlxsw_sp1_act_afa_ops;
+	mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops;
+	mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops;
+	mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
+
 	mlxsw_sp->core = mlxsw_core;
 	mlxsw_sp->bus_info = mlxsw_bus_info;
 
@@ -3878,7 +3901,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 	if (err)
 		return err;
 
-	err = mlxsw_sp_kvdl_resources_register(mlxsw_core);
+	err = mlxsw_sp1_kvdl_resources_register(mlxsw_core);
 	if  (err)
 		return err;
 
@@ -4399,7 +4422,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 		if (!is_vlan_dev(upper_dev) &&
 		    !netif_is_lag_master(upper_dev) &&
 		    !netif_is_bridge_master(upper_dev) &&
-		    !netif_is_ovs_master(upper_dev)) {
+		    !netif_is_ovs_master(upper_dev) &&
+		    !netif_is_macvlan(upper_dev)) {
 			NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
 			return -EINVAL;
 		}
@@ -4425,6 +4449,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 			NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port");
 			return -EINVAL;
 		}
+		if (netif_is_macvlan(upper_dev) &&
+		    !mlxsw_sp_rif_find_by_dev(mlxsw_sp, lower_dev)) {
+			NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+			return -EOPNOTSUPP;
+		}
 		if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
 			NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN");
 			return -EINVAL;
@@ -4463,6 +4492,9 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 				err = mlxsw_sp_port_ovs_join(mlxsw_sp_port);
 			else
 				mlxsw_sp_port_ovs_leave(mlxsw_sp_port);
+		} else if (netif_is_macvlan(upper_dev)) {
+			if (!info->linking)
+				mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
 		}
 		break;
 	}
@@ -4547,8 +4579,9 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
 	switch (event) {
 	case NETDEV_PRECHANGEUPPER:
 		upper_dev = info->upper_dev;
-		if (!netif_is_bridge_master(upper_dev)) {
-			NL_SET_ERR_MSG_MOD(extack, "VLAN devices only support bridge and VRF uppers");
+		if (!netif_is_bridge_master(upper_dev) &&
+		    !netif_is_macvlan(upper_dev)) {
+			NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
 			return -EINVAL;
 		}
 		if (!info->linking)
@@ -4560,6 +4593,11 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
 			NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
 			return -EINVAL;
 		}
+		if (netif_is_macvlan(upper_dev) &&
+		    !mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan_dev)) {
+			NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+			return -EOPNOTSUPP;
+		}
 		break;
 	case NETDEV_CHANGEUPPER:
 		upper_dev = info->upper_dev;
@@ -4573,6 +4611,9 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
 				mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
 							   vlan_dev,
 							   upper_dev);
+		} else if (netif_is_macvlan(upper_dev)) {
+			if (!info->linking)
+				mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
 		} else {
 			err = -EINVAL;
 			WARN_ON(1);
@@ -4622,6 +4663,64 @@ static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev,
 	return 0;
 }
 
+static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
+					   unsigned long event, void *ptr)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+	struct netdev_notifier_changeupper_info *info = ptr;
+	struct netlink_ext_ack *extack;
+	struct net_device *upper_dev;
+
+	if (!mlxsw_sp)
+		return 0;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+
+	switch (event) {
+	case NETDEV_PRECHANGEUPPER:
+		upper_dev = info->upper_dev;
+		if (!is_vlan_dev(upper_dev) && !netif_is_macvlan(upper_dev)) {
+			NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+			return -EOPNOTSUPP;
+		}
+		if (!info->linking)
+			break;
+		if (netif_is_macvlan(upper_dev) &&
+		    !mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev)) {
+			NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+			return -EOPNOTSUPP;
+		}
+		break;
+	case NETDEV_CHANGEUPPER:
+		upper_dev = info->upper_dev;
+		if (info->linking)
+			break;
+		if (netif_is_macvlan(upper_dev))
+			mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
+		break;
+	}
+
+	return 0;
+}
+
+static int mlxsw_sp_netdevice_macvlan_event(struct net_device *macvlan_dev,
+					    unsigned long event, void *ptr)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
+	struct netdev_notifier_changeupper_info *info = ptr;
+	struct netlink_ext_ack *extack;
+
+	if (!mlxsw_sp || event != NETDEV_PRECHANGEUPPER)
+		return 0;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+
+	/* VRF enslavement is handled in mlxsw_sp_netdevice_vrf_event() */
+	NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+
+	return -EOPNOTSUPP;
+}
+
 static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
 {
 	struct netdev_notifier_changeupper_info *info = ptr;
@@ -4663,6 +4762,10 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
 		err = mlxsw_sp_netdevice_lag_event(dev, event, ptr);
 	else if (is_vlan_dev(dev))
 		err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr);
+	else if (netif_is_bridge_master(dev))
+		err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr);
+	else if (netif_is_macvlan(dev))
+		err = mlxsw_sp_netdevice_macvlan_event(dev, event, ptr);
 
 	return notifier_from_errno(err);
 }
@@ -4739,4 +4842,4 @@ MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox Spectrum driver");
 MODULE_DEVICE_TABLE(pci, mlxsw_sp_pci_id_table);
-MODULE_FIRMWARE(MLXSW_SP_FW_FILENAME);
+MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME);

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 4a519d8..8aa717a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h

@@ -145,6 +145,9 @@ struct mlxsw_sp_acl;
 struct mlxsw_sp_counter_pool;
 struct mlxsw_sp_fid_core;
 struct mlxsw_sp_kvdl;
+struct mlxsw_sp_kvdl_ops;
+struct mlxsw_sp_mr_tcam_ops;
+struct mlxsw_sp_acl_tcam_ops;
 
 struct mlxsw_sp {
 	struct mlxsw_sp_port **ports;
@@ -168,6 +171,13 @@ struct mlxsw_sp {
 		struct mlxsw_sp_span_entry *entries;
 		int entries_count;
 	} span;
+	const struct mlxsw_fw_rev *req_rev;
+	const char *fw_filename;
+	const struct mlxsw_sp_kvdl_ops *kvdl_ops;
+	const struct mlxsw_afa_ops *afa_ops;
+	const struct mlxsw_afk_ops *afk_ops;
+	const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
+	const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
 };
 
 static inline struct mlxsw_sp_upper *
@@ -407,6 +417,8 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
+void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
+			      const struct net_device *macvlan_dev);
 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
 			    unsigned long event, void *ptr);
 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
@@ -435,15 +447,59 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
 
 /* spectrum_kvdl.c */
+enum mlxsw_sp_kvdl_entry_type {
+	MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+	MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+	MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+	MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+};
+
+static inline unsigned int
+mlxsw_sp_kvdl_entry_size(enum mlxsw_sp_kvdl_entry_type type)
+{
+	switch (type) {
+	case MLXSW_SP_KVDL_ENTRY_TYPE_ADJ: /* fall through */
+	case MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET: /* fall through */
+	case MLXSW_SP_KVDL_ENTRY_TYPE_PBS: /* fall through */
+	case MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR: /* fall through */
+	default:
+		return 1;
+	}
+}
+
+struct mlxsw_sp_kvdl_ops {
+	size_t priv_size;
+	int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
+	void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
+	int (*alloc)(struct mlxsw_sp *mlxsw_sp, void *priv,
+		     enum mlxsw_sp_kvdl_entry_type type,
+		     unsigned int entry_count, u32 *p_entry_index);
+	void (*free)(struct mlxsw_sp *mlxsw_sp, void *priv,
+		     enum mlxsw_sp_kvdl_entry_type type,
+		     unsigned int entry_count, int entry_index);
+	int (*alloc_size_query)(struct mlxsw_sp *mlxsw_sp, void *priv,
+				enum mlxsw_sp_kvdl_entry_type type,
+				unsigned int entry_count,
+				unsigned int *p_alloc_count);
+	int (*resources_register)(struct mlxsw_sp *mlxsw_sp, void *priv);
+};
+
 int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp);
-int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
-			u32 *p_entry_index);
-void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
-int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
-				   unsigned int entry_count,
-				   unsigned int *p_alloc_size);
-int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core);
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp,
+			enum mlxsw_sp_kvdl_entry_type type,
+			unsigned int entry_count, u32 *p_entry_index);
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp,
+			enum mlxsw_sp_kvdl_entry_type type,
+			unsigned int entry_count, int entry_index);
+int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp,
+				    enum mlxsw_sp_kvdl_entry_type type,
+				    unsigned int entry_count,
+				    unsigned int *p_alloc_count);
+
+/* spectrum1_kvdl.c */
+extern const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops;
+int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core);
 
 struct mlxsw_sp_acl_rule_info {
 	unsigned int priority;
@@ -452,44 +508,14 @@ struct mlxsw_sp_acl_rule_info {
 	unsigned int counter_index;
 };
 
-enum mlxsw_sp_acl_profile {
-	MLXSW_SP_ACL_PROFILE_FLOWER,
-};
-
-struct mlxsw_sp_acl_profile_ops {
-	size_t ruleset_priv_size;
-	int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp,
-			   void *priv, void *ruleset_priv);
-	void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv);
-	int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
-			    struct mlxsw_sp_port *mlxsw_sp_port,
-			    bool ingress);
-	void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
-			       struct mlxsw_sp_port *mlxsw_sp_port,
-			       bool ingress);
-	u16 (*ruleset_group_id)(void *ruleset_priv);
-	size_t rule_priv_size;
-	int (*rule_add)(struct mlxsw_sp *mlxsw_sp,
-			void *ruleset_priv, void *rule_priv,
-			struct mlxsw_sp_acl_rule_info *rulei);
-	void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv);
-	int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv,
-				 bool *activity);
-};
-
-struct mlxsw_sp_acl_ops {
-	size_t priv_size;
-	int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
-	void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
-	const struct mlxsw_sp_acl_profile_ops *
-			(*profile_ops)(struct mlxsw_sp *mlxsw_sp,
-				       enum mlxsw_sp_acl_profile profile);
-};
-
 struct mlxsw_sp_acl_block;
 struct mlxsw_sp_acl_ruleset;
 
 /* spectrum_acl.c */
+enum mlxsw_sp_acl_profile {
+	MLXSW_SP_ACL_PROFILE_FLOWER,
+};
+
 struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
 struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block);
 unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block);
@@ -582,7 +608,44 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp);
 
 /* spectrum_acl_tcam.c */
-extern const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops;
+struct mlxsw_sp_acl_tcam;
+struct mlxsw_sp_acl_tcam_region;
+
+struct mlxsw_sp_acl_tcam_ops {
+	enum mlxsw_reg_ptar_key_type key_type;
+	size_t priv_size;
+	int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv,
+		    struct mlxsw_sp_acl_tcam *tcam);
+	void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
+	size_t region_priv_size;
+	int (*region_init)(struct mlxsw_sp *mlxsw_sp, void *region_priv,
+			   struct mlxsw_sp_acl_tcam_region *region);
+	void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv);
+	size_t chunk_priv_size;
+	void (*chunk_init)(void *region_priv, void *chunk_priv,
+			   unsigned int priority);
+	void (*chunk_fini)(void *chunk_priv);
+	size_t entry_priv_size;
+	int (*entry_add)(struct mlxsw_sp *mlxsw_sp,
+			 void *region_priv, void *chunk_priv,
+			 void *entry_priv,
+			 struct mlxsw_sp_acl_rule_info *rulei);
+	void (*entry_del)(struct mlxsw_sp *mlxsw_sp,
+			  void *region_priv, void *chunk_priv,
+			  void *entry_priv);
+	int (*entry_activity_get)(struct mlxsw_sp *mlxsw_sp,
+				  void *region_priv, void *entry_priv,
+				  bool *activity);
+};
+
+/* spectrum1_acl_tcam.c */
+extern const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops;
+
+/* spectrum_acl_flex_actions.c */
+extern const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops;
+
+/* spectrum_acl_flex_keys.c */
+extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops;
 
 /* spectrum_flower.c */
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
@@ -631,4 +694,37 @@ void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port);
 int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp);
 
+/* spectrum_mr.c */
+enum mlxsw_sp_mr_route_prio {
+	MLXSW_SP_MR_ROUTE_PRIO_SG,
+	MLXSW_SP_MR_ROUTE_PRIO_STARG,
+	MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
+	__MLXSW_SP_MR_ROUTE_PRIO_MAX
+};
+
+#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1)
+
+struct mlxsw_sp_mr_route_key;
+
+struct mlxsw_sp_mr_tcam_ops {
+	size_t priv_size;
+	int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
+	void (*fini)(void *priv);
+	size_t route_priv_size;
+	int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv,
+			    void *route_priv,
+			    struct mlxsw_sp_mr_route_key *key,
+			    struct mlxsw_afa_block *afa_block,
+			    enum mlxsw_sp_mr_route_prio prio);
+	void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
+			      void *route_priv,
+			      struct mlxsw_sp_mr_route_key *key);
+	int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+			    struct mlxsw_sp_mr_route_key *key,
+			    struct mlxsw_afa_block *afa_block);
+};
+
+/* spectrum1_mr_tcam.c */
+extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops;
+
 #endif

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
new file mode 100644
index 0000000..04f0c9c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c

@@ -0,0 +1,245 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "reg.h"
+#include "core.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+struct mlxsw_sp1_acl_tcam_region {
+	struct mlxsw_sp_acl_ctcam_region cregion;
+	struct mlxsw_sp_acl_tcam_region *region;
+	struct {
+		struct mlxsw_sp_acl_ctcam_chunk cchunk;
+		struct mlxsw_sp_acl_ctcam_entry centry;
+		struct mlxsw_sp_acl_rule_info *rulei;
+	} catchall;
+};
+
+struct mlxsw_sp1_acl_tcam_chunk {
+	struct mlxsw_sp_acl_ctcam_chunk cchunk;
+};
+
+struct mlxsw_sp1_acl_tcam_entry {
+	struct mlxsw_sp_acl_ctcam_entry centry;
+};
+
+static int mlxsw_sp1_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv,
+				   struct mlxsw_sp_acl_tcam *tcam)
+{
+	return 0;
+}
+
+static void mlxsw_sp1_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+}
+
+static int
+mlxsw_sp1_acl_ctcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp1_acl_tcam_region *region)
+{
+	struct mlxsw_sp_acl_rule_info *rulei;
+	int err;
+
+	mlxsw_sp_acl_ctcam_chunk_init(&region->cregion,
+				      &region->catchall.cchunk,
+				      MLXSW_SP_ACL_TCAM_CATCHALL_PRIO);
+	rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl);
+	if (IS_ERR(rulei)) {
+		err = PTR_ERR(rulei);
+		goto err_rulei_create;
+	}
+	err = mlxsw_sp_acl_rulei_act_continue(rulei);
+	if (WARN_ON(err))
+		goto err_rulei_act_continue;
+	err = mlxsw_sp_acl_rulei_commit(rulei);
+	if (err)
+		goto err_rulei_commit;
+	err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &region->cregion,
+					   &region->catchall.cchunk,
+					   &region->catchall.centry,
+					   rulei, false);
+	if (err)
+		goto err_entry_add;
+	region->catchall.rulei = rulei;
+	return 0;
+
+err_entry_add:
+err_rulei_commit:
+err_rulei_act_continue:
+	mlxsw_sp_acl_rulei_destroy(rulei);
+err_rulei_create:
+	mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk);
+	return err;
+}
+
+static void
+mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp1_acl_tcam_region *region)
+{
+	struct mlxsw_sp_acl_rule_info *rulei = region->catchall.rulei;
+
+	mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &region->cregion,
+				     &region->catchall.cchunk,
+				     &region->catchall.centry);
+	mlxsw_sp_acl_rulei_destroy(rulei);
+	mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk);
+}
+
+static int
+mlxsw_sp1_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv,
+			       struct mlxsw_sp_acl_tcam_region *_region)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+	int err;
+
+	err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &region->cregion,
+					     _region);
+	if (err)
+		return err;
+	err = mlxsw_sp1_acl_ctcam_region_catchall_add(mlxsw_sp, region);
+	if (err)
+		goto err_catchall_add;
+	region->region = _region;
+	return 0;
+
+err_catchall_add:
+	mlxsw_sp_acl_ctcam_region_fini(&region->cregion);
+	return err;
+}
+
+static void
+mlxsw_sp1_acl_tcam_region_fini(struct mlxsw_sp *mlxsw_sp, void *region_priv)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+
+	mlxsw_sp1_acl_ctcam_region_catchall_del(mlxsw_sp, region);
+	mlxsw_sp_acl_ctcam_region_fini(&region->cregion);
+}
+
+static void mlxsw_sp1_acl_tcam_chunk_init(void *region_priv, void *chunk_priv,
+					  unsigned int priority)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+	struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+
+	mlxsw_sp_acl_ctcam_chunk_init(&region->cregion, &chunk->cchunk,
+				      priority);
+}
+
+static void mlxsw_sp1_acl_tcam_chunk_fini(void *chunk_priv)
+{
+	struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+
+	mlxsw_sp_acl_ctcam_chunk_fini(&chunk->cchunk);
+}
+
+static int mlxsw_sp1_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+					void *region_priv, void *chunk_priv,
+					void *entry_priv,
+					struct mlxsw_sp_acl_rule_info *rulei)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+	struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+	struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+
+	return mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &region->cregion,
+					    &chunk->cchunk, &entry->centry,
+					    rulei, false);
+}
+
+static void mlxsw_sp1_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+					 void *region_priv, void *chunk_priv,
+					 void *entry_priv)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+	struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+	struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+
+	mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &region->cregion,
+				     &chunk->cchunk, &entry->centry);
+}
+
+static int
+mlxsw_sp1_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+					     struct mlxsw_sp_acl_tcam_region *_region,
+					     unsigned int offset,
+					     bool *activity)
+{
+	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+	int err;
+
+	mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ,
+			     _region->tcam_region_info, offset, 0);
+	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+	if (err)
+		return err;
+	*activity = mlxsw_reg_ptce2_a_get(ptce2_pl);
+	return 0;
+}
+
+static int
+mlxsw_sp1_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+				      void *region_priv, void *entry_priv,
+				      bool *activity)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+	struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+	unsigned int offset;
+
+	offset = mlxsw_sp_acl_ctcam_entry_offset(&entry->centry);
+	return mlxsw_sp1_acl_tcam_region_entry_activity_get(mlxsw_sp,
+							    region->region,
+							    offset, activity);
+}
+
+const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops = {
+	.key_type		= MLXSW_REG_PTAR_KEY_TYPE_FLEX,
+	.priv_size		= 0,
+	.init			= mlxsw_sp1_acl_tcam_init,
+	.fini			= mlxsw_sp1_acl_tcam_fini,
+	.region_priv_size	= sizeof(struct mlxsw_sp1_acl_tcam_region),
+	.region_init		= mlxsw_sp1_acl_tcam_region_init,
+	.region_fini		= mlxsw_sp1_acl_tcam_region_fini,
+	.chunk_priv_size	= sizeof(struct mlxsw_sp1_acl_tcam_chunk),
+	.chunk_init		= mlxsw_sp1_acl_tcam_chunk_init,
+	.chunk_fini		= mlxsw_sp1_acl_tcam_chunk_fini,
+	.entry_priv_size	= sizeof(struct mlxsw_sp1_acl_tcam_entry),
+	.entry_add		= mlxsw_sp1_acl_tcam_entry_add,
+	.entry_del		= mlxsw_sp1_acl_tcam_entry_del,
+	.entry_activity_get	= mlxsw_sp1_acl_tcam_entry_activity_get,
+};

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
new file mode 100644
index 0000000..0d45838
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c

@@ -0,0 +1,459 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "spectrum.h"
+
+#define MLXSW_SP1_KVDL_SINGLE_BASE 0
+#define MLXSW_SP1_KVDL_SINGLE_SIZE 16384
+#define MLXSW_SP1_KVDL_SINGLE_END \
+	(MLXSW_SP1_KVDL_SINGLE_SIZE + MLXSW_SP1_KVDL_SINGLE_BASE - 1)
+
+#define MLXSW_SP1_KVDL_CHUNKS_BASE \
+	(MLXSW_SP1_KVDL_SINGLE_BASE + MLXSW_SP1_KVDL_SINGLE_SIZE)
+#define MLXSW_SP1_KVDL_CHUNKS_SIZE 49152
+#define MLXSW_SP1_KVDL_CHUNKS_END \
+	(MLXSW_SP1_KVDL_CHUNKS_SIZE + MLXSW_SP1_KVDL_CHUNKS_BASE - 1)
+
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE \
+	(MLXSW_SP1_KVDL_CHUNKS_BASE + MLXSW_SP1_KVDL_CHUNKS_SIZE)
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE \
+	(MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE)
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_END \
+	(MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE - 1)
+
+#define MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE 1
+#define MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE 32
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE 512
+
+struct mlxsw_sp1_kvdl_part_info {
+	unsigned int part_index;
+	unsigned int start_index;
+	unsigned int end_index;
+	unsigned int alloc_size;
+	enum mlxsw_sp_resource_id resource_id;
+};
+
+enum mlxsw_sp1_kvdl_part_id {
+	MLXSW_SP1_KVDL_PART_ID_SINGLE,
+	MLXSW_SP1_KVDL_PART_ID_CHUNKS,
+	MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS,
+};
+
+#define MLXSW_SP1_KVDL_PART_INFO(id)				\
+[MLXSW_SP1_KVDL_PART_ID_##id] = {				\
+	.start_index = MLXSW_SP1_KVDL_##id##_BASE,		\
+	.end_index = MLXSW_SP1_KVDL_##id##_END,			\
+	.alloc_size = MLXSW_SP1_KVDL_##id##_ALLOC_SIZE,		\
+	.resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_##id,	\
+}
+
+static const struct mlxsw_sp1_kvdl_part_info mlxsw_sp1_kvdl_parts_info[] = {
+	MLXSW_SP1_KVDL_PART_INFO(SINGLE),
+	MLXSW_SP1_KVDL_PART_INFO(CHUNKS),
+	MLXSW_SP1_KVDL_PART_INFO(LARGE_CHUNKS),
+};
+
+#define MLXSW_SP1_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp1_kvdl_parts_info)
+
+struct mlxsw_sp1_kvdl_part {
+	struct mlxsw_sp1_kvdl_part_info info;
+	unsigned long usage[0];	/* Entries */
+};
+
+struct mlxsw_sp1_kvdl {
+	struct mlxsw_sp1_kvdl_part *parts[MLXSW_SP1_KVDL_PARTS_INFO_LEN];
+};
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_alloc_size_part(struct mlxsw_sp1_kvdl *kvdl,
+			       unsigned int alloc_size)
+{
+	struct mlxsw_sp1_kvdl_part *part, *min_part = NULL;
+	int i;
+
+	for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+		part = kvdl->parts[i];
+		if (alloc_size <= part->info.alloc_size &&
+		    (!min_part ||
+		     part->info.alloc_size <= min_part->info.alloc_size))
+			min_part = part;
+	}
+
+	return min_part ?: ERR_PTR(-ENOBUFS);
+}
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_index_part(struct mlxsw_sp1_kvdl *kvdl, u32 kvdl_index)
+{
+	struct mlxsw_sp1_kvdl_part *part;
+	int i;
+
+	for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+		part = kvdl->parts[i];
+		if (kvdl_index >= part->info.start_index &&
+		    kvdl_index <= part->info.end_index)
+			return part;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static u32
+mlxsw_sp1_kvdl_to_kvdl_index(const struct mlxsw_sp1_kvdl_part_info *info,
+			     unsigned int entry_index)
+{
+	return info->start_index + entry_index * info->alloc_size;
+}
+
+static unsigned int
+mlxsw_sp1_kvdl_to_entry_index(const struct mlxsw_sp1_kvdl_part_info *info,
+			      u32 kvdl_index)
+{
+	return (kvdl_index - info->start_index) / info->alloc_size;
+}
+
+static int mlxsw_sp1_kvdl_part_alloc(struct mlxsw_sp1_kvdl_part *part,
+				     u32 *p_kvdl_index)
+{
+	const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+	unsigned int entry_index, nr_entries;
+
+	nr_entries = (info->end_index - info->start_index + 1) /
+		     info->alloc_size;
+	entry_index = find_first_zero_bit(part->usage, nr_entries);
+	if (entry_index == nr_entries)
+		return -ENOBUFS;
+	__set_bit(entry_index, part->usage);
+
+	*p_kvdl_index = mlxsw_sp1_kvdl_to_kvdl_index(info, entry_index);
+
+	return 0;
+}
+
+static void mlxsw_sp1_kvdl_part_free(struct mlxsw_sp1_kvdl_part *part,
+				     u32 kvdl_index)
+{
+	const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+	unsigned int entry_index;
+
+	entry_index = mlxsw_sp1_kvdl_to_entry_index(info, kvdl_index);
+	__clear_bit(entry_index, part->usage);
+}
+
+static int mlxsw_sp1_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, void *priv,
+				enum mlxsw_sp_kvdl_entry_type type,
+				unsigned int entry_count,
+				u32 *p_entry_index)
+{
+	struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	/* Find partition with smallest allocation size satisfying the
+	 * requested size.
+	 */
+	part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count);
+	if (IS_ERR(part))
+		return PTR_ERR(part);
+
+	return mlxsw_sp1_kvdl_part_alloc(part, p_entry_index);
+}
+
+static void mlxsw_sp1_kvdl_free(struct mlxsw_sp *mlxsw_sp, void *priv,
+				enum mlxsw_sp_kvdl_entry_type type,
+				unsigned int entry_count, int entry_index)
+{
+	struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	part = mlxsw_sp1_kvdl_index_part(kvdl, entry_index);
+	if (IS_ERR(part))
+		return;
+	mlxsw_sp1_kvdl_part_free(part, entry_index);
+}
+
+static int mlxsw_sp1_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
+					   void *priv,
+					   enum mlxsw_sp_kvdl_entry_type type,
+					   unsigned int entry_count,
+					   unsigned int *p_alloc_size)
+{
+	struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count);
+	if (IS_ERR(part))
+		return PTR_ERR(part);
+
+	*p_alloc_size = part->info.alloc_size;
+
+	return 0;
+}
+
+static void mlxsw_sp1_kvdl_part_update(struct mlxsw_sp1_kvdl_part *part,
+				       struct mlxsw_sp1_kvdl_part *part_prev,
+				       unsigned int size)
+{
+	if (!part_prev) {
+		part->info.end_index = size - 1;
+	} else {
+		part->info.start_index = part_prev->info.end_index + 1;
+		part->info.end_index = part->info.start_index + size - 1;
+	}
+}
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
+			 const struct mlxsw_sp1_kvdl_part_info *info,
+			 struct mlxsw_sp1_kvdl_part *part_prev)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	struct mlxsw_sp1_kvdl_part *part;
+	bool need_update = true;
+	unsigned int nr_entries;
+	size_t usage_size;
+	u64 resource_size;
+	int err;
+
+	err = devlink_resource_size_get(devlink, info->resource_id,
+					&resource_size);
+	if (err) {
+		need_update = false;
+		resource_size = info->end_index - info->start_index + 1;
+	}
+
+	nr_entries = div_u64(resource_size, info->alloc_size);
+	usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
+	part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+	if (!part)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(&part->info, info, sizeof(part->info));
+
+	if (need_update)
+		mlxsw_sp1_kvdl_part_update(part, part_prev, resource_size);
+	return part;
+}
+
+static void mlxsw_sp1_kvdl_part_fini(struct mlxsw_sp1_kvdl_part *part)
+{
+	kfree(part);
+}
+
+static int mlxsw_sp1_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp1_kvdl *kvdl)
+{
+	const struct mlxsw_sp1_kvdl_part_info *info;
+	struct mlxsw_sp1_kvdl_part *part_prev = NULL;
+	int err, i;
+
+	for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+		info = &mlxsw_sp1_kvdl_parts_info[i];
+		kvdl->parts[i] = mlxsw_sp1_kvdl_part_init(mlxsw_sp, info,
+							  part_prev);
+		if (IS_ERR(kvdl->parts[i])) {
+			err = PTR_ERR(kvdl->parts[i]);
+			goto err_kvdl_part_init;
+		}
+		part_prev = kvdl->parts[i];
+	}
+	return 0;
+
+err_kvdl_part_init:
+	for (i--; i >= 0; i--)
+		mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]);
+	return err;
+}
+
+static void mlxsw_sp1_kvdl_parts_fini(struct mlxsw_sp1_kvdl *kvdl)
+{
+	int i;
+
+	for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++)
+		mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]);
+}
+
+static u64 mlxsw_sp1_kvdl_part_occ(struct mlxsw_sp1_kvdl_part *part)
+{
+	const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+	unsigned int nr_entries;
+	int bit = -1;
+	u64 occ = 0;
+
+	nr_entries = (info->end_index -
+		      info->start_index + 1) /
+		      info->alloc_size;
+	while ((bit = find_next_bit(part->usage, nr_entries, bit + 1))
+		< nr_entries)
+		occ += info->alloc_size;
+	return occ;
+}
+
+static u64 mlxsw_sp1_kvdl_occ_get(void *priv)
+{
+	const struct mlxsw_sp1_kvdl *kvdl = priv;
+	u64 occ = 0;
+	int i;
+
+	for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++)
+		occ += mlxsw_sp1_kvdl_part_occ(kvdl->parts[i]);
+
+	return occ;
+}
+
+static u64 mlxsw_sp1_kvdl_single_occ_get(void *priv)
+{
+	const struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_SINGLE];
+	return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp1_kvdl_chunks_occ_get(void *priv)
+{
+	const struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_CHUNKS];
+	return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp1_kvdl_large_chunks_occ_get(void *priv)
+{
+	const struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS];
+	return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static int mlxsw_sp1_kvdl_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	struct mlxsw_sp1_kvdl *kvdl = priv;
+	int err;
+
+	err = mlxsw_sp1_kvdl_parts_init(mlxsw_sp, kvdl);
+	if (err)
+		return err;
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR,
+					  mlxsw_sp1_kvdl_occ_get,
+					  kvdl);
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+					  mlxsw_sp1_kvdl_single_occ_get,
+					  kvdl);
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+					  mlxsw_sp1_kvdl_chunks_occ_get,
+					  kvdl);
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+					  mlxsw_sp1_kvdl_large_chunks_occ_get,
+					  kvdl);
+	return 0;
+}
+
+static void mlxsw_sp1_kvdl_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	struct mlxsw_sp1_kvdl *kvdl = priv;
+
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS);
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS);
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE);
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR);
+	mlxsw_sp1_kvdl_parts_fini(kvdl);
+}
+
+const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops = {
+	.priv_size = sizeof(struct mlxsw_sp1_kvdl),
+	.init = mlxsw_sp1_kvdl_init,
+	.fini = mlxsw_sp1_kvdl_fini,
+	.alloc = mlxsw_sp1_kvdl_alloc,
+	.free = mlxsw_sp1_kvdl_free,
+	.alloc_size_query = mlxsw_sp1_kvdl_alloc_size_query,
+};
+
+int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_core);
+	static struct devlink_resource_size_params size_params;
+	u32 kvdl_max_size;
+	int err;
+
+	kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
+			MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
+			MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
+
+	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+					  MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE,
+					  DEVLINK_RESOURCE_UNIT_ENTRY);
+	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
+					MLXSW_SP1_KVDL_SINGLE_SIZE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR,
+					&size_params);
+	if (err)
+		return err;
+
+	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+					  MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE,
+					  DEVLINK_RESOURCE_UNIT_ENTRY);
+	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
+					MLXSW_SP1_KVDL_CHUNKS_SIZE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+					MLXSW_SP_RESOURCE_KVD_LINEAR,
+					&size_params);
+	if (err)
+		return err;
+
+	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+					  MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE,
+					  DEVLINK_RESOURCE_UNIT_ENTRY);
+	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
+					MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+					MLXSW_SP_RESOURCE_KVD_LINEAR,
+					&size_params);
+	return err;
+}

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c
new file mode 100644
index 0000000..fc649fe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c

@@ -0,0 +1,374 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ * Copyright (c) 2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "core_acl_flex_actions.h"
+#include "spectrum_mr.h"
+
+struct mlxsw_sp1_mr_tcam_region {
+	struct mlxsw_sp *mlxsw_sp;
+	enum mlxsw_reg_rtar_key_type rtar_key_type;
+	struct parman *parman;
+	struct parman_prio *parman_prios;
+};
+
+struct mlxsw_sp1_mr_tcam {
+	struct mlxsw_sp1_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX];
+};
+
+struct mlxsw_sp1_mr_tcam_route {
+	struct parman_item parman_item;
+	struct parman_prio *parman_prio;
+};
+
+static int mlxsw_sp1_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp,
+					   struct parman_item *parman_item,
+					   struct mlxsw_sp_mr_route_key *key,
+					   struct mlxsw_afa_block *afa_block)
+{
+	char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+	switch (key->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index,
+					  key->vrid,
+					  MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+					  ntohl(key->group.addr4),
+					  ntohl(key->group_mask.addr4),
+					  ntohl(key->source.addr4),
+					  ntohl(key->source_mask.addr4),
+					  mlxsw_afa_block_first_set(afa_block));
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index,
+					  key->vrid,
+					  MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+					  key->group.addr6,
+					  key->group_mask.addr6,
+					  key->source.addr6,
+					  key->source_mask.addr6,
+					  mlxsw_afa_block_first_set(afa_block));
+	}
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static int mlxsw_sp1_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp,
+					  struct parman_item *parman_item,
+					  struct mlxsw_sp_mr_route_key *key)
+{
+	struct in6_addr zero_addr = IN6ADDR_ANY_INIT;
+	char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+	switch (key->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index,
+					  key->vrid, 0, 0, 0, 0, 0, 0, NULL);
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index,
+					  key->vrid, 0, 0, zero_addr, zero_addr,
+					  zero_addr, zero_addr, NULL);
+		break;
+	}
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static struct mlxsw_sp1_mr_tcam_region *
+mlxsw_sp1_mr_tcam_protocol_region(struct mlxsw_sp1_mr_tcam *mr_tcam,
+				  enum mlxsw_sp_l3proto proto)
+{
+	return &mr_tcam->tcam_regions[proto];
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_parman_item_add(struct mlxsw_sp1_mr_tcam *mr_tcam,
+					struct mlxsw_sp1_mr_tcam_route *route,
+					struct mlxsw_sp_mr_route_key *key,
+					enum mlxsw_sp_mr_route_prio prio)
+{
+	struct mlxsw_sp1_mr_tcam_region *tcam_region;
+	int err;
+
+	tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto);
+	err = parman_item_add(tcam_region->parman,
+			      &tcam_region->parman_prios[prio],
+			      &route->parman_item);
+	if (err)
+		return err;
+
+	route->parman_prio = &tcam_region->parman_prios[prio];
+	return 0;
+}
+
+static void
+mlxsw_sp1_mr_tcam_route_parman_item_remove(struct mlxsw_sp1_mr_tcam *mr_tcam,
+					   struct mlxsw_sp1_mr_tcam_route *route,
+					   struct mlxsw_sp_mr_route_key *key)
+{
+	struct mlxsw_sp1_mr_tcam_region *tcam_region;
+
+	tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto);
+	parman_item_remove(tcam_region->parman,
+			   route->parman_prio, &route->parman_item);
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
+			       void *route_priv,
+			       struct mlxsw_sp_mr_route_key *key,
+			       struct mlxsw_afa_block *afa_block,
+			       enum mlxsw_sp_mr_route_prio prio)
+{
+	struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+	struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+	int err;
+
+	err = mlxsw_sp1_mr_tcam_route_parman_item_add(mr_tcam, route,
+						      key, prio);
+	if (err)
+		return err;
+
+	err = mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+					      key, afa_block);
+	if (err)
+		goto err_route_replace;
+	return 0;
+
+err_route_replace:
+	mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key);
+	return err;
+}
+
+static void
+mlxsw_sp1_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, void *priv,
+				void *route_priv,
+				struct mlxsw_sp_mr_route_key *key)
+{
+	struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+	struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+
+	mlxsw_sp1_mr_tcam_route_remove(mlxsw_sp, &route->parman_item, key);
+	mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key);
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp,
+			       void *route_priv,
+			       struct mlxsw_sp_mr_route_key *key,
+			       struct mlxsw_afa_block *afa_block)
+{
+	struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+
+	return mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+					       key, afa_block);
+}
+
+#define MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT 16
+#define MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP 16
+
+static int
+mlxsw_sp1_mr_tcam_region_alloc(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE,
+			    mr_tcam_region->rtar_key_type,
+			    MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void
+mlxsw_sp1_mr_tcam_region_free(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE,
+			    mr_tcam_region->rtar_key_type, 0);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static int mlxsw_sp1_mr_tcam_region_parman_resize(void *priv,
+						  unsigned long new_count)
+{
+	struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv;
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rtar_pl[MLXSW_REG_RTAR_LEN];
+	u64 max_tcam_rules;
+
+	max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
+	if (new_count > max_tcam_rules)
+		return -EINVAL;
+	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE,
+			    mr_tcam_region->rtar_key_type, new_count);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void mlxsw_sp1_mr_tcam_region_parman_move(void *priv,
+						 unsigned long from_index,
+						 unsigned long to_index,
+						 unsigned long count)
+{
+	struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv;
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rrcr_pl[MLXSW_REG_RRCR_LEN];
+
+	mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE,
+			    from_index, count,
+			    mr_tcam_region->rtar_key_type, to_index);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl);
+}
+
+static const struct parman_ops mlxsw_sp1_mr_tcam_region_parman_ops = {
+	.base_count	= MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT,
+	.resize_step	= MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP,
+	.resize		= mlxsw_sp1_mr_tcam_region_parman_resize,
+	.move		= mlxsw_sp1_mr_tcam_region_parman_move,
+	.algo		= PARMAN_ALGO_TYPE_LSORT,
+};
+
+static int
+mlxsw_sp1_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp1_mr_tcam_region *mr_tcam_region,
+			      enum mlxsw_reg_rtar_key_type rtar_key_type)
+{
+	struct parman_prio *parman_prios;
+	struct parman *parman;
+	int err;
+	int i;
+
+	mr_tcam_region->rtar_key_type = rtar_key_type;
+	mr_tcam_region->mlxsw_sp = mlxsw_sp;
+
+	err = mlxsw_sp1_mr_tcam_region_alloc(mr_tcam_region);
+	if (err)
+		return err;
+
+	parman = parman_create(&mlxsw_sp1_mr_tcam_region_parman_ops,
+			       mr_tcam_region);
+	if (!parman) {
+		err = -ENOMEM;
+		goto err_parman_create;
+	}
+	mr_tcam_region->parman = parman;
+
+	parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1,
+				     sizeof(*parman_prios), GFP_KERNEL);
+	if (!parman_prios) {
+		err = -ENOMEM;
+		goto err_parman_prios_alloc;
+	}
+	mr_tcam_region->parman_prios = parman_prios;
+
+	for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+		parman_prio_init(mr_tcam_region->parman,
+				 &mr_tcam_region->parman_prios[i], i);
+	return 0;
+
+err_parman_prios_alloc:
+	parman_destroy(parman);
+err_parman_create:
+	mlxsw_sp1_mr_tcam_region_free(mr_tcam_region);
+	return err;
+}
+
+static void
+mlxsw_sp1_mr_tcam_region_fini(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+	int i;
+
+	for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+		parman_prio_fini(&mr_tcam_region->parman_prios[i]);
+	kfree(mr_tcam_region->parman_prios);
+	parman_destroy(mr_tcam_region->parman);
+	mlxsw_sp1_mr_tcam_region_free(mr_tcam_region);
+}
+
+static int mlxsw_sp1_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+	struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+	struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
+	u32 rtar_key;
+	int err;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES))
+		return -EIO;
+
+	rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST;
+	err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp,
+					    &region[MLXSW_SP_L3_PROTO_IPV4],
+					    rtar_key);
+	if (err)
+		return err;
+
+	rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST;
+	err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp,
+					    &region[MLXSW_SP_L3_PROTO_IPV6],
+					    rtar_key);
+	if (err)
+		goto err_ipv6_region_init;
+
+	return 0;
+
+err_ipv6_region_init:
+	mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+	return err;
+}
+
+static void mlxsw_sp1_mr_tcam_fini(void *priv)
+{
+	struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+	struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
+
+	mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV6]);
+	mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+}
+
+const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops = {
+	.priv_size = sizeof(struct mlxsw_sp1_mr_tcam),
+	.init = mlxsw_sp1_mr_tcam_init,
+	.fini = mlxsw_sp1_mr_tcam_fini,
+	.route_priv_size = sizeof(struct mlxsw_sp1_mr_tcam_route),
+	.route_create = mlxsw_sp1_mr_tcam_route_create,
+	.route_destroy = mlxsw_sp1_mr_tcam_route_destroy,
+	.route_update = mlxsw_sp1_mr_tcam_route_update,
+};

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 79b1fa2..217621d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c

@@ -48,13 +48,12 @@
 #include "spectrum.h"
 #include "core_acl_flex_keys.h"
 #include "core_acl_flex_actions.h"
-#include "spectrum_acl_flex_keys.h"
+#include "spectrum_acl_tcam.h"
 
 struct mlxsw_sp_acl {
 	struct mlxsw_sp *mlxsw_sp;
 	struct mlxsw_afk *afk;
 	struct mlxsw_sp_fid *dummy_fid;
-	const struct mlxsw_sp_acl_ops *ops;
 	struct rhashtable ruleset_ht;
 	struct list_head rules;
 	struct {
@@ -62,8 +61,7 @@ struct mlxsw_sp_acl {
 		unsigned long interval;	/* ms */
 #define MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS 1000
 	} rule_activity_update;
-	unsigned long priv[0];
-	/* priv has to be always the last item */
+	struct mlxsw_sp_acl_tcam tcam;
 };
 
 struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl)
@@ -339,7 +337,7 @@ mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_rhashtable_init;
 
-	err = ops->ruleset_add(mlxsw_sp, acl->priv, ruleset->priv);
+	err = ops->ruleset_add(mlxsw_sp, &acl->tcam, ruleset->priv);
 	if (err)
 		goto err_ops_ruleset_add;
 
@@ -409,7 +407,7 @@ mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
 	struct mlxsw_sp_acl_ruleset *ruleset;
 
-	ops = acl->ops->profile_ops(mlxsw_sp, profile);
+	ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile);
 	if (!ops)
 		return ERR_PTR(-EINVAL);
 	ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, block, chain_index, ops);
@@ -427,7 +425,7 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
 	struct mlxsw_sp_acl_ruleset *ruleset;
 
-	ops = acl->ops->profile_ops(mlxsw_sp, profile);
+	ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile);
 	if (!ops)
 		return ERR_PTR(-EINVAL);
 
@@ -487,7 +485,7 @@ int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei)
 void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
 				 unsigned int priority)
 {
-	rulei->priority = priority;
+	rulei->priority = priority >> 16;
 }
 
 void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei,
@@ -634,7 +632,8 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
 	int err;
 
 	mlxsw_sp_acl_ruleset_ref_inc(ruleset);
-	rule = kzalloc(sizeof(*rule) + ops->rule_priv_size, GFP_KERNEL);
+	rule = kzalloc(sizeof(*rule) + ops->rule_priv_size(mlxsw_sp),
+		       GFP_KERNEL);
 	if (!rule) {
 		err = -ENOMEM;
 		goto err_alloc;
@@ -825,20 +824,20 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
 
 int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
 {
-	const struct mlxsw_sp_acl_ops *acl_ops = &mlxsw_sp_acl_tcam_ops;
 	struct mlxsw_sp_fid *fid;
 	struct mlxsw_sp_acl *acl;
+	size_t alloc_size;
 	int err;
 
-	acl = kzalloc(sizeof(*acl) + acl_ops->priv_size, GFP_KERNEL);
+	alloc_size = sizeof(*acl) + mlxsw_sp_acl_tcam_priv_size(mlxsw_sp);
+	acl = kzalloc(alloc_size, GFP_KERNEL);
 	if (!acl)
 		return -ENOMEM;
 	mlxsw_sp->acl = acl;
 	acl->mlxsw_sp = mlxsw_sp;
 	acl->afk = mlxsw_afk_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
 						       ACL_FLEX_KEYS),
-				    mlxsw_sp_afk_blocks,
-				    MLXSW_SP_AFK_BLOCKS_COUNT);
+				    mlxsw_sp->afk_ops);
 	if (!acl->afk) {
 		err = -ENOMEM;
 		goto err_afk_create;
@@ -857,12 +856,10 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
 	acl->dummy_fid = fid;
 
 	INIT_LIST_HEAD(&acl->rules);
-	err = acl_ops->init(mlxsw_sp, acl->priv);
+	err = mlxsw_sp_acl_tcam_init(mlxsw_sp, &acl->tcam);
 	if (err)
 		goto err_acl_ops_init;
 
-	acl->ops = acl_ops;
-
 	/* Create the delayed work for the rule activity_update */
 	INIT_DELAYED_WORK(&acl->rule_activity_update.dw,
 			  mlxsw_sp_acl_rul_activity_update_work);
@@ -884,10 +881,9 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
 void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
-	const struct mlxsw_sp_acl_ops *acl_ops = acl->ops;
 
 	cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw);
-	acl_ops->fini(mlxsw_sp, acl->priv);
+	mlxsw_sp_acl_tcam_fini(mlxsw_sp, &acl->tcam);
 	WARN_ON(!list_empty(&acl->rules));
 	mlxsw_sp_fid_put(acl->dummy_fid);
 	rhashtable_destroy(&acl->ruleset_ht);

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
new file mode 100644
index 0000000..ef0d4c0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c

@@ -0,0 +1,215 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "core.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+static int
+mlxsw_sp_acl_ctcam_region_resize(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_tcam_region *region,
+				 u16 new_size)
+{
+	char ptar_pl[MLXSW_REG_PTAR_LEN];
+
+	mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_RESIZE,
+			    region->key_type, new_size, region->id,
+			    region->tcam_region_info);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl);
+}
+
+static void
+mlxsw_sp_acl_ctcam_region_move(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_acl_tcam_region *region,
+			       u16 src_offset, u16 dst_offset, u16 size)
+{
+	char prcr_pl[MLXSW_REG_PRCR_LEN];
+
+	mlxsw_reg_prcr_pack(prcr_pl, MLXSW_REG_PRCR_OP_MOVE,
+			    region->tcam_region_info, src_offset,
+			    region->tcam_region_info, dst_offset, size);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(prcr), prcr_pl);
+}
+
+static int
+mlxsw_sp_acl_ctcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_acl_tcam_region *region,
+				       unsigned int offset,
+				       struct mlxsw_sp_acl_rule_info *rulei,
+				       bool fillup_priority)
+{
+	struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
+	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+	char *act_set;
+	u32 priority;
+	char *mask;
+	char *key;
+	int err;
+
+	err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority,
+					     fillup_priority);
+	if (err)
+		return err;
+
+	mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
+			     region->tcam_region_info, offset, priority);
+	key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl);
+	mask = mlxsw_reg_ptce2_mask_data(ptce2_pl);
+	mlxsw_afk_encode(afk, region->key_info, &rulei->values, key, mask);
+
+	/* Only the first action set belongs here, the rest is in KVD */
+	act_set = mlxsw_afa_block_first_set(rulei->act_block);
+	mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+}
+
+static void
+mlxsw_sp_acl_ctcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_acl_tcam_region *region,
+				       unsigned int offset)
+{
+	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+
+	mlxsw_reg_ptce2_pack(ptce2_pl, false, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
+			     region->tcam_region_info, offset, 0);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+}
+
+static int mlxsw_sp_acl_ctcam_region_parman_resize(void *priv,
+						   unsigned long new_count)
+{
+	struct mlxsw_sp_acl_ctcam_region *cregion = priv;
+	struct mlxsw_sp_acl_tcam_region *region = cregion->region;
+	struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+	u64 max_tcam_rules;
+
+	max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
+	if (new_count > max_tcam_rules)
+		return -EINVAL;
+	return mlxsw_sp_acl_ctcam_region_resize(mlxsw_sp, region, new_count);
+}
+
+static void mlxsw_sp_acl_ctcam_region_parman_move(void *priv,
+						  unsigned long from_index,
+						  unsigned long to_index,
+						  unsigned long count)
+{
+	struct mlxsw_sp_acl_ctcam_region *cregion = priv;
+	struct mlxsw_sp_acl_tcam_region *region = cregion->region;
+	struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+
+	mlxsw_sp_acl_ctcam_region_move(mlxsw_sp, region,
+				       from_index, to_index, count);
+}
+
+static const struct parman_ops mlxsw_sp_acl_ctcam_region_parman_ops = {
+	.base_count	= MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT,
+	.resize_step	= MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP,
+	.resize		= mlxsw_sp_acl_ctcam_region_parman_resize,
+	.move		= mlxsw_sp_acl_ctcam_region_parman_move,
+	.algo		= PARMAN_ALGO_TYPE_LSORT,
+};
+
+int mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_acl_ctcam_region *cregion,
+				   struct mlxsw_sp_acl_tcam_region *region)
+{
+	cregion->region = region;
+	cregion->parman = parman_create(&mlxsw_sp_acl_ctcam_region_parman_ops,
+					cregion);
+	if (!cregion->parman)
+		return -ENOMEM;
+	return 0;
+}
+
+void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion)
+{
+	parman_destroy(cregion->parman);
+}
+
+void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion,
+				   struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				   unsigned int priority)
+{
+	parman_prio_init(cregion->parman, &cchunk->parman_prio, priority);
+}
+
+void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk)
+{
+	parman_prio_fini(&cchunk->parman_prio);
+}
+
+int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_ctcam_region *cregion,
+				 struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				 struct mlxsw_sp_acl_ctcam_entry *centry,
+				 struct mlxsw_sp_acl_rule_info *rulei,
+				 bool fillup_priority)
+{
+	int err;
+
+	err = parman_item_add(cregion->parman, &cchunk->parman_prio,
+			      &centry->parman_item);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_acl_ctcam_region_entry_insert(mlxsw_sp, cregion->region,
+						     centry->parman_item.index,
+						     rulei, fillup_priority);
+	if (err)
+		goto err_rule_insert;
+	return 0;
+
+err_rule_insert:
+	parman_item_remove(cregion->parman, &cchunk->parman_prio,
+			   &centry->parman_item);
+	return err;
+}
+
+void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_acl_ctcam_region *cregion,
+				  struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				  struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+	mlxsw_sp_acl_ctcam_region_entry_remove(mlxsw_sp, cregion->region,
+					       centry->parman_item.index);
+	parman_item_remove(cregion->parman, &cchunk->parman_prio,
+			   &centry->parman_item);
+}

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index 510ce48..6a7c340 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c

@@ -37,8 +37,6 @@
 #include "core_acl_flex_actions.h"
 #include "spectrum_span.h"
 
-#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1
-
 static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
 				     char *enc_actions, bool is_first)
 {
@@ -53,8 +51,8 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
 	if (is_first)
 		return 0;
 
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ACT_EXT_SIZE,
-				  &kvdl_index);
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+				  1, &kvdl_index);
 	if (err)
 		return err;
 	mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions);
@@ -65,7 +63,8 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
 	return 0;
 
 err_pefa_write:
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+			   1, kvdl_index);
 	return err;
 }
 
@@ -76,7 +75,8 @@ static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index,
 
 	if (is_first)
 		return;
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+			   1, kvdl_index);
 }
 
 static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
@@ -87,7 +87,8 @@ static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
 	u32 kvdl_index;
 	int err;
 
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index);
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+				  1, &kvdl_index);
 	if (err)
 		return err;
 	mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port);
@@ -98,7 +99,8 @@ static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
 	return 0;
 
 err_ppbs_write:
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+			   1, kvdl_index);
 	return err;
 }
 
@@ -106,7 +108,8 @@ static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index)
 {
 	struct mlxsw_sp *mlxsw_sp = priv;
 
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+			   1, kvdl_index);
 }
 
 static int
@@ -154,7 +157,7 @@ mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
 	mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
 }
 
-static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
+const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops = {
 	.kvdl_set_add		= mlxsw_sp_act_kvdl_set_add,
 	.kvdl_set_del		= mlxsw_sp_act_kvdl_set_del,
 	.kvdl_fwd_entry_add	= mlxsw_sp_act_kvdl_fwd_entry_add,
@@ -169,7 +172,7 @@ int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp)
 {
 	mlxsw_sp->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
 							    ACL_ACTIONS_PER_SET),
-					 &mlxsw_sp_act_afa_ops, mlxsw_sp);
+					 mlxsw_sp->afa_ops, mlxsw_sp);
 	return PTR_ERR_OR_ZERO(mlxsw_sp->afa);
 }
 

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
new file mode 100644
index 0000000..80f22b7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c

@@ -0,0 +1,174 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include "spectrum.h"
+#include "item.h"
+#include "core_acl_flex_keys.h"
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = {
+	MLXSW_AFK_ELEMENT_INST_BUF(DMAC_32_47, 0x00, 2),
+	MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x02, 4),
+	MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
+	MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
+	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = {
+	MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x00, 2),
+	MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x02, 4),
+	MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
+	MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
+	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = {
+	MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x02, 2),
+	MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x04, 4),
+	MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x0C, 0, 16),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = {
+	MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4),
+	MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
+	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
+	MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x00, 4),
+	MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
+	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = {
+	MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4),
+	MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2),
+	MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8),
+	MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6),
+	MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = {
+	MLXSW_AFK_ELEMENT_INST_U32(VID, 0x00, 0, 12),
+	MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 29, 3),
+	MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x08, 0, 16),
+	MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x0C, 0, 16),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_dip[] = {
+	MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_32_63, 0x00, 4),
+	MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_ex1[] = {
+	MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_96_127, 0x00, 4),
+	MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_64_95, 0x04, 4),
+	MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip[] = {
+	MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_32_63, 0x00, 4),
+	MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip_ex[] = {
+	MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_96_127, 0x00, 4),
+	MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_64_95, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_packet_type[] = {
+	MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x00, 0, 16),
+};
+
+static const struct mlxsw_afk_block mlxsw_sp1_afk_blocks[] = {
+	MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_l2_dmac),
+	MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_l2_smac),
+	MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_l2_smac_ex),
+	MLXSW_AFK_BLOCK(0x30, mlxsw_sp_afk_element_info_ipv4_sip),
+	MLXSW_AFK_BLOCK(0x31, mlxsw_sp_afk_element_info_ipv4_dip),
+	MLXSW_AFK_BLOCK(0x32, mlxsw_sp_afk_element_info_ipv4),
+	MLXSW_AFK_BLOCK(0x33, mlxsw_sp_afk_element_info_ipv4_ex),
+	MLXSW_AFK_BLOCK(0x60, mlxsw_sp_afk_element_info_ipv6_dip),
+	MLXSW_AFK_BLOCK(0x65, mlxsw_sp_afk_element_info_ipv6_ex1),
+	MLXSW_AFK_BLOCK(0x62, mlxsw_sp_afk_element_info_ipv6_sip),
+	MLXSW_AFK_BLOCK(0x63, mlxsw_sp_afk_element_info_ipv6_sip_ex),
+	MLXSW_AFK_BLOCK(0xB0, mlxsw_sp_afk_element_info_packet_type),
+};
+
+static void mlxsw_sp1_afk_encode_u32(const struct mlxsw_item *storage_item,
+				     const struct mlxsw_item *output_item,
+				     char *storage, char *output_indexed)
+{
+	u32 value;
+
+	value = __mlxsw_item_get32(storage, storage_item, 0);
+	__mlxsw_item_set32(output_indexed, output_item, 0, value);
+}
+
+static void mlxsw_sp1_afk_encode_buf(const struct mlxsw_item *storage_item,
+				     const struct mlxsw_item *output_item,
+				     char *storage, char *output_indexed)
+{
+	char *storage_data = __mlxsw_item_data(storage, storage_item, 0);
+	char *output_data = __mlxsw_item_data(output_indexed, output_item, 0);
+	size_t len = output_item->size.bytes;
+
+	memcpy(output_data, storage_data, len);
+}
+
+#define MLXSW_SP1_AFK_KEY_BLOCK_SIZE 16
+
+static void
+mlxsw_sp1_afk_encode_one(const struct mlxsw_afk_element_inst *elinst,
+			 int block_index, char *storage, char *output)
+{
+	unsigned int offset = block_index * MLXSW_SP1_AFK_KEY_BLOCK_SIZE;
+	char *output_indexed = output + offset;
+	const struct mlxsw_item *storage_item = &elinst->info->item;
+	const struct mlxsw_item *output_item = &elinst->item;
+
+	if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32)
+		mlxsw_sp1_afk_encode_u32(storage_item, output_item,
+					 storage, output_indexed);
+	else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF)
+		mlxsw_sp1_afk_encode_buf(storage_item, output_item,
+					 storage, output_indexed);
+}
+
+const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = {
+	.blocks		= mlxsw_sp1_afk_blocks,
+	.blocks_count	= ARRAY_SIZE(mlxsw_sp1_afk_blocks),
+	.encode_one	= mlxsw_sp1_afk_encode_one,
+};

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
deleted file mode 100644
index fb80318..0000000
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
+++ /dev/null

@@ -1,124 +0,0 @@
-/*
- * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
-#define _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
-
-#include "core_acl_flex_keys.h"
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = {
-	MLXSW_AFK_ELEMENT_INST_BUF(DMAC, 0x00, 6),
-	MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
-	MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = {
-	MLXSW_AFK_ELEMENT_INST_BUF(SMAC, 0x00, 6),
-	MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
-	MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = {
-	MLXSW_AFK_ELEMENT_INST_BUF(SMAC, 0x02, 6),
-	MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = {
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32),
-	MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
-	MLXSW_AFK_ELEMENT_INST_U32(DST_IP4, 0x00, 0, 32),
-	MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = {
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32),
-	MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2),
-	MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8),
-	MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6),
-	MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = {
-	MLXSW_AFK_ELEMENT_INST_U32(VID, 0x00, 0, 12),
-	MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 29, 3),
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x08, 0, 16),
-	MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_dip[] = {
-	MLXSW_AFK_ELEMENT_INST_BUF(DST_IP6_LO, 0x00, 8),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_ex1[] = {
-	MLXSW_AFK_ELEMENT_INST_BUF(DST_IP6_HI, 0x00, 8),
-	MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip[] = {
-	MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP6_LO, 0x00, 8),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip_ex[] = {
-	MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP6_HI, 0x00, 8),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_packet_type[] = {
-	MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x00, 0, 16),
-};
-
-static const struct mlxsw_afk_block mlxsw_sp_afk_blocks[] = {
-	MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_l2_dmac),
-	MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_l2_smac),
-	MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_l2_smac_ex),
-	MLXSW_AFK_BLOCK(0x30, mlxsw_sp_afk_element_info_ipv4_sip),
-	MLXSW_AFK_BLOCK(0x31, mlxsw_sp_afk_element_info_ipv4_dip),
-	MLXSW_AFK_BLOCK(0x32, mlxsw_sp_afk_element_info_ipv4),
-	MLXSW_AFK_BLOCK(0x33, mlxsw_sp_afk_element_info_ipv4_ex),
-	MLXSW_AFK_BLOCK(0x60, mlxsw_sp_afk_element_info_ipv6_dip),
-	MLXSW_AFK_BLOCK(0x65, mlxsw_sp_afk_element_info_ipv6_ex1),
-	MLXSW_AFK_BLOCK(0x62, mlxsw_sp_afk_element_info_ipv6_sip),
-	MLXSW_AFK_BLOCK(0x63, mlxsw_sp_afk_element_info_ipv6_sip_ex),
-	MLXSW_AFK_BLOCK(0xB0, mlxsw_sp_afk_element_info_packet_type),
-};
-
-#define MLXSW_SP_AFK_BLOCKS_COUNT ARRAY_SIZE(mlxsw_sp_afk_blocks)
-
-#endif

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index ad1b548..53fe51a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c

@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -39,25 +39,25 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
-#include <linux/parman.h>
 
 #include "reg.h"
 #include "core.h"
 #include "resources.h"
 #include "spectrum.h"
+#include "spectrum_acl_tcam.h"
 #include "core_acl_flex_keys.h"
 
-struct mlxsw_sp_acl_tcam {
-	unsigned long *used_regions; /* bit array */
-	unsigned int max_regions;
-	unsigned long *used_groups;  /* bit array */
-	unsigned int max_groups;
-	unsigned int max_group_size;
-};
-
-static int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp)
 {
-	struct mlxsw_sp_acl_tcam *tcam = priv;
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+	return ops->priv_size;
+}
+
+int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
+			   struct mlxsw_sp_acl_tcam *tcam)
+{
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	u64 max_tcam_regions;
 	u64 max_regions;
 	u64 max_groups;
@@ -88,21 +88,53 @@ static int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
 	tcam->max_groups = max_groups;
 	tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
 						 ACL_MAX_GROUP_SIZE);
+
+	err = ops->init(mlxsw_sp, tcam->priv, tcam);
+	if (err)
+		goto err_tcam_init;
+
 	return 0;
 
+err_tcam_init:
+	kfree(tcam->used_groups);
 err_alloc_used_groups:
 	kfree(tcam->used_regions);
 	return err;
 }
 
-static void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
+			    struct mlxsw_sp_acl_tcam *tcam)
 {
-	struct mlxsw_sp_acl_tcam *tcam = priv;
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 
+	ops->fini(mlxsw_sp, tcam->priv);
 	kfree(tcam->used_groups);
 	kfree(tcam->used_regions);
 }
 
+int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_acl_rule_info *rulei,
+				   u32 *priority, bool fillup_priority)
+{
+	u64 max_priority;
+
+	if (!fillup_priority) {
+		*priority = 0;
+		return 0;
+	}
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, KVD_SIZE))
+		return -EIO;
+
+	max_priority = MLXSW_CORE_RES_GET(mlxsw_sp->core, KVD_SIZE);
+	if (rulei->priority > max_priority)
+		return -EINVAL;
+
+	/* Unlike in TC, in HW, higher number means higher priority. */
+	*priority = max_priority - rulei->priority;
+	return 0;
+}
+
 static int mlxsw_sp_acl_tcam_region_id_get(struct mlxsw_sp_acl_tcam *tcam,
 					   u16 *p_id)
 {
@@ -159,35 +191,21 @@ struct mlxsw_sp_acl_tcam_group {
 	unsigned int patterns_count;
 };
 
-struct mlxsw_sp_acl_tcam_region {
-	struct list_head list; /* Member of a TCAM group */
-	struct list_head chunk_list; /* List of chunks under this region */
-	struct parman *parman;
-	struct mlxsw_sp *mlxsw_sp;
-	struct mlxsw_sp_acl_tcam_group *group;
-	u16 id; /* ACL ID and region ID - they are same */
-	char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN];
-	struct mlxsw_afk_key_info *key_info;
-	struct {
-		struct parman_prio parman_prio;
-		struct parman_item parman_item;
-		struct mlxsw_sp_acl_rule_info *rulei;
-	} catchall;
-};
-
 struct mlxsw_sp_acl_tcam_chunk {
 	struct list_head list; /* Member of a TCAM region */
 	struct rhash_head ht_node; /* Member of a chunk HT */
 	unsigned int priority; /* Priority within the region and group */
-	struct parman_prio parman_prio;
 	struct mlxsw_sp_acl_tcam_group *group;
 	struct mlxsw_sp_acl_tcam_region *region;
 	unsigned int ref_count;
+	unsigned long priv[0];
+	/* priv has to be always the last item */
 };
 
 struct mlxsw_sp_acl_tcam_entry {
-	struct parman_item parman_item;
 	struct mlxsw_sp_acl_tcam_chunk *chunk;
+	unsigned long priv[0];
+	/* priv has to be always the last item */
 };
 
 static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = {
@@ -441,9 +459,6 @@ mlxsw_sp_acl_tcam_group_use_patterns(struct mlxsw_sp_acl_tcam_group *group,
 	memcpy(out, elusage, sizeof(*out));
 }
 
-#define MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT 16
-#define MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP 16
-
 static int
 mlxsw_sp_acl_tcam_region_alloc(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_acl_tcam_region *region)
@@ -455,6 +470,7 @@ mlxsw_sp_acl_tcam_region_alloc(struct mlxsw_sp *mlxsw_sp,
 	int err;
 
 	mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_ALLOC,
+			    region->key_type,
 			    MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT,
 			    region->id, region->tcam_region_info);
 	encodings_count = mlxsw_afk_key_info_blocks_count_get(key_info);
@@ -477,24 +493,13 @@ mlxsw_sp_acl_tcam_region_free(struct mlxsw_sp *mlxsw_sp,
 {
 	char ptar_pl[MLXSW_REG_PTAR_LEN];
 
-	mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_FREE, 0, region->id,
+	mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_FREE,
+			    region->key_type, 0, region->id,
 			    region->tcam_region_info);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl);
 }
 
 static int
-mlxsw_sp_acl_tcam_region_resize(struct mlxsw_sp *mlxsw_sp,
-				struct mlxsw_sp_acl_tcam_region *region,
-				u16 new_size)
-{
-	char ptar_pl[MLXSW_REG_PTAR_LEN];
-
-	mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_RESIZE,
-			    new_size, region->id, region->tcam_region_info);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl);
-}
-
-static int
 mlxsw_sp_acl_tcam_region_enable(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_acl_tcam_region *region)
 {
@@ -516,193 +521,22 @@ mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pacl), pacl_pl);
 }
 
-static int
-mlxsw_sp_acl_tcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp,
-				      struct mlxsw_sp_acl_tcam_region *region,
-				      unsigned int offset,
-				      struct mlxsw_sp_acl_rule_info *rulei)
-{
-	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
-	char *act_set;
-	char *mask;
-	char *key;
-
-	mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
-			     region->tcam_region_info, offset);
-	key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl);
-	mask = mlxsw_reg_ptce2_mask_data(ptce2_pl);
-	mlxsw_afk_encode(region->key_info, &rulei->values, key, mask);
-
-	/* Only the first action set belongs here, the rest is in KVD */
-	act_set = mlxsw_afa_block_first_set(rulei->act_block);
-	mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set);
-
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
-}
-
-static void
-mlxsw_sp_acl_tcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp,
-				      struct mlxsw_sp_acl_tcam_region *region,
-				      unsigned int offset)
-{
-	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
-
-	mlxsw_reg_ptce2_pack(ptce2_pl, false, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
-			     region->tcam_region_info, offset);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
-}
-
-static int
-mlxsw_sp_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
-					    struct mlxsw_sp_acl_tcam_region *region,
-					    unsigned int offset,
-					    bool *activity)
-{
-	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
-	int err;
-
-	mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ,
-			     region->tcam_region_info, offset);
-	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
-	if (err)
-		return err;
-	*activity = mlxsw_reg_ptce2_a_get(ptce2_pl);
-	return 0;
-}
-
-#define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U)
-
-static int
-mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
-				      struct mlxsw_sp_acl_tcam_region *region)
-{
-	struct parman_prio *parman_prio = &region->catchall.parman_prio;
-	struct parman_item *parman_item = &region->catchall.parman_item;
-	struct mlxsw_sp_acl_rule_info *rulei;
-	int err;
-
-	parman_prio_init(region->parman, parman_prio,
-			 MLXSW_SP_ACL_TCAM_CATCHALL_PRIO);
-	err = parman_item_add(region->parman, parman_prio, parman_item);
-	if (err)
-		goto err_parman_item_add;
-
-	rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl);
-	if (IS_ERR(rulei)) {
-		err = PTR_ERR(rulei);
-		goto err_rulei_create;
-	}
-
-	err = mlxsw_sp_acl_rulei_act_continue(rulei);
-	if (WARN_ON(err))
-		goto err_rulei_act_continue;
-
-	err = mlxsw_sp_acl_rulei_commit(rulei);
-	if (err)
-		goto err_rulei_commit;
-
-	err = mlxsw_sp_acl_tcam_region_entry_insert(mlxsw_sp, region,
-						    parman_item->index, rulei);
-	region->catchall.rulei = rulei;
-	if (err)
-		goto err_rule_insert;
-
-	return 0;
-
-err_rule_insert:
-err_rulei_commit:
-err_rulei_act_continue:
-	mlxsw_sp_acl_rulei_destroy(rulei);
-err_rulei_create:
-	parman_item_remove(region->parman, parman_prio, parman_item);
-err_parman_item_add:
-	parman_prio_fini(parman_prio);
-	return err;
-}
-
-static void
-mlxsw_sp_acl_tcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp,
-				      struct mlxsw_sp_acl_tcam_region *region)
-{
-	struct parman_prio *parman_prio = &region->catchall.parman_prio;
-	struct parman_item *parman_item = &region->catchall.parman_item;
-	struct mlxsw_sp_acl_rule_info *rulei = region->catchall.rulei;
-
-	mlxsw_sp_acl_tcam_region_entry_remove(mlxsw_sp, region,
-					      parman_item->index);
-	mlxsw_sp_acl_rulei_destroy(rulei);
-	parman_item_remove(region->parman, parman_prio, parman_item);
-	parman_prio_fini(parman_prio);
-}
-
-static void
-mlxsw_sp_acl_tcam_region_move(struct mlxsw_sp *mlxsw_sp,
-			      struct mlxsw_sp_acl_tcam_region *region,
-			      u16 src_offset, u16 dst_offset, u16 size)
-{
-	char prcr_pl[MLXSW_REG_PRCR_LEN];
-
-	mlxsw_reg_prcr_pack(prcr_pl, MLXSW_REG_PRCR_OP_MOVE,
-			    region->tcam_region_info, src_offset,
-			    region->tcam_region_info, dst_offset, size);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(prcr), prcr_pl);
-}
-
-static int mlxsw_sp_acl_tcam_region_parman_resize(void *priv,
-						  unsigned long new_count)
-{
-	struct mlxsw_sp_acl_tcam_region *region = priv;
-	struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
-	u64 max_tcam_rules;
-
-	max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
-	if (new_count > max_tcam_rules)
-		return -EINVAL;
-	return mlxsw_sp_acl_tcam_region_resize(mlxsw_sp, region, new_count);
-}
-
-static void mlxsw_sp_acl_tcam_region_parman_move(void *priv,
-						 unsigned long from_index,
-						 unsigned long to_index,
-						 unsigned long count)
-{
-	struct mlxsw_sp_acl_tcam_region *region = priv;
-	struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
-
-	mlxsw_sp_acl_tcam_region_move(mlxsw_sp, region,
-				      from_index, to_index, count);
-}
-
-static const struct parman_ops mlxsw_sp_acl_tcam_region_parman_ops = {
-	.base_count	= MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT,
-	.resize_step	= MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP,
-	.resize		= mlxsw_sp_acl_tcam_region_parman_resize,
-	.move		= mlxsw_sp_acl_tcam_region_parman_move,
-	.algo		= PARMAN_ALGO_TYPE_LSORT,
-};
-
 static struct mlxsw_sp_acl_tcam_region *
 mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_acl_tcam *tcam,
 				struct mlxsw_afk_element_usage *elusage)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
 	struct mlxsw_sp_acl_tcam_region *region;
 	int err;
 
-	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	region = kzalloc(sizeof(*region) + ops->region_priv_size, GFP_KERNEL);
 	if (!region)
 		return ERR_PTR(-ENOMEM);
 	INIT_LIST_HEAD(&region->chunk_list);
 	region->mlxsw_sp = mlxsw_sp;
 
-	region->parman = parman_create(&mlxsw_sp_acl_tcam_region_parman_ops,
-				       region);
-	if (!region->parman) {
-		err = -ENOMEM;
-		goto err_parman_create;
-	}
-
 	region->key_info = mlxsw_afk_key_info_get(afk, elusage);
 	if (IS_ERR(region->key_info)) {
 		err = PTR_ERR(region->key_info);
@@ -713,6 +547,7 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_region_id_get;
 
+	region->key_type = ops->key_type;
 	err = mlxsw_sp_acl_tcam_region_alloc(mlxsw_sp, region);
 	if (err)
 		goto err_tcam_region_alloc;
@@ -721,13 +556,13 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_tcam_region_enable;
 
-	err = mlxsw_sp_acl_tcam_region_catchall_add(mlxsw_sp, region);
+	err = ops->region_init(mlxsw_sp, region->priv, region);
 	if (err)
-		goto err_tcam_region_catchall_add;
+		goto err_tcam_region_init;
 
 	return region;
 
-err_tcam_region_catchall_add:
+err_tcam_region_init:
 	mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region);
 err_tcam_region_enable:
 	mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region);
@@ -736,8 +571,6 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
 err_region_id_get:
 	mlxsw_afk_key_info_put(region->key_info);
 err_key_info_get:
-	parman_destroy(region->parman);
-err_parman_create:
 	kfree(region);
 	return ERR_PTR(err);
 }
@@ -746,12 +579,13 @@ static void
 mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_acl_tcam_region *region)
 {
-	mlxsw_sp_acl_tcam_region_catchall_del(mlxsw_sp, region);
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+	ops->region_fini(mlxsw_sp, region->priv);
 	mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region);
 	mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region);
 	mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, region->id);
 	mlxsw_afk_key_info_put(region->key_info);
-	parman_destroy(region->parman);
 	kfree(region);
 }
 
@@ -826,13 +660,14 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
 			       unsigned int priority,
 			       struct mlxsw_afk_element_usage *elusage)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_sp_acl_tcam_chunk *chunk;
 	int err;
 
 	if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO)
 		return ERR_PTR(-EINVAL);
 
-	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
+	chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL);
 	if (!chunk)
 		return ERR_PTR(-ENOMEM);
 	chunk->priority = priority;
@@ -844,7 +679,7 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_chunk_assoc;
 
-	parman_prio_init(chunk->region->parman, &chunk->parman_prio, priority);
+	ops->chunk_init(chunk->region->priv, chunk->priv, priority);
 
 	err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node,
 				     mlxsw_sp_acl_tcam_chunk_ht_params);
@@ -854,7 +689,7 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
 	return chunk;
 
 err_rhashtable_insert:
-	parman_prio_fini(&chunk->parman_prio);
+	ops->chunk_fini(chunk->priv);
 	mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
 err_chunk_assoc:
 	kfree(chunk);
@@ -865,11 +700,12 @@ static void
 mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_acl_tcam_chunk *chunk)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_sp_acl_tcam_group *group = chunk->group;
 
 	rhashtable_remove_fast(&group->chunk_ht, &chunk->ht_node,
 			       mlxsw_sp_acl_tcam_chunk_ht_params);
-	parman_prio_fini(&chunk->parman_prio);
+	ops->chunk_fini(chunk->priv);
 	mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
 	kfree(chunk);
 }
@@ -903,11 +739,19 @@ static void mlxsw_sp_acl_tcam_chunk_put(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, chunk);
 }
 
+static size_t mlxsw_sp_acl_tcam_entry_priv_size(struct mlxsw_sp *mlxsw_sp)
+{
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+	return ops->entry_priv_size;
+}
+
 static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
 				       struct mlxsw_sp_acl_tcam_group *group,
 				       struct mlxsw_sp_acl_tcam_entry *entry,
 				       struct mlxsw_sp_acl_rule_info *rulei)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_sp_acl_tcam_chunk *chunk;
 	struct mlxsw_sp_acl_tcam_region *region;
 	int err;
@@ -918,24 +762,16 @@ static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
 		return PTR_ERR(chunk);
 
 	region = chunk->region;
-	err = parman_item_add(region->parman, &chunk->parman_prio,
-			      &entry->parman_item);
-	if (err)
-		goto err_parman_item_add;
 
-	err = mlxsw_sp_acl_tcam_region_entry_insert(mlxsw_sp, region,
-						    entry->parman_item.index,
-						    rulei);
+	err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv,
+			     entry->priv, rulei);
 	if (err)
-		goto err_rule_insert;
+		goto err_entry_add;
 	entry->chunk = chunk;
 
 	return 0;
 
-err_rule_insert:
-	parman_item_remove(region->parman, &chunk->parman_prio,
-			   &entry->parman_item);
-err_parman_item_add:
+err_entry_add:
 	mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
 	return err;
 }
@@ -943,13 +779,11 @@ static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
 static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
 					struct mlxsw_sp_acl_tcam_entry *entry)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
 	struct mlxsw_sp_acl_tcam_region *region = chunk->region;
 
-	mlxsw_sp_acl_tcam_region_entry_remove(mlxsw_sp, region,
-					      entry->parman_item.index);
-	parman_item_remove(region->parman, &chunk->parman_prio,
-			   &entry->parman_item);
+	ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv);
 	mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
 }
 
@@ -958,22 +792,24 @@ mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_acl_tcam_entry *entry,
 				     bool *activity)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
 	struct mlxsw_sp_acl_tcam_region *region = chunk->region;
 
-	return mlxsw_sp_acl_tcam_region_entry_activity_get(mlxsw_sp, region,
-							   entry->parman_item.index,
-							   activity);
+	return ops->entry_activity_get(mlxsw_sp, region->priv,
+				       entry->priv, activity);
 }
 
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
 	MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
-	MLXSW_AFK_ELEMENT_DMAC,
-	MLXSW_AFK_ELEMENT_SMAC,
+	MLXSW_AFK_ELEMENT_DMAC_32_47,
+	MLXSW_AFK_ELEMENT_DMAC_0_31,
+	MLXSW_AFK_ELEMENT_SMAC_32_47,
+	MLXSW_AFK_ELEMENT_SMAC_0_31,
 	MLXSW_AFK_ELEMENT_ETHERTYPE,
 	MLXSW_AFK_ELEMENT_IP_PROTO,
-	MLXSW_AFK_ELEMENT_SRC_IP4,
-	MLXSW_AFK_ELEMENT_DST_IP4,
+	MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+	MLXSW_AFK_ELEMENT_DST_IP_0_31,
 	MLXSW_AFK_ELEMENT_DST_L4_PORT,
 	MLXSW_AFK_ELEMENT_SRC_L4_PORT,
 	MLXSW_AFK_ELEMENT_VID,
@@ -987,10 +823,14 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = {
 	MLXSW_AFK_ELEMENT_ETHERTYPE,
 	MLXSW_AFK_ELEMENT_IP_PROTO,
-	MLXSW_AFK_ELEMENT_SRC_IP6_HI,
-	MLXSW_AFK_ELEMENT_SRC_IP6_LO,
-	MLXSW_AFK_ELEMENT_DST_IP6_HI,
-	MLXSW_AFK_ELEMENT_DST_IP6_LO,
+	MLXSW_AFK_ELEMENT_SRC_IP_96_127,
+	MLXSW_AFK_ELEMENT_SRC_IP_64_95,
+	MLXSW_AFK_ELEMENT_SRC_IP_32_63,
+	MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+	MLXSW_AFK_ELEMENT_DST_IP_96_127,
+	MLXSW_AFK_ELEMENT_DST_IP_64_95,
+	MLXSW_AFK_ELEMENT_DST_IP_32_63,
+	MLXSW_AFK_ELEMENT_DST_IP_0_31,
 	MLXSW_AFK_ELEMENT_DST_L4_PORT,
 	MLXSW_AFK_ELEMENT_SRC_L4_PORT,
 };
@@ -1019,10 +859,10 @@ struct mlxsw_sp_acl_tcam_flower_rule {
 
 static int
 mlxsw_sp_acl_tcam_flower_ruleset_add(struct mlxsw_sp *mlxsw_sp,
-				     void *priv, void *ruleset_priv)
+				     struct mlxsw_sp_acl_tcam *tcam,
+				     void *ruleset_priv)
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
-	struct mlxsw_sp_acl_tcam *tcam = priv;
 
 	return mlxsw_sp_acl_tcam_group_add(mlxsw_sp, tcam, &ruleset->group,
 					   mlxsw_sp_acl_tcam_patterns,
@@ -1070,6 +910,12 @@ mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv)
 	return mlxsw_sp_acl_tcam_group_id(&ruleset->group);
 }
 
+static size_t mlxsw_sp_acl_tcam_flower_rule_priv_size(struct mlxsw_sp *mlxsw_sp)
+{
+	return sizeof(struct mlxsw_sp_acl_tcam_flower_rule) +
+	       mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp);
+}
+
 static int
 mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp,
 				  void *ruleset_priv, void *rule_priv,
@@ -1107,7 +953,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = {
 	.ruleset_bind		= mlxsw_sp_acl_tcam_flower_ruleset_bind,
 	.ruleset_unbind		= mlxsw_sp_acl_tcam_flower_ruleset_unbind,
 	.ruleset_group_id	= mlxsw_sp_acl_tcam_flower_ruleset_group_id,
-	.rule_priv_size		= sizeof(struct mlxsw_sp_acl_tcam_flower_rule),
+	.rule_priv_size		= mlxsw_sp_acl_tcam_flower_rule_priv_size,
 	.rule_add		= mlxsw_sp_acl_tcam_flower_rule_add,
 	.rule_del		= mlxsw_sp_acl_tcam_flower_rule_del,
 	.rule_activity_get	= mlxsw_sp_acl_tcam_flower_rule_activity_get,
@@ -1118,7 +964,7 @@ mlxsw_sp_acl_tcam_profile_ops_arr[] = {
 	[MLXSW_SP_ACL_PROFILE_FLOWER] = &mlxsw_sp_acl_tcam_flower_ops,
 };
 
-static const struct mlxsw_sp_acl_profile_ops *
+const struct mlxsw_sp_acl_profile_ops *
 mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
 			      enum mlxsw_sp_acl_profile profile)
 {
@@ -1131,10 +977,3 @@ mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
 		return NULL;
 	return ops;
 }
-
-const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops = {
-	.priv_size		= sizeof(struct mlxsw_sp_acl_tcam),
-	.init			= mlxsw_sp_acl_tcam_init,
-	.fini			= mlxsw_sp_acl_tcam_fini,
-	.profile_ops		= mlxsw_sp_acl_tcam_profile_ops,
-};

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
new file mode 100644
index 0000000..cef7697
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h

@@ -0,0 +1,146 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_ACL_TCAM_H
+#define _MLXSW_SPECTRUM_ACL_TCAM_H
+
+#include <linux/list.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "core_acl_flex_keys.h"
+
+struct mlxsw_sp_acl_tcam {
+	unsigned long *used_regions; /* bit array */
+	unsigned int max_regions;
+	unsigned long *used_groups;  /* bit array */
+	unsigned int max_groups;
+	unsigned int max_group_size;
+	unsigned long priv[0];
+	/* priv has to be always the last item */
+};
+
+size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
+			   struct mlxsw_sp_acl_tcam *tcam);
+void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
+			    struct mlxsw_sp_acl_tcam *tcam);
+int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_acl_rule_info *rulei,
+				   u32 *priority, bool fillup_priority);
+
+struct mlxsw_sp_acl_profile_ops {
+	size_t ruleset_priv_size;
+	int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp,
+			   struct mlxsw_sp_acl_tcam *tcam, void *ruleset_priv);
+	void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv);
+	int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
+			    struct mlxsw_sp_port *mlxsw_sp_port,
+			    bool ingress);
+	void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
+			       struct mlxsw_sp_port *mlxsw_sp_port,
+			       bool ingress);
+	u16 (*ruleset_group_id)(void *ruleset_priv);
+	size_t (*rule_priv_size)(struct mlxsw_sp *mlxsw_sp);
+	int (*rule_add)(struct mlxsw_sp *mlxsw_sp,
+			void *ruleset_priv, void *rule_priv,
+			struct mlxsw_sp_acl_rule_info *rulei);
+	void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv);
+	int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv,
+				 bool *activity);
+};
+
+const struct mlxsw_sp_acl_profile_ops *
+mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
+			      enum mlxsw_sp_acl_profile profile);
+
+#define MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT 16
+#define MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP 16
+
+#define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U)
+
+struct mlxsw_sp_acl_tcam_group;
+
+struct mlxsw_sp_acl_tcam_region {
+	struct list_head list; /* Member of a TCAM group */
+	struct list_head chunk_list; /* List of chunks under this region */
+	struct mlxsw_sp_acl_tcam_group *group;
+	enum mlxsw_reg_ptar_key_type key_type;
+	u16 id; /* ACL ID and region ID - they are same */
+	char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN];
+	struct mlxsw_afk_key_info *key_info;
+	struct mlxsw_sp *mlxsw_sp;
+	unsigned long priv[0];
+	/* priv has to be always the last item */
+};
+
+struct mlxsw_sp_acl_ctcam_region {
+	struct parman *parman;
+	struct mlxsw_sp_acl_tcam_region *region;
+};
+
+struct mlxsw_sp_acl_ctcam_chunk {
+	struct parman_prio parman_prio;
+};
+
+struct mlxsw_sp_acl_ctcam_entry {
+	struct parman_item parman_item;
+};
+
+int mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_acl_ctcam_region *cregion,
+				   struct mlxsw_sp_acl_tcam_region *region);
+void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion);
+void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion,
+				   struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				   unsigned int priority);
+void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk);
+int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_ctcam_region *cregion,
+				 struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				 struct mlxsw_sp_acl_ctcam_entry *centry,
+				 struct mlxsw_sp_acl_rule_info *rulei,
+				 bool fillup_priority);
+void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_acl_ctcam_region *cregion,
+				  struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				  struct mlxsw_sp_acl_ctcam_entry *centry);
+static inline unsigned int
+mlxsw_sp_acl_ctcam_entry_offset(struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+	return centry->parman_item.index;
+}
+
+#endif

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 89dbf56..201761a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c

@@ -144,10 +144,12 @@ static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei,
 					  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
 					  f->mask);
 
-	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_SRC_IP4,
-				       ntohl(key->src), ntohl(mask->src));
-	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_DST_IP4,
-				       ntohl(key->dst), ntohl(mask->dst));
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+				       (char *) &key->src,
+				       (char *) &mask->src, 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
+				       (char *) &key->dst,
+				       (char *) &mask->dst, 4);
 }
 
 static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei,
@@ -161,24 +163,31 @@ static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei,
 		skb_flow_dissector_target(f->dissector,
 					  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
 					  f->mask);
-	size_t addr_half_size = sizeof(key->src) / 2;
 
-	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP6_HI,
-				       &key->src.s6_addr[0],
-				       &mask->src.s6_addr[0],
-				       addr_half_size);
-	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP6_LO,
-				       &key->src.s6_addr[addr_half_size],
-				       &mask->src.s6_addr[addr_half_size],
-				       addr_half_size);
-	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP6_HI,
-				       &key->dst.s6_addr[0],
-				       &mask->dst.s6_addr[0],
-				       addr_half_size);
-	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP6_LO,
-				       &key->dst.s6_addr[addr_half_size],
-				       &mask->dst.s6_addr[addr_half_size],
-				       addr_half_size);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_96_127,
+				       &key->src.s6_addr[0x0],
+				       &mask->src.s6_addr[0x0], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_64_95,
+				       &key->src.s6_addr[0x4],
+				       &mask->src.s6_addr[0x4], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_32_63,
+				       &key->src.s6_addr[0x8],
+				       &mask->src.s6_addr[0x8], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+				       &key->src.s6_addr[0xC],
+				       &mask->src.s6_addr[0xC], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_96_127,
+				       &key->dst.s6_addr[0x0],
+				       &mask->dst.s6_addr[0x0], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_64_95,
+				       &key->dst.s6_addr[0x4],
+				       &mask->dst.s6_addr[0x4], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_32_63,
+				       &key->dst.s6_addr[0x8],
+				       &mask->dst.s6_addr[0x8], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
+				       &key->dst.s6_addr[0xC],
+				       &mask->dst.s6_addr[0xC], 4);
 }
 
 static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp,
@@ -340,13 +349,17 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 						  f->mask);
 
 		mlxsw_sp_acl_rulei_keymask_buf(rulei,
-					       MLXSW_AFK_ELEMENT_DMAC,
-					       key->dst, mask->dst,
-					       sizeof(key->dst));
+					       MLXSW_AFK_ELEMENT_DMAC_32_47,
+					       key->dst, mask->dst, 2);
 		mlxsw_sp_acl_rulei_keymask_buf(rulei,
-					       MLXSW_AFK_ELEMENT_SMAC,
-					       key->src, mask->src,
-					       sizeof(key->src));
+					       MLXSW_AFK_ELEMENT_DMAC_0_31,
+					       key->dst + 2, mask->dst + 2, 4);
+		mlxsw_sp_acl_rulei_keymask_buf(rulei,
+					       MLXSW_AFK_ELEMENT_SMAC_32_47,
+					       key->src, mask->src, 2);
+		mlxsw_sp_acl_rulei_keymask_buf(rulei,
+					       MLXSW_AFK_ELEMENT_SMAC_0_31,
+					       key->src + 2, mask->src + 2, 4);
 	}
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index fe4327f..fd55758 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c

@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2018 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -33,422 +33,74 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/bitops.h>
+#include <linux/slab.h>
 
 #include "spectrum.h"
 
-#define MLXSW_SP_KVDL_SINGLE_BASE 0
-#define MLXSW_SP_KVDL_SINGLE_SIZE 16384
-#define MLXSW_SP_KVDL_SINGLE_END \
-	(MLXSW_SP_KVDL_SINGLE_SIZE + MLXSW_SP_KVDL_SINGLE_BASE - 1)
-
-#define MLXSW_SP_KVDL_CHUNKS_BASE \
-	(MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE)
-#define MLXSW_SP_KVDL_CHUNKS_SIZE 49152
-#define MLXSW_SP_KVDL_CHUNKS_END \
-	(MLXSW_SP_KVDL_CHUNKS_SIZE + MLXSW_SP_KVDL_CHUNKS_BASE - 1)
-
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_BASE \
-	(MLXSW_SP_KVDL_CHUNKS_BASE + MLXSW_SP_KVDL_CHUNKS_SIZE)
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE \
-	(MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_LARGE_CHUNKS_BASE)
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_END \
-	(MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP_KVDL_LARGE_CHUNKS_BASE - 1)
-
-#define MLXSW_SP_KVDL_SINGLE_ALLOC_SIZE 1
-#define MLXSW_SP_KVDL_CHUNKS_ALLOC_SIZE 32
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_ALLOC_SIZE 512
-
-struct mlxsw_sp_kvdl_part_info {
-	unsigned int part_index;
-	unsigned int start_index;
-	unsigned int end_index;
-	unsigned int alloc_size;
-	enum mlxsw_sp_resource_id resource_id;
-};
-
-enum mlxsw_sp_kvdl_part_id {
-	MLXSW_SP_KVDL_PART_ID_SINGLE,
-	MLXSW_SP_KVDL_PART_ID_CHUNKS,
-	MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS,
-};
-
-#define MLXSW_SP_KVDL_PART_INFO(id)				\
-[MLXSW_SP_KVDL_PART_ID_##id] = {				\
-	.start_index = MLXSW_SP_KVDL_##id##_BASE,		\
-	.end_index = MLXSW_SP_KVDL_##id##_END,			\
-	.alloc_size = MLXSW_SP_KVDL_##id##_ALLOC_SIZE,		\
-	.resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_##id,	\
-}
-
-static const struct mlxsw_sp_kvdl_part_info mlxsw_sp_kvdl_parts_info[] = {
-	MLXSW_SP_KVDL_PART_INFO(SINGLE),
-	MLXSW_SP_KVDL_PART_INFO(CHUNKS),
-	MLXSW_SP_KVDL_PART_INFO(LARGE_CHUNKS),
-};
-
-#define MLXSW_SP_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp_kvdl_parts_info)
-
-struct mlxsw_sp_kvdl_part {
-	struct mlxsw_sp_kvdl_part_info info;
-	unsigned long usage[0];	/* Entries */
-};
-
 struct mlxsw_sp_kvdl {
-	struct mlxsw_sp_kvdl_part *parts[MLXSW_SP_KVDL_PARTS_INFO_LEN];
+	const struct mlxsw_sp_kvdl_ops *kvdl_ops;
+	unsigned long priv[0];
+	/* priv has to be always the last item */
 };
 
-static struct mlxsw_sp_kvdl_part *
-mlxsw_sp_kvdl_alloc_size_part(struct mlxsw_sp_kvdl *kvdl,
-			      unsigned int alloc_size)
-{
-	struct mlxsw_sp_kvdl_part *part, *min_part = NULL;
-	int i;
-
-	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
-		part = kvdl->parts[i];
-		if (alloc_size <= part->info.alloc_size &&
-		    (!min_part ||
-		     part->info.alloc_size <= min_part->info.alloc_size))
-			min_part = part;
-	}
-
-	return min_part ?: ERR_PTR(-ENOBUFS);
-}
-
-static struct mlxsw_sp_kvdl_part *
-mlxsw_sp_kvdl_index_part(struct mlxsw_sp_kvdl *kvdl, u32 kvdl_index)
-{
-	struct mlxsw_sp_kvdl_part *part;
-	int i;
-
-	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
-		part = kvdl->parts[i];
-		if (kvdl_index >= part->info.start_index &&
-		    kvdl_index <= part->info.end_index)
-			return part;
-	}
-
-	return ERR_PTR(-EINVAL);
-}
-
-static u32
-mlxsw_sp_entry_index_kvdl_index(const struct mlxsw_sp_kvdl_part_info *info,
-				unsigned int entry_index)
-{
-	return info->start_index + entry_index * info->alloc_size;
-}
-
-static unsigned int
-mlxsw_sp_kvdl_index_entry_index(const struct mlxsw_sp_kvdl_part_info *info,
-				u32 kvdl_index)
-{
-	return (kvdl_index - info->start_index) / info->alloc_size;
-}
-
-static int mlxsw_sp_kvdl_part_alloc(struct mlxsw_sp_kvdl_part *part,
-				    u32 *p_kvdl_index)
-{
-	const struct mlxsw_sp_kvdl_part_info *info = &part->info;
-	unsigned int entry_index, nr_entries;
-
-	nr_entries = (info->end_index - info->start_index + 1) /
-		     info->alloc_size;
-	entry_index = find_first_zero_bit(part->usage, nr_entries);
-	if (entry_index == nr_entries)
-		return -ENOBUFS;
-	__set_bit(entry_index, part->usage);
-
-	*p_kvdl_index = mlxsw_sp_entry_index_kvdl_index(info, entry_index);
-
-	return 0;
-}
-
-static void mlxsw_sp_kvdl_part_free(struct mlxsw_sp_kvdl_part *part,
-				    u32 kvdl_index)
-{
-	const struct mlxsw_sp_kvdl_part_info *info = &part->info;
-	unsigned int entry_index;
-
-	entry_index = mlxsw_sp_kvdl_index_entry_index(info, kvdl_index);
-	__clear_bit(entry_index, part->usage);
-}
-
-int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
-			u32 *p_entry_index)
-{
-	struct mlxsw_sp_kvdl_part *part;
-
-	/* Find partition with smallest allocation size satisfying the
-	 * requested size.
-	 */
-	part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count);
-	if (IS_ERR(part))
-		return PTR_ERR(part);
-
-	return mlxsw_sp_kvdl_part_alloc(part, p_entry_index);
-}
-
-void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index)
-{
-	struct mlxsw_sp_kvdl_part *part;
-
-	part = mlxsw_sp_kvdl_index_part(mlxsw_sp->kvdl, entry_index);
-	if (IS_ERR(part))
-		return;
-	mlxsw_sp_kvdl_part_free(part, entry_index);
-}
-
-int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
-				   unsigned int entry_count,
-				   unsigned int *p_alloc_size)
-{
-	struct mlxsw_sp_kvdl_part *part;
-
-	part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count);
-	if (IS_ERR(part))
-		return PTR_ERR(part);
-
-	*p_alloc_size = part->info.alloc_size;
-
-	return 0;
-}
-
-static void mlxsw_sp_kvdl_part_update(struct mlxsw_sp_kvdl_part *part,
-				      struct mlxsw_sp_kvdl_part *part_prev,
-				      unsigned int size)
-{
-
-	if (!part_prev) {
-		part->info.end_index = size - 1;
-	} else {
-		part->info.start_index = part_prev->info.end_index + 1;
-		part->info.end_index = part->info.start_index + size - 1;
-	}
-}
-
-static struct mlxsw_sp_kvdl_part *
-mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
-			const struct mlxsw_sp_kvdl_part_info *info,
-			struct mlxsw_sp_kvdl_part *part_prev)
-{
-	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
-	struct mlxsw_sp_kvdl_part *part;
-	bool need_update = true;
-	unsigned int nr_entries;
-	size_t usage_size;
-	u64 resource_size;
-	int err;
-
-	err = devlink_resource_size_get(devlink, info->resource_id,
-					&resource_size);
-	if (err) {
-		need_update = false;
-		resource_size = info->end_index - info->start_index + 1;
-	}
-
-	nr_entries = div_u64(resource_size, info->alloc_size);
-	usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
-	part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
-	if (!part)
-		return ERR_PTR(-ENOMEM);
-
-	memcpy(&part->info, info, sizeof(part->info));
-
-	if (need_update)
-		mlxsw_sp_kvdl_part_update(part, part_prev, resource_size);
-	return part;
-}
-
-static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp_kvdl_part *part)
-{
-	kfree(part);
-}
-
-static int mlxsw_sp_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp)
-{
-	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
-	const struct mlxsw_sp_kvdl_part_info *info;
-	struct mlxsw_sp_kvdl_part *part_prev = NULL;
-	int err, i;
-
-	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
-		info = &mlxsw_sp_kvdl_parts_info[i];
-		kvdl->parts[i] = mlxsw_sp_kvdl_part_init(mlxsw_sp, info,
-							 part_prev);
-		if (IS_ERR(kvdl->parts[i])) {
-			err = PTR_ERR(kvdl->parts[i]);
-			goto err_kvdl_part_init;
-		}
-		part_prev = kvdl->parts[i];
-	}
-	return 0;
-
-err_kvdl_part_init:
-	for (i--; i >= 0; i--)
-		mlxsw_sp_kvdl_part_fini(kvdl->parts[i]);
-	return err;
-}
-
-static void mlxsw_sp_kvdl_parts_fini(struct mlxsw_sp *mlxsw_sp)
-{
-	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
-	int i;
-
-	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++)
-		mlxsw_sp_kvdl_part_fini(kvdl->parts[i]);
-}
-
-static u64 mlxsw_sp_kvdl_part_occ(struct mlxsw_sp_kvdl_part *part)
-{
-	const struct mlxsw_sp_kvdl_part_info *info = &part->info;
-	unsigned int nr_entries;
-	int bit = -1;
-	u64 occ = 0;
-
-	nr_entries = (info->end_index -
-		      info->start_index + 1) /
-		      info->alloc_size;
-	while ((bit = find_next_bit(part->usage, nr_entries, bit + 1))
-		< nr_entries)
-		occ += info->alloc_size;
-	return occ;
-}
-
-static u64 mlxsw_sp_kvdl_occ_get(void *priv)
-{
-	const struct mlxsw_sp *mlxsw_sp = priv;
-	u64 occ = 0;
-	int i;
-
-	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++)
-		occ += mlxsw_sp_kvdl_part_occ(mlxsw_sp->kvdl->parts[i]);
-
-	return occ;
-}
-
-static u64 mlxsw_sp_kvdl_single_occ_get(void *priv)
-{
-	const struct mlxsw_sp *mlxsw_sp = priv;
-	struct mlxsw_sp_kvdl_part *part;
-
-	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_SINGLE];
-	return mlxsw_sp_kvdl_part_occ(part);
-}
-
-static u64 mlxsw_sp_kvdl_chunks_occ_get(void *priv)
-{
-	const struct mlxsw_sp *mlxsw_sp = priv;
-	struct mlxsw_sp_kvdl_part *part;
-
-	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_CHUNKS];
-	return mlxsw_sp_kvdl_part_occ(part);
-}
-
-static u64 mlxsw_sp_kvdl_large_chunks_occ_get(void *priv)
-{
-	const struct mlxsw_sp *mlxsw_sp = priv;
-	struct mlxsw_sp_kvdl_part *part;
-
-	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS];
-	return mlxsw_sp_kvdl_part_occ(part);
-}
-
-int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
-{
-	struct devlink *devlink = priv_to_devlink(mlxsw_core);
-	static struct devlink_resource_size_params size_params;
-	u32 kvdl_max_size;
-	int err;
-
-	kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
-			MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
-			MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
-
-	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
-					  MLXSW_SP_KVDL_SINGLE_ALLOC_SIZE,
-					  DEVLINK_RESOURCE_UNIT_ENTRY);
-	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
-					MLXSW_SP_KVDL_SINGLE_SIZE,
-					MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
-					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&size_params);
-	if (err)
-		return err;
-
-	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
-					  MLXSW_SP_KVDL_CHUNKS_ALLOC_SIZE,
-					  DEVLINK_RESOURCE_UNIT_ENTRY);
-	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
-					MLXSW_SP_KVDL_CHUNKS_SIZE,
-					MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
-					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&size_params);
-	if (err)
-		return err;
-
-	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
-					  MLXSW_SP_KVDL_LARGE_CHUNKS_ALLOC_SIZE,
-					  DEVLINK_RESOURCE_UNIT_ENTRY);
-	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
-					MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
-					MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
-					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&size_params);
-	return err;
-}
-
 int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
 {
-	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	const struct mlxsw_sp_kvdl_ops *kvdl_ops = mlxsw_sp->kvdl_ops;
 	struct mlxsw_sp_kvdl *kvdl;
 	int err;
 
-	kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl), GFP_KERNEL);
+	kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl) + kvdl_ops->priv_size,
+		       GFP_KERNEL);
 	if (!kvdl)
 		return -ENOMEM;
+	kvdl->kvdl_ops = kvdl_ops;
 	mlxsw_sp->kvdl = kvdl;
 
-	err = mlxsw_sp_kvdl_parts_init(mlxsw_sp);
+	err = kvdl_ops->init(mlxsw_sp, kvdl->priv);
 	if (err)
-		goto err_kvdl_parts_init;
-
-	devlink_resource_occ_get_register(devlink,
-					  MLXSW_SP_RESOURCE_KVD_LINEAR,
-					  mlxsw_sp_kvdl_occ_get,
-					  mlxsw_sp);
-	devlink_resource_occ_get_register(devlink,
-					  MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
-					  mlxsw_sp_kvdl_single_occ_get,
-					  mlxsw_sp);
-	devlink_resource_occ_get_register(devlink,
-					  MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
-					  mlxsw_sp_kvdl_chunks_occ_get,
-					  mlxsw_sp);
-	devlink_resource_occ_get_register(devlink,
-					  MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
-					  mlxsw_sp_kvdl_large_chunks_occ_get,
-					  mlxsw_sp);
-
+		goto err_init;
 	return 0;
 
-err_kvdl_parts_init:
-	kfree(mlxsw_sp->kvdl);
+err_init:
+	kfree(kvdl);
 	return err;
 }
 
 void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp)
 {
-	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
 
-	devlink_resource_occ_get_unregister(devlink,
-					    MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS);
-	devlink_resource_occ_get_unregister(devlink,
-					    MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS);
-	devlink_resource_occ_get_unregister(devlink,
-					    MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE);
-	devlink_resource_occ_get_unregister(devlink,
-					    MLXSW_SP_RESOURCE_KVD_LINEAR);
-	mlxsw_sp_kvdl_parts_fini(mlxsw_sp);
-	kfree(mlxsw_sp->kvdl);
+	kvdl->kvdl_ops->fini(mlxsw_sp, kvdl->priv);
+	kfree(kvdl);
+}
+
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp,
+			enum mlxsw_sp_kvdl_entry_type type,
+			unsigned int entry_count, u32 *p_entry_index)
+{
+	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
+
+	return kvdl->kvdl_ops->alloc(mlxsw_sp, kvdl->priv, type,
+				     entry_count, p_entry_index);
+}
+
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp,
+			enum mlxsw_sp_kvdl_entry_type type,
+			unsigned int entry_count, int entry_index)
+{
+	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
+
+	kvdl->kvdl_ops->free(mlxsw_sp, kvdl->priv, type,
+			     entry_count, entry_index);
+}
+
+int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp,
+				    enum mlxsw_sp_kvdl_entry_type type,
+				    unsigned int entry_count,
+				    unsigned int *p_alloc_count)
+{
+	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
+
+	return kvdl->kvdl_ops->alloc_size_query(mlxsw_sp, kvdl->priv, type,
+						entry_count, p_alloc_count);
 }

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index a825396..98dcaf7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c

@@ -1075,6 +1075,6 @@ void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp)
 	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
 
 	cancel_delayed_work_sync(&mr->stats_update_dw);
-	mr->mr_ops->fini(mr->priv);
+	mr->mr_ops->fini(mlxsw_sp, mr->priv);
 	kfree(mr);
 }

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
index 7c864a8..c92fa90 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h

@@ -46,15 +46,6 @@ enum mlxsw_sp_mr_route_action {
 	MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD,
 };
 
-enum mlxsw_sp_mr_route_prio {
-	MLXSW_SP_MR_ROUTE_PRIO_SG,
-	MLXSW_SP_MR_ROUTE_PRIO_STARG,
-	MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
-	__MLXSW_SP_MR_ROUTE_PRIO_MAX
-};
-
-#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1)
-
 struct mlxsw_sp_mr_route_key {
 	int vrid;
 	enum mlxsw_sp_l3proto proto;
@@ -101,7 +92,7 @@ struct mlxsw_sp_mr_ops {
 			      u16 erif_index);
 	void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
 			      void *route_priv);
-	void (*fini)(void *priv);
+	void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
 };
 
 struct mlxsw_sp_mr;

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
index 4f4c0d3..e9c9f1f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c

@@ -1,7 +1,8 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ * Copyright (c) 2018 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,7 +36,6 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
-#include <linux/parman.h>
 
 #include "spectrum_mr_tcam.h"
 #include "reg.h"
@@ -43,15 +43,8 @@
 #include "core_acl_flex_actions.h"
 #include "spectrum_mr.h"
 
-struct mlxsw_sp_mr_tcam_region {
-	struct mlxsw_sp *mlxsw_sp;
-	enum mlxsw_reg_rtar_key_type rtar_key_type;
-	struct parman *parman;
-	struct parman_prio *parman_prios;
-};
-
 struct mlxsw_sp_mr_tcam {
-	struct mlxsw_sp_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX];
+	void *priv;
 };
 
 /* This struct maps to one RIGR2 register entry */
@@ -84,8 +77,6 @@ mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list)
 	INIT_LIST_HEAD(&erif_list->erif_sublists);
 }
 
-#define MLXSW_SP_KVDL_RIGR2_SIZE 1
-
 static struct mlxsw_sp_mr_erif_sublist *
 mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_mr_tcam_erif_list *erif_list)
@@ -96,8 +87,8 @@ mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp,
 	erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL);
 	if (!erif_sublist)
 		return ERR_PTR(-ENOMEM);
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_RIGR2_SIZE,
-				  &erif_sublist->rigr2_kvdl_index);
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+				  1, &erif_sublist->rigr2_kvdl_index);
 	if (err) {
 		kfree(erif_sublist);
 		return ERR_PTR(err);
@@ -112,7 +103,8 @@ mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_mr_erif_sublist *erif_sublist)
 {
 	list_del(&erif_sublist->list);
-	mlxsw_sp_kvdl_free(mlxsw_sp, erif_sublist->rigr2_kvdl_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+			   1, erif_sublist->rigr2_kvdl_index);
 	kfree(erif_sublist);
 }
 
@@ -221,12 +213,11 @@ struct mlxsw_sp_mr_tcam_route {
 	struct mlxsw_sp_mr_tcam_erif_list erif_list;
 	struct mlxsw_afa_block *afa_block;
 	u32 counter_index;
-	struct parman_item parman_item;
-	struct parman_prio *parman_prio;
 	enum mlxsw_sp_mr_route_action action;
 	struct mlxsw_sp_mr_route_key key;
 	u16 irif_index;
 	u16 min_mtu;
+	void *priv;
 };
 
 static struct mlxsw_afa_block *
@@ -297,60 +288,6 @@ mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block)
 	mlxsw_afa_block_destroy(afa_block);
 }
 
-static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp,
-					  struct parman_item *parman_item,
-					  struct mlxsw_sp_mr_route_key *key,
-					  struct mlxsw_afa_block *afa_block)
-{
-	char rmft2_pl[MLXSW_REG_RMFT2_LEN];
-
-	switch (key->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index,
-					  key->vrid,
-					  MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
-					  ntohl(key->group.addr4),
-					  ntohl(key->group_mask.addr4),
-					  ntohl(key->source.addr4),
-					  ntohl(key->source_mask.addr4),
-					  mlxsw_afa_block_first_set(afa_block));
-		break;
-	case MLXSW_SP_L3_PROTO_IPV6:
-		mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index,
-					  key->vrid,
-					  MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
-					  key->group.addr6,
-					  key->group_mask.addr6,
-					  key->source.addr6,
-					  key->source_mask.addr6,
-					  mlxsw_afa_block_first_set(afa_block));
-	}
-
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
-}
-
-static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid,
-					 struct mlxsw_sp_mr_route_key *key,
-					 struct parman_item *parman_item)
-{
-	struct in6_addr zero_addr = IN6ADDR_ANY_INIT;
-	char rmft2_pl[MLXSW_REG_RMFT2_LEN];
-
-	switch (key->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index,
-					  vrid, 0, 0, 0, 0, 0, 0, NULL);
-		break;
-	case MLXSW_SP_L3_PROTO_IPV6:
-		mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index,
-					  vrid, 0, 0, zero_addr, zero_addr,
-					  zero_addr, zero_addr, NULL);
-		break;
-	}
-
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
-}
-
 static int
 mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_mr_tcam_erif_list *erif_list,
@@ -370,51 +307,12 @@ mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
-static struct mlxsw_sp_mr_tcam_region *
-mlxsw_sp_mr_tcam_protocol_region(struct mlxsw_sp_mr_tcam *mr_tcam,
-				 enum mlxsw_sp_l3proto proto)
-{
-	return &mr_tcam->tcam_regions[proto];
-}
-
-static int
-mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam,
-				       struct mlxsw_sp_mr_tcam_route *route,
-				       enum mlxsw_sp_mr_route_prio prio)
-{
-	struct mlxsw_sp_mr_tcam_region *tcam_region;
-	int err;
-
-	tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam,
-						       route->key.proto);
-	err = parman_item_add(tcam_region->parman,
-			      &tcam_region->parman_prios[prio],
-			      &route->parman_item);
-	if (err)
-		return err;
-
-	route->parman_prio = &tcam_region->parman_prios[prio];
-	return 0;
-}
-
-static void
-mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam,
-					  struct mlxsw_sp_mr_tcam_route *route)
-{
-	struct mlxsw_sp_mr_tcam_region *tcam_region;
-
-	tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam,
-						       route->key.proto);
-
-	parman_item_remove(tcam_region->parman,
-			   route->parman_prio, &route->parman_item);
-}
-
 static int
 mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
 			      void *route_priv,
 			      struct mlxsw_sp_mr_route_params *route_params)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
 	int err;
@@ -448,22 +346,23 @@ mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
 		goto err_afa_block_create;
 	}
 
-	/* Allocate place in the TCAM */
-	err = mlxsw_sp_mr_tcam_route_parman_item_add(mr_tcam, route,
-						     route_params->prio);
-	if (err)
-		goto err_parman_item_add;
+	route->priv = kzalloc(ops->route_priv_size, GFP_KERNEL);
+	if (!route->priv) {
+		err = -ENOMEM;
+		goto err_route_priv_alloc;
+	}
 
 	/* Write the route to the TCAM */
-	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-					     &route->key, route->afa_block);
+	err = ops->route_create(mlxsw_sp, mr_tcam->priv, route->priv,
+				&route->key, route->afa_block,
+				route_params->prio);
 	if (err)
-		goto err_route_replace;
+		goto err_route_create;
 	return 0;
 
-err_route_replace:
-	mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
-err_parman_item_add:
+err_route_create:
+	kfree(route->priv);
+err_route_priv_alloc:
 	mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
 err_afa_block_create:
 	mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
@@ -476,12 +375,12 @@ mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
 static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp,
 					   void *priv, void *route_priv)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
 
-	mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid,
-				      &route->key, &route->parman_item);
-	mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
+	ops->route_destroy(mlxsw_sp, mr_tcam->priv, route->priv, &route->key);
+	kfree(route->priv);
 	mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
 	mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
 	mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
@@ -502,6 +401,7 @@ mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp,
 				     void *route_priv,
 				     enum mlxsw_sp_mr_route_action route_action)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_afa_block *afa_block;
 	int err;
@@ -516,8 +416,7 @@ mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp,
 		return PTR_ERR(afa_block);
 
 	/* Update the TCAM route entry */
-	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-					     &route->key, afa_block);
+	err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
 	if (err)
 		goto err;
 
@@ -534,6 +433,7 @@ mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp,
 						 void *route_priv, u16 min_mtu)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_afa_block *afa_block;
 	int err;
@@ -549,8 +449,7 @@ static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp,
 		return PTR_ERR(afa_block);
 
 	/* Update the TCAM route entry */
-	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-					     &route->key, afa_block);
+	err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
 	if (err)
 		goto err;
 
@@ -596,6 +495,7 @@ static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp,
 					   void *route_priv, u16 erif_index)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_sp_mr_erif_sublist *erif_sublist;
 	struct mlxsw_sp_mr_tcam_erif_list erif_list;
@@ -630,8 +530,7 @@ static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp,
 	}
 
 	/* Update the TCAM route entry */
-	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-					     &route->key, afa_block);
+	err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
 	if (err)
 		goto err_route_write;
 
@@ -653,6 +552,7 @@ static int
 mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv,
 			      struct mlxsw_sp_mr_route_info *route_info)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_sp_mr_tcam_erif_list erif_list;
 	struct mlxsw_afa_block *afa_block;
@@ -677,8 +577,7 @@ mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv,
 	}
 
 	/* Update the TCAM route entry */
-	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-					     &route->key, afa_block);
+	err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
 	if (err)
 		goto err_route_write;
 
@@ -699,167 +598,36 @@ mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv,
 	return err;
 }
 
-#define MLXSW_SP_MR_TCAM_REGION_BASE_COUNT 16
-#define MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP 16
-
-static int
-mlxsw_sp_mr_tcam_region_alloc(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
-{
-	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-	char rtar_pl[MLXSW_REG_RTAR_LEN];
-
-	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE,
-			    mr_tcam_region->rtar_key_type,
-			    MLXSW_SP_MR_TCAM_REGION_BASE_COUNT);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
-}
-
-static void
-mlxsw_sp_mr_tcam_region_free(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
-{
-	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-	char rtar_pl[MLXSW_REG_RTAR_LEN];
-
-	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE,
-			    mr_tcam_region->rtar_key_type, 0);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
-}
-
-static int mlxsw_sp_mr_tcam_region_parman_resize(void *priv,
-						 unsigned long new_count)
-{
-	struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv;
-	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-	char rtar_pl[MLXSW_REG_RTAR_LEN];
-	u64 max_tcam_rules;
-
-	max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
-	if (new_count > max_tcam_rules)
-		return -EINVAL;
-	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE,
-			    mr_tcam_region->rtar_key_type, new_count);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
-}
-
-static void mlxsw_sp_mr_tcam_region_parman_move(void *priv,
-						unsigned long from_index,
-						unsigned long to_index,
-						unsigned long count)
-{
-	struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv;
-	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-	char rrcr_pl[MLXSW_REG_RRCR_LEN];
-
-	mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE,
-			    from_index, count,
-			    mr_tcam_region->rtar_key_type, to_index);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl);
-}
-
-static const struct parman_ops mlxsw_sp_mr_tcam_region_parman_ops = {
-	.base_count	= MLXSW_SP_MR_TCAM_REGION_BASE_COUNT,
-	.resize_step	= MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP,
-	.resize		= mlxsw_sp_mr_tcam_region_parman_resize,
-	.move		= mlxsw_sp_mr_tcam_region_parman_move,
-	.algo		= PARMAN_ALGO_TYPE_LSORT,
-};
-
-static int
-mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp,
-			     struct mlxsw_sp_mr_tcam_region *mr_tcam_region,
-			     enum mlxsw_reg_rtar_key_type rtar_key_type)
-{
-	struct parman_prio *parman_prios;
-	struct parman *parman;
-	int err;
-	int i;
-
-	mr_tcam_region->rtar_key_type = rtar_key_type;
-	mr_tcam_region->mlxsw_sp = mlxsw_sp;
-
-	err = mlxsw_sp_mr_tcam_region_alloc(mr_tcam_region);
-	if (err)
-		return err;
-
-	parman = parman_create(&mlxsw_sp_mr_tcam_region_parman_ops,
-			       mr_tcam_region);
-	if (!parman) {
-		err = -ENOMEM;
-		goto err_parman_create;
-	}
-	mr_tcam_region->parman = parman;
-
-	parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1,
-				     sizeof(*parman_prios), GFP_KERNEL);
-	if (!parman_prios) {
-		err = -ENOMEM;
-		goto err_parman_prios_alloc;
-	}
-	mr_tcam_region->parman_prios = parman_prios;
-
-	for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
-		parman_prio_init(mr_tcam_region->parman,
-				 &mr_tcam_region->parman_prios[i], i);
-	return 0;
-
-err_parman_prios_alloc:
-	parman_destroy(parman);
-err_parman_create:
-	mlxsw_sp_mr_tcam_region_free(mr_tcam_region);
-	return err;
-}
-
-static void
-mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
-{
-	int i;
-
-	for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
-		parman_prio_fini(&mr_tcam_region->parman_prios[i]);
-	kfree(mr_tcam_region->parman_prios);
-	parman_destroy(mr_tcam_region->parman);
-	mlxsw_sp_mr_tcam_region_free(mr_tcam_region);
-}
-
 static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
-	struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
-	u32 rtar_key;
 	int err;
 
-	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) ||
-	    !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES))
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES))
 		return -EIO;
 
-	rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST;
-	err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
-					   &region[MLXSW_SP_L3_PROTO_IPV4],
-					   rtar_key);
-	if (err)
-		return err;
+	mr_tcam->priv = kzalloc(ops->priv_size, GFP_KERNEL);
+	if (!mr_tcam->priv)
+		return -ENOMEM;
 
-	rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST;
-	err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
-					   &region[MLXSW_SP_L3_PROTO_IPV6],
-					   rtar_key);
+	err = ops->init(mlxsw_sp, mr_tcam->priv);
 	if (err)
-		goto err_ipv6_region_init;
-
+		goto err_init;
 	return 0;
 
-err_ipv6_region_init:
-	mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+err_init:
+	kfree(mr_tcam->priv);
 	return err;
 }
 
-static void mlxsw_sp_mr_tcam_fini(void *priv)
+static void mlxsw_sp_mr_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
-	struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
 
-	mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV6]);
-	mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+	ops->fini(mr_tcam->priv);
+	kfree(mr_tcam->priv);
 }
 
 const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = {

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 88bd27a..e51c8dc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c

@@ -48,6 +48,7 @@
 #include <linux/route.h>
 #include <linux/gcd.h>
 #include <linux/random.h>
+#include <linux/if_macvlan.h>
 #include <net/netevent.h>
 #include <net/neighbour.h>
 #include <net/arp.h>
@@ -60,6 +61,7 @@
 #include <net/ndisc.h>
 #include <net/ipv6.h>
 #include <net/fib_notifier.h>
+#include <net/switchdev.h>
 
 #include "spectrum.h"
 #include "core.h"
@@ -165,6 +167,7 @@ struct mlxsw_sp_rif_ops {
 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
 					 struct netlink_ext_ack *extack);
+	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
 };
 
 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
@@ -1106,7 +1109,8 @@ mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
 	u32 tunnel_index;
 	int err;
 
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+				  1, &tunnel_index);
 	if (err)
 		return err;
 
@@ -1122,7 +1126,8 @@ static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
 	/* Unlink this node from the IPIP entry that it's the decap entry of. */
 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
 	fib_entry->decap.ipip_entry = NULL;
-	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+			   1, fib_entry->decap.tunnel_index);
 }
 
 static struct mlxsw_sp_fib_node *
@@ -3162,8 +3167,9 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
 	 * by the device and make sure the request can be satisfied.
 	 */
 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
-	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
-					     &alloc_size);
+	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
+					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+					      *p_adj_grp_size, &alloc_size);
 	if (err)
 		return err;
 	/* It is possible the allocation results in more allocated
@@ -3275,7 +3281,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 		/* No valid allocation size available. */
 		goto set_trap;
 
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+				  ecmp_size, &adj_index);
 	if (err) {
 		/* We ran out of KVD linear space, just set the
 		 * trap and let everything flow through kernel.
@@ -3310,7 +3317,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 
 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
 					     old_adj_index, old_ecmp_size);
-	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+			   old_ecmp_size, old_adj_index);
 	if (err) {
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
 		goto set_trap;
@@ -3332,7 +3340,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
 	if (old_adj_index_valid)
-		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
+		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+				   nh_grp->ecmp_size, nh_grp->adj_index);
 }
 
 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
@@ -6021,6 +6030,12 @@ mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
 		    !list_empty(&inet6_dev->addr_list))
 			addr_list_empty = false;
 
+		/* macvlans do not have a RIF, but rather piggy back on the
+		 * RIF of their lower device.
+		 */
+		if (netif_is_macvlan(dev) && addr_list_empty)
+			return true;
+
 		if (rif && addr_list_empty &&
 		    !netif_is_l3_slave(rif->dev))
 			return true;
@@ -6434,6 +6449,123 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
 	return 0;
 }
 
+static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
+{
+	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
+	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
+
+	return ether_addr_equal_masked(mac, vrrp4, mask);
+}
+
+static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
+{
+	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
+	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
+
+	return ether_addr_equal_masked(mac, vrrp6, mask);
+}
+
+static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
+				const u8 *mac, bool adding)
+{
+	char ritr_pl[MLXSW_REG_RITR_LEN];
+	u8 vrrp_id = adding ? mac[5] : 0;
+	int err;
+
+	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
+	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
+		return 0;
+
+	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
+	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+	if (err)
+		return err;
+
+	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
+		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
+	else
+		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
+				    const struct net_device *macvlan_dev,
+				    struct netlink_ext_ack *extack)
+{
+	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
+	struct mlxsw_sp_rif *rif;
+	int err;
+
+	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
+	if (!rif) {
+		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+		return -EOPNOTSUPP;
+	}
+
+	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
+				  mlxsw_sp_fid_index(rif->fid), true);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
+				   macvlan_dev->dev_addr, true);
+	if (err)
+		goto err_rif_vrrp_add;
+
+	/* Make sure the bridge driver does not have this MAC pointing at
+	 * some other port.
+	 */
+	if (rif->ops->fdb_del)
+		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
+
+	return 0;
+
+err_rif_vrrp_add:
+	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
+			    mlxsw_sp_fid_index(rif->fid), false);
+	return err;
+}
+
+void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
+			      const struct net_device *macvlan_dev)
+{
+	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
+	struct mlxsw_sp_rif *rif;
+
+	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
+	/* If we do not have a RIF, then we already took care of
+	 * removing the macvlan's MAC during RIF deletion.
+	 */
+	if (!rif)
+		return;
+	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
+			     false);
+	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
+			    mlxsw_sp_fid_index(rif->fid), false);
+}
+
+static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev,
+					   unsigned long event,
+					   struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp *mlxsw_sp;
+
+	mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
+	if (!mlxsw_sp)
+		return 0;
+
+	switch (event) {
+	case NETDEV_UP:
+		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
+	case NETDEV_DOWN:
+		mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
+		break;
+	}
+
+	return 0;
+}
+
 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
 				     unsigned long event,
 				     struct netlink_ext_ack *extack)
@@ -6446,6 +6578,8 @@ static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
 		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
 	else if (is_vlan_dev(dev))
 		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
+	else if (netif_is_macvlan(dev))
+		return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack);
 	else
 		return 0;
 }
@@ -6686,7 +6820,10 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
 	int err = 0;
 
-	if (!mlxsw_sp)
+	/* We do not create a RIF for a macvlan, but only use it to
+	 * direct more MAC addresses to the router.
+	 */
+	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
 		return 0;
 
 	switch (event) {
@@ -6707,6 +6844,27 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
 	return err;
 }
 
+static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
+{
+	struct mlxsw_sp_rif *rif = data;
+
+	if (!netif_is_macvlan(dev))
+		return 0;
+
+	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
+				   mlxsw_sp_fid_index(rif->fid), false);
+}
+
+static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
+{
+	if (!netif_is_macvlan_port(rif->dev))
+		return 0;
+
+	netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
+	return netdev_walk_all_upper_dev_rcu(rif->dev,
+					     __mlxsw_sp_rif_macvlan_flush, rif);
+}
+
 static struct mlxsw_sp_rif_subport *
 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
 {
@@ -6773,6 +6931,7 @@ static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
 	mlxsw_sp_fid_rif_set(fid, NULL);
 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
 			    mlxsw_sp_fid_index(fid), false);
+	mlxsw_sp_rif_macvlan_flush(rif);
 	mlxsw_sp_rif_subport_op(rif, false);
 }
 
@@ -6860,6 +7019,7 @@ static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
 	mlxsw_sp_fid_rif_set(fid, NULL);
 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
 			    mlxsw_sp_fid_index(fid), false);
+	mlxsw_sp_rif_macvlan_flush(rif);
 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
 			       mlxsw_sp_router_port(mlxsw_sp), false);
 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
@@ -6878,23 +7038,39 @@ mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
 		vid = vlan_dev_vlan_id(rif->dev);
 	} else {
 		err = br_vlan_get_pvid(rif->dev, &vid);
-		if (!vid)
-			err = -EINVAL;
-		if (err) {
+		if (err < 0 || !vid) {
 			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
-			return ERR_PTR(err);
+			return ERR_PTR(-EINVAL);
 		}
 	}
 
 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
 }
 
+static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
+{
+	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
+	struct switchdev_notifier_fdb_info info;
+	struct net_device *br_dev;
+	struct net_device *dev;
+
+	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
+	dev = br_fdb_find_port(br_dev, mac, vid);
+	if (!dev)
+		return;
+
+	info.addr = mac;
+	info.vid = vid;
+	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
+}
+
 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
 	.rif_size		= sizeof(struct mlxsw_sp_rif),
 	.configure		= mlxsw_sp_rif_vlan_configure,
 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
+	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
 };
 
 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
@@ -6946,6 +7122,7 @@ static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
 	mlxsw_sp_fid_rif_set(fid, NULL);
 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
 			    mlxsw_sp_fid_index(fid), false);
+	mlxsw_sp_rif_macvlan_flush(rif);
 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
 			       mlxsw_sp_router_port(mlxsw_sp), false);
 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
@@ -6960,12 +7137,27 @@ mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
 }
 
+static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
+{
+	struct switchdev_notifier_fdb_info info;
+	struct net_device *dev;
+
+	dev = br_fdb_find_port(rif->dev, mac, 0);
+	if (!dev)
+		return;
+
+	info.addr = mac;
+	info.vid = 0;
+	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
+}
+
 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
 	.type			= MLXSW_SP_RIF_TYPE_FID,
 	.rif_size		= sizeof(struct mlxsw_sp_rif),
 	.configure		= mlxsw_sp_rif_fid_configure,
 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
+	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
 };
 
 static struct mlxsw_sp_rif_ipip_lb *

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 3d187d8..e42d640 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c

@@ -36,6 +36,7 @@
 #include <linux/list.h>
 #include <net/arp.h>
 #include <net/gre.h>
+#include <net/lag.h>
 #include <net/ndisc.h>
 #include <net/ip6_tunnel.h>
 
@@ -254,7 +255,9 @@ mlxsw_sp_span_entry_lag(struct net_device *lag_dev)
 	struct list_head *iter;
 
 	netdev_for_each_lower_dev(lag_dev, dev, iter)
-		if ((dev->flags & IFF_UP) && mlxsw_sp_port_dev_check(dev))
+		if (netif_carrier_ok(dev) &&
+		    net_lag_port_dev_txable(dev) &&
+		    mlxsw_sp_port_dev_check(dev))
 			return dev;
 
 	return NULL;

diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 399e9d6..eb437f5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h

@@ -63,6 +63,7 @@ enum {
 	MLXSW_TRAP_ID_LBERROR = 0x54,
 	MLXSW_TRAP_ID_IPV4_OSPF = 0x55,
 	MLXSW_TRAP_ID_IPV4_PIM = 0x58,
+	MLXSW_TRAP_ID_IPV4_VRRP = 0x59,
 	MLXSW_TRAP_ID_RPF = 0x5C,
 	MLXSW_TRAP_ID_IP2ME = 0x5F,
 	MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60,
@@ -78,6 +79,7 @@ enum {
 	MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F,
 	MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
 	MLXSW_TRAP_ID_IPV6_PIM = 0x79,
+	MLXSW_TRAP_ID_IPV6_VRRP = 0x7A,
 	MLXSW_TRAP_ID_IPV4_BGP = 0x88,
 	MLXSW_TRAP_ID_IPV6_BGP = 0x89,
 	MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A,

diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index bbe5764..ef2114d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h

@@ -73,7 +73,7 @@ struct nfp_app;
 
 struct nfp_fl_mask_id {
 	struct circ_buf mask_id_free_list;
-	struct timespec64 *last_used;
+	ktime_t *last_used;
 	u8 init_unallocated;
 };
 

diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 93fb809..c098730 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c

@@ -158,7 +158,6 @@ static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id)
 {
 	struct nfp_flower_priv *priv = app->priv;
 	struct circ_buf *ring;
-	struct timespec64 now;
 
 	ring = &priv->mask_ids.mask_id_free_list;
 	/* Checking if buffer is full. */
@@ -169,8 +168,7 @@ static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id)
 	ring->head = (ring->head + NFP_FLOWER_MASK_ELEMENT_RS) %
 		     (NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS);
 
-	getnstimeofday64(&now);
-	priv->mask_ids.last_used[mask_id] = now;
+	priv->mask_ids.last_used[mask_id] = ktime_get();
 
 	return 0;
 }
@@ -178,7 +176,7 @@ static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id)
 static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id)
 {
 	struct nfp_flower_priv *priv = app->priv;
-	struct timespec64 delta, now;
+	ktime_t reuse_timeout;
 	struct circ_buf *ring;
 	u8 temp_id, freed_id;
 
@@ -198,10 +196,10 @@ static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id)
 	memcpy(&temp_id, &ring->buf[ring->tail], NFP_FLOWER_MASK_ELEMENT_RS);
 	*mask_id = temp_id;
 
-	getnstimeofday64(&now);
-	delta = timespec64_sub(now, priv->mask_ids.last_used[*mask_id]);
+	reuse_timeout = ktime_add_ns(priv->mask_ids.last_used[*mask_id],
+				     NFP_FL_MASK_REUSE_TIME_NS);
 
-	if (timespec64_to_ns(&delta) < NFP_FL_MASK_REUSE_TIME_NS)
+	if (ktime_before(ktime_get(), reuse_timeout))
 		goto err_not_found;
 
 	memcpy(&ring->buf[ring->tail], &freed_id, NFP_FLOWER_MASK_ELEMENT_RS);

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 68f1221..4a7f54c 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c

@@ -1656,7 +1656,8 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 }
 
 static u16 ravb_select_queue(struct net_device *ndev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	/* If skb needs TX timestamp, it is handled in network control queue */
 	return (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) ? RAVB_NC :

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 8fedc28..1a96dd9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c

@@ -321,18 +321,16 @@ static int tc_setup_cbs(struct stmmac_priv *priv,
 	speed_div = (priv->speed == SPEED_100) ? 100000 : 1000000;
 
 	/* Final adjustments for HW */
-	value = qopt->idleslope * 1024UL * ptr;
-	do_div(value, speed_div);
+	value = div_s64(qopt->idleslope * 1024ll * ptr, speed_div);
 	priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0);
 
-	value = -qopt->sendslope * 1024UL * ptr;
-	do_div(value, speed_div);
+	value = div_s64(-qopt->sendslope * 1024ll * ptr, speed_div);
 	priv->plat->tx_queues_cfg[queue].send_slope = value & GENMASK(31, 0);
 
-	value = qopt->hicredit * 1024UL * 8;
+	value = qopt->hicredit * 1024ll * 8;
 	priv->plat->tx_queues_cfg[queue].high_credit = value & GENMASK(31, 0);
 
-	value = qopt->locredit * 1024UL * 8;
+	value = qopt->locredit * 1024ll * 8;
 	priv->plat->tx_queues_cfg[queue].low_credit = value & GENMASK(31, 0);
 
 	ret = stmmac_config_cbs(priv, priv->hw,

diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index a5dd627..d42f47f 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c

@@ -101,7 +101,8 @@ static struct vnet_port *vsw_tx_port_find(struct sk_buff *skb,
 }
 
 static u16 vsw_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct vnet_port *port = netdev_priv(dev);
 

diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index a94f504..12539b3 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c

@@ -234,7 +234,8 @@ static struct vnet_port *vnet_tx_port_find(struct sk_buff *skb,
 }
 
 static u16 vnet_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	struct vnet *vp = netdev_priv(dev);
 	struct vnet_port *port = __tx_port_find(vp, skb);

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 0939981..00761fe 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c

@@ -253,23 +253,24 @@ struct cpsw_ss_regs {
 #define RX_DSCP_PRI_MAP7    0x4c /* Rx DSCP Priority to Rx Packet Mapping */
 
 /* Bit definitions for the CPSW2_CONTROL register */
-#define PASS_PRI_TAGGED     (1<<24) /* Pass Priority Tagged */
-#define VLAN_LTYPE2_EN      (1<<21) /* VLAN LTYPE 2 enable */
-#define VLAN_LTYPE1_EN      (1<<20) /* VLAN LTYPE 1 enable */
-#define DSCP_PRI_EN         (1<<16) /* DSCP Priority Enable */
-#define TS_320              (1<<14) /* Time Sync Dest Port 320 enable */
-#define TS_319              (1<<13) /* Time Sync Dest Port 319 enable */
-#define TS_132              (1<<12) /* Time Sync Dest IP Addr 132 enable */
-#define TS_131              (1<<11) /* Time Sync Dest IP Addr 131 enable */
-#define TS_130              (1<<10) /* Time Sync Dest IP Addr 130 enable */
-#define TS_129              (1<<9)  /* Time Sync Dest IP Addr 129 enable */
-#define TS_TTL_NONZERO      (1<<8)  /* Time Sync Time To Live Non-zero enable */
-#define TS_ANNEX_F_EN       (1<<6)  /* Time Sync Annex F enable */
-#define TS_ANNEX_D_EN       (1<<4)  /* Time Sync Annex D enable */
-#define TS_LTYPE2_EN        (1<<3)  /* Time Sync LTYPE 2 enable */
-#define TS_LTYPE1_EN        (1<<2)  /* Time Sync LTYPE 1 enable */
-#define TS_TX_EN            (1<<1)  /* Time Sync Transmit Enable */
-#define TS_RX_EN            (1<<0)  /* Time Sync Receive Enable */
+#define PASS_PRI_TAGGED     BIT(24) /* Pass Priority Tagged */
+#define VLAN_LTYPE2_EN      BIT(21) /* VLAN LTYPE 2 enable */
+#define VLAN_LTYPE1_EN      BIT(20) /* VLAN LTYPE 1 enable */
+#define DSCP_PRI_EN         BIT(16) /* DSCP Priority Enable */
+#define TS_107              BIT(15) /* Tyme Sync Dest IP Address 107 */
+#define TS_320              BIT(14) /* Time Sync Dest Port 320 enable */
+#define TS_319              BIT(13) /* Time Sync Dest Port 319 enable */
+#define TS_132              BIT(12) /* Time Sync Dest IP Addr 132 enable */
+#define TS_131              BIT(11) /* Time Sync Dest IP Addr 131 enable */
+#define TS_130              BIT(10) /* Time Sync Dest IP Addr 130 enable */
+#define TS_129              BIT(9)  /* Time Sync Dest IP Addr 129 enable */
+#define TS_TTL_NONZERO      BIT(8)  /* Time Sync Time To Live Non-zero enable */
+#define TS_ANNEX_F_EN       BIT(6)  /* Time Sync Annex F enable */
+#define TS_ANNEX_D_EN       BIT(4)  /* Time Sync Annex D enable */
+#define TS_LTYPE2_EN        BIT(3)  /* Time Sync LTYPE 2 enable */
+#define TS_LTYPE1_EN        BIT(2)  /* Time Sync LTYPE 1 enable */
+#define TS_TX_EN            BIT(1)  /* Time Sync Transmit Enable */
+#define TS_RX_EN            BIT(0)  /* Time Sync Receive Enable */
 
 #define CTRL_V2_TS_BITS \
 	(TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
@@ -281,7 +282,7 @@ struct cpsw_ss_regs {
 
 
 #define CTRL_V3_TS_BITS \
-	(TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
+	(TS_107 | TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
 	 TS_TTL_NONZERO | TS_ANNEX_F_EN | TS_ANNEX_D_EN |\
 	 TS_LTYPE1_EN)
 

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 6f63c87..b4ea58d 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c

@@ -114,7 +114,10 @@ static bool cpts_match_tx_ts(struct cpts *cpts, struct cpts_event *event)
 			dev_consume_skb_any(skb);
 			dev_dbg(cpts->dev, "match tx timestamp mtype %u seqid %04x\n",
 				mtype, seqid);
-		} else if (time_after(jiffies, skb_cb->tmo)) {
+			break;
+		}
+
+		if (time_after(jiffies, skb_cb->tmo)) {
 			/* timeout any expired skbs over 1s */
 			dev_dbg(cpts->dev,
 				"expiring tx timestamp mtype %u seqid %04x\n",

diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 6ebf110..a1d335a 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c

@@ -1889,13 +1889,6 @@ static int netcp_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid)
 	return err;
 }
 
-static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb,
-			      void *accel_priv,
-			      select_queue_fallback_t fallback)
-{
-	return 0;
-}
-
 static int netcp_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			  void *type_data)
 {
@@ -1972,7 +1965,7 @@ static const struct net_device_ops netcp_netdev_ops = {
 	.ndo_vlan_rx_add_vid	= netcp_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= netcp_rx_kill_vid,
 	.ndo_tx_timeout		= netcp_ndo_tx_timeout,
-	.ndo_select_queue	= netcp_select_queue,
+	.ndo_select_queue	= dev_pick_tx_zero,
 	.ndo_setup_tc		= netcp_setup_tc,
 };
 

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index dd1d6e1..cf4f40a 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c

@@ -329,7 +329,7 @@ static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb)
 }
 
 static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
-			       void *accel_priv,
+			       struct net_device *sb_dev,
 			       select_queue_fallback_t fallback)
 {
 	struct net_device_context *ndc = netdev_priv(ndev);
@@ -343,9 +343,9 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 
 		if (vf_ops->ndo_select_queue)
 			txq = vf_ops->ndo_select_queue(vf_netdev, skb,
-						       accel_priv, fallback);
+						       sb_dev, fallback);
 		else
-			txq = fallback(vf_netdev, skb);
+			txq = fallback(vf_netdev, skb, NULL);
 
 		/* Record the queue selected by VF so that it can be
 		 * used for common case where VF has more queues than

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index adde8fc..cfda146 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c

@@ -514,7 +514,6 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 	const struct macvlan_dev *vlan = netdev_priv(dev);
 	const struct macvlan_port *port = vlan->port;
 	const struct macvlan_dev *dest;
-	void *accel_priv = NULL;
 
 	if (vlan->mode == MACVLAN_MODE_BRIDGE) {
 		const struct ethhdr *eth = (void *)skb->data;
@@ -533,15 +532,10 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 			return NET_XMIT_SUCCESS;
 		}
 	}
-
-	/* For packets that are non-multicast and not bridged we will pass
-	 * the necessary information so that the lowerdev can distinguish
-	 * the source of the packets via the accel_priv value.
-	 */
-	accel_priv = vlan->accel_priv;
 xmit_world:
 	skb->dev = vlan->lowerdev;
-	return dev_queue_xmit_accel(skb, accel_priv);
+	return dev_queue_xmit_accel(skb,
+				    netdev_get_sb_channel(dev) ? dev : NULL);
 }
 
 static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb)
@@ -1647,6 +1641,7 @@ static int macvlan_device_event(struct notifier_block *unused,
 
 	switch (event) {
 	case NETDEV_UP:
+	case NETDEV_DOWN:
 	case NETDEV_CHANGE:
 		list_for_each_entry(vlan, &port->vlans, list)
 			netif_stacked_transfer_operstate(vlan->lowerdev,

diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
index 4f390fa..d00d42c 100644
--- a/drivers/net/net_failover.c
+++ b/drivers/net/net_failover.c

@@ -115,7 +115,8 @@ static netdev_tx_t net_failover_start_xmit(struct sk_buff *skb,
 }
 
 static u16 net_failover_select_queue(struct net_device *dev,
-				     struct sk_buff *skb, void *accel_priv,
+				     struct sk_buff *skb,
+				     struct net_device *sb_dev,
 				     select_queue_fallback_t fallback)
 {
 	struct net_failover_info *nfo_info = netdev_priv(dev);
@@ -128,9 +129,9 @@ static u16 net_failover_select_queue(struct net_device *dev,
 
 		if (ops->ndo_select_queue)
 			txq = ops->ndo_select_queue(primary_dev, skb,
-						    accel_priv, fallback);
+						    sb_dev, fallback);
 		else
-			txq = fallback(primary_dev, skb);
+			txq = fallback(primary_dev, skb, NULL);
 
 		qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
 

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index b070959..6a047d3 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c

@@ -41,11 +41,6 @@
 
 #define team_port_exists(dev) (dev->priv_flags & IFF_TEAM_PORT)
 
-static struct team_port *team_port_get_rcu(const struct net_device *dev)
-{
-	return rcu_dereference(dev->rx_handler_data);
-}
-
 static struct team_port *team_port_get_rtnl(const struct net_device *dev)
 {
 	struct team_port *port = rtnl_dereference(dev->rx_handler_data);
@@ -1707,7 +1702,8 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	/*
 	 * This helper function exists to help dev_pick_tx get the correct

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 49a5021..7b5748f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c

@@ -607,7 +607,8 @@ static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
 }
 
 static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	u16 ret;

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 601ae17..ababba3 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c

@@ -637,9 +637,62 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
 	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
 }
 
-/* Add new entry to forwarding table -- assumes lock held */
+static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
+					 const u8 *mac, __u16 state,
+					 __be32 src_vni, __u8 ndm_flags)
+{
+	struct vxlan_fdb *f;
+
+	f = kmalloc(sizeof(*f), GFP_ATOMIC);
+	if (!f)
+		return NULL;
+	f->state = state;
+	f->flags = ndm_flags;
+	f->updated = f->used = jiffies;
+	f->vni = src_vni;
+	INIT_LIST_HEAD(&f->remotes);
+	memcpy(f->eth_addr, mac, ETH_ALEN);
+
+	return f;
+}
+
 static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 			    const u8 *mac, union vxlan_addr *ip,
+			    __u16 state, __be16 port, __be32 src_vni,
+			    __be32 vni, __u32 ifindex, __u8 ndm_flags,
+			    struct vxlan_fdb **fdb)
+{
+	struct vxlan_rdst *rd = NULL;
+	struct vxlan_fdb *f;
+	int rc;
+
+	if (vxlan->cfg.addrmax &&
+	    vxlan->addrcnt >= vxlan->cfg.addrmax)
+		return -ENOSPC;
+
+	netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
+	f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
+	if (!f)
+		return -ENOMEM;
+
+	rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+	if (rc < 0) {
+		kfree(f);
+		return rc;
+	}
+
+	++vxlan->addrcnt;
+	hlist_add_head_rcu(&f->hlist,
+			   vxlan_fdb_head(vxlan, mac, src_vni));
+
+	*fdb = f;
+
+	return 0;
+}
+
+/* Add new entry to forwarding table -- assumes lock held */
+static int vxlan_fdb_update(struct vxlan_dev *vxlan,
+			    const u8 *mac, union vxlan_addr *ip,
 			    __u16 state, __u16 flags,
 			    __be16 port, __be32 src_vni, __be32 vni,
 			    __u32 ifindex, __u8 ndm_flags)
@@ -688,37 +741,17 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 		if (!(flags & NLM_F_CREATE))
 			return -ENOENT;
 
-		if (vxlan->cfg.addrmax &&
-		    vxlan->addrcnt >= vxlan->cfg.addrmax)
-			return -ENOSPC;
-
 		/* Disallow replace to add a multicast entry */
 		if ((flags & NLM_F_REPLACE) &&
 		    (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
 			return -EOPNOTSUPP;
 
 		netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
-		f = kmalloc(sizeof(*f), GFP_ATOMIC);
-		if (!f)
-			return -ENOMEM;
-
-		notify = 1;
-		f->state = state;
-		f->flags = ndm_flags;
-		f->updated = f->used = jiffies;
-		f->vni = src_vni;
-		INIT_LIST_HEAD(&f->remotes);
-		memcpy(f->eth_addr, mac, ETH_ALEN);
-
-		rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
-		if (rc < 0) {
-			kfree(f);
+		rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
+				      vni, ifindex, ndm_flags, &f);
+		if (rc < 0)
 			return rc;
-		}
-
-		++vxlan->addrcnt;
-		hlist_add_head_rcu(&f->hlist,
-				   vxlan_fdb_head(vxlan, mac, src_vni));
+		notify = 1;
 	}
 
 	if (notify) {
@@ -742,13 +775,15 @@ static void vxlan_fdb_free(struct rcu_head *head)
 	kfree(f);
 }
 
-static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
+static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
+			      bool do_notify)
 {
 	netdev_dbg(vxlan->dev,
 		    "delete %pM\n", f->eth_addr);
 
 	--vxlan->addrcnt;
-	vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
+	if (do_notify)
+		vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
 
 	hlist_del_rcu(&f->hlist);
 	call_rcu(&f->rcu, vxlan_fdb_free);
@@ -864,7 +899,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		return -EAFNOSUPPORT;
 
 	spin_lock_bh(&vxlan->hash_lock);
-	err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
+	err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
 			       port, src_vni, vni, ifindex, ndm->ndm_flags);
 	spin_unlock_bh(&vxlan->hash_lock);
 
@@ -898,7 +933,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
 		goto out;
 	}
 
-	vxlan_fdb_destroy(vxlan, f);
+	vxlan_fdb_destroy(vxlan, f, true);
 
 out:
 	return 0;
@@ -1007,7 +1042,7 @@ static bool vxlan_snoop(struct net_device *dev,
 
 		/* close off race between vxlan_flush and incoming packets */
 		if (netif_running(dev))
-			vxlan_fdb_create(vxlan, src_mac, src_ip,
+			vxlan_fdb_update(vxlan, src_mac, src_ip,
 					 NUD_REACHABLE,
 					 NLM_F_EXCL|NLM_F_CREATE,
 					 vxlan->cfg.dst_port,
@@ -2366,7 +2401,7 @@ static void vxlan_cleanup(struct timer_list *t)
 					   "garbage collect %pM\n",
 					   f->eth_addr);
 				f->state = NUD_STALE;
-				vxlan_fdb_destroy(vxlan, f);
+				vxlan_fdb_destroy(vxlan, f, true);
 			} else if (time_before(timeout, next_timer))
 				next_timer = timeout;
 		}
@@ -2417,7 +2452,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
 	spin_lock_bh(&vxlan->hash_lock);
 	f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
 	if (f)
-		vxlan_fdb_destroy(vxlan, f);
+		vxlan_fdb_destroy(vxlan, f, true);
 	spin_unlock_bh(&vxlan->hash_lock);
 }
 
@@ -2471,7 +2506,7 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
 				continue;
 			/* the all_zeros_mac entry is deleted at vxlan_uninit */
 			if (!is_zero_ether_addr(f->eth_addr))
-				vxlan_fdb_destroy(vxlan, f);
+				vxlan_fdb_destroy(vxlan, f, true);
 		}
 	}
 	spin_unlock_bh(&vxlan->hash_lock);
@@ -3162,6 +3197,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_fdb *f = NULL;
 	int err;
 
 	err = vxlan_dev_configure(net, dev, conf, false, extack);
@@ -3175,24 +3211,35 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
 		err = vxlan_fdb_create(vxlan, all_zeros_mac,
 				       &vxlan->default_dst.remote_ip,
 				       NUD_REACHABLE | NUD_PERMANENT,
-				       NLM_F_EXCL | NLM_F_CREATE,
 				       vxlan->cfg.dst_port,
 				       vxlan->default_dst.remote_vni,
 				       vxlan->default_dst.remote_vni,
 				       vxlan->default_dst.remote_ifindex,
-				       NTF_SELF);
+				       NTF_SELF, &f);
 		if (err)
 			return err;
 	}
 
 	err = register_netdevice(dev);
+	if (err)
+		goto errout;
+
+	err = rtnl_configure_link(dev, NULL);
 	if (err) {
-		vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
-		return err;
+		unregister_netdevice(dev);
+		goto errout;
 	}
 
+	/* notify default fdb entry */
+	if (f)
+		vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
+
 	list_add(&vxlan->next, &vn->vxlan_list);
 	return 0;
+errout:
+	if (f)
+		vxlan_fdb_destroy(vxlan, f, false);
+	return err;
 }
 
 static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
@@ -3427,6 +3474,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
 	struct vxlan_rdst *dst = &vxlan->default_dst;
 	struct vxlan_rdst old_dst;
 	struct vxlan_config conf;
+	struct vxlan_fdb *f = NULL;
 	int err;
 
 	err = vxlan_nl2conf(tb, data,
@@ -3455,16 +3503,16 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
 			err = vxlan_fdb_create(vxlan, all_zeros_mac,
 					       &dst->remote_ip,
 					       NUD_REACHABLE | NUD_PERMANENT,
-					       NLM_F_CREATE | NLM_F_APPEND,
 					       vxlan->cfg.dst_port,
 					       dst->remote_vni,
 					       dst->remote_vni,
 					       dst->remote_ifindex,
-					       NTF_SELF);
+					       NTF_SELF, &f);
 			if (err) {
 				spin_unlock_bh(&vxlan->hash_lock);
 				return err;
 			}
+			vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
 		}
 		spin_unlock_bh(&vxlan->hash_lock);
 	}

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index bd46b25..2a3f0f1 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c

@@ -2134,7 +2134,6 @@ static void
 fst_openport(struct fst_port_info *port)
 {
 	int signals;
-	int txq_length;
 
 	/* Only init things if card is actually running. This allows open to
 	 * succeed for downloads etc.
@@ -2161,7 +2160,6 @@ fst_openport(struct fst_port_info *port)
 		else
 			netif_carrier_off(port_to_dev(port));
 
-		txq_length = port->txqe - port->txqs;
 		port->txqe = 0;
 		port->txqs = 0;
 	}

diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 90a4ad9..093bd21 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c

@@ -1491,7 +1491,6 @@ static int lmc_rx(struct net_device *dev)
     lmc_softc_t *sc = dev_to_sc(dev);
     int i;
     int rx_work_limit = LMC_RXDESCS;
-    unsigned int next_rx;
     int rxIntLoopCnt;		/* debug -baz */
     int localLengthErrCnt = 0;
     long stat;
@@ -1505,7 +1504,6 @@ static int lmc_rx(struct net_device *dev)
     rxIntLoopCnt = 0;		/* debug -baz */
 
     i = sc->lmc_next_rx % LMC_RXDESCS;
-    next_rx = sc->lmc_next_rx;
 
     while (((stat = sc->lmc_rxring[i].status) & LMC_RDES_OWN_BIT) != DESC_OWNED_BY_DC21X4)
     {

diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/net/wimax/i2400m/control.c
index 4c41790..094cea7 100644
--- a/drivers/net/wimax/i2400m/control.c
+++ b/drivers/net/wimax/i2400m/control.c

@@ -566,13 +566,12 @@ static void i2400m_msg_ack_hook(struct i2400m *i2400m,
 {
 	int result;
 	struct device *dev = i2400m_dev(i2400m);
-	unsigned ack_type, ack_status;
+	unsigned int ack_type;
 	char strerr[32];
 
 	/* Chew on the message, we might need some information from
 	 * here */
 	ack_type = le16_to_cpu(l3l4_hdr->type);
-	ack_status = le16_to_cpu(l3l4_hdr->status);
 	switch (ack_type) {
 	case I2400M_MT_CMD_ENTER_POWERSAVE:
 		/* This is just left here for the sake of example, as

diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c
index a89b568..e9fc168 100644
--- a/drivers/net/wimax/i2400m/fw.c
+++ b/drivers/net/wimax/i2400m/fw.c

@@ -1552,7 +1552,6 @@ int i2400m_dev_bootstrap(struct i2400m *i2400m, enum i2400m_bri flags)
 	int ret, itr;
 	struct device *dev = i2400m_dev(i2400m);
 	struct i2400m_fw *i2400m_fw;
-	const struct i2400m_bcf_hdr *bcf;	/* Firmware data */
 	const struct firmware *fw;
 	const char *fw_name;
 
@@ -1574,7 +1573,7 @@ int i2400m_dev_bootstrap(struct i2400m *i2400m, enum i2400m_bri flags)
 	}
 
 	/* Load firmware files to memory. */
-	for (itr = 0, bcf = NULL, ret = -ENOENT; ; itr++) {
+	for (itr = 0, ret = -ENOENT; ; itr++) {
 		fw_name = i2400m->bus_fw_names[itr];
 		if (fw_name == NULL) {
 			dev_err(dev, "Could not find a usable firmware image\n");

diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c
index a654687..9ab3f0f 100644
--- a/drivers/net/wimax/i2400m/netdev.c
+++ b/drivers/net/wimax/i2400m/netdev.c

@@ -535,14 +535,12 @@ void i2400m_net_erx(struct i2400m *i2400m, struct sk_buff *skb,
 {
 	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
 	struct device *dev = i2400m_dev(i2400m);
-	int protocol;
 
 	d_fnstart(2, dev, "(i2400m %p skb %p [%u] cs %d)\n",
 		  i2400m, skb, skb->len, cs);
 	switch(cs) {
 	case I2400M_CS_IPV4_0:
 	case I2400M_CS_IPV4:
-		protocol = ETH_P_IP;
 		i2400m_rx_fake_eth_header(i2400m->wimax_dev.net_dev,
 					  skb->data - ETH_HLEN,
 					  cpu_to_be16(ETH_P_IP));

diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index 510f6b8..fa3e8dd 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c

@@ -1279,7 +1279,8 @@ static struct net_device_stats *mwifiex_get_stats(struct net_device *dev)
 
 static u16
 mwifiex_netdev_select_wmm_queue(struct net_device *dev, struct sk_buff *skb,
-				void *accel_priv, select_queue_fallback_t fallback)
+				struct net_device *sb_dev,
+				select_queue_fallback_t fallback)
 {
 	skb->priority = cfg80211_classify8021d(skb, NULL);
 	return mwifiex_1d_to_wmm_queue[skb->priority];

diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 78ebe49..92274c2 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c

@@ -148,14 +148,14 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
 }
 
 static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
-			       void *accel_priv,
+			       struct net_device *sb_dev,
 			       select_queue_fallback_t fallback)
 {
 	struct xenvif *vif = netdev_priv(dev);
 	unsigned int size = vif->hash.size;
 
 	if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
-		return fallback(dev, skb) % dev->real_num_tx_queues;
+		return fallback(dev, skb, NULL) % dev->real_num_tx_queues;
 
 	xenvif_set_skb_hash(vif, skb);
 

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index a57daec..d67cd37 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c

@@ -545,7 +545,8 @@ static int xennet_count_skb_slots(struct sk_buff *skb)
 }
 
 static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
-			       void *accel_priv, select_queue_fallback_t fallback)
+			       struct net_device *sb_dev,
+			       select_queue_fallback_t fallback)
 {
 	unsigned int num_queues = dev->real_num_tx_queues;
 	u32 hash;

diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index d963baf..e92391d 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c

@@ -367,14 +367,23 @@ struct phy_device *of_phy_get_and_connect(struct net_device *dev,
 	phy_interface_t iface;
 	struct device_node *phy_np;
 	struct phy_device *phy;
+	int ret;
 
 	iface = of_get_phy_mode(np);
 	if (iface < 0)
 		return NULL;
-
-	phy_np = of_parse_phandle(np, "phy-handle", 0);
-	if (!phy_np)
-		return NULL;
+	if (of_phy_is_fixed_link(np)) {
+		ret = of_phy_register_fixed_link(np);
+		if (ret < 0) {
+			netdev_err(dev, "broken fixed-link specification\n");
+			return NULL;
+		}
+		phy_np = of_node_get(np);
+	} else {
+		phy_np = of_parse_phandle(np, "phy-handle", 0);
+		if (!phy_np)
+			return NULL;
+	}
 
 	phy = of_phy_connect(dev, phy_np, hndlr, 0, iface);
 

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index a246a61..a932aac 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h

@@ -465,7 +465,6 @@ struct qeth_qdio_out_buffer {
 	struct sk_buff_head skb_list;
 	int is_header[QDIO_MAX_ELEMENTS_PER_BUFFER];
 
-	struct qaob *aob;
 	struct qeth_qdio_out_q *q;
 	struct qeth_qdio_out_buffer *next_pending;
 };
@@ -662,7 +661,6 @@ struct qeth_card_info {
 	int portno;
 	enum qeth_card_types type;
 	enum qeth_link_types link_type;
-	int is_multicast_different;
 	int initial_mtu;
 	int max_mtu;
 	int broadcast_capable;
@@ -935,6 +933,19 @@ static inline int qeth_send_simple_setassparms_v6(struct qeth_card *card,
 						 data, QETH_PROT_IPV6);
 }
 
+int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb,
+			    int ipv);
+static inline struct qeth_qdio_out_q *qeth_get_tx_queue(struct qeth_card *card,
+							struct sk_buff *skb,
+							int ipv, int cast_type)
+{
+	if (IS_IQD(card) && cast_type != RTN_UNICAST)
+		return card->qdio.out_qs[card->qdio.no_out_queues - 1];
+	if (!card->qdio.do_prio_queueing)
+		return card->qdio.out_qs[card->qdio.default_out_queue];
+	return card->qdio.out_qs[qeth_get_priority_queue(card, skb, ipv)];
+}
+
 extern struct qeth_discipline qeth_l2_discipline;
 extern struct qeth_discipline qeth_l3_discipline;
 extern const struct attribute_group *qeth_generic_attr_groups[];
@@ -972,7 +983,6 @@ int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *,
 		  void *);
 struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *,
 			enum qeth_ipa_cmds, enum qeth_prot_versions);
-int qeth_query_setadapterparms(struct qeth_card *);
 struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
 		struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *,
 		struct qeth_hdr **);
@@ -998,11 +1008,6 @@ int qeth_query_switch_attributes(struct qeth_card *card,
 int qeth_send_control_data(struct qeth_card *, int, struct qeth_cmd_buffer *,
 	int (*reply_cb)(struct qeth_card *, struct qeth_reply*, unsigned long),
 	void *reply_param);
-int qeth_bridgeport_query_ports(struct qeth_card *card,
-	enum qeth_sbp_roles *role, enum qeth_sbp_states *state);
-int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role);
-int qeth_bridgeport_an_set(struct qeth_card *card, int enable);
-int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int);
 int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb,
 			 int extra_elems, int data_offset);
 int qeth_get_elements_for_frags(struct sk_buff *);
@@ -1026,7 +1031,6 @@ int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback);
 int qeth_hdr_chk_and_bounce(struct sk_buff *, struct qeth_hdr **, int);
 int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
 int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
-int qeth_query_ipassists(struct qeth_card *, enum qeth_prot_versions prot);
 void qeth_trace_features(struct qeth_card *);
 void qeth_close_dev(struct qeth_card *);
 int qeth_send_setassparms(struct qeth_card *, struct qeth_cmd_buffer *, __u16,

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index d01ac29..d80972b 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c

@@ -473,7 +473,6 @@ static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx,
 	if (forced_cleanup && (atomic_read(&(q->bufs[bidx]->state)) ==
 					QETH_QDIO_BUF_HANDLED_DELAYED)) {
 		/* for recovery situations */
-		q->bufs[bidx]->aob = q->bufstates[bidx].aob;
 		qeth_init_qdio_out_buf(q, bidx);
 		QETH_CARD_TEXT(q->card, 2, "clprecov");
 	}
@@ -510,7 +509,6 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
 	}
 	qeth_notify_skbs(buffer->q, buffer, notification);
 
-	buffer->aob = NULL;
 	/* Free dangling allocations. The attached skbs are handled by
 	 * qeth_cleanup_handled_pending().
 	 */
@@ -1267,8 +1265,7 @@ static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf)
 }
 
 static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
-		struct qeth_qdio_out_buffer *buf,
-		enum qeth_qdio_buffer_states newbufstate)
+				     struct qeth_qdio_out_buffer *buf)
 {
 	int i;
 
@@ -1276,23 +1273,19 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
 	if (buf->buffer->element[0].sflags & SBAL_SFLAGS0_PCI_REQ)
 		atomic_dec(&queue->set_pci_flags_count);
 
-	if (newbufstate == QETH_QDIO_BUF_EMPTY) {
-		qeth_release_skbs(buf);
-	}
+	qeth_release_skbs(buf);
+
 	for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(queue->card); ++i) {
 		if (buf->buffer->element[i].addr && buf->is_header[i])
 			kmem_cache_free(qeth_core_header_cache,
 				buf->buffer->element[i].addr);
 		buf->is_header[i] = 0;
-		buf->buffer->element[i].length = 0;
-		buf->buffer->element[i].addr = NULL;
-		buf->buffer->element[i].eflags = 0;
-		buf->buffer->element[i].sflags = 0;
 	}
-	buf->buffer->element[15].eflags = 0;
-	buf->buffer->element[15].sflags = 0;
+
+	qeth_scrub_qdio_buffer(buf->buffer,
+			       QETH_MAX_BUFFER_ELEMENTS(queue->card));
 	buf->next_element_to_fill = 0;
-	atomic_set(&buf->state, newbufstate);
+	atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY);
 }
 
 static void qeth_clear_outq_buffers(struct qeth_qdio_out_q *q, int free)
@@ -1303,7 +1296,7 @@ static void qeth_clear_outq_buffers(struct qeth_qdio_out_q *q, int free)
 		if (!q->bufs[j])
 			continue;
 		qeth_cleanup_handled_pending(q, j, 1);
-		qeth_clear_output_buffer(q, q->bufs[j], QETH_QDIO_BUF_EMPTY);
+		qeth_clear_output_buffer(q, q->bufs[j]);
 		if (free) {
 			kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]);
 			q->bufs[j] = NULL;
@@ -1544,8 +1537,6 @@ static void qeth_determine_card_type(struct qeth_card *card)
 	card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
 	card->info.type = CARD_RDEV(card)->id.driver_info;
 	card->qdio.no_out_queues = QETH_MAX_QUEUES;
-	if (card->info.type == QETH_CARD_TYPE_IQD)
-		card->info.is_multicast_different = 0x0103;
 	qeth_update_from_chp_desc(card);
 }
 
@@ -2473,32 +2464,20 @@ static int qeth_ulp_setup(struct qeth_card *card)
 
 static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx)
 {
-	int rc;
 	struct qeth_qdio_out_buffer *newbuf;
 
-	rc = 0;
 	newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, GFP_ATOMIC);
-	if (!newbuf) {
-		rc = -ENOMEM;
-		goto out;
-	}
+	if (!newbuf)
+		return -ENOMEM;
+
 	newbuf->buffer = q->qdio_bufs[bidx];
 	skb_queue_head_init(&newbuf->skb_list);
 	lockdep_set_class(&newbuf->skb_list.lock, &qdio_out_skb_queue_key);
 	newbuf->q = q;
-	newbuf->aob = NULL;
 	newbuf->next_pending = q->bufs[bidx];
 	atomic_set(&newbuf->state, QETH_QDIO_BUF_EMPTY);
 	q->bufs[bidx] = newbuf;
-	if (q->bufstates) {
-		q->bufstates[bidx].user = newbuf;
-		QETH_CARD_TEXT_(q->card, 2, "nbs%d", bidx);
-		QETH_CARD_TEXT_(q->card, 2, "%lx", (long) newbuf);
-		QETH_CARD_TEXT_(q->card, 2, "%lx",
-				(long) newbuf->next_pending);
-	}
-out:
-	return rc;
+	return 0;
 }
 
 static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q)
@@ -2908,8 +2887,7 @@ int qeth_init_qdio_queues(struct qeth_card *card)
 				   QDIO_MAX_BUFFERS_PER_Q);
 		for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
 			qeth_clear_output_buffer(card->qdio.out_qs[i],
-					card->qdio.out_qs[i]->bufs[j],
-					QETH_QDIO_BUF_EMPTY);
+						 card->qdio.out_qs[i]->bufs[j]);
 		}
 		card->qdio.out_qs[i]->card = card;
 		card->qdio.out_qs[i]->next_buf_to_fill = 0;
@@ -3076,7 +3054,7 @@ static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card,
 	return iob;
 }
 
-int qeth_query_setadapterparms(struct qeth_card *card)
+static int qeth_query_setadapterparms(struct qeth_card *card)
 {
 	int rc;
 	struct qeth_cmd_buffer *iob;
@@ -3089,7 +3067,6 @@ int qeth_query_setadapterparms(struct qeth_card *card)
 	rc = qeth_send_ipa_cmd(card, iob, qeth_query_setadapterparms_cb, NULL);
 	return rc;
 }
-EXPORT_SYMBOL_GPL(qeth_query_setadapterparms);
 
 static int qeth_query_ipassists_cb(struct qeth_card *card,
 		struct qeth_reply *reply, unsigned long data)
@@ -3129,7 +3106,8 @@ static int qeth_query_ipassists_cb(struct qeth_card *card,
 	return 0;
 }
 
-int qeth_query_ipassists(struct qeth_card *card, enum qeth_prot_versions prot)
+static int qeth_query_ipassists(struct qeth_card *card,
+				enum qeth_prot_versions prot)
 {
 	int rc;
 	struct qeth_cmd_buffer *iob;
@@ -3141,7 +3119,6 @@ int qeth_query_ipassists(struct qeth_card *card, enum qeth_prot_versions prot)
 	rc = qeth_send_ipa_cmd(card, iob, qeth_query_ipassists_cb, NULL);
 	return rc;
 }
-EXPORT_SYMBOL_GPL(qeth_query_ipassists);
 
 static int qeth_query_switch_attributes_cb(struct qeth_card *card,
 				struct qeth_reply *reply, unsigned long data)
@@ -3180,7 +3157,6 @@ int qeth_query_switch_attributes(struct qeth_card *card,
 	return qeth_send_ipa_cmd(card, iob,
 				qeth_query_switch_attributes_cb, sw_info);
 }
-EXPORT_SYMBOL_GPL(qeth_query_switch_attributes);
 
 static int qeth_query_setdiagass_cb(struct qeth_card *card,
 		struct qeth_reply *reply, unsigned long data)
@@ -3634,10 +3610,10 @@ int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq)
 }
 EXPORT_SYMBOL_GPL(qeth_configure_cq);
 
-
-static void qeth_qdio_cq_handler(struct qeth_card *card,
-		unsigned int qdio_err,
-		unsigned int queue, int first_element, int count) {
+static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err,
+				 unsigned int queue, int first_element,
+				 int count)
+{
 	struct qeth_qdio_q *cq = card->qdio.c_q;
 	int i;
 	int rc;
@@ -3663,25 +3639,17 @@ static void qeth_qdio_cq_handler(struct qeth_card *card,
 	for (i = first_element; i < first_element + count; ++i) {
 		int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
 		struct qdio_buffer *buffer = cq->qdio_bufs[bidx];
-		int e;
+		int e = 0;
 
-		e = 0;
 		while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) &&
 		       buffer->element[e].addr) {
 			unsigned long phys_aob_addr;
 
 			phys_aob_addr = (unsigned long) buffer->element[e].addr;
 			qeth_qdio_handle_aob(card, phys_aob_addr);
-			buffer->element[e].addr = NULL;
-			buffer->element[e].eflags = 0;
-			buffer->element[e].sflags = 0;
-			buffer->element[e].length = 0;
-
 			++e;
 		}
-
-		buffer->element[15].eflags = 0;
-		buffer->element[15].sflags = 0;
+		qeth_scrub_qdio_buffer(buffer, QDIO_MAX_ELEMENTS_PER_BUFFER);
 	}
 	rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, queue,
 		    card->qdio.c_q->next_buf_to_init,
@@ -3760,11 +3728,7 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
 				qeth_notify_skbs(queue, buffer,
 						 TX_NOTIFY_PENDING);
 			}
-			buffer->aob = queue->bufstates[bidx].aob;
 			QETH_CARD_TEXT_(queue->card, 5, "pel%d", bidx);
-			QETH_CARD_TEXT(queue->card, 5, "aob");
-			QETH_CARD_TEXT_(queue->card, 5, "%lx",
-					virt_to_phys(buffer->aob));
 
 			/* prepare the queue slot for re-use: */
 			qeth_scrub_qdio_buffer(buffer->buffer,
@@ -3782,8 +3746,7 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
 				qeth_notify_skbs(queue, buffer, n);
 			}
 
-			qeth_clear_output_buffer(queue, buffer,
-						QETH_QDIO_BUF_EMPTY);
+			qeth_clear_output_buffer(queue, buffer);
 		}
 		qeth_cleanup_handled_pending(queue, bidx, 0);
 	}
@@ -3810,15 +3773,11 @@ static inline int qeth_cut_iqd_prio(struct qeth_card *card, int queue_num)
  * Note: Function assumes that we have 4 outbound queues.
  */
 int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb,
-			int ipv, int cast_type)
+			    int ipv)
 {
 	__be16 *tci;
 	u8 tos;
 
-	if (cast_type && card->info.is_multicast_different)
-		return card->info.is_multicast_different &
-			(card->qdio.no_out_queues - 1);
-
 	switch (card->qdio.do_prio_queueing) {
 	case QETH_PRIO_Q_ING_TOS:
 	case QETH_PRIO_Q_ING_PREC:
@@ -5887,31 +5846,13 @@ static int qeth_core_restore(struct ccwgroup_device *gdev)
 	return 0;
 }
 
-static struct ccwgroup_driver qeth_core_ccwgroup_driver = {
-	.driver = {
-		.owner = THIS_MODULE,
-		.name = "qeth",
-	},
-	.ccw_driver = &qeth_ccw_driver,
-	.setup = qeth_core_probe_device,
-	.remove = qeth_core_remove_device,
-	.set_online = qeth_core_set_online,
-	.set_offline = qeth_core_set_offline,
-	.shutdown = qeth_core_shutdown,
-	.prepare = NULL,
-	.complete = NULL,
-	.freeze = qeth_core_freeze,
-	.thaw = qeth_core_thaw,
-	.restore = qeth_core_restore,
-};
-
 static ssize_t group_store(struct device_driver *ddrv, const char *buf,
 			   size_t count)
 {
 	int err;
 
-	err = ccwgroup_create_dev(qeth_core_root_dev,
-				  &qeth_core_ccwgroup_driver, 3, buf);
+	err = ccwgroup_create_dev(qeth_core_root_dev, to_ccwgroupdrv(ddrv), 3,
+				  buf);
 
 	return err ? err : count;
 }
@@ -5929,6 +5870,25 @@ static const struct attribute_group *qeth_drv_attr_groups[] = {
 	NULL,
 };
 
+static struct ccwgroup_driver qeth_core_ccwgroup_driver = {
+	.driver = {
+		.groups = qeth_drv_attr_groups,
+		.owner = THIS_MODULE,
+		.name = "qeth",
+	},
+	.ccw_driver = &qeth_ccw_driver,
+	.setup = qeth_core_probe_device,
+	.remove = qeth_core_remove_device,
+	.set_online = qeth_core_set_online,
+	.set_offline = qeth_core_set_offline,
+	.shutdown = qeth_core_shutdown,
+	.prepare = NULL,
+	.complete = NULL,
+	.freeze = qeth_core_freeze,
+	.thaw = qeth_core_thaw,
+	.restore = qeth_core_restore,
+};
+
 int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
 	struct qeth_card *card = dev->ml_priv;
@@ -6620,7 +6580,6 @@ static int __init qeth_core_init(void)
 	rc = ccw_driver_register(&qeth_ccw_driver);
 	if (rc)
 		goto ccw_err;
-	qeth_core_ccwgroup_driver.driver.groups = qeth_drv_attr_groups;
 	rc = ccwgroup_driver_register(&qeth_core_ccwgroup_driver);
 	if (rc)
 		goto ccwgroup_err;

diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index 878e62f..54c3522 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h

@@ -64,6 +64,8 @@ enum qeth_card_types {
 	QETH_CARD_TYPE_OSX     = 2,
 };
 
+#define IS_IQD(card)	((card)->info.type == QETH_CARD_TYPE_IQD)
+
 #define QETH_MPC_DIFINFO_LEN_INDICATES_LINK_TYPE 0x18
 /* only the first two bytes are looked at in qeth_get_cardname_short */
 enum qeth_link_types {

diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h
index f213005..ddc615b 100644
--- a/drivers/s390/net/qeth_l2.h
+++ b/drivers/s390/net/qeth_l2.h

@@ -14,6 +14,11 @@ extern const struct attribute_group *qeth_l2_attr_groups[];
 int qeth_l2_create_device_attributes(struct device *);
 void qeth_l2_remove_device_attributes(struct device *);
 void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card);
+int qeth_bridgeport_query_ports(struct qeth_card *card,
+				enum qeth_sbp_roles *role,
+				enum qeth_sbp_states *state);
+int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role);
+int qeth_bridgeport_an_set(struct qeth_card *card, int enable);
 
 int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state);
 int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state);

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 2487f0a..8ac243d 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c

@@ -26,7 +26,6 @@
 
 static int qeth_l2_set_offline(struct ccwgroup_device *);
 static int qeth_l2_stop(struct net_device *);
-static void qeth_l2_set_rx_mode(struct net_device *);
 static void qeth_bridgeport_query_support(struct qeth_card *card);
 static void qeth_bridge_state_change(struct qeth_card *card,
 					struct qeth_ipa_cmd *cmd);
@@ -186,12 +185,12 @@ static void qeth_l2_del_all_macs(struct qeth_card *card)
 static int qeth_l2_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
 {
 	if (card->info.type == QETH_CARD_TYPE_OSN)
-		return RTN_UNSPEC;
+		return RTN_UNICAST;
 	if (is_broadcast_ether_addr(skb->data))
 		return RTN_BROADCAST;
 	if (is_multicast_ether_addr(skb->data))
 		return RTN_MULTICAST;
-	return RTN_UNSPEC;
+	return RTN_UNICAST;
 }
 
 static void qeth_l2_fill_header(struct qeth_hdr *hdr, struct sk_buff *skb,
@@ -344,7 +343,6 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev,
 		rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN);
 		kfree(tmpid);
 	}
-	qeth_l2_set_rx_mode(card->dev);
 	return rc;
 }
 
@@ -770,18 +768,13 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
 	int tx_bytes = skb->len;
 	int rc;
 
-	if (card->qdio.do_prio_queueing || (cast_type &&
-					card->info.is_multicast_different))
-		queue = card->qdio.out_qs[qeth_get_priority_queue(card, skb,
-					ipv, cast_type)];
-	else
-		queue = card->qdio.out_qs[card->qdio.default_out_queue];
-
 	if ((card->state != CARD_STATE_UP) || !card->lan_online) {
 		card->stats.tx_carrier_errors++;
 		goto tx_drop;
 	}
 
+	queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
+
 	if (card->options.performance_stats) {
 		card->perf_stats.outbound_cnt++;
 		card->perf_stats.outbound_start_time = qeth_get_micros();
@@ -1125,13 +1118,12 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 		if (recovery_mode &&
 		    card->info.type != QETH_CARD_TYPE_OSN) {
 			__qeth_l2_open(card->dev);
+			qeth_l2_set_rx_mode(card->dev);
 		} else {
 			rtnl_lock();
 			dev_open(card->dev);
 			rtnl_unlock();
 		}
-		/* this also sets saved unicast addresses */
-		qeth_l2_set_rx_mode(card->dev);
 	}
 	/* let user_space know that device is online */
 	kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
@@ -1877,7 +1869,6 @@ int qeth_bridgeport_query_ports(struct qeth_card *card,
 		return rc;
 	return qeth_bridgeport_makerc(card, &cbctl, IPA_SBP_QUERY_BRIDGE_PORTS);
 }
-EXPORT_SYMBOL_GPL(qeth_bridgeport_query_ports);
 
 static int qeth_bridgeport_set_cb(struct qeth_card *card,
 	struct qeth_reply *reply, unsigned long data)
@@ -2025,7 +2016,6 @@ int qeth_bridgeport_an_set(struct qeth_card *card, int enable)
 		rc = qdio_pnso_brinfo(schid, 0, &response, NULL, NULL);
 	return qeth_anset_makerc(card, rc, response);
 }
-EXPORT_SYMBOL_GPL(qeth_bridgeport_an_set);
 
 static bool qeth_bridgeport_is_in_use(struct qeth_card *card)
 {

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 5905dc6..062f62b 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c

@@ -1978,17 +1978,17 @@ static int qeth_l3_get_cast_type(struct sk_buff *skb)
 		    (cast_type == RTN_MULTICAST) ||
 		    (cast_type == RTN_ANYCAST))
 			return cast_type;
-		return RTN_UNSPEC;
+		return RTN_UNICAST;
 	}
 	rcu_read_unlock();
 
 	/* no neighbour (eg AF_PACKET), fall back to target's IP address ... */
 	if (be16_to_cpu(skb->protocol) == ETH_P_IPV6)
 		return ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ?
-				RTN_MULTICAST : RTN_UNSPEC;
+				RTN_MULTICAST : RTN_UNICAST;
 	else if (be16_to_cpu(skb->protocol) == ETH_P_IP)
 		return ipv4_is_multicast(ip_hdr(skb)->daddr) ?
-				RTN_MULTICAST : RTN_UNSPEC;
+				RTN_MULTICAST : RTN_UNICAST;
 
 	/* ... and MAC address */
 	if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest, skb->dev->broadcast))
@@ -1997,22 +1997,21 @@ static int qeth_l3_get_cast_type(struct sk_buff *skb)
 		return RTN_MULTICAST;
 
 	/* default to unicast */
-	return RTN_UNSPEC;
+	return RTN_UNICAST;
 }
 
-static void qeth_l3_fill_af_iucv_hdr(struct qeth_card *card,
-		struct qeth_hdr *hdr, struct sk_buff *skb)
+static void qeth_l3_fill_af_iucv_hdr(struct qeth_hdr *hdr, struct sk_buff *skb,
+				     unsigned int data_len)
 {
 	char daddr[16];
 	struct af_iucv_trans_hdr *iucv_hdr;
 
 	memset(hdr, 0, sizeof(struct qeth_hdr));
 	hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3;
-	hdr->hdr.l3.ext_flags = 0;
-	hdr->hdr.l3.length = skb->len - ETH_HLEN;
+	hdr->hdr.l3.length = data_len;
 	hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST;
 
-	iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN);
+	iucv_hdr = (struct af_iucv_trans_hdr *)(skb_mac_header(skb) + ETH_HLEN);
 	memset(daddr, 0, sizeof(daddr));
 	daddr[0] = 0xfe;
 	daddr[1] = 0x80;
@@ -2051,6 +2050,12 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 		hdr->hdr.l3.vlan_id = skb_vlan_tag_get(skb);
 	}
 
+	if (!skb_is_gso(skb) && skb->ip_summed == CHECKSUM_PARTIAL) {
+		qeth_tx_csum(skb, &hdr->hdr.l3.ext_flags, ipv);
+		if (card->options.performance_stats)
+			card->perf_stats.tx_csum++;
+	}
+
 	/* OSA only: */
 	if (!ipv) {
 		hdr->hdr.l3.flags = QETH_HDR_PASSTHRU;
@@ -2156,104 +2161,141 @@ static int qeth_l3_get_elements_no_tso(struct qeth_card *card,
 	return elements;
 }
 
-static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
-					   struct net_device *dev)
+static int qeth_l3_xmit_offload(struct qeth_card *card, struct sk_buff *skb,
+				struct qeth_qdio_out_q *queue, int ipv,
+				int cast_type)
 {
-	int rc;
+	const unsigned int hw_hdr_len = sizeof(struct qeth_hdr);
+	unsigned int frame_len, nr_frags;
+	unsigned char eth_hdr[ETH_HLEN];
+	unsigned int hdr_elements = 0;
+	struct qeth_hdr *hdr = NULL;
+	int elements, push_len, rc;
+	unsigned int hd_len = 0;
+
+	/* compress skb to fit into one IO buffer: */
+	if (!qeth_get_elements_no(card, skb, 0, 0)) {
+		rc = skb_linearize(skb);
+
+		if (card->options.performance_stats) {
+			if (rc)
+				card->perf_stats.tx_linfail++;
+			else
+				card->perf_stats.tx_lin++;
+		}
+		if (rc)
+			return rc;
+	}
+
+	/* re-use the L2 header area for the HW header: */
+	rc = skb_cow_head(skb, hw_hdr_len - ETH_HLEN);
+	if (rc)
+		return rc;
+	skb_copy_from_linear_data(skb, eth_hdr, ETH_HLEN);
+	skb_pull(skb, ETH_HLEN);
+	frame_len = skb->len;
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
+	push_len = qeth_push_hdr(skb, &hdr, hw_hdr_len);
+	if (push_len < 0)
+		return push_len;
+	if (!push_len) {
+		/* hdr was added discontiguous from skb->data */
+		hd_len = hw_hdr_len;
+		hdr_elements = 1;
+	}
+
+	elements = qeth_get_elements_no(card, skb, hdr_elements, 0);
+	if (!elements) {
+		rc = -E2BIG;
+		goto out;
+	}
+	elements += hdr_elements;
+
+	if (skb->protocol == htons(ETH_P_AF_IUCV))
+		qeth_l3_fill_af_iucv_hdr(hdr, skb, frame_len);
+	else
+		qeth_l3_fill_header(card, hdr, skb, ipv, cast_type, frame_len);
+
+	if (IS_IQD(card)) {
+		rc = qeth_do_send_packet_fast(queue, skb, hdr, 0, hd_len);
+	} else {
+		/* TODO: drop skb_orphan() once TX completion is fast enough */
+		skb_orphan(skb);
+		rc = qeth_do_send_packet(card, queue, skb, hdr, 0, hd_len,
+					 elements);
+	}
+out:
+	if (!rc) {
+		if (card->options.performance_stats && nr_frags) {
+			card->perf_stats.sg_skbs_sent++;
+			/* nr_frags + skb->data */
+			card->perf_stats.sg_frags_sent += nr_frags + 1;
+		}
+	} else {
+		if (!push_len)
+			kmem_cache_free(qeth_core_header_cache, hdr);
+		if (rc == -EBUSY) {
+			/* roll back to ETH header */
+			skb_pull(skb, push_len);
+			skb_push(skb, ETH_HLEN);
+			skb_copy_to_linear_data(skb, eth_hdr, ETH_HLEN);
+		}
+	}
+	return rc;
+}
+
+static int qeth_l3_xmit(struct qeth_card *card, struct sk_buff *skb,
+			struct qeth_qdio_out_q *queue, int ipv, int cast_type)
+{
+	unsigned int hd_len, nr_frags;
+	int elements, len, rc;
 	__be16 *tag;
 	struct qeth_hdr *hdr = NULL;
 	int hdr_elements = 0;
-	int elements;
-	struct qeth_card *card = dev->ml_priv;
 	struct sk_buff *new_skb = NULL;
-	int ipv = qeth_get_ip_version(skb);
-	int cast_type = qeth_l3_get_cast_type(skb);
-	struct qeth_qdio_out_q *queue =
-		card->qdio.out_qs[card->qdio.do_prio_queueing
-			|| (cast_type && card->info.is_multicast_different) ?
-			qeth_get_priority_queue(card, skb, ipv, cast_type) :
-			card->qdio.default_out_queue];
 	int tx_bytes = skb->len;
-	unsigned int hd_len = 0;
 	bool use_tso;
-	int data_offset = -1;
-	unsigned int nr_frags;
-
-	if (((card->info.type == QETH_CARD_TYPE_IQD) &&
-	     (((card->options.cq != QETH_CQ_ENABLED) && !ipv) ||
-	      ((card->options.cq == QETH_CQ_ENABLED) &&
-	       (be16_to_cpu(skb->protocol) != ETH_P_AF_IUCV)))) ||
-	    card->options.sniffer)
-			goto tx_drop;
-
-	if ((card->state != CARD_STATE_UP) || !card->lan_online) {
-		card->stats.tx_carrier_errors++;
-		goto tx_drop;
-	}
-
-	if ((cast_type == RTN_BROADCAST) &&
-	    (card->info.broadcast_capable == 0))
-		goto tx_drop;
-
-	if (card->options.performance_stats) {
-		card->perf_stats.outbound_cnt++;
-		card->perf_stats.outbound_start_time = qeth_get_micros();
-	}
 
 	/* Ignore segment size from skb_is_gso(), 1 page is always used. */
 	use_tso = skb_is_gso(skb) &&
 		  (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4);
 
-	if (card->info.type == QETH_CARD_TYPE_IQD) {
-		new_skb = skb;
-		data_offset = ETH_HLEN;
-		hd_len = sizeof(*hdr);
-		hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
-		if (!hdr)
-			goto tx_drop;
-		hdr_elements++;
-	} else {
-		/* create a clone with writeable headroom */
-		new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr_tso)
-					+ VLAN_HLEN);
-		if (!new_skb)
-			goto tx_drop;
+	/* create a clone with writeable headroom */
+	new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr_tso) +
+					    VLAN_HLEN);
+	if (!new_skb)
+		return -ENOMEM;
 
-		if (ipv == 4) {
-			skb_pull(new_skb, ETH_HLEN);
-		}
-
-		if (ipv != 4 && skb_vlan_tag_present(new_skb)) {
-			skb_push(new_skb, VLAN_HLEN);
-			skb_copy_to_linear_data(new_skb, new_skb->data + 4, 4);
-			skb_copy_to_linear_data_offset(new_skb, 4,
-				new_skb->data + 8, 4);
-			skb_copy_to_linear_data_offset(new_skb, 8,
-				new_skb->data + 12, 4);
-			tag = (__be16 *)(new_skb->data + 12);
-			*tag = cpu_to_be16(ETH_P_8021Q);
-			*(tag + 1) = cpu_to_be16(skb_vlan_tag_get(new_skb));
-		}
+	if (ipv == 4) {
+		skb_pull(new_skb, ETH_HLEN);
+	} else if (skb_vlan_tag_present(new_skb)) {
+		skb_push(new_skb, VLAN_HLEN);
+		skb_copy_to_linear_data(new_skb, new_skb->data + 4, 4);
+		skb_copy_to_linear_data_offset(new_skb, 4,
+					       new_skb->data + 8, 4);
+		skb_copy_to_linear_data_offset(new_skb, 8,
+					       new_skb->data + 12, 4);
+		tag = (__be16 *)(new_skb->data + 12);
+		*tag = cpu_to_be16(ETH_P_8021Q);
+		*(tag + 1) = cpu_to_be16(skb_vlan_tag_get(new_skb));
 	}
 
-	netif_stop_queue(dev);
-
 	/* fix hardware limitation: as long as we do not have sbal
 	 * chaining we can not send long frag lists
 	 */
-	if ((card->info.type != QETH_CARD_TYPE_IQD) &&
-	    ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) ||
-	     (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0)))) {
-		int lin_rc = skb_linearize(new_skb);
+	if ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) ||
+	    (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0))) {
+		rc = skb_linearize(new_skb);
 
 		if (card->options.performance_stats) {
-			if (lin_rc)
+			if (rc)
 				card->perf_stats.tx_linfail++;
 			else
 				card->perf_stats.tx_lin++;
 		}
-		if (lin_rc)
-			goto tx_drop;
+		if (rc)
+			goto out;
 	}
 	nr_frags = skb_shinfo(new_skb)->nr_frags;
 
@@ -2265,60 +2307,37 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
 		qeth_tso_fill_header(card, hdr, new_skb);
 		hdr_elements++;
 	} else {
-		if (data_offset < 0) {
-			hdr = skb_push(new_skb, sizeof(struct qeth_hdr));
-			qeth_l3_fill_header(card, hdr, new_skb, ipv, cast_type,
-					    new_skb->len -
-					    sizeof(struct qeth_hdr));
-		} else {
-			if (be16_to_cpu(new_skb->protocol) == ETH_P_AF_IUCV)
-				qeth_l3_fill_af_iucv_hdr(card, hdr, new_skb);
-			else {
-				qeth_l3_fill_header(card, hdr, new_skb, ipv,
-						    cast_type,
-						    new_skb->len - data_offset);
-			}
-		}
-
-		if (new_skb->ip_summed == CHECKSUM_PARTIAL) {
-			qeth_tx_csum(new_skb, &hdr->hdr.l3.ext_flags, ipv);
-			if (card->options.performance_stats)
-				card->perf_stats.tx_csum++;
-		}
+		hdr = skb_push(new_skb, sizeof(struct qeth_hdr));
+		qeth_l3_fill_header(card, hdr, new_skb, ipv, cast_type,
+				    new_skb->len - sizeof(struct qeth_hdr));
 	}
 
 	elements = use_tso ?
 		   qeth_l3_get_elements_no_tso(card, new_skb, hdr_elements) :
-		   qeth_get_elements_no(card, new_skb, hdr_elements,
-					(data_offset > 0) ? data_offset : 0);
+		   qeth_get_elements_no(card, new_skb, hdr_elements, 0);
 	if (!elements) {
-		if (data_offset >= 0)
-			kmem_cache_free(qeth_core_header_cache, hdr);
-		goto tx_drop;
+		rc = -E2BIG;
+		goto out;
 	}
 	elements += hdr_elements;
 
-	if (card->info.type != QETH_CARD_TYPE_IQD) {
-		int len;
-		if (use_tso) {
-			hd_len = sizeof(struct qeth_hdr_tso) +
-				 ip_hdrlen(new_skb) + tcp_hdrlen(new_skb);
-			len = hd_len;
-		} else {
-			len = sizeof(struct qeth_hdr_layer3);
-		}
+	if (use_tso) {
+		hd_len = sizeof(struct qeth_hdr_tso) +
+			 ip_hdrlen(new_skb) + tcp_hdrlen(new_skb);
+		len = hd_len;
+	} else {
+		hd_len = 0;
+		len = sizeof(struct qeth_hdr_layer3);
+	}
 
-		if (qeth_hdr_chk_and_bounce(new_skb, &hdr, len))
-			goto tx_drop;
-		rc = qeth_do_send_packet(card, queue, new_skb, hdr, hd_len,
-					 hd_len, elements);
-	} else
-		rc = qeth_do_send_packet_fast(queue, new_skb, hdr, data_offset,
-					      hd_len);
-
+	if (qeth_hdr_chk_and_bounce(new_skb, &hdr, len)) {
+		rc = -EINVAL;
+		goto out;
+	}
+	rc = qeth_do_send_packet(card, queue, new_skb, hdr, hd_len, hd_len,
+				 elements);
+out:
 	if (!rc) {
-		card->stats.tx_packets++;
-		card->stats.tx_bytes += tx_bytes;
 		if (new_skb != skb)
 			dev_kfree_skb_any(skb);
 		if (card->options.performance_stats) {
@@ -2332,30 +2351,68 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
 				card->perf_stats.sg_frags_sent += nr_frags + 1;
 			}
 		}
-		rc = NETDEV_TX_OK;
 	} else {
-		if (data_offset >= 0)
-			kmem_cache_free(qeth_core_header_cache, hdr);
+		if (new_skb != skb)
+			dev_kfree_skb_any(new_skb);
+	}
+	return rc;
+}
 
-		if (rc == -EBUSY) {
-			if (new_skb != skb)
-				dev_kfree_skb_any(new_skb);
-			return NETDEV_TX_BUSY;
-		} else
+static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
+					   struct net_device *dev)
+{
+	int cast_type = qeth_l3_get_cast_type(skb);
+	struct qeth_card *card = dev->ml_priv;
+	int ipv = qeth_get_ip_version(skb);
+	struct qeth_qdio_out_q *queue;
+	int tx_bytes = skb->len;
+	int rc;
+
+	if (IS_IQD(card)) {
+		if (card->options.sniffer)
+			goto tx_drop;
+		if ((card->options.cq != QETH_CQ_ENABLED && !ipv) ||
+		    (card->options.cq == QETH_CQ_ENABLED &&
+		     skb->protocol != htons(ETH_P_AF_IUCV)))
 			goto tx_drop;
 	}
 
-	netif_wake_queue(dev);
-	if (card->options.performance_stats)
-		card->perf_stats.outbound_time += qeth_get_micros() -
-			card->perf_stats.outbound_start_time;
-	return rc;
+	if (card->state != CARD_STATE_UP || !card->lan_online) {
+		card->stats.tx_carrier_errors++;
+		goto tx_drop;
+	}
+
+	if (cast_type == RTN_BROADCAST && !card->info.broadcast_capable)
+		goto tx_drop;
+
+	queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
+
+	if (card->options.performance_stats) {
+		card->perf_stats.outbound_cnt++;
+		card->perf_stats.outbound_start_time = qeth_get_micros();
+	}
+	netif_stop_queue(dev);
+
+	if (IS_IQD(card) || (!skb_is_gso(skb) && ipv == 4))
+		rc = qeth_l3_xmit_offload(card, skb, queue, ipv, cast_type);
+	else
+		rc = qeth_l3_xmit(card, skb, queue, ipv, cast_type);
+
+	if (!rc) {
+		card->stats.tx_packets++;
+		card->stats.tx_bytes += tx_bytes;
+		if (card->options.performance_stats)
+			card->perf_stats.outbound_time += qeth_get_micros() -
+				card->perf_stats.outbound_start_time;
+		netif_wake_queue(dev);
+		return NETDEV_TX_OK;
+	} else if (rc == -EBUSY) {
+		return NETDEV_TX_BUSY;
+	} /* else fall through */
 
 tx_drop:
 	card->stats.tx_dropped++;
 	card->stats.tx_errors++;
-	if ((new_skb != skb) && new_skb)
-		dev_kfree_skb_any(new_skb);
 	dev_kfree_skb_any(skb);
 	netif_wake_queue(dev);
 	return NETDEV_TX_OK;
@@ -2497,9 +2554,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 		if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
 			card->dev->dev_id = card->info.unique_id & 0xffff;
 
-		card->dev->hw_features |= NETIF_F_SG;
-		card->dev->vlan_features |= NETIF_F_SG;
-
 		if (!card->info.guestlan) {
 			card->dev->features |= NETIF_F_SG;
 			card->dev->hw_features |= NETIF_F_TSO |
@@ -2519,6 +2573,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 			return -ENODEV;
 		card->dev->flags |= IFF_NOARP;
 		card->dev->netdev_ops = &qeth_l3_netdev_ops;
+
 		rc = qeth_l3_iqd_read_initial_mac(card);
 		if (rc)
 			return rc;
@@ -2534,12 +2589,18 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 	card->dev->max_mtu = ETH_MAX_MTU;
 	card->dev->dev_port = card->info.portno;
 	card->dev->ethtool_ops = &qeth_l3_ethtool_ops;
+	card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	card->dev->needed_headroom = sizeof(struct qeth_hdr) - ETH_HLEN;
 	card->dev->features |=	NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_CTAG_RX |
 				NETIF_F_HW_VLAN_CTAG_FILTER;
+	card->dev->hw_features |= NETIF_F_SG;
+	card->dev->vlan_features |= NETIF_F_SG;
+
 	netif_keep_dst(card->dev);
-	netif_set_gso_max_size(card->dev, (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
-					  PAGE_SIZE);
+	if (card->dev->hw_features & NETIF_F_TSO)
+		netif_set_gso_max_size(card->dev,
+				       PAGE_SIZE * (QETH_MAX_BUFFER_ELEMENTS(card) - 1));
 
 	SET_NETDEV_DEV(card->dev, &card->gdev->dev);
 	netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
@@ -2666,11 +2727,12 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 	qeth_enable_hw_features(card->dev);
 	if (recover_flag == CARD_STATE_RECOVER) {
 		rtnl_lock();
-		if (recovery_mode)
+		if (recovery_mode) {
 			__qeth_l3_open(card->dev);
-		else
+			qeth_l3_set_rx_mode(card->dev);
+		} else {
 			dev_open(card->dev);
-		qeth_l3_set_rx_mode(card->dev);
+		}
 		rtnl_unlock();
 	}
 	qeth_trace_features(card);

diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
index e461168..4e6611e 100644
--- a/drivers/staging/netlogic/xlr_net.c
+++ b/drivers/staging/netlogic/xlr_net.c

@@ -290,13 +290,6 @@ static netdev_tx_t xlr_net_start_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
-static u16 xlr_net_select_queue(struct net_device *ndev, struct sk_buff *skb,
-				void *accel_priv,
-				select_queue_fallback_t fallback)
-{
-	return (u16)smp_processor_id();
-}
-
 static void xlr_hw_set_mac_addr(struct net_device *ndev)
 {
 	struct xlr_net_priv *priv = netdev_priv(ndev);
@@ -403,7 +396,7 @@ static const struct net_device_ops xlr_netdev_ops = {
 	.ndo_open = xlr_net_open,
 	.ndo_stop = xlr_net_stop,
 	.ndo_start_xmit = xlr_net_start_xmit,
-	.ndo_select_queue = xlr_net_select_queue,
+	.ndo_select_queue = dev_pick_tx_cpu_id,
 	.ndo_set_mac_address = xlr_net_set_mac_addr,
 	.ndo_set_rx_mode = xlr_set_rx_mode,
 	.ndo_get_stats64 = xlr_stats,

diff --git a/drivers/staging/rtl8188eu/os_dep/os_intfs.c b/drivers/staging/rtl8188eu/os_dep/os_intfs.c
index add1ba0..38e85c8 100644
--- a/drivers/staging/rtl8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8188eu/os_dep/os_intfs.c

@@ -253,7 +253,8 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb)
 }
 
 static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct adapter	*padapter = rtw_netdev_priv(dev);
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;

diff --git a/drivers/staging/rtl8723bs/os_dep/os_intfs.c b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
index ace68f0..1816423 100644
--- a/drivers/staging/rtl8723bs/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8723bs/os_dep/os_intfs.c

@@ -403,10 +403,9 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb)
 }
 
 
-static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb
-				, void *accel_priv
-				, select_queue_fallback_t fallback
-)
+static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb,
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct adapter	*padapter = rtw_netdev_priv(dev);
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;

diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index d95cae0..ac42da5 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h

@@ -74,6 +74,11 @@ struct team_port {
 	long mode_priv[0];
 };
 
+static inline struct team_port *team_port_get_rcu(const struct net_device *dev)
+{
+	return rcu_dereference(dev->rx_handler_data);
+}
+
 static inline bool team_port_enabled(struct team_port *port)
 {
 	return port->index != -1;
@@ -84,6 +89,19 @@ static inline bool team_port_txable(struct team_port *port)
 	return port->linkup && team_port_enabled(port);
 }
 
+static inline bool team_port_dev_txable(const struct net_device *port_dev)
+{
+	struct team_port *port;
+	bool txable;
+
+	rcu_read_lock();
+	port = team_port_get_rcu(port_dev);
+	txable = port ? team_port_txable(port) : false;
+	rcu_read_unlock();
+
+	return txable;
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static inline void team_netpoll_send_skb(struct team_port *port,
 					 struct sk_buff *skb)

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 122e7e9..dca6ab4 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h

@@ -630,6 +630,7 @@ struct mlx4_caps {
 	u32			vf_caps;
 	bool			wol_port[MLX4_MAX_PORTS + 1];
 	struct mlx4_rate_limit_caps rl_caps;
+	u32			health_buffer_addrs;
 };
 
 struct mlx4_buf_list {
@@ -851,6 +852,12 @@ struct mlx4_vf_dev {
 	u8			n_ports;
 };
 
+struct mlx4_fw_crdump {
+	bool snapshot_enable;
+	struct devlink_region *region_crspace;
+	struct devlink_region *region_fw_health;
+};
+
 enum mlx4_pci_status {
 	MLX4_PCI_STATUS_DISABLED,
 	MLX4_PCI_STATUS_ENABLED,
@@ -871,6 +878,7 @@ struct mlx4_dev_persistent {
 	u8	interface_state;
 	struct mutex		pci_status_mutex; /* sync pci state */
 	enum mlx4_pci_status	pci_status;
+	struct mlx4_fw_crdump	crdump;
 };
 
 struct mlx4_dev {

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index fd436cd..6675b9f 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h

@@ -254,6 +254,7 @@ struct mr_table {
 	atomic_t		cache_resolve_queue_len;
 	bool			mroute_do_assert;
 	bool			mroute_do_pim;
+	bool			mroute_do_wrvifwhole;
 	int			mroute_reg_vif_num;
 };
 

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2422c0e..4fa7f7a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h

@@ -575,6 +575,9 @@ struct netdev_queue {
 	 * (/sys/class/net/DEV/Q/trans_timeout)
 	 */
 	unsigned long		trans_timeout;
+
+	/* Subordinate device that the queue has been assigned to */
+	struct net_device	*sb_dev;
 /*
  * write-mostly part
  */
@@ -790,7 +793,8 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
 }
 
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
-				       struct sk_buff *skb);
+				       struct sk_buff *skb,
+				       struct net_device *sb_dev);
 
 enum tc_setup_type {
 	TC_SETUP_QDISC_MQPRIO,
@@ -950,7 +954,8 @@ struct dev_ifalias {
  *	those the driver believes to be appropriate.
  *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
- *                         void *accel_priv, select_queue_fallback_t fallback);
+ *                         struct net_device *sb_dev,
+ *                         select_queue_fallback_t fallback);
  *	Called to decide which queue to use when device supports multiple
  *	transmit queues.
  *
@@ -1222,7 +1227,7 @@ struct net_device_ops {
 						      netdev_features_t features);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb,
-						    void *accel_priv,
+						    struct net_device *sb_dev,
 						    select_queue_fallback_t fallback);
 	void			(*ndo_change_rx_flags)(struct net_device *dev,
 						       int flags);
@@ -1987,7 +1992,7 @@ struct net_device {
 #ifdef CONFIG_DCB
 	const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
-	u8			num_tc;
+	s16			num_tc;
 	struct netdev_tc_txq	tc_to_txq[TC_MAX_QUEUE];
 	u8			prio_tc_map[TC_BITMASK + 1];
 
@@ -2041,6 +2046,17 @@ int netdev_get_num_tc(struct net_device *dev)
 	return dev->num_tc;
 }
 
+void netdev_unbind_sb_channel(struct net_device *dev,
+			      struct net_device *sb_dev);
+int netdev_bind_sb_channel_queue(struct net_device *dev,
+				 struct net_device *sb_dev,
+				 u8 tc, u16 count, u16 offset);
+int netdev_set_sb_channel(struct net_device *dev, u16 channel);
+static inline int netdev_get_sb_channel(struct net_device *dev)
+{
+	return max_t(int, -dev->num_tc, 0);
+}
+
 static inline
 struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
 					 unsigned int index)
@@ -2085,7 +2101,7 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 				    struct sk_buff *skb,
-				    void *accel_priv);
+				    struct net_device *sb_dev);
 
 /* returns the headroom that the master device needs to take in account
  * when forwarding to this dev
@@ -2549,8 +2565,14 @@ void dev_close(struct net_device *dev);
 void dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
+u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
+		     struct net_device *sb_dev,
+		     select_queue_fallback_t fallback);
+u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback);
 int dev_queue_xmit(struct sk_buff *skb);
-int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
+int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev);
 int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
 int register_netdevice(struct net_device *dev);
 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 5a5e0a2..07efffd0 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h

@@ -294,12 +294,16 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	     int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct sk_buff *skb, *next;
+	struct list_head sublist;
 
+	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
-		int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
-		if (ret != 1)
-			list_del(&skb->list);
+		list_del(&skb->list);
+		if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1)
+			list_add_tail(&skb->list, &sublist);
 	}
+	/* Put passed packets back on main list */
+	list_splice(&sublist, head);
 }
 
 /* Call setsockopt() */
@@ -410,8 +414,17 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 
 extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
 void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
+struct nf_conntrack_tuple;
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+			 const struct sk_buff *skb);
 #else
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
+struct nf_conntrack_tuple;
+static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+				       const struct sk_buff *skb)
+{
+	return false;
+}
 #endif
 
 struct nf_conn;
@@ -420,6 +433,8 @@ enum ip_conntrack_info;
 struct nf_ct_hook {
 	int (*update)(struct net *net, struct sk_buff *skb);
 	void (*destroy)(struct nf_conntrack *);
+	bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
+			      const struct sk_buff *);
 };
 extern struct nf_ct_hook __rcu *nf_ct_hook;
 

diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index e6b240b6..379affc 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h

@@ -21,4 +21,9 @@
 
 #include <uapi/linux/openvswitch.h>
 
+#define OVS_CLONE_ATTR_EXEC      0   /* Specify an u32 value. When nonzero,
+				      * actions in clone will not change flow
+				      * keys. False otherwise.
+				      */
+
 #endif /* _LINUX_OPENVSWITCH_H */

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3dbea66..58a8d7d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h

@@ -89,7 +89,7 @@ struct tcp_sack_block {
 
 struct tcp_options_received {
 /*	PAWS/RTTM data	*/
-	long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
+	int	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
 	u32	ts_recent;	/* Time stamp to echo next		*/
 	u32	rcv_tsval;	/* Time stamp value             	*/
 	u32	rcv_tsecr;	/* Time stamp echo reply        	*/
@@ -426,7 +426,7 @@ struct tcp_timewait_sock {
 	/* The time we sent the last out-of-window ACK: */
 	u32			  tw_last_oow_ack_time;
 
-	long			  tw_ts_recent_stamp;
+	int			  tw_ts_recent_stamp;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key	  *tw_md5_key;
 #endif

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 5ff11ad..683ce41 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h

@@ -6,6 +6,7 @@
  * Public action API for classifiers/qdiscs
 */
 
+#include <linux/refcount.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 #include <net/net_namespace.h>
@@ -26,8 +27,8 @@ struct tc_action {
 	struct tcf_idrinfo		*idrinfo;
 
 	u32				tcfa_index;
-	int				tcfa_refcnt;
-	int				tcfa_bindcnt;
+	refcount_t			tcfa_refcnt;
+	atomic_t			tcfa_bindcnt;
 	u32				tcfa_capab;
 	int				tcfa_action;
 	struct tcf_t			tcfa_tm;
@@ -37,7 +38,7 @@ struct tc_action {
 	spinlock_t			tcfa_lock;
 	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
 	struct gnet_stats_queue __percpu *cpu_qstats;
-	struct tc_cookie	*act_cookie;
+	struct tc_cookie	__rcu *act_cookie;
 	struct tcf_chain	*goto_chain;
 };
 #define tcf_index	common.tcfa_index
@@ -91,7 +92,8 @@ struct tc_action_ops {
 			  struct netlink_ext_ack *extack);
 	int     (*init)(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act, int ovr,
-			int bind, struct netlink_ext_ack *extack);
+			int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack);
 	int     (*walk)(struct net *, struct sk_buff *,
 			struct netlink_callback *, int,
 			const struct tc_action_ops *,
@@ -99,6 +101,7 @@ struct tc_action_ops {
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
 	size_t  (*get_fill_size)(const struct tc_action *act);
 	struct net_device *(*get_dev)(const struct tc_action *a);
+	int     (*delete)(struct net *net, u32 index);
 };
 
 struct tc_action_net {
@@ -151,6 +154,10 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 		   int bind, bool cpustats);
 void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
 
+void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
+int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
+			struct tc_action **a, int bind);
+int tcf_idr_delete_index(struct tc_action_net *tn, u32 index);
 int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
 
 static inline int tcf_idr_release(struct tc_action *a, bool bind)
@@ -161,18 +168,20 @@ static inline int tcf_idr_release(struct tc_action *a, bool bind)
 int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
 int tcf_unregister_action(struct tc_action_ops *a,
 			  struct pernet_operations *ops);
-int tcf_action_destroy(struct list_head *actions, int bind);
+int tcf_action_destroy(struct tc_action *actions[], int bind);
 int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 		    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions, size_t *attr_size,
-		    struct netlink_ext_ack *extack);
+		    struct tc_action *actions[], size_t *attr_size,
+		    bool rtnl_held, struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
+				    bool rtnl_held,
 				    struct netlink_ext_ack *extack);
-int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
+int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
+		    int ref);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);

diff --git a/include/net/bonding.h b/include/net/bonding.h
index 808f1d1..a2d0581 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h

@@ -411,6 +411,19 @@ static inline bool bond_slave_can_tx(struct slave *slave)
 	       bond_is_active_slave(slave);
 }
 
+static inline bool bond_is_active_slave_dev(const struct net_device *slave_dev)
+{
+	struct slave *slave;
+	bool active;
+
+	rcu_read_lock();
+	slave = bond_slave_get_rcu(slave_dev);
+	active = bond_is_active_slave(slave);
+	rcu_read_unlock();
+
+	return active;
+}
+
 static inline void bond_hw_addr_copy(u8 *dst, const u8 *src, unsigned int len)
 {
 	if (len == ETH_ALEN) {

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 8ed57138..b9b89d6 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h

@@ -28,6 +28,8 @@ struct devlink {
 	struct list_head dpipe_table_list;
 	struct list_head resource_list;
 	struct list_head param_list;
+	struct list_head region_list;
+	u32 snapshot_id;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
@@ -359,6 +361,7 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
 	DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
+	DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -374,6 +377,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME "enable_sriov"
 #define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE DEVLINK_PARAM_TYPE_BOOL
 
+#define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME "region_snapshot_enable"
+#define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE DEVLINK_PARAM_TYPE_BOOL
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
@@ -397,6 +403,10 @@ enum devlink_param_generic_id {
 	.validate = _validate,						\
 }
 
+struct devlink_region;
+
+typedef void devlink_snapshot_data_dest_t(const void *data);
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -543,6 +553,15 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value init_val);
 void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
+struct devlink_region *devlink_region_create(struct devlink *devlink,
+					     const char *region_name,
+					     u32 region_max_snapshots,
+					     u64 region_size);
+void devlink_region_destroy(struct devlink_region *region);
+u32 devlink_region_shapshot_id_get(struct devlink *devlink);
+int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+				   u8 *data, u32 snapshot_id,
+				   devlink_snapshot_data_dest_t *data_destructor);
 
 #else
 
@@ -768,7 +787,34 @@ devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 static inline void
 devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 {
-	return -EOPNOTSUPP;
+}
+
+static inline struct devlink_region *
+devlink_region_create(struct devlink *devlink,
+		      const char *region_name,
+		      u32 region_max_snapshots,
+		      u64 region_size)
+{
+	return NULL;
+}
+
+static inline void
+devlink_region_destroy(struct devlink_region *region)
+{
+}
+
+static inline u32
+devlink_region_shapshot_id_get(struct devlink *devlink)
+{
+	return 0;
+}
+
+static inline int
+devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+			       u8 *data, u32 snapshot_id,
+			       devlink_snapshot_data_dest_t *data_destructor)
+{
+	return 0;
 }
 
 #endif

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index adc24df5..c644067 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h

@@ -47,7 +47,7 @@ struct flow_dissector_key_tags {
 struct flow_dissector_key_vlan {
 	u16	vlan_id:12,
 		vlan_priority:3;
-	u16	padding;
+	__be16	vlan_tpid;
 };
 
 struct flow_dissector_key_mpls {
@@ -206,6 +206,7 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */
 	FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */
 	FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
+	FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */
 
 	FLOW_DISSECTOR_KEY_MAX,
 };
@@ -237,6 +238,7 @@ struct flow_keys {
 	struct flow_dissector_key_basic basic;
 	struct flow_dissector_key_tags tags;
 	struct flow_dissector_key_vlan vlan;
+	struct flow_dissector_key_vlan cvlan;
 	struct flow_dissector_key_keyid keyid;
 	struct flow_dissector_key_ports ports;
 	struct flow_dissector_key_addrs addrs;

diff --git a/include/net/ip.h b/include/net/ip.h
index 99d1b83..e44b1a4 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h

@@ -72,13 +72,27 @@ struct ipcm_cookie {
 	__be32			addr;
 	int			oif;
 	struct ip_options_rcu	*opt;
-	__u8			tx_flags;
 	__u8			ttl;
 	__s16			tos;
 	char			priority;
 	__u16			gso_size;
 };
 
+static inline void ipcm_init(struct ipcm_cookie *ipcm)
+{
+	*ipcm = (struct ipcm_cookie) { .tos = -1 };
+}
+
+static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
+				const struct inet_sock *inet)
+{
+	ipcm_init(ipcm);
+
+	ipcm->sockc.tsflags = inet->sk.sk_tsflags;
+	ipcm->oif = inet->sk.sk_bound_dev_if;
+	ipcm->addr = inet->inet_saddr;
+}
+
 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
 #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))
 

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index b7843e0..aa6fd11 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h

@@ -294,6 +294,7 @@ struct ipv6_fl_socklist {
 };
 
 struct ipcm6_cookie {
+	struct sockcm_cookie sockc;
 	__s16 hlimit;
 	__s16 tclass;
 	__s8  dontfrag;
@@ -301,6 +302,25 @@ struct ipcm6_cookie {
 	__u16 gso_size;
 };
 
+static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
+{
+	*ipc6 = (struct ipcm6_cookie) {
+		.hlimit = -1,
+		.tclass = -1,
+		.dontfrag = -1,
+	};
+}
+
+static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
+				 const struct ipv6_pinfo *np)
+{
+	*ipc6 = (struct ipcm6_cookie) {
+		.hlimit = -1,
+		.tclass = np->tclass,
+		.dontfrag = np->dontfrag,
+	};
+}
+
 static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
 {
 	struct ipv6_txoptions *opt;
@@ -940,8 +960,7 @@ int ip6_append_data(struct sock *sk,
 				int odd, struct sk_buff *skb),
 		    void *from, int length, int transhdrlen,
 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
-		    struct rt6_info *rt, unsigned int flags,
-		    const struct sockcm_cookie *sockc);
+		    struct rt6_info *rt, unsigned int flags);
 
 int ip6_push_pending_frames(struct sock *sk);
 
@@ -958,8 +977,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 			     void *from, int length, int transhdrlen,
 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
 			     struct rt6_info *rt, unsigned int flags,
-			     struct inet_cork_full *cork,
-			     const struct sockcm_cookie *sockc);
+			     struct inet_cork_full *cork);
 
 static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
 {

diff --git a/include/net/lag.h b/include/net/lag.h
new file mode 100644
index 0000000..95b880e
--- /dev/null
+++ b/include/net/lag.h

@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_IF_LAG_H
+#define _LINUX_IF_LAG_H
+
+#include <linux/netdevice.h>
+#include <linux/if_team.h>
+#include <net/bonding.h>
+
+static inline bool net_lag_port_dev_txable(const struct net_device *port_dev)
+{
+	if (netif_is_team_port(port_dev))
+		return team_port_dev_txable(port_dev);
+	else
+		return bond_is_active_slave_dev(port_dev);
+}
+
+#endif /* _LINUX_IF_LAG_H */

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6641584..e4252a1 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h

@@ -13,6 +13,7 @@ struct tcf_walker {
 	int	stop;
 	int	skip;
 	int	count;
+	unsigned long cookie;
 	int	(*fn)(struct tcf_proto *, void *node, struct tcf_walker *);
 };
 
@@ -781,6 +782,7 @@ struct tc_mqprio_qopt_offload {
 struct tc_cookie {
 	u8  *data;
 	u32 len;
+	struct rcu_head rcu;
 };
 
 struct tc_qopt_offload_stats {

diff --git a/include/net/sock.h b/include/net/sock.h
index e0eac9e..83b7475 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h

@@ -1600,6 +1600,12 @@ struct sockcm_cookie {
 	u16 tsflags;
 };
 
+static inline void sockcm_init(struct sockcm_cookie *sockc,
+			       const struct sock *sk)
+{
+	*sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
+}
+
 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
 		     struct sockcm_cookie *sockc);
 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,

diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 19cd3d3..911bbac 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h

@@ -22,14 +22,19 @@
 #include <net/act_api.h>
 #include <linux/tc_act/tc_skbedit.h>
 
+struct tcf_skbedit_params {
+	u32 flags;
+	u32 priority;
+	u32 mark;
+	u32 mask;
+	u16 queue_mapping;
+	u16 ptype;
+	struct rcu_head rcu;
+};
+
 struct tcf_skbedit {
-	struct tc_action	common;
-	u32		flags;
-	u32		priority;
-	u32		mark;
-	u32		mask;
-	u16		queue_mapping;
-	u16		ptype;
+	struct tc_action common;
+	struct tcf_skbedit_params __rcu *params;
 };
 #define to_skbedit(a) ((struct tcf_skbedit *)a)
 
@@ -37,15 +42,27 @@ struct tcf_skbedit {
 static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (a->ops && a->ops->type == TCA_ACT_SKBEDIT)
-		return to_skbedit(a)->flags == SKBEDIT_F_MARK;
+	u32 flags;
+
+	if (a->ops && a->ops->type == TCA_ACT_SKBEDIT) {
+		rcu_read_lock();
+		flags = rcu_dereference(to_skbedit(a)->params)->flags;
+		rcu_read_unlock();
+		return flags == SKBEDIT_F_MARK;
+	}
 #endif
 	return false;
 }
 
 static inline u32 tcf_skbedit_mark(const struct tc_action *a)
 {
-	return to_skbedit(a)->mark;
+	u32 mark;
+
+	rcu_read_lock();
+	mark = rcu_dereference(to_skbedit(a)->params)->mark;
+	rcu_read_unlock();
+
+	return mark;
 }
 
 #endif /* __NET_TC_SKBEDIT_H */

diff --git a/include/net/tcp.h b/include/net/tcp.h
index cce3769..5823049 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h

@@ -472,19 +472,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
  */
 static inline void tcp_synq_overflow(const struct sock *sk)
 {
-	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
-	unsigned long now = jiffies;
+	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int now = jiffies;
 
-	if (time_after(now, last_overflow + HZ))
+	if (time_after32(now, last_overflow + HZ))
 		tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
 }
 
 /* syncookies: no recent synqueue overflow on this listening socket? */
 static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
 {
-	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int now = jiffies;
 
-	return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
+	return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
 }
 
 static inline u32 tcp_cookie_time(void)
@@ -954,6 +955,8 @@ struct rate_sample {
 	u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
 	s32  delivered;		/* number of packets delivered over interval */
 	long interval_us;	/* time for tp->delivered to incr "delivered" */
+	u32 snd_interval_us;	/* snd interval for delivered packets */
+	u32 rcv_interval_us;	/* rcv interval for delivered packets */
 	long rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
 	int  losses;		/* number of packets marked lost upon ACK */
 	u32  acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
@@ -1373,7 +1376,8 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
 {
 	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
 		return true;
-	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
+	if (unlikely(!time_before32(ktime_get_seconds(),
+				    rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)))
 		return true;
 	/*
 	 * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
@@ -1403,7 +1407,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
 
 	   However, we can relax time bounds for RST segments to MSL.
 	 */
-	if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
+	if (rst && !time_before32(ktime_get_seconds(),
+				  rx_opt->ts_recent_stamp + TCP_PAWS_MSL))
 		return false;
 	return true;
 }

diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index f6a3543..a8f6020 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h

@@ -42,8 +42,7 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
 				    struct sk_buff *skb);
 
 int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg,
-			  struct flowi6 *fl6, struct ipcm6_cookie *ipc6,
-			  struct sockcm_cookie *sockc);
+			  struct flowi6 *fl6, struct ipcm6_cookie *ipc6);
 
 void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
 			       __u16 srcp, __u16 destp, int rqueue, int bucket);

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 68641fb..79407bb 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h

@@ -83,6 +83,12 @@ enum devlink_command {
 	DEVLINK_CMD_PARAM_NEW,
 	DEVLINK_CMD_PARAM_DEL,
 
+	DEVLINK_CMD_REGION_GET,
+	DEVLINK_CMD_REGION_SET,
+	DEVLINK_CMD_REGION_NEW,
+	DEVLINK_CMD_REGION_DEL,
+	DEVLINK_CMD_REGION_READ,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -262,6 +268,18 @@ enum devlink_attr {
 	DEVLINK_ATTR_PARAM_VALUE_DATA,		/* dynamic */
 	DEVLINK_ATTR_PARAM_VALUE_CMODE,		/* u8 */
 
+	DEVLINK_ATTR_REGION_NAME,               /* string */
+	DEVLINK_ATTR_REGION_SIZE,               /* u64 */
+	DEVLINK_ATTR_REGION_SNAPSHOTS,          /* nested */
+	DEVLINK_ATTR_REGION_SNAPSHOT,           /* nested */
+	DEVLINK_ATTR_REGION_SNAPSHOT_ID,        /* u32 */
+
+	DEVLINK_ATTR_REGION_CHUNKS,             /* nested */
+	DEVLINK_ATTR_REGION_CHUNK,              /* nested */
+	DEVLINK_ATTR_REGION_CHUNK_DATA,         /* binary */
+	DEVLINK_ATTR_REGION_CHUNK_ADDR,         /* u64 */
+	DEVLINK_ATTR_REGION_CHUNK_LEN,          /* u64 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,

diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index 10f9ff9..5d37a9c 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h

@@ -120,6 +120,7 @@ enum {
 	IPMRA_TABLE_MROUTE_DO_ASSERT,
 	IPMRA_TABLE_MROUTE_DO_PIM,
 	IPMRA_TABLE_VIFS,
+	IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
 	__IPMRA_TABLE_MAX
 };
 #define IPMRA_TABLE_MAX (__IPMRA_TABLE_MAX - 1)
@@ -173,5 +174,6 @@ enum {
 #define IGMPMSG_NOCACHE		1		/* Kern cache fill request to mrouted */
 #define IGMPMSG_WRONGVIF	2		/* For PIM assert processing (unused) */
 #define IGMPMSG_WHOLEPKT	3		/* For PIM Register processing */
+#define IGMPMSG_WRVIFWHOLE	4		/* For PIM Register and assert processing */
 
 #endif /* _UAPI__LINUX_MROUTE_H */

diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index f8f4539..97ff3c1 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h

@@ -155,8 +155,8 @@ enum txtime_flags {
 };
 
 struct sock_txtime {
-	clockid_t       clockid;        /* reference clockid */
-	u32             flags;          /* flags defined by enum txtime_flags */
+	clockid_t	clockid;	/* reference clockid */
+	__u32		flags;		/* as defined by enum txtime_flags */
 };
 
 #endif /* _NET_TIMESTAMPING_H */

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 863aaba..dbe0cbe 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h

@@ -840,6 +840,8 @@ struct ovs_action_push_eth {
  * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
  * @OVS_ACTION_ATTR_METER: Run packet through a meter, which may drop the
  * packet, or modify the packet (e.g., change the DSCP field).
+ * @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of
+ * actions without affecting the original packet and key.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -873,6 +875,7 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_PUSH_NSH,     /* Nested OVS_NSH_KEY_ATTR_*. */
 	OVS_ACTION_ATTR_POP_NSH,      /* No argument. */
 	OVS_ACTION_ATTR_METER,        /* u32 meter ID. */
+	OVS_ACTION_ATTR_CLONE,        /* Nested OVS_CLONE_ATTR_*.  */
 
 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
 				       * from userspace. */

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 84e4c1d..c4262d9 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h

@@ -469,6 +469,10 @@ enum {
 	TCA_FLOWER_KEY_IP_TTL,		/* u8 */
 	TCA_FLOWER_KEY_IP_TTL_MASK,	/* u8 */
 
+	TCA_FLOWER_KEY_CVLAN_ID,	/* be16 */
+	TCA_FLOWER_KEY_CVLAN_PRIO,	/* u8   */
+	TCA_FLOWER_KEY_CVLAN_ETH_TYPE,	/* be16 */
+
 	__TCA_FLOWER_MAX,
 };
 

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 9491184..d9cc9dc 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h

@@ -955,4 +955,118 @@ enum {
 
 #define TCA_ETF_MAX (__TCA_ETF_MAX - 1)
 
+
+/* CAKE */
+enum {
+	TCA_CAKE_UNSPEC,
+	TCA_CAKE_PAD,
+	TCA_CAKE_BASE_RATE64,
+	TCA_CAKE_DIFFSERV_MODE,
+	TCA_CAKE_ATM,
+	TCA_CAKE_FLOW_MODE,
+	TCA_CAKE_OVERHEAD,
+	TCA_CAKE_RTT,
+	TCA_CAKE_TARGET,
+	TCA_CAKE_AUTORATE,
+	TCA_CAKE_MEMORY,
+	TCA_CAKE_NAT,
+	TCA_CAKE_RAW,
+	TCA_CAKE_WASH,
+	TCA_CAKE_MPU,
+	TCA_CAKE_INGRESS,
+	TCA_CAKE_ACK_FILTER,
+	TCA_CAKE_SPLIT_GSO,
+	__TCA_CAKE_MAX
+};
+#define TCA_CAKE_MAX	(__TCA_CAKE_MAX - 1)
+
+enum {
+	__TCA_CAKE_STATS_INVALID,
+	TCA_CAKE_STATS_PAD,
+	TCA_CAKE_STATS_CAPACITY_ESTIMATE64,
+	TCA_CAKE_STATS_MEMORY_LIMIT,
+	TCA_CAKE_STATS_MEMORY_USED,
+	TCA_CAKE_STATS_AVG_NETOFF,
+	TCA_CAKE_STATS_MIN_NETLEN,
+	TCA_CAKE_STATS_MAX_NETLEN,
+	TCA_CAKE_STATS_MIN_ADJLEN,
+	TCA_CAKE_STATS_MAX_ADJLEN,
+	TCA_CAKE_STATS_TIN_STATS,
+	TCA_CAKE_STATS_DEFICIT,
+	TCA_CAKE_STATS_COBALT_COUNT,
+	TCA_CAKE_STATS_DROPPING,
+	TCA_CAKE_STATS_DROP_NEXT_US,
+	TCA_CAKE_STATS_P_DROP,
+	TCA_CAKE_STATS_BLUE_TIMER_US,
+	__TCA_CAKE_STATS_MAX
+};
+#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
+
+enum {
+	__TCA_CAKE_TIN_STATS_INVALID,
+	TCA_CAKE_TIN_STATS_PAD,
+	TCA_CAKE_TIN_STATS_SENT_PACKETS,
+	TCA_CAKE_TIN_STATS_SENT_BYTES64,
+	TCA_CAKE_TIN_STATS_DROPPED_PACKETS,
+	TCA_CAKE_TIN_STATS_DROPPED_BYTES64,
+	TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS,
+	TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64,
+	TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS,
+	TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64,
+	TCA_CAKE_TIN_STATS_BACKLOG_PACKETS,
+	TCA_CAKE_TIN_STATS_BACKLOG_BYTES,
+	TCA_CAKE_TIN_STATS_THRESHOLD_RATE64,
+	TCA_CAKE_TIN_STATS_TARGET_US,
+	TCA_CAKE_TIN_STATS_INTERVAL_US,
+	TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS,
+	TCA_CAKE_TIN_STATS_WAY_MISSES,
+	TCA_CAKE_TIN_STATS_WAY_COLLISIONS,
+	TCA_CAKE_TIN_STATS_PEAK_DELAY_US,
+	TCA_CAKE_TIN_STATS_AVG_DELAY_US,
+	TCA_CAKE_TIN_STATS_BASE_DELAY_US,
+	TCA_CAKE_TIN_STATS_SPARSE_FLOWS,
+	TCA_CAKE_TIN_STATS_BULK_FLOWS,
+	TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS,
+	TCA_CAKE_TIN_STATS_MAX_SKBLEN,
+	TCA_CAKE_TIN_STATS_FLOW_QUANTUM,
+	__TCA_CAKE_TIN_STATS_MAX
+};
+#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1)
+#define TC_CAKE_MAX_TINS (8)
+
+enum {
+	CAKE_FLOW_NONE = 0,
+	CAKE_FLOW_SRC_IP,
+	CAKE_FLOW_DST_IP,
+	CAKE_FLOW_HOSTS,    /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */
+	CAKE_FLOW_FLOWS,
+	CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */
+	CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */
+	CAKE_FLOW_TRIPLE,   /* = CAKE_FLOW_HOSTS  | CAKE_FLOW_FLOWS */
+	CAKE_FLOW_MAX,
+};
+
+enum {
+	CAKE_DIFFSERV_DIFFSERV3 = 0,
+	CAKE_DIFFSERV_DIFFSERV4,
+	CAKE_DIFFSERV_DIFFSERV8,
+	CAKE_DIFFSERV_BESTEFFORT,
+	CAKE_DIFFSERV_PRECEDENCE,
+	CAKE_DIFFSERV_MAX
+};
+
+enum {
+	CAKE_ACK_NONE = 0,
+	CAKE_ACK_FILTER,
+	CAKE_ACK_AGGRESSIVE,
+	CAKE_ACK_MAX
+};
+
+enum {
+	CAKE_ATM_NONE = 0,
+	CAKE_ATM_ATM,
+	CAKE_ATM_PTM,
+	CAKE_ATM_MAX
+};
+
 #endif

diff --git a/net/core/dev.c b/net/core/dev.c
index 993cdc3..0df1771 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c

@@ -2067,11 +2067,13 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
 		struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
 		int i;
 
+		/* walk through the TCs and see if it falls into any of them */
 		for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
 			if ((txq - tc->offset) < tc->count)
 				return i;
 		}
 
+		/* didn't find it, just return -1 to indicate no match */
 		return -1;
 	}
 
@@ -2260,7 +2262,14 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	unsigned int nr_ids;
 
 	if (dev->num_tc) {
+		/* Do not allow XPS on subordinate device directly */
 		num_tc = dev->num_tc;
+		if (num_tc < 0)
+			return -EINVAL;
+
+		/* If queue belongs to subordinate dev use its map */
+		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
 		tc = netdev_txq_to_tc(dev, index);
 		if (tc < 0)
 			return -EINVAL;
@@ -2448,11 +2457,25 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
+static void netdev_unbind_all_sb_channels(struct net_device *dev)
+{
+	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+	/* Unbind any subordinate channels */
+	while (txq-- != &dev->_tx[0]) {
+		if (txq->sb_dev)
+			netdev_unbind_sb_channel(dev, txq->sb_dev);
+	}
+}
+
 void netdev_reset_tc(struct net_device *dev)
 {
 #ifdef CONFIG_XPS
 	netif_reset_xps_queues_gt(dev, 0);
 #endif
+	netdev_unbind_all_sb_channels(dev);
+
+	/* Reset TC configuration of device */
 	dev->num_tc = 0;
 	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
 	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
@@ -2481,11 +2504,77 @@ int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
 #ifdef CONFIG_XPS
 	netif_reset_xps_queues_gt(dev, 0);
 #endif
+	netdev_unbind_all_sb_channels(dev);
+
 	dev->num_tc = num_tc;
 	return 0;
 }
 EXPORT_SYMBOL(netdev_set_num_tc);
 
+void netdev_unbind_sb_channel(struct net_device *dev,
+			      struct net_device *sb_dev)
+{
+	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+#ifdef CONFIG_XPS
+	netif_reset_xps_queues_gt(sb_dev, 0);
+#endif
+	memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
+	memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
+
+	while (txq-- != &dev->_tx[0]) {
+		if (txq->sb_dev == sb_dev)
+			txq->sb_dev = NULL;
+	}
+}
+EXPORT_SYMBOL(netdev_unbind_sb_channel);
+
+int netdev_bind_sb_channel_queue(struct net_device *dev,
+				 struct net_device *sb_dev,
+				 u8 tc, u16 count, u16 offset)
+{
+	/* Make certain the sb_dev and dev are already configured */
+	if (sb_dev->num_tc >= 0 || tc >= dev->num_tc)
+		return -EINVAL;
+
+	/* We cannot hand out queues we don't have */
+	if ((offset + count) > dev->real_num_tx_queues)
+		return -EINVAL;
+
+	/* Record the mapping */
+	sb_dev->tc_to_txq[tc].count = count;
+	sb_dev->tc_to_txq[tc].offset = offset;
+
+	/* Provide a way for Tx queue to find the tc_to_txq map or
+	 * XPS map for itself.
+	 */
+	while (count--)
+		netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev;
+
+	return 0;
+}
+EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
+
+int netdev_set_sb_channel(struct net_device *dev, u16 channel)
+{
+	/* Do not use a multiqueue device to represent a subordinate channel */
+	if (netif_is_multiqueue(dev))
+		return -ENODEV;
+
+	/* We allow channels 1 - 32767 to be used for subordinate channels.
+	 * Channel 0 is meant to be "native" mode and used only to represent
+	 * the main root device. We allow writing 0 to reset the device back
+	 * to normal mode after being used as a subordinate channel.
+	 */
+	if (channel > S16_MAX)
+		return -EINVAL;
+
+	dev->num_tc = -channel;
+
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_sb_channel);
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -2697,24 +2786,26 @@ EXPORT_SYMBOL(netif_device_attach);
  * Returns a Tx hash based on the given packet descriptor a Tx queues' number
  * to be used as a distribution range.
  */
-static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb)
+static u16 skb_tx_hash(const struct net_device *dev,
+		       const struct net_device *sb_dev,
+		       struct sk_buff *skb)
 {
 	u32 hash;
 	u16 qoffset = 0;
 	u16 qcount = dev->real_num_tx_queues;
 
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+
+		qoffset = sb_dev->tc_to_txq[tc].offset;
+		qcount = sb_dev->tc_to_txq[tc].count;
+	}
+
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
 		while (unlikely(hash >= qcount))
 			hash -= qcount;
-		return hash;
-	}
-
-	if (dev->num_tc) {
-		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-
-		qoffset = dev->tc_to_txq[tc].offset;
-		qcount = dev->tc_to_txq[tc].count;
+		return hash + qoffset;
 	}
 
 	return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
@@ -3484,7 +3575,8 @@ static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
 }
 #endif
 
-static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
+			 struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
@@ -3498,7 +3590,7 @@ static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 	if (!static_key_false(&xps_rxqs_needed))
 		goto get_cpus_map;
 
-	dev_maps = rcu_dereference(dev->xps_rxqs_map);
+	dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
 	if (dev_maps) {
 		int tci = sk_rx_queue_get(sk);
 
@@ -3509,7 +3601,7 @@ static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 
 get_cpus_map:
 	if (queue_index < 0) {
-		dev_maps = rcu_dereference(dev->xps_cpus_map);
+		dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
 		if (dev_maps) {
 			unsigned int tci = skb->sender_cpu - 1;
 
@@ -3525,17 +3617,36 @@ static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 #endif
 }
 
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
+		     struct net_device *sb_dev,
+		     select_queue_fallback_t fallback)
+{
+	return 0;
+}
+EXPORT_SYMBOL(dev_pick_tx_zero);
+
+u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback)
+{
+	return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
+}
+EXPORT_SYMBOL(dev_pick_tx_cpu_id);
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
+			    struct net_device *sb_dev)
 {
 	struct sock *sk = skb->sk;
 	int queue_index = sk_tx_queue_get(sk);
 
+	sb_dev = sb_dev ? : dev;
+
 	if (queue_index < 0 || skb->ooo_okay ||
 	    queue_index >= dev->real_num_tx_queues) {
-		int new_index = get_xps_queue(dev, skb);
+		int new_index = get_xps_queue(dev, sb_dev, skb);
 
 		if (new_index < 0)
-			new_index = skb_tx_hash(dev, skb);
+			new_index = skb_tx_hash(dev, sb_dev, skb);
 
 		if (queue_index != new_index && sk &&
 		    sk_fullsock(sk) &&
@@ -3550,7 +3661,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 				    struct sk_buff *skb,
-				    void *accel_priv)
+				    struct net_device *sb_dev)
 {
 	int queue_index = 0;
 
@@ -3565,10 +3676,10 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		if (ops->ndo_select_queue)
-			queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+			queue_index = ops->ndo_select_queue(dev, skb, sb_dev,
 							    __netdev_pick_tx);
 		else
-			queue_index = __netdev_pick_tx(dev, skb);
+			queue_index = __netdev_pick_tx(dev, skb, sb_dev);
 
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	}
@@ -3580,7 +3691,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 /**
  *	__dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
- *	@accel_priv: private data used for L2 forwarding offload
+ *	@sb_dev: suboordinate device used for L2 forwarding offload
  *
  *	Queue a buffer for transmission to a network device. The caller must
  *	have set the device and priority and built the buffer before calling
@@ -3603,7 +3714,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
  *      the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
+static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 {
 	struct net_device *dev = skb->dev;
 	struct netdev_queue *txq;
@@ -3642,7 +3753,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 	else
 		skb_dst_force(skb);
 
-	txq = netdev_pick_tx(dev, skb, accel_priv);
+	txq = netdev_pick_tx(dev, skb, sb_dev);
 	q = rcu_dereference_bh(txq->qdisc);
 
 	trace_net_dev_queue(skb);
@@ -3716,9 +3827,9 @@ int dev_queue_xmit(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dev_queue_xmit);
 
-int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
+int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
 {
-	return __dev_queue_xmit(skb, accel_priv);
+	return __dev_queue_xmit(skb, sb_dev);
 }
 EXPORT_SYMBOL(dev_queue_xmit_accel);
 
@@ -4830,23 +4941,28 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
 	struct list_head sublist;
 	struct sk_buff *skb, *next;
 
+	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
 		struct net_device *orig_dev = skb->dev;
 		struct packet_type *pt_prev = NULL;
 
+		list_del(&skb->list);
 		__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+		if (!pt_prev)
+			continue;
 		if (pt_curr != pt_prev || od_curr != orig_dev) {
 			/* dispatch old sublist */
-			list_cut_before(&sublist, head, &skb->list);
 			__netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
 			/* start new sublist */
+			INIT_LIST_HEAD(&sublist);
 			pt_curr = pt_prev;
 			od_curr = orig_dev;
 		}
+		list_add_tail(&skb->list, &sublist);
 	}
 
 	/* dispatch final sublist */
-	__netif_receive_skb_list_ptype(head, pt_curr, od_curr);
+	__netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
 }
 
 static int __netif_receive_skb(struct sk_buff *skb)
@@ -4981,25 +5097,30 @@ static void netif_receive_skb_list_internal(struct list_head *head)
 {
 	struct bpf_prog *xdp_prog = NULL;
 	struct sk_buff *skb, *next;
+	struct list_head sublist;
 
+	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
 		net_timestamp_check(netdev_tstamp_prequeue, skb);
-		if (skb_defer_rx_timestamp(skb))
-			/* Handled, remove from list */
-			list_del(&skb->list);
+		list_del(&skb->list);
+		if (!skb_defer_rx_timestamp(skb))
+			list_add_tail(&skb->list, &sublist);
 	}
+	list_splice_init(&sublist, head);
 
 	if (static_branch_unlikely(&generic_xdp_needed_key)) {
 		preempt_disable();
 		rcu_read_lock();
 		list_for_each_entry_safe(skb, next, head, list) {
 			xdp_prog = rcu_dereference(skb->dev->xdp_prog);
-			if (do_xdp_generic(xdp_prog, skb) != XDP_PASS)
-				/* Dropped, remove from list */
-				list_del(&skb->list);
+			list_del(&skb->list);
+			if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
+				list_add_tail(&skb->list, &sublist);
 		}
 		rcu_read_unlock();
 		preempt_enable();
+		/* Put passed packets back on main list */
+		list_splice_init(&sublist, head);
 	}
 
 	rcu_read_lock();
@@ -5010,9 +5131,9 @@ static void netif_receive_skb_list_internal(struct list_head *head)
 			int cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
 			if (cpu >= 0) {
-				enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
-				/* Handled, remove from list */
+				/* Will be handled, remove from list */
 				list_del(&skb->list);
+				enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 			}
 		}
 	}
@@ -5158,7 +5279,8 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
 	list_for_each_entry_safe_reverse(skb, p, head, list) {
 		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
 			return;
-		list_del_init(&skb->list);
+		list_del(&skb->list);
+		skb->next = NULL;
 		napi_gro_complete(skb);
 		napi->gro_count--;
 		napi->gro_hash[index].count--;
@@ -5339,7 +5461,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
 
 	if (pp) {
-		list_del_init(&pp->list);
+		list_del(&pp->list);
+		pp->next = NULL;
 		napi_gro_complete(pp);
 		napi->gro_count--;
 		napi->gro_hash[hash].count--;

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 470f3db..65fc366 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c

@@ -326,6 +326,57 @@ devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
 						  pool_type, p_tc_index);
 }
 
+struct devlink_region {
+	struct devlink *devlink;
+	struct list_head list;
+	const char *name;
+	struct list_head snapshot_list;
+	u32 max_snapshots;
+	u32 cur_snapshots;
+	u64 size;
+};
+
+struct devlink_snapshot {
+	struct list_head list;
+	struct devlink_region *region;
+	devlink_snapshot_data_dest_t *data_destructor;
+	u64 data_len;
+	u8 *data;
+	u32 id;
+};
+
+static struct devlink_region *
+devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
+{
+	struct devlink_region *region;
+
+	list_for_each_entry(region, &devlink->region_list, list)
+		if (!strcmp(region->name, region_name))
+			return region;
+
+	return NULL;
+}
+
+static struct devlink_snapshot *
+devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
+{
+	struct devlink_snapshot *snapshot;
+
+	list_for_each_entry(snapshot, &region->snapshot_list, list)
+		if (snapshot->id == id)
+			return snapshot;
+
+	return NULL;
+}
+
+static void devlink_region_snapshot_del(struct devlink_snapshot *snapshot)
+{
+	snapshot->region->cur_snapshots--;
+	list_del(&snapshot->list);
+	(*snapshot->data_destructor)(snapshot->data);
+	kfree(snapshot);
+}
+
 #define DEVLINK_NL_FLAG_NEED_DEVLINK	BIT(0)
 #define DEVLINK_NL_FLAG_NEED_PORT	BIT(1)
 #define DEVLINK_NL_FLAG_NEED_SB		BIT(2)
@@ -2620,6 +2671,11 @@ static const struct devlink_param devlink_param_generic[] = {
 		.name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME,
 		.type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE,
 	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+		.name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
+		.type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
+	},
 };
 
 static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -3098,6 +3154,420 @@ static void devlink_param_unregister_one(struct devlink *devlink,
 	kfree(param_item);
 }
 
+static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
+					     struct devlink *devlink,
+					     struct devlink_snapshot *snapshot)
+{
+	struct nlattr *snap_attr;
+	int err;
+
+	snap_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
+	if (!snap_attr)
+		return -EINVAL;
+
+	err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
+	if (err)
+		goto nla_put_failure;
+
+	nla_nest_end(msg, snap_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, snap_attr);
+	return err;
+}
+
+static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
+					      struct devlink *devlink,
+					      struct devlink_region *region)
+{
+	struct devlink_snapshot *snapshot;
+	struct nlattr *snapshots_attr;
+	int err;
+
+	snapshots_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOTS);
+	if (!snapshots_attr)
+		return -EINVAL;
+
+	list_for_each_entry(snapshot, &region->snapshot_list, list) {
+		err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
+		if (err)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(msg, snapshots_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, snapshots_attr);
+	return err;
+}
+
+static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
+				  enum devlink_command cmd, u32 portid,
+				  u32 seq, int flags,
+				  struct devlink_region *region)
+{
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	err = devlink_nl_put_handle(msg, devlink);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->name);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+				region->size,
+				DEVLINK_ATTR_PAD);
+	if (err)
+		goto nla_put_failure;
+
+	err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
+	if (err)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return err;
+}
+
+static void devlink_nl_region_notify(struct devlink_region *region,
+				     struct devlink_snapshot *snapshot,
+				     enum devlink_command cmd)
+{
+	struct devlink *devlink = region->devlink;
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
+	if (!hdr)
+		goto out_free_msg;
+
+	err = devlink_nl_put_handle(msg, devlink);
+	if (err)
+		goto out_cancel_msg;
+
+	err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME,
+			     region->name);
+	if (err)
+		goto out_cancel_msg;
+
+	if (snapshot) {
+		err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID,
+				  snapshot->id);
+		if (err)
+			goto out_cancel_msg;
+	} else {
+		err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+					region->size, DEVLINK_ATTR_PAD);
+		if (err)
+			goto out_cancel_msg;
+	}
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+
+	return;
+
+out_cancel_msg:
+	genlmsg_cancel(msg, hdr);
+out_free_msg:
+	nlmsg_free(msg);
+}
+
+static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
+					  struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_region *region;
+	const char *region_name;
+	struct sk_buff *msg;
+	int err;
+
+	if (!info->attrs[DEVLINK_ATTR_REGION_NAME])
+		return -EINVAL;
+
+	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+	region = devlink_region_get_by_name(devlink, region_name);
+	if (!region)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET,
+				     info->snd_portid, info->snd_seq, 0,
+				     region);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
+					    struct netlink_callback *cb)
+{
+	struct devlink_region *region;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(region, &devlink->region_list, list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_region_fill(msg, devlink,
+						     DEVLINK_CMD_REGION_GET,
+						     NETLINK_CB(cb->skb).portid,
+						     cb->nlh->nlmsg_seq,
+						     NLM_F_MULTI, region);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int devlink_nl_cmd_region_del(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_snapshot *snapshot;
+	struct devlink_region *region;
+	const char *region_name;
+	u32 snapshot_id;
+
+	if (!info->attrs[DEVLINK_ATTR_REGION_NAME] ||
+	    !info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+		return -EINVAL;
+
+	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+	snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+
+	region = devlink_region_get_by_name(devlink, region_name);
+	if (!region)
+		return -EINVAL;
+
+	snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+	if (!snapshot)
+		return -EINVAL;
+
+	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
+	devlink_region_snapshot_del(snapshot);
+	return 0;
+}
+
+static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
+						 struct devlink *devlink,
+						 u8 *chunk, u32 chunk_size,
+						 u64 addr)
+{
+	struct nlattr *chunk_attr;
+	int err;
+
+	chunk_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_CHUNK);
+	if (!chunk_attr)
+		return -EINVAL;
+
+	err = nla_put(msg, DEVLINK_ATTR_REGION_CHUNK_DATA, chunk_size, chunk);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
+				DEVLINK_ATTR_PAD);
+	if (err)
+		goto nla_put_failure;
+
+	nla_nest_end(msg, chunk_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, chunk_attr);
+	return err;
+}
+
+#define DEVLINK_REGION_READ_CHUNK_SIZE 256
+
+static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
+						struct devlink *devlink,
+						struct devlink_region *region,
+						struct nlattr **attrs,
+						u64 start_offset,
+						u64 end_offset,
+						bool dump,
+						u64 *new_offset)
+{
+	struct devlink_snapshot *snapshot;
+	u64 curr_offset = start_offset;
+	u32 snapshot_id;
+	int err = 0;
+
+	*new_offset = start_offset;
+
+	snapshot_id = nla_get_u32(attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+	snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+	if (!snapshot)
+		return -EINVAL;
+
+	if (end_offset > snapshot->data_len || dump)
+		end_offset = snapshot->data_len;
+
+	while (curr_offset < end_offset) {
+		u32 data_size;
+		u8 *data;
+
+		if (end_offset - curr_offset < DEVLINK_REGION_READ_CHUNK_SIZE)
+			data_size = end_offset - curr_offset;
+		else
+			data_size = DEVLINK_REGION_READ_CHUNK_SIZE;
+
+		data = &snapshot->data[curr_offset];
+		err = devlink_nl_cmd_region_read_chunk_fill(skb, devlink,
+							    data, data_size,
+							    curr_offset);
+		if (err)
+			break;
+
+		curr_offset += data_size;
+	}
+	*new_offset = curr_offset;
+
+	return err;
+}
+
+static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
+					     struct netlink_callback *cb)
+{
+	u64 ret_offset, start_offset, end_offset = 0;
+	struct nlattr *attrs[DEVLINK_ATTR_MAX + 1];
+	const struct genl_ops *ops = cb->data;
+	struct devlink_region *region;
+	struct nlattr *chunks_attr;
+	const char *region_name;
+	struct devlink *devlink;
+	bool dump = true;
+	void *hdr;
+	int err;
+
+	start_offset = *((u64 *)&cb->args[0]);
+
+	err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize,
+			  attrs, DEVLINK_ATTR_MAX, ops->policy, NULL);
+	if (err)
+		goto out;
+
+	devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
+	if (IS_ERR(devlink))
+		goto out;
+
+	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink->lock);
+
+	if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
+	    !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+		goto out_unlock;
+
+	region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]);
+	region = devlink_region_get_by_name(devlink, region_name);
+	if (!region)
+		goto out_unlock;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
+			  DEVLINK_CMD_REGION_READ);
+	if (!hdr)
+		goto out_unlock;
+
+	err = devlink_nl_put_handle(skb, devlink);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name);
+	if (err)
+		goto nla_put_failure;
+
+	chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS);
+	if (!chunks_attr)
+		goto nla_put_failure;
+
+	if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
+	    attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
+		if (!start_offset)
+			start_offset =
+				nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+
+		end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+		end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
+		dump = false;
+	}
+
+	err = devlink_nl_region_read_snapshot_fill(skb, devlink,
+						   region, attrs,
+						   start_offset,
+						   end_offset, dump,
+						   &ret_offset);
+
+	if (err && err != -EMSGSIZE)
+		goto nla_put_failure;
+
+	/* Check if there was any progress done to prevent infinite loop */
+	if (ret_offset == start_offset)
+		goto nla_put_failure;
+
+	*((u64 *)&cb->args[0]) = ret_offset;
+
+	nla_nest_end(skb, chunks_attr);
+	genlmsg_end(skb, hdr);
+	mutex_unlock(&devlink->lock);
+	mutex_unlock(&devlink_mutex);
+
+	return skb->len;
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+out_unlock:
+	mutex_unlock(&devlink->lock);
+	mutex_unlock(&devlink_mutex);
+out:
+	return 0;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -3121,6 +3591,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -3319,6 +3791,28 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_REGION_GET,
+		.doit = devlink_nl_cmd_region_get_doit,
+		.dumpit = devlink_nl_cmd_region_get_dumpit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_REGION_DEL,
+		.doit = devlink_nl_cmd_region_del,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_REGION_READ,
+		.dumpit = devlink_nl_cmd_region_read_dumpit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
@@ -3358,6 +3852,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
 	INIT_LIST_HEAD(&devlink->resource_list);
 	INIT_LIST_HEAD(&devlink->param_list);
+	INIT_LIST_HEAD(&devlink->region_list);
 	mutex_init(&devlink->lock);
 	return devlink;
 }
@@ -4109,6 +4604,158 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 }
 EXPORT_SYMBOL_GPL(devlink_param_value_changed);
 
+/**
+ *	devlink_region_create - create a new address region
+ *
+ *	@devlink: devlink
+ *	@region_name: region name
+ *	@region_max_snapshots: Maximum supported number of snapshots for region
+ *	@region_size: size of region
+ */
+struct devlink_region *devlink_region_create(struct devlink *devlink,
+					     const char *region_name,
+					     u32 region_max_snapshots,
+					     u64 region_size)
+{
+	struct devlink_region *region;
+	int err = 0;
+
+	mutex_lock(&devlink->lock);
+
+	if (devlink_region_get_by_name(devlink, region_name)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+
+	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	if (!region) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	region->devlink = devlink;
+	region->max_snapshots = region_max_snapshots;
+	region->name = region_name;
+	region->size = region_size;
+	INIT_LIST_HEAD(&region->snapshot_list);
+	list_add_tail(&region->list, &devlink->region_list);
+	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+	mutex_unlock(&devlink->lock);
+	return region;
+
+unlock:
+	mutex_unlock(&devlink->lock);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(devlink_region_create);
+
+/**
+ *	devlink_region_destroy - destroy address region
+ *
+ *	@region: devlink region to destroy
+ */
+void devlink_region_destroy(struct devlink_region *region)
+{
+	struct devlink *devlink = region->devlink;
+	struct devlink_snapshot *snapshot, *ts;
+
+	mutex_lock(&devlink->lock);
+
+	/* Free all snapshots of region */
+	list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
+		devlink_region_snapshot_del(snapshot);
+
+	list_del(&region->list);
+
+	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+	mutex_unlock(&devlink->lock);
+	kfree(region);
+}
+EXPORT_SYMBOL_GPL(devlink_region_destroy);
+
+/**
+ *	devlink_region_shapshot_id_get - get snapshot ID
+ *
+ *	This callback should be called when adding a new snapshot,
+ *	Driver should use the same id for multiple snapshots taken
+ *	on multiple regions at the same time/by the same trigger.
+ *
+ *	@devlink: devlink
+ */
+u32 devlink_region_shapshot_id_get(struct devlink *devlink)
+{
+	u32 id;
+
+	mutex_lock(&devlink->lock);
+	id = ++devlink->snapshot_id;
+	mutex_unlock(&devlink->lock);
+
+	return id;
+}
+EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get);
+
+/**
+ *	devlink_region_snapshot_create - create a new snapshot
+ *	This will add a new snapshot of a region. The snapshot
+ *	will be stored on the region struct and can be accessed
+ *	from devlink. This is useful for future	analyses of snapshots.
+ *	Multiple snapshots can be created on a region.
+ *	The @snapshot_id should be obtained using the getter function.
+ *
+ *	@devlink_region: devlink region of the snapshot
+ *	@data_len: size of snapshot data
+ *	@data: snapshot data
+ *	@snapshot_id: snapshot id to be created
+ *	@data_destructor: pointer to destructor function to free data
+ */
+int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+				   u8 *data, u32 snapshot_id,
+				   devlink_snapshot_data_dest_t *data_destructor)
+{
+	struct devlink *devlink = region->devlink;
+	struct devlink_snapshot *snapshot;
+	int err;
+
+	mutex_lock(&devlink->lock);
+
+	/* check if region can hold one more snapshot */
+	if (region->cur_snapshots == region->max_snapshots) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+
+	snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
+	if (!snapshot) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	snapshot->id = snapshot_id;
+	snapshot->region = region;
+	snapshot->data = data;
+	snapshot->data_len = data_len;
+	snapshot->data_destructor = data_destructor;
+
+	list_add_tail(&snapshot->list, &region->snapshot_list);
+
+	region->cur_snapshots++;
+
+	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_NEW);
+	mutex_unlock(&devlink->lock);
+	return 0;
+
+unlock:
+	mutex_unlock(&devlink->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 53f96e4..b555fc2 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c

@@ -589,7 +589,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	struct flow_dissector_key_tags *key_tags;
 	struct flow_dissector_key_vlan *key_vlan;
 	enum flow_dissect_ret fdret;
-	bool skip_vlan = false;
+	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
 	int num_hdrs = 0;
 	u8 ip_proto = 0;
 	bool ret;
@@ -748,14 +748,14 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	}
 	case htons(ETH_P_8021AD):
 	case htons(ETH_P_8021Q): {
-		const struct vlan_hdr *vlan;
+		const struct vlan_hdr *vlan = NULL;
 		struct vlan_hdr _vlan;
-		bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
+		__be16 saved_vlan_tpid = proto;
 
-		if (vlan_tag_present)
+		if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX &&
+		    skb && skb_vlan_tag_present(skb)) {
 			proto = skb->protocol;
-
-		if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
+		} else {
 			vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
 						    data, hlen, &_vlan);
 			if (!vlan) {
@@ -765,20 +765,23 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 
 			proto = vlan->h_vlan_encapsulated_proto;
 			nhoff += sizeof(*vlan);
-			if (skip_vlan) {
-				fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
-				break;
-			}
 		}
 
-		skip_vlan = true;
-		if (dissector_uses_key(flow_dissector,
-				       FLOW_DISSECTOR_KEY_VLAN)) {
+		if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
+			dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
+		} else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
+			dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN;
+		} else {
+			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+			break;
+		}
+
+		if (dissector_uses_key(flow_dissector, dissector_vlan)) {
 			key_vlan = skb_flow_dissector_target(flow_dissector,
-							     FLOW_DISSECTOR_KEY_VLAN,
+							     dissector_vlan,
 							     target_container);
 
-			if (vlan_tag_present) {
+			if (!vlan) {
 				key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
 				key_vlan->vlan_priority =
 					(skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
@@ -789,6 +792,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 					(ntohs(vlan->h_vlan_TCI) &
 					 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 			}
+			key_vlan->vlan_tpid = saved_vlan_tpid;
 		}
 
 		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f25ac5f..ffa1d18 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c

@@ -1047,13 +1047,30 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
 				  char *buf)
 {
 	struct net_device *dev = queue->dev;
-	int index = get_netdev_queue_index(queue);
-	int tc = netdev_txq_to_tc(dev, index);
+	int index;
+	int tc;
 
+	if (!netif_is_multiqueue(dev))
+		return -ENOENT;
+
+	index = get_netdev_queue_index(queue);
+
+	/* If queue belongs to subordinate dev use its TC mapping */
+	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
+	tc = netdev_txq_to_tc(dev, index);
 	if (tc < 0)
 		return -EINVAL;
 
-	return sprintf(buf, "%u\n", tc);
+	/* We can report the traffic class one of two ways:
+	 * Subordinate device traffic classes are reported with the traffic
+	 * class first, and then the subordinate class so for example TC0 on
+	 * subordinate device 2 will be reported as "0-2". If the queue
+	 * belongs to the root device it will be reported with just the
+	 * traffic class, so just "0" for TC 0 for example.
+	 */
+	return dev->num_tc < 0 ? sprintf(buf, "%u%d\n", tc, dev->num_tc) :
+				 sprintf(buf, "%u\n", tc);
 }
 
 #ifdef CONFIG_XPS
@@ -1214,10 +1231,20 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	cpumask_var_t mask;
 	unsigned long index;
 
+	if (!netif_is_multiqueue(dev))
+		return -ENOENT;
+
 	index = get_netdev_queue_index(queue);
 
 	if (dev->num_tc) {
+		/* Do not allow XPS on subordinate device directly */
 		num_tc = dev->num_tc;
+		if (num_tc < 0)
+			return -EINVAL;
+
+		/* If queue belongs to subordinate dev use its map */
+		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
 		tc = netdev_txq_to_tc(dev, index);
 		if (tc < 0)
 			return -EINVAL;
@@ -1260,6 +1287,9 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
 	cpumask_var_t mask;
 	int err;
 
+	if (!netif_is_multiqueue(dev))
+		return -ENOENT;
+
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c9929ef..e03258e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c

@@ -2796,9 +2796,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
 			return err;
 	}
 
-	dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
-
-	__dev_notify_flags(dev, old_flags, ~0U);
+	if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
+		__dev_notify_flags(dev, old_flags, 0U);
+	} else {
+		dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+		__dev_notify_flags(dev, old_flags, ~0U);
+	}
 	return 0;
 }
 EXPORT_SYMBOL(rtnl_configure_link);

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 937239a..695979b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c

@@ -429,15 +429,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 
 	icmp_param->data.icmph.checksum = 0;
 
+	ipcm_init(&ipc);
 	inet->tos = ip_hdr(skb)->tos;
 	sk->sk_mark = mark;
 	daddr = ipc.addr = ip_hdr(skb)->saddr;
 	saddr = fib_compute_spec_dst(skb);
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.sockc.transmit_time = 0;
 
 	if (icmp_param->replyopts.opt.opt.optlen) {
 		ipc.opt = &icmp_param->replyopts.opt;
@@ -711,12 +707,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	icmp_param.offset = skb_network_offset(skb_in);
 	inet_sk(sk)->tos = tos;
 	sk->sk_mark = mark;
+	ipcm_init(&ipc);
 	ipc.addr = iph->saddr;
 	ipc.opt = &icmp_param.replyopts.opt;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.sockc.transmit_time = 0;
 
 	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
 			       type, code, &icmp_param);

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 1a3b6f3..3196cf5 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c

@@ -530,8 +530,14 @@ static void ip_sublist_rcv_finish(struct list_head *head)
 {
 	struct sk_buff *skb, *next;
 
-	list_for_each_entry_safe(skb, next, head, list)
+	list_for_each_entry_safe(skb, next, head, list) {
+		list_del(&skb->list);
+		/* Handle ip{6}_forward case, as sch_direct_xmit have
+		 * another kind of SKB-list usage (see validate_xmit_skb_list)
+		 */
+		skb->next = NULL;
 		dst_input(skb);
+	}
 }
 
 static void ip_list_rcv_finish(struct net *net, struct sock *sk,

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 570e3eb..e2b6bd47 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c

@@ -1146,15 +1146,15 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
 	cork->fragsize = ip_sk_use_pmtu(sk) ?
 			 dst_mtu(&rt->dst) : rt->dst.dev->mtu;
 
-	cork->gso_size = sk->sk_type == SOCK_DGRAM &&
-			 sk->sk_protocol == IPPROTO_UDP ? ipc->gso_size : 0;
+	cork->gso_size = ipc->gso_size;
 	cork->dst = &rt->dst;
 	cork->length = 0;
 	cork->ttl = ipc->ttl;
 	cork->tos = ipc->tos;
 	cork->priority = ipc->priority;
-	cork->tx_flags = ipc->tx_flags;
 	cork->transmit_time = ipc->sockc.transmit_time;
+	cork->tx_flags = 0;
+	sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
 
 	return 0;
 }
@@ -1548,12 +1548,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 	if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
 		return;
 
+	ipcm_init(&ipc);
 	ipc.addr = daddr;
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.sockc.transmit_time = 0;
 
 	if (replyopts.opt.opt.optlen) {
 		ipc.opt = &replyopts.opt;

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 82f9141..5660adc 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c

@@ -1052,7 +1052,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 	struct sk_buff *skb;
 	int ret;
 
-	if (assert == IGMPMSG_WHOLEPKT)
+	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 	else
 		skb = alloc_skb(128, GFP_ATOMIC);
@@ -1060,7 +1060,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 	if (!skb)
 		return -ENOBUFS;
 
-	if (assert == IGMPMSG_WHOLEPKT) {
+	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
 		/* Ugly, but we have no choice with this interface.
 		 * Duplicate old header, fix ihl, length etc.
 		 * And all this only to mangle msg->im_msgtype and
@@ -1071,9 +1071,12 @@ static int ipmr_cache_report(struct mr_table *mrt,
 		skb_reset_transport_header(skb);
 		msg = (struct igmpmsg *)skb_network_header(skb);
 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
-		msg->im_msgtype = IGMPMSG_WHOLEPKT;
+		msg->im_msgtype = assert;
 		msg->im_mbz = 0;
-		msg->im_vif = mrt->mroute_reg_vif_num;
+		if (assert == IGMPMSG_WRVIFWHOLE)
+			msg->im_vif = vifi;
+		else
+			msg->im_vif = mrt->mroute_reg_vif_num;
 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 					     sizeof(struct iphdr));
@@ -1372,6 +1375,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 	struct mr_table *mrt;
 	struct vifctl vif;
 	struct mfcctl mfc;
+	bool do_wrvifwhole;
 	u32 uval;
 
 	/* There's one exception to the lock - MRT_DONE which needs to unlock */
@@ -1502,10 +1506,12 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 			break;
 		}
 
+		do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
 		val = !!val;
 		if (val != mrt->mroute_do_pim) {
 			mrt->mroute_do_pim = val;
 			mrt->mroute_do_assert = val;
+			mrt->mroute_do_wrvifwhole = do_wrvifwhole;
 		}
 		break;
 	case MRT_TABLE:
@@ -1983,6 +1989,9 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 			       MFC_ASSERT_THRESH)) {
 			c->_c.mfc_un.res.last_assert = jiffies;
 			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
+			if (mrt->mroute_do_wrvifwhole)
+				ipmr_cache_report(mrt, skb, true_vifi,
+						  IGMPMSG_WRVIFWHOLE);
 		}
 		goto dont_forward;
 	}
@@ -2659,7 +2668,9 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
 			mrt->mroute_reg_vif_num) ||
 	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
 		       mrt->mroute_do_assert) ||
-	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim))
+	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) ||
+	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
+		       mrt->mroute_do_wrvifwhole))
 		return false;
 
 	return true;

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index b474922..b54c964a 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c

@@ -739,14 +739,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		/* no remote port */
 	}
 
-	ipc.sockc.tsflags = sk->sk_tsflags;
-	ipc.addr = inet->inet_saddr;
-	ipc.opt = NULL;
-	ipc.oif = sk->sk_bound_dev_if;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.sockc.transmit_time = 0;
+	ipcm_init_sk(&ipc, inet);
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -770,8 +763,6 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		rcu_read_unlock();
 	}
 
-	sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
 	saddr = ipc.addr;
 	ipc.addr = faddr = daddr;
 

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 446af7b..33df4d7 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c

@@ -562,14 +562,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		daddr = inet->inet_daddr;
 	}
 
-	ipc.sockc.tsflags = sk->sk_tsflags;
-	ipc.sockc.transmit_time = 0;
-	ipc.addr = inet->inet_saddr;
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.oif = sk->sk_bound_dev_if;
+	ipcm_init_sk(&ipc, inet);
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -672,8 +665,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 				      &rt, msg->msg_flags, &ipc.sockc);
 
 	 else {
-		sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
 		if (!ipc.addr)
 			ipc.addr = fl4.daddr;
 		lock_sock(sk);

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bf461fa..e3704a4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c

@@ -817,8 +817,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 				 * This occurs when user tries to read
 				 * from never connected socket.
 				 */
-				if (!sock_flag(sk, SOCK_DONE))
-					ret = -ENOTCONN;
+				ret = -ENOTCONN;
 				break;
 			}
 			if (!timeo) {
@@ -1241,7 +1240,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 		/* 'common' sending to sendq */
 	}
 
-	sockc.tsflags = sk->sk_tsflags;
+	sockcm_init(&sockc, sk);
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);
 		if (unlikely(err)) {
@@ -1275,9 +1274,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			int linear;
 
 new_segment:
-			/* Allocate new segment. If the interface is SG,
-			 * allocate skb fitting to single page.
-			 */
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
@@ -2042,13 +2038,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 				break;
 
 			if (sk->sk_state == TCP_CLOSE) {
-				if (!sock_flag(sk, SOCK_DONE)) {
-					/* This occurs when user tries to read
-					 * from never connected socket.
-					 */
-					copied = -ENOTCONN;
-					break;
-				}
+				/* This occurs when user tries to read
+				 * from never connected socket.
+				 */
+				copied = -ENOTCONN;
 				break;
 			}
 

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 814ea43..fac5d03 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c

@@ -3462,7 +3462,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 static void tcp_store_ts_recent(struct tcp_sock *tp)
 {
 	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
-	tp->rx_opt.ts_recent_stamp = get_seconds();
+	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 }
 
 static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
@@ -4695,7 +4695,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		}
 
 		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
-		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bea17f1..dc415c6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c

@@ -155,7 +155,8 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 	   and use initial timestamp retrieved from peer table.
 	 */
 	if (tcptw->tw_ts_recent_stamp &&
-	    (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
+	    (!twp || (reuse && time_after32(ktime_get_seconds(),
+					    tcptw->tw_ts_recent_stamp)))) {
 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 		if (tp->write_seq == 0)
 			tp->write_seq = 1;

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index dac5893..75ef332 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c

@@ -144,7 +144,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		tw->tw_substate	  = TCP_TIME_WAIT;
 		tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (tmp_opt.saw_tstamp) {
-			tcptw->tw_ts_recent_stamp = get_seconds();
+			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
 		}
 
@@ -189,7 +189,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 
 		if (tmp_opt.saw_tstamp) {
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
-			tcptw->tw_ts_recent_stamp = get_seconds();
+			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
 		}
 
 		inet_twsk_put(tw);
@@ -537,7 +537,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 
 	if (newtp->rx_opt.tstamp_ok) {
 		newtp->rx_opt.ts_recent = req->ts_recent;
-		newtp->rx_opt.ts_recent_stamp = get_seconds();
+		newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 		newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
 	} else {
 		newtp->rx_opt.ts_recent_stamp = 0;
@@ -603,7 +603,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			 * it can be estimated (approximately)
 			 * from another data.
 			 */
-			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
+			tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
 			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
 		}
 	}

diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index c61240e..4dff40d 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c

@@ -146,6 +146,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 				    rs->prior_mstamp); /* ack phase */
 	rs->interval_us = max(snd_us, ack_us);
 
+	/* Record both segment send and ack receive intervals */
+	rs->snd_interval_us = snd_us;
+	rs->rcv_interval_us = ack_us;
+
 	/* Normally we expect interval_us >= min-rtt.
 	 * Note that rate may still be over-estimated when a spuriously
 	 * retransmistted skb was first (s)acked because "interval_us"

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5c76ba0..060e841d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c

@@ -926,12 +926,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
 		return -EOPNOTSUPP;
 
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.sockc.transmit_time = 0;
-
 	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
 
 	fl4 = &inet->cork.fl.u.ip4;
@@ -978,9 +972,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		connected = 1;
 	}
 
-	ipc.sockc.tsflags = sk->sk_tsflags;
-	ipc.addr = inet->inet_saddr;
-	ipc.oif = sk->sk_bound_dev_if;
+	ipcm_init_sk(&ipc, inet);
 	ipc.gso_size = up->gso_size;
 
 	if (msg->msg_controllen) {
@@ -1028,8 +1020,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	saddr = ipc.addr;
 	ipc.addr = faddr = daddr;
 
-	sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
 	if (ipc.opt && ipc.opt->opt.srr) {
 		if (!daddr) {
 			err = -EINVAL;

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 91580c6..1659a6b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c

@@ -385,8 +385,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 
 	if (ndev->cnf.stable_secret.initialized)
 		ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
-	else
-		ndev->cnf.addr_gen_mode = ipv6_devconf_dflt.addr_gen_mode;
 
 	ndev->cnf.mtu6 = dev->mtu;
 	ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -5210,7 +5208,9 @@ static inline size_t inet6_ifla6_size(void)
 	     + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
 	     + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
 	     + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
-	     + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */
+	     + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */
+	     + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */
+	     + 0;
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
@@ -5892,32 +5892,31 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
 					 loff_t *ppos)
 {
 	int ret = 0;
-	int new_val;
+	u32 new_val;
 	struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
 	struct net *net = (struct net *)ctl->extra2;
+	struct ctl_table tmp = {
+		.data = &new_val,
+		.maxlen = sizeof(new_val),
+		.mode = ctl->mode,
+	};
 
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+	new_val = *((u32 *)ctl->data);
+
+	ret = proc_douintvec(&tmp, write, buffer, lenp, ppos);
+	if (ret != 0)
+		goto out;
 
 	if (write) {
-		new_val = *((int *)ctl->data);
-
 		if (check_addr_gen_mode(new_val) < 0) {
 			ret = -EINVAL;
 			goto out;
 		}
 
-		/* request for default */
-		if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) {
-			ipv6_devconf_dflt.addr_gen_mode = new_val;
-
-		/* request for individual net device */
-		} else {
-			if (!idev)
-				goto out;
-
+		if (idev) {
 			if (check_stable_privacy(idev, net, new_val) < 0) {
 				ret = -EINVAL;
 				goto out;
@@ -5927,7 +5926,21 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
 				idev->cnf.addr_gen_mode = new_val;
 				addrconf_dev_config(idev->dev);
 			}
+		} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
+			struct net_device *dev;
+
+			net->ipv6.devconf_dflt->addr_gen_mode = new_val;
+			for_each_netdev(net, dev) {
+				idev = __in6_dev_get(dev);
+				if (idev &&
+				    idev->cnf.addr_gen_mode != new_val) {
+					idev->cnf.addr_gen_mode = new_val;
+					addrconf_dev_config(idev->dev);
+				}
+			}
 		}
+
+		*((u32 *)ctl->data) = new_val;
 	}
 
 out:

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 2ee08b6..201306b 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c

@@ -736,7 +736,7 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
 
 int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 			  struct msghdr *msg, struct flowi6 *fl6,
-			  struct ipcm6_cookie *ipc6, struct sockcm_cookie *sockc)
+			  struct ipcm6_cookie *ipc6)
 {
 	struct in6_pktinfo *src_info;
 	struct cmsghdr *cmsg;
@@ -755,7 +755,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 		}
 
 		if (cmsg->cmsg_level == SOL_SOCKET) {
-			err = __sock_cmsg_send(sk, msg, cmsg, sockc);
+			err = __sock_cmsg_send(sk, msg, cmsg, &ipc6->sockc);
 			if (err)
 				return err;
 			continue;

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index be491bf..24611c8 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c

@@ -430,7 +430,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	struct icmp6hdr tmp_hdr;
 	struct flowi6 fl6;
 	struct icmpv6_msg msg;
-	struct sockcm_cookie sockc_unused = {0};
 	struct ipcm6_cookie ipc6;
 	int iif = 0;
 	int addr_type = 0;
@@ -545,7 +544,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	else if (!fl6.flowi6_oif)
 		fl6.flowi6_oif = np->ucast_oif;
 
-	ipc6.tclass = np->tclass;
+	ipcm6_init_sk(&ipc6, np);
 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
@@ -553,8 +552,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 		goto out;
 
 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-	ipc6.dontfrag = np->dontfrag;
-	ipc6.opt = NULL;
 
 	msg.skb = skb;
 	msg.offset = skb_network_offset(skb);
@@ -575,7 +572,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 			    len + sizeof(struct icmp6hdr),
 			    sizeof(struct icmp6hdr),
 			    &ipc6, &fl6, (struct rt6_info *)dst,
-			    MSG_DONTWAIT, &sockc_unused)) {
+			    MSG_DONTWAIT)) {
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 	} else {
@@ -679,7 +676,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	struct dst_entry *dst;
 	struct ipcm6_cookie ipc6;
 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
-	struct sockcm_cookie sockc_unused = {0};
 
 	saddr = &ipv6_hdr(skb)->daddr;
 
@@ -726,16 +722,14 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	msg.offset = 0;
 	msg.type = ICMPV6_ECHO_REPLY;
 
+	ipcm6_init_sk(&ipc6, np);
 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
-	ipc6.dontfrag = np->dontfrag;
-	ipc6.opt = NULL;
 
 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 			    skb->len + sizeof(struct icmp6hdr),
 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
-			    (struct rt6_info *)dst, MSG_DONTWAIT,
-			    &sockc_unused)) {
+			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 	} else {

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3eee763..cb54a8a 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c

@@ -373,7 +373,6 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 	if (olen > 0) {
 		struct msghdr msg;
 		struct flowi6 flowi6;
-		struct sockcm_cookie sockc_junk;
 		struct ipcm6_cookie ipc6;
 
 		err = -ENOMEM;
@@ -392,7 +391,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 		memset(&flowi6, 0, sizeof(flowi6));
 
 		ipc6.opt = fl->opt;
-		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6, &sockc_junk);
+		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
 		if (err)
 			goto done;
 		err = -EINVAL;

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f48af7e..8047fd4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c

@@ -1158,8 +1158,7 @@ static void ip6_append_data_mtu(unsigned int *mtu,
 
 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
-			  struct rt6_info *rt, struct flowi6 *fl6,
-			  const struct sockcm_cookie *sockc)
+			  struct rt6_info *rt, struct flowi6 *fl6)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	unsigned int mtu;
@@ -1220,14 +1219,15 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 	if (mtu < IPV6_MIN_MTU)
 		return -EINVAL;
 	cork->base.fragsize = mtu;
-	cork->base.gso_size = sk->sk_type == SOCK_DGRAM &&
-			      sk->sk_protocol == IPPROTO_UDP ? ipc6->gso_size : 0;
+	cork->base.gso_size = ipc6->gso_size;
+	cork->base.tx_flags = 0;
+	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
 
 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
 		cork->base.flags |= IPCORK_ALLFRAG;
 	cork->base.length = 0;
 
-	cork->base.transmit_time = sockc->transmit_time;
+	cork->base.transmit_time = ipc6->sockc.transmit_time;
 
 	return 0;
 }
@@ -1241,8 +1241,7 @@ static int __ip6_append_data(struct sock *sk,
 			     int getfrag(void *from, char *to, int offset,
 					 int len, int odd, struct sk_buff *skb),
 			     void *from, int length, int transhdrlen,
-			     unsigned int flags, struct ipcm6_cookie *ipc6,
-			     const struct sockcm_cookie *sockc)
+			     unsigned int flags, struct ipcm6_cookie *ipc6)
 {
 	struct sk_buff *skb, *skb_prev = NULL;
 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
@@ -1252,7 +1251,6 @@ static int __ip6_append_data(struct sock *sk,
 	int copy;
 	int err;
 	int offset = 0;
-	__u8 tx_flags = 0;
 	u32 tskey = 0;
 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
 	struct ipv6_txoptions *opt = v6_cork->opt;
@@ -1271,6 +1269,10 @@ static int __ip6_append_data(struct sock *sk,
 	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
 	orig_mtu = mtu;
 
+	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
+	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+		tskey = sk->sk_tskey++;
+
 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
@@ -1320,13 +1322,6 @@ static int __ip6_append_data(struct sock *sk,
 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
 		csummode = CHECKSUM_PARTIAL;
 
-	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
-		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
-		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
-		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
-			tskey = sk->sk_tskey++;
-	}
-
 	/*
 	 * Let's try using as much space as possible.
 	 * Use MTU if total length of the message fits into the MTU.
@@ -1445,8 +1440,8 @@ static int __ip6_append_data(struct sock *sk,
 				    dst_exthdrlen);
 
 			/* Only the initial fragment is time stamped */
-			skb_shinfo(skb)->tx_flags = tx_flags;
-			tx_flags = 0;
+			skb_shinfo(skb)->tx_flags = cork->tx_flags;
+			cork->tx_flags = 0;
 			skb_shinfo(skb)->tskey = tskey;
 			tskey = 0;
 
@@ -1563,8 +1558,7 @@ int ip6_append_data(struct sock *sk,
 				int odd, struct sk_buff *skb),
 		    void *from, int length, int transhdrlen,
 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
-		    struct rt6_info *rt, unsigned int flags,
-		    const struct sockcm_cookie *sockc)
+		    struct rt6_info *rt, unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1578,7 +1572,7 @@ int ip6_append_data(struct sock *sk,
 		 * setup for corking
 		 */
 		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
-				     ipc6, rt, fl6, sockc);
+				     ipc6, rt, fl6);
 		if (err)
 			return err;
 
@@ -1592,7 +1586,7 @@ int ip6_append_data(struct sock *sk,
 
 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
 				 &np->cork, sk_page_frag(sk), getfrag,
-				 from, length, transhdrlen, flags, ipc6, sockc);
+				 from, length, transhdrlen, flags, ipc6);
 }
 EXPORT_SYMBOL_GPL(ip6_append_data);
 
@@ -1752,8 +1746,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 			     void *from, int length, int transhdrlen,
 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
 			     struct rt6_info *rt, unsigned int flags,
-			     struct inet_cork_full *cork,
-			     const struct sockcm_cookie *sockc)
+			     struct inet_cork_full *cork)
 {
 	struct inet6_cork v6_cork;
 	struct sk_buff_head queue;
@@ -1770,7 +1763,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 	cork->base.opt = NULL;
 	cork->base.dst = NULL;
 	v6_cork.opt = NULL;
-	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6, sockc);
+	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
 	if (err) {
 		ip6_cork_release(cork, &v6_cork);
 		return ERR_PTR(err);
@@ -1781,7 +1774,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
 				&current->task_frag, getfrag, from,
 				length + exthdrlen, transhdrlen + exthdrlen,
-				flags, ipc6, sockc);
+				flags, ipc6);
 	if (err) {
 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
 		return ERR_PTR(err);

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4d780c7..fabe3ba 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c

@@ -489,7 +489,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		struct ipv6_txoptions *opt = NULL;
 		struct msghdr msg;
 		struct flowi6 fl6;
-		struct sockcm_cookie sockc_junk;
 		struct ipcm6_cookie ipc6;
 
 		memset(&fl6, 0, sizeof(fl6));
@@ -522,7 +521,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		msg.msg_control = (void *)(opt+1);
 		ipc6.opt = opt;
 
-		retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6, &sockc_junk);
+		retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6);
 		if (retv)
 			goto done;
 update:

diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 96f56bf..4c04bcc 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c

@@ -62,7 +62,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	struct dst_entry *dst;
 	struct rt6_info *rt;
 	struct pingfakehdr pfh;
-	struct sockcm_cookie junk = {0};
 	struct ipcm6_cookie ipc6;
 
 	pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -119,7 +118,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.fl6_icmp_code = user_icmph.icmp6_code;
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	ipc6.tclass = np->tclass;
+	ipcm6_init_sk(&ipc6, np);
 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
 	dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
@@ -142,13 +141,11 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	pfh.family = AF_INET6;
 
 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-	ipc6.dontfrag = np->dontfrag;
-	ipc6.opt = NULL;
 
 	lock_sock(sk);
 	err = ip6_append_data(sk, ping_getfrag, &pfh, len,
 			      0, &ipc6, &fl6, rt,
-			      MSG_DONTWAIT, &junk);
+			      MSG_DONTWAIT);
 
 	if (err) {
 		ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5737c50..413d98b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c

@@ -767,7 +767,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	struct dst_entry *dst = NULL;
 	struct raw6_frag_vec rfv;
 	struct flowi6 fl6;
-	struct sockcm_cookie sockc;
 	struct ipcm6_cookie ipc6;
 	int addr_len = msg->msg_namelen;
 	u16 proto;
@@ -791,10 +790,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.flowi6_uid = sk->sk_uid;
 
-	ipc6.hlimit = -1;
-	ipc6.tclass = -1;
-	ipc6.dontfrag = -1;
-	ipc6.opt = NULL;
+	ipcm6_init(&ipc6);
+	ipc6.sockc.tsflags = sk->sk_tsflags;
 
 	if (sin6) {
 		if (addr_len < SIN6_LEN_RFC2133)
@@ -848,15 +845,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	if (fl6.flowi6_oif == 0)
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 
-	sockc.tsflags = sk->sk_tsflags;
-	sockc.transmit_time = 0;
 	if (msg->msg_controllen) {
 		opt = &opt_space;
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 		ipc6.opt = opt;
 
-		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -924,13 +919,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 back_from_confirm:
 	if (inet->hdrincl)
 		err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst,
-					msg->msg_flags, &sockc);
+					msg->msg_flags, &ipc6.sockc);
 	else {
 		ipc6.opt = opt;
 		lock_sock(sk);
 		err = ip6_append_data(sk, raw6_getfrag, &rfv,
 			len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
-			msg->msg_flags, &sockc);
+			msg->msg_flags);
 
 		if (err)
 			ip6_flush_pending_frames(sk);

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ac6fc67..f6b9695 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c

@@ -1141,14 +1141,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
-	struct sockcm_cookie sockc;
 
-	ipc6.hlimit = -1;
-	ipc6.tclass = -1;
-	ipc6.dontfrag = -1;
+	ipcm6_init(&ipc6);
 	ipc6.gso_size = up->gso_size;
-	sockc.tsflags = sk->sk_tsflags;
-	sockc.transmit_time = 0;
+	ipc6.sockc.tsflags = sk->sk_tsflags;
 
 	/* destination address check */
 	if (sin6) {
@@ -1283,7 +1279,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
 		if (err > 0)
 			err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
-						    &ipc6, &sockc);
+						    &ipc6);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -1377,7 +1373,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		skb = ip6_make_skb(sk, getfrag, msg, ulen,
 				   sizeof(struct udphdr), &ipc6,
 				   &fl6, (struct rt6_info *)dst,
-				   msg->msg_flags, &cork, &sockc);
+				   msg->msg_flags, &cork);
 		err = PTR_ERR(skb);
 		if (!IS_ERR_OR_NULL(skb))
 			err = udp_v6_send_skb(skb, &fl6, &cork.base);
@@ -1403,7 +1399,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	up->len += ulen;
 	err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
 			      &ipc6, &fl6, (struct rt6_info *)dst,
-			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, &sockc);
+			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
 	if (err)
 		udp_v6_flush_pending_frames(sk);
 	else if (!corkreq)

diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 9573691..672e5b7 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c

@@ -500,7 +500,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct dst_entry *dst = NULL;
 	struct flowi6 fl6;
-	struct sockcm_cookie sockc_unused = {0};
 	struct ipcm6_cookie ipc6;
 	int addr_len = msg->msg_namelen;
 	int transhdrlen = 4; /* zero session-id */
@@ -525,9 +524,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.flowi6_uid = sk->sk_uid;
 
-	ipc6.hlimit = -1;
-	ipc6.tclass = -1;
-	ipc6.dontfrag = -1;
+	ipcm6_init(&ipc6);
 
 	if (lsa) {
 		if (addr_len < SIN6_LEN_RFC2133)
@@ -575,8 +572,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 		ipc6.opt = opt;
 
-		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6,
-					    &sockc_unused);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -641,7 +637,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	err = ip6_append_data(sk, ip_generic_getfrag, msg,
 			      ulen, transhdrlen, &ipc6,
 			      &fl6, (struct rt6_info *)dst,
-			      msg->msg_flags, &sockc_unused);
+			      msg->msg_flags);
 	if (err)
 		ip6_flush_pending_frames(sk);
 	else if (!(msg->msg_flags & MSG_MORE))

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 555e389..5e6cf2c 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c

@@ -1130,7 +1130,7 @@ static void ieee80211_uninit(struct net_device *dev)
 
 static u16 ieee80211_netdev_select_queue(struct net_device *dev,
 					 struct sk_buff *skb,
-					 void *accel_priv,
+					 struct net_device *sb_dev,
 					 select_queue_fallback_t fallback)
 {
 	return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
@@ -1176,7 +1176,7 @@ static const struct net_device_ops ieee80211_dataif_ops = {
 
 static u16 ieee80211_monitor_select_queue(struct net_device *dev,
 					  struct sk_buff *skb,
-					  void *accel_priv,
+					  struct net_device *sb_dev,
 					  select_queue_fallback_t fallback)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 168af54..dc240cb 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c

@@ -603,6 +603,21 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
 }
 EXPORT_SYMBOL(nf_conntrack_destroy);
 
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+			 const struct sk_buff *skb)
+{
+	struct nf_ct_hook *ct_hook;
+	bool ret = false;
+
+	rcu_read_lock();
+	ct_hook = rcu_dereference(nf_ct_hook);
+	if (ct_hook)
+		ret = ct_hook->get_tuple_skb(dst_tuple, skb);
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL(nf_ct_get_tuple_skb);
+
 /* Built-in default zone used e.g. by modules. */
 const struct nf_conntrack_zone nf_ct_zone_dflt = {
 	.id	= NF_CT_DEFAULT_ZONE_ID,

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3465da2..85ab2fd 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c

@@ -1683,6 +1683,41 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 	return 0;
 }
 
+static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+				       const struct sk_buff *skb)
+{
+	const struct nf_conntrack_tuple *src_tuple;
+	const struct nf_conntrack_tuple_hash *hash;
+	struct nf_conntrack_tuple srctuple;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
+		memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+		return true;
+	}
+
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+			       NFPROTO_IPV4, dev_net(skb->dev),
+			       &srctuple))
+		return false;
+
+	hash = nf_conntrack_find_get(dev_net(skb->dev),
+				     &nf_ct_zone_dflt,
+				     &srctuple);
+	if (!hash)
+		return false;
+
+	ct = nf_ct_tuplehash_to_ctrack(hash);
+	src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir);
+	memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+	nf_ct_put(ct);
+
+	return true;
+}
+
 /* Bring out ya dead! */
 static struct nf_conn *
 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
@@ -2204,6 +2239,7 @@ int nf_conntrack_init_start(void)
 static struct nf_ct_hook nf_conntrack_hook = {
 	.update		= nf_conntrack_update,
 	.destroy	= destroy_conntrack,
+	.get_tuple_skb  = nf_conntrack_get_tuple_skb,
 };
 
 void nf_conntrack_init_end(void)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 30a5df2..85ae53d 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c

@@ -1057,6 +1057,28 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
 			     clone_flow_key);
 }
 
+/* When 'last' is true, clone() should always consume the 'skb'.
+ * Otherwise, clone() should keep 'skb' intact regardless what
+ * actions are executed within clone().
+ */
+static int clone(struct datapath *dp, struct sk_buff *skb,
+		 struct sw_flow_key *key, const struct nlattr *attr,
+		 bool last)
+{
+	struct nlattr *actions;
+	struct nlattr *clone_arg;
+	int rem = nla_len(attr);
+	bool dont_clone_flow_key;
+
+	/* The first action is always 'OVS_CLONE_ATTR_ARG'. */
+	clone_arg = nla_data(attr);
+	dont_clone_flow_key = nla_get_u32(clone_arg);
+	actions = nla_next(clone_arg, &rem);
+
+	return clone_execute(dp, skb, key, 0, actions, rem, last,
+			     !dont_clone_flow_key);
+}
+
 static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
 			 const struct nlattr *attr)
 {
@@ -1336,6 +1358,17 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 				consume_skb(skb);
 				return 0;
 			}
+			break;
+
+		case OVS_ACTION_ATTR_CLONE: {
+			bool last = nla_is_last(a, rem);
+
+			err = clone(dp, skb, key, a, last);
+			if (last)
+				return err;
+
+			break;
+		}
 		}
 
 		if (unlikely(err)) {

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 391c407..a70097e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c

@@ -2460,6 +2460,40 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 	return 0;
 }
 
+static int validate_and_copy_clone(struct net *net,
+				   const struct nlattr *attr,
+				   const struct sw_flow_key *key,
+				   struct sw_flow_actions **sfa,
+				   __be16 eth_type, __be16 vlan_tci,
+				   bool log, bool last)
+{
+	int start, err;
+	u32 exec;
+
+	if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
+		return -EINVAL;
+
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
+	if (start < 0)
+		return start;
+
+	exec = last || !actions_may_change_flow(attr);
+
+	err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
+				 sizeof(exec), log);
+	if (err)
+		return err;
+
+	err = __ovs_nla_copy_actions(net, attr, key, sfa,
+				     eth_type, vlan_tci, log);
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, start);
+
+	return 0;
+}
+
 void ovs_match_init(struct sw_flow_match *match,
 		    struct sw_flow_key *key,
 		    bool reset_key,
@@ -2849,6 +2883,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
 			[OVS_ACTION_ATTR_POP_NSH] = 0,
 			[OVS_ACTION_ATTR_METER] = sizeof(u32),
+			[OVS_ACTION_ATTR_CLONE] = (u32)-1,
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -3038,6 +3073,18 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			/* Non-existent meters are simply ignored.  */
 			break;
 
+		case OVS_ACTION_ATTR_CLONE: {
+			bool last = nla_is_last(a, rem);
+
+			err = validate_and_copy_clone(net, a, key, sfa,
+						      eth_type, vlan_tci,
+						      log, last);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+		}
+
 		default:
 			OVS_NLERR(log, "Unknown Action type %d", type);
 			return -EINVAL;
@@ -3116,6 +3163,26 @@ static int sample_action_to_attr(const struct nlattr *attr,
 	return err;
 }
 
+static int clone_action_to_attr(const struct nlattr *attr,
+				struct sk_buff *skb)
+{
+	struct nlattr *start;
+	int err = 0, rem = nla_len(attr);
+
+	start = nla_nest_start(skb, OVS_ACTION_ATTR_CLONE);
+	if (!start)
+		return -EMSGSIZE;
+
+	err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+
+	if (err)
+		nla_nest_cancel(skb, start);
+	else
+		nla_nest_end(skb, start);
+
+	return err;
+}
+
 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 {
 	const struct nlattr *ovs_key = nla_data(a);
@@ -3204,6 +3271,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_CLONE:
+			err = clone_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
 		default:
 			if (nla_put(skb, type, nla_len(a), nla_data(a)))
 				return -EMSGSIZE;

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3428f77..00189a3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c

@@ -275,9 +275,10 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
 	return po->xmit == packet_direct_xmit;
 }
 
-static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb,
+				  struct net_device *sb_dev)
 {
-	return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
+	return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL);
 }
 
 static u16 packet_pick_tx_queue(struct sk_buff *skb)
@@ -291,7 +292,7 @@ static u16 packet_pick_tx_queue(struct sk_buff *skb)
 						    __packet_pick_tx_queue);
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	} else {
-		queue_index = __packet_pick_tx_queue(dev, skb);
+		queue_index = __packet_pick_tx_queue(dev, skb, NULL);
 	}
 
 	return queue_index;
@@ -1951,8 +1952,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
 		goto out_unlock;
 	}
 
-	sockc.transmit_time = 0;
-	sockc.tsflags = sk->sk_tsflags;
+	sockcm_init(&sockc, sk);
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);
 		if (unlikely(err))
@@ -2636,8 +2636,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;
 
-	sockc.transmit_time = 0;
-	sockc.tsflags = po->sk.sk_tsflags;
+	sockcm_init(&sockc, &po->sk);
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(&po->sk, msg, &sockc);
 		if (unlikely(err))
@@ -2833,8 +2832,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_unlock;
 
-	sockc.transmit_time = 0;
-	sockc.tsflags = sk->sk_tsflags;
+	sockcm_init(&sockc, sk);
 	sockc.mark = sk->sk_mark;
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index fcc8970..7af2467 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig

@@ -295,6 +295,17 @@
 
 	  If unsure, say N.
 
+config NET_SCH_CAKE
+	tristate "Common Applications Kept Enhanced (CAKE)"
+	help
+	  Say Y here if you want to use the Common Applications Kept Enhanced
+          (CAKE) queue management algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_cake.
+
+	  If unsure, say N.
+
 config NET_SCH_FQ
 	tristate "Fair Queue"
 	help

diff --git a/net/sched/Makefile b/net/sched/Makefile
index 9a5a707..673ee7d 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile

@@ -50,6 +50,7 @@
 obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
 obj-$(CONFIG_NET_SCH_FQ_CODEL)	+= sch_fq_codel.o
+obj-$(CONFIG_NET_SCH_CAKE)	+= sch_cake.o
 obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
 obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
 obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3f4cf93..148a89a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c

@@ -55,6 +55,24 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
 	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
 }
 
+static void tcf_free_cookie_rcu(struct rcu_head *p)
+{
+	struct tc_cookie *cookie = container_of(p, struct tc_cookie, rcu);
+
+	kfree(cookie->data);
+	kfree(cookie);
+}
+
+static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
+				  struct tc_cookie *new_cookie)
+{
+	struct tc_cookie *old;
+
+	old = xchg((__force struct tc_cookie **)old_cookie, new_cookie);
+	if (old)
+		call_rcu(&old->rcu, tcf_free_cookie_rcu);
+}
+
 /* XXX: For standalone actions, we don't need a RCU grace period either, because
  * actions are always connected to filters and filters are already destroyed in
  * RCU callbacks, so after a RCU grace period actions are already disconnected
@@ -65,44 +83,64 @@ static void free_tcf(struct tc_action *p)
 	free_percpu(p->cpu_bstats);
 	free_percpu(p->cpu_qstats);
 
-	if (p->act_cookie) {
-		kfree(p->act_cookie->data);
-		kfree(p->act_cookie);
-	}
+	tcf_set_action_cookie(&p->act_cookie, NULL);
 	if (p->goto_chain)
 		tcf_action_goto_chain_fini(p);
 
 	kfree(p);
 }
 
-static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
+static void tcf_action_cleanup(struct tc_action *p)
 {
-	spin_lock(&idrinfo->lock);
-	idr_remove(&idrinfo->action_idr, p->tcfa_index);
-	spin_unlock(&idrinfo->lock);
+	if (p->ops->cleanup)
+		p->ops->cleanup(p);
+
 	gen_kill_estimator(&p->tcfa_rate_est);
 	free_tcf(p);
 }
 
+static int __tcf_action_put(struct tc_action *p, bool bind)
+{
+	struct tcf_idrinfo *idrinfo = p->idrinfo;
+
+	if (refcount_dec_and_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
+		if (bind)
+			atomic_dec(&p->tcfa_bindcnt);
+		idr_remove(&idrinfo->action_idr, p->tcfa_index);
+		spin_unlock(&idrinfo->lock);
+
+		tcf_action_cleanup(p);
+		return 1;
+	}
+
+	if (bind)
+		atomic_dec(&p->tcfa_bindcnt);
+
+	return 0;
+}
+
 int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 {
 	int ret = 0;
 
-	ASSERT_RTNL();
-
+	/* Release with strict==1 and bind==0 is only called through act API
+	 * interface (classifiers always bind). Only case when action with
+	 * positive reference count and zero bind count can exist is when it was
+	 * also created with act API (unbinding last classifier will destroy the
+	 * action if it was created by classifier). So only case when bind count
+	 * can be changed after initial check is when unbound action is
+	 * destroyed by act API while classifier binds to action with same id
+	 * concurrently. This result either creation of new action(same behavior
+	 * as before), or reusing existing action if concurrent process
+	 * increments reference count before action is deleted. Both scenarios
+	 * are acceptable.
+	 */
 	if (p) {
-		if (bind)
-			p->tcfa_bindcnt--;
-		else if (strict && p->tcfa_bindcnt > 0)
+		if (!bind && strict && atomic_read(&p->tcfa_bindcnt) > 0)
 			return -EPERM;
 
-		p->tcfa_refcnt--;
-		if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
-			if (p->ops->cleanup)
-				p->ops->cleanup(p);
-			tcf_idr_remove(p->idrinfo, p);
+		if (__tcf_action_put(p, bind))
 			ret = ACT_P_DELETED;
-		}
 	}
 
 	return ret;
@@ -111,10 +149,15 @@ EXPORT_SYMBOL(__tcf_idr_release);
 
 static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
 {
+	struct tc_cookie *act_cookie;
 	u32 cookie_len = 0;
 
-	if (act->act_cookie)
-		cookie_len = nla_total_size(act->act_cookie->len);
+	rcu_read_lock();
+	act_cookie = rcu_dereference(act->act_cookie);
+
+	if (act_cookie)
+		cookie_len = nla_total_size(act_cookie->len);
+	rcu_read_unlock();
 
 	return  nla_total_size(0) /* action number nested */
 		+ nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
@@ -257,61 +300,91 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(tcf_generic_walker);
 
-static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
+static bool __tcf_idr_check(struct tc_action_net *tn, u32 index,
+			    struct tc_action **a, int bind)
 {
-	struct tc_action *p = NULL;
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p;
 
 	spin_lock(&idrinfo->lock);
 	p = idr_find(&idrinfo->action_idr, index);
+	if (IS_ERR(p)) {
+		p = NULL;
+	} else if (p) {
+		refcount_inc(&p->tcfa_refcnt);
+		if (bind)
+			atomic_inc(&p->tcfa_bindcnt);
+	}
 	spin_unlock(&idrinfo->lock);
 
-	return p;
+	if (p) {
+		*a = p;
+		return true;
+	}
+	return false;
 }
 
 int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
 {
-	struct tcf_idrinfo *idrinfo = tn->idrinfo;
-	struct tc_action *p = tcf_idr_lookup(index, idrinfo);
-
-	if (p) {
-		*a = p;
-		return 1;
-	}
-	return 0;
+	return __tcf_idr_check(tn, index, a, 0);
 }
 EXPORT_SYMBOL(tcf_idr_search);
 
 bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 		   int bind)
 {
-	struct tcf_idrinfo *idrinfo = tn->idrinfo;
-	struct tc_action *p = tcf_idr_lookup(index, idrinfo);
-
-	if (index && p) {
-		if (bind)
-			p->tcfa_bindcnt++;
-		p->tcfa_refcnt++;
-		*a = p;
-		return true;
-	}
-	return false;
+	return __tcf_idr_check(tn, index, a, bind);
 }
 EXPORT_SYMBOL(tcf_idr_check);
 
+int tcf_idr_delete_index(struct tc_action_net *tn, u32 index)
+{
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p;
+	int ret = 0;
+
+	spin_lock(&idrinfo->lock);
+	p = idr_find(&idrinfo->action_idr, index);
+	if (!p) {
+		spin_unlock(&idrinfo->lock);
+		return -ENOENT;
+	}
+
+	if (!atomic_read(&p->tcfa_bindcnt)) {
+		if (refcount_dec_and_test(&p->tcfa_refcnt)) {
+			struct module *owner = p->ops->owner;
+
+			WARN_ON(p != idr_remove(&idrinfo->action_idr,
+						p->tcfa_index));
+			spin_unlock(&idrinfo->lock);
+
+			tcf_action_cleanup(p);
+			module_put(owner);
+			return 0;
+		}
+		ret = 0;
+	} else {
+		ret = -EPERM;
+	}
+
+	spin_unlock(&idrinfo->lock);
+	return ret;
+}
+EXPORT_SYMBOL(tcf_idr_delete_index);
+
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 		   struct tc_action **a, const struct tc_action_ops *ops,
 		   int bind, bool cpustats)
 {
 	struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
-	struct idr *idr = &idrinfo->action_idr;
 	int err = -ENOMEM;
 
 	if (unlikely(!p))
 		return -ENOMEM;
-	p->tcfa_refcnt = 1;
+	refcount_set(&p->tcfa_refcnt, 1);
 	if (bind)
-		p->tcfa_bindcnt = 1;
+		atomic_set(&p->tcfa_bindcnt, 1);
 
 	if (cpustats) {
 		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
@@ -322,20 +395,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 			goto err2;
 	}
 	spin_lock_init(&p->tcfa_lock);
-	idr_preload(GFP_KERNEL);
-	spin_lock(&idrinfo->lock);
-	/* user doesn't specify an index */
-	if (!index) {
-		index = 1;
-		err = idr_alloc_u32(idr, NULL, &index, UINT_MAX, GFP_ATOMIC);
-	} else {
-		err = idr_alloc_u32(idr, NULL, &index, index, GFP_ATOMIC);
-	}
-	spin_unlock(&idrinfo->lock);
-	idr_preload_end();
-	if (err)
-		goto err3;
-
 	p->tcfa_index = index;
 	p->tcfa_tm.install = jiffies;
 	p->tcfa_tm.lastuse = jiffies;
@@ -345,7 +404,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 					&p->tcfa_rate_est,
 					&p->tcfa_lock, NULL, est);
 		if (err)
-			goto err4;
+			goto err3;
 	}
 
 	p->idrinfo = idrinfo;
@@ -353,8 +412,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 	INIT_LIST_HEAD(&p->list);
 	*a = p;
 	return 0;
-err4:
-	idr_remove(idr, index);
 err3:
 	free_percpu(p->cpu_qstats);
 err2:
@@ -370,11 +427,78 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
 	spin_lock(&idrinfo->lock);
-	idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
+	/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
+	WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
 	spin_unlock(&idrinfo->lock);
 }
 EXPORT_SYMBOL(tcf_idr_insert);
 
+/* Cleanup idr index that was allocated but not initialized. */
+
+void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
+{
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+
+	spin_lock(&idrinfo->lock);
+	/* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
+	WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index)));
+	spin_unlock(&idrinfo->lock);
+}
+EXPORT_SYMBOL(tcf_idr_cleanup);
+
+/* Check if action with specified index exists. If actions is found, increments
+ * its reference and bind counters, and return 1. Otherwise insert temporary
+ * error pointer (to prevent concurrent users from inserting actions with same
+ * index) and return 0.
+ */
+
+int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
+			struct tc_action **a, int bind)
+{
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p;
+	int ret;
+
+again:
+	spin_lock(&idrinfo->lock);
+	if (*index) {
+		p = idr_find(&idrinfo->action_idr, *index);
+		if (IS_ERR(p)) {
+			/* This means that another process allocated
+			 * index but did not assign the pointer yet.
+			 */
+			spin_unlock(&idrinfo->lock);
+			goto again;
+		}
+
+		if (p) {
+			refcount_inc(&p->tcfa_refcnt);
+			if (bind)
+				atomic_inc(&p->tcfa_bindcnt);
+			*a = p;
+			ret = 1;
+		} else {
+			*a = NULL;
+			ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
+					    *index, GFP_ATOMIC);
+			if (!ret)
+				idr_replace(&idrinfo->action_idr,
+					    ERR_PTR(-EBUSY), *index);
+		}
+	} else {
+		*index = 1;
+		*a = NULL;
+		ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
+				    UINT_MAX, GFP_ATOMIC);
+		if (!ret)
+			idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY),
+				    *index);
+	}
+	spin_unlock(&idrinfo->lock);
+	return ret;
+}
+EXPORT_SYMBOL(tcf_idr_check_alloc);
+
 void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
 			 struct tcf_idrinfo *idrinfo)
 {
@@ -538,13 +662,15 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 }
 EXPORT_SYMBOL(tcf_action_exec);
 
-int tcf_action_destroy(struct list_head *actions, int bind)
+int tcf_action_destroy(struct tc_action *actions[], int bind)
 {
 	const struct tc_action_ops *ops;
-	struct tc_action *a, *tmp;
-	int ret = 0;
+	struct tc_action *a;
+	int ret = 0, i;
 
-	list_for_each_entry_safe(a, tmp, actions, list) {
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		a = actions[i];
+		actions[i] = NULL;
 		ops = a->ops;
 		ret = __tcf_idr_release(a, bind, true);
 		if (ret == ACT_P_DELETED)
@@ -555,6 +681,24 @@ int tcf_action_destroy(struct list_head *actions, int bind)
 	return ret;
 }
 
+static int tcf_action_put(struct tc_action *p)
+{
+	return __tcf_action_put(p, false);
+}
+
+static void tcf_action_put_many(struct tc_action *actions[])
+{
+	int i;
+
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		struct tc_action *a = actions[i];
+		const struct tc_action_ops *ops = a->ops;
+
+		if (tcf_action_put(a))
+			module_put(ops->owner);
+	}
+}
+
 int
 tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
@@ -567,16 +711,22 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	int err = -EINVAL;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
+	struct tc_cookie *cookie;
 
 	if (nla_put_string(skb, TCA_KIND, a->ops->kind))
 		goto nla_put_failure;
 	if (tcf_action_copy_stats(skb, a, 0))
 		goto nla_put_failure;
-	if (a->act_cookie) {
-		if (nla_put(skb, TCA_ACT_COOKIE, a->act_cookie->len,
-			    a->act_cookie->data))
+
+	rcu_read_lock();
+	cookie = rcu_dereference(a->act_cookie);
+	if (cookie) {
+		if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) {
+			rcu_read_unlock();
 			goto nla_put_failure;
+		}
 	}
+	rcu_read_unlock();
 
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
@@ -593,14 +743,15 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 }
 EXPORT_SYMBOL(tcf_action_dump_1);
 
-int tcf_action_dump(struct sk_buff *skb, struct list_head *actions,
+int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
 		    int bind, int ref)
 {
 	struct tc_action *a;
-	int err = -EINVAL;
+	int err = -EINVAL, i;
 	struct nlattr *nest;
 
-	list_for_each_entry(a, actions, list) {
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		a = actions[i];
 		nest = nla_nest_start(skb, a->order);
 		if (nest == NULL)
 			goto nla_put_failure;
@@ -638,6 +789,7 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
+				    bool rtnl_held,
 				    struct netlink_ext_ack *extack)
 {
 	struct tc_action *a;
@@ -688,9 +840,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	a_o = tc_lookup_action_n(act_name);
 	if (a_o == NULL) {
 #ifdef CONFIG_MODULES
-		rtnl_unlock();
+		if (rtnl_held)
+			rtnl_unlock();
 		request_module("act_%s", act_name);
-		rtnl_lock();
+		if (rtnl_held)
+			rtnl_lock();
 
 		a_o = tc_lookup_action_n(act_name);
 
@@ -713,19 +867,15 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	/* backward compatibility for policer */
 	if (name == NULL)
 		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
-				extack);
+				rtnl_held, extack);
 	else
-		err = a_o->init(net, nla, est, &a, ovr, bind, extack);
+		err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
+				extack);
 	if (err < 0)
 		goto err_mod;
 
-	if (name == NULL && tb[TCA_ACT_COOKIE]) {
-		if (a->act_cookie) {
-			kfree(a->act_cookie->data);
-			kfree(a->act_cookie);
-		}
-		a->act_cookie = cookie;
-	}
+	if (!name && tb[TCA_ACT_COOKIE])
+		tcf_set_action_cookie(&a->act_cookie, cookie);
 
 	/* module count goes up only when brand new policy is created
 	 * if it exists and is only bound to in a_o->init() then
@@ -737,10 +887,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
 		err = tcf_action_goto_chain_init(a, tp);
 		if (err) {
-			LIST_HEAD(actions);
+			struct tc_action *actions[] = { a, NULL };
 
-			list_add_tail(&a->list, &actions);
-			tcf_action_destroy(&actions, bind);
+			tcf_action_destroy(actions, bind);
 			NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
 			return ERR_PTR(err);
 		}
@@ -758,21 +907,12 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	return ERR_PTR(err);
 }
 
-static void cleanup_a(struct list_head *actions, int ovr)
-{
-	struct tc_action *a;
-
-	if (!ovr)
-		return;
-
-	list_for_each_entry(a, actions, list)
-		a->tcfa_refcnt--;
-}
+/* Returns numbers of initialized actions or negative error. */
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions, size_t *attr_size,
-		    struct netlink_ext_ack *extack)
+		    struct tc_action *actions[], size_t *attr_size,
+		    bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
@@ -786,25 +926,19 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
 		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
-					extack);
+					rtnl_held, extack);
 		if (IS_ERR(act)) {
 			err = PTR_ERR(act);
 			goto err;
 		}
 		act->order = i;
 		sz += tcf_action_fill_size(act);
-		if (ovr)
-			act->tcfa_refcnt++;
-		list_add_tail(&act->list, actions);
+		/* Start from index 0 */
+		actions[i - 1] = act;
 	}
 
 	*attr_size = tcf_action_full_attrs_size(sz);
-
-	/* Remove the temp refcnt which was necessary to protect against
-	 * destroying an existing action which was being replaced
-	 */
-	cleanup_a(actions, ovr);
-	return 0;
+	return i - 1;
 
 err:
 	tcf_action_destroy(actions, bind);
@@ -855,7 +989,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
 	return -1;
 }
 
-static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
+static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
 			u32 portid, u32 seq, u16 flags, int event, int bind,
 			int ref)
 {
@@ -891,7 +1025,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
 
 static int
 tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
-	       struct list_head *actions, int event,
+	       struct tc_action *actions[], int event,
 	       struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
@@ -900,7 +1034,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
 	if (!skb)
 		return -ENOBUFS;
 	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
-			 0, 0) <= 0) {
+			 0, 1) <= 0) {
 		NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
 		kfree_skb(skb);
 		return -EINVAL;
@@ -1027,9 +1161,41 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	return err;
 }
 
+static int tcf_action_delete(struct net *net, struct tc_action *actions[],
+			     int *acts_deleted, struct netlink_ext_ack *extack)
+{
+	u32 act_index;
+	int ret, i;
+
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		struct tc_action *a = actions[i];
+		const struct tc_action_ops *ops = a->ops;
+
+		/* Actions can be deleted concurrently so we must save their
+		 * type and id to search again after reference is released.
+		 */
+		act_index = a->tcfa_index;
+
+		if (tcf_action_put(a)) {
+			/* last reference, action was deleted concurrently */
+			module_put(ops->owner);
+		} else  {
+			/* now do the delete */
+			ret = ops->delete(net, act_index);
+			if (ret < 0) {
+				*acts_deleted = i + 1;
+				return ret;
+			}
+		}
+	}
+	*acts_deleted = i;
+	return 0;
+}
+
 static int
-tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-	       u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
+tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
+	       int *acts_deleted, u32 portid, size_t attr_size,
+	       struct netlink_ext_ack *extack)
 {
 	int ret;
 	struct sk_buff *skb;
@@ -1040,14 +1206,14 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 		return -ENOBUFS;
 
 	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
-			 0, 1) <= 0) {
+			 0, 2) <= 0) {
 		NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
 		kfree_skb(skb);
 		return -EINVAL;
 	}
 
 	/* now do the delete */
-	ret = tcf_action_destroy(actions, 0);
+	ret = tcf_action_delete(net, actions, acts_deleted, extack);
 	if (ret < 0) {
 		NL_SET_ERR_MSG(extack, "Failed to delete TC action");
 		kfree_skb(skb);
@@ -1069,7 +1235,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
 	size_t attr_size = 0;
-	LIST_HEAD(actions);
+	struct tc_action *actions[TCA_ACT_MAX_PRIO + 1] = {};
+	int acts_deleted = 0;
 
 	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
 	if (ret < 0)
@@ -1091,27 +1258,27 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		}
 		act->order = i;
 		attr_size += tcf_action_fill_size(act);
-		list_add_tail(&act->list, &actions);
+		actions[i - 1] = act;
 	}
 
 	attr_size = tcf_action_full_attrs_size(attr_size);
 
 	if (event == RTM_GETACTION)
-		ret = tcf_get_notify(net, portid, n, &actions, event, extack);
+		ret = tcf_get_notify(net, portid, n, actions, event, extack);
 	else { /* delete */
-		ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack);
+		ret = tcf_del_notify(net, n, actions, &acts_deleted, portid,
+				     attr_size, extack);
 		if (ret)
 			goto err;
 		return ret;
 	}
 err:
-	if (event != RTM_GETACTION)
-		tcf_action_destroy(&actions, 0);
+	tcf_action_put_many(&actions[acts_deleted]);
 	return ret;
 }
 
 static int
-tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
+tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
 	       u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
@@ -1142,14 +1309,17 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 {
 	size_t attr_size = 0;
 	int ret = 0;
-	LIST_HEAD(actions);
+	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
 
-	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
-			      &attr_size, extack);
-	if (ret)
+	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, actions,
+			      &attr_size, true, extack);
+	if (ret < 0)
 		return ret;
+	ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
+	if (ovr)
+		tcf_action_put_many(actions);
 
-	return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
+	return ret;
 }
 
 static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 18089c0..06f743d 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c

@@ -141,8 +141,8 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
 	struct tcf_bpf *prog = to_bpf(act);
 	struct tc_act_bpf opt = {
 		.index   = prog->tcf_index,
-		.refcnt  = prog->tcf_refcnt - ref,
-		.bindcnt = prog->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&prog->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&prog->tcf_bindcnt) - bind,
 		.action  = prog->tcf_action,
 	};
 	struct tcf_t tm;
@@ -276,7 +276,8 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act,
-			int replace, int bind, struct netlink_ext_ack *extack)
+			int replace, int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
@@ -298,21 +299,27 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, act, bind)) {
+	ret = tcf_idr_check_alloc(tn, &parm->index, act, bind);
+	if (!ret) {
 		ret = tcf_idr_create(tn, parm->index, est, act,
 				     &act_bpf_ops, bind, true);
-		if (ret < 0)
+		if (ret < 0) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		res = ACT_P_CREATED;
-	} else {
+	} else if (ret > 0) {
 		/* Don't override defaults. */
 		if (bind)
 			return 0;
 
-		tcf_idr_release(*act, bind);
-		if (!replace)
+		if (!replace) {
+			tcf_idr_release(*act, bind);
 			return -EEXIST;
+		}
+	} else {
+		return ret;
 	}
 
 	is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
@@ -355,8 +362,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
 	return res;
 out:
-	if (res == ACT_P_CREATED)
-		tcf_idr_release(*act, bind);
+	tcf_idr_release(*act, bind);
 
 	return ret;
 }
@@ -387,6 +393,13 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_bpf_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, bpf_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_bpf_ops __read_mostly = {
 	.kind		=	"bpf",
 	.type		=	TCA_ACT_BPF,
@@ -397,6 +410,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
 	.init		=	tcf_bpf_init,
 	.walk		=	tcf_bpf_walker,
 	.lookup		=	tcf_bpf_search,
+	.delete		=	tcf_bpf_delete,
 	.size		=	sizeof(struct tcf_bpf),
 };
 

diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e4b880f..1e31f0e 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c

@@ -96,7 +96,7 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 			     struct nlattr *est, struct tc_action **a,
-			     int ovr, int bind,
+			     int ovr, int bind, bool rtnl_held,
 			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
@@ -118,11 +118,14 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_CONNMARK_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	ret = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!ret) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_connmark_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		ci = to_connmark(*a);
 		ci->tcf_action = parm->action;
@@ -131,16 +134,18 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 
 		tcf_idr_insert(tn, *a);
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (ret > 0) {
 		ci = to_connmark(*a);
 		if (bind)
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 		/* replacing action and zone */
 		ci->tcf_action = parm->action;
 		ci->zone = parm->zone;
+		ret = 0;
 	}
 
 	return ret;
@@ -154,8 +159,8 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
 
 	struct tc_connmark opt = {
 		.index   = ci->tcf_index,
-		.refcnt  = ci->tcf_refcnt - ref,
-		.bindcnt = ci->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&ci->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
 		.action  = ci->tcf_action,
 		.zone   = ci->zone,
 	};
@@ -193,6 +198,13 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_connmark_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, connmark_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_connmark_ops = {
 	.kind		=	"connmark",
 	.type		=	TCA_ACT_CONNMARK,
@@ -202,6 +214,7 @@ static struct tc_action_ops act_connmark_ops = {
 	.init		=	tcf_connmark_init,
 	.walk		=	tcf_connmark_walker,
 	.lookup		=	tcf_connmark_search,
+	.delete		=	tcf_connmark_delete,
 	.size		=	sizeof(struct tcf_connmark_info),
 };
 

diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 526a8e4..bd232d3 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c

@@ -46,7 +46,8 @@ static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a, int ovr,
-			 int bind, struct netlink_ext_ack *extack)
+			 int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 	struct tcf_csum_params *params_old, *params_new;
@@ -66,18 +67,24 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_CSUM_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_csum_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
+	} else {
+		return err;
 	}
 
 	p = to_tcf_csum(*a);
@@ -85,8 +92,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 	params_old = rtnl_dereference(p->params);
@@ -597,8 +603,8 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	struct tcf_csum_params *params;
 	struct tc_csum opt = {
 		.index   = p->tcf_index,
-		.refcnt  = p->tcf_refcnt - ref,
-		.bindcnt = p->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&p->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
@@ -653,6 +659,13 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
 	return nla_total_size(sizeof(struct tc_csum));
 }
 
+static int tcf_csum_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, csum_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_csum_ops = {
 	.kind		= "csum",
 	.type		= TCA_ACT_CSUM,
@@ -664,6 +677,7 @@ static struct tc_action_ops act_csum_ops = {
 	.walk		= tcf_csum_walker,
 	.lookup		= tcf_csum_search,
 	.get_fill_size  = tcf_csum_get_fill_size,
+	.delete		= tcf_csum_delete,
 	.size		= sizeof(struct tcf_csum),
 };
 

diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 4dc4f15..661b72b 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c

@@ -56,7 +56,8 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind, struct netlink_ext_ack *extack)
+			 int ovr, int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 	struct nlattr *tb[TCA_GACT_MAX + 1];
@@ -90,18 +91,24 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 	}
 #endif
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_gact_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
+	} else {
+		return err;
 	}
 
 	gact = to_gact(*a);
@@ -169,8 +176,8 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_gact *gact = to_gact(a);
 	struct tc_gact opt = {
 		.index   = gact->tcf_index,
-		.refcnt  = gact->tcf_refcnt - ref,
-		.bindcnt = gact->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&gact->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&gact->tcf_bindcnt) - bind,
 		.action  = gact->tcf_action,
 	};
 	struct tcf_t t;
@@ -230,6 +237,13 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
 	return sz;
 }
 
+static int tcf_gact_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, gact_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_gact_ops = {
 	.kind		=	"gact",
 	.type		=	TCA_ACT_GACT,
@@ -241,6 +255,7 @@ static struct tc_action_ops act_gact_ops = {
 	.walk		=	tcf_gact_walker,
 	.lookup		=	tcf_gact_search,
 	.get_fill_size	=	tcf_gact_get_fill_size,
+	.delete		=	tcf_gact_delete,
 	.size		=	sizeof(struct tcf_gact),
 };
 

diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 20d7d36..3d6e265 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c

@@ -448,7 +448,8 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a,
-			int ovr, int bind, struct netlink_ext_ack *extack)
+			int ovr, int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 	struct nlattr *tb[TCA_IFE_MAX + 1];
@@ -483,7 +484,12 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	if (!p)
 		return -ENOMEM;
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0) {
+		kfree(p);
+		return err;
+	}
+	exists = err;
 	if (exists && bind) {
 		kfree(p);
 		return 0;
@@ -493,16 +499,15 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 		ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
 				     bind, true);
 		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			kfree(p);
 			return ret;
 		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr) {
-			kfree(p);
-			return -EEXIST;
-		}
+		kfree(p);
+		return -EEXIST;
 	}
 
 	ife = to_ife(*a);
@@ -547,6 +552,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 
 			if (exists)
 				spin_unlock_bh(&ife->tcf_lock);
+			tcf_idr_release(*a, bind);
+
 			kfree(p);
 			return err;
 		}
@@ -596,8 +603,8 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	struct tcf_ife_params *p = rtnl_dereference(ife->params);
 	struct tc_ife opt = {
 		.index = ife->tcf_index,
-		.refcnt = ife->tcf_refcnt - ref,
-		.bindcnt = ife->tcf_bindcnt - bind,
+		.refcnt = refcount_read(&ife->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&ife->tcf_bindcnt) - bind,
 		.action = ife->tcf_action,
 		.flags = p->flags,
 	};
@@ -843,6 +850,13 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_ife_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ife_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_ife_ops = {
 	.kind = "ife",
 	.type = TCA_ACT_IFE,
@@ -853,6 +867,7 @@ static struct tc_action_ops act_ife_ops = {
 	.init = tcf_ife_init,
 	.walk = tcf_ife_walker,
 	.lookup = tcf_ife_search,
+	.delete = tcf_ife_delete,
 	.size =	sizeof(struct tcf_ife_info),
 };
 

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 14c312d..0dc787a 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c

@@ -119,13 +119,18 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	if (tb[TCA_IPT_INDEX] != NULL)
 		index = nla_get_u32(tb[TCA_IPT_INDEX]);
 
-	exists = tcf_idr_check(tn, index, a, bind);
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, index);
 		return -EINVAL;
 	}
 
@@ -133,22 +138,27 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, index, est, a, ops, bind,
 				     false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
 	} else {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_idr_release(*a, bind);
 
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 	}
 	hook = nla_get_u32(tb[TCA_IPT_HOOK]);
 
@@ -196,7 +206,8 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a, int ovr,
-			int bind, struct netlink_ext_ack *extack)
+			int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
 			      bind);
@@ -204,7 +215,8 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
 		       struct nlattr *est, struct tc_action **a, int ovr,
-		       int bind, struct netlink_ext_ack *extack)
+		       int bind, bool unlocked,
+		       struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
 			      bind);
@@ -280,8 +292,8 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	if (unlikely(!t))
 		goto nla_put_failure;
 
-	c.bindcnt = ipt->tcf_bindcnt - bind;
-	c.refcnt = ipt->tcf_refcnt - ref;
+	c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind;
+	c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref;
 	strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
 
 	if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) ||
@@ -322,6 +334,13 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_ipt_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_ipt_ops = {
 	.kind		=	"ipt",
 	.type		=	TCA_ACT_IPT,
@@ -332,6 +351,7 @@ static struct tc_action_ops act_ipt_ops = {
 	.init		=	tcf_ipt_init,
 	.walk		=	tcf_ipt_walker,
 	.lookup		=	tcf_ipt_search,
+	.delete		=	tcf_ipt_delete,
 	.size		=	sizeof(struct tcf_ipt),
 };
 
@@ -372,6 +392,13 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_xt_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_xt_ops = {
 	.kind		=	"xt",
 	.type		=	TCA_ACT_XT,
@@ -382,6 +409,7 @@ static struct tc_action_ops act_xt_ops = {
 	.init		=	tcf_xt_init,
 	.walk		=	tcf_xt_walker,
 	.lookup		=	tcf_xt_search,
+	.delete		=	tcf_xt_delete,
 	.size		=	sizeof(struct tcf_ipt),
 };
 

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fd34015..6afd89a 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c

@@ -68,8 +68,9 @@ static unsigned int mirred_net_id;
 static struct tc_action_ops act_mirred_ops;
 
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
-			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind, struct netlink_ext_ack *extack)
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 	struct nlattr *tb[TCA_MIRRED_MAX + 1];
@@ -78,7 +79,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	struct tcf_mirred *m;
 	struct net_device *dev;
 	bool exists = false;
-	int ret;
+	int ret, err;
 
 	if (!nla) {
 		NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
@@ -93,7 +94,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	}
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
@@ -106,6 +110,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	default:
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
 		return -EINVAL;
 	}
@@ -114,6 +120,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		if (dev == NULL) {
 			if (exists)
 				tcf_idr_release(*a, bind);
+			else
+				tcf_idr_cleanup(tn, parm->index);
 			return -ENODEV;
 		}
 		mac_header_xmit = dev_is_mac_header_xmit(dev);
@@ -123,18 +131,20 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 
 	if (!exists) {
 		if (!dev) {
+			tcf_idr_cleanup(tn, parm->index);
 			NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
 			return -EINVAL;
 		}
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_mirred_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 	m = to_mirred(*a);
 
@@ -250,8 +260,8 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	struct tc_mirred opt = {
 		.index   = m->tcf_index,
 		.action  = m->tcf_action,
-		.refcnt  = m->tcf_refcnt - ref,
-		.bindcnt = m->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&m->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&m->tcf_bindcnt) - bind,
 		.eaction = m->tcfm_eaction,
 		.ifindex = dev ? dev->ifindex : 0,
 	};
@@ -321,6 +331,13 @@ static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
 	return rtnl_dereference(m->tcfm_dev);
 }
 
+static int tcf_mirred_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, mirred_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_mirred_ops = {
 	.kind		=	"mirred",
 	.type		=	TCA_ACT_MIRRED,
@@ -334,6 +351,7 @@ static struct tc_action_ops act_mirred_ops = {
 	.lookup		=	tcf_mirred_search,
 	.size		=	sizeof(struct tcf_mirred),
 	.get_dev	=	tcf_mirred_get_dev,
+	.delete		=	tcf_mirred_delete,
 };
 
 static __net_init int mirred_init_net(struct net *net)

diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 4b5848b..4dd9188 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c

@@ -38,7 +38,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 			struct tc_action **a, int ovr, int bind,
-			struct netlink_ext_ack *extack)
+			bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 	struct nlattr *tb[TCA_NAT_MAX + 1];
@@ -57,18 +57,24 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_NAT_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_nat_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
+	} else {
+		return err;
 	}
 	p = to_tcf_nat(*a);
 
@@ -257,8 +263,8 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
 
 		.index    = p->tcf_index,
 		.action   = p->tcf_action,
-		.refcnt   = p->tcf_refcnt - ref,
-		.bindcnt  = p->tcf_bindcnt - bind,
+		.refcnt   = refcount_read(&p->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&p->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
@@ -294,6 +300,13 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_nat_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, nat_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_nat_ops = {
 	.kind		=	"nat",
 	.type		=	TCA_ACT_NAT,
@@ -303,6 +316,7 @@ static struct tc_action_ops act_nat_ops = {
 	.init		=	tcf_nat_init,
 	.walk		=	tcf_nat_walker,
 	.lookup		=	tcf_nat_search,
+	.delete		=	tcf_nat_delete,
 	.size		=	sizeof(struct tcf_nat),
 };
 

diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index e43aef2..cc8ffcd 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c

@@ -132,7 +132,8 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 			  struct nlattr *est, struct tc_action **a,
-			  int ovr, int bind, struct netlink_ext_ack *extack)
+			  int ovr, int bind, bool rtnl_held,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 	struct nlattr *tb[TCA_PEDIT_MAX + 1];
@@ -172,16 +173,20 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	if (IS_ERR(keys_ex))
 		return PTR_ERR(keys_ex);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
 		if (!parm->nkeys) {
+			tcf_idr_cleanup(tn, parm->index);
 			NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
 			ret = -EINVAL;
 			goto out_free;
 		}
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_pedit_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			goto out_free;
+		}
 		p = to_pedit(*a);
 		keys = kmalloc(ksize, GFP_KERNEL);
 		if (!keys) {
@@ -190,11 +195,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 			goto out_free;
 		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)
 			goto out_free;
-		tcf_idr_release(*a, bind);
 		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			ret = -EEXIST;
 			goto out_free;
 		}
@@ -206,6 +211,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 				goto out_free;
 			}
 		}
+	} else {
+		return err;
 	}
 
 	spin_lock_bh(&p->tcf_lock);
@@ -409,8 +416,8 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
 	opt->nkeys = p->tcfp_nkeys;
 	opt->flags = p->tcfp_flags;
 	opt->action = p->tcf_action;
-	opt->refcnt = p->tcf_refcnt - ref;
-	opt->bindcnt = p->tcf_bindcnt - bind;
+	opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
+	opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
 
 	if (p->tcfp_keys_ex) {
 		tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys);
@@ -453,6 +460,13 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_pedit_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, pedit_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_pedit_ops = {
 	.kind		=	"pedit",
 	.type		=	TCA_ACT_PEDIT,
@@ -463,6 +477,7 @@ static struct tc_action_ops act_pedit_ops = {
 	.init		=	tcf_pedit_init,
 	.walk		=	tcf_pedit_walker,
 	.lookup		=	tcf_pedit_search,
+	.delete		=	tcf_pedit_delete,
 	.size		=	sizeof(struct tcf_pedit),
 };
 

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 4e72bc2..1f3192e 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c

@@ -75,7 +75,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 
 static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 			       struct nlattr *est, struct tc_action **a,
-			       int ovr, int bind,
+			       int ovr, int bind, bool rtnl_held,
 			       struct netlink_ext_ack *extack)
 {
 	int ret = 0, err;
@@ -101,20 +101,24 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_POLICE_TBF]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, NULL, a,
 				     &act_police_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 
 	police = to_police(*a);
@@ -195,8 +199,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 failure:
 	qdisc_put_rtab(P_tab);
 	qdisc_put_rtab(R_tab);
-	if (ret == ACT_P_CREATED)
-		tcf_idr_release(*a, bind);
+	tcf_idr_release(*a, bind);
 	return err;
 }
 
@@ -274,8 +277,8 @@ static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a,
 		.action = police->tcf_action,
 		.mtu = police->tcfp_mtu,
 		.burst = PSCHED_NS2TICKS(police->tcfp_burst),
-		.refcnt = police->tcf_refcnt - ref,
-		.bindcnt = police->tcf_bindcnt - bind,
+		.refcnt = refcount_read(&police->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&police->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
@@ -314,6 +317,13 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_police_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, police_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 MODULE_AUTHOR("Alexey Kuznetsov");
 MODULE_DESCRIPTION("Policing actions");
 MODULE_LICENSE("GPL");
@@ -327,6 +337,7 @@ static struct tc_action_ops act_police_ops = {
 	.init		=	tcf_act_police_init,
 	.walk		=	tcf_act_police_walker,
 	.lookup		=	tcf_police_search,
+	.delete		=	tcf_police_delete,
 	.size		=	sizeof(struct tcf_police),
 };
 

diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 5db3584..3079e7b 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c

@@ -37,7 +37,8 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind, struct netlink_ext_ack *extack)
+			   int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 	struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@ -45,7 +46,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	struct tc_sample *parm;
 	struct tcf_sample *s;
 	bool exists = false;
-	int ret;
+	int ret, err;
 
 	if (!nla)
 		return -EINVAL;
@@ -58,20 +59,24 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_SAMPLE_PARMS]);
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_sample_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 	s = to_sample(*a);
 
@@ -80,8 +85,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	s->psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
 	psample_group = psample_group_get(net, s->psample_group_num);
 	if (!psample_group) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 	RCU_INIT_POINTER(s->psample_group, psample_group);
@@ -173,8 +177,8 @@ static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tc_sample opt = {
 		.index      = s->tcf_index,
 		.action     = s->tcf_action,
-		.refcnt     = s->tcf_refcnt - ref,
-		.bindcnt    = s->tcf_bindcnt - bind,
+		.refcnt     = refcount_read(&s->tcf_refcnt) - ref,
+		.bindcnt    = atomic_read(&s->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
@@ -219,6 +223,13 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_sample_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, sample_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_sample_ops = {
 	.kind	  = "sample",
 	.type	  = TCA_ACT_SAMPLE,
@@ -229,6 +240,7 @@ static struct tc_action_ops act_sample_ops = {
 	.cleanup  = tcf_sample_cleanup,
 	.walk	  = tcf_sample_walker,
 	.lookup	  = tcf_sample_search,
+	.delete	  = tcf_sample_delete,
 	.size	  = sizeof(struct tcf_sample),
 };
 

diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 98c4afe..aa51152 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c

@@ -79,7 +79,8 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind, struct netlink_ext_ack *extack)
+			 int ovr, int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 	struct nlattr *tb[TCA_DEF_MAX + 1];
@@ -99,21 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_DEF_PARMS]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (tb[TCA_DEF_DATA] == NULL) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_simp_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		d = to_defact(*a);
 		ret = alloc_defdata(d, tb[TCA_DEF_DATA]);
@@ -126,9 +134,10 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	} else {
 		d = to_defact(*a);
 
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 
 		reset_policy(d, tb[TCA_DEF_DATA], parm);
 	}
@@ -145,8 +154,8 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_defact *d = to_defact(a);
 	struct tc_defact opt = {
 		.index   = d->tcf_index,
-		.refcnt  = d->tcf_refcnt - ref,
-		.bindcnt = d->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
 		.action  = d->tcf_action,
 	};
 	struct tcf_t t;
@@ -183,6 +192,13 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_simp_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, simp_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_simp_ops = {
 	.kind		=	"simple",
 	.type		=	TCA_ACT_SIMP,
@@ -193,6 +209,7 @@ static struct tc_action_ops act_simp_ops = {
 	.init		=	tcf_simp_init,
 	.walk		=	tcf_simp_walker,
 	.lookup		=	tcf_simp_search,
+	.delete		=	tcf_simp_delete,
 	.size		=	sizeof(struct tcf_defact),
 };
 

diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index dfaf5d8..da56e69 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c

@@ -37,14 +37,19 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 		       struct tcf_result *res)
 {
 	struct tcf_skbedit *d = to_skbedit(a);
+	struct tcf_skbedit_params *params;
+	int action;
 
-	spin_lock(&d->tcf_lock);
 	tcf_lastuse_update(&d->tcf_tm);
-	bstats_update(&d->tcf_bstats, skb);
+	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
 
-	if (d->flags & SKBEDIT_F_PRIORITY)
-		skb->priority = d->priority;
-	if (d->flags & SKBEDIT_F_INHERITDSFIELD) {
+	rcu_read_lock();
+	params = rcu_dereference(d->params);
+	action = READ_ONCE(d->tcf_action);
+
+	if (params->flags & SKBEDIT_F_PRIORITY)
+		skb->priority = params->priority;
+	if (params->flags & SKBEDIT_F_INHERITDSFIELD) {
 		int wlen = skb_network_offset(skb);
 
 		switch (tc_skb_protocol(skb)) {
@@ -63,23 +68,23 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 			break;
 		}
 	}
-	if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
-	    skb->dev->real_num_tx_queues > d->queue_mapping)
-		skb_set_queue_mapping(skb, d->queue_mapping);
-	if (d->flags & SKBEDIT_F_MARK) {
-		skb->mark &= ~d->mask;
-		skb->mark |= d->mark & d->mask;
+	if (params->flags & SKBEDIT_F_QUEUE_MAPPING &&
+	    skb->dev->real_num_tx_queues > params->queue_mapping)
+		skb_set_queue_mapping(skb, params->queue_mapping);
+	if (params->flags & SKBEDIT_F_MARK) {
+		skb->mark &= ~params->mask;
+		skb->mark |= params->mark & params->mask;
 	}
-	if (d->flags & SKBEDIT_F_PTYPE)
-		skb->pkt_type = d->ptype;
+	if (params->flags & SKBEDIT_F_PTYPE)
+		skb->pkt_type = params->ptype;
 
-	spin_unlock(&d->tcf_lock);
-	return d->tcf_action;
-
+unlock:
+	rcu_read_unlock();
+	return action;
 err:
-	d->tcf_qstats.drops++;
-	spin_unlock(&d->tcf_lock);
-	return TC_ACT_SHOT;
+	qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
+	action = TC_ACT_SHOT;
+	goto unlock;
 }
 
 static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
@@ -94,9 +99,11 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 			    struct nlattr *est, struct tc_action **a,
-			    int ovr, int bind, struct netlink_ext_ack *extack)
+			    int ovr, int bind, bool rtnl_held,
+			    struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+	struct tcf_skbedit_params *params_old, *params_new;
 	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
 	struct tc_skbedit *parm;
 	struct tcf_skbedit *d;
@@ -151,50 +158,67 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!flags) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
-				     &act_skbedit_ops, bind, false);
-		if (ret)
+				     &act_skbedit_ops, bind, true);
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		d = to_skbedit(*a);
 		ret = ACT_P_CREATED;
 	} else {
 		d = to_skbedit(*a);
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 	}
 
-	spin_lock_bh(&d->tcf_lock);
+	ASSERT_RTNL();
 
-	d->flags = flags;
+	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+	if (unlikely(!params_new)) {
+		if (ret == ACT_P_CREATED)
+			tcf_idr_release(*a, bind);
+		return -ENOMEM;
+	}
+
+	params_new->flags = flags;
 	if (flags & SKBEDIT_F_PRIORITY)
-		d->priority = *priority;
+		params_new->priority = *priority;
 	if (flags & SKBEDIT_F_QUEUE_MAPPING)
-		d->queue_mapping = *queue_mapping;
+		params_new->queue_mapping = *queue_mapping;
 	if (flags & SKBEDIT_F_MARK)
-		d->mark = *mark;
+		params_new->mark = *mark;
 	if (flags & SKBEDIT_F_PTYPE)
-		d->ptype = *ptype;
+		params_new->ptype = *ptype;
 	/* default behaviour is to use all the bits */
-	d->mask = 0xffffffff;
+	params_new->mask = 0xffffffff;
 	if (flags & SKBEDIT_F_MASK)
-		d->mask = *mask;
+		params_new->mask = *mask;
 
 	d->tcf_action = parm->action;
-
-	spin_unlock_bh(&d->tcf_lock);
+	params_old = rtnl_dereference(d->params);
+	rcu_assign_pointer(d->params, params_new);
+	if (params_old)
+		kfree_rcu(params_old, rcu);
 
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
@@ -206,33 +230,36 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_skbedit *d = to_skbedit(a);
+	struct tcf_skbedit_params *params;
 	struct tc_skbedit opt = {
 		.index   = d->tcf_index,
-		.refcnt  = d->tcf_refcnt - ref,
-		.bindcnt = d->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
 		.action  = d->tcf_action,
 	};
-	struct tcf_t t;
 	u64 pure_flags = 0;
+	struct tcf_t t;
+
+	params = rtnl_dereference(d->params);
 
 	if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_PRIORITY) &&
-	    nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, d->priority))
+	if ((params->flags & SKBEDIT_F_PRIORITY) &&
+	    nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, params->priority))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_QUEUE_MAPPING) &&
-	    nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, d->queue_mapping))
+	if ((params->flags & SKBEDIT_F_QUEUE_MAPPING) &&
+	    nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, params->queue_mapping))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_MARK) &&
-	    nla_put_u32(skb, TCA_SKBEDIT_MARK, d->mark))
+	if ((params->flags & SKBEDIT_F_MARK) &&
+	    nla_put_u32(skb, TCA_SKBEDIT_MARK, params->mark))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_PTYPE) &&
-	    nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
+	if ((params->flags & SKBEDIT_F_PTYPE) &&
+	    nla_put_u16(skb, TCA_SKBEDIT_PTYPE, params->ptype))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_MASK) &&
-	    nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask))
+	if ((params->flags & SKBEDIT_F_MASK) &&
+	    nla_put_u32(skb, TCA_SKBEDIT_MASK, params->mask))
 		goto nla_put_failure;
-	if (d->flags & SKBEDIT_F_INHERITDSFIELD)
+	if (params->flags & SKBEDIT_F_INHERITDSFIELD)
 		pure_flags |= SKBEDIT_F_INHERITDSFIELD;
 	if (pure_flags != 0 &&
 	    nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
@@ -248,6 +275,16 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 	return -1;
 }
 
+static void tcf_skbedit_cleanup(struct tc_action *a)
+{
+	struct tcf_skbedit *d = to_skbedit(a);
+	struct tcf_skbedit_params *params;
+
+	params = rcu_dereference_protected(d->params, 1);
+	if (params)
+		kfree_rcu(params, rcu);
+}
+
 static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
 			      struct netlink_callback *cb, int type,
 			      const struct tc_action_ops *ops,
@@ -266,6 +303,13 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_skbedit_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_skbedit_ops = {
 	.kind		=	"skbedit",
 	.type		=	TCA_ACT_SKBEDIT,
@@ -273,8 +317,10 @@ static struct tc_action_ops act_skbedit_ops = {
 	.act		=	tcf_skbedit,
 	.dump		=	tcf_skbedit_dump,
 	.init		=	tcf_skbedit_init,
+	.cleanup	=	tcf_skbedit_cleanup,
 	.walk		=	tcf_skbedit_walker,
 	.lookup		=	tcf_skbedit_search,
+	.delete		=	tcf_skbedit_delete,
 	.size		=	sizeof(struct tcf_skbedit),
 };
 

diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index ad050d7..cdc6bac 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c

@@ -84,7 +84,8 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
-			   int ovr, int bind, struct netlink_ext_ack *extack)
+			   int ovr, int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 	struct nlattr *tb[TCA_SKBMOD_MAX + 1];
@@ -127,27 +128,33 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	if (parm->flags & SKBMOD_F_SWAPMAC)
 		lflags = SKBMOD_F_SWAPMAC;
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!lflags) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_skbmod_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 
 	d = to_skbmod(*a);
@@ -155,8 +162,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	ASSERT_RTNL();
 	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
 	if (unlikely(!p)) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 
@@ -205,8 +211,8 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_skbmod_params  *p = rtnl_dereference(d->skbmod_p);
 	struct tc_skbmod opt = {
 		.index   = d->tcf_index,
-		.refcnt  = d->tcf_refcnt - ref,
-		.bindcnt = d->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
 		.action  = d->tcf_action,
 	};
 	struct tcf_t t;
@@ -252,6 +258,13 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_skbmod_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_skbmod_ops = {
 	.kind		=	"skbmod",
 	.type		=	TCA_ACT_SKBMOD,
@@ -262,6 +275,7 @@ static struct tc_action_ops act_skbmod_ops = {
 	.cleanup	=	tcf_skbmod_cleanup,
 	.walk		=	tcf_skbmod_walker,
 	.lookup		=	tcf_skbmod_search,
+	.delete		=	tcf_skbmod_delete,
 	.size		=	sizeof(struct tcf_skbmod),
 };
 

diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index ea203e3..3ec585d5 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c

@@ -201,7 +201,8 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
-			   int ovr, int bind, struct netlink_ext_ack *extack)
+			   int ovr, int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 	struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
@@ -236,7 +237,10 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	}
 
 	parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
@@ -324,16 +328,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 				     &act_tunnel_key_ops, bind, true);
 		if (ret) {
 			NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
-			return ret;
+			goto err_out;
 		}
 
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr) {
-			NL_SET_ERR_MSG(extack, "TC IDR already exists");
-			return -EEXIST;
-		}
+		NL_SET_ERR_MSG(extack, "TC IDR already exists");
+		return -EEXIST;
 	}
 
 	t = to_tunnel_key(*a);
@@ -341,8 +343,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	ASSERT_RTNL();
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
 		return -ENOMEM;
 	}
@@ -366,6 +367,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 err_out:
 	if (exists)
 		tcf_idr_release(*a, bind);
+	else
+		tcf_idr_cleanup(tn, parm->index);
 	return ret;
 }
 
@@ -474,8 +477,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_tunnel_key_params *params;
 	struct tc_tunnel_key opt = {
 		.index    = t->tcf_index,
-		.refcnt   = t->tcf_refcnt - ref,
-		.bindcnt  = t->tcf_bindcnt - bind,
+		.refcnt   = refcount_read(&t->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&t->tcf_bindcnt) - bind,
 	};
 	struct tcf_t tm;
 
@@ -533,6 +536,13 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tunnel_key_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_tunnel_key_ops = {
 	.kind		=	"tunnel_key",
 	.type		=	TCA_ACT_TUNNEL_KEY,
@@ -543,6 +553,7 @@ static struct tc_action_ops act_tunnel_key_ops = {
 	.cleanup	=	tunnel_key_release,
 	.walk		=	tunnel_key_walker,
 	.lookup		=	tunnel_key_search,
+	.delete		=	tunnel_key_delete,
 	.size		=	sizeof(struct tcf_tunnel_key),
 };
 

diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 1fb39e1..ad37f30 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c

@@ -109,7 +109,8 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind, struct netlink_ext_ack *extack)
+			 int ovr, int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 	struct nlattr *tb[TCA_VLAN_MAX + 1];
@@ -133,7 +134,10 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (!tb[TCA_VLAN_PARMS])
 		return -EINVAL;
 	parm = nla_data(tb[TCA_VLAN_PARMS]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
@@ -145,12 +149,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 		if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
 			if (exists)
 				tcf_idr_release(*a, bind);
+			else
+				tcf_idr_cleanup(tn, parm->index);
 			return -EINVAL;
 		}
 		push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
 		if (push_vid >= VLAN_VID_MASK) {
 			if (exists)
 				tcf_idr_release(*a, bind);
+			else
+				tcf_idr_cleanup(tn, parm->index);
 			return -ERANGE;
 		}
 
@@ -163,6 +171,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			default:
 				if (exists)
 					tcf_idr_release(*a, bind);
+				else
+					tcf_idr_cleanup(tn, parm->index);
 				return -EPROTONOSUPPORT;
 			}
 		} else {
@@ -175,6 +185,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	default:
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 	action = parm->v_action;
@@ -182,14 +194,15 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_vlan_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 
 	v = to_vlan(*a);
@@ -197,8 +210,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	ASSERT_RTNL();
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 
@@ -239,8 +251,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p);
 	struct tc_vlan opt = {
 		.index    = v->tcf_index,
-		.refcnt   = v->tcf_refcnt - ref,
-		.bindcnt  = v->tcf_bindcnt - bind,
+		.refcnt   = refcount_read(&v->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&v->tcf_bindcnt) - bind,
 		.action   = v->tcf_action,
 		.v_action = p->tcfv_action,
 	};
@@ -286,6 +298,13 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_vlan_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, vlan_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_vlan_ops = {
 	.kind		=	"vlan",
 	.type		=	TCA_ACT_VLAN,
@@ -296,6 +315,7 @@ static struct tc_action_ops act_vlan_ops = {
 	.cleanup	=	tcf_vlan_cleanup,
 	.walk		=	tcf_vlan_walker,
 	.lookup		=	tcf_vlan_search,
+	.delete		=	tcf_vlan_delete,
 	.size		=	sizeof(struct tcf_vlan),
 };
 

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index bbf8dda..c51b1b1 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c

@@ -1508,7 +1508,9 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 		arg.w.stop = 0;
 		arg.w.skip = cb->args[1] - 1;
 		arg.w.count = 0;
+		arg.w.cookie = cb->args[2];
 		tp->ops->walk(tp, &arg.w);
+		cb->args[2] = arg.w.cookie;
 		cb->args[1] = arg.w.count + 1;
 		if (arg.w.stop)
 			return false;
@@ -1609,11 +1611,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	LIST_HEAD(actions);
-
-	ASSERT_RTNL();
-	tcf_exts_to_list(exts, &actions);
-	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
+	tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
 	kfree(exts->actions);
 	exts->nr_actions = 0;
 #endif
@@ -1632,7 +1630,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tp, tb[exts->police],
 						rate_tlv, "police", ovr,
-						TCA_ACT_BIND, extack);
+						TCA_ACT_BIND, true, extack);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
@@ -1640,17 +1638,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 			exts->actions[0] = act;
 			exts->nr_actions = 1;
 		} else if (exts->action && tb[exts->action]) {
-			LIST_HEAD(actions);
-			int err, i = 0;
+			int err;
 
 			err = tcf_action_init(net, tp, tb[exts->action],
 					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
-					      &actions, &attr_size, extack);
-			if (err)
+					      exts->actions, &attr_size, true,
+					      extack);
+			if (err < 0)
 				return err;
-			list_for_each_entry(act, &actions, list)
-				exts->actions[i++] = act;
-			exts->nr_actions = i;
+			exts->nr_actions = err;
 		}
 		exts->net = net;
 	}
@@ -1699,14 +1695,11 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
 		 * tc data even if iproute2  was newer - jhs
 		 */
 		if (exts->type != TCA_OLD_COMPAT) {
-			LIST_HEAD(actions);
-
 			nest = nla_nest_start(skb, exts->action);
 			if (nest == NULL)
 				goto nla_put_failure;
 
-			tcf_exts_to_list(exts, &actions);
-			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
+			if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
 				goto nla_put_failure;
 			nla_nest_end(skb, nest);
 		} else if (exts->police) {

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 352876b..c53fdd4 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c

@@ -35,6 +35,7 @@ struct fl_flow_key {
 	struct flow_dissector_key_basic basic;
 	struct flow_dissector_key_eth_addrs eth;
 	struct flow_dissector_key_vlan vlan;
+	struct flow_dissector_key_vlan cvlan;
 	union {
 		struct flow_dissector_key_ipv4_addrs ipv4;
 		struct flow_dissector_key_ipv6_addrs ipv6;
@@ -449,6 +450,9 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_IP_TOS_MASK]	= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_IP_TTL]		= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_IP_TTL_MASK]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_CVLAN_ID]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_CVLAN_PRIO]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_CVLAN_ETH_TYPE]	= { .type = NLA_U16 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -500,22 +504,26 @@ static int fl_set_key_mpls(struct nlattr **tb,
 }
 
 static void fl_set_key_vlan(struct nlattr **tb,
+			    __be16 ethertype,
+			    int vlan_id_key, int vlan_prio_key,
 			    struct flow_dissector_key_vlan *key_val,
 			    struct flow_dissector_key_vlan *key_mask)
 {
 #define VLAN_PRIORITY_MASK	0x7
 
-	if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
+	if (tb[vlan_id_key]) {
 		key_val->vlan_id =
-			nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
+			nla_get_u16(tb[vlan_id_key]) & VLAN_VID_MASK;
 		key_mask->vlan_id = VLAN_VID_MASK;
 	}
-	if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
+	if (tb[vlan_prio_key]) {
 		key_val->vlan_priority =
-			nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
+			nla_get_u8(tb[vlan_prio_key]) &
 			VLAN_PRIORITY_MASK;
 		key_mask->vlan_priority = VLAN_PRIORITY_MASK;
 	}
+	key_val->vlan_tpid = ethertype;
+	key_mask->vlan_tpid = cpu_to_be16(~0);
 }
 
 static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
@@ -592,12 +600,28 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 	if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
 		ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
 
-		if (ethertype == htons(ETH_P_8021Q)) {
-			fl_set_key_vlan(tb, &key->vlan, &mask->vlan);
-			fl_set_key_val(tb, &key->basic.n_proto,
-				       TCA_FLOWER_KEY_VLAN_ETH_TYPE,
-				       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
-				       sizeof(key->basic.n_proto));
+		if (eth_type_vlan(ethertype)) {
+			fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
+					TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
+					&mask->vlan);
+
+			if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
+				ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
+				if (eth_type_vlan(ethertype)) {
+					fl_set_key_vlan(tb, ethertype,
+							TCA_FLOWER_KEY_CVLAN_ID,
+							TCA_FLOWER_KEY_CVLAN_PRIO,
+							&key->cvlan, &mask->cvlan);
+					fl_set_key_val(tb, &key->basic.n_proto,
+						       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+						       &mask->basic.n_proto,
+						       TCA_FLOWER_UNSPEC,
+						       sizeof(key->basic.n_proto));
+				} else {
+					key->basic.n_proto = ethertype;
+					mask->basic.n_proto = cpu_to_be16(~0);
+				}
+			}
 		} else {
 			key->basic.n_proto = ethertype;
 			mask->basic.n_proto = cpu_to_be16(~0);
@@ -823,6 +847,8 @@ static void fl_init_dissector(struct fl_flow_mask *mask)
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_VLAN, vlan);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_CVLAN, cvlan);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
@@ -1073,19 +1099,17 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
 	struct cls_fl_filter *f;
-	struct fl_flow_mask *mask;
 
-	list_for_each_entry_rcu(mask, &head->masks, list) {
-		list_for_each_entry_rcu(f, &mask->filters, list) {
-			if (arg->count < arg->skip)
-				goto skip;
-			if (arg->fn(tp, f, arg) < 0) {
-				arg->stop = 1;
-				break;
-			}
-skip:
-			arg->count++;
+	arg->count = arg->skip;
+
+	while ((f = idr_get_next_ul(&head->handle_idr,
+				    &arg->cookie)) != NULL) {
+		if (arg->fn(tp, f, arg) < 0) {
+			arg->stop = 1;
+			break;
 		}
+		arg->cookie = f->handle + 1;
+		arg->count++;
 	}
 }
 
@@ -1198,6 +1222,7 @@ static int fl_dump_key_ip(struct sk_buff *skb,
 }
 
 static int fl_dump_key_vlan(struct sk_buff *skb,
+			    int vlan_id_key, int vlan_prio_key,
 			    struct flow_dissector_key_vlan *vlan_key,
 			    struct flow_dissector_key_vlan *vlan_mask)
 {
@@ -1206,13 +1231,13 @@ static int fl_dump_key_vlan(struct sk_buff *skb,
 	if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
 		return 0;
 	if (vlan_mask->vlan_id) {
-		err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
+		err = nla_put_u16(skb, vlan_id_key,
 				  vlan_key->vlan_id);
 		if (err)
 			return err;
 	}
 	if (vlan_mask->vlan_priority) {
-		err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
+		err = nla_put_u8(skb, vlan_prio_key,
 				 vlan_key->vlan_priority);
 		if (err)
 			return err;
@@ -1307,9 +1332,30 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
 		goto nla_put_failure;
 
-	if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
+	if (fl_dump_key_vlan(skb, TCA_FLOWER_KEY_VLAN_ID,
+			     TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, &mask->vlan))
 		goto nla_put_failure;
 
+	if (fl_dump_key_vlan(skb, TCA_FLOWER_KEY_CVLAN_ID,
+			     TCA_FLOWER_KEY_CVLAN_PRIO,
+			     &key->cvlan, &mask->cvlan) ||
+	    (mask->cvlan.vlan_tpid &&
+	     nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+			 key->cvlan.vlan_tpid)))
+		goto nla_put_failure;
+
+	if (mask->basic.n_proto) {
+		if (mask->cvlan.vlan_tpid) {
+			if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+					 key->basic.n_proto))
+				goto nla_put_failure;
+		} else if (mask->vlan.vlan_tpid) {
+			if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+					 key->basic.n_proto))
+				goto nla_put_failure;
+		}
+	}
+
 	if ((key->basic.n_proto == htons(ETH_P_IP) ||
 	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
 	    (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
new file mode 100644
index 0000000..3069569
--- /dev/null
+++ b/net/sched/sch_cake.c

@@ -0,0 +1,3019 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* COMMON Applications Kept Enhanced (CAKE) discipline
+ *
+ * Copyright (C) 2014-2018 Jonathan Morton <chromatix99@gmail.com>
+ * Copyright (C) 2015-2018 Toke Høiland-Jørgensen <toke@toke.dk>
+ * Copyright (C) 2014-2018 Dave Täht <dave.taht@gmail.com>
+ * Copyright (C) 2015-2018 Sebastian Moeller <moeller0@gmx.de>
+ * (C) 2015-2018 Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
+ * Copyright (C) 2017-2018 Ryan Mounce <ryan@mounce.com.au>
+ *
+ * The CAKE Principles:
+ *		   (or, how to have your cake and eat it too)
+ *
+ * This is a combination of several shaping, AQM and FQ techniques into one
+ * easy-to-use package:
+ *
+ * - An overall bandwidth shaper, to move the bottleneck away from dumb CPE
+ *   equipment and bloated MACs.  This operates in deficit mode (as in sch_fq),
+ *   eliminating the need for any sort of burst parameter (eg. token bucket
+ *   depth).  Burst support is limited to that necessary to overcome scheduling
+ *   latency.
+ *
+ * - A Diffserv-aware priority queue, giving more priority to certain classes,
+ *   up to a specified fraction of bandwidth.  Above that bandwidth threshold,
+ *   the priority is reduced to avoid starving other tins.
+ *
+ * - Each priority tin has a separate Flow Queue system, to isolate traffic
+ *   flows from each other.  This prevents a burst on one flow from increasing
+ *   the delay to another.  Flows are distributed to queues using a
+ *   set-associative hash function.
+ *
+ * - Each queue is actively managed by Cobalt, which is a combination of the
+ *   Codel and Blue AQM algorithms.  This serves flows fairly, and signals
+ *   congestion early via ECN (if available) and/or packet drops, to keep
+ *   latency low.  The codel parameters are auto-tuned based on the bandwidth
+ *   setting, as is necessary at low bandwidths.
+ *
+ * The configuration parameters are kept deliberately simple for ease of use.
+ * Everything has sane defaults.  Complete generality of configuration is *not*
+ * a goal.
+ *
+ * The priority queue operates according to a weighted DRR scheme, combined with
+ * a bandwidth tracker which reuses the shaper logic to detect which side of the
+ * bandwidth sharing threshold the tin is operating.  This determines whether a
+ * priority-based weight (high) or a bandwidth-based weight (low) is used for
+ * that tin in the current pass.
+ *
+ * This qdisc was inspired by Eric Dumazet's fq_codel code, which he kindly
+ * granted us permission to leverage.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/reciprocal_div.h>
+#include <net/netlink.h>
+#include <linux/version.h>
+#include <linux/if_vlan.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <net/tcp.h>
+#include <net/flow_dissector.h>
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
+#define CAKE_SET_WAYS (8)
+#define CAKE_MAX_TINS (8)
+#define CAKE_QUEUES (1024)
+#define CAKE_FLOW_MASK 63
+#define CAKE_FLOW_NAT_FLAG 64
+#define CAKE_SPLIT_GSO_THRESHOLD (125000000) /* 1Gbps */
+
+/* struct cobalt_params - contains codel and blue parameters
+ * @interval:	codel initial drop rate
+ * @target:     maximum persistent sojourn time & blue update rate
+ * @mtu_time:   serialisation delay of maximum-size packet
+ * @p_inc:      increment of blue drop probability (0.32 fxp)
+ * @p_dec:      decrement of blue drop probability (0.32 fxp)
+ */
+struct cobalt_params {
+	u64	interval;
+	u64	target;
+	u64	mtu_time;
+	u32	p_inc;
+	u32	p_dec;
+};
+
+/* struct cobalt_vars - contains codel and blue variables
+ * @count:		codel dropping frequency
+ * @rec_inv_sqrt:	reciprocal value of sqrt(count) >> 1
+ * @drop_next:		time to drop next packet, or when we dropped last
+ * @blue_timer:		Blue time to next drop
+ * @p_drop:		BLUE drop probability (0.32 fxp)
+ * @dropping:		set if in dropping state
+ * @ecn_marked:		set if marked
+ */
+struct cobalt_vars {
+	u32	count;
+	u32	rec_inv_sqrt;
+	ktime_t	drop_next;
+	ktime_t	blue_timer;
+	u32     p_drop;
+	bool	dropping;
+	bool    ecn_marked;
+};
+
+enum {
+	CAKE_SET_NONE = 0,
+	CAKE_SET_SPARSE,
+	CAKE_SET_SPARSE_WAIT, /* counted in SPARSE, actually in BULK */
+	CAKE_SET_BULK,
+	CAKE_SET_DECAYING
+};
+
+struct cake_flow {
+	/* this stuff is all needed per-flow at dequeue time */
+	struct sk_buff	  *head;
+	struct sk_buff	  *tail;
+	struct list_head  flowchain;
+	s32		  deficit;
+	u32		  dropped;
+	struct cobalt_vars cvars;
+	u16		  srchost; /* index into cake_host table */
+	u16		  dsthost;
+	u8		  set;
+}; /* please try to keep this structure <= 64 bytes */
+
+struct cake_host {
+	u32 srchost_tag;
+	u32 dsthost_tag;
+	u16 srchost_refcnt;
+	u16 dsthost_refcnt;
+};
+
+struct cake_heap_entry {
+	u16 t:3, b:10;
+};
+
+struct cake_tin_data {
+	struct cake_flow flows[CAKE_QUEUES];
+	u32	backlogs[CAKE_QUEUES];
+	u32	tags[CAKE_QUEUES]; /* for set association */
+	u16	overflow_idx[CAKE_QUEUES];
+	struct cake_host hosts[CAKE_QUEUES]; /* for triple isolation */
+	u16	flow_quantum;
+
+	struct cobalt_params cparams;
+	u32	drop_overlimit;
+	u16	bulk_flow_count;
+	u16	sparse_flow_count;
+	u16	decaying_flow_count;
+	u16	unresponsive_flow_count;
+
+	u32	max_skblen;
+
+	struct list_head new_flows;
+	struct list_head old_flows;
+	struct list_head decaying_flows;
+
+	/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
+	ktime_t	time_next_packet;
+	u64	tin_rate_ns;
+	u64	tin_rate_bps;
+	u16	tin_rate_shft;
+
+	u16	tin_quantum_prio;
+	u16	tin_quantum_band;
+	s32	tin_deficit;
+	u32	tin_backlog;
+	u32	tin_dropped;
+	u32	tin_ecn_mark;
+
+	u32	packets;
+	u64	bytes;
+
+	u32	ack_drops;
+
+	/* moving averages */
+	u64 avge_delay;
+	u64 peak_delay;
+	u64 base_delay;
+
+	/* hash function stats */
+	u32	way_directs;
+	u32	way_hits;
+	u32	way_misses;
+	u32	way_collisions;
+}; /* number of tins is small, so size of this struct doesn't matter much */
+
+struct cake_sched_data {
+	struct tcf_proto __rcu *filter_list; /* optional external classifier */
+	struct tcf_block *block;
+	struct cake_tin_data *tins;
+
+	struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS];
+	u16		overflow_timeout;
+
+	u16		tin_cnt;
+	u8		tin_mode;
+	u8		flow_mode;
+	u8		ack_filter;
+	u8		atm_mode;
+
+	/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
+	u16		rate_shft;
+	ktime_t		time_next_packet;
+	ktime_t		failsafe_next_packet;
+	u64		rate_ns;
+	u64		rate_bps;
+	u16		rate_flags;
+	s16		rate_overhead;
+	u16		rate_mpu;
+	u64		interval;
+	u64		target;
+
+	/* resource tracking */
+	u32		buffer_used;
+	u32		buffer_max_used;
+	u32		buffer_limit;
+	u32		buffer_config_limit;
+
+	/* indices for dequeue */
+	u16		cur_tin;
+	u16		cur_flow;
+
+	struct qdisc_watchdog watchdog;
+	const u8	*tin_index;
+	const u8	*tin_order;
+
+	/* bandwidth capacity estimate */
+	ktime_t		last_packet_time;
+	ktime_t		avg_window_begin;
+	u64		avg_packet_interval;
+	u64		avg_window_bytes;
+	u64		avg_peak_bandwidth;
+	ktime_t		last_reconfig_time;
+
+	/* packet length stats */
+	u32		avg_netoff;
+	u16		max_netlen;
+	u16		max_adjlen;
+	u16		min_netlen;
+	u16		min_adjlen;
+};
+
+enum {
+	CAKE_FLAG_OVERHEAD	   = BIT(0),
+	CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
+	CAKE_FLAG_INGRESS	   = BIT(2),
+	CAKE_FLAG_WASH		   = BIT(3),
+	CAKE_FLAG_SPLIT_GSO	   = BIT(4)
+};
+
+/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
+ * obtain the best features of each.  Codel is excellent on flows which
+ * respond to congestion signals in a TCP-like way.  BLUE is more effective on
+ * unresponsive flows.
+ */
+
+struct cobalt_skb_cb {
+	ktime_t enqueue_time;
+	u32     adjusted_len;
+};
+
+static u64 us_to_ns(u64 us)
+{
+	return us * NSEC_PER_USEC;
+}
+
+static struct cobalt_skb_cb *get_cobalt_cb(const struct sk_buff *skb)
+{
+	qdisc_cb_private_validate(skb, sizeof(struct cobalt_skb_cb));
+	return (struct cobalt_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+static ktime_t cobalt_get_enqueue_time(const struct sk_buff *skb)
+{
+	return get_cobalt_cb(skb)->enqueue_time;
+}
+
+static void cobalt_set_enqueue_time(struct sk_buff *skb,
+				    ktime_t now)
+{
+	get_cobalt_cb(skb)->enqueue_time = now;
+}
+
+static u16 quantum_div[CAKE_QUEUES + 1] = {0};
+
+/* Diffserv lookup tables */
+
+static const u8 precedence[] = {
+	0, 0, 0, 0, 0, 0, 0, 0,
+	1, 1, 1, 1, 1, 1, 1, 1,
+	2, 2, 2, 2, 2, 2, 2, 2,
+	3, 3, 3, 3, 3, 3, 3, 3,
+	4, 4, 4, 4, 4, 4, 4, 4,
+	5, 5, 5, 5, 5, 5, 5, 5,
+	6, 6, 6, 6, 6, 6, 6, 6,
+	7, 7, 7, 7, 7, 7, 7, 7,
+};
+
+static const u8 diffserv8[] = {
+	2, 5, 1, 2, 4, 2, 2, 2,
+	0, 2, 1, 2, 1, 2, 1, 2,
+	5, 2, 4, 2, 4, 2, 4, 2,
+	3, 2, 3, 2, 3, 2, 3, 2,
+	6, 2, 3, 2, 3, 2, 3, 2,
+	6, 2, 2, 2, 6, 2, 6, 2,
+	7, 2, 2, 2, 2, 2, 2, 2,
+	7, 2, 2, 2, 2, 2, 2, 2,
+};
+
+static const u8 diffserv4[] = {
+	0, 2, 0, 0, 2, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0,
+	2, 0, 2, 0, 2, 0, 2, 0,
+	2, 0, 2, 0, 2, 0, 2, 0,
+	3, 0, 2, 0, 2, 0, 2, 0,
+	3, 0, 0, 0, 3, 0, 3, 0,
+	3, 0, 0, 0, 0, 0, 0, 0,
+	3, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const u8 diffserv3[] = {
+	0, 0, 0, 0, 2, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 2, 0, 2, 0,
+	2, 0, 0, 0, 0, 0, 0, 0,
+	2, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const u8 besteffort[] = {
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* tin priority order for stats dumping */
+
+static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
+static const u8 bulk_order[] = {1, 0, 2, 3};
+
+#define REC_INV_SQRT_CACHE (16)
+static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
+
+/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
+ * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
+ *
+ * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
+ */
+
+static void cobalt_newton_step(struct cobalt_vars *vars)
+{
+	u32 invsqrt, invsqrt2;
+	u64 val;
+
+	invsqrt = vars->rec_inv_sqrt;
+	invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
+	val = (3LL << 32) - ((u64)vars->count * invsqrt2);
+
+	val >>= 2; /* avoid overflow in following multiply */
+	val = (val * invsqrt) >> (32 - 2 + 1);
+
+	vars->rec_inv_sqrt = val;
+}
+
+static void cobalt_invsqrt(struct cobalt_vars *vars)
+{
+	if (vars->count < REC_INV_SQRT_CACHE)
+		vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
+	else
+		cobalt_newton_step(vars);
+}
+
+/* There is a big difference in timing between the accurate values placed in
+ * the cache and the approximations given by a single Newton step for small
+ * count values, particularly when stepping from count 1 to 2 or vice versa.
+ * Above 16, a single Newton step gives sufficient accuracy in either
+ * direction, given the precision stored.
+ *
+ * The magnitude of the error when stepping up to count 2 is such as to give
+ * the value that *should* have been produced at count 4.
+ */
+
+static void cobalt_cache_init(void)
+{
+	struct cobalt_vars v;
+
+	memset(&v, 0, sizeof(v));
+	v.rec_inv_sqrt = ~0U;
+	cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
+
+	for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+
+		cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
+	}
+}
+
+static void cobalt_vars_init(struct cobalt_vars *vars)
+{
+	memset(vars, 0, sizeof(*vars));
+
+	if (!cobalt_rec_inv_sqrt_cache[0]) {
+		cobalt_cache_init();
+		cobalt_rec_inv_sqrt_cache[0] = ~0;
+	}
+}
+
+/* CoDel control_law is t + interval/sqrt(count)
+ * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
+ * both sqrt() and divide operation.
+ */
+static ktime_t cobalt_control(ktime_t t,
+			      u64 interval,
+			      u32 rec_inv_sqrt)
+{
+	return ktime_add_ns(t, reciprocal_scale(interval,
+						rec_inv_sqrt));
+}
+
+/* Call this when a packet had to be dropped due to queue overflow.  Returns
+ * true if the BLUE state was quiescent before but active after this call.
+ */
+static bool cobalt_queue_full(struct cobalt_vars *vars,
+			      struct cobalt_params *p,
+			      ktime_t now)
+{
+	bool up = false;
+
+	if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
+		up = !vars->p_drop;
+		vars->p_drop += p->p_inc;
+		if (vars->p_drop < p->p_inc)
+			vars->p_drop = ~0;
+		vars->blue_timer = now;
+	}
+	vars->dropping = true;
+	vars->drop_next = now;
+	if (!vars->count)
+		vars->count = 1;
+
+	return up;
+}
+
+/* Call this when the queue was serviced but turned out to be empty.  Returns
+ * true if the BLUE state was active before but quiescent after this call.
+ */
+static bool cobalt_queue_empty(struct cobalt_vars *vars,
+			       struct cobalt_params *p,
+			       ktime_t now)
+{
+	bool down = false;
+
+	if (vars->p_drop &&
+	    ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
+		if (vars->p_drop < p->p_dec)
+			vars->p_drop = 0;
+		else
+			vars->p_drop -= p->p_dec;
+		vars->blue_timer = now;
+		down = !vars->p_drop;
+	}
+	vars->dropping = false;
+
+	if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
+		vars->count--;
+		cobalt_invsqrt(vars);
+		vars->drop_next = cobalt_control(vars->drop_next,
+						 p->interval,
+						 vars->rec_inv_sqrt);
+	}
+
+	return down;
+}
+
+/* Call this with a freshly dequeued packet for possible congestion marking.
+ * Returns true as an instruction to drop the packet, false for delivery.
+ */
+static bool cobalt_should_drop(struct cobalt_vars *vars,
+			       struct cobalt_params *p,
+			       ktime_t now,
+			       struct sk_buff *skb,
+			       u32 bulk_flows)
+{
+	bool next_due, over_target, drop = false;
+	ktime_t schedule;
+	u64 sojourn;
+
+/* The 'schedule' variable records, in its sign, whether 'now' is before or
+ * after 'drop_next'.  This allows 'drop_next' to be updated before the next
+ * scheduling decision is actually branched, without destroying that
+ * information.  Similarly, the first 'schedule' value calculated is preserved
+ * in the boolean 'next_due'.
+ *
+ * As for 'drop_next', we take advantage of the fact that 'interval' is both
+ * the delay between first exceeding 'target' and the first signalling event,
+ * *and* the scaling factor for the signalling frequency.  It's therefore very
+ * natural to use a single mechanism for both purposes, and eliminates a
+ * significant amount of reference Codel's spaghetti code.  To help with this,
+ * both the '0' and '1' entries in the invsqrt cache are 0xFFFFFFFF, as close
+ * as possible to 1.0 in fixed-point.
+ */
+
+	sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
+	schedule = ktime_sub(now, vars->drop_next);
+	over_target = sojourn > p->target &&
+		      sojourn > p->mtu_time * bulk_flows * 2 &&
+		      sojourn > p->mtu_time * 4;
+	next_due = vars->count && ktime_to_ns(schedule) >= 0;
+
+	vars->ecn_marked = false;
+
+	if (over_target) {
+		if (!vars->dropping) {
+			vars->dropping = true;
+			vars->drop_next = cobalt_control(now,
+							 p->interval,
+							 vars->rec_inv_sqrt);
+		}
+		if (!vars->count)
+			vars->count = 1;
+	} else if (vars->dropping) {
+		vars->dropping = false;
+	}
+
+	if (next_due && vars->dropping) {
+		/* Use ECN mark if possible, otherwise drop */
+		drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
+
+		vars->count++;
+		if (!vars->count)
+			vars->count--;
+		cobalt_invsqrt(vars);
+		vars->drop_next = cobalt_control(vars->drop_next,
+						 p->interval,
+						 vars->rec_inv_sqrt);
+		schedule = ktime_sub(now, vars->drop_next);
+	} else {
+		while (next_due) {
+			vars->count--;
+			cobalt_invsqrt(vars);
+			vars->drop_next = cobalt_control(vars->drop_next,
+							 p->interval,
+							 vars->rec_inv_sqrt);
+			schedule = ktime_sub(now, vars->drop_next);
+			next_due = vars->count && ktime_to_ns(schedule) >= 0;
+		}
+	}
+
+	/* Simple BLUE implementation.  Lack of ECN is deliberate. */
+	if (vars->p_drop)
+		drop |= (prandom_u32() < vars->p_drop);
+
+	/* Overload the drop_next field as an activity timeout */
+	if (!vars->count)
+		vars->drop_next = ktime_add_ns(now, p->interval);
+	else if (ktime_to_ns(schedule) > 0 && !drop)
+		vars->drop_next = now;
+
+	return drop;
+}
+
+static void cake_update_flowkeys(struct flow_keys *keys,
+				 const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	struct nf_conntrack_tuple tuple = {};
+	bool rev = !skb->_nfct;
+
+	if (tc_skb_protocol(skb) != htons(ETH_P_IP))
+		return;
+
+	if (!nf_ct_get_tuple_skb(&tuple, skb))
+		return;
+
+	keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
+	keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+
+	if (keys->ports.ports) {
+		keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
+		keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
+	}
+#endif
+}
+
+/* Cake has several subtle multiple bit settings. In these cases you
+ *  would be matching triple isolate mode as well.
+ */
+
+static bool cake_dsrc(int flow_mode)
+{
+	return (flow_mode & CAKE_FLOW_DUAL_SRC) == CAKE_FLOW_DUAL_SRC;
+}
+
+static bool cake_ddst(int flow_mode)
+{
+	return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST;
+}
+
+static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
+		     int flow_mode)
+{
+	u32 flow_hash = 0, srchost_hash, dsthost_hash;
+	u16 reduced_hash, srchost_idx, dsthost_idx;
+	struct flow_keys keys, host_keys;
+
+	if (unlikely(flow_mode == CAKE_FLOW_NONE))
+		return 0;
+
+	skb_flow_dissect_flow_keys(skb, &keys,
+				   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
+	if (flow_mode & CAKE_FLOW_NAT_FLAG)
+		cake_update_flowkeys(&keys, skb);
+
+	/* flow_hash_from_keys() sorts the addresses by value, so we have
+	 * to preserve their order in a separate data structure to treat
+	 * src and dst host addresses as independently selectable.
+	 */
+	host_keys = keys;
+	host_keys.ports.ports     = 0;
+	host_keys.basic.ip_proto  = 0;
+	host_keys.keyid.keyid     = 0;
+	host_keys.tags.flow_label = 0;
+
+	switch (host_keys.control.addr_type) {
+	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+		host_keys.addrs.v4addrs.src = 0;
+		dsthost_hash = flow_hash_from_keys(&host_keys);
+		host_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+		host_keys.addrs.v4addrs.dst = 0;
+		srchost_hash = flow_hash_from_keys(&host_keys);
+		break;
+
+	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+		memset(&host_keys.addrs.v6addrs.src, 0,
+		       sizeof(host_keys.addrs.v6addrs.src));
+		dsthost_hash = flow_hash_from_keys(&host_keys);
+		host_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+		memset(&host_keys.addrs.v6addrs.dst, 0,
+		       sizeof(host_keys.addrs.v6addrs.dst));
+		srchost_hash = flow_hash_from_keys(&host_keys);
+		break;
+
+	default:
+		dsthost_hash = 0;
+		srchost_hash = 0;
+	}
+
+	/* This *must* be after the above switch, since as a
+	 * side-effect it sorts the src and dst addresses.
+	 */
+	if (flow_mode & CAKE_FLOW_FLOWS)
+		flow_hash = flow_hash_from_keys(&keys);
+
+	if (!(flow_mode & CAKE_FLOW_FLOWS)) {
+		if (flow_mode & CAKE_FLOW_SRC_IP)
+			flow_hash ^= srchost_hash;
+
+		if (flow_mode & CAKE_FLOW_DST_IP)
+			flow_hash ^= dsthost_hash;
+	}
+
+	reduced_hash = flow_hash % CAKE_QUEUES;
+
+	/* set-associative hashing */
+	/* fast path if no hash collision (direct lookup succeeds) */
+	if (likely(q->tags[reduced_hash] == flow_hash &&
+		   q->flows[reduced_hash].set)) {
+		q->way_directs++;
+	} else {
+		u32 inner_hash = reduced_hash % CAKE_SET_WAYS;
+		u32 outer_hash = reduced_hash - inner_hash;
+		bool allocate_src = false;
+		bool allocate_dst = false;
+		u32 i, k;
+
+		/* check if any active queue in the set is reserved for
+		 * this flow.
+		 */
+		for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+		     i++, k = (k + 1) % CAKE_SET_WAYS) {
+			if (q->tags[outer_hash + k] == flow_hash) {
+				if (i)
+					q->way_hits++;
+
+				if (!q->flows[outer_hash + k].set) {
+					/* need to increment host refcnts */
+					allocate_src = cake_dsrc(flow_mode);
+					allocate_dst = cake_ddst(flow_mode);
+				}
+
+				goto found;
+			}
+		}
+
+		/* no queue is reserved for this flow, look for an
+		 * empty one.
+		 */
+		for (i = 0; i < CAKE_SET_WAYS;
+			 i++, k = (k + 1) % CAKE_SET_WAYS) {
+			if (!q->flows[outer_hash + k].set) {
+				q->way_misses++;
+				allocate_src = cake_dsrc(flow_mode);
+				allocate_dst = cake_ddst(flow_mode);
+				goto found;
+			}
+		}
+
+		/* With no empty queues, default to the original
+		 * queue, accept the collision, update the host tags.
+		 */
+		q->way_collisions++;
+		q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
+		q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
+		allocate_src = cake_dsrc(flow_mode);
+		allocate_dst = cake_ddst(flow_mode);
+found:
+		/* reserve queue for future packets in same flow */
+		reduced_hash = outer_hash + k;
+		q->tags[reduced_hash] = flow_hash;
+
+		if (allocate_src) {
+			srchost_idx = srchost_hash % CAKE_QUEUES;
+			inner_hash = srchost_idx % CAKE_SET_WAYS;
+			outer_hash = srchost_idx - inner_hash;
+			for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+				i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (q->hosts[outer_hash + k].srchost_tag ==
+				    srchost_hash)
+					goto found_src;
+			}
+			for (i = 0; i < CAKE_SET_WAYS;
+				i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (!q->hosts[outer_hash + k].srchost_refcnt)
+					break;
+			}
+			q->hosts[outer_hash + k].srchost_tag = srchost_hash;
+found_src:
+			srchost_idx = outer_hash + k;
+			q->hosts[srchost_idx].srchost_refcnt++;
+			q->flows[reduced_hash].srchost = srchost_idx;
+		}
+
+		if (allocate_dst) {
+			dsthost_idx = dsthost_hash % CAKE_QUEUES;
+			inner_hash = dsthost_idx % CAKE_SET_WAYS;
+			outer_hash = dsthost_idx - inner_hash;
+			for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+			     i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (q->hosts[outer_hash + k].dsthost_tag ==
+				    dsthost_hash)
+					goto found_dst;
+			}
+			for (i = 0; i < CAKE_SET_WAYS;
+			     i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (!q->hosts[outer_hash + k].dsthost_refcnt)
+					break;
+			}
+			q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
+found_dst:
+			dsthost_idx = outer_hash + k;
+			q->hosts[dsthost_idx].dsthost_refcnt++;
+			q->flows[reduced_hash].dsthost = dsthost_idx;
+		}
+	}
+
+	return reduced_hash;
+}
+
+/* helper functions : might be changed when/if skb use a standard list_head */
+/* remove one skb from head of slot queue */
+
+static struct sk_buff *dequeue_head(struct cake_flow *flow)
+{
+	struct sk_buff *skb = flow->head;
+
+	if (skb) {
+		flow->head = skb->next;
+		skb->next = NULL;
+	}
+
+	return skb;
+}
+
+/* add skb to flow queue (tail add) */
+
+static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb)
+{
+	if (!flow->head)
+		flow->head = skb;
+	else
+		flow->tail->next = skb;
+	flow->tail = skb;
+	skb->next = NULL;
+}
+
+static struct iphdr *cake_get_iphdr(const struct sk_buff *skb,
+				    struct ipv6hdr *buf)
+{
+	unsigned int offset = skb_network_offset(skb);
+	struct iphdr *iph;
+
+	iph = skb_header_pointer(skb, offset, sizeof(struct iphdr), buf);
+
+	if (!iph)
+		return NULL;
+
+	if (iph->version == 4 && iph->protocol == IPPROTO_IPV6)
+		return skb_header_pointer(skb, offset + iph->ihl * 4,
+					  sizeof(struct ipv6hdr), buf);
+
+	else if (iph->version == 4)
+		return iph;
+
+	else if (iph->version == 6)
+		return skb_header_pointer(skb, offset, sizeof(struct ipv6hdr),
+					  buf);
+
+	return NULL;
+}
+
+static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
+				      void *buf, unsigned int bufsize)
+{
+	unsigned int offset = skb_network_offset(skb);
+	const struct ipv6hdr *ipv6h;
+	const struct tcphdr *tcph;
+	const struct iphdr *iph;
+	struct ipv6hdr _ipv6h;
+	struct tcphdr _tcph;
+
+	ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
+
+	if (!ipv6h)
+		return NULL;
+
+	if (ipv6h->version == 4) {
+		iph = (struct iphdr *)ipv6h;
+		offset += iph->ihl * 4;
+
+		/* special-case 6in4 tunnelling, as that is a common way to get
+		 * v6 connectivity in the home
+		 */
+		if (iph->protocol == IPPROTO_IPV6) {
+			ipv6h = skb_header_pointer(skb, offset,
+						   sizeof(_ipv6h), &_ipv6h);
+
+			if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
+				return NULL;
+
+			offset += sizeof(struct ipv6hdr);
+
+		} else if (iph->protocol != IPPROTO_TCP) {
+			return NULL;
+		}
+
+	} else if (ipv6h->version == 6) {
+		if (ipv6h->nexthdr != IPPROTO_TCP)
+			return NULL;
+
+		offset += sizeof(struct ipv6hdr);
+	} else {
+		return NULL;
+	}
+
+	tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+	if (!tcph)
+		return NULL;
+
+	return skb_header_pointer(skb, offset,
+				  min(__tcp_hdrlen(tcph), bufsize), buf);
+}
+
+static const void *cake_get_tcpopt(const struct tcphdr *tcph,
+				   int code, int *oplen)
+{
+	/* inspired by tcp_parse_options in tcp_input.c */
+	int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
+	const u8 *ptr = (const u8 *)(tcph + 1);
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		if (opcode == TCPOPT_EOL)
+			break;
+		if (opcode == TCPOPT_NOP) {
+			length--;
+			continue;
+		}
+		opsize = *ptr++;
+		if (opsize < 2 || opsize > length)
+			break;
+
+		if (opcode == code) {
+			*oplen = opsize;
+			return ptr;
+		}
+
+		ptr += opsize - 2;
+		length -= opsize;
+	}
+
+	return NULL;
+}
+
+/* Compare two SACK sequences. A sequence is considered greater if it SACKs more
+ * bytes than the other. In the case where both sequences ACKs bytes that the
+ * other doesn't, A is considered greater. DSACKs in A also makes A be
+ * considered greater.
+ *
+ * @return -1, 0 or 1 as normal compare functions
+ */
+static int cake_tcph_sack_compare(const struct tcphdr *tcph_a,
+				  const struct tcphdr *tcph_b)
+{
+	const struct tcp_sack_block_wire *sack_a, *sack_b;
+	u32 ack_seq_a = ntohl(tcph_a->ack_seq);
+	u32 bytes_a = 0, bytes_b = 0;
+	int oplen_a, oplen_b;
+	bool first = true;
+
+	sack_a = cake_get_tcpopt(tcph_a, TCPOPT_SACK, &oplen_a);
+	sack_b = cake_get_tcpopt(tcph_b, TCPOPT_SACK, &oplen_b);
+
+	/* pointers point to option contents */
+	oplen_a -= TCPOLEN_SACK_BASE;
+	oplen_b -= TCPOLEN_SACK_BASE;
+
+	if (sack_a && oplen_a >= sizeof(*sack_a) &&
+	    (!sack_b || oplen_b < sizeof(*sack_b)))
+		return -1;
+	else if (sack_b && oplen_b >= sizeof(*sack_b) &&
+		 (!sack_a || oplen_a < sizeof(*sack_a)))
+		return 1;
+	else if ((!sack_a || oplen_a < sizeof(*sack_a)) &&
+		 (!sack_b || oplen_b < sizeof(*sack_b)))
+		return 0;
+
+	while (oplen_a >= sizeof(*sack_a)) {
+		const struct tcp_sack_block_wire *sack_tmp = sack_b;
+		u32 start_a = get_unaligned_be32(&sack_a->start_seq);
+		u32 end_a = get_unaligned_be32(&sack_a->end_seq);
+		int oplen_tmp = oplen_b;
+		bool found = false;
+
+		/* DSACK; always considered greater to prevent dropping */
+		if (before(start_a, ack_seq_a))
+			return -1;
+
+		bytes_a += end_a - start_a;
+
+		while (oplen_tmp >= sizeof(*sack_tmp)) {
+			u32 start_b = get_unaligned_be32(&sack_tmp->start_seq);
+			u32 end_b = get_unaligned_be32(&sack_tmp->end_seq);
+
+			/* first time through we count the total size */
+			if (first)
+				bytes_b += end_b - start_b;
+
+			if (!after(start_b, start_a) && !before(end_b, end_a)) {
+				found = true;
+				if (!first)
+					break;
+			}
+			oplen_tmp -= sizeof(*sack_tmp);
+			sack_tmp++;
+		}
+
+		if (!found)
+			return -1;
+
+		oplen_a -= sizeof(*sack_a);
+		sack_a++;
+		first = false;
+	}
+
+	/* If we made it this far, all ranges SACKed by A are covered by B, so
+	 * either the SACKs are equal, or B SACKs more bytes.
+	 */
+	return bytes_b > bytes_a ? 1 : 0;
+}
+
+static void cake_tcph_get_tstamp(const struct tcphdr *tcph,
+				 u32 *tsval, u32 *tsecr)
+{
+	const u8 *ptr;
+	int opsize;
+
+	ptr = cake_get_tcpopt(tcph, TCPOPT_TIMESTAMP, &opsize);
+
+	if (ptr && opsize == TCPOLEN_TIMESTAMP) {
+		*tsval = get_unaligned_be32(ptr);
+		*tsecr = get_unaligned_be32(ptr + 4);
+	}
+}
+
+static bool cake_tcph_may_drop(const struct tcphdr *tcph,
+			       u32 tstamp_new, u32 tsecr_new)
+{
+	/* inspired by tcp_parse_options in tcp_input.c */
+	int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
+	const u8 *ptr = (const u8 *)(tcph + 1);
+	u32 tstamp, tsecr;
+
+	/* 3 reserved flags must be unset to avoid future breakage
+	 * ACK must be set
+	 * ECE/CWR are handled separately
+	 * All other flags URG/PSH/RST/SYN/FIN must be unset
+	 * 0x0FFF0000 = all TCP flags (confirm ACK=1, others zero)
+	 * 0x00C00000 = CWR/ECE (handled separately)
+	 * 0x0F3F0000 = 0x0FFF0000 & ~0x00C00000
+	 */
+	if (((tcp_flag_word(tcph) &
+	      cpu_to_be32(0x0F3F0000)) != TCP_FLAG_ACK))
+		return false;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		if (opcode == TCPOPT_EOL)
+			break;
+		if (opcode == TCPOPT_NOP) {
+			length--;
+			continue;
+		}
+		opsize = *ptr++;
+		if (opsize < 2 || opsize > length)
+			break;
+
+		switch (opcode) {
+		case TCPOPT_MD5SIG: /* doesn't influence state */
+			break;
+
+		case TCPOPT_SACK: /* stricter checking performed later */
+			if (opsize % 8 != 2)
+				return false;
+			break;
+
+		case TCPOPT_TIMESTAMP:
+			/* only drop timestamps lower than new */
+			if (opsize != TCPOLEN_TIMESTAMP)
+				return false;
+			tstamp = get_unaligned_be32(ptr);
+			tsecr = get_unaligned_be32(ptr + 4);
+			if (after(tstamp, tstamp_new) ||
+			    after(tsecr, tsecr_new))
+				return false;
+			break;
+
+		case TCPOPT_MSS:  /* these should only be set on SYN */
+		case TCPOPT_WINDOW:
+		case TCPOPT_SACK_PERM:
+		case TCPOPT_FASTOPEN:
+		case TCPOPT_EXP:
+		default: /* don't drop if any unknown options are present */
+			return false;
+		}
+
+		ptr += opsize - 2;
+		length -= opsize;
+	}
+
+	return true;
+}
+
+static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
+				       struct cake_flow *flow)
+{
+	bool aggressive = q->ack_filter == CAKE_ACK_AGGRESSIVE;
+	struct sk_buff *elig_ack = NULL, *elig_ack_prev = NULL;
+	struct sk_buff *skb_check, *skb_prev = NULL;
+	const struct ipv6hdr *ipv6h, *ipv6h_check;
+	unsigned char _tcph[64], _tcph_check[64];
+	const struct tcphdr *tcph, *tcph_check;
+	const struct iphdr *iph, *iph_check;
+	struct ipv6hdr _iph, _iph_check;
+	const struct sk_buff *skb;
+	int seglen, num_found = 0;
+	u32 tstamp = 0, tsecr = 0;
+	__be32 elig_flags = 0;
+	int sack_comp;
+
+	/* no other possible ACKs to filter */
+	if (flow->head == flow->tail)
+		return NULL;
+
+	skb = flow->tail;
+	tcph = cake_get_tcphdr(skb, _tcph, sizeof(_tcph));
+	iph = cake_get_iphdr(skb, &_iph);
+	if (!tcph)
+		return NULL;
+
+	cake_tcph_get_tstamp(tcph, &tstamp, &tsecr);
+
+	/* the 'triggering' packet need only have the ACK flag set.
+	 * also check that SYN is not set, as there won't be any previous ACKs.
+	 */
+	if ((tcp_flag_word(tcph) &
+	     (TCP_FLAG_ACK | TCP_FLAG_SYN)) != TCP_FLAG_ACK)
+		return NULL;
+
+	/* the 'triggering' ACK is at the tail of the queue, we have already
+	 * returned if it is the only packet in the flow. loop through the rest
+	 * of the queue looking for pure ACKs with the same 5-tuple as the
+	 * triggering one.
+	 */
+	for (skb_check = flow->head;
+	     skb_check && skb_check != skb;
+	     skb_prev = skb_check, skb_check = skb_check->next) {
+		iph_check = cake_get_iphdr(skb_check, &_iph_check);
+		tcph_check = cake_get_tcphdr(skb_check, &_tcph_check,
+					     sizeof(_tcph_check));
+
+		/* only TCP packets with matching 5-tuple are eligible, and only
+		 * drop safe headers
+		 */
+		if (!tcph_check || iph->version != iph_check->version ||
+		    tcph_check->source != tcph->source ||
+		    tcph_check->dest != tcph->dest)
+			continue;
+
+		if (iph_check->version == 4) {
+			if (iph_check->saddr != iph->saddr ||
+			    iph_check->daddr != iph->daddr)
+				continue;
+
+			seglen = ntohs(iph_check->tot_len) -
+				       (4 * iph_check->ihl);
+		} else if (iph_check->version == 6) {
+			ipv6h = (struct ipv6hdr *)iph;
+			ipv6h_check = (struct ipv6hdr *)iph_check;
+
+			if (ipv6_addr_cmp(&ipv6h_check->saddr, &ipv6h->saddr) ||
+			    ipv6_addr_cmp(&ipv6h_check->daddr, &ipv6h->daddr))
+				continue;
+
+			seglen = ntohs(ipv6h_check->payload_len);
+		} else {
+			WARN_ON(1);  /* shouldn't happen */
+			continue;
+		}
+
+		/* If the ECE/CWR flags changed from the previous eligible
+		 * packet in the same flow, we should no longer be dropping that
+		 * previous packet as this would lose information.
+		 */
+		if (elig_ack && (tcp_flag_word(tcph_check) &
+				 (TCP_FLAG_ECE | TCP_FLAG_CWR)) != elig_flags) {
+			elig_ack = NULL;
+			elig_ack_prev = NULL;
+			num_found--;
+		}
+
+		/* Check TCP options and flags, don't drop ACKs with segment
+		 * data, and don't drop ACKs with a higher cumulative ACK
+		 * counter than the triggering packet. Check ACK seqno here to
+		 * avoid parsing SACK options of packets we are going to exclude
+		 * anyway.
+		 */
+		if (!cake_tcph_may_drop(tcph_check, tstamp, tsecr) ||
+		    (seglen - __tcp_hdrlen(tcph_check)) != 0 ||
+		    after(ntohl(tcph_check->ack_seq), ntohl(tcph->ack_seq)))
+			continue;
+
+		/* Check SACK options. The triggering packet must SACK more data
+		 * than the ACK under consideration, or SACK the same range but
+		 * have a larger cumulative ACK counter. The latter is a
+		 * pathological case, but is contained in the following check
+		 * anyway, just to be safe.
+		 */
+		sack_comp = cake_tcph_sack_compare(tcph_check, tcph);
+
+		if (sack_comp < 0 ||
+		    (ntohl(tcph_check->ack_seq) == ntohl(tcph->ack_seq) &&
+		     sack_comp == 0))
+			continue;
+
+		/* At this point we have found an eligible pure ACK to drop; if
+		 * we are in aggressive mode, we are done. Otherwise, keep
+		 * searching unless this is the second eligible ACK we
+		 * found.
+		 *
+		 * Since we want to drop ACK closest to the head of the queue,
+		 * save the first eligible ACK we find, even if we need to loop
+		 * again.
+		 */
+		if (!elig_ack) {
+			elig_ack = skb_check;
+			elig_ack_prev = skb_prev;
+			elig_flags = (tcp_flag_word(tcph_check)
+				      & (TCP_FLAG_ECE | TCP_FLAG_CWR));
+		}
+
+		if (num_found++ > 0)
+			goto found;
+	}
+
+	/* We made it through the queue without finding two eligible ACKs . If
+	 * we found a single eligible ACK we can drop it in aggressive mode if
+	 * we can guarantee that this does not interfere with ECN flag
+	 * information. We ensure this by dropping it only if the enqueued
+	 * packet is consecutive with the eligible ACK, and their flags match.
+	 */
+	if (elig_ack && aggressive && elig_ack->next == skb &&
+	    (elig_flags == (tcp_flag_word(tcph) &
+			    (TCP_FLAG_ECE | TCP_FLAG_CWR))))
+		goto found;
+
+	return NULL;
+
+found:
+	if (elig_ack_prev)
+		elig_ack_prev->next = elig_ack->next;
+	else
+		flow->head = elig_ack->next;
+
+	elig_ack->next = NULL;
+
+	return elig_ack;
+}
+
+static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
+{
+	avg -= avg >> shift;
+	avg += sample >> shift;
+	return avg;
+}
+
+static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
+{
+	if (q->rate_flags & CAKE_FLAG_OVERHEAD)
+		len -= off;
+
+	if (q->max_netlen < len)
+		q->max_netlen = len;
+	if (q->min_netlen > len)
+		q->min_netlen = len;
+
+	len += q->rate_overhead;
+
+	if (len < q->rate_mpu)
+		len = q->rate_mpu;
+
+	if (q->atm_mode == CAKE_ATM_ATM) {
+		len += 47;
+		len /= 48;
+		len *= 53;
+	} else if (q->atm_mode == CAKE_ATM_PTM) {
+		/* Add one byte per 64 bytes or part thereof.
+		 * This is conservative and easier to calculate than the
+		 * precise value.
+		 */
+		len += (len + 63) / 64;
+	}
+
+	if (q->max_adjlen < len)
+		q->max_adjlen = len;
+	if (q->min_adjlen > len)
+		q->min_adjlen = len;
+
+	return len;
+}
+
+static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	unsigned int hdr_len, last_len = 0;
+	u32 off = skb_network_offset(skb);
+	u32 len = qdisc_pkt_len(skb);
+	u16 segs = 1;
+
+	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
+
+	if (!shinfo->gso_size)
+		return cake_calc_overhead(q, len, off);
+
+	/* borrowed from qdisc_pkt_len_init() */
+	hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+	/* + transport layer */
+	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
+						SKB_GSO_TCPV6))) {
+		const struct tcphdr *th;
+		struct tcphdr _tcphdr;
+
+		th = skb_header_pointer(skb, skb_transport_offset(skb),
+					sizeof(_tcphdr), &_tcphdr);
+		if (likely(th))
+			hdr_len += __tcp_hdrlen(th);
+	} else {
+		struct udphdr _udphdr;
+
+		if (skb_header_pointer(skb, skb_transport_offset(skb),
+				       sizeof(_udphdr), &_udphdr))
+			hdr_len += sizeof(struct udphdr);
+	}
+
+	if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
+		segs = DIV_ROUND_UP(skb->len - hdr_len,
+				    shinfo->gso_size);
+	else
+		segs = shinfo->gso_segs;
+
+	len = shinfo->gso_size + hdr_len;
+	last_len = skb->len - shinfo->gso_size * (segs - 1);
+
+	return (cake_calc_overhead(q, len, off) * (segs - 1) +
+		cake_calc_overhead(q, last_len, off));
+}
+
+static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j)
+{
+	struct cake_heap_entry ii = q->overflow_heap[i];
+	struct cake_heap_entry jj = q->overflow_heap[j];
+
+	q->overflow_heap[i] = jj;
+	q->overflow_heap[j] = ii;
+
+	q->tins[ii.t].overflow_idx[ii.b] = j;
+	q->tins[jj.t].overflow_idx[jj.b] = i;
+}
+
+static u32 cake_heap_get_backlog(const struct cake_sched_data *q, u16 i)
+{
+	struct cake_heap_entry ii = q->overflow_heap[i];
+
+	return q->tins[ii.t].backlogs[ii.b];
+}
+
+static void cake_heapify(struct cake_sched_data *q, u16 i)
+{
+	static const u32 a = CAKE_MAX_TINS * CAKE_QUEUES;
+	u32 mb = cake_heap_get_backlog(q, i);
+	u32 m = i;
+
+	while (m < a) {
+		u32 l = m + m + 1;
+		u32 r = l + 1;
+
+		if (l < a) {
+			u32 lb = cake_heap_get_backlog(q, l);
+
+			if (lb > mb) {
+				m  = l;
+				mb = lb;
+			}
+		}
+
+		if (r < a) {
+			u32 rb = cake_heap_get_backlog(q, r);
+
+			if (rb > mb) {
+				m  = r;
+				mb = rb;
+			}
+		}
+
+		if (m != i) {
+			cake_heap_swap(q, i, m);
+			i = m;
+		} else {
+			break;
+		}
+	}
+}
+
+static void cake_heapify_up(struct cake_sched_data *q, u16 i)
+{
+	while (i > 0 && i < CAKE_MAX_TINS * CAKE_QUEUES) {
+		u16 p = (i - 1) >> 1;
+		u32 ib = cake_heap_get_backlog(q, i);
+		u32 pb = cake_heap_get_backlog(q, p);
+
+		if (ib > pb) {
+			cake_heap_swap(q, i, p);
+			i = p;
+		} else {
+			break;
+		}
+	}
+}
+
+static int cake_advance_shaper(struct cake_sched_data *q,
+			       struct cake_tin_data *b,
+			       struct sk_buff *skb,
+			       ktime_t now, bool drop)
+{
+	u32 len = get_cobalt_cb(skb)->adjusted_len;
+
+	/* charge packet bandwidth to this tin
+	 * and to the global shaper.
+	 */
+	if (q->rate_ns) {
+		u64 tin_dur = (len * b->tin_rate_ns) >> b->tin_rate_shft;
+		u64 global_dur = (len * q->rate_ns) >> q->rate_shft;
+		u64 failsafe_dur = global_dur + (global_dur >> 1);
+
+		if (ktime_before(b->time_next_packet, now))
+			b->time_next_packet = ktime_add_ns(b->time_next_packet,
+							   tin_dur);
+
+		else if (ktime_before(b->time_next_packet,
+				      ktime_add_ns(now, tin_dur)))
+			b->time_next_packet = ktime_add_ns(now, tin_dur);
+
+		q->time_next_packet = ktime_add_ns(q->time_next_packet,
+						   global_dur);
+		if (!drop)
+			q->failsafe_next_packet = \
+				ktime_add_ns(q->failsafe_next_packet,
+					     failsafe_dur);
+	}
+	return len;
+}
+
+static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	ktime_t now = ktime_get();
+	u32 idx = 0, tin = 0, len;
+	struct cake_heap_entry qq;
+	struct cake_tin_data *b;
+	struct cake_flow *flow;
+	struct sk_buff *skb;
+
+	if (!q->overflow_timeout) {
+		int i;
+		/* Build fresh max-heap */
+		for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--)
+			cake_heapify(q, i);
+	}
+	q->overflow_timeout = 65535;
+
+	/* select longest queue for pruning */
+	qq  = q->overflow_heap[0];
+	tin = qq.t;
+	idx = qq.b;
+
+	b = &q->tins[tin];
+	flow = &b->flows[idx];
+	skb = dequeue_head(flow);
+	if (unlikely(!skb)) {
+		/* heap has gone wrong, rebuild it next time */
+		q->overflow_timeout = 0;
+		return idx + (tin << 16);
+	}
+
+	if (cobalt_queue_full(&flow->cvars, &b->cparams, now))
+		b->unresponsive_flow_count++;
+
+	len = qdisc_pkt_len(skb);
+	q->buffer_used      -= skb->truesize;
+	b->backlogs[idx]    -= len;
+	b->tin_backlog      -= len;
+	sch->qstats.backlog -= len;
+	qdisc_tree_reduce_backlog(sch, 1, len);
+
+	flow->dropped++;
+	b->tin_dropped++;
+	sch->qstats.drops++;
+
+	if (q->rate_flags & CAKE_FLAG_INGRESS)
+		cake_advance_shaper(q, b, skb, now, true);
+
+	__qdisc_drop(skb, to_free);
+	sch->q.qlen--;
+
+	cake_heapify(q, 0);
+
+	return idx + (tin << 16);
+}
+
+static void cake_wash_diffserv(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+		break;
+	case htons(ETH_P_IPV6):
+		ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+		break;
+	default:
+		break;
+	}
+}
+
+static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+{
+	u8 dscp;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+		if (wash && dscp)
+			ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+		return dscp;
+
+	case htons(ETH_P_IPV6):
+		dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+		if (wash && dscp)
+			ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+		return dscp;
+
+	case htons(ETH_P_ARP):
+		return 0x38;  /* CS7 - Net Control */
+
+	default:
+		/* If there is no Diffserv field, treat as best-effort */
+		return 0;
+	}
+}
+
+static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
+					     struct sk_buff *skb)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 tin;
+
+	if (TC_H_MAJ(skb->priority) == sch->handle &&
+	    TC_H_MIN(skb->priority) > 0 &&
+	    TC_H_MIN(skb->priority) <= q->tin_cnt) {
+		tin = TC_H_MIN(skb->priority) - 1;
+
+		if (q->rate_flags & CAKE_FLAG_WASH)
+			cake_wash_diffserv(skb);
+	} else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
+		/* extract the Diffserv Precedence field, if it exists */
+		/* and clear DSCP bits if washing */
+		tin = q->tin_index[cake_handle_diffserv(skb,
+				q->rate_flags & CAKE_FLAG_WASH)];
+		if (unlikely(tin >= q->tin_cnt))
+			tin = 0;
+	} else {
+		tin = 0;
+		if (q->rate_flags & CAKE_FLAG_WASH)
+			cake_wash_diffserv(skb);
+	}
+
+	return &q->tins[tin];
+}
+
+static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
+			 struct sk_buff *skb, int flow_mode, int *qerr)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct tcf_proto *filter;
+	struct tcf_result res;
+	u32 flow = 0;
+	int result;
+
+	filter = rcu_dereference_bh(q->filter_list);
+	if (!filter)
+		goto hash;
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	result = tcf_classify(skb, filter, &res, false);
+
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+		case TC_ACT_TRAP:
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
+		case TC_ACT_SHOT:
+			return 0;
+		}
+#endif
+		if (TC_H_MIN(res.classid) <= CAKE_QUEUES)
+			flow = TC_H_MIN(res.classid);
+	}
+hash:
+	*t = cake_select_tin(sch, skb);
+	return flow ?: cake_hash(*t, skb, flow_mode) + 1;
+}
+
+static void cake_reconfigure(struct Qdisc *sch);
+
+static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			struct sk_buff **to_free)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	int len = qdisc_pkt_len(skb);
+	int uninitialized_var(ret);
+	struct sk_buff *ack = NULL;
+	ktime_t now = ktime_get();
+	struct cake_tin_data *b;
+	struct cake_flow *flow;
+	u32 idx;
+
+	/* choose flow to insert into */
+	idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
+	if (idx == 0) {
+		if (ret & __NET_XMIT_BYPASS)
+			qdisc_qstats_drop(sch);
+		__qdisc_drop(skb, to_free);
+		return ret;
+	}
+	idx--;
+	flow = &b->flows[idx];
+
+	/* ensure shaper state isn't stale */
+	if (!b->tin_backlog) {
+		if (ktime_before(b->time_next_packet, now))
+			b->time_next_packet = now;
+
+		if (!sch->q.qlen) {
+			if (ktime_before(q->time_next_packet, now)) {
+				q->failsafe_next_packet = now;
+				q->time_next_packet = now;
+			} else if (ktime_after(q->time_next_packet, now) &&
+				   ktime_after(q->failsafe_next_packet, now)) {
+				u64 next = \
+					min(ktime_to_ns(q->time_next_packet),
+					    ktime_to_ns(
+						   q->failsafe_next_packet));
+				sch->qstats.overlimits++;
+				qdisc_watchdog_schedule_ns(&q->watchdog, next);
+			}
+		}
+	}
+
+	if (unlikely(len > b->max_skblen))
+		b->max_skblen = len;
+
+	if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) {
+		struct sk_buff *segs, *nskb;
+		netdev_features_t features = netif_skb_features(skb);
+		unsigned int slen = 0;
+
+		segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+		if (IS_ERR_OR_NULL(segs))
+			return qdisc_drop(skb, sch, to_free);
+
+		while (segs) {
+			nskb = segs->next;
+			segs->next = NULL;
+			qdisc_skb_cb(segs)->pkt_len = segs->len;
+			cobalt_set_enqueue_time(segs, now);
+			get_cobalt_cb(segs)->adjusted_len = cake_overhead(q,
+									  segs);
+			flow_queue_add(flow, segs);
+
+			sch->q.qlen++;
+			slen += segs->len;
+			q->buffer_used += segs->truesize;
+			b->packets++;
+			segs = nskb;
+		}
+
+		/* stats */
+		b->bytes	    += slen;
+		b->backlogs[idx]    += slen;
+		b->tin_backlog      += slen;
+		sch->qstats.backlog += slen;
+		q->avg_window_bytes += slen;
+
+		qdisc_tree_reduce_backlog(sch, 1, len);
+		consume_skb(skb);
+	} else {
+		/* not splitting */
+		cobalt_set_enqueue_time(skb, now);
+		get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
+		flow_queue_add(flow, skb);
+
+		if (q->ack_filter)
+			ack = cake_ack_filter(q, flow);
+
+		if (ack) {
+			b->ack_drops++;
+			sch->qstats.drops++;
+			b->bytes += qdisc_pkt_len(ack);
+			len -= qdisc_pkt_len(ack);
+			q->buffer_used += skb->truesize - ack->truesize;
+			if (q->rate_flags & CAKE_FLAG_INGRESS)
+				cake_advance_shaper(q, b, ack, now, true);
+
+			qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(ack));
+			consume_skb(ack);
+		} else {
+			sch->q.qlen++;
+			q->buffer_used      += skb->truesize;
+		}
+
+		/* stats */
+		b->packets++;
+		b->bytes	    += len;
+		b->backlogs[idx]    += len;
+		b->tin_backlog      += len;
+		sch->qstats.backlog += len;
+		q->avg_window_bytes += len;
+	}
+
+	if (q->overflow_timeout)
+		cake_heapify_up(q, b->overflow_idx[idx]);
+
+	/* incoming bandwidth capacity estimate */
+	if (q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) {
+		u64 packet_interval = \
+			ktime_to_ns(ktime_sub(now, q->last_packet_time));
+
+		if (packet_interval > NSEC_PER_SEC)
+			packet_interval = NSEC_PER_SEC;
+
+		/* filter out short-term bursts, eg. wifi aggregation */
+		q->avg_packet_interval = \
+			cake_ewma(q->avg_packet_interval,
+				  packet_interval,
+				  (packet_interval > q->avg_packet_interval ?
+					  2 : 8));
+
+		q->last_packet_time = now;
+
+		if (packet_interval > q->avg_packet_interval) {
+			u64 window_interval = \
+				ktime_to_ns(ktime_sub(now,
+						      q->avg_window_begin));
+			u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
+
+			do_div(b, window_interval);
+			q->avg_peak_bandwidth =
+				cake_ewma(q->avg_peak_bandwidth, b,
+					  b > q->avg_peak_bandwidth ? 2 : 8);
+			q->avg_window_bytes = 0;
+			q->avg_window_begin = now;
+
+			if (ktime_after(now,
+					ktime_add_ms(q->last_reconfig_time,
+						     250))) {
+				q->rate_bps = (q->avg_peak_bandwidth * 15) >> 4;
+				cake_reconfigure(sch);
+			}
+		}
+	} else {
+		q->avg_window_bytes = 0;
+		q->last_packet_time = now;
+	}
+
+	/* flowchain */
+	if (!flow->set || flow->set == CAKE_SET_DECAYING) {
+		struct cake_host *srchost = &b->hosts[flow->srchost];
+		struct cake_host *dsthost = &b->hosts[flow->dsthost];
+		u16 host_load = 1;
+
+		if (!flow->set) {
+			list_add_tail(&flow->flowchain, &b->new_flows);
+		} else {
+			b->decaying_flow_count--;
+			list_move_tail(&flow->flowchain, &b->new_flows);
+		}
+		flow->set = CAKE_SET_SPARSE;
+		b->sparse_flow_count++;
+
+		if (cake_dsrc(q->flow_mode))
+			host_load = max(host_load, srchost->srchost_refcnt);
+
+		if (cake_ddst(q->flow_mode))
+			host_load = max(host_load, dsthost->dsthost_refcnt);
+
+		flow->deficit = (b->flow_quantum *
+				 quantum_div[host_load]) >> 16;
+	} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+		/* this flow was empty, accounted as a sparse flow, but actually
+		 * in the bulk rotation.
+		 */
+		flow->set = CAKE_SET_BULK;
+		b->sparse_flow_count--;
+		b->bulk_flow_count++;
+	}
+
+	if (q->buffer_used > q->buffer_max_used)
+		q->buffer_max_used = q->buffer_used;
+
+	if (q->buffer_used > q->buffer_limit) {
+		u32 dropped = 0;
+
+		while (q->buffer_used > q->buffer_limit) {
+			dropped++;
+			cake_drop(sch, to_free);
+		}
+		b->drop_overlimit += dropped;
+	}
+	return NET_XMIT_SUCCESS;
+}
+
+static struct sk_buff *cake_dequeue_one(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[q->cur_tin];
+	struct cake_flow *flow = &b->flows[q->cur_flow];
+	struct sk_buff *skb = NULL;
+	u32 len;
+
+	if (flow->head) {
+		skb = dequeue_head(flow);
+		len = qdisc_pkt_len(skb);
+		b->backlogs[q->cur_flow] -= len;
+		b->tin_backlog		 -= len;
+		sch->qstats.backlog      -= len;
+		q->buffer_used		 -= skb->truesize;
+		sch->q.qlen--;
+
+		if (q->overflow_timeout)
+			cake_heapify(q, b->overflow_idx[q->cur_flow]);
+	}
+	return skb;
+}
+
+/* Discard leftover packets from a tin no longer in use. */
+static void cake_clear_tin(struct Qdisc *sch, u16 tin)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	q->cur_tin = tin;
+	for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++)
+		while (!!(skb = cake_dequeue_one(sch)))
+			kfree_skb(skb);
+}
+
+static struct sk_buff *cake_dequeue(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[q->cur_tin];
+	struct cake_host *srchost, *dsthost;
+	ktime_t now = ktime_get();
+	struct cake_flow *flow;
+	struct list_head *head;
+	bool first_flow = true;
+	struct sk_buff *skb;
+	u16 host_load;
+	u64 delay;
+	u32 len;
+
+begin:
+	if (!sch->q.qlen)
+		return NULL;
+
+	/* global hard shaper */
+	if (ktime_after(q->time_next_packet, now) &&
+	    ktime_after(q->failsafe_next_packet, now)) {
+		u64 next = min(ktime_to_ns(q->time_next_packet),
+			       ktime_to_ns(q->failsafe_next_packet));
+
+		sch->qstats.overlimits++;
+		qdisc_watchdog_schedule_ns(&q->watchdog, next);
+		return NULL;
+	}
+
+	/* Choose a class to work on. */
+	if (!q->rate_ns) {
+		/* In unlimited mode, can't rely on shaper timings, just balance
+		 * with DRR
+		 */
+		bool wrapped = false, empty = true;
+
+		while (b->tin_deficit < 0 ||
+		       !(b->sparse_flow_count + b->bulk_flow_count)) {
+			if (b->tin_deficit <= 0)
+				b->tin_deficit += b->tin_quantum_band;
+			if (b->sparse_flow_count + b->bulk_flow_count)
+				empty = false;
+
+			q->cur_tin++;
+			b++;
+			if (q->cur_tin >= q->tin_cnt) {
+				q->cur_tin = 0;
+				b = q->tins;
+
+				if (wrapped) {
+					/* It's possible for q->qlen to be
+					 * nonzero when we actually have no
+					 * packets anywhere.
+					 */
+					if (empty)
+						return NULL;
+				} else {
+					wrapped = true;
+				}
+			}
+		}
+	} else {
+		/* In shaped mode, choose:
+		 * - Highest-priority tin with queue and meeting schedule, or
+		 * - The earliest-scheduled tin with queue.
+		 */
+		ktime_t best_time = KTIME_MAX;
+		int tin, best_tin = 0;
+
+		for (tin = 0; tin < q->tin_cnt; tin++) {
+			b = q->tins + tin;
+			if ((b->sparse_flow_count + b->bulk_flow_count) > 0) {
+				ktime_t time_to_pkt = \
+					ktime_sub(b->time_next_packet, now);
+
+				if (ktime_to_ns(time_to_pkt) <= 0 ||
+				    ktime_compare(time_to_pkt,
+						  best_time) <= 0) {
+					best_time = time_to_pkt;
+					best_tin = tin;
+				}
+			}
+		}
+
+		q->cur_tin = best_tin;
+		b = q->tins + best_tin;
+
+		/* No point in going further if no packets to deliver. */
+		if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
+			return NULL;
+	}
+
+retry:
+	/* service this class */
+	head = &b->decaying_flows;
+	if (!first_flow || list_empty(head)) {
+		head = &b->new_flows;
+		if (list_empty(head)) {
+			head = &b->old_flows;
+			if (unlikely(list_empty(head))) {
+				head = &b->decaying_flows;
+				if (unlikely(list_empty(head)))
+					goto begin;
+			}
+		}
+	}
+	flow = list_first_entry(head, struct cake_flow, flowchain);
+	q->cur_flow = flow - b->flows;
+	first_flow = false;
+
+	/* triple isolation (modified DRR++) */
+	srchost = &b->hosts[flow->srchost];
+	dsthost = &b->hosts[flow->dsthost];
+	host_load = 1;
+
+	if (cake_dsrc(q->flow_mode))
+		host_load = max(host_load, srchost->srchost_refcnt);
+
+	if (cake_ddst(q->flow_mode))
+		host_load = max(host_load, dsthost->dsthost_refcnt);
+
+	WARN_ON(host_load > CAKE_QUEUES);
+
+	/* flow isolation (DRR++) */
+	if (flow->deficit <= 0) {
+		/* The shifted prandom_u32() is a way to apply dithering to
+		 * avoid accumulating roundoff errors
+		 */
+		flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+				  (prandom_u32() >> 16)) >> 16;
+		list_move_tail(&flow->flowchain, &b->old_flows);
+
+		/* Keep all flows with deficits out of the sparse and decaying
+		 * rotations.  No non-empty flow can go into the decaying
+		 * rotation, so they can't get deficits
+		 */
+		if (flow->set == CAKE_SET_SPARSE) {
+			if (flow->head) {
+				b->sparse_flow_count--;
+				b->bulk_flow_count++;
+				flow->set = CAKE_SET_BULK;
+			} else {
+				/* we've moved it to the bulk rotation for
+				 * correct deficit accounting but we still want
+				 * to count it as a sparse flow, not a bulk one.
+				 */
+				flow->set = CAKE_SET_SPARSE_WAIT;
+			}
+		}
+		goto retry;
+	}
+
+	/* Retrieve a packet via the AQM */
+	while (1) {
+		skb = cake_dequeue_one(sch);
+		if (!skb) {
+			/* this queue was actually empty */
+			if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
+				b->unresponsive_flow_count--;
+
+			if (flow->cvars.p_drop || flow->cvars.count ||
+			    ktime_before(now, flow->cvars.drop_next)) {
+				/* keep in the flowchain until the state has
+				 * decayed to rest
+				 */
+				list_move_tail(&flow->flowchain,
+					       &b->decaying_flows);
+				if (flow->set == CAKE_SET_BULK) {
+					b->bulk_flow_count--;
+					b->decaying_flow_count++;
+				} else if (flow->set == CAKE_SET_SPARSE ||
+					   flow->set == CAKE_SET_SPARSE_WAIT) {
+					b->sparse_flow_count--;
+					b->decaying_flow_count++;
+				}
+				flow->set = CAKE_SET_DECAYING;
+			} else {
+				/* remove empty queue from the flowchain */
+				list_del_init(&flow->flowchain);
+				if (flow->set == CAKE_SET_SPARSE ||
+				    flow->set == CAKE_SET_SPARSE_WAIT)
+					b->sparse_flow_count--;
+				else if (flow->set == CAKE_SET_BULK)
+					b->bulk_flow_count--;
+				else
+					b->decaying_flow_count--;
+
+				flow->set = CAKE_SET_NONE;
+				srchost->srchost_refcnt--;
+				dsthost->dsthost_refcnt--;
+			}
+			goto begin;
+		}
+
+		/* Last packet in queue may be marked, shouldn't be dropped */
+		if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
+					(b->bulk_flow_count *
+					 !!(q->rate_flags &
+					    CAKE_FLAG_INGRESS))) ||
+		    !flow->head)
+			break;
+
+		/* drop this packet, get another one */
+		if (q->rate_flags & CAKE_FLAG_INGRESS) {
+			len = cake_advance_shaper(q, b, skb,
+						  now, true);
+			flow->deficit -= len;
+			b->tin_deficit -= len;
+		}
+		flow->dropped++;
+		b->tin_dropped++;
+		qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
+		qdisc_qstats_drop(sch);
+		kfree_skb(skb);
+		if (q->rate_flags & CAKE_FLAG_INGRESS)
+			goto retry;
+	}
+
+	b->tin_ecn_mark += !!flow->cvars.ecn_marked;
+	qdisc_bstats_update(sch, skb);
+
+	/* collect delay stats */
+	delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
+	b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
+	b->peak_delay = cake_ewma(b->peak_delay, delay,
+				  delay > b->peak_delay ? 2 : 8);
+	b->base_delay = cake_ewma(b->base_delay, delay,
+				  delay < b->base_delay ? 2 : 8);
+
+	len = cake_advance_shaper(q, b, skb, now, false);
+	flow->deficit -= len;
+	b->tin_deficit -= len;
+
+	if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
+		u64 next = min(ktime_to_ns(q->time_next_packet),
+			       ktime_to_ns(q->failsafe_next_packet));
+
+		qdisc_watchdog_schedule_ns(&q->watchdog, next);
+	} else if (!sch->q.qlen) {
+		int i;
+
+		for (i = 0; i < q->tin_cnt; i++) {
+			if (q->tins[i].decaying_flow_count) {
+				ktime_t next = \
+					ktime_add_ns(now,
+						     q->tins[i].cparams.target);
+
+				qdisc_watchdog_schedule_ns(&q->watchdog,
+							   ktime_to_ns(next));
+				break;
+			}
+		}
+	}
+
+	if (q->overflow_timeout)
+		q->overflow_timeout--;
+
+	return skb;
+}
+
+static void cake_reset(struct Qdisc *sch)
+{
+	u32 c;
+
+	for (c = 0; c < CAKE_MAX_TINS; c++)
+		cake_clear_tin(sch, c);
+}
+
+static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = {
+	[TCA_CAKE_BASE_RATE64]   = { .type = NLA_U64 },
+	[TCA_CAKE_DIFFSERV_MODE] = { .type = NLA_U32 },
+	[TCA_CAKE_ATM]		 = { .type = NLA_U32 },
+	[TCA_CAKE_FLOW_MODE]     = { .type = NLA_U32 },
+	[TCA_CAKE_OVERHEAD]      = { .type = NLA_S32 },
+	[TCA_CAKE_RTT]		 = { .type = NLA_U32 },
+	[TCA_CAKE_TARGET]	 = { .type = NLA_U32 },
+	[TCA_CAKE_AUTORATE]      = { .type = NLA_U32 },
+	[TCA_CAKE_MEMORY]	 = { .type = NLA_U32 },
+	[TCA_CAKE_NAT]		 = { .type = NLA_U32 },
+	[TCA_CAKE_RAW]		 = { .type = NLA_U32 },
+	[TCA_CAKE_WASH]		 = { .type = NLA_U32 },
+	[TCA_CAKE_MPU]		 = { .type = NLA_U32 },
+	[TCA_CAKE_INGRESS]	 = { .type = NLA_U32 },
+	[TCA_CAKE_ACK_FILTER]	 = { .type = NLA_U32 },
+};
+
+static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
+			  u64 target_ns, u64 rtt_est_ns)
+{
+	/* convert byte-rate into time-per-byte
+	 * so it will always unwedge in reasonable time.
+	 */
+	static const u64 MIN_RATE = 64;
+	u32 byte_target = mtu;
+	u64 byte_target_ns;
+	u8  rate_shft = 0;
+	u64 rate_ns = 0;
+
+	b->flow_quantum = 1514;
+	if (rate) {
+		b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL);
+		rate_shft = 34;
+		rate_ns = ((u64)NSEC_PER_SEC) << rate_shft;
+		rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate));
+		while (!!(rate_ns >> 34)) {
+			rate_ns >>= 1;
+			rate_shft--;
+		}
+	} /* else unlimited, ie. zero delay */
+
+	b->tin_rate_bps  = rate;
+	b->tin_rate_ns   = rate_ns;
+	b->tin_rate_shft = rate_shft;
+
+	byte_target_ns = (byte_target * rate_ns) >> rate_shft;
+
+	b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
+	b->cparams.interval = max(rtt_est_ns +
+				     b->cparams.target - target_ns,
+				     b->cparams.target * 2);
+	b->cparams.mtu_time = byte_target_ns;
+	b->cparams.p_inc = 1 << 24; /* 1/256 */
+	b->cparams.p_dec = 1 << 20; /* 1/4096 */
+}
+
+static int cake_config_besteffort(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[0];
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+
+	q->tin_cnt = 1;
+
+	q->tin_index = besteffort;
+	q->tin_order = normal_order;
+
+	cake_set_rate(b, rate, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	b->tin_quantum_band = 65535;
+	b->tin_quantum_prio = 65535;
+
+	return 0;
+}
+
+static int cake_config_precedence(struct Qdisc *sch)
+{
+	/* convert high-level (user visible) parameters into internal format */
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum1 = 256;
+	u32 quantum2 = 256;
+	u32 i;
+
+	q->tin_cnt = 8;
+	q->tin_index = precedence;
+	q->tin_order = normal_order;
+
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[i];
+
+		cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+			      us_to_ns(q->interval));
+
+		b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+		b->tin_quantum_band = max_t(u16, 1U, quantum2);
+
+		/* calculate next class's parameters */
+		rate  *= 7;
+		rate >>= 3;
+
+		quantum1  *= 3;
+		quantum1 >>= 1;
+
+		quantum2  *= 7;
+		quantum2 >>= 3;
+	}
+
+	return 0;
+}
+
+/*	List of known Diffserv codepoints:
+ *
+ *	Least Effort (CS1)
+ *	Best Effort (CS0)
+ *	Max Reliability & LLT "Lo" (TOS1)
+ *	Max Throughput (TOS2)
+ *	Min Delay (TOS4)
+ *	LLT "La" (TOS5)
+ *	Assured Forwarding 1 (AF1x) - x3
+ *	Assured Forwarding 2 (AF2x) - x3
+ *	Assured Forwarding 3 (AF3x) - x3
+ *	Assured Forwarding 4 (AF4x) - x3
+ *	Precedence Class 2 (CS2)
+ *	Precedence Class 3 (CS3)
+ *	Precedence Class 4 (CS4)
+ *	Precedence Class 5 (CS5)
+ *	Precedence Class 6 (CS6)
+ *	Precedence Class 7 (CS7)
+ *	Voice Admit (VA)
+ *	Expedited Forwarding (EF)
+
+ *	Total 25 codepoints.
+ */
+
+/*	List of traffic classes in RFC 4594:
+ *		(roughly descending order of contended priority)
+ *		(roughly ascending order of uncontended throughput)
+ *
+ *	Network Control (CS6,CS7)      - routing traffic
+ *	Telephony (EF,VA)         - aka. VoIP streams
+ *	Signalling (CS5)               - VoIP setup
+ *	Multimedia Conferencing (AF4x) - aka. video calls
+ *	Realtime Interactive (CS4)     - eg. games
+ *	Multimedia Streaming (AF3x)    - eg. YouTube, NetFlix, Twitch
+ *	Broadcast Video (CS3)
+ *	Low Latency Data (AF2x,TOS4)      - eg. database
+ *	Ops, Admin, Management (CS2,TOS1) - eg. ssh
+ *	Standard Service (CS0 & unrecognised codepoints)
+ *	High Throughput Data (AF1x,TOS2)  - eg. web traffic
+ *	Low Priority Data (CS1)           - eg. BitTorrent
+
+ *	Total 12 traffic classes.
+ */
+
+static int cake_config_diffserv8(struct Qdisc *sch)
+{
+/*	Pruned list of traffic classes for typical applications:
+ *
+ *		Network Control          (CS6, CS7)
+ *		Minimum Latency          (EF, VA, CS5, CS4)
+ *		Interactive Shell        (CS2, TOS1)
+ *		Low Latency Transactions (AF2x, TOS4)
+ *		Video Streaming          (AF4x, AF3x, CS3)
+ *		Bog Standard             (CS0 etc.)
+ *		High Throughput          (AF1x, TOS2)
+ *		Background Traffic       (CS1)
+ *
+ *		Total 8 traffic classes.
+ */
+
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum1 = 256;
+	u32 quantum2 = 256;
+	u32 i;
+
+	q->tin_cnt = 8;
+
+	/* codepoint to class mapping */
+	q->tin_index = diffserv8;
+	q->tin_order = normal_order;
+
+	/* class characteristics */
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[i];
+
+		cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+			      us_to_ns(q->interval));
+
+		b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+		b->tin_quantum_band = max_t(u16, 1U, quantum2);
+
+		/* calculate next class's parameters */
+		rate  *= 7;
+		rate >>= 3;
+
+		quantum1  *= 3;
+		quantum1 >>= 1;
+
+		quantum2  *= 7;
+		quantum2 >>= 3;
+	}
+
+	return 0;
+}
+
+static int cake_config_diffserv4(struct Qdisc *sch)
+{
+/*  Further pruned list of traffic classes for four-class system:
+ *
+ *	    Latency Sensitive  (CS7, CS6, EF, VA, CS5, CS4)
+ *	    Streaming Media    (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
+ *	    Best Effort        (CS0, AF1x, TOS2, and those not specified)
+ *	    Background Traffic (CS1)
+ *
+ *		Total 4 traffic classes.
+ */
+
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum = 1024;
+
+	q->tin_cnt = 4;
+
+	/* codepoint to class mapping */
+	q->tin_index = diffserv4;
+	q->tin_order = bulk_order;
+
+	/* class characteristics */
+	cake_set_rate(&q->tins[0], rate, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[1], rate >> 4, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[2], rate >> 1, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[3], rate >> 2, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+
+	/* priority weights */
+	q->tins[0].tin_quantum_prio = quantum;
+	q->tins[1].tin_quantum_prio = quantum >> 4;
+	q->tins[2].tin_quantum_prio = quantum << 2;
+	q->tins[3].tin_quantum_prio = quantum << 4;
+
+	/* bandwidth-sharing weights */
+	q->tins[0].tin_quantum_band = quantum;
+	q->tins[1].tin_quantum_band = quantum >> 4;
+	q->tins[2].tin_quantum_band = quantum >> 1;
+	q->tins[3].tin_quantum_band = quantum >> 2;
+
+	return 0;
+}
+
+static int cake_config_diffserv3(struct Qdisc *sch)
+{
+/*  Simplified Diffserv structure with 3 tins.
+ *		Low Priority		(CS1)
+ *		Best Effort
+ *		Latency Sensitive	(TOS4, VA, EF, CS6, CS7)
+ */
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum = 1024;
+
+	q->tin_cnt = 3;
+
+	/* codepoint to class mapping */
+	q->tin_index = diffserv3;
+	q->tin_order = bulk_order;
+
+	/* class characteristics */
+	cake_set_rate(&q->tins[0], rate, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[1], rate >> 4, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[2], rate >> 2, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+
+	/* priority weights */
+	q->tins[0].tin_quantum_prio = quantum;
+	q->tins[1].tin_quantum_prio = quantum >> 4;
+	q->tins[2].tin_quantum_prio = quantum << 4;
+
+	/* bandwidth-sharing weights */
+	q->tins[0].tin_quantum_band = quantum;
+	q->tins[1].tin_quantum_band = quantum >> 4;
+	q->tins[2].tin_quantum_band = quantum >> 2;
+
+	return 0;
+}
+
+static void cake_reconfigure(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	int c, ft;
+
+	switch (q->tin_mode) {
+	case CAKE_DIFFSERV_BESTEFFORT:
+		ft = cake_config_besteffort(sch);
+		break;
+
+	case CAKE_DIFFSERV_PRECEDENCE:
+		ft = cake_config_precedence(sch);
+		break;
+
+	case CAKE_DIFFSERV_DIFFSERV8:
+		ft = cake_config_diffserv8(sch);
+		break;
+
+	case CAKE_DIFFSERV_DIFFSERV4:
+		ft = cake_config_diffserv4(sch);
+		break;
+
+	case CAKE_DIFFSERV_DIFFSERV3:
+	default:
+		ft = cake_config_diffserv3(sch);
+		break;
+	}
+
+	for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) {
+		cake_clear_tin(sch, c);
+		q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time;
+	}
+
+	q->rate_ns   = q->tins[ft].tin_rate_ns;
+	q->rate_shft = q->tins[ft].tin_rate_shft;
+
+	if (q->buffer_config_limit) {
+		q->buffer_limit = q->buffer_config_limit;
+	} else if (q->rate_bps) {
+		u64 t = q->rate_bps * q->interval;
+
+		do_div(t, USEC_PER_SEC / 4);
+		q->buffer_limit = max_t(u32, t, 4U << 20);
+	} else {
+		q->buffer_limit = ~0;
+	}
+
+	sch->flags &= ~TCQ_F_CAN_BYPASS;
+
+	q->buffer_limit = min(q->buffer_limit,
+			      max(sch->limit * psched_mtu(qdisc_dev(sch)),
+				  q->buffer_config_limit));
+}
+
+static int cake_change(struct Qdisc *sch, struct nlattr *opt,
+		       struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tb[TCA_CAKE_MAX + 1];
+	int err;
+
+	if (!opt)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_CAKE_MAX, opt, cake_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_CAKE_NAT]) {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+		q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
+		q->flow_mode |= CAKE_FLOW_NAT_FLAG *
+			!!nla_get_u32(tb[TCA_CAKE_NAT]);
+#else
+		NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT],
+				    "No conntrack support in kernel");
+		return -EOPNOTSUPP;
+#endif
+	}
+
+	if (tb[TCA_CAKE_BASE_RATE64])
+		q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
+
+	if (tb[TCA_CAKE_DIFFSERV_MODE])
+		q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
+
+	if (tb[TCA_CAKE_WASH]) {
+		if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
+			q->rate_flags |= CAKE_FLAG_WASH;
+		else
+			q->rate_flags &= ~CAKE_FLAG_WASH;
+	}
+
+	if (tb[TCA_CAKE_FLOW_MODE])
+		q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) |
+				(nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
+					CAKE_FLOW_MASK));
+
+	if (tb[TCA_CAKE_ATM])
+		q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
+
+	if (tb[TCA_CAKE_OVERHEAD]) {
+		q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
+		q->rate_flags |= CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_RAW]) {
+		q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_MPU])
+		q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
+
+	if (tb[TCA_CAKE_RTT]) {
+		q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
+
+		if (!q->interval)
+			q->interval = 1;
+	}
+
+	if (tb[TCA_CAKE_TARGET]) {
+		q->target = nla_get_u32(tb[TCA_CAKE_TARGET]);
+
+		if (!q->target)
+			q->target = 1;
+	}
+
+	if (tb[TCA_CAKE_AUTORATE]) {
+		if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE]))
+			q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
+		else
+			q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
+	}
+
+	if (tb[TCA_CAKE_INGRESS]) {
+		if (!!nla_get_u32(tb[TCA_CAKE_INGRESS]))
+			q->rate_flags |= CAKE_FLAG_INGRESS;
+		else
+			q->rate_flags &= ~CAKE_FLAG_INGRESS;
+	}
+
+	if (tb[TCA_CAKE_ACK_FILTER])
+		q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]);
+
+	if (tb[TCA_CAKE_MEMORY])
+		q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
+
+	if (q->rate_bps && q->rate_bps <= CAKE_SPLIT_GSO_THRESHOLD)
+		q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
+	else
+		q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
+
+	if (q->tins) {
+		sch_tree_lock(sch);
+		cake_reconfigure(sch);
+		sch_tree_unlock(sch);
+	}
+
+	return 0;
+}
+
+static void cake_destroy(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+
+	qdisc_watchdog_cancel(&q->watchdog);
+	tcf_block_put(q->block);
+	kvfree(q->tins);
+}
+
+static int cake_init(struct Qdisc *sch, struct nlattr *opt,
+		     struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	int i, j, err;
+
+	sch->limit = 10240;
+	q->tin_mode = CAKE_DIFFSERV_DIFFSERV3;
+	q->flow_mode  = CAKE_FLOW_TRIPLE;
+
+	q->rate_bps = 0; /* unlimited by default */
+
+	q->interval = 100000; /* 100ms default */
+	q->target   =   5000; /* 5ms: codel RFC argues
+			       * for 5 to 10% of interval
+			       */
+
+	q->cur_tin = 0;
+	q->cur_flow  = 0;
+
+	qdisc_watchdog_init(&q->watchdog, sch);
+
+	if (opt) {
+		int err = cake_change(sch, opt, extack);
+
+		if (err)
+			return err;
+	}
+
+	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
+	if (err)
+		return err;
+
+	quantum_div[0] = ~0;
+	for (i = 1; i <= CAKE_QUEUES; i++)
+		quantum_div[i] = 65535 / i;
+
+	q->tins = kvzalloc(CAKE_MAX_TINS * sizeof(struct cake_tin_data),
+			   GFP_KERNEL);
+	if (!q->tins)
+		goto nomem;
+
+	for (i = 0; i < CAKE_MAX_TINS; i++) {
+		struct cake_tin_data *b = q->tins + i;
+
+		INIT_LIST_HEAD(&b->new_flows);
+		INIT_LIST_HEAD(&b->old_flows);
+		INIT_LIST_HEAD(&b->decaying_flows);
+		b->sparse_flow_count = 0;
+		b->bulk_flow_count = 0;
+		b->decaying_flow_count = 0;
+
+		for (j = 0; j < CAKE_QUEUES; j++) {
+			struct cake_flow *flow = b->flows + j;
+			u32 k = j * CAKE_MAX_TINS + i;
+
+			INIT_LIST_HEAD(&flow->flowchain);
+			cobalt_vars_init(&flow->cvars);
+
+			q->overflow_heap[k].t = i;
+			q->overflow_heap[k].b = j;
+			b->overflow_idx[j] = k;
+		}
+	}
+
+	cake_reconfigure(sch);
+	q->avg_peak_bandwidth = q->rate_bps;
+	q->min_netlen = ~0;
+	q->min_adjlen = ~0;
+	return 0;
+
+nomem:
+	cake_destroy(sch);
+	return -ENOMEM;
+}
+
+static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *opts;
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (!opts)
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
+			      TCA_CAKE_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
+			q->flow_mode & CAKE_FLOW_MASK))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
+			!!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_INGRESS,
+			!!(q->rate_flags & CAKE_FLAG_INGRESS)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_NAT,
+			!!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_WASH,
+			!!(q->rate_flags & CAKE_FLAG_WASH)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
+		goto nla_put_failure;
+
+	if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
+		if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
+			goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO,
+			!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, opts);
+
+nla_put_failure:
+	return -1;
+}
+
+static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct nlattr *stats = nla_nest_start(d->skb, TCA_STATS_APP);
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tstats, *ts;
+	int i;
+
+	if (!stats)
+		return -1;
+
+#define PUT_STAT_U32(attr, data) do {				       \
+		if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+#define PUT_STAT_U64(attr, data) do {				       \
+		if (nla_put_u64_64bit(d->skb, TCA_CAKE_STATS_ ## attr, \
+					data, TCA_CAKE_STATS_PAD)) \
+			goto nla_put_failure;			       \
+	} while (0)
+
+	PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth);
+	PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit);
+	PUT_STAT_U32(MEMORY_USED, q->buffer_max_used);
+	PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16));
+	PUT_STAT_U32(MAX_NETLEN, q->max_netlen);
+	PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
+	PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
+	PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
+
+#undef PUT_STAT_U32
+#undef PUT_STAT_U64
+
+	tstats = nla_nest_start(d->skb, TCA_CAKE_STATS_TIN_STATS);
+	if (!tstats)
+		goto nla_put_failure;
+
+#define PUT_TSTAT_U32(attr, data) do {					\
+		if (nla_put_u32(d->skb, TCA_CAKE_TIN_STATS_ ## attr, data)) \
+			goto nla_put_failure;				\
+	} while (0)
+#define PUT_TSTAT_U64(attr, data) do {					\
+		if (nla_put_u64_64bit(d->skb, TCA_CAKE_TIN_STATS_ ## attr, \
+					data, TCA_CAKE_TIN_STATS_PAD))	\
+			goto nla_put_failure;				\
+	} while (0)
+
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[q->tin_order[i]];
+
+		ts = nla_nest_start(d->skb, i + 1);
+		if (!ts)
+			goto nla_put_failure;
+
+		PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps);
+		PUT_TSTAT_U64(SENT_BYTES64, b->bytes);
+		PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog);
+
+		PUT_TSTAT_U32(TARGET_US,
+			      ktime_to_us(ns_to_ktime(b->cparams.target)));
+		PUT_TSTAT_U32(INTERVAL_US,
+			      ktime_to_us(ns_to_ktime(b->cparams.interval)));
+
+		PUT_TSTAT_U32(SENT_PACKETS, b->packets);
+		PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped);
+		PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark);
+		PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops);
+
+		PUT_TSTAT_U32(PEAK_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->peak_delay)));
+		PUT_TSTAT_U32(AVG_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->avge_delay)));
+		PUT_TSTAT_U32(BASE_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->base_delay)));
+
+		PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits);
+		PUT_TSTAT_U32(WAY_MISSES, b->way_misses);
+		PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions);
+
+		PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count +
+					    b->decaying_flow_count);
+		PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count);
+		PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count);
+		PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen);
+
+		PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum);
+		nla_nest_end(d->skb, ts);
+	}
+
+#undef PUT_TSTAT_U32
+#undef PUT_TSTAT_U64
+
+	nla_nest_end(d->skb, tstats);
+	return nla_nest_end(d->skb, stats);
+
+nla_put_failure:
+	nla_nest_cancel(d->skb, stats);
+	return -1;
+}
+
+static struct Qdisc *cake_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long cake_find(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static unsigned long cake_bind(struct Qdisc *sch, unsigned long parent,
+			       u32 classid)
+{
+	return 0;
+}
+
+static void cake_unbind(struct Qdisc *q, unsigned long cl)
+{
+}
+
+static struct tcf_block *cake_tcf_block(struct Qdisc *sch, unsigned long cl,
+					struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return q->block;
+}
+
+static int cake_dump_class(struct Qdisc *sch, unsigned long cl,
+			   struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				 struct gnet_dump *d)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	const struct cake_flow *flow = NULL;
+	struct gnet_stats_queue qs = { 0 };
+	struct nlattr *stats;
+	u32 idx = cl - 1;
+
+	if (idx < CAKE_QUEUES * q->tin_cnt) {
+		const struct cake_tin_data *b = \
+			&q->tins[q->tin_order[idx / CAKE_QUEUES]];
+		const struct sk_buff *skb;
+
+		flow = &b->flows[idx % CAKE_QUEUES];
+
+		if (flow->head) {
+			sch_tree_lock(sch);
+			skb = flow->head;
+			while (skb) {
+				qs.qlen++;
+				skb = skb->next;
+			}
+			sch_tree_unlock(sch);
+		}
+		qs.backlog = b->backlogs[idx % CAKE_QUEUES];
+		qs.drops = flow->dropped;
+	}
+	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
+		return -1;
+	if (flow) {
+		ktime_t now = ktime_get();
+
+		stats = nla_nest_start(d->skb, TCA_STATS_APP);
+		if (!stats)
+			return -1;
+
+#define PUT_STAT_U32(attr, data) do {				       \
+		if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+#define PUT_STAT_S32(attr, data) do {				       \
+		if (nla_put_s32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+
+		PUT_STAT_S32(DEFICIT, flow->deficit);
+		PUT_STAT_U32(DROPPING, flow->cvars.dropping);
+		PUT_STAT_U32(COBALT_COUNT, flow->cvars.count);
+		PUT_STAT_U32(P_DROP, flow->cvars.p_drop);
+		if (flow->cvars.p_drop) {
+			PUT_STAT_S32(BLUE_TIMER_US,
+				     ktime_to_us(
+					     ktime_sub(now,
+						     flow->cvars.blue_timer)));
+		}
+		if (flow->cvars.dropping) {
+			PUT_STAT_S32(DROP_NEXT_US,
+				     ktime_to_us(
+					     ktime_sub(now,
+						       flow->cvars.drop_next)));
+		}
+
+		if (nla_nest_end(d->skb, stats) < 0)
+			return -1;
+	}
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(d->skb, stats);
+	return -1;
+}
+
+static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	unsigned int i, j;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[q->tin_order[i]];
+
+		for (j = 0; j < CAKE_QUEUES; j++) {
+			if (list_empty(&b->flows[j].flowchain) ||
+			    arg->count < arg->skip) {
+				arg->count++;
+				continue;
+			}
+			if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) {
+				arg->stop = 1;
+				break;
+			}
+			arg->count++;
+		}
+	}
+}
+
+static const struct Qdisc_class_ops cake_class_ops = {
+	.leaf		=	cake_leaf,
+	.find		=	cake_find,
+	.tcf_block	=	cake_tcf_block,
+	.bind_tcf	=	cake_bind,
+	.unbind_tcf	=	cake_unbind,
+	.dump		=	cake_dump_class,
+	.dump_stats	=	cake_dump_class_stats,
+	.walk		=	cake_walk,
+};
+
+static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
+	.cl_ops		=	&cake_class_ops,
+	.id		=	"cake",
+	.priv_size	=	sizeof(struct cake_sched_data),
+	.enqueue	=	cake_enqueue,
+	.dequeue	=	cake_dequeue,
+	.peek		=	qdisc_peek_dequeued,
+	.init		=	cake_init,
+	.reset		=	cake_reset,
+	.destroy	=	cake_destroy,
+	.change		=	cake_change,
+	.dump		=	cake_dump,
+	.dump_stats	=	cake_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init cake_module_init(void)
+{
+	return register_qdisc(&cake_qdisc_ops);
+}
+
+static void __exit cake_module_exit(void)
+{
+	unregister_qdisc(&cake_qdisc_ops);
+}
+
+module_init(cake_module_init)
+module_exit(cake_module_exit)
+MODULE_AUTHOR("Jonathan Morton");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("The CAKE shaper.");

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 6386032..df763be 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c

@@ -106,7 +106,8 @@ struct tipc_stats {
  * @backlogq: queue for messages waiting to be sent
  * @snt_nxt: next sequence number to use for outbound messages
  * @last_retransmitted: sequence number of most recently retransmitted message
- * @stale_count: # of identical retransmit requests made by peer
+ * @stale_cnt: counter for number of identical retransmit attempts
+ * @stale_limit: time when repeated identical retransmits must force link reset
  * @ackers: # of peers that needs to ack each packet before it can be released
  * @acked: # last packet acked by a certain peer. Used for broadcast.
  * @rcv_nxt: next sequence number to expect for inbound messages
@@ -127,14 +128,17 @@ struct tipc_link {
 	struct net *net;
 
 	/* Management and link supervision data */
-	u32 peer_session;
-	u32 session;
+	u16 peer_session;
+	u16 session;
+	u16 snd_nxt_state;
+	u16 rcv_nxt_state;
 	u32 peer_bearer_id;
 	u32 bearer_id;
 	u32 tolerance;
 	u32 abort_limit;
 	u32 state;
 	u16 peer_caps;
+	bool in_session;
 	bool active;
 	u32 silent_intv_cnt;
 	char if_name[TIPC_MAX_IF_NAME];
@@ -161,7 +165,8 @@ struct tipc_link {
 	u16 snd_nxt;
 	u16 last_retransm;
 	u16 window;
-	u32 stale_count;
+	u16 stale_cnt;
+	unsigned long stale_limit;
 
 	/* Reception */
 	u16 rcv_nxt;
@@ -212,11 +217,6 @@ enum {
  */
 #define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
 
-/* Wildcard value for link session numbers. When it is known that
- * peer endpoint is down, any session number must be accepted.
- */
-#define ANY_SESSION 0x10000
-
 /* Link FSM states:
  */
 enum {
@@ -337,6 +337,11 @@ char tipc_link_plane(struct tipc_link *l)
 	return l->net_plane;
 }
 
+void tipc_link_update_caps(struct tipc_link *l, u16 capabilities)
+{
+	l->peer_caps = capabilities;
+}
+
 void tipc_link_add_bc_peer(struct tipc_link *snd_l,
 			   struct tipc_link *uc_l,
 			   struct sk_buff_head *xmitq)
@@ -469,7 +474,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 	l->addr = peer;
 	l->peer_caps = peer_caps;
 	l->net = net;
-	l->peer_session = ANY_SESSION;
+	l->in_session = false;
 	l->bearer_id = bearer_id;
 	l->tolerance = tolerance;
 	l->net_plane = net_plane;
@@ -838,7 +843,7 @@ void link_prepare_wakeup(struct tipc_link *l)
 
 void tipc_link_reset(struct tipc_link *l)
 {
-	l->peer_session = ANY_SESSION;
+	l->in_session = false;
 	l->session++;
 	l->mtu = l->advertised_mtu;
 	__skb_queue_purge(&l->transmq);
@@ -857,10 +862,12 @@ void tipc_link_reset(struct tipc_link *l)
 	l->rcv_unacked = 0;
 	l->snd_nxt = 1;
 	l->rcv_nxt = 1;
+	l->snd_nxt_state = 1;
+	l->rcv_nxt_state = 1;
 	l->acked = 0;
 	l->silent_intv_cnt = 0;
 	l->rst_cnt = 0;
-	l->stale_count = 0;
+	l->stale_cnt = 0;
 	l->bc_peer_is_up = false;
 	memset(&l->mon_state, 0, sizeof(l->mon_state));
 	tipc_link_reset_stats(l);
@@ -997,39 +1004,41 @@ static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
 		msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
 }
 
-int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
+/* tipc_link_retrans() - retransmit one or more packets
+ * @l: the link to transmit on
+ * @r: the receiving link ordering the retransmit. Same as l if unicast
+ * @from: retransmit from (inclusive) this sequence number
+ * @to: retransmit to (inclusive) this sequence number
+ * xmitq: queue for accumulating the retransmitted packets
+ */
+int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
 		      u16 from, u16 to, struct sk_buff_head *xmitq)
 {
 	struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
-	struct tipc_msg *hdr;
-	u16 ack = l->rcv_nxt - 1;
 	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+	u16 ack = l->rcv_nxt - 1;
+	struct tipc_msg *hdr;
 
 	if (!skb)
 		return 0;
 
 	/* Detect repeated retransmit failures on same packet */
-	if (nacker->last_retransm != buf_seqno(skb)) {
-		nacker->last_retransm = buf_seqno(skb);
-		nacker->stale_count = 1;
-	} else if (++nacker->stale_count > 100) {
+	if (r->last_retransm != buf_seqno(skb)) {
+		r->last_retransm = buf_seqno(skb);
+		r->stale_limit = jiffies + msecs_to_jiffies(l->tolerance);
+	} else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
 		link_retransmit_failure(l, skb);
-		nacker->stale_count = 0;
 		if (link_is_bc_sndlink(l))
 			return TIPC_LINK_DOWN_EVT;
 		return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
 	}
 
-	/* Move forward to where retransmission should start */
 	skb_queue_walk(&l->transmq, skb) {
-		if (!less(buf_seqno(skb), from))
-			break;
-	}
-
-	skb_queue_walk_from(&l->transmq, skb) {
-		if (more(buf_seqno(skb), to))
-			break;
 		hdr = buf_msg(skb);
+		if (less(msg_seqno(hdr), from))
+			continue;
+		if (more(msg_seqno(hdr), to))
+			break;
 		_skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
 		if (!_skb)
 			return 0;
@@ -1272,6 +1281,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
 
 		/* Forward queues and wake up waiting users */
 		if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
+			l->stale_cnt = 0;
 			tipc_link_advance_backlog(l, xmitq);
 			if (unlikely(!skb_queue_empty(&l->wakeupq)))
 				link_prepare_wakeup(l);
@@ -1348,6 +1358,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
 	msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2);
 
 	if (mtyp == STATE_MSG) {
+		if (l->peer_caps & TIPC_LINK_PROTO_SEQNO)
+			msg_set_seqno(hdr, l->snd_nxt_state++);
 		msg_set_seq_gap(hdr, rcvgap);
 		msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
 		msg_set_probe(hdr, probe);
@@ -1439,6 +1451,44 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
 	}
 }
 
+/* tipc_link_validate_msg(): validate message against current link state
+ * Returns true if message should be accepted, otherwise false
+ */
+bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr)
+{
+	u16 curr_session = l->peer_session;
+	u16 session = msg_session(hdr);
+	int mtyp = msg_type(hdr);
+
+	if (msg_user(hdr) != LINK_PROTOCOL)
+		return true;
+
+	switch (mtyp) {
+	case RESET_MSG:
+		if (!l->in_session)
+			return true;
+		/* Accept only RESET with new session number */
+		return more(session, curr_session);
+	case ACTIVATE_MSG:
+		if (!l->in_session)
+			return true;
+		/* Accept only ACTIVATE with new or current session number */
+		return !less(session, curr_session);
+	case STATE_MSG:
+		/* Accept only STATE with current session number */
+		if (!l->in_session)
+			return false;
+		if (session != curr_session)
+			return false;
+		if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO))
+			return true;
+		/* Accept only STATE with new sequence number */
+		return !less(msg_seqno(hdr), l->rcv_nxt_state);
+	default:
+		return false;
+	}
+}
+
 /* tipc_link_proto_rcv(): receive link level protocol message :
  * Note that network plane id propagates through the network, and may
  * change at any time. The node with lowest numerical id determines
@@ -1472,17 +1522,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 	hdr = buf_msg(skb);
 	data = msg_data(hdr);
 
+	if (!tipc_link_validate_msg(l, hdr))
+		goto exit;
+
 	switch (mtyp) {
 	case RESET_MSG:
-
-		/* Ignore duplicate RESET with old session number */
-		if ((less_eq(msg_session(hdr), l->peer_session)) &&
-		    (l->peer_session != ANY_SESSION))
-			break;
-		/* fall thru' */
-
 	case ACTIVATE_MSG:
-
 		/* Complete own link name with peer's interface name */
 		if_name =  strrchr(l->name, ':') + 1;
 		if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME)
@@ -1510,12 +1555,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 			rc = TIPC_LINK_UP_EVT;
 
 		l->peer_session = msg_session(hdr);
+		l->in_session = true;
 		l->peer_bearer_id = msg_bearer_id(hdr);
 		if (l->mtu > msg_max_pkt(hdr))
 			l->mtu = msg_max_pkt(hdr);
 		break;
 
 	case STATE_MSG:
+		l->rcv_nxt_state = msg_seqno(hdr) + 1;
 
 		/* Update own tolerance if peer indicates a non-zero value */
 		if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))

diff --git a/net/tipc/link.h b/net/tipc/link.h
index ec59348..7bc494a3 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h

@@ -110,6 +110,8 @@ char *tipc_link_name(struct tipc_link *l);
 char tipc_link_plane(struct tipc_link *l);
 int tipc_link_prio(struct tipc_link *l);
 int tipc_link_window(struct tipc_link *l);
+void tipc_link_update_caps(struct tipc_link *l, u16 capabilities);
+bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr);
 unsigned long tipc_link_tolerance(struct tipc_link *l);
 void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
 			     struct sk_buff_head *xmitq);

diff --git a/net/tipc/node.c b/net/tipc/node.c
index cfdbaf4..52fd80b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c

@@ -363,6 +363,8 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_node *n, *temp_node;
+	struct tipc_link *l;
+	int bearer_id;
 	int i;
 
 	spin_lock_bh(&tn->node_list_lock);
@@ -370,6 +372,11 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
 	if (n) {
 		/* Same node may come back with new capabilities */
 		n->capabilities = capabilities;
+		for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+			l = n->links[bearer_id].link;
+			if (l)
+				tipc_link_update_caps(l, capabilities);
+		}
 		goto exit;
 	}
 	n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -1533,7 +1540,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
  * tipc_node_check_state - check and if necessary update node state
  * @skb: TIPC packet
  * @bearer_id: identity of bearer delivering the packet
- * Returns true if state is ok, otherwise consumes buffer and returns false
+ * Returns true if state and msg are ok, otherwise false
  */
 static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
 				  int bearer_id, struct sk_buff_head *xmitq)
@@ -1567,6 +1574,9 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
 		}
 	}
 
+	if (!tipc_link_validate_msg(l, hdr))
+		return false;
+
 	/* Check and update node accesibility if applicable */
 	if (state == SELF_UP_PEER_COMING) {
 		if (!tipc_link_is_up(l))

diff --git a/net/tipc/node.h b/net/tipc/node.h
index 846c8f2..48b3298 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h

@@ -49,14 +49,16 @@ enum {
 	TIPC_BCAST_STATE_NACK = (1 << 2),
 	TIPC_BLOCK_FLOWCTL    = (1 << 3),
 	TIPC_BCAST_RCAST      = (1 << 4),
-	TIPC_NODE_ID128       = (1 << 5)
+	TIPC_NODE_ID128       = (1 << 5),
+	TIPC_LINK_PROTO_SEQNO = (1 << 6)
 };
 
-#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
-				TIPC_BCAST_STATE_NACK | \
-				TIPC_BCAST_RCAST | \
-				TIPC_BLOCK_FLOWCTL | \
-				TIPC_NODE_ID128)
+#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH       |  \
+				TIPC_BCAST_STATE_NACK  |  \
+				TIPC_BCAST_RCAST       |  \
+				TIPC_BLOCK_FLOWCTL     |  \
+				TIPC_NODE_ID128        |  \
+				TIPC_LINK_PROTO_SEQNO)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 0d670c8..7453f5a 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c

@@ -57,14 +57,11 @@ static int tls_do_decryption(struct sock *sk,
 	struct aead_request *aead_req;
 
 	int ret;
-	unsigned int req_size = sizeof(struct aead_request) +
-		crypto_aead_reqsize(ctx->aead_recv);
 
-	aead_req = kzalloc(req_size, flags);
+	aead_req = aead_request_alloc(ctx->aead_recv, flags);
 	if (!aead_req)
 		return -ENOMEM;
 
-	aead_request_set_tfm(aead_req, ctx->aead_recv);
 	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
 	aead_request_set_crypt(aead_req, sgin, sgout,
 			       data_len + tls_ctx->rx.tag_size,
@@ -86,7 +83,7 @@ static int tls_do_decryption(struct sock *sk,
 	ctx->saved_data_ready(sk);
 
 out:
-	kfree(aead_req);
+	aead_request_free(aead_req);
 	return ret;
 }
 
@@ -224,8 +221,7 @@ static int tls_push_record(struct sock *sk, int flags,
 	struct aead_request *req;
 	int rc;
 
-	req = kzalloc(sizeof(struct aead_request) +
-		      crypto_aead_reqsize(ctx->aead_send), sk->sk_allocation);
+	req = aead_request_alloc(ctx->aead_send, sk->sk_allocation);
 	if (!req)
 		return -ENOMEM;
 
@@ -267,7 +263,7 @@ static int tls_push_record(struct sock *sk, int flags,
 
 	tls_advance_record_sn(sk, &tls_ctx->tx);
 out_req:
-	kfree(req);
+	aead_request_free(req);
 	return rc;
 }
 

diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
new file mode 100755
index 0000000..76f1ab4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh

@@ -0,0 +1,217 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# ../../../net/forwarding/mirror_gre_topo_lib.sh for more details.
+#
+# Test offloading various features of offloading gretap mirrors specific to
+# mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/mirror_lib.sh
+source $lib_dir/mirror_gre_lib.sh
+source $lib_dir/mirror_gre_topo_lib.sh
+
+setup_keyful()
+{
+	tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \
+		      ttl 100 tos inherit allow-localremote \
+		      key 1234
+
+	tunnel_create h3-gt6-key ip6gretap 2001:db8:3::2 2001:db8:3::1 \
+		      key 1234
+	ip link set h3-gt6-key vrf v$h3
+	matchall_sink_create h3-gt6-key
+
+	ip address add dev $swp3 2001:db8:3::1/64
+	ip address add dev $h3 2001:db8:3::2/64
+}
+
+cleanup_keyful()
+{
+	ip address del dev $h3 2001:db8:3::2/64
+	ip address del dev $swp3 2001:db8:3::1/64
+
+	tunnel_destroy h3-gt6-key
+	tunnel_destroy gt6-key
+}
+
+setup_soft()
+{
+	# Set up a topology for testing underlay routes that point at an
+	# unsupported soft device.
+
+	tunnel_create gt6-soft ip6gretap 2001:db8:4::1 2001:db8:4::2 \
+		      ttl 100 tos inherit allow-localremote
+
+	tunnel_create h3-gt6-soft ip6gretap 2001:db8:4::2 2001:db8:4::1
+	ip link set h3-gt6-soft vrf v$h3
+	matchall_sink_create h3-gt6-soft
+
+	ip link add name v1 type veth peer name v2
+	ip link set dev v1 up
+	ip address add dev v1 2001:db8:4::1/64
+
+	ip link set dev v2 vrf v$h3
+	ip link set dev v2 up
+	ip address add dev v2 2001:db8:4::2/64
+}
+
+cleanup_soft()
+{
+	ip link del dev v1
+
+	tunnel_destroy h3-gt6-soft
+	tunnel_destroy gt6-soft
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	setup_keyful
+	setup_soft
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	cleanup_soft
+	cleanup_keyful
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_ttl_inherit()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type ttl inherit
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev type $type ttl 100
+
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: no offload on TTL of inherit ($tcflags)"
+}
+
+test_span_gre_tos_fixed()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type tos 0x10
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev type $type tos inherit
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: no offload on a fixed TOS ($tcflags)"
+}
+
+test_span_failable()
+{
+	local should_fail=$1; shift
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	if ((should_fail)); then
+	    fail_test_span_gre_dir  $tundev ingress
+	else
+	    quick_test_span_gre_dir $tundev ingress
+	fi
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: should_fail=$should_fail ($tcflags)"
+}
+
+test_failable()
+{
+	local should_fail=$1; shift
+
+	test_span_failable $should_fail gt6-key "mirror to keyful gretap"
+	test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay"
+}
+
+test_sw()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	test_failable 0
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+test_hw()
+{
+	test_failable 1
+
+	test_span_gre_tos_fixed gt4 gretap "mirror to gretap"
+	test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap"
+
+	test_span_gre_ttl_inherit gt4 gretap "mirror to gretap"
+	test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+if ! tc_offload_check; then
+    check_err 1 "Could not test offloaded functionality"
+    log_test "mlxsw-specific tests for mirror to gretap"
+    exit
+fi
+
+tcflags="skip_hw"
+test_sw
+
+tcflags="skip_sw"
+test_hw
+
+exit $EXIT_STATUS

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
index 1c18e33..fac4861 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh

@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-source mirror_lib.sh
+source "$relative_path/mirror_lib.sh"
 
 quick_test_span_gre_dir_ips()
 {

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
index 8fa681e..6f9ef18 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh

@@ -35,6 +35,8 @@
 	vrf_prepare
 	mirror_gre_topo_create
 
+	sysctl_set net.ipv4.conf.v$h3.rp_filter 0
+
 	ip address add dev $swp3 192.0.2.161/28
 	ip address add dev $h3 192.0.2.162/28
 	ip address add dev gt4 192.0.2.129/32
@@ -61,6 +63,8 @@
 	ip address del dev $h3 192.0.2.162/28
 	ip address del dev $swp3 192.0.2.161/28
 
+	sysctl_restore net.ipv4.conf.v$h3.rp_filter 0
+
 	mirror_gre_topo_destroy
 	vrf_cleanup
 

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
index 2534195..39c03e2 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh

@@ -33,7 +33,7 @@
 #   |                                                                         |
 #   +-------------------------------------------------------------------------+
 
-source mirror_topo_lib.sh
+source "$relative_path/mirror_topo_lib.sh"
 
 mirror_gre_topo_h3_create()
 {

diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index d36dc26..07991e1 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh

@@ -105,7 +105,7 @@
 	# Install the capture as skip_hw to avoid double-counting of packets.
 	# The traffic is meant for local box anyway, so will be trapped to
 	# kernel.
-	vlan_capture_install $dev "skip_hw vlan_id $vid"
+	vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip"
 	mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
 	mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
 	vlan_capture_uninstall $dev

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
index d878ce1..10b2d89 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json

@@ -641,6 +641,174 @@
         ]
     },
     {
+        "id": "4f20",
+        "name": "Add tunnel_key action with a single geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "e33d",
+        "name": "Add tunnel_key action with multiple geneve options parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "0778",
+        "name": "Add tunnel_key action with invalid class geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 824212:80:00880022 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 824212:80:00880022.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "4ae8",
+        "name": "Add tunnel_key action with invalid type geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:4224:00880022 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:4224:00880022.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "4039",
+        "name": "Add tunnel_key action with short data length geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "26a6",
+        "name": "Add tunnel_key action with non-multiple of 4 data length geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288428822 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288428822.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "f44d",
+        "name": "Add tunnel_key action with incomplete geneve options parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42: index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
         "id": "7afc",
         "name": "Replace tunnel_key set action with all parameters",
         "category": [
commit	2aa4a3378ad077d02131a23d22641ae8ae44cb28	[log] [tgz]
author	David S. Miller <davem@davemloft.net>	Sat Jul 14 18:47:44 2018 -0700
committer	David S. Miller <davem@davemloft.net>	Sat Jul 14 18:47:44 2018 -0700
tree	da31b9ef7436fb938c874d549d95a0ab1aac1b84
parent	f5c64e566c521456e9c34e3a7208dabab0eaf18b [diff]
parent	13f7432bdd8eb2eb55eed2af1469d37c0f667617 [diff]