Merge tag 'rxrpc-rewrite-20160922-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs

David Howells says:

====================
rxrpc: Preparation for slow-start algorithm [ver #2]

Here are some patches that prepare for improvements in ACK generation and
for the implementation of the slow-start part of the protocol:

 (1) Stop storing the protocol header in the Tx socket buffers, but rather
     generate it on the fly.  This potentially saves a little space and
     makes it easier to alter the header just before transmission (the
     flags may get altered and the serial number has to be changed).

 (2) Mask off the Tx buffer annotations and add a flag to record which ones
     have already been resent.

 (3) Track RTT on a per-peer basis for use in future changes.  Tracepoints
     are added to log this.

 (4) Send PING ACKs in response to incoming calls to elicit a PING-RESPONSE
     ACK from which RTT data can be calculated.  The response also carries
     other useful information.

 (5) Expedite PING-RESPONSE ACK generation from sendmsg.  If we're actively
     using sendmsg, this allows us, under some circumstances, to avoid
     having to rely on the background work item to run to generate this
     ACK.

     This requires ktime_sub_ms() to be added.

 (6) Set the REQUEST-ACK flag on some DATA packets to elicit ACK-REQUESTED
     ACKs from which RTT data can be calculated.

 (7) Limit the use of pings and ACK requests for RTT determination.

Changes:

 (V2) Don't use the C division operator for 64-bit division.  One instance
      should use do_div() and the other should be using nsecs_to_jiffies().

      The last two patches got transposed, leading to an undefined symbol
      in one of them.

      Reported-by: kbuild test robot <lkp@intel.com>
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 1d71802..25bd3fa 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -216,6 +216,22 @@
 	return 0;
 }
 
+int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
+			u16 *val)
+{
+	int addr = chip->info->port_base_addr + port;
+
+	return mv88e6xxx_read(chip, addr, reg, val);
+}
+
+int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
+			 u16 val)
+{
+	int addr = chip->info->port_base_addr + port;
+
+	return mv88e6xxx_write(chip, addr, reg, val);
+}
+
 static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy,
 			      int reg, u16 *val)
 {
@@ -585,23 +601,23 @@
 				  struct phy_device *phydev)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
-	u32 reg;
-	int ret;
+	u16 reg;
+	int err;
 
 	if (!phy_is_pseudo_fixed_link(phydev))
 		return;
 
 	mutex_lock(&chip->reg_lock);
 
-	ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL);
-	if (ret < 0)
+	err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg);
+	if (err)
 		goto out;
 
-	reg = ret & ~(PORT_PCS_CTRL_LINK_UP |
-		      PORT_PCS_CTRL_FORCE_LINK |
-		      PORT_PCS_CTRL_DUPLEX_FULL |
-		      PORT_PCS_CTRL_FORCE_DUPLEX |
-		      PORT_PCS_CTRL_UNFORCED);
+	reg &= ~(PORT_PCS_CTRL_LINK_UP |
+		 PORT_PCS_CTRL_FORCE_LINK |
+		 PORT_PCS_CTRL_DUPLEX_FULL |
+		 PORT_PCS_CTRL_FORCE_DUPLEX |
+		 PORT_PCS_CTRL_UNFORCED);
 
 	reg |= PORT_PCS_CTRL_FORCE_LINK;
 	if (phydev->link)
@@ -639,7 +655,7 @@
 			reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK |
 				PORT_PCS_CTRL_RGMII_DELAY_TXCLK);
 	}
-	_mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_PCS_CTRL, reg);
+	mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg);
 
 out:
 	mutex_unlock(&chip->reg_lock);
@@ -799,22 +815,22 @@
 {
 	u32 low;
 	u32 high = 0;
-	int ret;
+	int err;
+	u16 reg;
 	u64 value;
 
 	switch (s->type) {
 	case PORT:
-		ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), s->reg);
-		if (ret < 0)
+		err = mv88e6xxx_port_read(chip, port, s->reg, &reg);
+		if (err)
 			return UINT64_MAX;
 
-		low = ret;
+		low = reg;
 		if (s->sizeof_stat == 4) {
-			ret = _mv88e6xxx_reg_read(chip, REG_PORT(port),
-						  s->reg + 1);
-			if (ret < 0)
+			err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg);
+			if (err)
 				return UINT64_MAX;
-			high = ret;
+			high = reg;
 		}
 		break;
 	case BANK0:
@@ -893,6 +909,8 @@
 			       struct ethtool_regs *regs, void *_p)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
+	int err;
+	u16 reg;
 	u16 *p = _p;
 	int i;
 
@@ -903,11 +921,10 @@
 	mutex_lock(&chip->reg_lock);
 
 	for (i = 0; i < 32; i++) {
-		int ret;
 
-		ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), i);
-		if (ret >= 0)
-			p[i] = ret;
+		err = mv88e6xxx_port_read(chip, port, i, &reg);
+		if (!err)
+			p[i] = reg;
 	}
 
 	mutex_unlock(&chip->reg_lock);
@@ -938,7 +955,7 @@
 	e->eee_enabled = !!(reg & 0x0200);
 	e->tx_lpi_enabled = !!(reg & 0x0100);
 
-	err = mv88e6xxx_read(chip, REG_PORT(port), PORT_STATUS, &reg);
+	err = mv88e6xxx_port_read(chip, port, PORT_STATUS, &reg);
 	if (err)
 		goto out;
 
@@ -1106,12 +1123,13 @@
 				 u8 state)
 {
 	struct dsa_switch *ds = chip->ds;
-	int reg, ret = 0;
+	u16 reg;
+	int err;
 	u8 oldstate;
 
-	reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL);
-	if (reg < 0)
-		return reg;
+	err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, &reg);
+	if (err)
+		return err;
 
 	oldstate = reg & PORT_CONTROL_STATE_MASK;
 
@@ -1124,23 +1142,22 @@
 		     oldstate == PORT_CONTROL_STATE_FORWARDING) &&
 		    (state == PORT_CONTROL_STATE_DISABLED ||
 		     state == PORT_CONTROL_STATE_BLOCKING)) {
-			ret = _mv88e6xxx_atu_remove(chip, 0, port, false);
-			if (ret)
-				return ret;
+			err = _mv88e6xxx_atu_remove(chip, 0, port, false);
+			if (err)
+				return err;
 		}
 
 		reg = (reg & ~PORT_CONTROL_STATE_MASK) | state;
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL,
-					   reg);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
+		if (err)
+			return err;
 
 		netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n",
 			   mv88e6xxx_port_state_names[state],
 			   mv88e6xxx_port_state_names[oldstate]);
 	}
 
-	return ret;
+	return err;
 }
 
 static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port)
@@ -1149,7 +1166,8 @@
 	const u16 mask = (1 << chip->info->num_ports) - 1;
 	struct dsa_switch *ds = chip->ds;
 	u16 output_ports = 0;
-	int reg;
+	u16 reg;
+	int err;
 	int i;
 
 	/* allow CPU port or DSA link(s) to send frames to every port */
@@ -1170,14 +1188,14 @@
 	/* prevent frames from going back out of the port they came in on */
 	output_ports &= ~BIT(port);
 
-	reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN);
-	if (reg < 0)
-		return reg;
+	err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg);
+	if (err)
+		return err;
 
 	reg &= ~mask;
 	reg |= output_ports & mask;
 
-	return _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, reg);
+	return mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg);
 }
 
 static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
@@ -1218,23 +1236,22 @@
 				u16 *new, u16 *old)
 {
 	struct dsa_switch *ds = chip->ds;
-	u16 pvid;
-	int ret;
+	u16 pvid, reg;
+	int err;
 
-	ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_DEFAULT_VLAN);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, &reg);
+	if (err)
+		return err;
 
-	pvid = ret & PORT_DEFAULT_VLAN_MASK;
+	pvid = reg & PORT_DEFAULT_VLAN_MASK;
 
 	if (new) {
-		ret &= ~PORT_DEFAULT_VLAN_MASK;
-		ret |= *new & PORT_DEFAULT_VLAN_MASK;
+		reg &= ~PORT_DEFAULT_VLAN_MASK;
+		reg |= *new & PORT_DEFAULT_VLAN_MASK;
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_DEFAULT_VLAN, ret);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg);
+		if (err)
+			return err;
 
 		netdev_dbg(ds->ports[port].netdev,
 			   "DefaultVID %d (was %d)\n", *new, pvid);
@@ -1613,7 +1630,8 @@
 	struct dsa_switch *ds = chip->ds;
 	u16 upper_mask;
 	u16 fid;
-	int ret;
+	u16 reg;
+	int err;
 
 	if (mv88e6xxx_num_databases(chip) == 4096)
 		upper_mask = 0xff;
@@ -1623,37 +1641,35 @@
 		return -EOPNOTSUPP;
 
 	/* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */
-	ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg);
+	if (err)
+		return err;
 
-	fid = (ret & PORT_BASE_VLAN_FID_3_0_MASK) >> 12;
+	fid = (reg & PORT_BASE_VLAN_FID_3_0_MASK) >> 12;
 
 	if (new) {
-		ret &= ~PORT_BASE_VLAN_FID_3_0_MASK;
-		ret |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK;
+		reg &= ~PORT_BASE_VLAN_FID_3_0_MASK;
+		reg |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK;
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN,
-					   ret);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg);
+		if (err)
+			return err;
 	}
 
 	/* Port's default FID bits 11:4 are located in reg 0x05, offset 0 */
-	ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_1);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, &reg);
+	if (err)
+		return err;
 
-	fid |= (ret & upper_mask) << 4;
+	fid |= (reg & upper_mask) << 4;
 
 	if (new) {
-		ret &= ~upper_mask;
-		ret |= (*new >> 4) & upper_mask;
+		reg &= ~upper_mask;
+		reg |= (*new >> 4) & upper_mask;
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1,
-					   ret);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg);
+		if (err)
+			return err;
 
 		netdev_dbg(ds->ports[port].netdev,
 			   "FID %d (was %d)\n", *new, fid);
@@ -1865,26 +1881,26 @@
 	struct mv88e6xxx_chip *chip = ds->priv;
 	u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE :
 		PORT_CONTROL_2_8021Q_DISABLED;
-	int ret;
+	u16 reg;
+	int err;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU))
 		return -EOPNOTSUPP;
 
 	mutex_lock(&chip->reg_lock);
 
-	ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_2);
-	if (ret < 0)
+	err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, &reg);
+	if (err)
 		goto unlock;
 
-	old = ret & PORT_CONTROL_2_8021Q_MASK;
+	old = reg & PORT_CONTROL_2_8021Q_MASK;
 
 	if (new != old) {
-		ret &= ~PORT_CONTROL_2_8021Q_MASK;
-		ret |= new & PORT_CONTROL_2_8021Q_MASK;
+		reg &= ~PORT_CONTROL_2_8021Q_MASK;
+		reg |= new & PORT_CONTROL_2_8021Q_MASK;
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_2,
-					   ret);
-		if (ret < 0)
+		err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg);
+		if (err)
 			goto unlock;
 
 		netdev_dbg(ds->ports[port].netdev, "802.1Q Mode %s (was %s)\n",
@@ -1892,11 +1908,11 @@
 			   mv88e6xxx_port_8021q_mode_names[old]);
 	}
 
-	ret = 0;
+	err = 0;
 unlock:
 	mutex_unlock(&chip->reg_lock);
 
-	return ret;
+	return err;
 }
 
 static int
@@ -2406,19 +2422,20 @@
 	u16 is_reset = (ppu_active ? 0x8800 : 0xc800);
 	struct gpio_desc *gpiod = chip->reset;
 	unsigned long timeout;
-	int ret;
+	int err, ret;
+	u16 reg;
 	int i;
 
 	/* Set all ports to the disabled state. */
 	for (i = 0; i < chip->info->num_ports; i++) {
-		ret = _mv88e6xxx_reg_read(chip, REG_PORT(i), PORT_CONTROL);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, &reg);
+		if (err)
+			return err;
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(i), PORT_CONTROL,
-					   ret & 0xfffc);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, i, PORT_CONTROL,
+					   reg & 0xfffc);
+		if (err)
+			return err;
 	}
 
 	/* Wait for transmit queues to drain. */
@@ -2437,11 +2454,11 @@
 	 * through global registers 0x18 and 0x19.
 	 */
 	if (ppu_active)
-		ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000);
+		err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000);
 	else
-		ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400);
-	if (ret)
-		return ret;
+		err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400);
+	if (err)
+		return err;
 
 	/* Wait up to one second for reset to complete. */
 	timeout = jiffies + 1 * HZ;
@@ -2455,11 +2472,11 @@
 		usleep_range(1000, 2000);
 	}
 	if (time_after(jiffies, timeout))
-		ret = -ETIMEDOUT;
+		err = -ETIMEDOUT;
 	else
-		ret = 0;
+		err = 0;
 
-	return ret;
+	return err;
 }
 
 static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip)
@@ -2480,21 +2497,10 @@
 	return err;
 }
 
-static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port,
-			       int reg, u16 *val)
-{
-	int addr = chip->info->port_base_addr + port;
-
-	if (port >= chip->info->num_ports)
-		return -EINVAL;
-
-	return mv88e6xxx_read(chip, addr, reg, val);
-}
-
 static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
 {
 	struct dsa_switch *ds = chip->ds;
-	int ret;
+	int err;
 	u16 reg;
 
 	if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
@@ -2507,7 +2513,7 @@
 		 * and all DSA ports to their maximum bandwidth and
 		 * full duplex.
 		 */
-		reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL);
+		err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg);
 		if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
 			reg &= ~PORT_PCS_CTRL_UNFORCED;
 			reg |= PORT_PCS_CTRL_FORCE_LINK |
@@ -2522,10 +2528,9 @@
 			reg |= PORT_PCS_CTRL_UNFORCED;
 		}
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_PCS_CTRL, reg);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg);
+		if (err)
+			return err;
 	}
 
 	/* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock,
@@ -2576,26 +2581,25 @@
 				PORT_CONTROL_FORWARD_UNKNOWN_MC;
 	}
 	if (reg) {
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_CONTROL, reg);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
+		if (err)
+			return err;
 	}
 
 	/* If this port is connected to a SerDes, make sure the SerDes is not
 	 * powered down.
 	 */
 	if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SERDES)) {
-		ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS);
-		if (ret < 0)
-			return ret;
-		ret &= PORT_STATUS_CMODE_MASK;
-		if ((ret == PORT_STATUS_CMODE_100BASE_X) ||
-		    (ret == PORT_STATUS_CMODE_1000BASE_X) ||
-		    (ret == PORT_STATUS_CMODE_SGMII)) {
-			ret = mv88e6xxx_serdes_power_on(chip);
-			if (ret < 0)
-				return ret;
+		err = mv88e6xxx_port_read(chip, port, PORT_STATUS, &reg);
+		if (err)
+			return err;
+		reg &= PORT_STATUS_CMODE_MASK;
+		if ((reg == PORT_STATUS_CMODE_100BASE_X) ||
+		    (reg == PORT_STATUS_CMODE_1000BASE_X) ||
+		    (reg == PORT_STATUS_CMODE_SGMII)) {
+			err = mv88e6xxx_serdes_power_on(chip);
+			if (err < 0)
+				return err;
 		}
 	}
 
@@ -2629,10 +2633,9 @@
 	reg |= PORT_CONTROL_2_8021Q_DISABLED;
 
 	if (reg) {
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_CONTROL_2, reg);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg);
+		if (err)
+			return err;
 	}
 
 	/* Port Association Vector: when learning source addresses
@@ -2645,16 +2648,14 @@
 	if (dsa_is_cpu_port(ds, port))
 		reg = 0;
 
-	ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_ASSOC_VECTOR,
-				   reg);
-	if (ret)
-		return ret;
+	err = mv88e6xxx_port_write(chip, port, PORT_ASSOC_VECTOR, reg);
+	if (err)
+		return err;
 
 	/* Egress rate control 2: disable egress rate control. */
-	ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_RATE_CONTROL_2,
-				   0x0000);
-	if (ret)
-		return ret;
+	err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL_2, 0x0000);
+	if (err)
+		return err;
 
 	if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
 	    mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) ||
@@ -2663,96 +2664,89 @@
 		 * be paused for by the remote end or the period of
 		 * time that this port can pause the remote end.
 		 */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_PAUSE_CTRL, 0x0000);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_PAUSE_CTRL, 0x0000);
+		if (err)
+			return err;
 
 		/* Port ATU control: disable limiting the number of
 		 * address database entries that this port is allowed
 		 * to use.
 		 */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_ATU_CONTROL, 0x0000);
+		err = mv88e6xxx_port_write(chip, port, PORT_ATU_CONTROL,
+					   0x0000);
 		/* Priority Override: disable DA, SA and VTU priority
 		 * override.
 		 */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_PRI_OVERRIDE, 0x0000);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_PRI_OVERRIDE,
+					   0x0000);
+		if (err)
+			return err;
 
 		/* Port Ethertype: use the Ethertype DSA Ethertype
 		 * value.
 		 */
 		if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) {
-			ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-						   PORT_ETH_TYPE, ETH_P_EDSA);
-			if (ret)
-				return ret;
+			err = mv88e6xxx_port_write(chip, port, PORT_ETH_TYPE,
+						   ETH_P_EDSA);
+			if (err)
+				return err;
 		}
 
 		/* Tag Remap: use an identity 802.1p prio -> switch
 		 * prio mapping.
 		 */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_TAG_REGMAP_0123, 0x3210);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_0123,
+					   0x3210);
+		if (err)
+			return err;
 
 		/* Tag Remap 2: use an identity 802.1p prio -> switch
 		 * prio mapping.
 		 */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_TAG_REGMAP_4567, 0x7654);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_4567,
+					   0x7654);
+		if (err)
+			return err;
 	}
 
 	/* Rate Control: disable ingress rate limiting. */
 	if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
 	    mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) ||
 	    mv88e6xxx_6320_family(chip)) {
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_RATE_CONTROL, 0x0001);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL,
+					   0x0001);
+		if (err)
+			return err;
 	} else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) {
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_RATE_CONTROL, 0x0000);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL,
+					   0x0000);
+		if (err)
+			return err;
 	}
 
 	/* Port Control 1: disable trunking, disable sending
 	 * learning messages to this port.
 	 */
-	ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1,
-				   0x0000);
-	if (ret)
-		return ret;
+	err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, 0x0000);
+	if (err)
+		return err;
 
 	/* Port based VLAN map: give each port the same default address
 	 * database, and allow bidirectional communication between the
 	 * CPU and DSA port(s), and the other ports.
 	 */
-	ret = _mv88e6xxx_port_fid_set(chip, port, 0);
-	if (ret)
-		return ret;
+	err = _mv88e6xxx_port_fid_set(chip, port, 0);
+	if (err)
+		return err;
 
-	ret = _mv88e6xxx_port_based_vlan_map(chip, port);
-	if (ret)
-		return ret;
+	err = _mv88e6xxx_port_based_vlan_map(chip, port);
+	if (err)
+		return err;
 
 	/* Default VLAN ID and priority: don't set a default VLAN
 	 * ID, and set the default packet priority to zero.
 	 */
-	ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_DEFAULT_VLAN,
-				   0x0000);
-	if (ret)
-		return ret;
-
-	return 0;
+	return mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, 0x0000);
 }
 
 static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index 52f3f52..8279883 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -37,7 +37,6 @@
 #define ADDR_SERDES		0x0f
 #define SERDES_PAGE_FIBER	0x01
 
-#define REG_PORT(p)		(0x10 + (p))
 #define PORT_STATUS		0x00
 #define PORT_STATUS_PAUSE_EN	BIT(15)
 #define PORT_STATUS_MY_PAUSE	BIT(14)
@@ -453,36 +452,36 @@
 };
 
 /* Bitmask of capabilities */
-#define MV88E6XXX_FLAG_EDSA		BIT(MV88E6XXX_CAP_EDSA)
-#define MV88E6XXX_FLAG_EEE		BIT(MV88E6XXX_CAP_EEE)
+#define MV88E6XXX_FLAG_EDSA		BIT_ULL(MV88E6XXX_CAP_EDSA)
+#define MV88E6XXX_FLAG_EEE		BIT_ULL(MV88E6XXX_CAP_EEE)
 
-#define MV88E6XXX_FLAG_SMI_CMD		BIT(MV88E6XXX_CAP_SMI_CMD)
-#define MV88E6XXX_FLAG_SMI_DATA		BIT(MV88E6XXX_CAP_SMI_DATA)
+#define MV88E6XXX_FLAG_SMI_CMD		BIT_ULL(MV88E6XXX_CAP_SMI_CMD)
+#define MV88E6XXX_FLAG_SMI_DATA		BIT_ULL(MV88E6XXX_CAP_SMI_DATA)
 
-#define MV88E6XXX_FLAG_PHY_PAGE		BIT(MV88E6XXX_CAP_PHY_PAGE)
+#define MV88E6XXX_FLAG_PHY_PAGE		BIT_ULL(MV88E6XXX_CAP_PHY_PAGE)
 
-#define MV88E6XXX_FLAG_SERDES		BIT(MV88E6XXX_CAP_SERDES)
+#define MV88E6XXX_FLAG_SERDES		BIT_ULL(MV88E6XXX_CAP_SERDES)
 
-#define MV88E6XXX_FLAG_GLOBAL2		BIT(MV88E6XXX_CAP_GLOBAL2)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_2X	BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_0X	BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X)
-#define MV88E6XXX_FLAG_G2_IRL_CMD	BIT(MV88E6XXX_CAP_G2_IRL_CMD)
-#define MV88E6XXX_FLAG_G2_IRL_DATA	BIT(MV88E6XXX_CAP_G2_IRL_DATA)
-#define MV88E6XXX_FLAG_G2_PVT_ADDR	BIT(MV88E6XXX_CAP_G2_PVT_ADDR)
-#define MV88E6XXX_FLAG_G2_PVT_DATA	BIT(MV88E6XXX_CAP_G2_PVT_DATA)
-#define MV88E6XXX_FLAG_G2_SWITCH_MAC	BIT(MV88E6XXX_CAP_G2_SWITCH_MAC)
-#define MV88E6XXX_FLAG_G2_POT		BIT(MV88E6XXX_CAP_G2_POT)
-#define MV88E6XXX_FLAG_G2_EEPROM_CMD	BIT(MV88E6XXX_CAP_G2_EEPROM_CMD)
-#define MV88E6XXX_FLAG_G2_EEPROM_DATA	BIT(MV88E6XXX_CAP_G2_EEPROM_DATA)
-#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD	BIT(MV88E6XXX_CAP_G2_SMI_PHY_CMD)
-#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA	BIT(MV88E6XXX_CAP_G2_SMI_PHY_DATA)
+#define MV88E6XXX_FLAG_GLOBAL2		BIT_ULL(MV88E6XXX_CAP_GLOBAL2)
+#define MV88E6XXX_FLAG_G2_MGMT_EN_2X	BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X)
+#define MV88E6XXX_FLAG_G2_MGMT_EN_0X	BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X)
+#define MV88E6XXX_FLAG_G2_IRL_CMD	BIT_ULL(MV88E6XXX_CAP_G2_IRL_CMD)
+#define MV88E6XXX_FLAG_G2_IRL_DATA	BIT_ULL(MV88E6XXX_CAP_G2_IRL_DATA)
+#define MV88E6XXX_FLAG_G2_PVT_ADDR	BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR)
+#define MV88E6XXX_FLAG_G2_PVT_DATA	BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA)
+#define MV88E6XXX_FLAG_G2_SWITCH_MAC	BIT_ULL(MV88E6XXX_CAP_G2_SWITCH_MAC)
+#define MV88E6XXX_FLAG_G2_POT		BIT_ULL(MV88E6XXX_CAP_G2_POT)
+#define MV88E6XXX_FLAG_G2_EEPROM_CMD	BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD)
+#define MV88E6XXX_FLAG_G2_EEPROM_DATA	BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA)
+#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD	BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_CMD)
+#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA	BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_DATA)
 
-#define MV88E6XXX_FLAG_PPU		BIT(MV88E6XXX_CAP_PPU)
-#define MV88E6XXX_FLAG_PPU_ACTIVE	BIT(MV88E6XXX_CAP_PPU_ACTIVE)
-#define MV88E6XXX_FLAG_STU		BIT(MV88E6XXX_CAP_STU)
-#define MV88E6XXX_FLAG_TEMP		BIT(MV88E6XXX_CAP_TEMP)
-#define MV88E6XXX_FLAG_TEMP_LIMIT	BIT(MV88E6XXX_CAP_TEMP_LIMIT)
-#define MV88E6XXX_FLAG_VTU		BIT(MV88E6XXX_CAP_VTU)
+#define MV88E6XXX_FLAG_PPU		BIT_ULL(MV88E6XXX_CAP_PPU)
+#define MV88E6XXX_FLAG_PPU_ACTIVE	BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE)
+#define MV88E6XXX_FLAG_STU		BIT_ULL(MV88E6XXX_CAP_STU)
+#define MV88E6XXX_FLAG_TEMP		BIT_ULL(MV88E6XXX_CAP_TEMP)
+#define MV88E6XXX_FLAG_TEMP_LIMIT	BIT_ULL(MV88E6XXX_CAP_TEMP_LIMIT)
+#define MV88E6XXX_FLAG_VTU		BIT_ULL(MV88E6XXX_CAP_VTU)
 
 /* EEPROM Programming via Global2 with 16-bit data */
 #define MV88E6XXX_FLAGS_EEPROM16	\
@@ -615,7 +614,7 @@
 	unsigned int num_ports;
 	unsigned int port_base_addr;
 	unsigned int age_time_coeff;
-	unsigned long flags;
+	unsigned long long flags;
 };
 
 struct mv88e6xxx_atu_entry {
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 4788a89..b3df70d 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -256,7 +256,7 @@
 	.n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges),
 };
 
-struct regmap_config qca8k_regmap_config = {
+static struct regmap_config qca8k_regmap_config = {
 	.reg_bits = 16,
 	.val_bits = 32,
 	.reg_stride = 4,
@@ -1032,19 +1032,7 @@
 	},
 };
 
-static int __init
-qca8kmdio_driver_register(void)
-{
-	return mdio_driver_register(&qca8kmdio_driver);
-}
-module_init(qca8kmdio_driver_register);
-
-static void __exit
-qca8kmdio_driver_unregister(void)
-{
-	mdio_driver_unregister(&qca8kmdio_driver);
-}
-module_exit(qca8kmdio_driver_unregister);
+mdio_module_driver(qca8kmdio_driver);
 
 MODULE_AUTHOR("Mathieu Olivari, John Crispin <john@phrozen.org>");
 MODULE_DESCRIPTION("Driver for QCA8K ethernet switch family");
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 2461296..c6b71f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
 
-cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o
+cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 1f9867d..ea0d1f1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -851,6 +851,9 @@
 
 	spinlock_t stats_lock;
 	spinlock_t win0_lock ____cacheline_aligned_in_smp;
+
+	/* TC u32 offload */
+	struct cxgb4_tc_u32_table *tc_u32;
 };
 
 /* Support for "sched-class" command to allow a TX Scheduling Class to be
@@ -1025,6 +1028,32 @@
 	VLAN_REWRITE
 };
 
+/* Host shadow copy of ingress filter entry.  This is in host native format
+ * and doesn't match the ordering or bit order, etc. of the hardware of the
+ * firmware command.  The use of bit-field structure elements is purely to
+ * remind ourselves of the field size limitations and save memory in the case
+ * where the filter table is large.
+ */
+struct filter_entry {
+	/* Administrative fields for filter. */
+	u32 valid:1;            /* filter allocated and valid */
+	u32 locked:1;           /* filter is administratively locked */
+
+	u32 pending:1;          /* filter action is pending firmware reply */
+	u32 smtidx:8;           /* Source MAC Table index for smac */
+	struct filter_ctx *ctx; /* Caller's completion hook */
+	struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */
+	struct net_device *dev; /* Associated net device */
+	u32 tid;                /* This will store the actual tid */
+
+	/* The filter itself.  Most of this is a straight copy of information
+	 * provided by the extended ioctl().  Some fields are translated to
+	 * internal forms -- for instance the Ingress Queue ID passed in from
+	 * the ioctl() is translated into the Absolute Ingress Queue ID.
+	 */
+	struct ch_filter_specification fs;
+};
+
 static inline int is_offload(const struct adapter *adap)
 {
 	return adap->params.offload;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 52be9a4..20455d0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2748,12 +2748,6 @@
 				 size_mb << 20);
 }
 
-static int blocked_fl_open(struct inode *inode, struct file *file)
-{
-	file->private_data = inode->i_private;
-	return 0;
-}
-
 static ssize_t blocked_fl_read(struct file *filp, char __user *ubuf,
 			       size_t count, loff_t *ppos)
 {
@@ -2797,7 +2791,7 @@
 
 static const struct file_operations blocked_fl_fops = {
 	.owner   = THIS_MODULE,
-	.open    = blocked_fl_open,
+	.open    = simple_open,
 	.read    = blocked_fl_read,
 	.write   = blocked_fl_write,
 	.llseek  = generic_file_llseek,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
new file mode 100644
index 0000000..2a61617
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -0,0 +1,721 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cxgb4.h"
+#include "t4_regs.h"
+#include "l2t.h"
+#include "t4fw_api.h"
+#include "cxgb4_filter.h"
+
+static inline bool is_field_set(u32 val, u32 mask)
+{
+	return val || mask;
+}
+
+static inline bool unsupported(u32 conf, u32 conf_mask, u32 val, u32 mask)
+{
+	return !(conf & conf_mask) && is_field_set(val, mask);
+}
+
+/* Validate filter spec against configuration done on the card. */
+static int validate_filter(struct net_device *dev,
+			   struct ch_filter_specification *fs)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	u32 fconf, iconf;
+
+	/* Check for unconfigured fields being used. */
+	fconf = adapter->params.tp.vlan_pri_map;
+	iconf = adapter->params.tp.ingress_config;
+
+	if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) ||
+	    unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) ||
+	    unsupported(fconf, TOS_F, fs->val.tos, fs->mask.tos) ||
+	    unsupported(fconf, ETHERTYPE_F, fs->val.ethtype,
+			fs->mask.ethtype) ||
+	    unsupported(fconf, MACMATCH_F, fs->val.macidx, fs->mask.macidx) ||
+	    unsupported(fconf, MPSHITTYPE_F, fs->val.matchtype,
+			fs->mask.matchtype) ||
+	    unsupported(fconf, FRAGMENTATION_F, fs->val.frag, fs->mask.frag) ||
+	    unsupported(fconf, PROTOCOL_F, fs->val.proto, fs->mask.proto) ||
+	    unsupported(fconf, VNIC_ID_F, fs->val.pfvf_vld,
+			fs->mask.pfvf_vld) ||
+	    unsupported(fconf, VNIC_ID_F, fs->val.ovlan_vld,
+			fs->mask.ovlan_vld) ||
+	    unsupported(fconf, VLAN_F, fs->val.ivlan_vld, fs->mask.ivlan_vld))
+		return -EOPNOTSUPP;
+
+	/* T4 inconveniently uses the same FT_VNIC_ID_W bits for both the Outer
+	 * VLAN Tag and PF/VF/VFvld fields based on VNIC_F being set
+	 * in TP_INGRESS_CONFIG.  Hense the somewhat crazy checks
+	 * below.  Additionally, since the T4 firmware interface also
+	 * carries that overlap, we need to translate any PF/VF
+	 * specification into that internal format below.
+	 */
+	if (is_field_set(fs->val.pfvf_vld, fs->mask.pfvf_vld) &&
+	    is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld))
+		return -EOPNOTSUPP;
+	if (unsupported(iconf, VNIC_F, fs->val.pfvf_vld, fs->mask.pfvf_vld) ||
+	    (is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld) &&
+	     (iconf & VNIC_F)))
+		return -EOPNOTSUPP;
+	if (fs->val.pf > 0x7 || fs->val.vf > 0x7f)
+		return -ERANGE;
+	fs->mask.pf &= 0x7;
+	fs->mask.vf &= 0x7f;
+
+	/* If the user is requesting that the filter action loop
+	 * matching packets back out one of our ports, make sure that
+	 * the egress port is in range.
+	 */
+	if (fs->action == FILTER_SWITCH &&
+	    fs->eport >= adapter->params.nports)
+		return -ERANGE;
+
+	/* Don't allow various trivially obvious bogus out-of-range values... */
+	if (fs->val.iport >= adapter->params.nports)
+		return -ERANGE;
+
+	/* T4 doesn't support removing VLAN Tags for loop back filters. */
+	if (is_t4(adapter->params.chip) &&
+	    fs->action == FILTER_SWITCH &&
+	    (fs->newvlan == VLAN_REMOVE ||
+	     fs->newvlan == VLAN_REWRITE))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static unsigned int get_filter_steerq(struct net_device *dev,
+				      struct ch_filter_specification *fs)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	unsigned int iq;
+
+	/* If the user has requested steering matching Ingress Packets
+	 * to a specific Queue Set, we need to make sure it's in range
+	 * for the port and map that into the Absolute Queue ID of the
+	 * Queue Set's Response Queue.
+	 */
+	if (!fs->dirsteer) {
+		if (fs->iq)
+			return -EINVAL;
+		iq = 0;
+	} else {
+		struct port_info *pi = netdev_priv(dev);
+
+		/* If the iq id is greater than the number of qsets,
+		 * then assume it is an absolute qid.
+		 */
+		if (fs->iq < pi->nqsets)
+			iq = adapter->sge.ethrxq[pi->first_qset +
+						 fs->iq].rspq.abs_id;
+		else
+			iq = fs->iq;
+	}
+
+	return iq;
+}
+
+static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family)
+{
+	spin_lock_bh(&t->ftid_lock);
+
+	if (test_bit(fidx, t->ftid_bmap)) {
+		spin_unlock_bh(&t->ftid_lock);
+		return -EBUSY;
+	}
+
+	if (family == PF_INET)
+		__set_bit(fidx, t->ftid_bmap);
+	else
+		bitmap_allocate_region(t->ftid_bmap, fidx, 2);
+
+	spin_unlock_bh(&t->ftid_lock);
+	return 0;
+}
+
+static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family)
+{
+	spin_lock_bh(&t->ftid_lock);
+	if (family == PF_INET)
+		__clear_bit(fidx, t->ftid_bmap);
+	else
+		bitmap_release_region(t->ftid_bmap, fidx, 2);
+	spin_unlock_bh(&t->ftid_lock);
+}
+
+/* Delete the filter at a specified index. */
+static int del_filter_wr(struct adapter *adapter, int fidx)
+{
+	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
+	struct fw_filter_wr *fwr;
+	struct sk_buff *skb;
+	unsigned int len;
+
+	len = sizeof(*fwr);
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	fwr = (struct fw_filter_wr *)__skb_put(skb, len);
+	t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id);
+
+	/* Mark the filter as "pending" and ship off the Filter Work Request.
+	 * When we get the Work Request Reply we'll clear the pending status.
+	 */
+	f->pending = 1;
+	t4_mgmt_tx(adapter, skb);
+	return 0;
+}
+
+/* Send a Work Request to write the filter at a specified index.  We construct
+ * a Firmware Filter Work Request to have the work done and put the indicated
+ * filter into "pending" mode which will prevent any further actions against
+ * it till we get a reply from the firmware on the completion status of the
+ * request.
+ */
+int set_filter_wr(struct adapter *adapter, int fidx)
+{
+	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
+	struct fw_filter_wr *fwr;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	/* If the new filter requires loopback Destination MAC and/or VLAN
+	 * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
+	 * the filter.
+	 */
+	if (f->fs.newdmac || f->fs.newvlan) {
+		/* allocate L2T entry for new filter */
+		f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan,
+						f->fs.eport, f->fs.dmac);
+		if (!f->l2t) {
+			kfree_skb(skb);
+			return -ENOMEM;
+		}
+	}
+
+	fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
+	memset(fwr, 0, sizeof(*fwr));
+
+	/* It would be nice to put most of the following in t4_hw.c but most
+	 * of the work is translating the cxgbtool ch_filter_specification
+	 * into the Work Request and the definition of that structure is
+	 * currently in cxgbtool.h which isn't appropriate to pull into the
+	 * common code.  We may eventually try to come up with a more neutral
+	 * filter specification structure but for now it's easiest to simply
+	 * put this fairly direct code in line ...
+	 */
+	fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
+	fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16));
+	fwr->tid_to_iq =
+		htonl(FW_FILTER_WR_TID_V(f->tid) |
+		      FW_FILTER_WR_RQTYPE_V(f->fs.type) |
+		      FW_FILTER_WR_NOREPLY_V(0) |
+		      FW_FILTER_WR_IQ_V(f->fs.iq));
+	fwr->del_filter_to_l2tix =
+		htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) |
+		      FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) |
+		      FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) |
+		      FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) |
+		      FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
+		      FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
+		      FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
+		      FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
+		      FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
+					     f->fs.newvlan == VLAN_REWRITE) |
+		      FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
+					    f->fs.newvlan == VLAN_REWRITE) |
+		      FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) |
+		      FW_FILTER_WR_TXCHAN_V(f->fs.eport) |
+		      FW_FILTER_WR_PRIO_V(f->fs.prio) |
+		      FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0));
+	fwr->ethtype = htons(f->fs.val.ethtype);
+	fwr->ethtypem = htons(f->fs.mask.ethtype);
+	fwr->frag_to_ovlan_vldm =
+		(FW_FILTER_WR_FRAG_V(f->fs.val.frag) |
+		 FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) |
+		 FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) |
+		 FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
+		 FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
+		 FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
+	fwr->smac_sel = 0;
+	fwr->rx_chan_rx_rpl_iq =
+		htons(FW_FILTER_WR_RX_CHAN_V(0) |
+		      FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
+	fwr->maci_to_matchtypem =
+		htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) |
+		      FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) |
+		      FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) |
+		      FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) |
+		      FW_FILTER_WR_PORT_V(f->fs.val.iport) |
+		      FW_FILTER_WR_PORTM_V(f->fs.mask.iport) |
+		      FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) |
+		      FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype));
+	fwr->ptcl = f->fs.val.proto;
+	fwr->ptclm = f->fs.mask.proto;
+	fwr->ttyp = f->fs.val.tos;
+	fwr->ttypm = f->fs.mask.tos;
+	fwr->ivlan = htons(f->fs.val.ivlan);
+	fwr->ivlanm = htons(f->fs.mask.ivlan);
+	fwr->ovlan = htons(f->fs.val.ovlan);
+	fwr->ovlanm = htons(f->fs.mask.ovlan);
+	memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
+	memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
+	memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
+	memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
+	fwr->lp = htons(f->fs.val.lport);
+	fwr->lpm = htons(f->fs.mask.lport);
+	fwr->fp = htons(f->fs.val.fport);
+	fwr->fpm = htons(f->fs.mask.fport);
+	if (f->fs.newsmac)
+		memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
+
+	/* Mark the filter as "pending" and ship off the Filter Work Request.
+	 * When we get the Work Request Reply we'll clear the pending status.
+	 */
+	f->pending = 1;
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
+	t4_ofld_send(adapter, skb);
+	return 0;
+}
+
+/* Return an error number if the indicated filter isn't writable ... */
+int writable_filter(struct filter_entry *f)
+{
+	if (f->locked)
+		return -EPERM;
+	if (f->pending)
+		return -EBUSY;
+
+	return 0;
+}
+
+/* Delete the filter at the specified index (if valid).  The checks for all
+ * the common problems with doing this like the filter being locked, currently
+ * pending in another operation, etc.
+ */
+int delete_filter(struct adapter *adapter, unsigned int fidx)
+{
+	struct filter_entry *f;
+	int ret;
+
+	if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
+		return -EINVAL;
+
+	f = &adapter->tids.ftid_tab[fidx];
+	ret = writable_filter(f);
+	if (ret)
+		return ret;
+	if (f->valid)
+		return del_filter_wr(adapter, fidx);
+
+	return 0;
+}
+
+/* Clear a filter and release any of its resources that we own.  This also
+ * clears the filter's "pending" status.
+ */
+void clear_filter(struct adapter *adap, struct filter_entry *f)
+{
+	/* If the new or old filter have loopback rewriteing rules then we'll
+	 * need to free any existing Layer Two Table (L2T) entries of the old
+	 * filter rule.  The firmware will handle freeing up any Source MAC
+	 * Table (SMT) entries used for rewriting Source MAC Addresses in
+	 * loopback rules.
+	 */
+	if (f->l2t)
+		cxgb4_l2t_release(f->l2t);
+
+	/* The zeroing of the filter rule below clears the filter valid,
+	 * pending, locked flags, l2t pointer, etc. so it's all we need for
+	 * this operation.
+	 */
+	memset(f, 0, sizeof(*f));
+}
+
+void clear_all_filters(struct adapter *adapter)
+{
+	unsigned int i;
+
+	if (adapter->tids.ftid_tab) {
+		struct filter_entry *f = &adapter->tids.ftid_tab[0];
+		unsigned int max_ftid = adapter->tids.nftids +
+					adapter->tids.nsftids;
+
+		for (i = 0; i < max_ftid; i++, f++)
+			if (f->valid || f->pending)
+				clear_filter(adapter, f);
+	}
+}
+
+/* Fill up default masks for set match fields. */
+static void fill_default_mask(struct ch_filter_specification *fs)
+{
+	unsigned int lip = 0, lip_mask = 0;
+	unsigned int fip = 0, fip_mask = 0;
+	unsigned int i;
+
+	if (fs->val.iport && !fs->mask.iport)
+		fs->mask.iport |= ~0;
+	if (fs->val.fcoe && !fs->mask.fcoe)
+		fs->mask.fcoe |= ~0;
+	if (fs->val.matchtype && !fs->mask.matchtype)
+		fs->mask.matchtype |= ~0;
+	if (fs->val.macidx && !fs->mask.macidx)
+		fs->mask.macidx |= ~0;
+	if (fs->val.ethtype && !fs->mask.ethtype)
+		fs->mask.ethtype |= ~0;
+	if (fs->val.ivlan && !fs->mask.ivlan)
+		fs->mask.ivlan |= ~0;
+	if (fs->val.ovlan && !fs->mask.ovlan)
+		fs->mask.ovlan |= ~0;
+	if (fs->val.frag && !fs->mask.frag)
+		fs->mask.frag |= ~0;
+	if (fs->val.tos && !fs->mask.tos)
+		fs->mask.tos |= ~0;
+	if (fs->val.proto && !fs->mask.proto)
+		fs->mask.proto |= ~0;
+
+	for (i = 0; i < ARRAY_SIZE(fs->val.lip); i++) {
+		lip |= fs->val.lip[i];
+		lip_mask |= fs->mask.lip[i];
+		fip |= fs->val.fip[i];
+		fip_mask |= fs->mask.fip[i];
+	}
+
+	if (lip && !lip_mask)
+		memset(fs->mask.lip, ~0, sizeof(fs->mask.lip));
+
+	if (fip && !fip_mask)
+		memset(fs->mask.fip, ~0, sizeof(fs->mask.lip));
+
+	if (fs->val.lport && !fs->mask.lport)
+		fs->mask.lport = ~0;
+	if (fs->val.fport && !fs->mask.fport)
+		fs->mask.fport = ~0;
+}
+
+/* Check a Chelsio Filter Request for validity, convert it into our internal
+ * format and send it to the hardware.  Return 0 on success, an error number
+ * otherwise.  We attach any provided filter operation context to the internal
+ * filter specification in order to facilitate signaling completion of the
+ * operation.
+ */
+int __cxgb4_set_filter(struct net_device *dev, int filter_id,
+		       struct ch_filter_specification *fs,
+		       struct filter_ctx *ctx)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	unsigned int max_fidx, fidx, iq;
+	struct filter_entry *f;
+	u32 iconf;
+	int ret;
+
+	max_fidx = adapter->tids.nftids;
+	if (filter_id != (max_fidx + adapter->tids.nsftids - 1) &&
+	    filter_id >= max_fidx)
+		return -E2BIG;
+
+	fill_default_mask(fs);
+
+	ret = validate_filter(dev, fs);
+	if (ret)
+		return ret;
+
+	iq = get_filter_steerq(dev, fs);
+	if (iq < 0)
+		return iq;
+
+	/* IPv6 filters occupy four slots and must be aligned on
+	 * four-slot boundaries.  IPv4 filters only occupy a single
+	 * slot and have no alignment requirements but writing a new
+	 * IPv4 filter into the middle of an existing IPv6 filter
+	 * requires clearing the old IPv6 filter and hence we prevent
+	 * insertion.
+	 */
+	if (fs->type == 0) { /* IPv4 */
+		/* If our IPv4 filter isn't being written to a
+		 * multiple of four filter index and there's an IPv6
+		 * filter at the multiple of 4 base slot, then we
+		 * prevent insertion.
+		 */
+		fidx = filter_id & ~0x3;
+		if (fidx != filter_id &&
+		    adapter->tids.ftid_tab[fidx].fs.type) {
+			f = &adapter->tids.ftid_tab[fidx];
+			if (f->valid) {
+				dev_err(adapter->pdev_dev,
+					"Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n",
+					fidx, fidx + 3);
+				return -EINVAL;
+			}
+		}
+	} else { /* IPv6 */
+		/* Ensure that the IPv6 filter is aligned on a
+		 * multiple of 4 boundary.
+		 */
+		if (filter_id & 0x3) {
+			dev_err(adapter->pdev_dev,
+				"Invalid location. IPv6 must be aligned on a 4-slot boundary\n");
+			return -EINVAL;
+		}
+
+		/* Check all except the base overlapping IPv4 filter slots. */
+		for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) {
+			f = &adapter->tids.ftid_tab[fidx];
+			if (f->valid) {
+				dev_err(adapter->pdev_dev,
+					"Invalid location.  IPv6 requires 4 slots and an IPv4 filter exists at %u\n",
+					fidx);
+				return -EINVAL;
+			}
+		}
+	}
+
+	/* Check to make sure that provided filter index is not
+	 * already in use by someone else
+	 */
+	f = &adapter->tids.ftid_tab[filter_id];
+	if (f->valid)
+		return -EBUSY;
+
+	fidx = filter_id + adapter->tids.ftid_base;
+	ret = cxgb4_set_ftid(&adapter->tids, filter_id,
+			     fs->type ? PF_INET6 : PF_INET);
+	if (ret)
+		return ret;
+
+	/* Check to make sure the filter requested is writable ... */
+	ret = writable_filter(f);
+	if (ret) {
+		/* Clear the bits we have set above */
+		cxgb4_clear_ftid(&adapter->tids, filter_id,
+				 fs->type ? PF_INET6 : PF_INET);
+		return ret;
+	}
+
+	/* Clear out any old resources being used by the filter before
+	 * we start constructing the new filter.
+	 */
+	if (f->valid)
+		clear_filter(adapter, f);
+
+	/* Convert the filter specification into our internal format.
+	 * We copy the PF/VF specification into the Outer VLAN field
+	 * here so the rest of the code -- including the interface to
+	 * the firmware -- doesn't have to constantly do these checks.
+	 */
+	f->fs = *fs;
+	f->fs.iq = iq;
+	f->dev = dev;
+
+	iconf = adapter->params.tp.ingress_config;
+	if (iconf & VNIC_F) {
+		f->fs.val.ovlan = (fs->val.pf << 13) | fs->val.vf;
+		f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf;
+		f->fs.val.ovlan_vld = fs->val.pfvf_vld;
+		f->fs.mask.ovlan_vld = fs->mask.pfvf_vld;
+	}
+
+	/* Attempt to set the filter.  If we don't succeed, we clear
+	 * it and return the failure.
+	 */
+	f->ctx = ctx;
+	f->tid = fidx; /* Save the actual tid */
+	ret = set_filter_wr(adapter, filter_id);
+	if (ret) {
+		cxgb4_clear_ftid(&adapter->tids, filter_id,
+				 fs->type ? PF_INET6 : PF_INET);
+		clear_filter(adapter, f);
+	}
+
+	return ret;
+}
+
+/* Check a delete filter request for validity and send it to the hardware.
+ * Return 0 on success, an error number otherwise.  We attach any provided
+ * filter operation context to the internal filter specification in order to
+ * facilitate signaling completion of the operation.
+ */
+int __cxgb4_del_filter(struct net_device *dev, int filter_id,
+		       struct filter_ctx *ctx)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	struct filter_entry *f;
+	unsigned int max_fidx;
+	int ret;
+
+	max_fidx = adapter->tids.nftids;
+	if (filter_id != (max_fidx + adapter->tids.nsftids - 1) &&
+	    filter_id >= max_fidx)
+		return -E2BIG;
+
+	f = &adapter->tids.ftid_tab[filter_id];
+	ret = writable_filter(f);
+	if (ret)
+		return ret;
+
+	if (f->valid) {
+		f->ctx = ctx;
+		cxgb4_clear_ftid(&adapter->tids, filter_id,
+				 f->fs.type ? PF_INET6 : PF_INET);
+		return del_filter_wr(adapter, filter_id);
+	}
+
+	/* If the caller has passed in a Completion Context then we need to
+	 * mark it as a successful completion so they don't stall waiting
+	 * for it.
+	 */
+	if (ctx) {
+		ctx->result = 0;
+		complete(&ctx->completion);
+	}
+	return ret;
+}
+
+int cxgb4_set_filter(struct net_device *dev, int filter_id,
+		     struct ch_filter_specification *fs)
+{
+	struct filter_ctx ctx;
+	int ret;
+
+	init_completion(&ctx.completion);
+
+	ret = __cxgb4_set_filter(dev, filter_id, fs, &ctx);
+	if (ret)
+		goto out;
+
+	/* Wait for reply */
+	ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ);
+	if (!ret)
+		return -ETIMEDOUT;
+
+	ret = ctx.result;
+out:
+	return ret;
+}
+
+int cxgb4_del_filter(struct net_device *dev, int filter_id)
+{
+	struct filter_ctx ctx;
+	int ret;
+
+	init_completion(&ctx.completion);
+
+	ret = __cxgb4_del_filter(dev, filter_id, &ctx);
+	if (ret)
+		goto out;
+
+	/* Wait for reply */
+	ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ);
+	if (!ret)
+		return -ETIMEDOUT;
+
+	ret = ctx.result;
+out:
+	return ret;
+}
+
+/* Handle a filter write/deletion reply. */
+void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
+{
+	unsigned int tid = GET_TID(rpl);
+	struct filter_entry *f = NULL;
+	unsigned int max_fidx;
+	int idx;
+
+	max_fidx = adap->tids.nftids + adap->tids.nsftids;
+	/* Get the corresponding filter entry for this tid */
+	if (adap->tids.ftid_tab) {
+		/* Check this in normal filter region */
+		idx = tid - adap->tids.ftid_base;
+		if (idx >= max_fidx)
+			return;
+		f = &adap->tids.ftid_tab[idx];
+		if (f->tid != tid)
+			return;
+	}
+
+	/* We found the filter entry for this tid */
+	if (f) {
+		unsigned int ret = TCB_COOKIE_G(rpl->cookie);
+		struct filter_ctx *ctx;
+
+		/* Pull off any filter operation context attached to the
+		 * filter.
+		 */
+		ctx = f->ctx;
+		f->ctx = NULL;
+
+		if (ret == FW_FILTER_WR_FLT_DELETED) {
+			/* Clear the filter when we get confirmation from the
+			 * hardware that the filter has been deleted.
+			 */
+			clear_filter(adap, f);
+			if (ctx)
+				ctx->result = 0;
+		} else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
+			dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
+				idx);
+			clear_filter(adap, f);
+			if (ctx)
+				ctx->result = -ENOMEM;
+		} else if (ret == FW_FILTER_WR_FLT_ADDED) {
+			f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
+			f->pending = 0;  /* asynchronous setup completed */
+			f->valid = 1;
+			if (ctx) {
+				ctx->result = 0;
+				ctx->tid = idx;
+			}
+		} else {
+			/* Something went wrong.  Issue a warning about the
+			 * problem and clear everything out.
+			 */
+			dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
+				idx, ret);
+			clear_filter(adap, f);
+			if (ctx)
+				ctx->result = -EINVAL;
+		}
+		if (ctx)
+			complete(&ctx->completion);
+	}
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
new file mode 100644
index 0000000..23742cb
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
@@ -0,0 +1,48 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_FILTER_H
+#define __CXGB4_FILTER_H
+
+#include "t4_msg.h"
+
+void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl);
+void clear_filter(struct adapter *adap, struct filter_entry *f);
+
+int set_filter_wr(struct adapter *adapter, int fidx);
+int delete_filter(struct adapter *adapter, unsigned int fidx);
+
+int writable_filter(struct filter_entry *f);
+void clear_all_filters(struct adapter *adapter);
+#endif /* __CXGB4_FILTER_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index d1ebb84..1be4d23 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -67,6 +67,7 @@
 #include <linux/crash_dump.h>
 
 #include "cxgb4.h"
+#include "cxgb4_filter.h"
 #include "t4_regs.h"
 #include "t4_values.h"
 #include "t4_msg.h"
@@ -77,6 +78,7 @@
 #include "clip_tbl.h"
 #include "l2t.h"
 #include "sched.h"
+#include "cxgb4_tc_u32.h"
 
 char cxgb4_driver_name[] = KBUILD_MODNAME;
 
@@ -87,30 +89,6 @@
 const char cxgb4_driver_version[] = DRV_VERSION;
 #define DRV_DESC "Chelsio T4/T5/T6 Network Driver"
 
-/* Host shadow copy of ingress filter entry.  This is in host native format
- * and doesn't match the ordering or bit order, etc. of the hardware of the
- * firmware command.  The use of bit-field structure elements is purely to
- * remind ourselves of the field size limitations and save memory in the case
- * where the filter table is large.
- */
-struct filter_entry {
-	/* Administrative fields for filter.
-	 */
-	u32 valid:1;            /* filter allocated and valid */
-	u32 locked:1;           /* filter is administratively locked */
-
-	u32 pending:1;          /* filter action is pending firmware reply */
-	u32 smtidx:8;           /* Source MAC Table index for smac */
-	struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */
-
-	/* The filter itself.  Most of this is a straight copy of information
-	 * provided by the extended ioctl().  Some fields are translated to
-	 * internal forms -- for instance the Ingress Queue ID passed in from
-	 * the ioctl() is translated into the Absolute Ingress Queue ID.
-	 */
-	struct ch_filter_specification fs;
-};
-
 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
 			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
 			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
@@ -527,66 +505,6 @@
 }
 #endif /* CONFIG_CHELSIO_T4_DCB */
 
-/* Clear a filter and release any of its resources that we own.  This also
- * clears the filter's "pending" status.
- */
-static void clear_filter(struct adapter *adap, struct filter_entry *f)
-{
-	/* If the new or old filter have loopback rewriteing rules then we'll
-	 * need to free any existing Layer Two Table (L2T) entries of the old
-	 * filter rule.  The firmware will handle freeing up any Source MAC
-	 * Table (SMT) entries used for rewriting Source MAC Addresses in
-	 * loopback rules.
-	 */
-	if (f->l2t)
-		cxgb4_l2t_release(f->l2t);
-
-	/* The zeroing of the filter rule below clears the filter valid,
-	 * pending, locked flags, l2t pointer, etc. so it's all we need for
-	 * this operation.
-	 */
-	memset(f, 0, sizeof(*f));
-}
-
-/* Handle a filter write/deletion reply.
- */
-static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
-{
-	unsigned int idx = GET_TID(rpl);
-	unsigned int nidx = idx - adap->tids.ftid_base;
-	unsigned int ret;
-	struct filter_entry *f;
-
-	if (idx >= adap->tids.ftid_base && nidx <
-	   (adap->tids.nftids + adap->tids.nsftids)) {
-		idx = nidx;
-		ret = TCB_COOKIE_G(rpl->cookie);
-		f = &adap->tids.ftid_tab[idx];
-
-		if (ret == FW_FILTER_WR_FLT_DELETED) {
-			/* Clear the filter when we get confirmation from the
-			 * hardware that the filter has been deleted.
-			 */
-			clear_filter(adap, f);
-		} else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
-			dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
-				idx);
-			clear_filter(adap, f);
-		} else if (ret == FW_FILTER_WR_FLT_ADDED) {
-			f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
-			f->pending = 0;  /* asynchronous setup completed */
-			f->valid = 1;
-		} else {
-			/* Something went wrong.  Issue a warning about the
-			 * problem and clear everything out.
-			 */
-			dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
-				idx, ret);
-			clear_filter(adap, f);
-		}
-	}
-}
-
 /* Response queue handler for the FW event queue.
  */
 static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
@@ -1026,151 +944,6 @@
 	kvfree(addr);
 }
 
-/* Send a Work Request to write the filter at a specified index.  We construct
- * a Firmware Filter Work Request to have the work done and put the indicated
- * filter into "pending" mode which will prevent any further actions against
- * it till we get a reply from the firmware on the completion status of the
- * request.
- */
-static int set_filter_wr(struct adapter *adapter, int fidx)
-{
-	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
-	struct sk_buff *skb;
-	struct fw_filter_wr *fwr;
-	unsigned int ftid;
-
-	skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
-	if (!skb)
-		return -ENOMEM;
-
-	/* If the new filter requires loopback Destination MAC and/or VLAN
-	 * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
-	 * the filter.
-	 */
-	if (f->fs.newdmac || f->fs.newvlan) {
-		/* allocate L2T entry for new filter */
-		f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan,
-						f->fs.eport, f->fs.dmac);
-		if (f->l2t == NULL) {
-			kfree_skb(skb);
-			return -ENOMEM;
-		}
-	}
-
-	ftid = adapter->tids.ftid_base + fidx;
-
-	fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
-	memset(fwr, 0, sizeof(*fwr));
-
-	/* It would be nice to put most of the following in t4_hw.c but most
-	 * of the work is translating the cxgbtool ch_filter_specification
-	 * into the Work Request and the definition of that structure is
-	 * currently in cxgbtool.h which isn't appropriate to pull into the
-	 * common code.  We may eventually try to come up with a more neutral
-	 * filter specification structure but for now it's easiest to simply
-	 * put this fairly direct code in line ...
-	 */
-	fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
-	fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16));
-	fwr->tid_to_iq =
-		htonl(FW_FILTER_WR_TID_V(ftid) |
-		      FW_FILTER_WR_RQTYPE_V(f->fs.type) |
-		      FW_FILTER_WR_NOREPLY_V(0) |
-		      FW_FILTER_WR_IQ_V(f->fs.iq));
-	fwr->del_filter_to_l2tix =
-		htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) |
-		      FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) |
-		      FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) |
-		      FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) |
-		      FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
-		      FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
-		      FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
-		      FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
-		      FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
-					     f->fs.newvlan == VLAN_REWRITE) |
-		      FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
-					    f->fs.newvlan == VLAN_REWRITE) |
-		      FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) |
-		      FW_FILTER_WR_TXCHAN_V(f->fs.eport) |
-		      FW_FILTER_WR_PRIO_V(f->fs.prio) |
-		      FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0));
-	fwr->ethtype = htons(f->fs.val.ethtype);
-	fwr->ethtypem = htons(f->fs.mask.ethtype);
-	fwr->frag_to_ovlan_vldm =
-		(FW_FILTER_WR_FRAG_V(f->fs.val.frag) |
-		 FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) |
-		 FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) |
-		 FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
-		 FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
-		 FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
-	fwr->smac_sel = 0;
-	fwr->rx_chan_rx_rpl_iq =
-		htons(FW_FILTER_WR_RX_CHAN_V(0) |
-		      FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
-	fwr->maci_to_matchtypem =
-		htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) |
-		      FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) |
-		      FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) |
-		      FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) |
-		      FW_FILTER_WR_PORT_V(f->fs.val.iport) |
-		      FW_FILTER_WR_PORTM_V(f->fs.mask.iport) |
-		      FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) |
-		      FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype));
-	fwr->ptcl = f->fs.val.proto;
-	fwr->ptclm = f->fs.mask.proto;
-	fwr->ttyp = f->fs.val.tos;
-	fwr->ttypm = f->fs.mask.tos;
-	fwr->ivlan = htons(f->fs.val.ivlan);
-	fwr->ivlanm = htons(f->fs.mask.ivlan);
-	fwr->ovlan = htons(f->fs.val.ovlan);
-	fwr->ovlanm = htons(f->fs.mask.ovlan);
-	memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
-	memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
-	memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
-	memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
-	fwr->lp = htons(f->fs.val.lport);
-	fwr->lpm = htons(f->fs.mask.lport);
-	fwr->fp = htons(f->fs.val.fport);
-	fwr->fpm = htons(f->fs.mask.fport);
-	if (f->fs.newsmac)
-		memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
-
-	/* Mark the filter as "pending" and ship off the Filter Work Request.
-	 * When we get the Work Request Reply we'll clear the pending status.
-	 */
-	f->pending = 1;
-	set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
-	t4_ofld_send(adapter, skb);
-	return 0;
-}
-
-/* Delete the filter at a specified index.
- */
-static int del_filter_wr(struct adapter *adapter, int fidx)
-{
-	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
-	struct sk_buff *skb;
-	struct fw_filter_wr *fwr;
-	unsigned int len, ftid;
-
-	len = sizeof(*fwr);
-	ftid = adapter->tids.ftid_base + fidx;
-
-	skb = alloc_skb(len, GFP_KERNEL);
-	if (!skb)
-		return -ENOMEM;
-
-	fwr = (struct fw_filter_wr *)__skb_put(skb, len);
-	t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id);
-
-	/* Mark the filter as "pending" and ship off the Filter Work Request.
-	 * When we get the Work Request Reply we'll clear the pending status.
-	 */
-	f->pending = 1;
-	t4_mgmt_tx(adapter, skb);
-	return 0;
-}
-
 static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
 			     void *accel_priv, select_queue_fallback_t fallback)
 {
@@ -1552,19 +1325,22 @@
  */
 static int tid_init(struct tid_info *t)
 {
-	size_t size;
-	unsigned int stid_bmap_size;
-	unsigned int natids = t->natids;
 	struct adapter *adap = container_of(t, struct adapter, tids);
+	unsigned int max_ftids = t->nftids + t->nsftids;
+	unsigned int natids = t->natids;
+	unsigned int stid_bmap_size;
+	unsigned int ftid_bmap_size;
+	size_t size;
 
 	stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
+	ftid_bmap_size = BITS_TO_LONGS(t->nftids);
 	size = t->ntids * sizeof(*t->tid_tab) +
 	       natids * sizeof(*t->atid_tab) +
 	       t->nstids * sizeof(*t->stid_tab) +
 	       t->nsftids * sizeof(*t->stid_tab) +
 	       stid_bmap_size * sizeof(long) +
-	       t->nftids * sizeof(*t->ftid_tab) +
-	       t->nsftids * sizeof(*t->ftid_tab);
+	       max_ftids * sizeof(*t->ftid_tab) +
+	       ftid_bmap_size * sizeof(long);
 
 	t->tid_tab = t4_alloc_mem(size);
 	if (!t->tid_tab)
@@ -1574,8 +1350,10 @@
 	t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
 	t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
 	t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
+	t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids];
 	spin_lock_init(&t->stid_lock);
 	spin_lock_init(&t->atid_lock);
+	spin_lock_init(&t->ftid_lock);
 
 	t->stids_in_use = 0;
 	t->sftids_in_use = 0;
@@ -1590,12 +1368,16 @@
 			t->atid_tab[natids - 1].next = &t->atid_tab[natids];
 		t->afree = t->atid_tab;
 	}
-	bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
-	/* Reserve stid 0 for T4/T5 adapters */
-	if (!t->stid_base &&
-	    (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5))
-		__set_bit(0, t->stid_bmap);
 
+	if (is_offload(adap)) {
+		bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
+		/* Reserve stid 0 for T4/T5 adapters */
+		if (!t->stid_base &&
+		    CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
+			__set_bit(0, t->stid_bmap);
+	}
+
+	bitmap_zero(t->ftid_bmap, t->nftids);
 	return 0;
 }
 
@@ -2514,40 +2296,6 @@
 	return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false);
 }
 
-/* Return an error number if the indicated filter isn't writable ...
- */
-static int writable_filter(struct filter_entry *f)
-{
-	if (f->locked)
-		return -EPERM;
-	if (f->pending)
-		return -EBUSY;
-
-	return 0;
-}
-
-/* Delete the filter at the specified index (if valid).  The checks for all
- * the common problems with doing this like the filter being locked, currently
- * pending in another operation, etc.
- */
-static int delete_filter(struct adapter *adapter, unsigned int fidx)
-{
-	struct filter_entry *f;
-	int ret;
-
-	if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
-		return -EINVAL;
-
-	f = &adapter->tids.ftid_tab[fidx];
-	ret = writable_filter(f);
-	if (ret)
-		return ret;
-	if (f->valid)
-		return del_filter_wr(adapter, fidx);
-
-	return 0;
-}
-
 int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
 		__be32 sip, __be16 sport, __be16 vlan,
 		unsigned int queue, unsigned char port, unsigned char mask)
@@ -2964,6 +2712,35 @@
 	return err;
 }
 
+int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+		  struct tc_to_netdev *tc)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+
+	if (!(adap->flags & FULL_INIT_DONE)) {
+		dev_err(adap->pdev_dev,
+			"Failed to setup tc on port %d. Link Down?\n",
+			pi->port_id);
+		return -EINVAL;
+	}
+
+	if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
+	    tc->type == TC_SETUP_CLSU32) {
+		switch (tc->cls_u32->command) {
+		case TC_CLSU32_NEW_KNODE:
+		case TC_CLSU32_REPLACE_KNODE:
+			return cxgb4_config_knode(dev, proto, tc->cls_u32);
+		case TC_CLSU32_DELETE_KNODE:
+			return cxgb4_delete_knode(dev, proto, tc->cls_u32);
+		default:
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return -EOPNOTSUPP;
+}
+
 static const struct net_device_ops cxgb4_netdev_ops = {
 	.ndo_open             = cxgb_open,
 	.ndo_stop             = cxgb_close,
@@ -2987,6 +2764,7 @@
 	.ndo_busy_poll        = cxgb_busy_poll,
 #endif
 	.ndo_set_tx_maxrate   = cxgb_set_tx_maxrate,
+	.ndo_setup_tc         = cxgb_setup_tc,
 };
 
 #ifdef CONFIG_PCI_IOV
@@ -4659,6 +4437,7 @@
 	t4_free_mem(adapter->l2t);
 	t4_cleanup_sched(adapter);
 	t4_free_mem(adapter->tids.tid_tab);
+	cxgb4_cleanup_tc_u32(adapter);
 	kfree(adapter->sge.egr_map);
 	kfree(adapter->sge.ingr_map);
 	kfree(adapter->sge.starving_fl);
@@ -5003,7 +4782,8 @@
 		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 			NETIF_F_RXCSUM | NETIF_F_RXHASH |
-			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+			NETIF_F_HW_TC;
 		if (highdma)
 			netdev->hw_features |= NETIF_F_HIGHDMA;
 		netdev->features |= netdev->hw_features;
@@ -5087,10 +4867,16 @@
 				 i);
 	}
 
-	if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
+	if (tid_init(&adapter->tids) < 0) {
 		dev_warn(&pdev->dev, "could not allocate TID table, "
 			 "continuing\n");
 		adapter->params.offload = 0;
+	} else {
+		adapter->tc_u32 = cxgb4_init_tc_u32(adapter,
+						    CXGB4_MAX_LINK_HANDLE);
+		if (!adapter->tc_u32)
+			dev_warn(&pdev->dev,
+				 "could not offload tc u32, continuing\n");
 	}
 
 	if (is_offload(adapter)) {
@@ -5274,13 +5060,7 @@
 		/* If we allocated filters, free up state associated with any
 		 * valid filters ...
 		 */
-		if (adapter->tids.ftid_tab) {
-			struct filter_entry *f = &adapter->tids.ftid_tab[0];
-			for (i = 0; i < (adapter->tids.nftids +
-					adapter->tids.nsftids); i++, f++)
-				if (f->valid)
-					clear_filter(adapter, f);
-		}
+		clear_all_filters(adapter);
 
 		if (adapter->flags & FULL_INIT_DONE)
 			cxgb_down(adapter);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
new file mode 100644
index 0000000..49d2deb
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -0,0 +1,483 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include "cxgb4.h"
+#include "cxgb4_tc_u32_parse.h"
+#include "cxgb4_tc_u32.h"
+
+/* Fill ch_filter_specification with parsed match value/mask pair. */
+static int fill_match_fields(struct adapter *adap,
+			     struct ch_filter_specification *fs,
+			     struct tc_cls_u32_offload *cls,
+			     const struct cxgb4_match_field *entry,
+			     bool next_header)
+{
+	unsigned int i, j;
+	u32 val, mask;
+	int off, err;
+	bool found;
+
+	for (i = 0; i < cls->knode.sel->nkeys; i++) {
+		off = cls->knode.sel->keys[i].off;
+		val = cls->knode.sel->keys[i].val;
+		mask = cls->knode.sel->keys[i].mask;
+
+		if (next_header) {
+			/* For next headers, parse only keys with offmask */
+			if (!cls->knode.sel->keys[i].offmask)
+				continue;
+		} else {
+			/* For the remaining, parse only keys without offmask */
+			if (cls->knode.sel->keys[i].offmask)
+				continue;
+		}
+
+		found = false;
+
+		for (j = 0; entry[j].val; j++) {
+			if (off == entry[j].off) {
+				found = true;
+				err = entry[j].val(fs, val, mask);
+				if (err)
+					return err;
+				break;
+			}
+		}
+
+		if (!found)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Fill ch_filter_specification with parsed action. */
+static int fill_action_fields(struct adapter *adap,
+			      struct ch_filter_specification *fs,
+			      struct tc_cls_u32_offload *cls)
+{
+	unsigned int num_actions = 0;
+	const struct tc_action *a;
+	struct tcf_exts *exts;
+	LIST_HEAD(actions);
+
+	exts = cls->knode.exts;
+	if (tc_no_actions(exts))
+		return -EINVAL;
+
+	tcf_exts_to_list(exts, &actions);
+	list_for_each_entry(a, &actions, list) {
+		/* Don't allow more than one action per rule. */
+		if (num_actions)
+			return -EINVAL;
+
+		/* Drop in hardware. */
+		if (is_tcf_gact_shot(a)) {
+			fs->action = FILTER_DROP;
+			num_actions++;
+			continue;
+		}
+
+		/* Re-direct to specified port in hardware. */
+		if (is_tcf_mirred_redirect(a)) {
+			struct net_device *n_dev;
+			unsigned int i, index;
+			bool found = false;
+
+			index = tcf_mirred_ifindex(a);
+			for_each_port(adap, i) {
+				n_dev = adap->port[i];
+				if (index == n_dev->ifindex) {
+					fs->action = FILTER_SWITCH;
+					fs->eport = i;
+					found = true;
+					break;
+				}
+			}
+
+			/* Interface doesn't belong to any port of
+			 * the underlying hardware.
+			 */
+			if (!found)
+				return -EINVAL;
+
+			num_actions++;
+			continue;
+		}
+
+		/* Un-supported action. */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
+		       struct tc_cls_u32_offload *cls)
+{
+	const struct cxgb4_match_field *start, *link_start = NULL;
+	struct adapter *adapter = netdev2adap(dev);
+	struct ch_filter_specification fs;
+	struct cxgb4_tc_u32_table *t;
+	struct cxgb4_link *link;
+	unsigned int filter_id;
+	u32 uhtid, link_uhtid;
+	bool is_ipv6 = false;
+	int ret;
+
+	if (!can_tc_u32_offload(dev))
+		return -EOPNOTSUPP;
+
+	if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6))
+		return -EOPNOTSUPP;
+
+	/* Fetch the location to insert the filter. */
+	filter_id = cls->knode.handle & 0xFFFFF;
+
+	if (filter_id > adapter->tids.nftids) {
+		dev_err(adapter->pdev_dev,
+			"Location %d out of range for insertion. Max: %d\n",
+			filter_id, adapter->tids.nftids);
+		return -ERANGE;
+	}
+
+	t = adapter->tc_u32;
+	uhtid = TC_U32_USERHTID(cls->knode.handle);
+	link_uhtid = TC_U32_USERHTID(cls->knode.link_handle);
+
+	/* Ensure that uhtid is either root u32 (i.e. 0x800)
+	 * or a a valid linked bucket.
+	 */
+	if (uhtid != 0x800 && uhtid >= t->size)
+		return -EINVAL;
+
+	/* Ensure link handle uhtid is sane, if specified. */
+	if (link_uhtid >= t->size)
+		return -EINVAL;
+
+	memset(&fs, 0, sizeof(fs));
+
+	if (protocol == htons(ETH_P_IPV6)) {
+		start = cxgb4_ipv6_fields;
+		is_ipv6 = true;
+	} else {
+		start = cxgb4_ipv4_fields;
+		is_ipv6 = false;
+	}
+
+	if (uhtid != 0x800) {
+		/* Link must exist from root node before insertion. */
+		if (!t->table[uhtid - 1].link_handle)
+			return -EINVAL;
+
+		/* Link must have a valid supported next header. */
+		link_start = t->table[uhtid - 1].match_field;
+		if (!link_start)
+			return -EINVAL;
+	}
+
+	/* Parse links and record them for subsequent jumps to valid
+	 * next headers.
+	 */
+	if (link_uhtid) {
+		const struct cxgb4_next_header *next;
+		bool found = false;
+		unsigned int i, j;
+		u32 val, mask;
+		int off;
+
+		if (t->table[link_uhtid - 1].link_handle) {
+			dev_err(adapter->pdev_dev,
+				"Link handle exists for: 0x%x\n",
+				link_uhtid);
+			return -EINVAL;
+		}
+
+		next = is_ipv6 ? cxgb4_ipv6_jumps : cxgb4_ipv4_jumps;
+
+		/* Try to find matches that allow jumps to next header. */
+		for (i = 0; next[i].jump; i++) {
+			if (next[i].offoff != cls->knode.sel->offoff ||
+			    next[i].shift != cls->knode.sel->offshift ||
+			    next[i].mask != cls->knode.sel->offmask ||
+			    next[i].offset != cls->knode.sel->off)
+				continue;
+
+			/* Found a possible candidate.  Find a key that
+			 * matches the corresponding offset, value, and
+			 * mask to jump to next header.
+			 */
+			for (j = 0; j < cls->knode.sel->nkeys; j++) {
+				off = cls->knode.sel->keys[j].off;
+				val = cls->knode.sel->keys[j].val;
+				mask = cls->knode.sel->keys[j].mask;
+
+				if (next[i].match_off == off &&
+				    next[i].match_val == val &&
+				    next[i].match_mask == mask) {
+					found = true;
+					break;
+				}
+			}
+
+			if (!found)
+				continue; /* Try next candidate. */
+
+			/* Candidate to jump to next header found.
+			 * Translate all keys to internal specification
+			 * and store them in jump table. This spec is copied
+			 * later to set the actual filters.
+			 */
+			ret = fill_match_fields(adapter, &fs, cls,
+						start, false);
+			if (ret)
+				goto out;
+
+			link = &t->table[link_uhtid - 1];
+			link->match_field = next[i].jump;
+			link->link_handle = cls->knode.handle;
+			memcpy(&link->fs, &fs, sizeof(fs));
+			break;
+		}
+
+		/* No candidate found to jump to next header. */
+		if (!found)
+			return -EINVAL;
+
+		return 0;
+	}
+
+	/* Fill ch_filter_specification match fields to be shipped to hardware.
+	 * Copy the linked spec (if any) first.  And then update the spec as
+	 * needed.
+	 */
+	if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) {
+		/* Copy linked ch_filter_specification */
+		memcpy(&fs, &t->table[uhtid - 1].fs, sizeof(fs));
+		ret = fill_match_fields(adapter, &fs, cls,
+					link_start, true);
+		if (ret)
+			goto out;
+	}
+
+	ret = fill_match_fields(adapter, &fs, cls, start, false);
+	if (ret)
+		goto out;
+
+	/* Fill ch_filter_specification action fields to be shipped to
+	 * hardware.
+	 */
+	ret = fill_action_fields(adapter, &fs, cls);
+	if (ret)
+		goto out;
+
+	/* The filter spec has been completely built from the info
+	 * provided from u32.  We now set some default fields in the
+	 * spec for sanity.
+	 */
+
+	/* Match only packets coming from the ingress port where this
+	 * filter will be created.
+	 */
+	fs.val.iport = netdev2pinfo(dev)->port_id;
+	fs.mask.iport = ~0;
+
+	/* Enable filter hit counts. */
+	fs.hitcnts = 1;
+
+	/* Set type of filter - IPv6 or IPv4 */
+	fs.type = is_ipv6 ? 1 : 0;
+
+	/* Set the filter */
+	ret = cxgb4_set_filter(dev, filter_id, &fs);
+	if (ret)
+		goto out;
+
+	/* If this is a linked bucket, then set the corresponding
+	 * entry in the bitmap to mark it as belonging to this linked
+	 * bucket.
+	 */
+	if (uhtid != 0x800 && t->table[uhtid - 1].link_handle)
+		set_bit(filter_id, t->table[uhtid - 1].tid_map);
+
+out:
+	return ret;
+}
+
+int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
+		       struct tc_cls_u32_offload *cls)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	unsigned int filter_id, max_tids, i, j;
+	struct cxgb4_link *link = NULL;
+	struct cxgb4_tc_u32_table *t;
+	u32 handle, uhtid;
+	int ret;
+
+	if (!can_tc_u32_offload(dev))
+		return -EOPNOTSUPP;
+
+	/* Fetch the location to delete the filter. */
+	filter_id = cls->knode.handle & 0xFFFFF;
+
+	if (filter_id > adapter->tids.nftids) {
+		dev_err(adapter->pdev_dev,
+			"Location %d out of range for deletion. Max: %d\n",
+			filter_id, adapter->tids.nftids);
+		return -ERANGE;
+	}
+
+	t = adapter->tc_u32;
+	handle = cls->knode.handle;
+	uhtid = TC_U32_USERHTID(cls->knode.handle);
+
+	/* Ensure that uhtid is either root u32 (i.e. 0x800)
+	 * or a a valid linked bucket.
+	 */
+	if (uhtid != 0x800 && uhtid >= t->size)
+		return -EINVAL;
+
+	/* Delete the specified filter */
+	if (uhtid != 0x800) {
+		link = &t->table[uhtid - 1];
+		if (!link->link_handle)
+			return -EINVAL;
+
+		if (!test_bit(filter_id, link->tid_map))
+			return -EINVAL;
+	}
+
+	ret = cxgb4_del_filter(dev, filter_id);
+	if (ret)
+		goto out;
+
+	if (link)
+		clear_bit(filter_id, link->tid_map);
+
+	/* If a link is being deleted, then delete all filters
+	 * associated with the link.
+	 */
+	max_tids = adapter->tids.nftids;
+	for (i = 0; i < t->size; i++) {
+		link = &t->table[i];
+
+		if (link->link_handle == handle) {
+			for (j = 0; j < max_tids; j++) {
+				if (!test_bit(j, link->tid_map))
+					continue;
+
+				ret = __cxgb4_del_filter(dev, j, NULL);
+				if (ret)
+					goto out;
+
+				clear_bit(j, link->tid_map);
+			}
+
+			/* Clear the link state */
+			link->match_field = NULL;
+			link->link_handle = 0;
+			memset(&link->fs, 0, sizeof(link->fs));
+			break;
+		}
+	}
+
+out:
+	return ret;
+}
+
+void cxgb4_cleanup_tc_u32(struct adapter *adap)
+{
+	struct cxgb4_tc_u32_table *t;
+	unsigned int i;
+
+	if (!adap->tc_u32)
+		return;
+
+	/* Free up all allocated memory. */
+	t = adap->tc_u32;
+	for (i = 0; i < t->size; i++) {
+		struct cxgb4_link *link = &t->table[i];
+
+		t4_free_mem(link->tid_map);
+	}
+	t4_free_mem(adap->tc_u32);
+}
+
+struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap,
+					     unsigned int size)
+{
+	struct cxgb4_tc_u32_table *t;
+	unsigned int i;
+
+	if (!size)
+		return NULL;
+
+	t = t4_alloc_mem(sizeof(*t) +
+			 (size * sizeof(struct cxgb4_link)));
+	if (!t)
+		return NULL;
+
+	t->size = size;
+
+	for (i = 0; i < t->size; i++) {
+		struct cxgb4_link *link = &t->table[i];
+		unsigned int bmap_size;
+		unsigned int max_tids;
+
+		max_tids = adap->tids.nftids;
+		bmap_size = BITS_TO_LONGS(max_tids);
+		link->tid_map = t4_alloc_mem(sizeof(unsigned long) * bmap_size);
+		if (!link->tid_map)
+			goto out_no_mem;
+		bitmap_zero(link->tid_map, max_tids);
+	}
+
+	return t;
+
+out_no_mem:
+	for (i = 0; i < t->size; i++) {
+		struct cxgb4_link *link = &t->table[i];
+
+		if (link->tid_map)
+			t4_free_mem(link->tid_map);
+	}
+
+	if (t)
+		t4_free_mem(t);
+
+	return NULL;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
new file mode 100644
index 0000000..6bdc885
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
@@ -0,0 +1,57 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_TC_U32_H
+#define __CXGB4_TC_U32_H
+
+#include <net/pkt_cls.h>
+
+#define CXGB4_MAX_LINK_HANDLE 32
+
+static inline bool can_tc_u32_offload(struct net_device *dev)
+{
+	struct adapter *adap = netdev2adap(dev);
+
+	return (dev->features & NETIF_F_HW_TC) && adap->tc_u32 ? true : false;
+}
+
+int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
+		       struct tc_cls_u32_offload *cls);
+int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
+		       struct tc_cls_u32_offload *cls);
+
+void cxgb4_cleanup_tc_u32(struct adapter *adapter);
+struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap,
+					     unsigned int size);
+#endif /* __CXGB4_TC_U32_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h
new file mode 100644
index 0000000..a4b99ed
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h
@@ -0,0 +1,294 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_TC_U32_PARSE_H
+#define __CXGB4_TC_U32_PARSE_H
+
+struct cxgb4_match_field {
+	int off; /* Offset from the beginning of the header to match */
+	/* Fill the value/mask pair in the spec if matched */
+	int (*val)(struct ch_filter_specification *f, u32 val, u32 mask);
+};
+
+/* IPv4 match fields */
+static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f,
+				      u32 val, u32 mask)
+{
+	f->val.tos  = (ntohl(val)  >> 16) & 0x000000FF;
+	f->mask.tos = (ntohl(mask) >> 16) & 0x000000FF;
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f,
+				       u32 val, u32 mask)
+{
+	u32 mask_val;
+	u8 frag_val;
+
+	frag_val = (ntohl(val) >> 13) & 0x00000007;
+	mask_val = ntohl(mask) & 0x0000FFFF;
+
+	if (frag_val == 0x1 && mask_val != 0x3FFF) { /* MF set */
+		f->val.frag = 1;
+		f->mask.frag = 1;
+	} else if (frag_val == 0x2 && mask_val != 0x3FFF) { /* DF set */
+		f->val.frag = 0;
+		f->mask.frag = 1;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f,
+					u32 val, u32 mask)
+{
+	f->val.proto  = (ntohl(val)  >> 16) & 0x000000FF;
+	f->mask.proto = (ntohl(mask) >> 16) & 0x000000FF;
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f,
+					 u32 val, u32 mask)
+{
+	memcpy(&f->val.fip[0],  &val,  sizeof(u32));
+	memcpy(&f->mask.fip[0], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv4_dst_ip(struct ch_filter_specification *f,
+					 u32 val, u32 mask)
+{
+	memcpy(&f->val.lip[0],  &val,  sizeof(u32));
+	memcpy(&f->mask.lip[0], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static const struct cxgb4_match_field cxgb4_ipv4_fields[] = {
+	{ .off = 0,  .val = cxgb4_fill_ipv4_tos },
+	{ .off = 4,  .val = cxgb4_fill_ipv4_frag },
+	{ .off = 8,  .val = cxgb4_fill_ipv4_proto },
+	{ .off = 12, .val = cxgb4_fill_ipv4_src_ip },
+	{ .off = 16, .val = cxgb4_fill_ipv4_dst_ip },
+	{ .val = NULL }
+};
+
+/* IPv6 match fields */
+static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f,
+				      u32 val, u32 mask)
+{
+	f->val.tos  = (ntohl(val)  >> 20) & 0x000000FF;
+	f->mask.tos = (ntohl(mask) >> 20) & 0x000000FF;
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f,
+					u32 val, u32 mask)
+{
+	f->val.proto  = (ntohl(val)  >> 8) & 0x000000FF;
+	f->mask.proto = (ntohl(mask) >> 8) & 0x000000FF;
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.fip[0],  &val,  sizeof(u32));
+	memcpy(&f->mask.fip[0], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.fip[4],  &val,  sizeof(u32));
+	memcpy(&f->mask.fip[4], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.fip[8],  &val,  sizeof(u32));
+	memcpy(&f->mask.fip[8], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.fip[12],  &val,  sizeof(u32));
+	memcpy(&f->mask.fip[12], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.lip[0],  &val,  sizeof(u32));
+	memcpy(&f->mask.lip[0], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip1(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.lip[4],  &val,  sizeof(u32));
+	memcpy(&f->mask.lip[4], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.lip[8],  &val,  sizeof(u32));
+	memcpy(&f->mask.lip[8], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip3(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.lip[12],  &val,  sizeof(u32));
+	memcpy(&f->mask.lip[12], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static const struct cxgb4_match_field cxgb4_ipv6_fields[] = {
+	{ .off = 0,  .val = cxgb4_fill_ipv6_tos },
+	{ .off = 4,  .val = cxgb4_fill_ipv6_proto },
+	{ .off = 8,  .val = cxgb4_fill_ipv6_src_ip0 },
+	{ .off = 12, .val = cxgb4_fill_ipv6_src_ip1 },
+	{ .off = 16, .val = cxgb4_fill_ipv6_src_ip2 },
+	{ .off = 20, .val = cxgb4_fill_ipv6_src_ip3 },
+	{ .off = 24, .val = cxgb4_fill_ipv6_dst_ip0 },
+	{ .off = 28, .val = cxgb4_fill_ipv6_dst_ip1 },
+	{ .off = 32, .val = cxgb4_fill_ipv6_dst_ip2 },
+	{ .off = 36, .val = cxgb4_fill_ipv6_dst_ip3 },
+	{ .val = NULL }
+};
+
+/* TCP/UDP match */
+static inline int cxgb4_fill_l4_ports(struct ch_filter_specification *f,
+				      u32 val, u32 mask)
+{
+	f->val.fport  = ntohl(val)  >> 16;
+	f->mask.fport = ntohl(mask) >> 16;
+	f->val.lport  = ntohl(val)  & 0x0000FFFF;
+	f->mask.lport = ntohl(mask) & 0x0000FFFF;
+
+	return 0;
+};
+
+static const struct cxgb4_match_field cxgb4_tcp_fields[] = {
+	{ .off = 0, .val = cxgb4_fill_l4_ports },
+	{ .val = NULL }
+};
+
+static const struct cxgb4_match_field cxgb4_udp_fields[] = {
+	{ .off = 0, .val = cxgb4_fill_l4_ports },
+	{ .val = NULL }
+};
+
+struct cxgb4_next_header {
+	unsigned int offset; /* Offset to next header */
+	/* offset, shift, and mask added to offset above
+	 * to get to next header.  Useful when using a header
+	 * field's value to jump to next header such as IHL field
+	 * in IPv4 header.
+	 */
+	unsigned int offoff;
+	u32 shift;
+	u32 mask;
+	/* match criteria to make this jump */
+	unsigned int match_off;
+	u32 match_val;
+	u32 match_mask;
+	/* location of jump to make */
+	const struct cxgb4_match_field *jump;
+};
+
+/* Accept a rule with a jump to transport layer header based on IHL field in
+ * IPv4 header.
+ */
+static const struct cxgb4_next_header cxgb4_ipv4_jumps[] = {
+	{ .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF,
+	  .match_off = 8, .match_val = 0x600, .match_mask = 0xFF00,
+	  .jump = cxgb4_tcp_fields },
+	{ .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF,
+	  .match_off = 8, .match_val = 0x1100, .match_mask = 0xFF00,
+	  .jump = cxgb4_udp_fields },
+	{ .jump = NULL }
+};
+
+/* Accept a rule with a jump directly past the 40 Bytes of IPv6 fixed header
+ * to get to transport layer header.
+ */
+static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = {
+	{ .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0,
+	  .match_off = 4, .match_val = 0x60000, .match_mask = 0xFF0000,
+	  .jump = cxgb4_tcp_fields },
+	{ .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0,
+	  .match_off = 4, .match_val = 0x110000, .match_mask = 0xFF0000,
+	  .jump = cxgb4_udp_fields },
+	{ .jump = NULL }
+};
+
+struct cxgb4_link {
+	const struct cxgb4_match_field *match_field;  /* Next header */
+	struct ch_filter_specification fs; /* Match spec associated with link */
+	u32 link_handle;         /* Knode handle associated with the link */
+	unsigned long *tid_map;  /* Bitmap for filter tids */
+};
+
+struct cxgb4_tc_u32_table {
+	unsigned int size;          /* number of entries in table */
+	struct cxgb4_link table[0]; /* Jump table */
+};
+#endif /* __CXGB4_TC_U32_PARSE_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index b3544f6..47bd14f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the Chelsio T4 Ethernet driver for Linux.
  *
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -106,6 +106,7 @@
 	unsigned int atid_base;
 
 	struct filter_entry *ftid_tab;
+	unsigned long *ftid_bmap;
 	unsigned int nftids;
 	unsigned int ftid_base;
 	unsigned int aftid_base;
@@ -126,6 +127,8 @@
 	atomic_t tids_in_use;
 	/* TIDs in the HASH */
 	atomic_t hash_tids_in_use;
+	/* lock for setting/clearing filter bitmap */
+	spinlock_t ftid_lock;
 };
 
 static inline void *lookup_tid(const struct tid_info *t, unsigned int tid)
@@ -185,6 +188,27 @@
 int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
 			       unsigned int queue, bool ipv6);
 
+/* Filter operation context to allow callers of cxgb4_set_filter() and
+ * cxgb4_del_filter() to wait for an asynchronous completion.
+ */
+struct filter_ctx {
+	struct completion completion;	/* completion rendezvous */
+	void *closure;			/* caller's opaque information */
+	int result;			/* result of operation */
+	u32 tid;			/* to store tid */
+};
+
+struct ch_filter_specification;
+
+int __cxgb4_set_filter(struct net_device *dev, int filter_id,
+		       struct ch_filter_specification *fs,
+		       struct filter_ctx *ctx);
+int __cxgb4_del_filter(struct net_device *dev, int filter_id,
+		       struct filter_ctx *ctx);
+int cxgb4_set_filter(struct net_device *dev, int filter_id,
+		     struct ch_filter_specification *fs);
+int cxgb4_del_filter(struct net_device *dev, int filter_id);
+
 static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue)
 {
 	skb_set_queue_mapping(skb, (queue << 1) | prio);
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 36361f8..90f9c54 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -60,6 +60,8 @@
 	struct ftgmac100_descs *descs;
 	dma_addr_t descs_dma_addr;
 
+	struct page *rx_pages[RX_QUEUE_ENTRIES];
+
 	unsigned int rx_pointer;
 	unsigned int tx_clean_pointer;
 	unsigned int tx_pointer;
@@ -77,6 +79,9 @@
 	int int_mask_all;
 	bool use_ncsi;
 	bool enabled;
+
+	u32 rxdes0_edorr_mask;
+	u32 txdes0_edotr_mask;
 };
 
 static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv,
@@ -257,10 +262,11 @@
 	return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_RXPKT_RDY);
 }
 
-static void ftgmac100_rxdes_set_dma_own(struct ftgmac100_rxdes *rxdes)
+static void ftgmac100_rxdes_set_dma_own(const struct ftgmac100 *priv,
+					struct ftgmac100_rxdes *rxdes)
 {
 	/* clear status bits */
-	rxdes->rxdes0 &= cpu_to_le32(FTGMAC100_RXDES0_EDORR);
+	rxdes->rxdes0 &= cpu_to_le32(priv->rxdes0_edorr_mask);
 }
 
 static bool ftgmac100_rxdes_rx_error(struct ftgmac100_rxdes *rxdes)
@@ -298,9 +304,10 @@
 	return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_MULTICAST);
 }
 
-static void ftgmac100_rxdes_set_end_of_ring(struct ftgmac100_rxdes *rxdes)
+static void ftgmac100_rxdes_set_end_of_ring(const struct ftgmac100 *priv,
+					    struct ftgmac100_rxdes *rxdes)
 {
-	rxdes->rxdes0 |= cpu_to_le32(FTGMAC100_RXDES0_EDORR);
+	rxdes->rxdes0 |= cpu_to_le32(priv->rxdes0_edorr_mask);
 }
 
 static void ftgmac100_rxdes_set_dma_addr(struct ftgmac100_rxdes *rxdes,
@@ -341,18 +348,27 @@
 	return rxdes->rxdes1 & cpu_to_le32(FTGMAC100_RXDES1_IP_CHKSUM_ERR);
 }
 
+static inline struct page **ftgmac100_rxdes_page_slot(struct ftgmac100 *priv,
+						      struct ftgmac100_rxdes *rxdes)
+{
+	return &priv->rx_pages[rxdes - priv->descs->rxdes];
+}
+
 /*
  * rxdes2 is not used by hardware. We use it to keep track of page.
  * Since hardware does not touch it, we can skip cpu_to_le32()/le32_to_cpu().
  */
-static void ftgmac100_rxdes_set_page(struct ftgmac100_rxdes *rxdes, struct page *page)
+static void ftgmac100_rxdes_set_page(struct ftgmac100 *priv,
+				     struct ftgmac100_rxdes *rxdes,
+				     struct page *page)
 {
-	rxdes->rxdes2 = (unsigned int)page;
+	*ftgmac100_rxdes_page_slot(priv, rxdes) = page;
 }
 
-static struct page *ftgmac100_rxdes_get_page(struct ftgmac100_rxdes *rxdes)
+static struct page *ftgmac100_rxdes_get_page(struct ftgmac100 *priv,
+					     struct ftgmac100_rxdes *rxdes)
 {
-	return (struct page *)rxdes->rxdes2;
+	return *ftgmac100_rxdes_page_slot(priv, rxdes);
 }
 
 /******************************************************************************
@@ -382,7 +398,7 @@
 		if (ftgmac100_rxdes_first_segment(rxdes))
 			return rxdes;
 
-		ftgmac100_rxdes_set_dma_own(rxdes);
+		ftgmac100_rxdes_set_dma_own(priv, rxdes);
 		ftgmac100_rx_pointer_advance(priv);
 		rxdes = ftgmac100_current_rxdes(priv);
 	}
@@ -453,7 +469,7 @@
 		if (ftgmac100_rxdes_last_segment(rxdes))
 			done = true;
 
-		ftgmac100_rxdes_set_dma_own(rxdes);
+		ftgmac100_rxdes_set_dma_own(priv, rxdes);
 		ftgmac100_rx_pointer_advance(priv);
 		rxdes = ftgmac100_current_rxdes(priv);
 	} while (!done && ftgmac100_rxdes_packet_ready(rxdes));
@@ -501,7 +517,7 @@
 
 	do {
 		dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes);
-		struct page *page = ftgmac100_rxdes_get_page(rxdes);
+		struct page *page = ftgmac100_rxdes_get_page(priv, rxdes);
 		unsigned int size;
 
 		dma_unmap_page(priv->dev, map, RX_BUF_SIZE, DMA_FROM_DEVICE);
@@ -545,10 +561,11 @@
 /******************************************************************************
  * internal functions (transmit descriptor)
  *****************************************************************************/
-static void ftgmac100_txdes_reset(struct ftgmac100_txdes *txdes)
+static void ftgmac100_txdes_reset(const struct ftgmac100 *priv,
+				  struct ftgmac100_txdes *txdes)
 {
 	/* clear all except end of ring bit */
-	txdes->txdes0 &= cpu_to_le32(FTGMAC100_TXDES0_EDOTR);
+	txdes->txdes0 &= cpu_to_le32(priv->txdes0_edotr_mask);
 	txdes->txdes1 = 0;
 	txdes->txdes2 = 0;
 	txdes->txdes3 = 0;
@@ -569,9 +586,10 @@
 	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN);
 }
 
-static void ftgmac100_txdes_set_end_of_ring(struct ftgmac100_txdes *txdes)
+static void ftgmac100_txdes_set_end_of_ring(const struct ftgmac100 *priv,
+					    struct ftgmac100_txdes *txdes)
 {
-	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_EDOTR);
+	txdes->txdes0 |= cpu_to_le32(priv->txdes0_edotr_mask);
 }
 
 static void ftgmac100_txdes_set_first_segment(struct ftgmac100_txdes *txdes)
@@ -690,7 +708,7 @@
 
 	dev_kfree_skb(skb);
 
-	ftgmac100_txdes_reset(txdes);
+	ftgmac100_txdes_reset(priv, txdes);
 
 	ftgmac100_tx_clean_pointer_advance(priv);
 
@@ -779,9 +797,9 @@
 		return -ENOMEM;
 	}
 
-	ftgmac100_rxdes_set_page(rxdes, page);
+	ftgmac100_rxdes_set_page(priv, rxdes, page);
 	ftgmac100_rxdes_set_dma_addr(rxdes, map);
-	ftgmac100_rxdes_set_dma_own(rxdes);
+	ftgmac100_rxdes_set_dma_own(priv, rxdes);
 	return 0;
 }
 
@@ -791,7 +809,7 @@
 
 	for (i = 0; i < RX_QUEUE_ENTRIES; i++) {
 		struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i];
-		struct page *page = ftgmac100_rxdes_get_page(rxdes);
+		struct page *page = ftgmac100_rxdes_get_page(priv, rxdes);
 		dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes);
 
 		if (!page)
@@ -828,7 +846,8 @@
 		return -ENOMEM;
 
 	/* initialize RX ring */
-	ftgmac100_rxdes_set_end_of_ring(&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]);
+	ftgmac100_rxdes_set_end_of_ring(priv,
+					&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]);
 
 	for (i = 0; i < RX_QUEUE_ENTRIES; i++) {
 		struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i];
@@ -838,7 +857,8 @@
 	}
 
 	/* initialize TX ring */
-	ftgmac100_txdes_set_end_of_ring(&priv->descs->txdes[TX_QUEUE_ENTRIES - 1]);
+	ftgmac100_txdes_set_end_of_ring(priv,
+					&priv->descs->txdes[TX_QUEUE_ENTRIES - 1]);
 	return 0;
 
 err:
@@ -1055,14 +1075,12 @@
 	}
 
 	if (status & priv->int_mask_all & (FTGMAC100_INT_NO_RXBUF |
-			FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR |
-			FTGMAC100_INT_PHYSTS_CHG)) {
+			FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR)) {
 		if (net_ratelimit())
-			netdev_info(netdev, "[ISR] = 0x%x: %s%s%s%s\n", status,
+			netdev_info(netdev, "[ISR] = 0x%x: %s%s%s\n", status,
 				    status & FTGMAC100_INT_NO_RXBUF ? "NO_RXBUF " : "",
 				    status & FTGMAC100_INT_RPKT_LOST ? "RPKT_LOST " : "",
-				    status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : "",
-				    status & FTGMAC100_INT_PHYSTS_CHG ? "PHYSTS_CHG" : "");
+				    status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : "");
 
 		if (status & FTGMAC100_INT_NO_RXBUF) {
 			/* RX buffer unavailable */
@@ -1092,6 +1110,7 @@
 static int ftgmac100_open(struct net_device *netdev)
 {
 	struct ftgmac100 *priv = netdev_priv(netdev);
+	unsigned int status;
 	int err;
 
 	err = ftgmac100_alloc_buffers(priv);
@@ -1117,6 +1136,11 @@
 
 	ftgmac100_init_hw(priv);
 	ftgmac100_start_hw(priv, priv->use_ncsi ? 100 : 10);
+
+	/* Clear stale interrupts */
+	status = ioread32(priv->base + FTGMAC100_OFFSET_ISR);
+	iowrite32(status, priv->base + FTGMAC100_OFFSET_ISR);
+
 	if (netdev->phydev)
 		phy_start(netdev->phydev);
 	else if (priv->use_ncsi)
@@ -1226,12 +1250,21 @@
 	struct ftgmac100 *priv = netdev_priv(netdev);
 	struct platform_device *pdev = to_platform_device(priv->dev);
 	int i, err = 0;
+	u32 reg;
 
 	/* initialize mdio bus */
 	priv->mii_bus = mdiobus_alloc();
 	if (!priv->mii_bus)
 		return -EIO;
 
+	if (of_machine_is_compatible("aspeed,ast2400") ||
+	    of_machine_is_compatible("aspeed,ast2500")) {
+		/* This driver supports the old MDIO interface */
+		reg = ioread32(priv->base + FTGMAC100_OFFSET_REVR);
+		reg &= ~FTGMAC100_REVR_NEW_MDIO_INTERFACE;
+		iowrite32(reg, priv->base + FTGMAC100_OFFSET_REVR);
+	};
+
 	priv->mii_bus->name = "ftgmac100_mdio";
 	snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d",
 		 pdev->name, pdev->id);
@@ -1355,9 +1388,18 @@
 			      FTGMAC100_INT_XPKT_ETH |
 			      FTGMAC100_INT_XPKT_LOST |
 			      FTGMAC100_INT_AHB_ERR |
-			      FTGMAC100_INT_PHYSTS_CHG |
 			      FTGMAC100_INT_RPKT_BUF |
 			      FTGMAC100_INT_NO_RXBUF);
+
+	if (of_machine_is_compatible("aspeed,ast2400") ||
+	    of_machine_is_compatible("aspeed,ast2500")) {
+		priv->rxdes0_edorr_mask = BIT(30);
+		priv->txdes0_edotr_mask = BIT(30);
+	} else {
+		priv->rxdes0_edorr_mask = BIT(15);
+		priv->txdes0_edotr_mask = BIT(15);
+	}
+
 	if (pdev->dev.of_node &&
 	    of_get_property(pdev->dev.of_node, "use-ncsi", NULL)) {
 		if (!IS_ENABLED(CONFIG_NET_NCSI)) {
@@ -1367,7 +1409,6 @@
 
 		dev_info(&pdev->dev, "Using NCSI interface\n");
 		priv->use_ncsi = true;
-		priv->int_mask_all &= ~FTGMAC100_INT_PHYSTS_CHG;
 		priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler);
 		if (!priv->ndev)
 			goto err_ncsi_dev;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h
index 13408d4..a7ce0ac 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.h
+++ b/drivers/net/ethernet/faraday/ftgmac100.h
@@ -134,6 +134,11 @@
 #define FTGMAC100_DMAFIFOS_TXDMA_REQ		(1 << 31)
 
 /*
+ * Feature Register
+ */
+#define FTGMAC100_REVR_NEW_MDIO_INTERFACE	BIT(31)
+
+/*
  * Receive buffer size register
  */
 #define FTGMAC100_RBSR_SIZE(x)		((x) & 0x3fff)
@@ -152,6 +157,7 @@
 #define FTGMAC100_MACCR_FULLDUP		(1 << 8)
 #define FTGMAC100_MACCR_GIGA_MODE	(1 << 9)
 #define FTGMAC100_MACCR_CRC_APD		(1 << 10)
+#define FTGMAC100_MACCR_PHY_LINK_LEVEL	(1 << 11)
 #define FTGMAC100_MACCR_RX_RUNT		(1 << 12)
 #define FTGMAC100_MACCR_JUMBO_LF	(1 << 13)
 #define FTGMAC100_MACCR_RX_ALL		(1 << 14)
@@ -189,7 +195,6 @@
 } __attribute__ ((aligned(16)));
 
 #define FTGMAC100_TXDES0_TXBUF_SIZE(x)	((x) & 0x3fff)
-#define FTGMAC100_TXDES0_EDOTR		(1 << 15)
 #define FTGMAC100_TXDES0_CRC_ERR	(1 << 19)
 #define FTGMAC100_TXDES0_LTS		(1 << 28)
 #define FTGMAC100_TXDES0_FTS		(1 << 29)
@@ -215,7 +220,6 @@
 } __attribute__ ((aligned(16)));
 
 #define FTGMAC100_RXDES0_VDBC		0x3fff
-#define FTGMAC100_RXDES0_EDORR		(1 << 15)
 #define FTGMAC100_RXDES0_MULTICAST	(1 << 16)
 #define FTGMAC100_RXDES0_BROADCAST	(1 << 17)
 #define FTGMAC100_RXDES0_RX_ERR		(1 << 18)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index d7e1f8c..059aaed 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -994,10 +994,10 @@
 	struct hnae_handle *h = priv->ae_handle;
 	int state = 1;
 
-	if (priv->phy) {
+	if (ndev->phydev) {
 		h->dev->ops->adjust_link(h, ndev->phydev->speed,
 					 ndev->phydev->duplex);
-		state = priv->phy->link;
+		state = ndev->phydev->link;
 	}
 	state = state && h->dev->ops->get_status(h);
 
@@ -1022,7 +1022,6 @@
  */
 int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h)
 {
-	struct hns_nic_priv *priv = netdev_priv(ndev);
 	struct phy_device *phy_dev = h->phy_dev;
 	int ret;
 
@@ -1046,8 +1045,6 @@
 	if (h->phy_if == PHY_INTERFACE_MODE_XGMII)
 		phy_dev->autoneg = false;
 
-	priv->phy = phy_dev;
-
 	return 0;
 }
 
@@ -1224,8 +1221,8 @@
 	if (ret)
 		goto out_start_err;
 
-	if (priv->phy)
-		phy_start(priv->phy);
+	if (ndev->phydev)
+		phy_start(ndev->phydev);
 
 	clear_bit(NIC_STATE_DOWN, &priv->state);
 	(void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ);
@@ -1259,8 +1256,8 @@
 	netif_tx_disable(ndev);
 	priv->link = 0;
 
-	if (priv->phy)
-		phy_stop(priv->phy);
+	if (ndev->phydev)
+		phy_stop(ndev->phydev);
 
 	ops = priv->ae_handle->dev->ops;
 
@@ -1359,8 +1356,7 @@
 static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr,
 			    int cmd)
 {
-	struct hns_nic_priv *priv = netdev_priv(netdev);
-	struct phy_device *phy_dev = priv->phy;
+	struct phy_device *phy_dev = netdev->phydev;
 
 	if (!netif_running(netdev))
 		return -EINVAL;
@@ -2017,9 +2013,8 @@
 		hns_nic_uninit_ring_data(priv);
 	priv->ring_data = NULL;
 
-	if (priv->phy)
-		phy_disconnect(priv->phy);
-	priv->phy = NULL;
+	if (ndev->phydev)
+		phy_disconnect(ndev->phydev);
 
 	if (!IS_ERR_OR_NULL(priv->ae_handle))
 		hnae_put_handle(priv->ae_handle);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
index 44bb301..5b412de 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
@@ -59,7 +59,6 @@
 	u32 port_id;
 	int phy_mode;
 	int phy_led_val;
-	struct phy_device *phy;
 	struct net_device *netdev;
 	struct device *dev;
 	struct hnae_handle *ae_handle;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 5eb3245..47e59bb 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -48,9 +48,9 @@
 
 	h = priv->ae_handle;
 
-	if (priv->phy) {
-		if (!genphy_read_status(priv->phy))
-			link_stat = priv->phy->link;
+	if (net_dev->phydev) {
+		if (!genphy_read_status(net_dev->phydev))
+			link_stat = net_dev->phydev->link;
 		else
 			link_stat = 0;
 	}
@@ -64,15 +64,14 @@
 }
 
 static void hns_get_mdix_mode(struct net_device *net_dev,
-			      struct ethtool_cmd *cmd)
+			      struct ethtool_link_ksettings *cmd)
 {
 	int mdix_ctrl, mdix, retval, is_resolved;
-	struct hns_nic_priv *priv = netdev_priv(net_dev);
-	struct phy_device *phy_dev = priv->phy;
+	struct phy_device *phy_dev = net_dev->phydev;
 
 	if (!phy_dev || !phy_dev->mdio.bus) {
-		cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
-		cmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
 		return;
 	}
 
@@ -89,35 +88,35 @@
 
 	switch (mdix_ctrl) {
 	case 0x0:
-		cmd->eth_tp_mdix_ctrl = ETH_TP_MDI;
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI;
 		break;
 	case 0x1:
-		cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_X;
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_X;
 		break;
 	case 0x3:
-		cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
 		break;
 	default:
-		cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
 		break;
 	}
 
 	if (!is_resolved)
-		cmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
 	else if (mdix)
-		cmd->eth_tp_mdix = ETH_TP_MDI_X;
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_X;
 	else
-		cmd->eth_tp_mdix = ETH_TP_MDI;
+		cmd->base.eth_tp_mdix = ETH_TP_MDI;
 }
 
 /**
- *hns_nic_get_settings - implement ethtool get settings
+ *hns_nic_get_link_ksettings - implement ethtool get link ksettings
  *@net_dev: net_device
- *@cmd: ethtool_cmd
+ *@cmd: ethtool_link_ksettings
  *retuen 0 - success , negative --fail
  */
-static int hns_nic_get_settings(struct net_device *net_dev,
-				struct ethtool_cmd *cmd)
+static int hns_nic_get_link_ksettings(struct net_device *net_dev,
+				      struct ethtool_link_ksettings *cmd)
 {
 	struct hns_nic_priv *priv = netdev_priv(net_dev);
 	struct hnae_handle *h;
@@ -125,6 +124,7 @@
 	int ret;
 	u8 duplex;
 	u16 speed;
+	u32 supported, advertising;
 
 	if (!priv || !priv->ae_handle)
 		return -ESRCH;
@@ -139,38 +139,43 @@
 		return -EINVAL;
 	}
 
-	/* When there is no phy, autoneg is off. */
-	cmd->autoneg = false;
-	ethtool_cmd_speed_set(cmd, speed);
-	cmd->duplex = duplex;
+	ethtool_convert_link_mode_to_legacy_u32(&supported,
+						cmd->link_modes.supported);
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
 
-	if (priv->phy)
-		(void)phy_ethtool_gset(priv->phy, cmd);
+	/* When there is no phy, autoneg is off. */
+	cmd->base.autoneg = false;
+	cmd->base.cmd = speed;
+	cmd->base.duplex = duplex;
+
+	if (net_dev->phydev)
+		(void)phy_ethtool_ksettings_get(net_dev->phydev, cmd);
 
 	link_stat = hns_nic_get_link(net_dev);
 	if (!link_stat) {
-		ethtool_cmd_speed_set(cmd, (u32)SPEED_UNKNOWN);
-		cmd->duplex = DUPLEX_UNKNOWN;
+		cmd->base.speed = (u32)SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 
-	if (cmd->autoneg)
-		cmd->advertising |= ADVERTISED_Autoneg;
+	if (cmd->base.autoneg)
+		advertising |= ADVERTISED_Autoneg;
 
-	cmd->supported |= h->if_support;
+	supported |= h->if_support;
 	if (h->phy_if == PHY_INTERFACE_MODE_SGMII) {
-		cmd->supported |= SUPPORTED_TP;
-		cmd->advertising |= ADVERTISED_1000baseT_Full;
+		supported |= SUPPORTED_TP;
+		advertising |= ADVERTISED_1000baseT_Full;
 	} else if (h->phy_if == PHY_INTERFACE_MODE_XGMII) {
-		cmd->supported |= SUPPORTED_FIBRE;
-		cmd->advertising |= ADVERTISED_10000baseKR_Full;
+		supported |= SUPPORTED_FIBRE;
+		advertising |= ADVERTISED_10000baseKR_Full;
 	}
 
 	switch (h->media_type) {
 	case HNAE_MEDIA_TYPE_FIBER:
-		cmd->port = PORT_FIBRE;
+		cmd->base.port = PORT_FIBRE;
 		break;
 	case HNAE_MEDIA_TYPE_COPPER:
-		cmd->port = PORT_TP;
+		cmd->base.port = PORT_TP;
 		break;
 	case HNAE_MEDIA_TYPE_UNKNOWN:
 	default:
@@ -178,23 +183,27 @@
 	}
 
 	if (!(AE_IS_VER1(priv->enet_ver) && h->port_type == HNAE_PORT_DEBUG))
-		cmd->supported |= SUPPORTED_Pause;
+		supported |= SUPPORTED_Pause;
 
-	cmd->transceiver = XCVR_EXTERNAL;
-	cmd->mdio_support = (ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+
+	cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22;
 	hns_get_mdix_mode(net_dev, cmd);
 
 	return 0;
 }
 
 /**
- *hns_nic_set_settings - implement ethtool set settings
+ *hns_nic_set_link_settings - implement ethtool set link ksettings
  *@net_dev: net_device
- *@cmd: ethtool_cmd
+ *@cmd: ethtool_link_ksettings
  *retuen 0 - success , negative --fail
  */
-static int hns_nic_set_settings(struct net_device *net_dev,
-				struct ethtool_cmd *cmd)
+static int hns_nic_set_link_ksettings(struct net_device *net_dev,
+				      const struct ethtool_link_ksettings *cmd)
 {
 	struct hns_nic_priv *priv = netdev_priv(net_dev);
 	struct hnae_handle *h;
@@ -208,24 +217,25 @@
 		return -ENODEV;
 
 	h = priv->ae_handle;
-	speed = ethtool_cmd_speed(cmd);
+	speed = cmd->base.speed;
 
 	if (h->phy_if == PHY_INTERFACE_MODE_XGMII) {
-		if (cmd->autoneg == AUTONEG_ENABLE || speed != SPEED_10000 ||
-		    cmd->duplex != DUPLEX_FULL)
+		if (cmd->base.autoneg == AUTONEG_ENABLE ||
+		    speed != SPEED_10000 ||
+		    cmd->base.duplex != DUPLEX_FULL)
 			return -EINVAL;
 	} else if (h->phy_if == PHY_INTERFACE_MODE_SGMII) {
-		if (!priv->phy && cmd->autoneg == AUTONEG_ENABLE)
+		if (!net_dev->phydev && cmd->base.autoneg == AUTONEG_ENABLE)
 			return -EINVAL;
 
-		if (speed == SPEED_1000 && cmd->duplex == DUPLEX_HALF)
+		if (speed == SPEED_1000 && cmd->base.duplex == DUPLEX_HALF)
 			return -EINVAL;
-		if (priv->phy)
-			return phy_ethtool_sset(priv->phy, cmd);
+		if (net_dev->phydev)
+			return phy_ethtool_ksettings_set(net_dev->phydev, cmd);
 
 		if ((speed != SPEED_10 && speed != SPEED_100 &&
-		     speed != SPEED_1000) || (cmd->duplex != DUPLEX_HALF &&
-		     cmd->duplex != DUPLEX_FULL))
+		     speed != SPEED_1000) || (cmd->base.duplex != DUPLEX_HALF &&
+		     cmd->base.duplex != DUPLEX_FULL))
 			return -EINVAL;
 	} else {
 		netdev_err(net_dev, "Not supported!");
@@ -233,7 +243,7 @@
 	}
 
 	if (h->dev->ops->adjust_link) {
-		h->dev->ops->adjust_link(h, (int)speed, cmd->duplex);
+		h->dev->ops->adjust_link(h, (int)speed, cmd->base.duplex);
 		return 0;
 	}
 
@@ -305,7 +315,7 @@
 {
 	int ret = 0;
 	struct hns_nic_priv *priv = netdev_priv(ndev);
-	struct phy_device *phy_dev = priv->phy;
+	struct phy_device *phy_dev = ndev->phydev;
 	struct hnae_handle *h = priv->ae_handle;
 
 	switch (loop) {
@@ -910,7 +920,7 @@
 		memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_SERDES],
 		       ETH_GSTRING_LEN);
 		buff += ETH_GSTRING_LEN;
-		if ((priv->phy) && (!priv->phy->is_c45))
+		if ((netdev->phydev) && (!netdev->phydev->is_c45))
 			memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_PHY],
 			       ETH_GSTRING_LEN);
 
@@ -996,7 +1006,7 @@
 		if (priv->ae_handle->phy_if == PHY_INTERFACE_MODE_XGMII)
 			cnt--;
 
-		if ((!priv->phy) || (priv->phy->is_c45))
+		if ((!netdev->phydev) || (netdev->phydev->is_c45))
 			cnt--;
 
 		return cnt;
@@ -1015,8 +1025,7 @@
 int hns_phy_led_set(struct net_device *netdev, int value)
 {
 	int retval;
-	struct hns_nic_priv *priv = netdev_priv(netdev);
-	struct phy_device *phy_dev = priv->phy;
+	struct phy_device *phy_dev = netdev->phydev;
 
 	retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_LED);
 	retval |= phy_write(phy_dev, HNS_LED_FC_REG, value);
@@ -1039,7 +1048,7 @@
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_handle *h = priv->ae_handle;
-	struct phy_device *phy_dev = priv->phy;
+	struct phy_device *phy_dev = netdev->phydev;
 	int ret;
 
 	if (phy_dev)
@@ -1159,8 +1168,7 @@
 static int hns_nic_nway_reset(struct net_device *netdev)
 {
 	int ret = 0;
-	struct hns_nic_priv *priv = netdev_priv(netdev);
-	struct phy_device *phy = priv->phy;
+	struct phy_device *phy = netdev->phydev;
 
 	if (netif_running(netdev)) {
 		if (phy)
@@ -1267,8 +1275,6 @@
 static const struct ethtool_ops hns_ethtool_ops = {
 	.get_drvinfo = hns_nic_get_drvinfo,
 	.get_link  = hns_nic_get_link,
-	.get_settings  = hns_nic_get_settings,
-	.set_settings  = hns_nic_set_settings,
 	.get_ringparam = hns_get_ringparam,
 	.get_pauseparam = hns_get_pauseparam,
 	.set_pauseparam = hns_set_pauseparam,
@@ -1288,6 +1294,8 @@
 	.get_rxfh = hns_get_rss,
 	.set_rxfh = hns_set_rss,
 	.get_rxnfc = hns_get_rxnfc,
+	.get_link_ksettings  = hns_nic_get_link_ksettings,
+	.set_link_ksettings  = hns_nic_set_link_ksettings,
 };
 
 void hns_ethtool_set_ops(struct net_device *ndev)
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index 2e1b17a..ad03763 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -334,7 +334,7 @@
 	if (IS_ERR(adapter->ptp_clock)) {
 		adapter->ptp_clock = NULL;
 		e_err("ptp_clock_register failed\n");
-	} else {
+	} else if (adapter->ptp_clock) {
 		e_info("registered PHC clock\n");
 	}
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index ed39cba..f1fecea 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -669,7 +669,7 @@
 		pf->ptp_clock = NULL;
 		dev_err(&pf->pdev->dev, "%s: ptp_clock_register failed\n",
 			__func__);
-	} else {
+	} else if (pf->ptp_clock) {
 		struct timespec64 ts;
 		u32 regval;
 
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 66dfa20..1dd14e1 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1159,7 +1159,7 @@
 	if (IS_ERR(adapter->ptp_clock)) {
 		adapter->ptp_clock = NULL;
 		dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
-	} else {
+	} else if (adapter->ptp_clock) {
 		dev_info(&adapter->pdev->dev, "added PHC on %s\n",
 			 adapter->netdev->name);
 		adapter->ptp_flags |= IGB_PTP_ENABLED;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index e5431bf..a922776 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -1254,7 +1254,7 @@
 		adapter->ptp_clock = NULL;
 		e_dev_err("ptp_clock_register failed\n");
 		return err;
-	} else
+	} else if (adapter->ptp_clock)
 		e_dev_info("registered PHC device on %s\n", netdev->name);
 
 	/* set default timestamp mode to disabled here. We do this in
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index ca6b501..2909372 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1894,11 +1894,8 @@
 	struct mtk_eth *eth = mac->hw;
 
 	phy_disconnect(mac->phy_dev);
-	mtk_mdio_cleanup(eth);
 	mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
 	mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
-	free_irq(eth->irq[1], dev);
-	free_irq(eth->irq[2], dev);
 }
 
 static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -2454,6 +2451,7 @@
 	netif_napi_del(&eth->tx_napi);
 	netif_napi_del(&eth->rx_napi);
 	mtk_cleanup(eth);
+	mtk_mdio_cleanup(eth);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index f04a423..a58d96c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -785,17 +785,23 @@
 		return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO);
 
 	if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) {
+		int ret;
+
 		if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
 			return mlx4_internal_err_ret_value(dev, op,
 							  op_modifier);
+		down_read(&mlx4_priv(dev)->cmd.switch_sem);
 		if (mlx4_priv(dev)->cmd.use_events)
-			return mlx4_cmd_wait(dev, in_param, out_param,
-					     out_is_imm, in_modifier,
-					     op_modifier, op, timeout);
+			ret = mlx4_cmd_wait(dev, in_param, out_param,
+					    out_is_imm, in_modifier,
+					    op_modifier, op, timeout);
 		else
-			return mlx4_cmd_poll(dev, in_param, out_param,
-					     out_is_imm, in_modifier,
-					     op_modifier, op, timeout);
+			ret = mlx4_cmd_poll(dev, in_param, out_param,
+					    out_is_imm, in_modifier,
+					    op_modifier, op, timeout);
+
+		up_read(&mlx4_priv(dev)->cmd.switch_sem);
+		return ret;
 	}
 	return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm,
 			      in_modifier, op_modifier, op, timeout);
@@ -2454,6 +2460,7 @@
 	int flags = 0;
 
 	if (!priv->cmd.initialized) {
+		init_rwsem(&priv->cmd.switch_sem);
 		mutex_init(&priv->cmd.slave_cmd_mutex);
 		sema_init(&priv->cmd.poll_sem, 1);
 		priv->cmd.use_events = 0;
@@ -2583,6 +2590,7 @@
 	if (!priv->cmd.context)
 		return -ENOMEM;
 
+	down_write(&priv->cmd.switch_sem);
 	for (i = 0; i < priv->cmd.max_cmds; ++i) {
 		priv->cmd.context[i].token = i;
 		priv->cmd.context[i].next  = i + 1;
@@ -2606,6 +2614,7 @@
 
 	down(&priv->cmd.poll_sem);
 	priv->cmd.use_events = 1;
+	up_write(&priv->cmd.switch_sem);
 
 	return err;
 }
@@ -2618,6 +2627,7 @@
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int i;
 
+	down_write(&priv->cmd.switch_sem);
 	priv->cmd.use_events = 0;
 
 	for (i = 0; i < priv->cmd.max_cmds; ++i)
@@ -2626,6 +2636,7 @@
 	kfree(priv->cmd.context);
 
 	up(&priv->cmd.poll_sem);
+	up_write(&priv->cmd.switch_sem);
 }
 
 struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index 1494997..08fc5fc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -298,7 +298,7 @@
 	if (IS_ERR(mdev->ptp_clock)) {
 		mdev->ptp_clock = NULL;
 		mlx4_err(mdev, "ptp_clock_register failed\n");
-	} else {
+	} else if (mdev->ptp_clock) {
 		mlx4_info(mdev, "registered PHC clock\n");
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index c46355b..f2e8bed 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -72,7 +72,7 @@
 	}
 	dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
 			   frag_info->dma_dir);
-	if (dma_mapping_error(priv->ddev, dma)) {
+	if (unlikely(dma_mapping_error(priv->ddev, dma))) {
 		put_page(page);
 		return -ENOMEM;
 	}
@@ -108,7 +108,8 @@
 		    ring_alloc[i].page_size)
 			continue;
 
-		if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp))
+		if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i],
+					      frag_info, gfp)))
 			goto out;
 	}
 
@@ -585,7 +586,7 @@
 		frag_info = &priv->frag_info[nr];
 		if (length <= frag_info->frag_prefix_size)
 			break;
-		if (!frags[nr].page)
+		if (unlikely(!frags[nr].page))
 			goto fail;
 
 		dma = be64_to_cpu(rx_desc->data[nr].addr);
@@ -625,7 +626,7 @@
 	dma_addr_t dma;
 
 	skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN);
-	if (!skb) {
+	if (unlikely(!skb)) {
 		en_dbg(RX_ERR, priv, "Failed allocating skb\n");
 		return NULL;
 	}
@@ -736,7 +737,8 @@
 {
 	__wsum csum_pseudo_hdr = 0;
 
-	if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS)
+	if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT ||
+		     ipv6h->nexthdr == IPPROTO_HOPOPTS))
 		return -1;
 	hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr));
 
@@ -769,7 +771,7 @@
 		get_fixed_ipv4_csum(hw_checksum, skb, hdr);
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6))
-		if (get_fixed_ipv6_csum(hw_checksum, skb, hdr))
+		if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr)))
 			return -1;
 #endif
 	return 0;
@@ -796,10 +798,10 @@
 	u64 timestamp;
 	bool l2_tunnel;
 
-	if (!priv->port_up)
+	if (unlikely(!priv->port_up))
 		return 0;
 
-	if (budget <= 0)
+	if (unlikely(budget <= 0))
 		return polled;
 
 	/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
@@ -902,9 +904,9 @@
 			case XDP_PASS:
 				break;
 			case XDP_TX:
-				if (!mlx4_en_xmit_frame(frags, dev,
+				if (likely(!mlx4_en_xmit_frame(frags, dev,
 							length, tx_index,
-							&doorbell_pending))
+							&doorbell_pending)))
 					goto consumed;
 				goto xdp_drop; /* Drop on xmit failure */
 			default:
@@ -912,7 +914,7 @@
 			case XDP_ABORTED:
 			case XDP_DROP:
 xdp_drop:
-				if (mlx4_en_rx_recycle(ring, frags))
+				if (likely(mlx4_en_rx_recycle(ring, frags)))
 					goto consumed;
 				goto next;
 			}
@@ -1016,12 +1018,12 @@
 
 		/* GRO not possible, complete processing here */
 		skb = mlx4_en_rx_skb(priv, rx_desc, frags, length);
-		if (!skb) {
+		if (unlikely(!skb)) {
 			ring->dropped++;
 			goto next;
 		}
 
-                if (unlikely(priv->validate_loopback)) {
+		if (unlikely(priv->validate_loopback)) {
 			validate_loopback(priv, skb);
 			goto next;
 		}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index c9d7fc51..c128ba3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -46,6 +46,7 @@
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <net/devlink.h>
+#include <linux/rwsem.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/driver.h>
@@ -627,6 +628,7 @@
 	struct mutex		slave_cmd_mutex;
 	struct semaphore	poll_sem;
 	struct semaphore	event_sem;
+	struct rw_semaphore	switch_sem;
 	int			max_cmds;
 	spinlock_t		context_lock;
 	int			free_head;
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 6714662..f44d089 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -45,15 +45,12 @@
 	struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
 	struct mlx4_srq *srq;
 
-	spin_lock(&srq_table->lock);
-
+	rcu_read_lock();
 	srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
+	rcu_read_unlock();
 	if (srq)
 		atomic_inc(&srq->refcount);
-
-	spin_unlock(&srq_table->lock);
-
-	if (!srq) {
+	else {
 		mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
 		return;
 	}
@@ -301,12 +298,11 @@
 {
 	struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
 	struct mlx4_srq *srq;
-	unsigned long flags;
 
-	spin_lock_irqsave(&srq_table->lock, flags);
+	rcu_read_lock();
 	srq = radix_tree_lookup(&srq_table->tree,
 				srqn & (dev->caps.num_srqs - 1));
-	spin_unlock_irqrestore(&srq_table->lock, flags);
+	rcu_read_unlock();
 
 	return srq;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 7dd4763..3460154 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -65,6 +65,8 @@
 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW            0x3
 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW            0x6
 
+#define MLX5_RX_HEADROOM NET_SKB_PAD
+
 #define MLX5_MPWRQ_LOG_STRIDE_SIZE		6  /* >= 6, HW restriction */
 #define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS	8  /* >= 6, HW restriction */
 #define MLX5_MPWRQ_LOG_WQE_SZ			18
@@ -99,6 +101,18 @@
 #define MLX5E_UPDATE_STATS_INTERVAL    200 /* msecs */
 #define MLX5E_SQ_BF_BUDGET             16
 
+#define MLX5E_ICOSQ_MAX_WQEBBS \
+	(DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB))
+
+#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+#define MLX5E_XDP_IHS_DS_COUNT \
+	DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS)
+#define MLX5E_XDP_TX_DS_COUNT \
+	(MLX5E_XDP_IHS_DS_COUNT + \
+	 (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+#define MLX5E_XDP_TX_WQEBBS \
+	DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS)
+
 #define MLX5E_NUM_MAIN_GROUPS 9
 
 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
@@ -302,10 +316,20 @@
 struct mlx5e_rq {
 	/* data path */
 	struct mlx5_wq_ll      wq;
-	u32                    wqe_sz;
-	struct sk_buff       **skb;
-	struct mlx5e_mpw_info *wqe_info;
-	void                  *mtt_no_align;
+
+	union {
+		struct mlx5e_dma_info *dma_info;
+		struct {
+			struct mlx5e_mpw_info *info;
+			void                  *mtt_no_align;
+			u32                    mtt_offset;
+		} mpwqe;
+	};
+	struct {
+		u8             page_order;
+		u32            wqe_sz;    /* wqe data buffer size */
+		u8             map_dir;   /* dma map direction */
+	} buff;
 	__be32                 mkey_be;
 
 	struct device         *pdev;
@@ -321,9 +345,9 @@
 
 	unsigned long          state;
 	int                    ix;
-	u32                    mpwqe_mtt_offset;
 
 	struct mlx5e_rx_am     am; /* Adaptive Moderation */
+	struct bpf_prog       *xdp_prog;
 
 	/* control */
 	struct mlx5_wq_ctrl    wq_ctrl;
@@ -370,11 +394,17 @@
 	MLX5E_SQ_STATE_BF_ENABLE,
 };
 
-struct mlx5e_ico_wqe_info {
+struct mlx5e_sq_wqe_info {
 	u8  opcode;
 	u8  num_wqebbs;
 };
 
+enum mlx5e_sq_type {
+	MLX5E_SQ_TXQ,
+	MLX5E_SQ_ICO,
+	MLX5E_SQ_XDP
+};
+
 struct mlx5e_sq {
 	/* data path */
 
@@ -392,10 +422,20 @@
 
 	struct mlx5e_cq            cq;
 
-	/* pointers to per packet info: write@xmit, read@completion */
-	struct sk_buff           **skb;
-	struct mlx5e_sq_dma       *dma_fifo;
-	struct mlx5e_tx_wqe_info  *wqe_info;
+	/* pointers to per tx element info: write@xmit, read@completion */
+	union {
+		struct {
+			struct sk_buff           **skb;
+			struct mlx5e_sq_dma       *dma_fifo;
+			struct mlx5e_tx_wqe_info  *wqe_info;
+		} txq;
+		struct mlx5e_sq_wqe_info *ico_wqe;
+		struct {
+			struct mlx5e_sq_wqe_info  *wqe_info;
+			struct mlx5e_dma_info     *di;
+			bool                       doorbell;
+		} xdp;
+	} db;
 
 	/* read only */
 	struct mlx5_wq_cyc         wq;
@@ -417,8 +457,8 @@
 	struct mlx5_uar            uar;
 	struct mlx5e_channel      *channel;
 	int                        tc;
-	struct mlx5e_ico_wqe_info *ico_wqe_info;
 	u32                        rate_limit;
+	u8                         type;
 } ____cacheline_aligned_in_smp;
 
 static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n)
@@ -434,8 +474,10 @@
 struct mlx5e_channel {
 	/* data path */
 	struct mlx5e_rq            rq;
+	struct mlx5e_sq            xdp_sq;
 	struct mlx5e_sq            sq[MLX5E_MAX_NUM_TC];
 	struct mlx5e_sq            icosq;   /* internal control operations */
+	bool                       xdp;
 	struct napi_struct         napi;
 	struct device             *pdev;
 	struct net_device         *netdev;
@@ -617,6 +659,7 @@
 	/* priv data path fields - start */
 	struct mlx5e_sq            **txq_to_sq_map;
 	int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
+	struct bpf_prog *xdp_prog;
 	/* priv data path fields - end */
 
 	unsigned long              state;
@@ -663,7 +706,7 @@
 int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
-void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
+void mlx5e_free_sq_descs(struct mlx5e_sq *sq);
 
 void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
 			bool recycle);
@@ -764,7 +807,7 @@
 
 static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
 {
-	return rq->mpwqe_mtt_offset +
+	return rq->mpwqe.mtt_offset +
 		wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 847a8f3..13dc388 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -273,7 +273,7 @@
 
 	tstamp->ptp = ptp_clock_register(&tstamp->ptp_info,
 					 &priv->mdev->pdev->dev);
-	if (IS_ERR_OR_NULL(tstamp->ptp)) {
+	if (IS_ERR(tstamp->ptp)) {
 		mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n",
 			       PTR_ERR(tstamp->ptp));
 		tstamp->ptp = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8595b50..a9fc9d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -34,6 +34,7 @@
 #include <net/pkt_cls.h>
 #include <linux/mlx5/fs.h>
 #include <net/vxlan.h>
+#include <linux/bpf.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
@@ -50,7 +51,7 @@
 	struct mlx5_wq_param       wq;
 	u16                        max_inline;
 	u8                         min_inline_mode;
-	bool                       icosq;
+	enum mlx5e_sq_type         type;
 };
 
 struct mlx5e_cq_param {
@@ -63,12 +64,55 @@
 struct mlx5e_channel_param {
 	struct mlx5e_rq_param      rq;
 	struct mlx5e_sq_param      sq;
+	struct mlx5e_sq_param      xdp_sq;
 	struct mlx5e_sq_param      icosq;
 	struct mlx5e_cq_param      rx_cq;
 	struct mlx5e_cq_param      tx_cq;
 	struct mlx5e_cq_param      icosq_cq;
 };
 
+static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
+{
+	return MLX5_CAP_GEN(mdev, striding_rq) &&
+		MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
+		MLX5_CAP_ETH(mdev, reg_umr_sq);
+}
+
+static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type)
+{
+	priv->params.rq_wq_type = rq_type;
+	switch (priv->params.rq_wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
+		priv->params.mpwqe_log_stride_sz = priv->params.rx_cqe_compress ?
+			MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
+			MLX5_MPWRQ_LOG_STRIDE_SIZE;
+		priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
+			priv->params.mpwqe_log_stride_sz;
+		break;
+	default: /* MLX5_WQ_TYPE_LINKED_LIST */
+		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+	}
+	priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
+					       BIT(priv->params.log_rq_size));
+
+	mlx5_core_info(priv->mdev,
+		       "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+		       priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+		       BIT(priv->params.log_rq_size),
+		       BIT(priv->params.mpwqe_log_stride_sz),
+		       priv->params.rx_cqe_compress_admin);
+}
+
+static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv)
+{
+	u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) &&
+		    !priv->xdp_prog ?
+		    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+		    MLX5_WQ_TYPE_LINKED_LIST;
+	mlx5e_set_rq_type_params(priv, rq_type);
+}
+
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -136,6 +180,9 @@
 		s->rx_csum_none	+= rq_stats->csum_none;
 		s->rx_csum_complete += rq_stats->csum_complete;
 		s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
+		s->rx_xdp_drop += rq_stats->xdp_drop;
+		s->rx_xdp_tx += rq_stats->xdp_tx;
+		s->rx_xdp_tx_full += rq_stats->xdp_tx_full;
 		s->rx_wqe_err   += rq_stats->wqe_err;
 		s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
 		s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
@@ -314,7 +361,7 @@
 	struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
 	struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
 	struct mlx5_wqe_data_seg      *dseg = &wqe->data;
-	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
 	u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
 	u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix);
 
@@ -342,21 +389,21 @@
 	int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
 	int i;
 
-	rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info),
-				    GFP_KERNEL, cpu_to_node(c->cpu));
-	if (!rq->wqe_info)
+	rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
+				      GFP_KERNEL, cpu_to_node(c->cpu));
+	if (!rq->mpwqe.info)
 		goto err_out;
 
 	/* We allocate more than mtt_sz as we will align the pointer */
-	rq->mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
 					cpu_to_node(c->cpu));
-	if (unlikely(!rq->mtt_no_align))
+	if (unlikely(!rq->mpwqe.mtt_no_align))
 		goto err_free_wqe_info;
 
 	for (i = 0; i < wq_sz; i++) {
-		struct mlx5e_mpw_info *wi = &rq->wqe_info[i];
+		struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
 
-		wi->umr.mtt = PTR_ALIGN(rq->mtt_no_align + i * mtt_alloc,
+		wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc,
 					MLX5_UMR_ALIGN);
 		wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz,
 						  PCI_DMA_TODEVICE);
@@ -370,14 +417,14 @@
 
 err_unmap_mtts:
 	while (--i >= 0) {
-		struct mlx5e_mpw_info *wi = &rq->wqe_info[i];
+		struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
 
 		dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz,
 				 PCI_DMA_TODEVICE);
 	}
-	kfree(rq->mtt_no_align);
+	kfree(rq->mpwqe.mtt_no_align);
 err_free_wqe_info:
-	kfree(rq->wqe_info);
+	kfree(rq->mpwqe.info);
 
 err_out:
 	return -ENOMEM;
@@ -390,13 +437,13 @@
 	int i;
 
 	for (i = 0; i < wq_sz; i++) {
-		struct mlx5e_mpw_info *wi = &rq->wqe_info[i];
+		struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
 
 		dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz,
 				 PCI_DMA_TODEVICE);
 	}
-	kfree(rq->mtt_no_align);
-	kfree(rq->wqe_info);
+	kfree(rq->mpwqe.mtt_no_align);
+	kfree(rq->mpwqe.info);
 }
 
 static int mlx5e_create_rq(struct mlx5e_channel *c,
@@ -408,6 +455,8 @@
 	void *rqc = param->rqc;
 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
 	u32 byte_count;
+	u32 frag_sz;
+	int npages;
 	int wq_sz;
 	int err;
 	int i;
@@ -430,6 +479,11 @@
 	rq->channel = c;
 	rq->ix      = c->ix;
 	rq->priv    = c->priv;
+	rq->xdp_prog = priv->xdp_prog;
+
+	rq->buff.map_dir = DMA_FROM_DEVICE;
+	if (rq->xdp_prog)
+		rq->buff.map_dir = DMA_BIDIRECTIONAL;
 
 	switch (priv->params.rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
@@ -437,34 +491,45 @@
 		rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
 		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
-		rq->mpwqe_mtt_offset = c->ix *
+		rq->mpwqe.mtt_offset = c->ix *
 			MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size));
 
 		rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
 		rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
-		rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
-		byte_count = rq->wqe_sz;
+
+		rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
+		byte_count = rq->buff.wqe_sz;
 		rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key);
 		err = mlx5e_rq_alloc_mpwqe_info(rq, c);
 		if (err)
 			goto err_rq_wq_destroy;
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
-		rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL,
-				       cpu_to_node(c->cpu));
-		if (!rq->skb) {
+		rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info),
+					    GFP_KERNEL, cpu_to_node(c->cpu));
+		if (!rq->dma_info) {
 			err = -ENOMEM;
 			goto err_rq_wq_destroy;
 		}
+
 		rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
 		rq->alloc_wqe = mlx5e_alloc_rx_wqe;
 		rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 
-		rq->wqe_sz = (priv->params.lro_en) ?
+		rq->buff.wqe_sz = (priv->params.lro_en) ?
 				priv->params.lro_wqe_sz :
 				MLX5E_SW2HW_MTU(priv->netdev->mtu);
-		rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz);
-		byte_count = rq->wqe_sz;
+		byte_count = rq->buff.wqe_sz;
+
+		/* calc the required page order */
+		frag_sz = MLX5_RX_HEADROOM +
+			  byte_count /* packet data */ +
+			  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+		frag_sz = SKB_DATA_ALIGN(frag_sz);
+
+		npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE);
+		rq->buff.page_order = order_base_2(npages);
+
 		byte_count |= MLX5_HW_START_PADDING;
 		rq->mkey_be = c->mkey_be;
 	}
@@ -482,6 +547,9 @@
 	rq->page_cache.head = 0;
 	rq->page_cache.tail = 0;
 
+	if (rq->xdp_prog)
+		bpf_prog_add(rq->xdp_prog, 1);
+
 	return 0;
 
 err_rq_wq_destroy:
@@ -494,12 +562,15 @@
 {
 	int i;
 
+	if (rq->xdp_prog)
+		bpf_prog_put(rq->xdp_prog);
+
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		mlx5e_rq_free_mpwqe_info(rq);
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
-		kfree(rq->skb);
+		kfree(rq->dma_info);
 	}
 
 	for (i = rq->page_cache.head; i != rq->page_cache.tail;
@@ -641,7 +712,7 @@
 
 	/* UMR WQE (if in progress) is always at wq->head */
 	if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
-		mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]);
+		mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
 
 	while (!mlx5_wq_ll_is_empty(wq)) {
 		wqe_ix_be = *wq->tail_next;
@@ -676,8 +747,8 @@
 	if (param->am_enabled)
 		set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
 
-	sq->ico_wqe_info[pi].opcode     = MLX5_OPCODE_NOP;
-	sq->ico_wqe_info[pi].num_wqebbs = 1;
+	sq->db.ico_wqe[pi].opcode     = MLX5_OPCODE_NOP;
+	sq->db.ico_wqe[pi].num_wqebbs = 1;
 	mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */
 
 	return 0;
@@ -701,26 +772,65 @@
 	mlx5e_destroy_rq(rq);
 }
 
-static void mlx5e_free_sq_db(struct mlx5e_sq *sq)
+static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq)
 {
-	kfree(sq->wqe_info);
-	kfree(sq->dma_fifo);
-	kfree(sq->skb);
+	kfree(sq->db.xdp.di);
+	kfree(sq->db.xdp.wqe_info);
 }
 
-static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa)
+static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa)
+{
+	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+
+	sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz,
+				     GFP_KERNEL, numa);
+	sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz,
+					   GFP_KERNEL, numa);
+	if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) {
+		mlx5e_free_sq_xdp_db(sq);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq)
+{
+	kfree(sq->db.ico_wqe);
+}
+
+static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa)
+{
+	u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+
+	sq->db.ico_wqe = kzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz,
+				      GFP_KERNEL, numa);
+	if (!sq->db.ico_wqe)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq)
+{
+	kfree(sq->db.txq.wqe_info);
+	kfree(sq->db.txq.dma_fifo);
+	kfree(sq->db.txq.skb);
+}
+
+static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa)
 {
 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 	int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
 
-	sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa);
-	sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL,
-				    numa);
-	sq->wqe_info = kzalloc_node(wq_sz * sizeof(*sq->wqe_info), GFP_KERNEL,
-				    numa);
-
-	if (!sq->skb || !sq->dma_fifo || !sq->wqe_info) {
-		mlx5e_free_sq_db(sq);
+	sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb),
+				      GFP_KERNEL, numa);
+	sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo),
+					   GFP_KERNEL, numa);
+	sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info),
+					   GFP_KERNEL, numa);
+	if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) {
+		mlx5e_free_sq_txq_db(sq);
 		return -ENOMEM;
 	}
 
@@ -729,6 +839,46 @@
 	return 0;
 }
 
+static void mlx5e_free_sq_db(struct mlx5e_sq *sq)
+{
+	switch (sq->type) {
+	case MLX5E_SQ_TXQ:
+		mlx5e_free_sq_txq_db(sq);
+		break;
+	case MLX5E_SQ_ICO:
+		mlx5e_free_sq_ico_db(sq);
+		break;
+	case MLX5E_SQ_XDP:
+		mlx5e_free_sq_xdp_db(sq);
+		break;
+	}
+}
+
+static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa)
+{
+	switch (sq->type) {
+	case MLX5E_SQ_TXQ:
+		return mlx5e_alloc_sq_txq_db(sq, numa);
+	case MLX5E_SQ_ICO:
+		return mlx5e_alloc_sq_ico_db(sq, numa);
+	case MLX5E_SQ_XDP:
+		return mlx5e_alloc_sq_xdp_db(sq, numa);
+	}
+
+	return 0;
+}
+
+static int mlx5e_sq_get_max_wqebbs(u8 sq_type)
+{
+	switch (sq_type) {
+	case MLX5E_SQ_ICO:
+		return MLX5E_ICOSQ_MAX_WQEBBS;
+	case MLX5E_SQ_XDP:
+		return MLX5E_XDP_TX_WQEBBS;
+	}
+	return MLX5_SEND_WQE_MAX_WQEBBS;
+}
+
 static int mlx5e_create_sq(struct mlx5e_channel *c,
 			   int tc,
 			   struct mlx5e_sq_param *param,
@@ -741,6 +891,13 @@
 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
 	int err;
 
+	sq->type      = param->type;
+	sq->pdev      = c->pdev;
+	sq->tstamp    = &priv->tstamp;
+	sq->mkey_be   = c->mkey_be;
+	sq->channel   = c;
+	sq->tc        = tc;
+
 	err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf));
 	if (err)
 		return err;
@@ -769,18 +926,7 @@
 	if (err)
 		goto err_sq_wq_destroy;
 
-	if (param->icosq) {
-		u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
-
-		sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) *
-						wq_sz,
-						GFP_KERNEL,
-						cpu_to_node(c->cpu));
-		if (!sq->ico_wqe_info) {
-			err = -ENOMEM;
-			goto err_free_sq_db;
-		}
-	} else {
+	if (sq->type == MLX5E_SQ_TXQ) {
 		int txq_ix;
 
 		txq_ix = c->ix + tc * priv->params.num_channels;
@@ -788,19 +934,11 @@
 		priv->txq_to_sq_map[txq_ix] = sq;
 	}
 
-	sq->pdev      = c->pdev;
-	sq->tstamp    = &priv->tstamp;
-	sq->mkey_be   = c->mkey_be;
-	sq->channel   = c;
-	sq->tc        = tc;
-	sq->edge      = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS;
+	sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type);
 	sq->bf_budget = MLX5E_SQ_BF_BUDGET;
 
 	return 0;
 
-err_free_sq_db:
-	mlx5e_free_sq_db(sq);
-
 err_sq_wq_destroy:
 	mlx5_wq_destroy(&sq->wq_ctrl);
 
@@ -815,7 +953,6 @@
 	struct mlx5e_channel *c = sq->channel;
 	struct mlx5e_priv *priv = c->priv;
 
-	kfree(sq->ico_wqe_info);
 	mlx5e_free_sq_db(sq);
 	mlx5_wq_destroy(&sq->wq_ctrl);
 	mlx5_unmap_free_uar(priv->mdev, &sq->uar);
@@ -844,11 +981,12 @@
 
 	memcpy(sqc, param->sqc, sizeof(param->sqc));
 
-	MLX5_SET(sqc,  sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]);
+	MLX5_SET(sqc,  sqc, tis_num_0, param->type == MLX5E_SQ_ICO ?
+				       0 : priv->tisn[sq->tc]);
 	MLX5_SET(sqc,  sqc, cqn,		sq->cq.mcq.cqn);
 	MLX5_SET(sqc,  sqc, min_wqe_inline_mode, sq->min_inline_mode);
 	MLX5_SET(sqc,  sqc, state,		MLX5_SQC_STATE_RST);
-	MLX5_SET(sqc,  sqc, tis_lst_sz,		param->icosq ? 0 : 1);
+	MLX5_SET(sqc,  sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1);
 	MLX5_SET(sqc,  sqc, flush_in_error_en,	1);
 
 	MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
@@ -963,12 +1101,14 @@
 		netif_tx_disable_queue(sq->txq);
 
 		/* last doorbell out, godspeed .. */
-		if (mlx5e_sq_has_room_for(sq, 1))
+		if (mlx5e_sq_has_room_for(sq, 1)) {
+			sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL;
 			mlx5e_send_nop(sq, true);
+		}
 	}
 
 	mlx5e_disable_sq(sq);
-	mlx5e_free_tx_descs(sq);
+	mlx5e_free_sq_descs(sq);
 	mlx5e_destroy_sq(sq);
 }
 
@@ -1329,14 +1469,31 @@
 		}
 	}
 
+	if (priv->xdp_prog) {
+		/* XDP SQ CQ params are same as normal TXQ sq CQ params */
+		err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq,
+				    priv->params.tx_cq_moderation);
+		if (err)
+			goto err_close_sqs;
+
+		err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq);
+		if (err) {
+			mlx5e_close_cq(&c->xdp_sq.cq);
+			goto err_close_sqs;
+		}
+	}
+
+	c->xdp = !!priv->xdp_prog;
 	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
 	if (err)
-		goto err_close_sqs;
+		goto err_close_xdp_sq;
 
 	netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix);
 	*cp = c;
 
 	return 0;
+err_close_xdp_sq:
+	mlx5e_close_sq(&c->xdp_sq);
 
 err_close_sqs:
 	mlx5e_close_sqs(c);
@@ -1365,9 +1522,13 @@
 static void mlx5e_close_channel(struct mlx5e_channel *c)
 {
 	mlx5e_close_rq(&c->rq);
+	if (c->xdp)
+		mlx5e_close_sq(&c->xdp_sq);
 	mlx5e_close_sqs(c);
 	mlx5e_close_sq(&c->icosq);
 	napi_disable(&c->napi);
+	if (c->xdp)
+		mlx5e_close_cq(&c->xdp_sq.cq);
 	mlx5e_close_cq(&c->rq.cq);
 	mlx5e_close_tx_cqs(c);
 	mlx5e_close_cq(&c->icosq.cq);
@@ -1441,6 +1602,7 @@
 
 	param->max_inline = priv->params.tx_max_inline;
 	param->min_inline_mode = priv->params.tx_min_inline_mode;
+	param->type = MLX5E_SQ_TXQ;
 }
 
 static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -1514,7 +1676,22 @@
 	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
 	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
 
-	param->icosq = true;
+	param->type = MLX5E_SQ_ICO;
+}
+
+static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+				    struct mlx5e_sq_param *param)
+{
+	void *sqc = param->sqc;
+	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+	mlx5e_build_sq_param_common(priv, param);
+	MLX5_SET(wq, wq, log_wq_sz,     priv->params.log_sq_size);
+
+	param->max_inline = priv->params.tx_max_inline;
+	/* FOR XDP SQs will support only L2 inline mode */
+	param->min_inline_mode = MLX5_INLINE_MODE_NONE;
+	param->type = MLX5E_SQ_XDP;
 }
 
 static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam)
@@ -1523,6 +1700,7 @@
 
 	mlx5e_build_rq_param(priv, &cparam->rq);
 	mlx5e_build_sq_param(priv, &cparam->sq);
+	mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq);
 	mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz);
 	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
 	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
@@ -2901,6 +3079,92 @@
 		schedule_work(&priv->tx_timeout_work);
 }
 
+static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct bpf_prog *old_prog;
+	int err = 0;
+	bool reset, was_opened;
+	int i;
+
+	mutex_lock(&priv->state_lock);
+
+	if ((netdev->features & NETIF_F_LRO) && prog) {
+		netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+	/* no need for full reset when exchanging programs */
+	reset = (!priv->xdp_prog || !prog);
+
+	if (was_opened && reset)
+		mlx5e_close_locked(netdev);
+
+	/* exchange programs */
+	old_prog = xchg(&priv->xdp_prog, prog);
+	if (prog)
+		bpf_prog_add(prog, 1);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	if (reset) /* change RQ type according to priv->xdp_prog */
+		mlx5e_set_rq_priv_params(priv);
+
+	if (was_opened && reset)
+		mlx5e_open_locked(netdev);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
+		goto unlock;
+
+	/* exchanging programs w/o reset, we update ref counts on behalf
+	 * of the channels RQs here.
+	 */
+	bpf_prog_add(prog, priv->params.num_channels);
+	for (i = 0; i < priv->params.num_channels; i++) {
+		struct mlx5e_channel *c = priv->channel[i];
+
+		set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state);
+		napi_synchronize(&c->napi);
+		/* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
+
+		old_prog = xchg(&c->rq.xdp_prog, prog);
+
+		clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state);
+		/* napi_schedule in case we have missed anything */
+		set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags);
+		napi_schedule(&c->napi);
+
+		if (old_prog)
+			bpf_prog_put(old_prog);
+	}
+
+unlock:
+	mutex_unlock(&priv->state_lock);
+	return err;
+}
+
+static bool mlx5e_xdp_attached(struct net_device *dev)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	return !!priv->xdp_prog;
+}
+
+static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return mlx5e_xdp_set(dev, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = mlx5e_xdp_attached(dev);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops mlx5e_netdev_ops_basic = {
 	.ndo_open                = mlx5e_open,
 	.ndo_stop                = mlx5e_close,
@@ -2920,6 +3184,7 @@
 	.ndo_rx_flow_steer	 = mlx5e_rx_flow_steer,
 #endif
 	.ndo_tx_timeout          = mlx5e_tx_timeout,
+	.ndo_xdp		 = mlx5e_xdp,
 };
 
 static const struct net_device_ops mlx5e_netdev_ops_sriov = {
@@ -2951,6 +3216,7 @@
 	.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
 	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
 	.ndo_tx_timeout          = mlx5e_tx_timeout,
+	.ndo_xdp		 = mlx5e_xdp,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -3025,13 +3291,6 @@
 		indirection_rqt[i] = i % num_channels;
 }
 
-static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
-{
-	return MLX5_CAP_GEN(mdev, striding_rq) &&
-		MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
-		MLX5_CAP_ETH(mdev, reg_umr_sq);
-}
-
 static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
 {
 	enum pcie_link_width width;
@@ -3111,11 +3370,13 @@
 					 MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
 					 MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 
-	priv->params.log_sq_size           =
-		MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
-	priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ?
-		MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
-		MLX5_WQ_TYPE_LINKED_LIST;
+	priv->mdev                         = mdev;
+	priv->netdev                       = netdev;
+	priv->params.num_channels          = profile->max_nch(mdev);
+	priv->profile                      = profile;
+	priv->ppriv                        = ppriv;
+
+	priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 
 	/* set CQE compression */
 	priv->params.rx_cqe_compress_admin = false;
@@ -3128,33 +3389,11 @@
 		priv->params.rx_cqe_compress_admin =
 			cqe_compress_heuristic(link_speed, pci_bw);
 	}
-
 	priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin;
 
-	switch (priv->params.rq_wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-		priv->params.mpwqe_log_stride_sz =
-			priv->params.rx_cqe_compress ?
-			MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
-			MLX5_MPWRQ_LOG_STRIDE_SIZE;
-		priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
-			priv->params.mpwqe_log_stride_sz;
+	mlx5e_set_rq_priv_params(priv);
+	if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
 		priv->params.lro_en = true;
-		break;
-	default: /* MLX5_WQ_TYPE_LINKED_LIST */
-		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
-	}
-
-	mlx5_core_info(mdev,
-		       "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
-		       priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
-		       BIT(priv->params.log_rq_size),
-		       BIT(priv->params.mpwqe_log_stride_sz),
-		       priv->params.rx_cqe_compress_admin);
-
-	priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
-					    BIT(priv->params.log_rq_size));
 
 	priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
 	mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode);
@@ -3174,19 +3413,16 @@
 	mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt,
 				      MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev));
 
-	priv->params.lro_wqe_sz            =
-		MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+	priv->params.lro_wqe_sz =
+		MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ -
+		/* Extra room needed for build_skb */
+		MLX5_RX_HEADROOM -
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	/* Initialize pflags */
 	MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER,
 			    priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 
-	priv->mdev                         = mdev;
-	priv->netdev                       = netdev;
-	priv->params.num_channels          = profile->max_nch(mdev);
-	priv->profile                      = profile;
-	priv->ppriv                        = ppriv;
-
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_ets_init(priv);
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index dc86779..0a81bd3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -179,50 +179,99 @@
 	mutex_unlock(&priv->state_lock);
 }
 
-int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
-{
-	struct sk_buff *skb;
-	dma_addr_t dma_addr;
+#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT)
 
-	skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz);
-	if (unlikely(!skb))
+static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
+				      struct mlx5e_dma_info *dma_info)
+{
+	struct mlx5e_page_cache *cache = &rq->page_cache;
+	u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
+
+	if (tail_next == cache->head) {
+		rq->stats.cache_full++;
+		return false;
+	}
+
+	cache->page_cache[cache->tail] = *dma_info;
+	cache->tail = tail_next;
+	return true;
+}
+
+static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
+				      struct mlx5e_dma_info *dma_info)
+{
+	struct mlx5e_page_cache *cache = &rq->page_cache;
+
+	if (unlikely(cache->head == cache->tail)) {
+		rq->stats.cache_empty++;
+		return false;
+	}
+
+	if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
+		rq->stats.cache_busy++;
+		return false;
+	}
+
+	*dma_info = cache->page_cache[cache->head];
+	cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
+	rq->stats.cache_reuse++;
+
+	dma_sync_single_for_device(rq->pdev, dma_info->addr,
+				   RQ_PAGE_SIZE(rq),
+				   DMA_FROM_DEVICE);
+	return true;
+}
+
+static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
+					  struct mlx5e_dma_info *dma_info)
+{
+	struct page *page;
+
+	if (mlx5e_rx_cache_get(rq, dma_info))
+		return 0;
+
+	page = dev_alloc_pages(rq->buff.page_order);
+	if (unlikely(!page))
 		return -ENOMEM;
 
-	dma_addr = dma_map_single(rq->pdev,
-				  /* hw start padding */
-				  skb->data,
-				  /* hw end padding */
-				  rq->wqe_sz,
-				  DMA_FROM_DEVICE);
-
-	if (unlikely(dma_mapping_error(rq->pdev, dma_addr)))
-		goto err_free_skb;
-
-	*((dma_addr_t *)skb->cb) = dma_addr;
-	wqe->data.addr = cpu_to_be64(dma_addr);
-
-	rq->skb[ix] = skb;
+	dma_info->page = page;
+	dma_info->addr = dma_map_page(rq->pdev, page, 0,
+				      RQ_PAGE_SIZE(rq), rq->buff.map_dir);
+	if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
+		put_page(page);
+		return -ENOMEM;
+	}
 
 	return 0;
+}
 
-err_free_skb:
-	dev_kfree_skb(skb);
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+			bool recycle)
+{
+	if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
+		return;
 
-	return -ENOMEM;
+	dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq),
+		       rq->buff.map_dir);
+	put_page(dma_info->page);
+}
+
+int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
+{
+	struct mlx5e_dma_info *di = &rq->dma_info[ix];
+
+	if (unlikely(mlx5e_page_alloc_mapped(rq, di)))
+		return -ENOMEM;
+
+	wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM);
+	return 0;
 }
 
 void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
 {
-	struct sk_buff *skb = rq->skb[ix];
+	struct mlx5e_dma_info *di = &rq->dma_info[ix];
 
-	if (skb) {
-		rq->skb[ix] = NULL;
-		dma_unmap_single(rq->pdev,
-				 *((dma_addr_t *)skb->cb),
-				 rq->wqe_sz,
-				 DMA_FROM_DEVICE);
-		dev_kfree_skb(skb);
-	}
+	mlx5e_page_release(rq, di, true);
 }
 
 static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
@@ -279,7 +328,7 @@
 
 static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
 {
-	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
 	struct mlx5e_sq *sq = &rq->channel->icosq;
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_umr_wqe *wqe;
@@ -288,8 +337,8 @@
 
 	/* fill sq edge with nops to avoid wqe wrap around */
 	while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
-		sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP;
-		sq->ico_wqe_info[pi].num_wqebbs = 1;
+		sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+		sq->db.ico_wqe[pi].num_wqebbs = 1;
 		mlx5e_send_nop(sq, true);
 	}
 
@@ -299,90 +348,17 @@
 		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
 			    MLX5_OPCODE_UMR);
 
-	sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR;
-	sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs;
+	sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
+	sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs;
 	sq->pc += num_wqebbs;
 	mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
 }
 
-static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
-				      struct mlx5e_dma_info *dma_info)
-{
-	struct mlx5e_page_cache *cache = &rq->page_cache;
-	u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
-
-	if (tail_next == cache->head) {
-		rq->stats.cache_full++;
-		return false;
-	}
-
-	cache->page_cache[cache->tail] = *dma_info;
-	cache->tail = tail_next;
-	return true;
-}
-
-static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
-				      struct mlx5e_dma_info *dma_info)
-{
-	struct mlx5e_page_cache *cache = &rq->page_cache;
-
-	if (unlikely(cache->head == cache->tail)) {
-		rq->stats.cache_empty++;
-		return false;
-	}
-
-	if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
-		rq->stats.cache_busy++;
-		return false;
-	}
-
-	*dma_info = cache->page_cache[cache->head];
-	cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
-	rq->stats.cache_reuse++;
-
-	dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
-				   DMA_FROM_DEVICE);
-	return true;
-}
-
-static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
-					  struct mlx5e_dma_info *dma_info)
-{
-	struct page *page;
-
-	if (mlx5e_rx_cache_get(rq, dma_info))
-		return 0;
-
-	page = dev_alloc_page();
-	if (unlikely(!page))
-		return -ENOMEM;
-
-	dma_info->page = page;
-	dma_info->addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE,
-				      DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
-		put_page(page);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
-			bool recycle)
-{
-	if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
-		return;
-
-	dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_FROM_DEVICE);
-	put_page(dma_info->page);
-}
-
 static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
 				    struct mlx5e_rx_wqe *wqe,
 				    u16 ix)
 {
-	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
 	u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT;
 	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
 	int err;
@@ -436,7 +412,7 @@
 	clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
 
 	if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) {
-		mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]);
+		mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
 		return;
 	}
 
@@ -448,7 +424,7 @@
 	mlx5_wq_ll_update_db_record(wq);
 }
 
-int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe,	u16 ix)
+int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
 {
 	int err;
 
@@ -462,7 +438,7 @@
 
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
-	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
 
 	mlx5e_free_rx_mpwqe(rq, wi);
 }
@@ -656,33 +632,154 @@
 	napi_gro_receive(rq->cq.napi, skb);
 }
 
+static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	struct mlx5e_tx_wqe *wqe;
+	u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */
+
+	wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+
+	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+	mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
+}
+
+static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
+					struct mlx5e_dma_info *di,
+					unsigned int data_offset,
+					int len)
+{
+	struct mlx5e_sq          *sq   = &rq->channel->xdp_sq;
+	struct mlx5_wq_cyc       *wq   = &sq->wq;
+	u16                      pi    = sq->pc & wq->sz_m1;
+	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+	struct mlx5e_sq_wqe_info *wi   = &sq->db.xdp.wqe_info[pi];
+
+	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
+	struct mlx5_wqe_data_seg *dseg;
+
+	dma_addr_t dma_addr  = di->addr + data_offset + MLX5E_XDP_MIN_INLINE;
+	unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE;
+	void *data           = page_address(di->page) + data_offset;
+
+	if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) {
+		if (sq->db.xdp.doorbell) {
+			/* SQ is full, ring doorbell */
+			mlx5e_xmit_xdp_doorbell(sq);
+			sq->db.xdp.doorbell = false;
+		}
+		rq->stats.xdp_tx_full++;
+		mlx5e_page_release(rq, di, true);
+		return;
+	}
+
+	dma_sync_single_for_device(sq->pdev, dma_addr, dma_len,
+				   PCI_DMA_TODEVICE);
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	/* copy the inline part */
+	memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE);
+	eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+
+	dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1);
+
+	/* write the dma part */
+	dseg->addr       = cpu_to_be64(dma_addr);
+	dseg->byte_count = cpu_to_be32(dma_len);
+	dseg->lkey       = sq->mkey_be;
+
+	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
+	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT);
+
+	sq->db.xdp.di[pi] = *di;
+	wi->opcode     = MLX5_OPCODE_SEND;
+	wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS;
+	sq->pc += MLX5E_XDP_TX_WQEBBS;
+
+	sq->db.xdp.doorbell = true;
+	rq->stats.xdp_tx++;
+}
+
+/* returns true if packet was consumed by xdp */
+static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
+				    const struct bpf_prog *prog,
+				    struct mlx5e_dma_info *di,
+				    void *data, u16 len)
+{
+	struct xdp_buff xdp;
+	u32 act;
+
+	if (!prog)
+		return false;
+
+	xdp.data = data;
+	xdp.data_end = xdp.data + len;
+	act = bpf_prog_run_xdp(prog, &xdp);
+	switch (act) {
+	case XDP_PASS:
+		return false;
+	case XDP_TX:
+		mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len);
+		return true;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+	case XDP_ABORTED:
+	case XDP_DROP:
+		rq->stats.xdp_drop++;
+		mlx5e_page_release(rq, di, true);
+		return true;
+	}
+}
+
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 {
+	struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog);
+	struct mlx5e_dma_info *di;
 	struct mlx5e_rx_wqe *wqe;
-	struct sk_buff *skb;
 	__be16 wqe_counter_be;
+	struct sk_buff *skb;
 	u16 wqe_counter;
+	void *va, *data;
 	u32 cqe_bcnt;
 
 	wqe_counter_be = cqe->wqe_counter;
 	wqe_counter    = be16_to_cpu(wqe_counter_be);
 	wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
-	skb            = rq->skb[wqe_counter];
-	prefetch(skb->data);
-	rq->skb[wqe_counter] = NULL;
+	di             = &rq->dma_info[wqe_counter];
+	va             = page_address(di->page);
+	data           = va + MLX5_RX_HEADROOM;
 
-	dma_unmap_single(rq->pdev,
-			 *((dma_addr_t *)skb->cb),
-			 rq->wqe_sz,
-			 DMA_FROM_DEVICE);
+	dma_sync_single_range_for_cpu(rq->pdev,
+				      di->addr,
+				      MLX5_RX_HEADROOM,
+				      rq->buff.wqe_sz,
+				      DMA_FROM_DEVICE);
+	prefetch(data);
+	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
 	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
 		rq->stats.wqe_err++;
-		dev_kfree_skb(skb);
+		mlx5e_page_release(rq, di, true);
 		goto wq_ll_pop;
 	}
 
-	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+	if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt))
+		goto wq_ll_pop; /* page/packet was consumed by XDP */
+
+	skb = build_skb(va, RQ_PAGE_SIZE(rq));
+	if (unlikely(!skb)) {
+		rq->stats.buff_alloc_err++;
+		mlx5e_page_release(rq, di, true);
+		goto wq_ll_pop;
+	}
+
+	/* queue up for recycling ..*/
+	page_ref_inc(di->page);
+	mlx5e_page_release(rq, di, true);
+
+	skb_reserve(skb, MLX5_RX_HEADROOM);
 	skb_put(skb, cqe_bcnt);
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
@@ -734,7 +831,7 @@
 {
 	u16 cstrides       = mpwrq_get_cqe_consumed_strides(cqe);
 	u16 wqe_id         = be16_to_cpu(cqe->wqe_id);
-	struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id];
+	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
 	struct mlx5e_rx_wqe  *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id);
 	struct sk_buff *skb;
 	u16 cqe_bcnt;
@@ -776,6 +873,7 @@
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 {
 	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
+	struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq;
 	int work_done = 0;
 
 	if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state)))
@@ -802,6 +900,11 @@
 		rq->handle_rx_cqe(rq, cqe);
 	}
 
+	if (xdp_sq->db.xdp.doorbell) {
+		mlx5e_xmit_xdp_doorbell(xdp_sq);
+		xdp_sq->db.xdp.doorbell = false;
+	}
+
 	mlx5_cqwq_update_db_record(&cq->wq);
 
 	/* ensure cq space is freed before enabling more cqes */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 6af8d79..57452fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -65,6 +65,9 @@
 	u64 rx_csum_none;
 	u64 rx_csum_complete;
 	u64 rx_csum_unnecessary_inner;
+	u64 rx_xdp_drop;
+	u64 rx_xdp_tx;
+	u64 rx_xdp_tx_full;
 	u64 tx_csum_partial;
 	u64 tx_csum_partial_inner;
 	u64 tx_queue_stopped;
@@ -100,6 +103,9 @@
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
@@ -278,6 +284,9 @@
 	u64 csum_none;
 	u64 lro_packets;
 	u64 lro_bytes;
+	u64 xdp_drop;
+	u64 xdp_tx;
+	u64 xdp_tx_full;
 	u64 wqe_err;
 	u64 mpwqe_filler;
 	u64 buff_alloc_err;
@@ -295,6 +304,9 @@
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index eb0e725..70a7173 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -52,7 +52,6 @@
 	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
 	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | 0x01);
 
-	sq->skb[pi] = NULL;
 	sq->pc++;
 	sq->stats.nop++;
 
@@ -82,15 +81,17 @@
 				  u32 size,
 				  enum mlx5e_dma_map_type map_type)
 {
-	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr;
-	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size;
-	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type;
+	u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask;
+
+	sq->db.txq.dma_fifo[i].addr = addr;
+	sq->db.txq.dma_fifo[i].size = size;
+	sq->db.txq.dma_fifo[i].type = map_type;
 	sq->dma_fifo_pc++;
 }
 
 static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i)
 {
-	return &sq->dma_fifo[i & sq->dma_fifo_mask];
+	return &sq->db.txq.dma_fifo[i & sq->dma_fifo_mask];
 }
 
 static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma)
@@ -221,7 +222,7 @@
 
 	u16 pi = sq->pc & wq->sz_m1;
 	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
-	struct mlx5e_tx_wqe_info *wi   = &sq->wqe_info[pi];
+	struct mlx5e_tx_wqe_info *wi   = &sq->db.txq.wqe_info[pi];
 
 	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
 	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
@@ -341,7 +342,7 @@
 	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
 	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 
-	sq->skb[pi] = skb;
+	sq->db.txq.skb[pi] = skb;
 
 	wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 	sq->pc += wi->num_wqebbs;
@@ -368,8 +369,10 @@
 	}
 
 	/* fill sq edge with nops to avoid wqe wrap around */
-	while ((sq->pc & wq->sz_m1) > sq->edge)
+	while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
+		sq->db.txq.skb[pi] = NULL;
 		mlx5e_send_nop(sq, false);
+	}
 
 	if (bf)
 		sq->bf_budget--;
@@ -442,8 +445,8 @@
 			last_wqe = (sqcc == wqe_counter);
 
 			ci = sqcc & sq->wq.sz_m1;
-			skb = sq->skb[ci];
-			wi = &sq->wqe_info[ci];
+			skb = sq->db.txq.skb[ci];
+			wi = &sq->db.txq.wqe_info[ci];
 
 			if (unlikely(!skb)) { /* nop */
 				sqcc++;
@@ -492,7 +495,7 @@
 	return (i == MLX5E_TX_CQ_POLL_BUDGET);
 }
 
-void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
+static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq)
 {
 	struct mlx5e_tx_wqe_info *wi;
 	struct sk_buff *skb;
@@ -501,8 +504,8 @@
 
 	while (sq->cc != sq->pc) {
 		ci = sq->cc & sq->wq.sz_m1;
-		skb = sq->skb[ci];
-		wi = &sq->wqe_info[ci];
+		skb = sq->db.txq.skb[ci];
+		wi = &sq->db.txq.wqe_info[ci];
 
 		if (!skb) { /* nop */
 			sq->cc++;
@@ -520,3 +523,37 @@
 		sq->cc += wi->num_wqebbs;
 	}
 }
+
+static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq)
+{
+	struct mlx5e_sq_wqe_info *wi;
+	struct mlx5e_dma_info *di;
+	u16 ci;
+
+	while (sq->cc != sq->pc) {
+		ci = sq->cc & sq->wq.sz_m1;
+		di = &sq->db.xdp.di[ci];
+		wi = &sq->db.xdp.wqe_info[ci];
+
+		if (wi->opcode == MLX5_OPCODE_NOP) {
+			sq->cc++;
+			continue;
+		}
+
+		sq->cc += wi->num_wqebbs;
+
+		mlx5e_page_release(&sq->channel->rq, di, false);
+	}
+}
+
+void mlx5e_free_sq_descs(struct mlx5e_sq *sq)
+{
+	switch (sq->type) {
+	case MLX5E_SQ_TXQ:
+		mlx5e_free_txq_sq_descs(sq);
+		break;
+	case MLX5E_SQ_XDP:
+		mlx5e_free_xdp_sq_descs(sq);
+		break;
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 08d8b0c..5703f19 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -72,7 +72,7 @@
 
 	do {
 		u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1;
-		struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci];
+		struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci];
 
 		mlx5_cqwq_pop(&cq->wq);
 		sqcc += icowi->num_wqebbs;
@@ -105,6 +105,66 @@
 	sq->cc = sqcc;
 }
 
+static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq)
+{
+	struct mlx5e_sq *sq;
+	u16 sqcc;
+	int i;
+
+	sq = container_of(cq, struct mlx5e_sq, cq);
+
+	if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
+		return false;
+
+	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+	 * otherwise a cq overrun may occur
+	 */
+	sqcc = sq->cc;
+
+	for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) {
+		struct mlx5_cqe64 *cqe;
+		u16 wqe_counter;
+		bool last_wqe;
+
+		cqe = mlx5e_get_cqe(cq);
+		if (!cqe)
+			break;
+
+		mlx5_cqwq_pop(&cq->wq);
+
+		wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+		do {
+			struct mlx5e_sq_wqe_info *wi;
+			struct mlx5e_dma_info *di;
+			u16 ci;
+
+			last_wqe = (sqcc == wqe_counter);
+
+			ci = sqcc & sq->wq.sz_m1;
+			di = &sq->db.xdp.di[ci];
+			wi = &sq->db.xdp.wqe_info[ci];
+
+			if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) {
+				sqcc++;
+				continue;
+			}
+
+			sqcc += wi->num_wqebbs;
+			/* Recycle RX page */
+			mlx5e_page_release(&sq->channel->rq, di, true);
+		} while (!last_wqe);
+	}
+
+	mlx5_cqwq_update_db_record(&cq->wq);
+
+	/* ensure cq space is freed before enabling more cqes */
+	wmb();
+
+	sq->cc = sqcc;
+	return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
 int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
@@ -121,6 +181,9 @@
 	work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
 	busy |= work_done == budget;
 
+	if (c->xdp)
+		busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq);
+
 	mlx5e_poll_ico_cq(&c->icosq.cq);
 
 	busy |= mlx5e_post_rx_wqes(&c->rq);
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 6817881..0efb2ba 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -3,6 +3,13 @@
 nfp_netvf-objs := \
 	    nfp_net_common.o \
 	    nfp_net_ethtool.o \
+	    nfp_net_offload.o \
 	    nfp_netvf_main.o
 
+ifeq ($(CONFIG_BPF_SYSCALL),y)
+nfp_netvf-objs += \
+	    nfp_bpf_verifier.o \
+	    nfp_bpf_jit.o
+endif
+
 nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
new file mode 100644
index 0000000..22484b6
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_ASM_H__
+#define __NFP_ASM_H__ 1
+
+#include "nfp_bpf.h"
+
+#define REG_NONE	0
+
+#define RE_REG_NO_DST	0x020
+#define RE_REG_IMM	0x020
+#define RE_REG_IMM_encode(x)					\
+	(RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1))
+#define RE_REG_IMM_MAX	 0x07fULL
+#define RE_REG_XFR	0x080
+
+#define UR_REG_XFR	0x180
+#define UR_REG_NN	0x280
+#define UR_REG_NO_DST	0x300
+#define UR_REG_IMM	UR_REG_NO_DST
+#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x))
+#define UR_REG_IMM_MAX	 0x0ffULL
+
+#define OP_BR_BASE	0x0d800000020ULL
+#define OP_BR_BASE_MASK	0x0f8000c3ce0ULL
+#define OP_BR_MASK	0x0000000001fULL
+#define OP_BR_EV_PIP	0x00000000300ULL
+#define OP_BR_CSS	0x0000003c000ULL
+#define OP_BR_DEFBR	0x00000300000ULL
+#define OP_BR_ADDR_LO	0x007ffc00000ULL
+#define OP_BR_ADDR_HI	0x10000000000ULL
+
+#define nfp_is_br(_insn)				\
+	(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
+
+enum br_mask {
+	BR_BEQ = 0x00,
+	BR_BNE = 0x01,
+	BR_BHS = 0x04,
+	BR_BLO = 0x05,
+	BR_BGE = 0x08,
+	BR_UNC = 0x18,
+};
+
+enum br_ev_pip {
+	BR_EV_PIP_UNCOND = 0,
+	BR_EV_PIP_COND = 1,
+};
+
+enum br_ctx_signal_state {
+	BR_CSS_NONE = 2,
+};
+
+#define OP_BBYTE_BASE	0x0c800000000ULL
+#define OP_BB_A_SRC	0x000000000ffULL
+#define OP_BB_BYTE	0x00000000300ULL
+#define OP_BB_B_SRC	0x0000003fc00ULL
+#define OP_BB_I8	0x00000040000ULL
+#define OP_BB_EQ	0x00000080000ULL
+#define OP_BB_DEFBR	0x00000300000ULL
+#define OP_BB_ADDR_LO	0x007ffc00000ULL
+#define OP_BB_ADDR_HI	0x10000000000ULL
+
+#define OP_BALU_BASE	0x0e800000000ULL
+#define OP_BA_A_SRC	0x000000003ffULL
+#define OP_BA_B_SRC	0x000000ffc00ULL
+#define OP_BA_DEFBR	0x00000300000ULL
+#define OP_BA_ADDR_HI	0x0007fc00000ULL
+
+#define OP_IMMED_A_SRC	0x000000003ffULL
+#define OP_IMMED_B_SRC	0x000000ffc00ULL
+#define OP_IMMED_IMM	0x0000ff00000ULL
+#define OP_IMMED_WIDTH	0x00060000000ULL
+#define OP_IMMED_INV	0x00080000000ULL
+#define OP_IMMED_SHIFT	0x00600000000ULL
+#define OP_IMMED_BASE	0x0f000000000ULL
+#define OP_IMMED_WR_AB	0x20000000000ULL
+
+enum immed_width {
+	IMMED_WIDTH_ALL = 0,
+	IMMED_WIDTH_BYTE = 1,
+	IMMED_WIDTH_WORD = 2,
+};
+
+enum immed_shift {
+	IMMED_SHIFT_0B = 0,
+	IMMED_SHIFT_1B = 1,
+	IMMED_SHIFT_2B = 2,
+};
+
+#define OP_SHF_BASE	0x08000000000ULL
+#define OP_SHF_A_SRC	0x000000000ffULL
+#define OP_SHF_SC	0x00000000300ULL
+#define OP_SHF_B_SRC	0x0000003fc00ULL
+#define OP_SHF_I8	0x00000040000ULL
+#define OP_SHF_SW	0x00000080000ULL
+#define OP_SHF_DST	0x0000ff00000ULL
+#define OP_SHF_SHIFT	0x001f0000000ULL
+#define OP_SHF_OP	0x00e00000000ULL
+#define OP_SHF_DST_AB	0x01000000000ULL
+#define OP_SHF_WR_AB	0x20000000000ULL
+
+enum shf_op {
+	SHF_OP_NONE = 0,
+	SHF_OP_AND = 2,
+	SHF_OP_OR = 5,
+};
+
+enum shf_sc {
+	SHF_SC_R_ROT = 0,
+	SHF_SC_R_SHF = 1,
+	SHF_SC_L_SHF = 2,
+	SHF_SC_R_DSHF = 3,
+};
+
+#define OP_ALU_A_SRC	0x000000003ffULL
+#define OP_ALU_B_SRC	0x000000ffc00ULL
+#define OP_ALU_DST	0x0003ff00000ULL
+#define OP_ALU_SW	0x00040000000ULL
+#define OP_ALU_OP	0x00f80000000ULL
+#define OP_ALU_DST_AB	0x01000000000ULL
+#define OP_ALU_BASE	0x0a000000000ULL
+#define OP_ALU_WR_AB	0x20000000000ULL
+
+enum alu_op {
+	ALU_OP_NONE	= 0x00,
+	ALU_OP_ADD	= 0x01,
+	ALU_OP_NEG	= 0x04,
+	ALU_OP_AND	= 0x08,
+	ALU_OP_SUB_C	= 0x0d,
+	ALU_OP_ADD_C	= 0x11,
+	ALU_OP_OR	= 0x14,
+	ALU_OP_SUB	= 0x15,
+	ALU_OP_XOR	= 0x18,
+};
+
+enum alu_dst_ab {
+	ALU_DST_A = 0,
+	ALU_DST_B = 1,
+};
+
+#define OP_LDF_BASE	0x0c000000000ULL
+#define OP_LDF_A_SRC	0x000000000ffULL
+#define OP_LDF_SC	0x00000000300ULL
+#define OP_LDF_B_SRC	0x0000003fc00ULL
+#define OP_LDF_I8	0x00000040000ULL
+#define OP_LDF_SW	0x00000080000ULL
+#define OP_LDF_ZF	0x00000100000ULL
+#define OP_LDF_BMASK	0x0000f000000ULL
+#define OP_LDF_SHF	0x001f0000000ULL
+#define OP_LDF_WR_AB	0x20000000000ULL
+
+#define OP_CMD_A_SRC	 0x000000000ffULL
+#define OP_CMD_CTX	 0x00000000300ULL
+#define OP_CMD_B_SRC	 0x0000003fc00ULL
+#define OP_CMD_TOKEN	 0x000000c0000ULL
+#define OP_CMD_XFER	 0x00001f00000ULL
+#define OP_CMD_CNT	 0x0000e000000ULL
+#define OP_CMD_SIG	 0x000f0000000ULL
+#define OP_CMD_TGT_CMD	 0x07f00000000ULL
+#define OP_CMD_MODE	0x1c0000000000ULL
+
+struct cmd_tgt_act {
+	u8 token;
+	u8 tgt_cmd;
+};
+
+enum cmd_tgt_map {
+	CMD_TGT_READ8,
+	CMD_TGT_WRITE8,
+	CMD_TGT_READ_LE,
+	CMD_TGT_READ_SWAP_LE,
+	__CMD_TGT_MAP_SIZE,
+};
+
+enum cmd_mode {
+	CMD_MODE_40b_AB	= 0,
+	CMD_MODE_40b_BA	= 1,
+	CMD_MODE_32b	= 4,
+};
+
+enum cmd_ctx_swap {
+	CMD_CTX_SWAP = 0,
+	CMD_CTX_NO_SWAP = 3,
+};
+
+#define OP_LCSR_BASE	0x0fc00000000ULL
+#define OP_LCSR_A_SRC	0x000000003ffULL
+#define OP_LCSR_B_SRC	0x000000ffc00ULL
+#define OP_LCSR_WRITE	0x00000200000ULL
+#define OP_LCSR_ADDR	0x001ffc00000ULL
+
+enum lcsr_wr_src {
+	LCSR_WR_AREG,
+	LCSR_WR_BREG,
+	LCSR_WR_IMM,
+};
+
+#define OP_CARB_BASE	0x0e000000000ULL
+#define OP_CARB_OR	0x00000010000ULL
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
new file mode 100644
index 0000000..fc220cd
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_BPF_H__
+#define __NFP_BPF_H__ 1
+
+#include <linux/bitfield.h>
+#include <linux/bpf.h>
+#include <linux/list.h>
+#include <linux/types.h>
+
+#define FIELD_FIT(mask, val)  (!((((u64)val) << __bf_shf(mask)) & ~(mask)))
+
+/* For branch fixup logic use up-most byte of branch instruction as scratch
+ * area.  Remember to clear this before sending instructions to HW!
+ */
+#define OP_BR_SPECIAL	0xff00000000000000ULL
+
+enum br_special {
+	OP_BR_NORMAL = 0,
+	OP_BR_GO_OUT,
+	OP_BR_GO_ABORT,
+};
+
+enum static_regs {
+	STATIC_REG_PKT		= 1,
+#define REG_PKT_BANK	ALU_DST_A
+	STATIC_REG_IMM		= 2, /* Bank AB */
+};
+
+enum nfp_bpf_action_type {
+	NN_ACT_TC_DROP,
+	NN_ACT_TC_REDIR,
+	NN_ACT_DIRECT,
+};
+
+/* Software register representation, hardware encoding in asm.h */
+#define NN_REG_TYPE	GENMASK(31, 24)
+#define NN_REG_VAL	GENMASK(7, 0)
+
+enum nfp_bpf_reg_type {
+	NN_REG_GPR_A =	BIT(0),
+	NN_REG_GPR_B =	BIT(1),
+	NN_REG_NNR =	BIT(2),
+	NN_REG_XFER =	BIT(3),
+	NN_REG_IMM =	BIT(4),
+	NN_REG_NONE =	BIT(5),
+};
+
+#define NN_REG_GPR_BOTH	(NN_REG_GPR_A | NN_REG_GPR_B)
+
+#define reg_both(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH))
+#define reg_a(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A))
+#define reg_b(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B))
+#define reg_nnr(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR))
+#define reg_xfer(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER))
+#define reg_imm(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM))
+#define reg_none()	(FIELD_PREP(NN_REG_TYPE, NN_REG_NONE))
+
+#define pkt_reg(np)	reg_a((np)->regs_per_thread - STATIC_REG_PKT)
+#define imm_a(np)	reg_a((np)->regs_per_thread - STATIC_REG_IMM)
+#define imm_b(np)	reg_b((np)->regs_per_thread - STATIC_REG_IMM)
+#define imm_both(np)	reg_both((np)->regs_per_thread - STATIC_REG_IMM)
+
+#define NFP_BPF_ABI_FLAGS	reg_nnr(0)
+#define   NFP_BPF_ABI_FLAG_MARK	1
+#define NFP_BPF_ABI_MARK	reg_nnr(1)
+#define NFP_BPF_ABI_PKT		reg_nnr(2)
+#define NFP_BPF_ABI_LEN		reg_nnr(3)
+
+struct nfp_prog;
+struct nfp_insn_meta;
+typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
+
+#define nfp_prog_first_meta(nfp_prog)					\
+	list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
+#define nfp_prog_last_meta(nfp_prog)					\
+	list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
+#define nfp_meta_next(meta)	list_next_entry(meta, l)
+#define nfp_meta_prev(meta)	list_prev_entry(meta, l)
+
+/**
+ * struct nfp_insn_meta - BPF instruction wrapper
+ * @insn: BPF instruction
+ * @off: index of first generated machine instruction (in nfp_prog.prog)
+ * @n: eBPF instruction number
+ * @skip: skip this instruction (optimized out)
+ * @double_cb: callback for second part of the instruction
+ * @l: link on nfp_prog->insns list
+ */
+struct nfp_insn_meta {
+	struct bpf_insn insn;
+	unsigned int off;
+	unsigned short n;
+	bool skip;
+	instr_cb_t double_cb;
+
+	struct list_head l;
+};
+
+#define BPF_SIZE_MASK	0x18
+
+static inline u8 mbpf_class(const struct nfp_insn_meta *meta)
+{
+	return BPF_CLASS(meta->insn.code);
+}
+
+static inline u8 mbpf_src(const struct nfp_insn_meta *meta)
+{
+	return BPF_SRC(meta->insn.code);
+}
+
+static inline u8 mbpf_op(const struct nfp_insn_meta *meta)
+{
+	return BPF_OP(meta->insn.code);
+}
+
+static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
+{
+	return BPF_MODE(meta->insn.code);
+}
+
+/**
+ * struct nfp_prog - nfp BPF program
+ * @prog: machine code
+ * @prog_len: number of valid instructions in @prog array
+ * @__prog_alloc_len: alloc size of @prog array
+ * @act: BPF program/action type (TC DA, TC with action, XDP etc.)
+ * @num_regs: number of registers used by this program
+ * @regs_per_thread: number of basic registers allocated per thread
+ * @start_off: address of the first instruction in the memory
+ * @tgt_out: jump target for normal exit
+ * @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
+ * @tgt_done: jump target to get the next packet
+ * @n_translated: number of successfully translated instructions (for errors)
+ * @error: error code if something went wrong
+ * @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
+ */
+struct nfp_prog {
+	u64 *prog;
+	unsigned int prog_len;
+	unsigned int __prog_alloc_len;
+
+	enum nfp_bpf_action_type act;
+
+	unsigned int num_regs;
+	unsigned int regs_per_thread;
+
+	unsigned int start_off;
+	unsigned int tgt_out;
+	unsigned int tgt_abort;
+	unsigned int tgt_done;
+
+	unsigned int n_translated;
+	int error;
+
+	struct list_head insns;
+};
+
+struct nfp_bpf_result {
+	unsigned int n_instr;
+	bool dense_mode;
+};
+
+#ifdef CONFIG_BPF_SYSCALL
+int
+nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
+	    unsigned int prog_start, unsigned int prog_done,
+	    unsigned int prog_sz, struct nfp_bpf_result *res);
+#else
+int
+nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
+	    unsigned int prog_start, unsigned int prog_done,
+	    unsigned int prog_sz, struct nfp_bpf_result *res)
+{
+	return -ENOTSUPP;
+}
+#endif
+
+int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog);
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
new file mode 100644
index 0000000..3de819a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
@@ -0,0 +1,1811 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt)	"NFP net bpf: " fmt
+
+#include <linux/kernel.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/pkt_cls.h>
+#include <linux/unistd.h>
+
+#include "nfp_asm.h"
+#include "nfp_bpf.h"
+
+/* --- NFP prog --- */
+/* Foreach "multiple" entries macros provide pos and next<n> pointers.
+ * It's safe to modify the next pointers (but not pos).
+ */
+#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
+	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
+	     next = list_next_entry(pos, l);			\
+	     &(nfp_prog)->insns != &pos->l &&			\
+	     &(nfp_prog)->insns != &next->l;			\
+	     pos = nfp_meta_next(pos),				\
+	     next = nfp_meta_next(pos))
+
+#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
+	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
+	     next = list_next_entry(pos, l),			\
+	     next2 = list_next_entry(next, l);			\
+	     &(nfp_prog)->insns != &pos->l &&			\
+	     &(nfp_prog)->insns != &next->l &&			\
+	     &(nfp_prog)->insns != &next2->l;			\
+	     pos = nfp_meta_next(pos),				\
+	     next = nfp_meta_next(pos),				\
+	     next2 = nfp_meta_next(next))
+
+static bool
+nfp_meta_has_next(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return meta->l.next != &nfp_prog->insns;
+}
+
+static bool
+nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return meta->l.prev != &nfp_prog->insns;
+}
+
+static void nfp_prog_free(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta, *tmp;
+
+	list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) {
+		list_del(&meta->l);
+		kfree(meta);
+	}
+	kfree(nfp_prog);
+}
+
+static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
+{
+	if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) {
+		nfp_prog->error = -ENOSPC;
+		return;
+	}
+
+	nfp_prog->prog[nfp_prog->prog_len] = insn;
+	nfp_prog->prog_len++;
+}
+
+static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
+{
+	return nfp_prog->start_off + nfp_prog->prog_len;
+}
+
+static unsigned int
+nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset)
+{
+	return offset - nfp_prog->start_off;
+}
+
+/* --- SW reg --- */
+struct nfp_insn_ur_regs {
+	enum alu_dst_ab dst_ab;
+	u16 dst;
+	u16 areg, breg;
+	bool swap;
+	bool wr_both;
+};
+
+struct nfp_insn_re_regs {
+	enum alu_dst_ab dst_ab;
+	u8 dst;
+	u8 areg, breg;
+	bool swap;
+	bool wr_both;
+	bool i8;
+};
+
+static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst)
+{
+	u16 val = FIELD_GET(NN_REG_VAL, swreg);
+
+	switch (FIELD_GET(NN_REG_TYPE, swreg)) {
+	case NN_REG_GPR_A:
+	case NN_REG_GPR_B:
+	case NN_REG_GPR_BOTH:
+		return val;
+	case NN_REG_NNR:
+		return UR_REG_NN | val;
+	case NN_REG_XFER:
+		return UR_REG_XFR | val;
+	case NN_REG_IMM:
+		if (val & ~0xff) {
+			pr_err("immediate too large\n");
+			return 0;
+		}
+		return UR_REG_IMM_encode(val);
+	case NN_REG_NONE:
+		return is_dst ? UR_REG_NO_DST : REG_NONE;
+	default:
+		pr_err("unrecognized reg encoding %08x\n", swreg);
+		return 0;
+	}
+}
+
+static int
+swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg)
+{
+	memset(reg, 0, sizeof(*reg));
+
+	/* Decode destination */
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
+		reg->dst_ab = ALU_DST_B;
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
+		reg->wr_both = true;
+	reg->dst = nfp_swreg_to_unreg(dst, true);
+
+	/* Decode source operands */
+	if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
+	    FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
+		reg->areg = nfp_swreg_to_unreg(rreg, false);
+		reg->breg = nfp_swreg_to_unreg(lreg, false);
+		reg->swap = true;
+	} else {
+		reg->areg = nfp_swreg_to_unreg(lreg, false);
+		reg->breg = nfp_swreg_to_unreg(rreg, false);
+	}
+
+	return 0;
+}
+
+static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8)
+{
+	u16 val = FIELD_GET(NN_REG_VAL, swreg);
+
+	switch (FIELD_GET(NN_REG_TYPE, swreg)) {
+	case NN_REG_GPR_A:
+	case NN_REG_GPR_B:
+	case NN_REG_GPR_BOTH:
+		return val;
+	case NN_REG_XFER:
+		return RE_REG_XFR | val;
+	case NN_REG_IMM:
+		if (val & ~(0x7f | has_imm8 << 7)) {
+			pr_err("immediate too large\n");
+			return 0;
+		}
+		*i8 = val & 0x80;
+		return RE_REG_IMM_encode(val & 0x7f);
+	case NN_REG_NONE:
+		return is_dst ? RE_REG_NO_DST : REG_NONE;
+	default:
+		pr_err("unrecognized reg encoding\n");
+		return 0;
+	}
+}
+
+static int
+swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg,
+		    bool has_imm8)
+{
+	memset(reg, 0, sizeof(*reg));
+
+	/* Decode destination */
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
+		reg->dst_ab = ALU_DST_B;
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
+		reg->wr_both = true;
+	reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL);
+
+	/* Decode source operands */
+	if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
+	    FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
+		reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+		reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+		reg->swap = true;
+	} else {
+		reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+		reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+	}
+
+	return 0;
+}
+
+/* --- Emitters --- */
+static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
+	[CMD_TGT_WRITE8] =		{ 0x00, 0x42 },
+	[CMD_TGT_READ8] =		{ 0x01, 0x43 },
+	[CMD_TGT_READ_LE] =		{ 0x01, 0x40 },
+	[CMD_TGT_READ_SWAP_LE] =	{ 0x03, 0x40 },
+};
+
+static void
+__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
+	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync)
+{
+	enum cmd_ctx_swap ctx;
+	u64 insn;
+
+	if (sync)
+		ctx = CMD_CTX_SWAP;
+	else
+		ctx = CMD_CTX_NO_SWAP;
+
+	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
+		FIELD_PREP(OP_CMD_CTX, ctx) |
+		FIELD_PREP(OP_CMD_B_SRC, breg) |
+		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
+		FIELD_PREP(OP_CMD_XFER, xfer) |
+		FIELD_PREP(OP_CMD_CNT, size) |
+		FIELD_PREP(OP_CMD_SIG, sync) |
+		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
+		FIELD_PREP(OP_CMD_MODE, mode);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
+	 u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+	if (reg.swap) {
+		pr_err("cmd can't swap arguments\n");
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+
+	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync);
+}
+
+static void
+__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
+	  enum br_ctx_signal_state css, u16 addr, u8 defer)
+{
+	u16 addr_lo, addr_hi;
+	u64 insn;
+
+	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
+	addr_hi = addr != addr_lo;
+
+	insn = OP_BR_BASE |
+		FIELD_PREP(OP_BR_MASK, mask) |
+		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
+		FIELD_PREP(OP_BR_CSS, css) |
+		FIELD_PREP(OP_BR_DEFBR, defer) |
+		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
+		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer)
+{
+	if (defer > 2) {
+		pr_err("BUG: branch defer out of bounds %d\n", defer);
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+	__emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer);
+}
+
+static void
+emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
+{
+	__emit_br(nfp_prog, mask,
+		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
+		  BR_CSS_NONE, addr, defer);
+}
+
+static void
+__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8,
+	       u8 byte, bool equal, u16 addr, u8 defer)
+{
+	u16 addr_lo, addr_hi;
+	u64 insn;
+
+	addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO));
+	addr_hi = addr != addr_lo;
+
+	insn = OP_BBYTE_BASE |
+		FIELD_PREP(OP_BB_A_SRC, areg) |
+		FIELD_PREP(OP_BB_BYTE, byte) |
+		FIELD_PREP(OP_BB_B_SRC, breg) |
+		FIELD_PREP(OP_BB_I8, imm8) |
+		FIELD_PREP(OP_BB_EQ, equal) |
+		FIELD_PREP(OP_BB_DEFBR, defer) |
+		FIELD_PREP(OP_BB_ADDR_LO, addr_lo) |
+		FIELD_PREP(OP_BB_ADDR_HI, addr_hi);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_br_byte_neq(struct nfp_prog *nfp_prog,
+		 u32 dst, u8 imm, u8 byte, u16 addr, u8 defer)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), &reg, true);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr,
+		       defer);
+}
+
+static void
+__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
+	     enum immed_width width, bool invert,
+	     enum immed_shift shift, bool wr_both)
+{
+	u64 insn;
+
+	insn = OP_IMMED_BASE |
+		FIELD_PREP(OP_IMMED_A_SRC, areg) |
+		FIELD_PREP(OP_IMMED_B_SRC, breg) |
+		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
+		FIELD_PREP(OP_IMMED_WIDTH, width) |
+		FIELD_PREP(OP_IMMED_INV, invert) |
+		FIELD_PREP(OP_IMMED_SHIFT, shift) |
+		FIELD_PREP(OP_IMMED_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm,
+	   enum immed_width width, bool invert, enum immed_shift shift)
+{
+	struct nfp_insn_ur_regs reg;
+	int err;
+
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) {
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+
+	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width,
+		     invert, shift, reg.wr_both);
+}
+
+static void
+__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
+	   enum shf_sc sc, u8 shift,
+	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both)
+{
+	u64 insn;
+
+	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+
+	if (sc == SHF_SC_L_SHF)
+		shift = 32 - shift;
+
+	insn = OP_SHF_BASE |
+		FIELD_PREP(OP_SHF_A_SRC, areg) |
+		FIELD_PREP(OP_SHF_SC, sc) |
+		FIELD_PREP(OP_SHF_B_SRC, breg) |
+		FIELD_PREP(OP_SHF_I8, i8) |
+		FIELD_PREP(OP_SHF_SW, sw) |
+		FIELD_PREP(OP_SHF_DST, dst) |
+		FIELD_PREP(OP_SHF_SHIFT, shift) |
+		FIELD_PREP(OP_SHF_OP, op) |
+		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
+		FIELD_PREP(OP_SHF_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg,
+	 enum shf_sc sc, u8 shift)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
+		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both);
+}
+
+static void
+__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
+	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both)
+{
+	u64 insn;
+
+	insn = OP_ALU_BASE |
+		FIELD_PREP(OP_ALU_A_SRC, areg) |
+		FIELD_PREP(OP_ALU_B_SRC, breg) |
+		FIELD_PREP(OP_ALU_DST, dst) |
+		FIELD_PREP(OP_ALU_SW, swap) |
+		FIELD_PREP(OP_ALU_OP, op) |
+		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
+		FIELD_PREP(OP_ALU_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg)
+{
+	struct nfp_insn_ur_regs reg;
+	int err;
+
+	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
+		   reg.areg, op, reg.breg, reg.swap, reg.wr_both);
+}
+
+static void
+__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
+		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
+		bool zero, bool swap, bool wr_both)
+{
+	u64 insn;
+
+	insn = OP_LDF_BASE |
+		FIELD_PREP(OP_LDF_A_SRC, areg) |
+		FIELD_PREP(OP_LDF_SC, sc) |
+		FIELD_PREP(OP_LDF_B_SRC, breg) |
+		FIELD_PREP(OP_LDF_I8, imm8) |
+		FIELD_PREP(OP_LDF_SW, swap) |
+		FIELD_PREP(OP_LDF_ZF, zero) |
+		FIELD_PREP(OP_LDF_BMASK, bmask) |
+		FIELD_PREP(OP_LDF_SHF, shift) |
+		FIELD_PREP(OP_LDF_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift,
+		  u32 dst, u8 bmask, u32 src, bool zero)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(reg_none(), dst, src, &reg, true);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
+			reg.i8, zero, reg.swap, reg.wr_both);
+}
+
+static void
+emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src,
+	      enum shf_sc sc, u8 shift)
+{
+	emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false);
+}
+
+/* --- Wrappers --- */
+static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
+{
+	if (!(imm & 0xffff0000)) {
+		*val = imm;
+		*shift = IMMED_SHIFT_0B;
+	} else if (!(imm & 0xff0000ff)) {
+		*val = imm >> 8;
+		*shift = IMMED_SHIFT_1B;
+	} else if (!(imm & 0x0000ffff)) {
+		*val = imm >> 16;
+		*shift = IMMED_SHIFT_2B;
+	} else {
+		return false;
+	}
+
+	return true;
+}
+
+static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm)
+{
+	enum immed_shift shift;
+	u16 val;
+
+	if (pack_immed(imm, &val, &shift)) {
+		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
+	} else if (pack_immed(~imm, &val, &shift)) {
+		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
+	} else {
+		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
+			   false, IMMED_SHIFT_0B);
+		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
+			   false, IMMED_SHIFT_2B);
+	}
+}
+
+/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
+ * If the @imm is small enough encode it directly in operand and return
+ * otherwise load @imm to a spare register and return its encoding.
+ */
+static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+{
+	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
+		return reg_imm(imm);
+
+	wrp_immed(nfp_prog, tmp_reg, imm);
+	return tmp_reg;
+}
+
+/* re_load_imm_any() - encode immediate or use tmp register (restricted)
+ * If the @imm is small enough encode it directly in operand and return
+ * otherwise load @imm to a spare register and return its encoding.
+ */
+static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+{
+	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
+		return reg_imm(imm);
+
+	wrp_immed(nfp_prog, tmp_reg, imm);
+	return tmp_reg;
+}
+
+static void
+wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask,
+	       enum br_special special)
+{
+	emit_br(nfp_prog, mask, 0, 0);
+
+	nfp_prog->prog[nfp_prog->prog_len - 1] |=
+		FIELD_PREP(OP_BR_SPECIAL, special);
+}
+
+static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
+{
+	emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src));
+}
+
+static int
+construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset,
+		      u16 src, bool src_valid, u8 size)
+{
+	unsigned int i;
+	u16 shift, sz;
+	u32 tmp_reg;
+
+	/* We load the value from the address indicated in @offset and then
+	 * shift out the data we don't need.  Note: this is big endian!
+	 */
+	sz = size < 4 ? 4 : size;
+	shift = size < 4 ? 4 - size : 0;
+
+	if (src_valid) {
+		/* Calculate the true offset (src_reg + imm) */
+		tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+		emit_alu(nfp_prog, imm_both(nfp_prog),
+			 reg_a(src), ALU_OP_ADD, tmp_reg);
+		/* Check packet length (size guaranteed to fit b/c it's u8) */
+		emit_alu(nfp_prog, imm_a(nfp_prog),
+			 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
+		emit_alu(nfp_prog, reg_none(),
+			 NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog));
+		wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+		/* Load data */
+		emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
+			 pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true);
+	} else {
+		/* Check packet length */
+		tmp_reg = ur_load_imm_any(nfp_prog, offset + size,
+					  imm_a(nfp_prog));
+		emit_alu(nfp_prog, reg_none(),
+			 NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg);
+		wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+		/* Load data */
+		tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+		emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
+			 pkt_reg(nfp_prog), tmp_reg, sz - 1, true);
+	}
+
+	i = 0;
+	if (shift)
+		emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE,
+			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
+	else
+		for (; i * 4 < size; i++)
+			emit_alu(nfp_prog, reg_both(i),
+				 reg_none(), ALU_OP_NONE, reg_xfer(i));
+
+	if (i < 2)
+		wrp_immed(nfp_prog, reg_both(1), 0);
+
+	return 0;
+}
+
+static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
+{
+	return construct_data_ind_ld(nfp_prog, offset, 0, false, size);
+}
+
+static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src)
+{
+	emit_alu(nfp_prog, NFP_BPF_ABI_MARK,
+		 reg_none(), ALU_OP_NONE, reg_b(src));
+	emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS,
+		 NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK));
+
+	return 0;
+}
+
+static void
+wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
+{
+	u32 tmp_reg;
+
+	if (alu_op == ALU_OP_AND) {
+		if (!imm)
+			wrp_immed(nfp_prog, reg_both(dst), 0);
+		if (!imm || !~imm)
+			return;
+	}
+	if (alu_op == ALU_OP_OR) {
+		if (!~imm)
+			wrp_immed(nfp_prog, reg_both(dst), ~0U);
+		if (!imm || !~imm)
+			return;
+	}
+	if (alu_op == ALU_OP_XOR) {
+		if (!~imm)
+			emit_alu(nfp_prog, reg_both(dst), reg_none(),
+				 ALU_OP_NEG, reg_b(dst));
+		if (!imm || !~imm)
+			return;
+	}
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
+	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
+}
+
+static int
+wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op, bool skip)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+
+	if (skip) {
+		meta->skip = true;
+		return 0;
+	}
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
+
+	return 0;
+}
+
+static int
+wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op)
+{
+	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
+
+	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
+	emit_alu(nfp_prog, reg_both(dst + 1),
+		 reg_a(dst + 1), alu_op, reg_b(src + 1));
+
+	return 0;
+}
+
+static int
+wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op, bool skip)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (skip) {
+		meta->skip = true;
+		return 0;
+	}
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int
+wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op)
+{
+	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
+
+	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static void
+wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
+		 enum br_mask br_mask, u16 off)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
+	emit_br(nfp_prog, br_mask, off, 0);
+}
+
+static int
+wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	     enum alu_op alu_op, enum br_mask br_mask)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
+			 insn->src_reg * 2, br_mask, insn->off);
+	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
+			 insn->src_reg * 2 + 1, br_mask, insn->off);
+
+	return 0;
+}
+
+static int
+wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	    enum br_mask br_mask, bool swap)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u8 reg = insn->dst_reg * 2;
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+	if (!swap)
+		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
+	else
+		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+	if (!swap)
+		emit_alu(nfp_prog, reg_none(),
+			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
+	else
+		emit_alu(nfp_prog, reg_none(),
+			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
+
+	emit_br(nfp_prog, br_mask, insn->off, 0);
+
+	return 0;
+}
+
+static int
+wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	    enum br_mask br_mask, bool swap)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (swap) {
+		areg ^= breg;
+		breg ^= areg;
+		areg ^= breg;
+	}
+
+	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
+	emit_alu(nfp_prog, reg_none(),
+		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
+	emit_br(nfp_prog, br_mask, insn->off, 0);
+
+	return 0;
+}
+
+/* --- Callbacks --- */
+static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1);
+
+	return 0;
+}
+
+static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	u64 imm = meta->insn.imm; /* sign extend */
+
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
+
+	return 0;
+}
+
+static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
+}
+
+static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
+}
+
+static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
+}
+
+static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
+}
+
+static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
+}
+
+static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
+}
+
+static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
+		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
+		 reg_b(insn->src_reg * 2));
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
+		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
+		 reg_b(insn->src_reg * 2 + 1));
+
+	return 0;
+}
+
+static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
+
+	return 0;
+}
+
+static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
+		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
+		 reg_b(insn->src_reg * 2));
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
+		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
+		 reg_b(insn->src_reg * 2 + 1));
+
+	return 0;
+}
+
+static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
+
+	return 0;
+}
+
+static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->imm != 32)
+		return 1; /* TODO */
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0);
+
+	return 0;
+}
+
+static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->imm != 32)
+		return 1; /* TODO */
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2,  insn->src_reg * 2);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
+}
+
+static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
+}
+
+static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
+}
+
+static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
+}
+
+static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
+}
+
+static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
+}
+
+static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
+}
+
+static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
+}
+
+static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
+}
+
+static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
+}
+
+static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (!insn->imm)
+		return 1; /* TODO: zero shift means indirect */
+
+	emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
+		 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
+		 SHF_SC_L_SHF, insn->imm);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1),
+		  meta->insn.imm);
+
+	return 0;
+}
+
+static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	meta->double_cb = imm_ld8_part2;
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
+
+	return 0;
+}
+
+static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
+}
+
+static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
+}
+
+static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
+}
+
+static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+				     meta->insn.src_reg * 2, true, 1);
+}
+
+static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+				     meta->insn.src_reg * 2, true, 2);
+}
+
+static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+				     meta->insn.src_reg * 2, true, 4);
+}
+
+static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (meta->insn.off == offsetof(struct sk_buff, len))
+		emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+			 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN);
+	else
+		return -ENOTSUPP;
+
+	return 0;
+}
+
+static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (meta->insn.off == offsetof(struct sk_buff, mark))
+		return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2);
+
+	return -ENOTSUPP;
+}
+
+static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (meta->insn.off < 0) /* TODO */
+		return -ENOTSUPP;
+	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
+
+	return 0;
+}
+
+static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1);
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (imm & ~0U) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+		emit_alu(nfp_prog, imm_a(nfp_prog),
+			 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
+		or1 = imm_a(nfp_prog);
+	}
+
+	if (imm >> 32) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+		emit_alu(nfp_prog, imm_b(nfp_prog),
+			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
+		or2 = imm_b(nfp_prog);
+	}
+
+	emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
+	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
+
+	return 0;
+}
+
+static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
+}
+
+static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
+}
+
+static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (!imm) {
+		meta->skip = true;
+		return 0;
+	}
+
+	if (imm & ~0U) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+		emit_alu(nfp_prog, reg_none(),
+			 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
+		emit_br(nfp_prog, BR_BNE, insn->off, 0);
+	}
+
+	if (imm >> 32) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+		emit_alu(nfp_prog, reg_none(),
+			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
+		emit_br(nfp_prog, BR_BNE, insn->off, 0);
+	}
+
+	return 0;
+}
+
+static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (!imm) {
+		emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
+			 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
+		emit_br(nfp_prog, BR_BNE, insn->off, 0);
+	}
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+	emit_alu(nfp_prog, reg_none(),
+		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
+	emit_br(nfp_prog, BR_BNE, insn->off, 0);
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+	emit_alu(nfp_prog, reg_none(),
+		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
+	emit_br(nfp_prog, BR_BNE, insn->off, 0);
+
+	return 0;
+}
+
+static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
+		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
+	emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
+		 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
+	emit_alu(nfp_prog, reg_none(),
+		 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
+	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
+
+	return 0;
+}
+
+static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
+}
+
+static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
+}
+
+static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
+}
+
+static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
+}
+
+static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT);
+
+	return 0;
+}
+
+static const instr_cb_t instr_cb[256] = {
+	[BPF_ALU64 | BPF_MOV | BPF_X] =	mov_reg64,
+	[BPF_ALU64 | BPF_MOV | BPF_K] =	mov_imm64,
+	[BPF_ALU64 | BPF_XOR | BPF_X] =	xor_reg64,
+	[BPF_ALU64 | BPF_XOR | BPF_K] =	xor_imm64,
+	[BPF_ALU64 | BPF_AND | BPF_X] =	and_reg64,
+	[BPF_ALU64 | BPF_AND | BPF_K] =	and_imm64,
+	[BPF_ALU64 | BPF_OR | BPF_X] =	or_reg64,
+	[BPF_ALU64 | BPF_OR | BPF_K] =	or_imm64,
+	[BPF_ALU64 | BPF_ADD | BPF_X] =	add_reg64,
+	[BPF_ALU64 | BPF_ADD | BPF_K] =	add_imm64,
+	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
+	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
+	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
+	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
+	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
+	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
+	[BPF_ALU | BPF_XOR | BPF_X] =	xor_reg,
+	[BPF_ALU | BPF_XOR | BPF_K] =	xor_imm,
+	[BPF_ALU | BPF_AND | BPF_X] =	and_reg,
+	[BPF_ALU | BPF_AND | BPF_K] =	and_imm,
+	[BPF_ALU | BPF_OR | BPF_X] =	or_reg,
+	[BPF_ALU | BPF_OR | BPF_K] =	or_imm,
+	[BPF_ALU | BPF_ADD | BPF_X] =	add_reg,
+	[BPF_ALU | BPF_ADD | BPF_K] =	add_imm,
+	[BPF_ALU | BPF_SUB | BPF_X] =	sub_reg,
+	[BPF_ALU | BPF_SUB | BPF_K] =	sub_imm,
+	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
+	[BPF_LD | BPF_IMM | BPF_DW] =	imm_ld8,
+	[BPF_LD | BPF_ABS | BPF_B] =	data_ld1,
+	[BPF_LD | BPF_ABS | BPF_H] =	data_ld2,
+	[BPF_LD | BPF_ABS | BPF_W] =	data_ld4,
+	[BPF_LD | BPF_IND | BPF_B] =	data_ind_ld1,
+	[BPF_LD | BPF_IND | BPF_H] =	data_ind_ld2,
+	[BPF_LD | BPF_IND | BPF_W] =	data_ind_ld4,
+	[BPF_LDX | BPF_MEM | BPF_W] =	mem_ldx4,
+	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
+	[BPF_JMP | BPF_JA | BPF_K] =	jump,
+	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
+	[BPF_JMP | BPF_JGT | BPF_K] =	jgt_imm,
+	[BPF_JMP | BPF_JGE | BPF_K] =	jge_imm,
+	[BPF_JMP | BPF_JSET | BPF_K] =	jset_imm,
+	[BPF_JMP | BPF_JNE | BPF_K] =	jne_imm,
+	[BPF_JMP | BPF_JEQ | BPF_X] =	jeq_reg,
+	[BPF_JMP | BPF_JGT | BPF_X] =	jgt_reg,
+	[BPF_JMP | BPF_JGE | BPF_X] =	jge_reg,
+	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
+	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
+	[BPF_JMP | BPF_EXIT] =		goto_out,
+};
+
+/* --- Misc code --- */
+static void br_set_offset(u64 *instr, u16 offset)
+{
+	u16 addr_lo, addr_hi;
+
+	addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
+	addr_hi = offset != addr_lo;
+	*instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO);
+	*instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
+	*instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo);
+}
+
+/* --- Assembler logic --- */
+static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta, *next;
+	u32 off, br_idx;
+	u32 idx;
+
+	nfp_for_each_insn_walk2(nfp_prog, meta, next) {
+		if (meta->skip)
+			continue;
+		if (BPF_CLASS(meta->insn.code) != BPF_JMP)
+			continue;
+
+		br_idx = nfp_prog_offset_to_index(nfp_prog, next->off) - 1;
+		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
+			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
+			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
+			return -ELOOP;
+		}
+		/* Leave special branches for later */
+		if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]))
+			continue;
+
+		/* Find the target offset in assembler realm */
+		off = meta->insn.off;
+		if (!off) {
+			pr_err("Fixup found zero offset!!\n");
+			return -ELOOP;
+		}
+
+		while (off && nfp_meta_has_next(nfp_prog, next)) {
+			next = nfp_meta_next(next);
+			off--;
+		}
+		if (off) {
+			pr_err("Fixup found too large jump!! %d\n", off);
+			return -ELOOP;
+		}
+
+		if (next->skip) {
+			pr_err("Branch landing on removed instruction!!\n");
+			return -ELOOP;
+		}
+
+		for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off);
+		     idx <= br_idx; idx++) {
+			if (!nfp_is_br(nfp_prog->prog[idx]))
+				continue;
+			br_set_offset(&nfp_prog->prog[idx], next->off);
+		}
+	}
+
+	/* Fixup 'goto out's separately, they can be scattered around */
+	for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) {
+		enum br_special special;
+
+		if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE)
+			continue;
+
+		special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]);
+		switch (special) {
+		case OP_BR_NORMAL:
+			break;
+		case OP_BR_GO_OUT:
+			br_set_offset(&nfp_prog->prog[br_idx],
+				      nfp_prog->tgt_out);
+			break;
+		case OP_BR_GO_ABORT:
+			br_set_offset(&nfp_prog->prog[br_idx],
+				      nfp_prog->tgt_abort);
+			break;
+		}
+
+		nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL;
+	}
+
+	return 0;
+}
+
+static void nfp_intro(struct nfp_prog *nfp_prog)
+{
+	emit_alu(nfp_prog, pkt_reg(nfp_prog),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT);
+}
+
+static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog)
+{
+	const u8 act2code[] = {
+		[NN_ACT_TC_DROP]  = 0x22,
+		[NN_ACT_TC_REDIR] = 0x24
+	};
+	/* Target for aborts */
+	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
+	wrp_immed(nfp_prog, reg_both(0), 0);
+
+	/* Target for normal exits */
+	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
+	/* Legacy TC mode:
+	 *   0        0x11 -> pass,  count as stat0
+	 *  -1  drop  0x22 -> drop,  count as stat1
+	 *     redir  0x24 -> redir, count as stat1
+	 *  ife mark  0x21 -> pass,  count as stat1
+	 *  ife + tx  0x24 -> redir, count as stat1
+	 */
+	emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2);
+	emit_alu(nfp_prog, reg_a(0),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
+
+	emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]),
+		      SHF_SC_L_SHF, 16);
+}
+
+static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
+{
+	/* TC direct-action mode:
+	 *   0,1   ok        NOT SUPPORTED[1]
+	 *   2   drop  0x22 -> drop,  count as stat1
+	 *   4,5 nuke  0x02 -> drop
+	 *   7  redir  0x44 -> redir, count as stat2
+	 *   * unspec  0x11 -> pass,  count as stat0
+	 *
+	 * [1] We can't support OK and RECLASSIFY because we can't tell TC
+	 *     the exact decision made.  We are forced to support UNSPEC
+	 *     to handle aborts so that's the only one we handle for passing
+	 *     packets up the stack.
+	 */
+	/* Target for aborts */
+	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
+
+	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+	emit_alu(nfp_prog, reg_a(0),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
+
+	/* Target for normal exits */
+	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
+
+	/* if R0 > 7 jump to abort */
+	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
+	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
+	emit_alu(nfp_prog, reg_a(0),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+
+	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
+	wrp_immed(nfp_prog, reg_b(3), 0x41001211);
+
+	emit_shf(nfp_prog, reg_a(1),
+		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
+
+	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
+	emit_shf(nfp_prog, reg_a(2),
+		 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
+
+	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
+	emit_shf(nfp_prog, reg_b(2),
+		 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
+
+	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+	emit_shf(nfp_prog, reg_b(2),
+		 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
+}
+
+static void nfp_outro(struct nfp_prog *nfp_prog)
+{
+	switch (nfp_prog->act) {
+	case NN_ACT_DIRECT:
+		nfp_outro_tc_da(nfp_prog);
+		break;
+	case NN_ACT_TC_DROP:
+	case NN_ACT_TC_REDIR:
+		nfp_outro_tc_legacy(nfp_prog);
+		break;
+	}
+}
+
+static int nfp_translate(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta;
+	int err;
+
+	nfp_intro(nfp_prog);
+	if (nfp_prog->error)
+		return nfp_prog->error;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		instr_cb_t cb = instr_cb[meta->insn.code];
+
+		meta->off = nfp_prog_current_offset(nfp_prog);
+
+		if (meta->skip) {
+			nfp_prog->n_translated++;
+			continue;
+		}
+
+		if (nfp_meta_has_prev(nfp_prog, meta) &&
+		    nfp_meta_prev(meta)->double_cb)
+			cb = nfp_meta_prev(meta)->double_cb;
+		if (!cb)
+			return -ENOENT;
+		err = cb(nfp_prog, meta);
+		if (err)
+			return err;
+
+		nfp_prog->n_translated++;
+	}
+
+	nfp_outro(nfp_prog);
+	if (nfp_prog->error)
+		return nfp_prog->error;
+
+	return nfp_fixup_branches(nfp_prog);
+}
+
+static int
+nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
+		 unsigned int cnt)
+{
+	unsigned int i;
+
+	for (i = 0; i < cnt; i++) {
+		struct nfp_insn_meta *meta;
+
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			return -ENOMEM;
+
+		meta->insn = prog[i];
+		meta->n = i;
+
+		list_add_tail(&meta->l, &nfp_prog->insns);
+	}
+
+	return 0;
+}
+
+/* --- Optimizations --- */
+static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		struct bpf_insn insn = meta->insn;
+
+		/* Programs converted from cBPF start with register xoring */
+		if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
+		    insn.src_reg == insn.dst_reg)
+			continue;
+
+		/* Programs start with R6 = R1 but we ignore the skb pointer */
+		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
+		    insn.src_reg == 1 && insn.dst_reg == 6)
+			meta->skip = true;
+
+		/* Return as soon as something doesn't match */
+		if (!meta->skip)
+			return;
+	}
+}
+
+/* Try to rename registers so that program uses only low ones */
+static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog)
+{
+	bool reg_used[MAX_BPF_REG] = {};
+	u8 tgt_reg[MAX_BPF_REG] = {};
+	struct nfp_insn_meta *meta;
+	unsigned int i, j;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		if (meta->skip)
+			continue;
+
+		reg_used[meta->insn.src_reg] = true;
+		reg_used[meta->insn.dst_reg] = true;
+	}
+
+	for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) {
+		if (!reg_used[i])
+			continue;
+
+		tgt_reg[i] = j++;
+	}
+	nfp_prog->num_regs = j;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		meta->insn.src_reg = tgt_reg[meta->insn.src_reg];
+		meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg];
+	}
+
+	return 0;
+}
+
+/* Remove masking after load since our load guarantees this is not needed */
+static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta1, *meta2;
+	const s32 exp_mask[] = {
+		[BPF_B] = 0x000000ffU,
+		[BPF_H] = 0x0000ffffU,
+		[BPF_W] = 0xffffffffU,
+	};
+
+	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
+		struct bpf_insn insn, next;
+
+		insn = meta1->insn;
+		next = meta2->insn;
+
+		if (BPF_CLASS(insn.code) != BPF_LD)
+			continue;
+		if (BPF_MODE(insn.code) != BPF_ABS &&
+		    BPF_MODE(insn.code) != BPF_IND)
+			continue;
+
+		if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
+			continue;
+
+		if (!exp_mask[BPF_SIZE(insn.code)])
+			continue;
+		if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
+			continue;
+
+		if (next.src_reg || next.dst_reg)
+			continue;
+
+		meta2->skip = true;
+	}
+}
+
+static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta1, *meta2, *meta3;
+
+	nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
+		struct bpf_insn insn, next1, next2;
+
+		insn = meta1->insn;
+		next1 = meta2->insn;
+		next2 = meta3->insn;
+
+		if (BPF_CLASS(insn.code) != BPF_LD)
+			continue;
+		if (BPF_MODE(insn.code) != BPF_ABS &&
+		    BPF_MODE(insn.code) != BPF_IND)
+			continue;
+		if (BPF_SIZE(insn.code) != BPF_W)
+			continue;
+
+		if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
+		      next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
+		    !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
+		      next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
+			continue;
+
+		if (next1.src_reg || next1.dst_reg ||
+		    next2.src_reg || next2.dst_reg)
+			continue;
+
+		if (next1.imm != 0x20 || next2.imm != 0x20)
+			continue;
+
+		meta2->skip = true;
+		meta3->skip = true;
+	}
+}
+
+static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
+{
+	int ret;
+
+	nfp_bpf_opt_reg_init(nfp_prog);
+
+	ret = nfp_bpf_opt_reg_rename(nfp_prog);
+	if (ret)
+		return ret;
+
+	nfp_bpf_opt_ld_mask(nfp_prog);
+	nfp_bpf_opt_ld_shift(nfp_prog);
+
+	return 0;
+}
+
+/**
+ * nfp_bpf_jit() - translate BPF code into NFP assembly
+ * @filter:	kernel BPF filter struct
+ * @prog_mem:	memory to store assembler instructions
+ * @act:	action attached to this eBPF program
+ * @prog_start:	offset of the first instruction when loaded
+ * @prog_done:	where to jump on exit
+ * @prog_sz:	size of @prog_mem in instructions
+ * @res:	achieved parameters of translation results
+ */
+int
+nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem,
+	    enum nfp_bpf_action_type act,
+	    unsigned int prog_start, unsigned int prog_done,
+	    unsigned int prog_sz, struct nfp_bpf_result *res)
+{
+	struct nfp_prog *nfp_prog;
+	int ret;
+
+	nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL);
+	if (!nfp_prog)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&nfp_prog->insns);
+	nfp_prog->act = act;
+	nfp_prog->start_off = prog_start;
+	nfp_prog->tgt_done = prog_done;
+
+	ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len);
+	if (ret)
+		goto out;
+
+	ret = nfp_prog_verify(nfp_prog, filter);
+	if (ret)
+		goto out;
+
+	ret = nfp_bpf_optimize(nfp_prog);
+	if (ret)
+		goto out;
+
+	if (nfp_prog->num_regs <= 7)
+		nfp_prog->regs_per_thread = 16;
+	else
+		nfp_prog->regs_per_thread = 32;
+
+	nfp_prog->prog = prog_mem;
+	nfp_prog->__prog_alloc_len = prog_sz;
+
+	ret = nfp_translate(nfp_prog);
+	if (ret) {
+		pr_err("Translation failed with error %d (translated: %u)\n",
+		       ret, nfp_prog->n_translated);
+		ret = -EINVAL;
+	}
+
+	res->n_instr = nfp_prog->prog_len;
+	res->dense_mode = nfp_prog->num_regs <= 7;
+out:
+	nfp_prog_free(nfp_prog);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
new file mode 100644
index 0000000..144cae8
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt)	"NFP net bpf: " fmt
+
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/kernel.h>
+#include <linux/pkt_cls.h>
+
+#include "nfp_bpf.h"
+
+/* Analyzer/verifier definitions */
+struct nfp_bpf_analyzer_priv {
+	struct nfp_prog *prog;
+	struct nfp_insn_meta *meta;
+};
+
+static struct nfp_insn_meta *
+nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		  unsigned int insn_idx, unsigned int n_insns)
+{
+	unsigned int forward, backward, i;
+
+	backward = meta->n - insn_idx;
+	forward = insn_idx - meta->n;
+
+	if (min(forward, backward) > n_insns - insn_idx - 1) {
+		backward = n_insns - insn_idx - 1;
+		meta = nfp_prog_last_meta(nfp_prog);
+	}
+	if (min(forward, backward) > insn_idx && backward > insn_idx) {
+		forward = insn_idx;
+		meta = nfp_prog_first_meta(nfp_prog);
+	}
+
+	if (forward < backward)
+		for (i = 0; i < forward; i++)
+			meta = nfp_meta_next(meta);
+	else
+		for (i = 0; i < backward; i++)
+			meta = nfp_meta_prev(meta);
+
+	return meta;
+}
+
+static int
+nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
+		   const struct bpf_verifier_env *env)
+{
+	const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
+
+	if (reg0->type != CONST_IMM) {
+		pr_info("unsupported exit state: %d, imm: %llx\n",
+			reg0->type, reg0->imm);
+		return -EINVAL;
+	}
+
+	if (nfp_prog->act != NN_ACT_DIRECT &&
+	    reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
+		pr_info("unsupported exit state: %d, imm: %llx\n",
+			reg0->type, reg0->imm);
+		return -EINVAL;
+	}
+
+	if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT &&
+	    reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN &&
+	    reg0->imm != TC_ACT_QUEUED) {
+		pr_info("unsupported exit state: %d, imm: %llx\n",
+			reg0->type, reg0->imm);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog,
+		      const struct bpf_verifier_env *env, u8 reg)
+{
+	if (env->cur_state.regs[reg].type != PTR_TO_CTX)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
+nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
+{
+	struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv;
+	struct nfp_insn_meta *meta = priv->meta;
+
+	meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len);
+	priv->meta = meta;
+
+	if (meta->insn.src_reg == BPF_REG_10 ||
+	    meta->insn.dst_reg == BPF_REG_10) {
+		pr_err("stack not yet supported\n");
+		return -EINVAL;
+	}
+	if (meta->insn.src_reg >= MAX_BPF_REG ||
+	    meta->insn.dst_reg >= MAX_BPF_REG) {
+		pr_err("program uses extended registers - jit hardening?\n");
+		return -EINVAL;
+	}
+
+	if (meta->insn.code == (BPF_JMP | BPF_EXIT))
+		return nfp_bpf_check_exit(priv->prog, env);
+
+	if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM))
+		return nfp_bpf_check_ctx_ptr(priv->prog, env,
+					     meta->insn.src_reg);
+	if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM))
+		return nfp_bpf_check_ctx_ptr(priv->prog, env,
+					     meta->insn.dst_reg);
+
+	return 0;
+}
+
+static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
+	.insn_hook = nfp_verify_insn,
+};
+
+int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog)
+{
+	struct nfp_bpf_analyzer_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->prog = nfp_prog;
+	priv->meta = nfp_prog_first_meta(nfp_prog);
+
+	ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv);
+
+	kfree(priv);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 6906356..ed824e1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -62,6 +62,9 @@
 /* Max time to wait for NFP to respond on updates (in seconds) */
 #define NFP_NET_POLL_TIMEOUT	5
 
+/* Interval for reading offloaded filter stats */
+#define NFP_NET_STAT_POLL_IVL	msecs_to_jiffies(100)
+
 /* Bar allocation */
 #define NFP_NET_CTRL_BAR	0
 #define NFP_NET_Q0_BAR		2
@@ -220,7 +223,7 @@
 #define PCIE_DESC_RX_I_TCP_CSUM_OK	cpu_to_le16(BIT(11))
 #define PCIE_DESC_RX_I_UDP_CSUM		cpu_to_le16(BIT(10))
 #define PCIE_DESC_RX_I_UDP_CSUM_OK	cpu_to_le16(BIT(9))
-#define PCIE_DESC_RX_SPARE		cpu_to_le16(BIT(8))
+#define PCIE_DESC_RX_BPF		cpu_to_le16(BIT(8))
 #define PCIE_DESC_RX_EOP		cpu_to_le16(BIT(7))
 #define PCIE_DESC_RX_IP4_CSUM		cpu_to_le16(BIT(6))
 #define PCIE_DESC_RX_IP4_CSUM_OK	cpu_to_le16(BIT(5))
@@ -266,6 +269,8 @@
 	};
 };
 
+#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0)
+
 struct nfp_net_rx_hash {
 	__be32 hash_type;
 	__be32 hash;
@@ -405,6 +410,11 @@
 	       fw_ver->minor == minor;
 }
 
+struct nfp_stat_pair {
+	u64 pkts;
+	u64 bytes;
+};
+
 /**
  * struct nfp_net - NFP network device structure
  * @pdev:               Backpointer to PCI device
@@ -413,6 +423,7 @@
  * @is_vf:              Is the driver attached to a VF?
  * @is_nfp3200:         Is the driver for a NFP-3200 card?
  * @fw_loaded:          Is the firmware loaded?
+ * @bpf_offload_skip_sw:  Offloaded BPF program will not be rerun by cls_bpf
  * @ctrl:               Local copy of the control register/word.
  * @fl_bufsz:           Currently configured size of the freelist buffers
  * @rx_offset:		Offset in the RX buffers where packet data starts
@@ -427,6 +438,11 @@
  * @rss_cfg:            RSS configuration
  * @rss_key:            RSS secret key
  * @rss_itbl:           RSS indirection table
+ * @rx_filter:		Filter offload statistics - dropped packets/bytes
+ * @rx_filter_prev:	Filter offload statistics - values from previous update
+ * @rx_filter_change:	Jiffies when statistics last changed
+ * @rx_filter_stats_timer:  Timer for polling filter offload statistics
+ * @rx_filter_lock:	Lock protecting timer state changes (teardown)
  * @max_tx_rings:       Maximum number of TX rings supported by the Firmware
  * @max_rx_rings:       Maximum number of RX rings supported by the Firmware
  * @num_tx_rings:       Currently configured number of TX rings
@@ -473,6 +489,7 @@
 	unsigned is_vf:1;
 	unsigned is_nfp3200:1;
 	unsigned fw_loaded:1;
+	unsigned bpf_offload_skip_sw:1;
 
 	u32 ctrl;
 	u32 fl_bufsz;
@@ -502,6 +519,11 @@
 	u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ];
 	u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ];
 
+	struct nfp_stat_pair rx_filter, rx_filter_prev;
+	unsigned long rx_filter_change;
+	struct timer_list rx_filter_stats_timer;
+	spinlock_t rx_filter_lock;
+
 	int max_tx_rings;
 	int max_rx_rings;
 
@@ -561,12 +583,28 @@
 /* Functions to read/write from/to a BAR
  * Performs any endian conversion necessary.
  */
+static inline u16 nn_readb(struct nfp_net *nn, int off)
+{
+	return readb(nn->ctrl_bar + off);
+}
+
 static inline void nn_writeb(struct nfp_net *nn, int off, u8 val)
 {
 	writeb(val, nn->ctrl_bar + off);
 }
 
-/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */
+/* NFP-3200 can't handle 16-bit accesses too well */
+static inline u16 nn_readw(struct nfp_net *nn, int off)
+{
+	WARN_ON_ONCE(nn->is_nfp3200);
+	return readw(nn->ctrl_bar + off);
+}
+
+static inline void nn_writew(struct nfp_net *nn, int off, u16 val)
+{
+	WARN_ON_ONCE(nn->is_nfp3200);
+	writew(val, nn->ctrl_bar + off);
+}
 
 static inline u32 nn_readl(struct nfp_net *nn, int off)
 {
@@ -757,4 +795,9 @@
 }
 #endif /* CONFIG_NFP_NET_DEBUG */
 
+void nfp_net_filter_stats_timer(unsigned long data);
+int
+nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
+		    struct tc_cls_bpf_offload *cls_bpf);
+
 #endif /* _NFP_NET_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 252e492..415691e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -60,6 +60,7 @@
 
 #include <linux/ktime.h>
 
+#include <net/pkt_cls.h>
 #include <net/vxlan.h>
 
 #include "nfp_net_ctrl.h"
@@ -1292,36 +1293,70 @@
 	}
 }
 
-/**
- * nfp_net_set_hash() - Set SKB hash data
- * @netdev: adapter's net_device structure
- * @skb:   SKB to set the hash data on
- * @rxd:   RX descriptor
- *
- * The RSS hash and hash-type are pre-pended to the packet data.
- * Extract and decode it and set the skb fields.
- */
 static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
-			     struct nfp_net_rx_desc *rxd)
+			     unsigned int type, __be32 *hash)
+{
+	if (!(netdev->features & NETIF_F_RXHASH))
+		return;
+
+	switch (type) {
+	case NFP_NET_RSS_IPV4:
+	case NFP_NET_RSS_IPV6:
+	case NFP_NET_RSS_IPV6_EX:
+		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3);
+		break;
+	default:
+		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4);
+		break;
+	}
+}
+
+static void
+nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
+		      struct nfp_net_rx_desc *rxd)
 {
 	struct nfp_net_rx_hash *rx_hash;
 
-	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) ||
-	    !(netdev->features & NETIF_F_RXHASH))
+	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
 		return;
 
 	rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
 
-	switch (be32_to_cpu(rx_hash->hash_type)) {
-	case NFP_NET_RSS_IPV4:
-	case NFP_NET_RSS_IPV6:
-	case NFP_NET_RSS_IPV6_EX:
-		skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3);
-		break;
-	default:
-		skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4);
-		break;
+	nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
+			 &rx_hash->hash);
+}
+
+static void *
+nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
+		   int meta_len)
+{
+	u8 *data = skb->data - meta_len;
+	u32 meta_info;
+
+	meta_info = get_unaligned_be32(data);
+	data += 4;
+
+	while (meta_info) {
+		switch (meta_info & NFP_NET_META_FIELD_MASK) {
+		case NFP_NET_META_HASH:
+			meta_info >>= NFP_NET_META_FIELD_SIZE;
+			nfp_net_set_hash(netdev, skb,
+					 meta_info & NFP_NET_META_FIELD_MASK,
+					 (__be32 *)data);
+			data += 4;
+			break;
+		case NFP_NET_META_MARK:
+			skb->mark = get_unaligned_be32(data);
+			data += 4;
+			break;
+		default:
+			return NULL;
+		}
+
+		meta_info >>= NFP_NET_META_FIELD_SIZE;
 	}
+
+	return data;
 }
 
 /**
@@ -1438,14 +1473,29 @@
 			skb_reserve(skb, nn->rx_offset);
 		skb_put(skb, data_len - meta_len);
 
-		nfp_net_set_hash(nn->netdev, skb, rxd);
-
 		/* Stats update */
 		u64_stats_update_begin(&r_vec->rx_sync);
 		r_vec->rx_pkts++;
 		r_vec->rx_bytes += skb->len;
 		u64_stats_update_end(&r_vec->rx_sync);
 
+		if (nn->fw_ver.major <= 3) {
+			nfp_net_set_hash_desc(nn->netdev, skb, rxd);
+		} else if (meta_len) {
+			void *end;
+
+			end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
+			if (unlikely(end != skb->data)) {
+				u64_stats_update_begin(&r_vec->rx_sync);
+				r_vec->rx_drops++;
+				u64_stats_update_end(&r_vec->rx_sync);
+
+				dev_kfree_skb_any(skb);
+				nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
+				continue;
+			}
+		}
+
 		skb_record_rx_queue(skb, rx_ring->idx);
 		skb->protocol = eth_type_trans(skb, nn->netdev);
 
@@ -2382,6 +2432,31 @@
 	return stats;
 }
 
+static bool nfp_net_ebpf_capable(struct nfp_net *nn)
+{
+	if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
+	    nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
+		return true;
+	return false;
+}
+
+static int
+nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
+		 struct tc_to_netdev *tc)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+		return -ENOTSUPP;
+	if (proto != htons(ETH_P_ALL))
+		return -ENOTSUPP;
+
+	if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn))
+		return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf);
+
+	return -EINVAL;
+}
+
 static int nfp_net_set_features(struct net_device *netdev,
 				netdev_features_t features)
 {
@@ -2436,6 +2511,11 @@
 			new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
 	}
 
+	if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
+		nn_err(nn, "Cannot disable HW TC offload while in use\n");
+		return -EBUSY;
+	}
+
 	nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
 	       netdev->features, features, changed);
 
@@ -2585,6 +2665,7 @@
 	.ndo_stop		= nfp_net_netdev_close,
 	.ndo_start_xmit		= nfp_net_tx,
 	.ndo_get_stats64	= nfp_net_stat64,
+	.ndo_setup_tc		= nfp_net_setup_tc,
 	.ndo_tx_timeout		= nfp_net_tx_timeout,
 	.ndo_set_rx_mode	= nfp_net_set_rx_mode,
 	.ndo_change_mtu		= nfp_net_change_mtu,
@@ -2610,7 +2691,7 @@
 		nn->fw_ver.resv, nn->fw_ver.class,
 		nn->fw_ver.major, nn->fw_ver.minor,
 		nn->max_mtu);
-	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
 		nn->cap,
 		nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
 		nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
@@ -2627,7 +2708,8 @@
 		nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
 		nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
 		nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
-		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "	  : "");
+		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "	  : "",
+		nfp_net_ebpf_capable(nn)            ? "BPF "	  : "");
 }
 
 /**
@@ -2670,10 +2752,13 @@
 	nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
 
 	spin_lock_init(&nn->reconfig_lock);
+	spin_lock_init(&nn->rx_filter_lock);
 	spin_lock_init(&nn->link_status_lock);
 
 	setup_timer(&nn->reconfig_timer,
 		    nfp_net_reconfig_timer, (unsigned long)nn);
+	setup_timer(&nn->rx_filter_stats_timer,
+		    nfp_net_filter_stats_timer, (unsigned long)nn);
 
 	return nn;
 }
@@ -2795,6 +2880,9 @@
 
 	netdev->features = netdev->hw_features;
 
+	if (nfp_net_ebpf_capable(nn))
+		netdev->hw_features |= NETIF_F_HW_TC;
+
 	/* Advertise but disable TSO by default. */
 	netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index ad6c4e3..93b10b4 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -66,6 +66,13 @@
 #define NFP_NET_LSO_MAX_HDR_SZ		255
 
 /**
+ * Prepend field types
+ */
+#define NFP_NET_META_FIELD_SIZE		4
+#define NFP_NET_META_HASH		1 /* next field carries hash type */
+#define NFP_NET_META_MARK		2
+
+/**
  * Hash type pre-pended when a RSS hash was computed
  */
 #define NFP_NET_RSS_NONE                0
@@ -123,6 +130,7 @@
 #define   NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
 #define   NFP_NET_CFG_CTRL_VXLAN	  (0x1 << 24) /* VXLAN tunnel support */
 #define   NFP_NET_CFG_CTRL_NVGRE	  (0x1 << 25) /* NVGRE tunnel support */
+#define   NFP_NET_CFG_CTRL_BPF		  (0x1 << 27) /* BPF offload capable */
 #define NFP_NET_CFG_UPDATE              0x0004
 #define   NFP_NET_CFG_UPDATE_GEN          (0x1 <<  0) /* General update */
 #define   NFP_NET_CFG_UPDATE_RING         (0x1 <<  1) /* Ring config change */
@@ -134,6 +142,7 @@
 #define   NFP_NET_CFG_UPDATE_RESET        (0x1 <<  7) /* Update due to FLR */
 #define   NFP_NET_CFG_UPDATE_IRQMOD       (0x1 <<  8) /* IRQ mod change */
 #define   NFP_NET_CFG_UPDATE_VXLAN	  (0x1 <<  9) /* VXLAN port change */
+#define   NFP_NET_CFG_UPDATE_BPF	  (0x1 << 10) /* BPF program load */
 #define   NFP_NET_CFG_UPDATE_ERR          (0x1 << 31) /* A error occurred */
 #define NFP_NET_CFG_TXRS_ENABLE         0x0008
 #define NFP_NET_CFG_RXRS_ENABLE         0x0010
@@ -196,10 +205,37 @@
 #define NFP_NET_CFG_VXLAN_SZ		  0x0008
 
 /**
- * 64B reserved for future use (0x0080 - 0x00c0)
+ * NFP6000 - BPF section
+ * @NFP_NET_CFG_BPF_ABI:	BPF ABI version
+ * @NFP_NET_CFG_BPF_CAP:	BPF capabilities
+ * @NFP_NET_CFG_BPF_MAX_LEN:	Maximum size of JITed BPF code in bytes
+ * @NFP_NET_CFG_BPF_START:	Offset at which BPF will be loaded
+ * @NFP_NET_CFG_BPF_DONE:	Offset to jump to on exit
+ * @NFP_NET_CFG_BPF_STACK_SZ:	Total size of stack area in 64B chunks
+ * @NFP_NET_CFG_BPF_INL_MTU:	Packet data split offset in 64B chunks
+ * @NFP_NET_CFG_BPF_SIZE:	Size of the JITed BPF code in instructions
+ * @NFP_NET_CFG_BPF_ADDR:	DMA address of the buffer with JITed BPF code
  */
-#define NFP_NET_CFG_RESERVED            0x0080
-#define NFP_NET_CFG_RESERVED_SZ         0x0040
+#define NFP_NET_CFG_BPF_ABI		0x0080
+#define   NFP_NET_BPF_ABI		1
+#define NFP_NET_CFG_BPF_CAP		0x0081
+#define   NFP_NET_BPF_CAP_RELO		(1 << 0) /* seamless reload */
+#define NFP_NET_CFG_BPF_MAX_LEN		0x0082
+#define NFP_NET_CFG_BPF_START		0x0084
+#define NFP_NET_CFG_BPF_DONE		0x0086
+#define NFP_NET_CFG_BPF_STACK_SZ	0x0088
+#define NFP_NET_CFG_BPF_INL_MTU		0x0089
+#define NFP_NET_CFG_BPF_SIZE		0x008e
+#define NFP_NET_CFG_BPF_ADDR		0x0090
+#define   NFP_NET_CFG_BPF_CFG_8CTX	(1 << 0) /* 8ctx mode */
+#define   NFP_NET_CFG_BPF_CFG_MASK	7ULL
+#define   NFP_NET_CFG_BPF_ADDR_MASK	(~NFP_NET_CFG_BPF_CFG_MASK)
+
+/**
+ * 40B reserved for future use (0x0098 - 0x00c0)
+ */
+#define NFP_NET_CFG_RESERVED            0x0098
+#define NFP_NET_CFG_RESERVED_SZ         0x0028
 
 /**
  * RSS configuration (0x0100 - 0x01ac):
@@ -303,6 +339,15 @@
 #define NFP_NET_CFG_STATS_TX_MC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x80)
 #define NFP_NET_CFG_STATS_TX_BC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x88)
 
+#define NFP_NET_CFG_STATS_APP0_FRAMES	(NFP_NET_CFG_STATS_BASE + 0x90)
+#define NFP_NET_CFG_STATS_APP0_BYTES	(NFP_NET_CFG_STATS_BASE + 0x98)
+#define NFP_NET_CFG_STATS_APP1_FRAMES	(NFP_NET_CFG_STATS_BASE + 0xa0)
+#define NFP_NET_CFG_STATS_APP1_BYTES	(NFP_NET_CFG_STATS_BASE + 0xa8)
+#define NFP_NET_CFG_STATS_APP2_FRAMES	(NFP_NET_CFG_STATS_BASE + 0xb0)
+#define NFP_NET_CFG_STATS_APP2_BYTES	(NFP_NET_CFG_STATS_BASE + 0xb8)
+#define NFP_NET_CFG_STATS_APP3_FRAMES	(NFP_NET_CFG_STATS_BASE + 0xc0)
+#define NFP_NET_CFG_STATS_APP3_BYTES	(NFP_NET_CFG_STATS_BASE + 0xc8)
+
 /**
  * Per ring stats (0x1000 - 0x1800)
  * options, 64bit per entry
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 4c98972..3418f22 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -106,6 +106,18 @@
 	{"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)},
 	{"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)},
 	{"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)},
+
+	{"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)},
+	{"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)},
+	/* see comments in outro functions in nfp_bpf_jit.c to find out
+	 * how different BPF modes use app-specific counters
+	 */
+	{"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)},
+	{"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)},
+	{"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)},
+	{"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)},
+	{"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)},
+	{"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)},
 };
 
 #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
new file mode 100644
index 0000000..43f42f8
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * nfp_net_offload.c
+ * Netronome network device driver: TC offload functions for PF and VF
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include "nfp_bpf.h"
+#include "nfp_net_ctrl.h"
+#include "nfp_net.h"
+
+void nfp_net_filter_stats_timer(unsigned long data)
+{
+	struct nfp_net *nn = (void *)data;
+	struct nfp_stat_pair latest;
+
+	spin_lock_bh(&nn->rx_filter_lock);
+
+	if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
+		mod_timer(&nn->rx_filter_stats_timer,
+			  jiffies + NFP_NET_STAT_POLL_IVL);
+
+	spin_unlock_bh(&nn->rx_filter_lock);
+
+	latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
+	latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
+
+	if (latest.pkts != nn->rx_filter.pkts)
+		nn->rx_filter_change = jiffies;
+
+	nn->rx_filter = latest;
+}
+
+static void nfp_net_bpf_stats_reset(struct nfp_net *nn)
+{
+	nn->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
+	nn->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
+	nn->rx_filter_prev = nn->rx_filter;
+	nn->rx_filter_change = jiffies;
+}
+
+static int
+nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
+{
+	struct tc_action *a;
+	LIST_HEAD(actions);
+	u64 bytes, pkts;
+
+	pkts = nn->rx_filter.pkts - nn->rx_filter_prev.pkts;
+	bytes = nn->rx_filter.bytes - nn->rx_filter_prev.bytes;
+	bytes -= pkts * ETH_HLEN;
+
+	nn->rx_filter_prev = nn->rx_filter;
+
+	preempt_disable();
+
+	tcf_exts_to_list(cls_bpf->exts, &actions);
+	list_for_each_entry(a, &actions, list)
+		tcf_action_stats_update(a, bytes, pkts, nn->rx_filter_change);
+
+	preempt_enable();
+
+	return 0;
+}
+
+static int
+nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
+{
+	const struct tc_action *a;
+	LIST_HEAD(actions);
+
+	/* TC direct action */
+	if (cls_bpf->exts_integrated) {
+		if (tc_no_actions(cls_bpf->exts))
+			return NN_ACT_DIRECT;
+
+		return -ENOTSUPP;
+	}
+
+	/* TC legacy mode */
+	if (!tc_single_action(cls_bpf->exts))
+		return -ENOTSUPP;
+
+	tcf_exts_to_list(cls_bpf->exts, &actions);
+	list_for_each_entry(a, &actions, list) {
+		if (is_tcf_gact_shot(a))
+			return NN_ACT_TC_DROP;
+
+		if (is_tcf_mirred_redirect(a) &&
+		    tcf_mirred_ifindex(a) == nn->netdev->ifindex)
+			return NN_ACT_TC_REDIR;
+	}
+
+	return -ENOTSUPP;
+}
+
+static int
+nfp_net_bpf_offload_prepare(struct nfp_net *nn,
+			    struct tc_cls_bpf_offload *cls_bpf,
+			    struct nfp_bpf_result *res,
+			    void **code, dma_addr_t *dma_addr, u16 max_instr)
+{
+	unsigned int code_sz = max_instr * sizeof(u64);
+	enum nfp_bpf_action_type act;
+	u16 start_off, done_off;
+	unsigned int max_mtu;
+	int ret;
+
+	ret = nfp_net_bpf_get_act(nn, cls_bpf);
+	if (ret < 0)
+		return ret;
+	act = ret;
+
+	max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
+	if (max_mtu < nn->netdev->mtu) {
+		nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n");
+		return -ENOTSUPP;
+	}
+
+	start_off = nn_readw(nn, NFP_NET_CFG_BPF_START);
+	done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE);
+
+	*code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr,
+				    GFP_KERNEL);
+	if (!*code)
+		return -ENOMEM;
+
+	ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off,
+			  max_instr, res);
+	if (ret)
+		goto out;
+
+	return 0;
+
+out:
+	dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr);
+	return ret;
+}
+
+static void
+nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags,
+			   void *code, dma_addr_t dma_addr,
+			   unsigned int code_sz, unsigned int n_instr,
+			   bool dense_mode)
+{
+	u64 bpf_addr = dma_addr;
+	int err;
+
+	nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW);
+
+	if (dense_mode)
+		bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX;
+
+	nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr);
+	nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr);
+
+	/* Load up the JITed code */
+	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF);
+	if (err)
+		nn_err(nn, "FW command error while loading BPF: %d\n", err);
+
+	/* Enable passing packets through BPF function */
+	nn->ctrl |= NFP_NET_CFG_CTRL_BPF;
+	nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
+	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
+	if (err)
+		nn_err(nn, "FW command error while enabling BPF: %d\n", err);
+
+	dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr);
+
+	nfp_net_bpf_stats_reset(nn);
+	mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL);
+}
+
+static int nfp_net_bpf_stop(struct nfp_net *nn)
+{
+	if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF))
+		return 0;
+
+	spin_lock_bh(&nn->rx_filter_lock);
+	nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF;
+	spin_unlock_bh(&nn->rx_filter_lock);
+	nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
+
+	del_timer_sync(&nn->rx_filter_stats_timer);
+	nn->bpf_offload_skip_sw = 0;
+
+	return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
+}
+
+int
+nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
+		    struct tc_cls_bpf_offload *cls_bpf)
+{
+	struct nfp_bpf_result res;
+	dma_addr_t dma_addr;
+	u16 max_instr;
+	void *code;
+	int err;
+
+	max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
+
+	switch (cls_bpf->command) {
+	case TC_CLSBPF_REPLACE:
+		/* There is nothing stopping us from implementing seamless
+		 * replace but the simple method of loading I adopted in
+		 * the firmware does not handle atomic replace (i.e. we have to
+		 * stop the BPF offload and re-enable it).  Leaking-in a few
+		 * frames which didn't have BPF applied in the hardware should
+		 * be fine if software fallback is available, though.
+		 */
+		if (nn->bpf_offload_skip_sw)
+			return -EBUSY;
+
+		err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
+						  &dma_addr, max_instr);
+		if (err)
+			return err;
+
+		nfp_net_bpf_stop(nn);
+		nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
+					   dma_addr, max_instr * sizeof(u64),
+					   res.n_instr, res.dense_mode);
+		return 0;
+
+	case TC_CLSBPF_ADD:
+		if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
+			return -EBUSY;
+
+		err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
+						  &dma_addr, max_instr);
+		if (err)
+			return err;
+
+		nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
+					   dma_addr, max_instr * sizeof(u64),
+					   res.n_instr, res.dense_mode);
+		return 0;
+
+	case TC_CLSBPF_DESTROY:
+		return nfp_net_bpf_stop(nn);
+
+	case TC_CLSBPF_STATS:
+		return nfp_net_bpf_stats_update(nn, cls_bpf);
+
+	default:
+		return -ENOTSUPP;
+	}
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
index f7062cb..2800bbf 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
@@ -148,7 +148,7 @@
 		dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n");
 	} else {
 		switch (fw_ver.major) {
-		case 1 ... 3:
+		case 1 ... 4:
 			if (is_nfp3200) {
 				stride = 2;
 				tx_bar_no = NFP_NET_Q0_BAR;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index dd204d9..77a5364 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1269,13 +1269,13 @@
 		if (IS_ERR(ptp->phc_clock)) {
 			rc = PTR_ERR(ptp->phc_clock);
 			goto fail3;
-		}
-
-		INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker);
-		ptp->pps_workwq = create_singlethread_workqueue("sfc_pps");
-		if (!ptp->pps_workwq) {
-			rc = -ENOMEM;
-			goto fail4;
+		} else if (ptp->phc_clock) {
+			INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker);
+			ptp->pps_workwq = create_singlethread_workqueue("sfc_pps");
+			if (!ptp->pps_workwq) {
+				rc = -ENOMEM;
+				goto fail4;
+			}
 		}
 	}
 	ptp->nic_ts_enabled = false;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 170a18b..6e3b829 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -187,7 +187,7 @@
 	if (IS_ERR(priv->ptp_clock)) {
 		priv->ptp_clock = NULL;
 		pr_err("ptp_clock_register() failed on %s\n", priv->dev->name);
-	} else
+	} else if (priv->ptp_clock)
 		pr_debug("Added PTP HW clock successfully on %s\n",
 			 priv->dev->name);
 
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
new file mode 100644
index 0000000..c5cb661
--- /dev/null
+++ b/include/linux/bpf_verifier.h
@@ -0,0 +1,90 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _LINUX_BPF_VERIFIER_H
+#define _LINUX_BPF_VERIFIER_H 1
+
+#include <linux/bpf.h> /* for enum bpf_reg_type */
+#include <linux/filter.h> /* for MAX_BPF_STACK */
+
+struct bpf_reg_state {
+	enum bpf_reg_type type;
+	union {
+		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
+		s64 imm;
+
+		/* valid when type == PTR_TO_PACKET* */
+		struct {
+			u32 id;
+			u16 off;
+			u16 range;
+		};
+
+		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
+		 *   PTR_TO_MAP_VALUE_OR_NULL
+		 */
+		struct bpf_map *map_ptr;
+	};
+};
+
+enum bpf_stack_slot_type {
+	STACK_INVALID,    /* nothing was stored in this stack slot */
+	STACK_SPILL,      /* register spilled into stack */
+	STACK_MISC	  /* BPF program wrote some data into this slot */
+};
+
+#define BPF_REG_SIZE 8	/* size of eBPF register in bytes */
+
+/* state of the program:
+ * type of all registers and stack info
+ */
+struct bpf_verifier_state {
+	struct bpf_reg_state regs[MAX_BPF_REG];
+	u8 stack_slot_type[MAX_BPF_STACK];
+	struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
+};
+
+/* linked list of verifier states used to prune search */
+struct bpf_verifier_state_list {
+	struct bpf_verifier_state state;
+	struct bpf_verifier_state_list *next;
+};
+
+struct bpf_insn_aux_data {
+	enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
+};
+
+#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+
+struct bpf_verifier_env;
+struct bpf_ext_analyzer_ops {
+	int (*insn_hook)(struct bpf_verifier_env *env,
+			 int insn_idx, int prev_insn_idx);
+};
+
+/* single container for all structs
+ * one verifier_env per bpf_check() call
+ */
+struct bpf_verifier_env {
+	struct bpf_prog *prog;		/* eBPF program being verified */
+	struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
+	int stack_size;			/* number of states to be processed */
+	struct bpf_verifier_state cur_state; /* current verifier state */
+	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
+	const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
+	void *analyzer_priv; /* pointer to external analyzer's private data */
+	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
+	u32 used_map_cnt;		/* number of used maps */
+	u32 id_gen;			/* used to generate unique reg IDs */
+	bool allow_ptr_leaks;
+	bool seen_direct_write;
+	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
+};
+
+int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
+		 void *priv);
+
+#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a10d8d1..69f242c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -789,6 +789,7 @@
 	TC_SETUP_CLSU32,
 	TC_SETUP_CLSFLOWER,
 	TC_SETUP_MATCHALL,
+	TC_SETUP_CLSBPF,
 };
 
 struct tc_cls_u32_offload;
@@ -800,6 +801,7 @@
 		struct tc_cls_u32_offload *cls_u32;
 		struct tc_cls_flower_offload *cls_flower;
 		struct tc_cls_matchall_offload *cls_mall;
+		struct tc_cls_bpf_offload *cls_bpf;
 	};
 };
 
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 6b15e16..5ad54fc 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -127,6 +127,11 @@
  *
  * @info:   Structure describing the new clock.
  * @parent: Pointer to the parent device of the new clock.
+ *
+ * Returns a valid pointer on success or PTR_ERR on failure.  If PHC
+ * support is missing at the configuration level, this function
+ * returns NULL, and drivers are expected to gracefully handle that
+ * case separately.
  */
 
 extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c6dab3f..9bf60b5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3085,6 +3085,7 @@
 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
 struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
 int skb_ensure_writable(struct sk_buff *skb, int write_len);
+int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
 int skb_vlan_pop(struct sk_buff *skb);
 int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
 struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy,
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a459be5..5ccaa4b 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -486,4 +486,20 @@
 	unsigned long cookie;
 };
 
+enum tc_clsbpf_command {
+	TC_CLSBPF_ADD,
+	TC_CLSBPF_REPLACE,
+	TC_CLSBPF_DESTROY,
+	TC_CLSBPF_STATS,
+};
+
+struct tc_cls_bpf_offload {
+	enum tc_clsbpf_command command;
+	struct tcf_exts *exts;
+	struct bpf_prog *prog;
+	const char *name;
+	bool exts_integrated;
+	u32 gen_flags;
+};
+
 #endif
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 632e205..87a7f42 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -83,9 +83,9 @@
 #endif
 
 /* Round an int up to the next multiple of 4.  */
-#define WORD_ROUND(s) (((s)+3)&~3)
+#define SCTP_PAD4(s) (((s)+3)&~3)
 /* Truncate to the previous multiple of 4.  */
-#define WORD_TRUNC(s) ((s)&~3)
+#define SCTP_TRUNC4(s) ((s)&~3)
 
 /*
  * Function declarations.
@@ -433,7 +433,7 @@
 	if (asoc->user_frag)
 		frag = min_t(int, frag, asoc->user_frag);
 
-	frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN));
+	frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN));
 
 	return frag;
 }
@@ -462,7 +462,7 @@
 for (pos.v = chunk->member;\
      pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
      ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
-     pos.v += WORD_ROUND(ntohs(pos.p->length)))
+     pos.v += SCTP_PAD4(ntohs(pos.p->length)))
 
 #define sctp_walk_errors(err, chunk_hdr)\
 _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
@@ -472,7 +472,7 @@
 	    sizeof(sctp_chunkhdr_t));\
      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
      ntohs(err->length) >= sizeof(sctp_errhdr_t); \
-     err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length))))
+     err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
 
 #define sctp_walk_fwdtsn(pos, chunk)\
 _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk))
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 8915b61..8fd715f 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -396,6 +396,7 @@
 	TCA_BPF_FD,
 	TCA_BPF_NAME,
 	TCA_BPF_FLAGS,
+	TCA_BPF_FLAGS_GEN,
 	__TCA_BPF_MAX,
 };
 
diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index be72b6e..bddb272 100644
--- a/include/uapi/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
@@ -16,6 +16,7 @@
 
 #define TCA_VLAN_ACT_POP	1
 #define TCA_VLAN_ACT_PUSH	2
+#define TCA_VLAN_ACT_MODIFY	3
 
 struct tc_vlan {
 	tc_gen;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3a75ee3..8c3f794 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
 #include <linux/filter.h>
 #include <net/netlink.h>
 #include <linux/file.h>
@@ -126,77 +127,16 @@
  * are set to NOT_INIT to indicate that they are no longer readable.
  */
 
-struct reg_state {
-	enum bpf_reg_type type;
-	union {
-		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
-		s64 imm;
-
-		/* valid when type == PTR_TO_PACKET* */
-		struct {
-			u32 id;
-			u16 off;
-			u16 range;
-		};
-
-		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
-		 *   PTR_TO_MAP_VALUE_OR_NULL
-		 */
-		struct bpf_map *map_ptr;
-	};
-};
-
-enum bpf_stack_slot_type {
-	STACK_INVALID,    /* nothing was stored in this stack slot */
-	STACK_SPILL,      /* register spilled into stack */
-	STACK_MISC	  /* BPF program wrote some data into this slot */
-};
-
-#define BPF_REG_SIZE 8	/* size of eBPF register in bytes */
-
-/* state of the program:
- * type of all registers and stack info
- */
-struct verifier_state {
-	struct reg_state regs[MAX_BPF_REG];
-	u8 stack_slot_type[MAX_BPF_STACK];
-	struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
-};
-
-/* linked list of verifier states used to prune search */
-struct verifier_state_list {
-	struct verifier_state state;
-	struct verifier_state_list *next;
-};
-
 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
-struct verifier_stack_elem {
+struct bpf_verifier_stack_elem {
 	/* verifer state is 'st'
 	 * before processing instruction 'insn_idx'
 	 * and after processing instruction 'prev_insn_idx'
 	 */
-	struct verifier_state st;
+	struct bpf_verifier_state st;
 	int insn_idx;
 	int prev_insn_idx;
-	struct verifier_stack_elem *next;
-};
-
-#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
-
-/* single container for all structs
- * one verifier_env per bpf_check() call
- */
-struct verifier_env {
-	struct bpf_prog *prog;		/* eBPF program being verified */
-	struct verifier_stack_elem *head; /* stack of verifier states to be processed */
-	int stack_size;			/* number of states to be processed */
-	struct verifier_state cur_state; /* current verifier state */
-	struct verifier_state_list **explored_states; /* search pruning optimization */
-	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
-	u32 used_map_cnt;		/* number of used maps */
-	u32 id_gen;			/* used to generate unique reg IDs */
-	bool allow_ptr_leaks;
-	bool seen_direct_write;
+	struct bpf_verifier_stack_elem *next;
 };
 
 #define BPF_COMPLEXITY_LIMIT_INSNS	65536
@@ -249,9 +189,9 @@
 	[PTR_TO_PACKET_END]	= "pkt_end",
 };
 
-static void print_verifier_state(struct verifier_state *state)
+static void print_verifier_state(struct bpf_verifier_state *state)
 {
-	struct reg_state *reg;
+	struct bpf_reg_state *reg;
 	enum bpf_reg_type t;
 	int i;
 
@@ -427,9 +367,9 @@
 	}
 }
 
-static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
+static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
 {
-	struct verifier_stack_elem *elem;
+	struct bpf_verifier_stack_elem *elem;
 	int insn_idx;
 
 	if (env->head == NULL)
@@ -446,12 +386,12 @@
 	return insn_idx;
 }
 
-static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
-					 int prev_insn_idx)
+static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
+					     int insn_idx, int prev_insn_idx)
 {
-	struct verifier_stack_elem *elem;
+	struct bpf_verifier_stack_elem *elem;
 
-	elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
+	elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 	if (!elem)
 		goto err;
 
@@ -477,7 +417,7 @@
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
 
-static void init_reg_state(struct reg_state *regs)
+static void init_reg_state(struct bpf_reg_state *regs)
 {
 	int i;
 
@@ -493,7 +433,7 @@
 	regs[BPF_REG_1].type = PTR_TO_CTX;
 }
 
-static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
+static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
 {
 	BUG_ON(regno >= MAX_BPF_REG);
 	regs[regno].type = UNKNOWN_VALUE;
@@ -506,7 +446,7 @@
 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
 };
 
-static int check_reg_arg(struct reg_state *regs, u32 regno,
+static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
 			 enum reg_arg_type t)
 {
 	if (regno >= MAX_BPF_REG) {
@@ -566,8 +506,8 @@
 /* check_stack_read/write functions track spill/fill of registers,
  * stack boundary and alignment are checked in check_mem_access()
  */
-static int check_stack_write(struct verifier_state *state, int off, int size,
-			     int value_regno)
+static int check_stack_write(struct bpf_verifier_state *state, int off,
+			     int size, int value_regno)
 {
 	int i;
 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -592,7 +532,7 @@
 	} else {
 		/* regular write of data into stack */
 		state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
-			(struct reg_state) {};
+			(struct bpf_reg_state) {};
 
 		for (i = 0; i < size; i++)
 			state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
@@ -600,7 +540,7 @@
 	return 0;
 }
 
-static int check_stack_read(struct verifier_state *state, int off, int size,
+static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
 			    int value_regno)
 {
 	u8 *slot_type;
@@ -641,7 +581,7 @@
 }
 
 /* check read/write into map element returned by bpf_map_lookup_elem() */
-static int check_map_access(struct verifier_env *env, u32 regno, int off,
+static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 			    int size)
 {
 	struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
@@ -656,7 +596,7 @@
 
 #define MAX_PACKET_OFF 0xffff
 
-static bool may_access_direct_pkt_data(struct verifier_env *env,
+static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 				       const struct bpf_call_arg_meta *meta)
 {
 	switch (env->prog->type) {
@@ -673,11 +613,11 @@
 	}
 }
 
-static int check_packet_access(struct verifier_env *env, u32 regno, int off,
+static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 			       int size)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *reg = &regs[regno];
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *reg = &regs[regno];
 
 	off += reg->off;
 	if (off < 0 || size <= 0 || off + size > reg->range) {
@@ -689,9 +629,13 @@
 }
 
 /* check access to 'struct bpf_context' fields */
-static int check_ctx_access(struct verifier_env *env, int off, int size,
+static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
+	/* for analyzer ctx accesses are already validated and converted */
+	if (env->analyzer_ops)
+		return 0;
+
 	if (env->prog->aux->ops->is_valid_access &&
 	    env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
 		/* remember the offset of last byte accessed in ctx */
@@ -704,7 +648,7 @@
 	return -EACCES;
 }
 
-static bool is_pointer_value(struct verifier_env *env, int regno)
+static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 {
 	if (env->allow_ptr_leaks)
 		return false;
@@ -718,12 +662,13 @@
 	}
 }
 
-static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
-			       int off, int size)
+static int check_ptr_alignment(struct bpf_verifier_env *env,
+			       struct bpf_reg_state *reg, int off, int size)
 {
 	if (reg->type != PTR_TO_PACKET) {
 		if (off % size != 0) {
-			verbose("misaligned access off %d size %d\n", off, size);
+			verbose("misaligned access off %d size %d\n",
+				off, size);
 			return -EACCES;
 		} else {
 			return 0;
@@ -764,12 +709,12 @@
  * if t==write && value_regno==-1, some unknown value is stored into memory
  * if t==read && value_regno==-1, don't care what we read from memory
  */
-static int check_mem_access(struct verifier_env *env, u32 regno, int off,
+static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 			    int bpf_size, enum bpf_access_type t,
 			    int value_regno)
 {
-	struct verifier_state *state = &env->cur_state;
-	struct reg_state *reg = &state->regs[regno];
+	struct bpf_verifier_state *state = &env->cur_state;
+	struct bpf_reg_state *reg = &state->regs[regno];
 	int size, err = 0;
 
 	if (reg->type == PTR_TO_STACK)
@@ -855,9 +800,9 @@
 	return err;
 }
 
-static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
+static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	int err;
 
 	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
@@ -891,12 +836,12 @@
  * bytes from that pointer, make sure that it's within stack boundary
  * and all elements of stack are initialized
  */
-static int check_stack_boundary(struct verifier_env *env, int regno,
+static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 				int access_size, bool zero_size_allowed,
 				struct bpf_call_arg_meta *meta)
 {
-	struct verifier_state *state = &env->cur_state;
-	struct reg_state *regs = state->regs;
+	struct bpf_verifier_state *state = &env->cur_state;
+	struct bpf_reg_state *regs = state->regs;
 	int off, i;
 
 	if (regs[regno].type != PTR_TO_STACK) {
@@ -935,11 +880,11 @@
 	return 0;
 }
 
-static int check_func_arg(struct verifier_env *env, u32 regno,
+static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			  enum bpf_arg_type arg_type,
 			  struct bpf_call_arg_meta *meta)
 {
-	struct reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+	struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
 	enum bpf_reg_type expected_type, type = reg->type;
 	int err = 0;
 
@@ -1144,10 +1089,10 @@
 	return count > 1 ? -EINVAL : 0;
 }
 
-static void clear_all_pkt_pointers(struct verifier_env *env)
+static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 {
-	struct verifier_state *state = &env->cur_state;
-	struct reg_state *regs = state->regs, *reg;
+	struct bpf_verifier_state *state = &env->cur_state;
+	struct bpf_reg_state *regs = state->regs, *reg;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++)
@@ -1167,12 +1112,12 @@
 	}
 }
 
-static int check_call(struct verifier_env *env, int func_id)
+static int check_call(struct bpf_verifier_env *env, int func_id)
 {
-	struct verifier_state *state = &env->cur_state;
+	struct bpf_verifier_state *state = &env->cur_state;
 	const struct bpf_func_proto *fn = NULL;
-	struct reg_state *regs = state->regs;
-	struct reg_state *reg;
+	struct bpf_reg_state *regs = state->regs;
+	struct bpf_reg_state *reg;
 	struct bpf_call_arg_meta meta;
 	bool changes_data;
 	int i, err;
@@ -1275,12 +1220,13 @@
 	return 0;
 }
 
-static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn)
+static int check_packet_ptr_add(struct bpf_verifier_env *env,
+				struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *dst_reg = &regs[insn->dst_reg];
-	struct reg_state *src_reg = &regs[insn->src_reg];
-	struct reg_state tmp_reg;
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+	struct bpf_reg_state tmp_reg;
 	s32 imm;
 
 	if (BPF_SRC(insn->code) == BPF_K) {
@@ -1348,10 +1294,10 @@
 	return 0;
 }
 
-static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
 	u8 opcode = BPF_OP(insn->code);
 	s64 imm_log2;
 
@@ -1361,7 +1307,7 @@
 	 */
 
 	if (BPF_SRC(insn->code) == BPF_X) {
-		struct reg_state *src_reg = &regs[insn->src_reg];
+		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
 
 		if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
 		    dst_reg->imm && opcode == BPF_ADD) {
@@ -1450,11 +1396,12 @@
 	return 0;
 }
 
-static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
+				struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *dst_reg = &regs[insn->dst_reg];
-	struct reg_state *src_reg = &regs[insn->src_reg];
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
 	u8 opcode = BPF_OP(insn->code);
 
 	/* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
@@ -1471,9 +1418,9 @@
 }
 
 /* check validity of 32-bit and 64-bit arithmetic operations */
-static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
+static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs, *dst_reg;
+	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
 	u8 opcode = BPF_OP(insn->code);
 	int err;
 
@@ -1647,10 +1594,10 @@
 	return 0;
 }
 
-static void find_good_pkt_pointers(struct verifier_state *state,
-				   const struct reg_state *dst_reg)
+static void find_good_pkt_pointers(struct bpf_verifier_state *state,
+				   struct bpf_reg_state *dst_reg)
 {
-	struct reg_state *regs = state->regs, *reg;
+	struct bpf_reg_state *regs = state->regs, *reg;
 	int i;
 
 	/* LLVM can generate two kind of checks:
@@ -1696,11 +1643,11 @@
 	}
 }
 
-static int check_cond_jmp_op(struct verifier_env *env,
+static int check_cond_jmp_op(struct bpf_verifier_env *env,
 			     struct bpf_insn *insn, int *insn_idx)
 {
-	struct verifier_state *other_branch, *this_branch = &env->cur_state;
-	struct reg_state *regs = this_branch->regs, *dst_reg;
+	struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
+	struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
 	u8 opcode = BPF_OP(insn->code);
 	int err;
 
@@ -1762,7 +1709,7 @@
 	if (!other_branch)
 		return -EFAULT;
 
-	/* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */
+	/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
 	if (BPF_SRC(insn->code) == BPF_K &&
 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
 	    dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
@@ -1804,9 +1751,9 @@
 }
 
 /* verify BPF_LD_IMM64 instruction */
-static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	int err;
 
 	if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -1822,9 +1769,19 @@
 	if (err)
 		return err;
 
-	if (insn->src_reg == 0)
-		/* generic move 64-bit immediate into a register */
+	if (insn->src_reg == 0) {
+		/* generic move 64-bit immediate into a register,
+		 * only analyzer needs to collect the ld_imm value.
+		 */
+		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
+
+		if (!env->analyzer_ops)
+			return 0;
+
+		regs[insn->dst_reg].type = CONST_IMM;
+		regs[insn->dst_reg].imm = imm;
 		return 0;
+	}
 
 	/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
 	BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
@@ -1861,11 +1818,11 @@
  * Output:
  *   R0 - 8/16/32-bit skb data converted to cpu endianness
  */
-static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	u8 mode = BPF_MODE(insn->code);
-	struct reg_state *reg;
+	struct bpf_reg_state *reg;
 	int i, err;
 
 	if (!may_access_skb(env->prog->type)) {
@@ -1951,7 +1908,7 @@
 	BRANCH = 2,
 };
 
-#define STATE_LIST_MARK ((struct verifier_state_list *) -1L)
+#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
 
 static int *insn_stack;	/* stack of insns to process */
 static int cur_stack;	/* current stack index */
@@ -1962,7 +1919,7 @@
  * w - next instruction
  * e - edge
  */
-static int push_insn(int t, int w, int e, struct verifier_env *env)
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
 {
 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
 		return 0;
@@ -2003,7 +1960,7 @@
 /* non-recursive depth-first-search to detect loops in BPF program
  * loop == back-edge in directed graph
  */
-static int check_cfg(struct verifier_env *env)
+static int check_cfg(struct bpf_verifier_env *env)
 {
 	struct bpf_insn *insns = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
@@ -2112,7 +2069,8 @@
 /* the following conditions reduce the number of explored insns
  * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
  */
-static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
+static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
+				   struct bpf_reg_state *cur)
 {
 	if (old->id != cur->id)
 		return false;
@@ -2187,9 +2145,10 @@
  * whereas register type in current state is meaningful, it means that
  * the current state will reach 'bpf_exit' instruction safely
  */
-static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
+static bool states_equal(struct bpf_verifier_state *old,
+			 struct bpf_verifier_state *cur)
 {
-	struct reg_state *rold, *rcur;
+	struct bpf_reg_state *rold, *rcur;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++) {
@@ -2229,9 +2188,9 @@
 			 * the same, check that stored pointers types
 			 * are the same as well.
 			 * Ex: explored safe path could have stored
-			 * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8}
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
 			 * but current path has stored:
-			 * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16}
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
 			 * such verifier states are not equivalent.
 			 * return false to continue verification of this path
 			 */
@@ -2242,10 +2201,10 @@
 	return true;
 }
 
-static int is_state_visited(struct verifier_env *env, int insn_idx)
+static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 {
-	struct verifier_state_list *new_sl;
-	struct verifier_state_list *sl;
+	struct bpf_verifier_state_list *new_sl;
+	struct bpf_verifier_state_list *sl;
 
 	sl = env->explored_states[insn_idx];
 	if (!sl)
@@ -2269,7 +2228,7 @@
 	 * it will be rejected. Since there are no loops, we won't be
 	 * seeing this 'insn_idx' instruction again on the way to bpf_exit
 	 */
-	new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER);
+	new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
 	if (!new_sl)
 		return -ENOMEM;
 
@@ -2280,11 +2239,20 @@
 	return 0;
 }
 
-static int do_check(struct verifier_env *env)
+static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
+				  int insn_idx, int prev_insn_idx)
 {
-	struct verifier_state *state = &env->cur_state;
+	if (!env->analyzer_ops || !env->analyzer_ops->insn_hook)
+		return 0;
+
+	return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx);
+}
+
+static int do_check(struct bpf_verifier_env *env)
+{
+	struct bpf_verifier_state *state = &env->cur_state;
 	struct bpf_insn *insns = env->prog->insnsi;
-	struct reg_state *regs = state->regs;
+	struct bpf_reg_state *regs = state->regs;
 	int insn_cnt = env->prog->len;
 	int insn_idx, prev_insn_idx = 0;
 	int insn_processed = 0;
@@ -2338,13 +2306,17 @@
 			print_bpf_insn(insn);
 		}
 
+		err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
+		if (err)
+			return err;
+
 		if (class == BPF_ALU || class == BPF_ALU64) {
 			err = check_alu_op(env, insn);
 			if (err)
 				return err;
 
 		} else if (class == BPF_LDX) {
-			enum bpf_reg_type src_reg_type;
+			enum bpf_reg_type *prev_src_type, src_reg_type;
 
 			/* check for reserved fields is already done */
 
@@ -2374,16 +2346,18 @@
 				continue;
 			}
 
-			if (insn->imm == 0) {
+			prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+			if (*prev_src_type == NOT_INIT) {
 				/* saw a valid insn
 				 * dst_reg = *(u32 *)(src_reg + off)
-				 * use reserved 'imm' field to mark this insn
+				 * save type to validate intersecting paths
 				 */
-				insn->imm = src_reg_type;
+				*prev_src_type = src_reg_type;
 
-			} else if (src_reg_type != insn->imm &&
+			} else if (src_reg_type != *prev_src_type &&
 				   (src_reg_type == PTR_TO_CTX ||
-				    insn->imm == PTR_TO_CTX)) {
+				    *prev_src_type == PTR_TO_CTX)) {
 				/* ABuser program is trying to use the same insn
 				 * dst_reg = *(u32*) (src_reg + off)
 				 * with different pointer types:
@@ -2396,7 +2370,7 @@
 			}
 
 		} else if (class == BPF_STX) {
-			enum bpf_reg_type dst_reg_type;
+			enum bpf_reg_type *prev_dst_type, dst_reg_type;
 
 			if (BPF_MODE(insn->code) == BPF_XADD) {
 				err = check_xadd(env, insn);
@@ -2424,11 +2398,13 @@
 			if (err)
 				return err;
 
-			if (insn->imm == 0) {
-				insn->imm = dst_reg_type;
-			} else if (dst_reg_type != insn->imm &&
+			prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+			if (*prev_dst_type == NOT_INIT) {
+				*prev_dst_type = dst_reg_type;
+			} else if (dst_reg_type != *prev_dst_type &&
 				   (dst_reg_type == PTR_TO_CTX ||
-				    insn->imm == PTR_TO_CTX)) {
+				    *prev_dst_type == PTR_TO_CTX)) {
 				verbose("same insn cannot be used with different pointers\n");
 				return -EINVAL;
 			}
@@ -2563,7 +2539,7 @@
 /* look for pseudo eBPF instructions that access map FDs and
  * replace them with actual map pointers
  */
-static int replace_map_fd_with_map_ptr(struct verifier_env *env)
+static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 {
 	struct bpf_insn *insn = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
@@ -2660,7 +2636,7 @@
 }
 
 /* drop refcnt of maps used by the rejected program */
-static void release_maps(struct verifier_env *env)
+static void release_maps(struct bpf_verifier_env *env)
 {
 	int i;
 
@@ -2669,7 +2645,7 @@
 }
 
 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
-static void convert_pseudo_ld_imm64(struct verifier_env *env)
+static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
 {
 	struct bpf_insn *insn = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
@@ -2683,13 +2659,14 @@
 /* convert load instructions that access fields of 'struct __sk_buff'
  * into sequence of instructions that access fields of 'struct sk_buff'
  */
-static int convert_ctx_accesses(struct verifier_env *env)
+static int convert_ctx_accesses(struct bpf_verifier_env *env)
 {
 	const struct bpf_verifier_ops *ops = env->prog->aux->ops;
+	const int insn_cnt = env->prog->len;
 	struct bpf_insn insn_buf[16], *insn;
 	struct bpf_prog *new_prog;
 	enum bpf_access_type type;
-	int i, insn_cnt, cnt;
+	int i, cnt, delta = 0;
 
 	if (ops->gen_prologue) {
 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
@@ -2703,18 +2680,16 @@
 			if (!new_prog)
 				return -ENOMEM;
 			env->prog = new_prog;
+			delta += cnt - 1;
 		}
 	}
 
 	if (!ops->convert_ctx_access)
 		return 0;
 
-	insn_cnt = env->prog->len;
-	insn = env->prog->insnsi;
+	insn = env->prog->insnsi + delta;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
-		u32 insn_delta;
-
 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
 			type = BPF_READ;
@@ -2724,11 +2699,8 @@
 		else
 			continue;
 
-		if (insn->imm != PTR_TO_CTX) {
-			/* clear internal mark */
-			insn->imm = 0;
+		if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
 			continue;
-		}
 
 		cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
 					      insn->off, insn_buf, env->prog);
@@ -2737,26 +2709,24 @@
 			return -EINVAL;
 		}
 
-		new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt);
+		new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
+						 cnt);
 		if (!new_prog)
 			return -ENOMEM;
 
-		insn_delta = cnt - 1;
+		delta += cnt - 1;
 
 		/* keep walking new program and skip insns we just inserted */
 		env->prog = new_prog;
-		insn      = new_prog->insnsi + i + insn_delta;
-
-		insn_cnt += insn_delta;
-		i        += insn_delta;
+		insn      = new_prog->insnsi + i + delta;
 	}
 
 	return 0;
 }
 
-static void free_states(struct verifier_env *env)
+static void free_states(struct bpf_verifier_env *env)
 {
-	struct verifier_state_list *sl, *sln;
+	struct bpf_verifier_state_list *sl, *sln;
 	int i;
 
 	if (!env->explored_states)
@@ -2779,19 +2749,24 @@
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 {
 	char __user *log_ubuf = NULL;
-	struct verifier_env *env;
+	struct bpf_verifier_env *env;
 	int ret = -EINVAL;
 
 	if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
 		return -E2BIG;
 
-	/* 'struct verifier_env' can be global, but since it's not small,
+	/* 'struct bpf_verifier_env' can be global, but since it's not small,
 	 * allocate/free it every time bpf_check() is called
 	 */
-	env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL);
+	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
 	if (!env)
 		return -ENOMEM;
 
+	env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+				     (*prog)->len);
+	ret = -ENOMEM;
+	if (!env->insn_aux_data)
+		goto err_free_env;
 	env->prog = *prog;
 
 	/* grab the mutex to protect few globals used by verifier */
@@ -2810,12 +2785,12 @@
 		/* log_* values have to be sane */
 		if (log_size < 128 || log_size > UINT_MAX >> 8 ||
 		    log_level == 0 || log_ubuf == NULL)
-			goto free_env;
+			goto err_unlock;
 
 		ret = -ENOMEM;
 		log_buf = vmalloc(log_size);
 		if (!log_buf)
-			goto free_env;
+			goto err_unlock;
 	} else {
 		log_level = 0;
 	}
@@ -2825,7 +2800,7 @@
 		goto skip_full_check;
 
 	env->explored_states = kcalloc(env->prog->len,
-				       sizeof(struct verifier_state_list *),
+				       sizeof(struct bpf_verifier_state_list *),
 				       GFP_USER);
 	ret = -ENOMEM;
 	if (!env->explored_states)
@@ -2884,14 +2859,67 @@
 free_log_buf:
 	if (log_level)
 		vfree(log_buf);
-free_env:
 	if (!env->prog->aux->used_maps)
 		/* if we didn't copy map pointers into bpf_prog_info, release
 		 * them now. Otherwise free_bpf_prog_info() will release them.
 		 */
 		release_maps(env);
 	*prog = env->prog;
-	kfree(env);
+err_unlock:
 	mutex_unlock(&bpf_verifier_lock);
+	vfree(env->insn_aux_data);
+err_free_env:
+	kfree(env);
 	return ret;
 }
+
+int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
+		 void *priv)
+{
+	struct bpf_verifier_env *env;
+	int ret;
+
+	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
+	if (!env)
+		return -ENOMEM;
+
+	env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+				     prog->len);
+	ret = -ENOMEM;
+	if (!env->insn_aux_data)
+		goto err_free_env;
+	env->prog = prog;
+	env->analyzer_ops = ops;
+	env->analyzer_priv = priv;
+
+	/* grab the mutex to protect few globals used by verifier */
+	mutex_lock(&bpf_verifier_lock);
+
+	log_level = 0;
+
+	env->explored_states = kcalloc(env->prog->len,
+				       sizeof(struct bpf_verifier_state_list *),
+				       GFP_KERNEL);
+	ret = -ENOMEM;
+	if (!env->explored_states)
+		goto skip_full_check;
+
+	ret = check_cfg(env);
+	if (ret < 0)
+		goto skip_full_check;
+
+	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+
+	ret = do_check(env);
+
+skip_full_check:
+	while (pop_stack(env, NULL) >= 0);
+	free_states(env);
+
+	mutex_unlock(&bpf_verifier_lock);
+	vfree(env->insn_aux_data);
+err_free_env:
+	kfree(env);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bpf_analyzer);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7bf82a2..d36c754 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4522,8 +4522,10 @@
 }
 EXPORT_SYMBOL(skb_ensure_writable);
 
-/* remove VLAN header from packet and update csum accordingly. */
-static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
+/* remove VLAN header from packet and update csum accordingly.
+ * expects a non skb_vlan_tag_present skb with a vlan tag payload
+ */
+int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
 {
 	struct vlan_hdr *vhdr;
 	unsigned int offset = skb->data - skb_mac_header(skb);
@@ -4554,6 +4556,7 @@
 
 	return err;
 }
+EXPORT_SYMBOL(__skb_vlan_pop);
 
 int skb_vlan_pop(struct sk_buff *skb)
 {
@@ -4564,9 +4567,7 @@
 	if (likely(skb_vlan_tag_present(skb))) {
 		skb->vlan_tci = 0;
 	} else {
-		if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
-			      skb->protocol != htons(ETH_P_8021AD)) ||
-			     skb->len < VLAN_ETH_HLEN))
+		if (unlikely(!eth_type_vlan(skb->protocol)))
 			return 0;
 
 		err = __skb_vlan_pop(skb, &vlan_tci);
@@ -4574,9 +4575,7 @@
 			return err;
 	}
 	/* move next vlan tag to hw accel tag */
-	if (likely((skb->protocol != htons(ETH_P_8021Q) &&
-		    skb->protocol != htons(ETH_P_8021AD)) ||
-		   skb->len < VLAN_ETH_HLEN))
+	if (likely(!eth_type_vlan(skb->protocol)))
 		return 0;
 
 	vlan_proto = skb->protocol;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7d025a7..478dfc5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -734,9 +734,16 @@
 {
 	if ((1 << sk->sk_state) &
 	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
-	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK))
-		tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK)) {
+		struct tcp_sock *tp = tcp_sk(sk);
+
+		if (tp->lost_out > tp->retrans_out &&
+		    tp->snd_cwnd > tcp_packets_in_flight(tp))
+			tcp_xmit_retransmit_queue(sk);
+
+		tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
 			       0, GFP_ATOMIC);
+	}
 }
 /*
  * One tasklet per cpu tries to send more skbs.
@@ -2039,6 +2046,39 @@
 	return -1;
 }
 
+/* TCP Small Queues :
+ * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+ * (These limits are doubled for retransmits)
+ * This allows for :
+ *  - better RTT estimation and ACK scheduling
+ *  - faster recovery
+ *  - high rates
+ * Alas, some drivers / subsystems require a fair amount
+ * of queued bytes to ensure line rate.
+ * One example is wifi aggregation (802.11 AMPDU)
+ */
+static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+				  unsigned int factor)
+{
+	unsigned int limit;
+
+	limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+	limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+	limit <<= factor;
+
+	if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+		set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
+		/* It is possible TX completion already happened
+		 * before we set TSQ_THROTTLED, so we must
+		 * test again the condition.
+		 */
+		smp_mb__after_atomic();
+		if (atomic_read(&sk->sk_wmem_alloc) > limit)
+			return true;
+	}
+	return false;
+}
+
 /* This routine writes packets to the network.  It advances the
  * send_head.  This happens as incoming acks open up the remote
  * window for us.
@@ -2125,29 +2165,8 @@
 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
 			break;
 
-		/* TCP Small Queues :
-		 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
-		 * This allows for :
-		 *  - better RTT estimation and ACK scheduling
-		 *  - faster recovery
-		 *  - high rates
-		 * Alas, some drivers / subsystems require a fair amount
-		 * of queued bytes to ensure line rate.
-		 * One example is wifi aggregation (802.11 AMPDU)
-		 */
-		limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
-		limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
-
-		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
-			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
-			/* It is possible TX completion already happened
-			 * before we set TSQ_THROTTLED, so we must
-			 * test again the condition.
-			 */
-			smp_mb__after_atomic();
-			if (atomic_read(&sk->sk_wmem_alloc) > limit)
-				break;
-		}
+		if (tcp_small_queue_check(sk, skb, 0))
+			break;
 
 		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
 			break;
@@ -2847,6 +2866,9 @@
 		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
 			continue;
 
+		if (tcp_small_queue_check(sk, skb, 1))
+			return;
+
 		if (tcp_retransmit_skb(sk, skb, segs))
 			return;
 
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index ef36a56..4dedb96 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -68,7 +68,7 @@
 			 ++i, offset, sch->type, htons(sch->length),
 			 sch->flags);
 #endif
-		offset += WORD_ROUND(ntohs(sch->length));
+		offset += SCTP_PAD4(ntohs(sch->length));
 
 		pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset);
 
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1c76387..667dc38 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -204,6 +204,13 @@
 	return retval;
 }
 
+static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+			     u64 lastuse)
+{
+	tcf_lastuse_update(&a->tcfa_tm);
+	_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+}
+
 static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 			   int ref)
 {
@@ -281,6 +288,7 @@
 	.type		=	TCA_ACT_MIRRED,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_mirred,
+	.stats_update	=	tcf_stats_update,
 	.dump		=	tcf_mirred_dump,
 	.cleanup	=	tcf_mirred_release,
 	.init		=	tcf_mirred_init,
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 59a8d31..a95c00b 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -30,6 +30,7 @@
 	struct tcf_vlan *v = to_vlan(a);
 	int action;
 	int err;
+	u16 tci;
 
 	spin_lock(&v->tcf_lock);
 	tcf_lastuse_update(&v->tcf_tm);
@@ -48,6 +49,30 @@
 		if (err)
 			goto drop;
 		break;
+	case TCA_VLAN_ACT_MODIFY:
+		/* No-op if no vlan tag (either hw-accel or in-payload) */
+		if (!skb_vlan_tagged(skb))
+			goto unlock;
+		/* extract existing tag (and guarantee no hw-accel tag) */
+		if (skb_vlan_tag_present(skb)) {
+			tci = skb_vlan_tag_get(skb);
+			skb->vlan_tci = 0;
+		} else {
+			/* in-payload vlan tag, pop it */
+			err = __skb_vlan_pop(skb, &tci);
+			if (err)
+				goto drop;
+		}
+		/* replace the vid */
+		tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid;
+		/* replace prio bits, if tcfv_push_prio specified */
+		if (v->tcfv_push_prio) {
+			tci &= ~VLAN_PRIO_MASK;
+			tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT;
+		}
+		/* put updated tci as hwaccel tag */
+		__vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci);
+		break;
 	default:
 		BUG();
 	}
@@ -102,6 +127,7 @@
 	case TCA_VLAN_ACT_POP:
 		break;
 	case TCA_VLAN_ACT_PUSH:
+	case TCA_VLAN_ACT_MODIFY:
 		if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
 			if (exists)
 				tcf_hash_release(*a, bind);
@@ -185,7 +211,8 @@
 	if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
-	if (v->tcfv_action == TCA_VLAN_ACT_PUSH &&
+	if ((v->tcfv_action == TCA_VLAN_ACT_PUSH ||
+	     v->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
 	    (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
 	     nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
 			  v->tcfv_push_proto) ||
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index c6f7a47..bb1d5a4 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -27,6 +27,8 @@
 MODULE_DESCRIPTION("TC BPF based classifier");
 
 #define CLS_BPF_NAME_LEN	256
+#define CLS_BPF_SUPPORTED_GEN_FLAGS		\
+	(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)
 
 struct cls_bpf_head {
 	struct list_head plist;
@@ -39,6 +41,8 @@
 	struct list_head link;
 	struct tcf_result res;
 	bool exts_integrated;
+	bool offloaded;
+	u32 gen_flags;
 	struct tcf_exts exts;
 	u32 handle;
 	union {
@@ -54,6 +58,7 @@
 static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
 	[TCA_BPF_CLASSID]	= { .type = NLA_U32 },
 	[TCA_BPF_FLAGS]		= { .type = NLA_U32 },
+	[TCA_BPF_FLAGS_GEN]	= { .type = NLA_U32 },
 	[TCA_BPF_FD]		= { .type = NLA_U32 },
 	[TCA_BPF_NAME]		= { .type = NLA_NUL_STRING,
 				    .len = CLS_BPF_NAME_LEN },
@@ -91,7 +96,9 @@
 
 		qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
 
-		if (at_ingress) {
+		if (tc_skip_sw(prog->gen_flags)) {
+			filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0;
+		} else if (at_ingress) {
 			/* It is safe to push/pull even if skb_shared() */
 			__skb_push(skb, skb->mac_len);
 			bpf_compute_data_end(skb);
@@ -138,6 +145,91 @@
 	return !prog->bpf_ops;
 }
 
+static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+			       enum tc_clsbpf_command cmd)
+{
+	struct net_device *dev = tp->q->dev_queue->dev;
+	struct tc_cls_bpf_offload bpf_offload = {};
+	struct tc_to_netdev offload;
+
+	offload.type = TC_SETUP_CLSBPF;
+	offload.cls_bpf = &bpf_offload;
+
+	bpf_offload.command = cmd;
+	bpf_offload.exts = &prog->exts;
+	bpf_offload.prog = prog->filter;
+	bpf_offload.name = prog->bpf_name;
+	bpf_offload.exts_integrated = prog->exts_integrated;
+	bpf_offload.gen_flags = prog->gen_flags;
+
+	return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+					     tp->protocol, &offload);
+}
+
+static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+			   struct cls_bpf_prog *oldprog)
+{
+	struct net_device *dev = tp->q->dev_queue->dev;
+	struct cls_bpf_prog *obj = prog;
+	enum tc_clsbpf_command cmd;
+	bool skip_sw;
+	int ret;
+
+	skip_sw = tc_skip_sw(prog->gen_flags) ||
+		(oldprog && tc_skip_sw(oldprog->gen_flags));
+
+	if (oldprog && oldprog->offloaded) {
+		if (tc_should_offload(dev, tp, prog->gen_flags)) {
+			cmd = TC_CLSBPF_REPLACE;
+		} else if (!tc_skip_sw(prog->gen_flags)) {
+			obj = oldprog;
+			cmd = TC_CLSBPF_DESTROY;
+		} else {
+			return -EINVAL;
+		}
+	} else {
+		if (!tc_should_offload(dev, tp, prog->gen_flags))
+			return skip_sw ? -EINVAL : 0;
+		cmd = TC_CLSBPF_ADD;
+	}
+
+	ret = cls_bpf_offload_cmd(tp, obj, cmd);
+	if (ret)
+		return skip_sw ? ret : 0;
+
+	obj->offloaded = true;
+	if (oldprog)
+		oldprog->offloaded = false;
+
+	return 0;
+}
+
+static void cls_bpf_stop_offload(struct tcf_proto *tp,
+				 struct cls_bpf_prog *prog)
+{
+	int err;
+
+	if (!prog->offloaded)
+		return;
+
+	err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+	if (err) {
+		pr_err("Stopping hardware offload failed: %d\n", err);
+		return;
+	}
+
+	prog->offloaded = false;
+}
+
+static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
+					 struct cls_bpf_prog *prog)
+{
+	if (!prog->offloaded)
+		return;
+
+	cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+}
+
 static int cls_bpf_init(struct tcf_proto *tp)
 {
 	struct cls_bpf_head *head;
@@ -177,6 +269,7 @@
 {
 	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;
 
+	cls_bpf_stop_offload(tp, prog);
 	list_del_rcu(&prog->link);
 	tcf_unbind_filter(tp, &prog->res);
 	call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@@ -193,6 +286,7 @@
 		return false;
 
 	list_for_each_entry_safe(prog, tmp, &head->plist, link) {
+		cls_bpf_stop_offload(tp, prog);
 		list_del_rcu(&prog->link);
 		tcf_unbind_filter(tp, &prog->res);
 		call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@@ -302,6 +396,7 @@
 {
 	bool is_bpf, is_ebpf, have_exts = false;
 	struct tcf_exts exts;
+	u32 gen_flags = 0;
 	int ret;
 
 	is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
@@ -326,8 +421,17 @@
 
 		have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
 	}
+	if (tb[TCA_BPF_FLAGS_GEN]) {
+		gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+		if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+		    !tc_flags_valid(gen_flags)) {
+			ret = -EINVAL;
+			goto errout;
+		}
+	}
 
 	prog->exts_integrated = have_exts;
+	prog->gen_flags = gen_flags;
 
 	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
 		       cls_bpf_prog_from_efd(tb, prog, tp);
@@ -415,6 +519,12 @@
 	if (ret < 0)
 		goto errout;
 
+	ret = cls_bpf_offload(tp, prog, oldprog);
+	if (ret) {
+		cls_bpf_delete_prog(tp, prog);
+		return ret;
+	}
+
 	if (oldprog) {
 		list_replace_rcu(&oldprog->link, &prog->link);
 		tcf_unbind_filter(tp, &oldprog->res);
@@ -476,6 +586,8 @@
 
 	tm->tcm_handle = prog->handle;
 
+	cls_bpf_offload_update_stats(tp, prog);
+
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
@@ -498,6 +610,9 @@
 		bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
 	if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
 		goto nla_put_failure;
+	if (prog->gen_flags &&
+	    nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags))
+		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 1c23060..f10d339 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1408,7 +1408,7 @@
 				transports) {
 		if (t->pmtu_pending && t->dst) {
 			sctp_transport_update_pmtu(sk, t,
-						   WORD_TRUNC(dst_mtu(t->dst)));
+						   SCTP_TRUNC4(dst_mtu(t->dst)));
 			t->pmtu_pending = 0;
 		}
 		if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index af9cc80..8afe2e9 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -195,9 +195,10 @@
 	/* This is the biggest possible DATA chunk that can fit into
 	 * the packet
 	 */
-	max_data = (asoc->pathmtu -
-		sctp_sk(asoc->base.sk)->pf->af->net_header_len -
-		sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3;
+	max_data = asoc->pathmtu -
+		   sctp_sk(asoc->base.sk)->pf->af->net_header_len -
+		   sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk);
+	max_data = SCTP_TRUNC4(max_data);
 
 	max = asoc->frag_point;
 	/* If the the peer requested that we authenticate DATA chunks
@@ -208,8 +209,8 @@
 		struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc);
 
 		if (hmac_desc)
-			max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) +
-					    hmac_desc->hmac_len);
+			max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) +
+					      hmac_desc->hmac_len);
 	}
 
 	/* Now, check if we need to reduce our max */
@@ -229,7 +230,7 @@
 	    asoc->outqueue.out_qlen == 0 &&
 	    list_empty(&asoc->outqueue.retransmit) &&
 	    msg_len > max)
-		max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t));
+		max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
 
 	/* Encourage Cookie-ECHO bundling. */
 	if (asoc->state < SCTP_STATE_COOKIE_ECHOED)
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 69444d3..a1d85065 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -605,7 +605,7 @@
 		/* PMTU discovery (RFC1191) */
 		if (ICMP_FRAG_NEEDED == code) {
 			sctp_icmp_frag_needed(sk, asoc, transport,
-					      WORD_TRUNC(info));
+					      SCTP_TRUNC4(info));
 			goto out_unlock;
 		} else {
 			if (ICMP_PROT_UNREACH == code) {
@@ -673,7 +673,7 @@
 		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
 			break;
 
-		ch_end = offset + WORD_ROUND(ntohs(ch->length));
+		ch_end = offset + SCTP_PAD4(ntohs(ch->length));
 		if (ch_end > skb->len)
 			break;
 
@@ -1121,7 +1121,7 @@
 		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
 			break;
 
-		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+		ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
 		if (ch_end > skb_tail_pointer(skb))
 			break;
 
@@ -1190,7 +1190,7 @@
 	 * that the chunk length doesn't cause overflow.  Otherwise, we'll
 	 * walk off the end.
 	 */
-	if (WORD_ROUND(ntohs(ch->length)) > skb->len)
+	if (SCTP_PAD4(ntohs(ch->length)) > skb->len)
 		return NULL;
 
 	/* If this is INIT/INIT-ACK look inside the chunk too. */
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 6437aa9..f731de3e8 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -213,7 +213,7 @@
 	}
 
 	chunk->chunk_hdr = ch;
-	chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+	chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
 	skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
 	chunk->subh.v = NULL; /* Subheader is no longer valid.  */
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0c605ec..2a5c189 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -297,7 +297,7 @@
 					      struct sctp_chunk *chunk)
 {
 	sctp_xmit_t retval = SCTP_XMIT_OK;
-	__u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length));
+	__u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length));
 
 	/* Check to see if this chunk will fit into the packet */
 	retval = sctp_packet_will_fit(packet, chunk, chunk_len);
@@ -508,7 +508,7 @@
 		if (gso) {
 			pkt_size = packet->overhead;
 			list_for_each_entry(chunk, &packet->chunk_list, list) {
-				int padded = WORD_ROUND(chunk->skb->len);
+				int padded = SCTP_PAD4(chunk->skb->len);
 
 				if (pkt_size + padded > tp->pathmtu)
 					break;
@@ -538,7 +538,7 @@
 		 * included in the chunk length field.  The sender should
 		 * never pad with more than 3 bytes.
 		 *
-		 * [This whole comment explains WORD_ROUND() below.]
+		 * [This whole comment explains SCTP_PAD4() below.]
 		 */
 
 		pkt_size -= packet->overhead;
@@ -560,7 +560,7 @@
 				has_data = 1;
 			}
 
-			padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
+			padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len;
 			if (padding)
 				memset(skb_put(chunk->skb, padding), 0, padding);
 
@@ -587,7 +587,7 @@
 			 * acknowledged or have failed.
 			 * Re-queue auth chunks if needed.
 			 */
-			pkt_size -= WORD_ROUND(chunk->skb->len);
+			pkt_size -= SCTP_PAD4(chunk->skb->len);
 
 			if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
 				sctp_chunk_free(chunk);
@@ -911,7 +911,7 @@
 		 */
 		maxsize = pmtu - packet->overhead;
 		if (packet->auth)
-			maxsize -= WORD_ROUND(packet->auth->skb->len);
+			maxsize -= SCTP_PAD4(packet->auth->skb->len);
 		if (chunk_len > maxsize)
 			retval = SCTP_XMIT_PMTU_FULL;
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8c77b87..79dd660 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -253,7 +253,7 @@
 	num_types = sp->pf->supported_addrs(sp, types);
 
 	chunksize = sizeof(init) + addrs_len;
-	chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
+	chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types));
 	chunksize += sizeof(ecap_param);
 
 	if (asoc->prsctp_enable)
@@ -283,14 +283,14 @@
 		/* Add HMACS parameter length if any were defined */
 		auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
 		if (auth_hmacs->length)
-			chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
+			chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
 		else
 			auth_hmacs = NULL;
 
 		/* Add CHUNKS parameter length */
 		auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
 		if (auth_chunks->length)
-			chunksize += WORD_ROUND(ntohs(auth_chunks->length));
+			chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
 		else
 			auth_chunks = NULL;
 
@@ -300,8 +300,8 @@
 
 	/* If we have any extensions to report, account for that */
 	if (num_ext)
-		chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
-					num_ext);
+		chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
+				       num_ext);
 
 	/* RFC 2960 3.3.2 Initiation (INIT) (1)
 	 *
@@ -443,13 +443,13 @@
 
 		auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
 		if (auth_hmacs->length)
-			chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
+			chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
 		else
 			auth_hmacs = NULL;
 
 		auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
 		if (auth_chunks->length)
-			chunksize += WORD_ROUND(ntohs(auth_chunks->length));
+			chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
 		else
 			auth_chunks = NULL;
 
@@ -458,8 +458,8 @@
 	}
 
 	if (num_ext)
-		chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
-					num_ext);
+		chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
+				       num_ext);
 
 	/* Now allocate and fill out the chunk.  */
 	retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
@@ -1390,7 +1390,7 @@
 	struct sock *sk;
 
 	/* No need to allocate LL here, as this is only a chunk. */
-	skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp);
+	skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp);
 	if (!skb)
 		goto nodata;
 
@@ -1482,7 +1482,7 @@
 	void *target;
 	void *padding;
 	int chunklen = ntohs(chunk->chunk_hdr->length);
-	int padlen = WORD_ROUND(chunklen) - chunklen;
+	int padlen = SCTP_PAD4(chunklen) - chunklen;
 
 	padding = skb_put(chunk->skb, padlen);
 	target = skb_put(chunk->skb, len);
@@ -1900,7 +1900,7 @@
 	struct __sctp_missing report;
 	__u16 len;
 
-	len = WORD_ROUND(sizeof(report));
+	len = SCTP_PAD4(sizeof(report));
 
 	/* Make an ERROR chunk, preparing enough room for
 	 * returning multiple unknown parameters.
@@ -2098,9 +2098,9 @@
 
 		if (*errp) {
 			if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
-					WORD_ROUND(ntohs(param.p->length))))
+					SCTP_PAD4(ntohs(param.p->length))))
 				sctp_addto_chunk_fixed(*errp,
-						WORD_ROUND(ntohs(param.p->length)),
+						SCTP_PAD4(ntohs(param.p->length)),
 						param.v);
 		} else {
 			/* If there is no memory for generating the ERROR
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d88bb2b..026e3bc 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3454,7 +3454,7 @@
 		}
 
 		/* Report violation if chunk len overflows */
-		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+		ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
 		if (ch_end > skb_tail_pointer(skb))
 			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
@@ -4185,7 +4185,7 @@
 		hdr = unk_chunk->chunk_hdr;
 		err_chunk = sctp_make_op_error(asoc, unk_chunk,
 					       SCTP_ERROR_UNKNOWN_CHUNK, hdr,
-					       WORD_ROUND(ntohs(hdr->length)),
+					       SCTP_PAD4(ntohs(hdr->length)),
 					       0);
 		if (err_chunk) {
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -4203,7 +4203,7 @@
 		hdr = unk_chunk->chunk_hdr;
 		err_chunk = sctp_make_op_error(asoc, unk_chunk,
 					       SCTP_ERROR_UNKNOWN_CHUNK, hdr,
-					       WORD_ROUND(ntohs(hdr->length)),
+					       SCTP_PAD4(ntohs(hdr->length)),
 					       0);
 		if (err_chunk) {
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 81b8667..ce54dce 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -233,7 +233,7 @@
 	}
 
 	if (transport->dst) {
-		transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
+		transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
 	} else
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
@@ -287,7 +287,7 @@
 		return;
 	}
 	if (transport->dst) {
-		transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
+		transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
 
 		/* Initialize sk->sk_rcv_saddr, if the transport is the
 		 * association's active path for getsockname().
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index d85b803..bea0005 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -383,7 +383,7 @@
 
 	ch = (sctp_errhdr_t *)(chunk->skb->data);
 	cause = ch->cause;
-	elen = WORD_ROUND(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
+	elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
 
 	/* Pull off the ERROR header.  */
 	skb_pull(chunk->skb, sizeof(sctp_errhdr_t));
@@ -688,7 +688,7 @@
 	 * MUST ignore the padding bytes.
 	 */
 	len = ntohs(chunk->chunk_hdr->length);
-	padding = WORD_ROUND(len) - len;
+	padding = SCTP_PAD4(len) - len;
 
 	/* Fixup cloned skb with just this chunks data.  */
 	skb_trim(skb, chunk->chunk_end - padding - skb->data);