Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Minor overlapping changes for both merge conflicts.

Resolution work done by Stephen Rothwell was used
as a reference.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
index e41b2d5..f591ab7 100644
--- a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
+++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
@@ -47,6 +47,9 @@
 	    Valid values are between 0 to 7, that maps to
 	    273, 589, 899, 1222, 1480, 1806, 2147, 2464 ps
 	    Default value is 2, which corresponds to 899 ps
+- rxlos-gpios: Input gpio from SFP+ module to indicate availability of
+	       incoming signal.
+
 
 Example:
 	menetclk: menetclk {
diff --git a/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt
new file mode 100644
index 0000000..038dda4
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt
@@ -0,0 +1,35 @@
+XILINX GMIITORGMII Converter Driver Device Tree Bindings
+--------------------------------------------------------
+
+The Gigabit Media Independent Interface (GMII) to Reduced Gigabit Media
+Independent Interface (RGMII) core provides the RGMII between RGMII-compliant
+Ethernet physical media devices (PHY) and the Gigabit Ethernet controller.
+This core can be used in all three modes of operation (10/100/1000 Mb/s).
+The Management Data Input/Output (MDIO) interface is used to configure the
+speed of operation. This core can switch dynamically between the three
+different speed modes by configuring the converter register through an MDIO
+write.
+
+This converter sits between the Ethernet MAC and the external PHY.
+MAC <==> GMII2RGMII <==> RGMII_PHY
+
+For more details about MDIO, please refer to phy.txt in the same directory.
+
+Required properties:
+- compatible	: Should be "xlnx,gmii-to-rgmii-1.0"
+- reg		: The ID number for the phy, usually a small integer
+- phy-handle	: Should point to the external phy device.
+		  See ethernet.txt file in the same directory.
+
+Example:
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		phy: ethernet-phy@0 {
+			......
+		};
+		gmiitorgmii: gmiitorgmii@8 {
+			compatible = "xlnx,gmii-to-rgmii-1.0";
+			reg = <8>;
+			phy-handle = <&phy>;
+		};
+	};
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index 415154a..a7697783 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -74,6 +74,8 @@
 	- The DNS resolver module allows kernel servies to make DNS queries.
 driver.txt
 	- Softnet driver issues.
+ena.txt
+	- info on Amazon's Elastic Network Adapter (ENA)
 e100.txt
 	- info on Intel's EtherExpress PRO/100 line of 10/100 boards
 e1000.txt
diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt
index 1b5e7a7..8a8d3d9 100644
--- a/Documentation/networking/batman-adv.txt
+++ b/Documentation/networking/batman-adv.txt
@@ -43,10 +43,15 @@
 reload the module if you plug your USB wifi adapter into your ma-
 chine after batman advanced was initially loaded.
 
-To activate a  given  interface  simply  write  "bat0"  into  its
-"mesh_iface" file inside the batman_adv subfolder:
+The batman-adv soft-interface can be created using  the  iproute2
+tool "ip"
 
-# echo bat0 > /sys/class/net/eth0/batman_adv/mesh_iface
+# ip link add name bat0 type batadv
+
+To  activate a  given  interface  simply  attach it to the "bat0"
+interface
+
+# ip link set dev eth0 master bat0
 
 Repeat  this step for all interfaces you wish to add.  Now batman
 starts using/broadcasting on this/these interface(s).
@@ -56,10 +61,10 @@
 # cat /sys/class/net/eth0/batman_adv/iface_status
 # active
 
-To deactivate an interface you have  to  write  "none"  into  its
-"mesh_iface" file:
+To  deactivate  an  interface  you  have   to  detach it from the
+"bat0" interface:
 
-# echo none > /sys/class/net/eth0/batman_adv/mesh_iface
+# ip link set dev eth0 nomaster
 
 
 All  mesh  wide  settings  can be found in batman's own interface
diff --git a/Documentation/networking/ena.txt b/Documentation/networking/ena.txt
new file mode 100644
index 0000000..2b4b6f5
--- /dev/null
+++ b/Documentation/networking/ena.txt
@@ -0,0 +1,305 @@
+Linux kernel driver for Elastic Network Adapter (ENA) family:
+=============================================================
+
+Overview:
+=========
+ENA is a networking interface designed to make good use of modern CPU
+features and system architectures.
+
+The ENA device exposes a lightweight management interface with a
+minimal set of memory mapped registers and extendable command set
+through an Admin Queue.
+
+The driver supports a range of ENA devices, is link-speed independent
+(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
+a negotiated and extendable feature set.
+
+Some ENA devices support SR-IOV. This driver is used for both the
+SR-IOV Physical Function (PF) and Virtual Function (VF) devices.
+
+ENA devices enable high speed and low overhead network traffic
+processing by providing multiple Tx/Rx queue pairs (the maximum number
+is advertised by the device via the Admin Queue), a dedicated MSI-X
+interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation,
+and CPU cacheline optimized data placement.
+
+The ENA driver supports industry standard TCP/IP offload features such
+as checksum offload and TCP transmit segmentation offload (TSO).
+Receive-side scaling (RSS) is supported for multi-core scaling.
+
+The ENA driver and its corresponding devices implement health
+monitoring mechanisms, such as a watchdog and debug logs, enabling the
+device and driver to recover in a manner transparent to the
+application.
+
+Some of the ENA devices support a working mode called Low-latency
+Queue (LLQ), which saves several additional microseconds of latency.
+
+Supported PCI vendor ID/device IDs:
+===================================
+1d0f:0ec2 - ENA PF
+1d0f:1ec2 - ENA PF with LLQ support
+1d0f:ec20 - ENA VF
+1d0f:ec21 - ENA VF with LLQ support
+
+ENA Source Code Directory Structure:
+====================================
+ena_com.[ch]      - Management communication layer. This layer is
+                    responsible for handling all the management
+                    (admin) communication between the device and the
+                    driver.
+ena_eth_com.[ch]  - Tx/Rx data path.
+ena_admin_defs.h  - Definition of ENA management interface.
+ena_eth_io_defs.h - Definition of ENA data path interface.
+ena_common_defs.h - Common definitions for ena_com layer.
+ena_regs_defs.h   - Definition of ENA PCI memory-mapped (MMIO) registers.
+ena_netdev.[ch]   - Main Linux kernel driver.
+ena_sysfs.[ch]    - Sysfs files.
+ena_ethtool.c     - ethtool callbacks.
+ena_pci_id_tbl.h  - Supported device IDs.
+
+Management Interface:
+=====================
+ENA management interface is exposed by means of:
+- PCIe Configuration Space
+- Device Registers
+- Admin Queue (AQ) and Admin Completion Queue (ACQ)
+- Asynchronous Event Notification Queue (AENQ)
+
+ENA device MMIO Registers are accessed only during driver
+initialization and are not involved in further normal device
+operation.
+
+AQ is used for submitting management commands, and the
+results/responses are reported asynchronously through ACQ.
+
+ENA introduces a very small set of management commands with room for
+vendor-specific extensions. Most of the management operations are
+framed in a generic Get/Set feature command.
+
+The following admin queue commands are supported:
+- Create I/O submission queue
+- Create I/O completion queue
+- Destroy I/O submission queue
+- Destroy I/O completion queue
+- Get feature
+- Set feature
+- Configure AENQ
+- Get statistics
+
+Refer to ena_admin_defs.h for the list of supported Get/Set Feature
+properties.
+
+The Asynchronous Event Notification Queue (AENQ) is a uni-directional
+queue used by the ENA device to send to the driver events that cannot
+be reported using ACQ. AENQ events are subdivided into groups. Each
+group may have multiple syndromes, as shown below.
+
+The events are:
+	Group			Syndrome
+	Link state change	- X -
+	Fatal error		- X -
+	Notification		Suspend traffic
+	Notification		Resume traffic
+	Keep-Alive		- X -
+
+ACQ and AENQ share the same MSI-X vector.
+
+Keep-Alive is a special mechanism that allows monitoring of the
+device's health. The driver maintains a watchdog (WD) handler which,
+if fired, logs the current state and statistics then resets and
+restarts the ENA device and driver. A Keep-Alive event is delivered by
+the device every second. The driver re-arms the WD upon reception of a
+Keep-Alive event. A missed Keep-Alive event causes the WD handler to
+fire.
+
+Data Path Interface:
+====================
+I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx
+SQ, respectively). Each SQ has a completion queue (CQ) associated
+with it.
+
+The SQs and CQs are implemented as descriptor rings in contiguous
+physical memory.
+
+The ENA driver supports two Queue Operation modes for Tx SQs:
+- Regular mode
+  * In this mode the Tx SQs reside in the host's memory. The ENA
+    device fetches the ENA Tx descriptors and packet data from host
+    memory.
+- Low Latency Queue (LLQ) mode or "push-mode".
+  * In this mode the driver pushes the transmit descriptors and the
+    first 128 bytes of the packet directly to the ENA device memory
+    space. The rest of the packet payload is fetched by the
+    device. For this operation mode, the driver uses a dedicated PCI
+    device memory BAR, which is mapped with write-combine capability.
+
+The Rx SQs support only the regular mode.
+
+Note: Not all ENA devices support LLQ, and this feature is negotiated
+      with the device upon initialization. If the ENA device does not
+      support LLQ mode, the driver falls back to the regular mode.
+
+The driver supports multi-queue for both Tx and Rx. This has various
+benefits:
+- Reduced CPU/thread/process contention on a given Ethernet interface.
+- Cache miss rate on completion is reduced, particularly for data
+  cache lines that hold the sk_buff structures.
+- Increased process-level parallelism when handling received packets.
+- Increased data cache hit rate, by steering kernel processing of
+  packets to the CPU, where the application thread consuming the
+  packet is running.
+- Hardware-based interrupt re-direction.
+
+Interrupt Modes:
+================
+The driver assigns a single MSI-X vector per queue pair (for both Tx
+and Rx directions). The driver assigns an additional dedicated MSI-X vector
+for management (for ACQ and AENQ).
+
+Management interrupt registration is performed when the Linux kernel
+probes the adapter, and it is de-registered when the adapter is
+removed. I/O queue interrupt registration is performed when the Linux
+interface of the adapter is opened, and it is de-registered when the
+interface is closed.
+
+The management interrupt is named:
+   ena-mgmnt@pci:<PCI domain:bus:slot.function>
+and for each queue pair, an interrupt is named:
+   <interface name>-Tx-Rx-<queue index>
+
+The ENA device operates in auto-mask and auto-clear interrupt
+modes. That is, once MSI-X is delivered to the host, its Cause bit is
+automatically cleared and the interrupt is masked. The interrupt is
+unmasked by the driver after NAPI processing is complete.
+
+Interrupt Moderation:
+=====================
+ENA driver and device can operate in conventional or adaptive interrupt
+moderation mode.
+
+In conventional mode the driver instructs the device to postpone interrupt
+posting according to a static interrupt delay value. The interrupt delay
+value can be configured through ethtool(8). The following ethtool
+parameters are supported by the driver: tx-usecs, rx-usecs.
+
+In adaptive interrupt moderation mode the interrupt delay value is
+updated by the driver dynamically and adjusted every NAPI cycle
+according to the traffic nature.
+
+By default ENA driver applies adaptive coalescing on Rx traffic and
+conventional coalescing on Tx traffic.
+
+Adaptive coalescing can be switched on/off through ethtool(8)
+adaptive_rx on|off parameter.
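+
+For example, assuming an interface named eth0 and hypothetical delay
+values, the static delays and the adaptive mode can be set with:
+
+# ethtool -C eth0 rx-usecs 64 tx-usecs 64
+# ethtool -C eth0 adaptive-rx on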
+
+The driver chooses the interrupt delay value according to the number of
+bytes and packets received between interrupt unmasking and interrupt
+posting. The driver uses an interrupt delay table that subdivides the
+range of received bytes/packets into 5 levels and assigns an interrupt
+delay value to each level.
+
+The user can enable/disable adaptive moderation, modify the interrupt
+delay table and restore its default values through sysfs.
+
+The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK
+and can be configured by the ETHTOOL_STUNABLE command of the
+SIOCETHTOOL ioctl.
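+
+For example, assuming an ethtool version with tunables support and an
+interface named eth0:
+
+# ethtool --set-tunable eth0 rx-copybreak 256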
+
+SKB:
+The driver allocates an SKB for each received frame in the Rx NAPI
+handling context. The allocation method depends on the size of the packet.
+If the frame length is larger than rx_copybreak, napi_get_frags()
+is used; otherwise netdev_alloc_skb_ip_align() is used, the buffer
+content is copied (by the CPU) to the SKB, and the buffer is recycled.
+
+Statistics:
+===========
+The user can obtain ENA device and driver statistics using ethtool.
+The driver can collect regular or extended statistics (including
+per-queue stats) from the device.
+
+In addition, the driver logs the stats to syslog upon device reset.
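+
+For example, assuming an interface named eth0, the statistics can be
+dumped with:
+
+# ethtool -S eth0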
+
+MTU:
+====
+The driver supports an arbitrarily large MTU with a maximum that is
+negotiated with the device. The driver configures MTU using the
+SetFeature command (ENA_ADMIN_MTU property). The user can change MTU
+via ip(8) and similar legacy tools.
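+
+For example, assuming an interface named eth0 and that the negotiated
+maximum allows it:
+
+# ip link set dev eth0 mtu 9001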
+
+Stateless Offloads:
+===================
+The ENA driver supports:
+- TSO over IPv4/IPv6
+- TSO with ECN
+- IPv4 header checksum offload
+- TCP/UDP over IPv4/IPv6 checksum offloads
+
+RSS:
+====
+- The ENA device supports RSS that allows flexible Rx traffic
+  steering.
+- Toeplitz and CRC32 hash functions are supported.
+- Different combinations of L2/L3/L4 fields can be configured as
+  inputs for hash functions.
+- The driver configures RSS settings using the AQ SetFeature command
+  (ENA_ADMIN_RSS_HASH_FUNCTION, ENA_ADMIN_RSS_HASH_INPUT and
+  ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG properties).
+- If the NETIF_F_RXHASH flag is set, the 32-bit result of the hash
+  function delivered in the Rx CQ descriptor is set in the received
+  SKB.
+- The user can provide a hash key, hash function, and configure the
+  indirection table through ethtool(8).
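+
+For example, assuming an interface named eth0, the indirection table
+can be spread evenly across 8 Rx queues and then read back with:
+
+# ethtool -X eth0 equal 8
+# ethtool -x eth0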
+
+DATA PATH:
+==========
+Tx:
+---
+ena_start_xmit() is called by the stack. This function does the following:
+- Maps data buffers (skb->data and frags).
+- Populates ena_buf for the push buffer (if the driver and device are
+  in push mode.)
+- Prepares ENA bufs for the remaining frags.
+- Allocates a new request ID from the empty req_id ring. The request
+  ID is the index of the packet in the Tx info. This is used for
+  out-of-order TX completions.
+- Adds the packet to the proper place in the Tx ring.
+- Calls ena_com_prepare_tx(), an ENA communication layer that converts
+  the ena_bufs to ENA descriptors (and adds meta ENA descriptors as
+  needed.)
+  * This function also copies the ENA descriptors and the push buffer
+    to the Device memory space (if in push mode.)
+- Writes doorbell to the ENA device.
+- When the ENA device finishes sending the packet, a completion
+  interrupt is raised.
+- The interrupt handler schedules NAPI.
+- The ena_clean_tx_irq() function is called. This function handles the
+  completion descriptors generated by the ENA, with a single
+  completion descriptor per completed packet.
+  * req_id is retrieved from the completion descriptor. The tx_info of
+    the packet is retrieved via the req_id. The data buffers are
+    unmapped and req_id is returned to the empty req_id ring.
+  * The function stops when all the completion descriptors have been
+    handled or the budget is reached.
+
+Rx:
+---
+- When a packet is received by the ENA device, an interrupt is raised.
+- The interrupt handler schedules NAPI.
+- The ena_clean_rx_irq() function is called. This function calls
+  ena_rx_pkt(), an ENA communication layer function, which returns the
+  number of descriptors used for a new unhandled packet, and zero if
+  no new packet is found.
+- Then it calls the ena_eth_rx_skb() function, which checks the packet
+  length:
+  * If the packet is small (len < rx_copybreak), the driver allocates
+    a SKB for the new packet, and copies the packet payload into the
+    SKB data buffer.
+    - In this way the original data buffer is not passed to the stack
+      and is reused for future Rx packets.
+  * Otherwise the function unmaps the Rx buffer, then allocates the
+    new SKB structure and hooks the Rx buffer to the SKB frags.
+- The new SKB is updated with the necessary information (protocol,
+  checksum hw verify result, etc.), and then passed to the network
+  stack, using the NAPI interface function napi_gro_receive().
diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt
new file mode 100644
index 0000000..a0bf573
--- /dev/null
+++ b/Documentation/networking/strparser.txt
@@ -0,0 +1,136 @@
+Stream Parser
+-------------
+
+The stream parser (strparser) is a utility that parses messages of an
+application layer protocol running over a TCP connection. The stream
+parser works in conjunction with an upper layer in the kernel to provide
+kernel support for application layer messages. For instance, Kernel
+Connection Multiplexor (KCM) uses the Stream Parser to parse messages
+using a BPF program.
+
+Interface
+---------
+
+The API includes a context structure, a set of callbacks, utility
+functions, and a data_ready function. The callbacks include
+a parse_msg function that is called to perform parsing (e.g.
+BPF parsing in case of KCM), and a rcv_msg function that is called
+when a full message has been completed.
+
+A stream parser can be instantiated for a TCP connection. This is done
+by:
+
+strp_init(struct strparser *strp, struct sock *csk,
+	  struct strp_callbacks *cb)
+
+strp is a struct of type strparser that is allocated by the upper layer.
+csk is the TCP socket associated with the stream parser. Callbacks are
+called by the stream parser.
+
+Callbacks
+---------
+
+There are four callbacks:
+
+int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
+
+    parse_msg is called to determine the length of the next message
+    in the stream. The upper layer must implement this function. It
+    should parse the sk_buff as containing the headers for the
+    next application layer message in the stream.
+
+    The skb->cb in the input skb is a struct strp_rx_msg. Only
+    the offset field is relevant in parse_msg and gives the offset
+    where the message starts in the skb.
+
+    The return values of this function are:
+
+    >0 : indicates length of successfully parsed message
+    0  : indicates more data must be received to parse the message
+    -ESTRPIPE : current message should not be processed by the
+          kernel, return control of the socket to userspace which
+          can proceed to read the messages itself
+    other < 0 : Error in parsing; give control back to userspace,
+          assuming that synchronization is lost and the stream
+          is unrecoverable (the application is expected to close the
+          TCP socket)
+
+    In the case that an error is returned (the return value is less than
+    zero) the stream parser will set the error on the TCP socket and wake
+    it up. If parse_msg returned -ESTRPIPE and the stream parser had
+    previously read some bytes for the current message, then the error
+    set on the attached socket is ENODATA since the stream is
+    unrecoverable in that case.
+
+void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
+
+    rcv_msg is called when a full message has been received and
+    is queued. The callee must consume the sk_buff; it can
+    call strp_pause to prevent any further messages from being
+    received in rcv_msg (see strp_pause below). This callback
+    must be set.
+
+    The skb->cb in the input skb is a struct strp_rx_msg. This
+    struct contains two fields: offset and full_len. Offset is
+    where the message starts in the skb, and full_len is the
+    length of the message. skb->len - offset may be greater
+    than full_len since strparser does not trim the skb.
+
+int (*read_sock_done)(struct strparser *strp, int err);
+
+     read_sock_done is called when the stream parser is done reading
+     the TCP socket. The stream parser may read multiple messages
+     in a loop and this function allows cleanup to occur when exiting
+     the loop. If the callback is not set (NULL in strp_init) a
+     default function is used.
+
+void (*abort_parser)(struct strparser *strp, int err);
+
+     This function is called when the stream parser encounters an error
+     in parsing. The default function stops the stream parser for the
+     TCP socket and sets the error in the socket. The default function
+     can be changed by setting the callback to non-NULL in strp_init.
+
+Functions
+---------
+
+The upper layer calls strp_tcp_data_ready when data is ready on the lower
+socket for strparser to process. This should be called from a data_ready
+callback that is set on the socket.
+
+strp_stop is called to completely stop stream parser operations. This
+is called internally when the stream parser encounters an error, and
+it is called from the upper layer when unattaching a TCP socket.
+
+strp_done is called to unattach the stream parser from the TCP socket.
+This must be called after the stream parser has been stopped.
+
+strp_check_rcv is called to check for new messages on the socket. This
+is normally called at initialization of a stream parser instance
+or after strp_unpause.
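+
+Example
+-------
+
+A minimal sketch of an upper layer using the callbacks and functions
+described above. The framing is hypothetical (each message starts with
+a 4-byte big-endian length covering the whole message, header
+included), the my_* helpers are made-up names, and strp_init is
+assumed to return 0 on success; only the strp_* functions and the
+callback signatures above come from this document.
+
+#include <net/strparser.h>
+#include <net/sock.h>
+
+/* Hypothetical framing: each message begins with a __be32 total length
+ * (header included).
+ */
+static int my_parse_msg(struct strparser *strp, struct sk_buff *skb)
+{
+	/* skb->cb is a struct strp_rx_msg; offset is where the message
+	 * starts in the skb (see parse_msg above).
+	 */
+	struct strp_rx_msg *rxm = (struct strp_rx_msg *)skb->cb;
+	__be32 hdr;
+	u32 len;
+
+	/* Not enough data yet to read the length field. */
+	if (skb->len - rxm->offset < sizeof(hdr))
+		return 0;
+
+	if (skb_copy_bits(skb, rxm->offset, &hdr, sizeof(hdr)))
+		return -EINVAL;
+
+	len = be32_to_cpu(hdr);
+	if (len < sizeof(hdr))
+		return -EPROTO;	/* broken framing, abort the parser */
+
+	return len;		/* full message length */
+}
+
+static void my_rcv_msg(struct strparser *strp, struct sk_buff *skb)
+{
+	/* The callee owns the skb; a real upper layer would queue it. */
+	kfree_skb(skb);
+}
+
+static int my_attach(struct strparser *strp, struct sock *csk)
+{
+	struct strp_callbacks cb = {
+		.parse_msg = my_parse_msg,
+		.rcv_msg = my_rcv_msg,
+		/* read_sock_done and abort_parser left NULL: defaults used */
+	};
+	int err;
+
+	err = strp_init(strp, csk, &cb);
+	if (err)
+		return err;
+
+	/* The socket's data_ready callback is expected to call
+	 * strp_tcp_data_ready() (not shown). Check for data that may
+	 * already be queued on the socket.
+	 */
+	strp_check_rcv(strp);
+	return 0;
+}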
+
+Statistics
+----------
+
+Various counters are kept for each stream parser for a TCP socket.
+These are in the strp_stats structure. strp_aggr_stats is a convenience
+structure for accumulating statistics for multiple stream parser
+instances. save_strp_stats and aggregate_strp_stats are helper functions
+to save and aggregate statistics.
+
+Message assembly limits
+-----------------------
+
+The stream parser provides mechanisms to limit the resources consumed by
+message assembly.
+
+A timer is set when assembly starts for a new message. The message
+timeout is taken from rcvtime for the associated TCP socket. If the
+timer fires before assembly completes, the stream parser is aborted
+and the ETIMEDOUT error is set on the TCP socket.
+
+Message length is limited to the receive buffer size of the associated
+TCP socket. If the length returned by parse_msg is greater than
+the socket buffer size, then the stream parser is aborted with the
+EMSGSIZE error set on the TCP socket. Note that this makes the
+maximum size of receive skbuffs for a socket with a stream parser
+to be 2*sk_rcvbuf of the TCP socket.
diff --git a/MAINTAINERS b/MAINTAINERS
index a306795..e902b63 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -636,6 +636,15 @@
 F:	include/linux/altera_uart.h
 F:	include/linux/altera_jtaguart.h
 
+AMAZON ETHERNET DRIVERS
+M:	Netanel Belgazal <netanel@annapurnalabs.com>
+R:	Saeed Bishara <saeed@annapurnalabs.com>
+R:	Zorik Machulsky <zorik@annapurnalabs.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	Documentation/networking/ena.txt
+F:	drivers/net/ethernet/amazon/
+
 AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
 M:	Tom Lendacky <thomas.lendacky@amd.com>
 M:	Gary Hook <gary.hook@amd.com>
diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts
index b7fb5d9..32a961c 100644
--- a/arch/arm64/boot/dts/apm/apm-mustang.dts
+++ b/arch/arm64/boot/dts/apm/apm-mustang.dts
@@ -74,6 +74,7 @@
 
 &xgenet {
 	status = "ok";
+	rxlos-gpios = <&sbgpio 12 1>;
 };
 
 &mmc0 {
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index f1c2c71..d5c3435 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -923,7 +923,7 @@
 			/* mac address will be overwritten by the bootloader */
 			local-mac-address = [00 00 00 00 00 00];
 			phy-connection-type = "rgmii";
-			phy-handle = <&menet0phy>,<&menetphy>;
+			phy-handle = <&menetphy>,<&menet0phy>;
 			mdio {
 				compatible = "apm,xgene-mdio";
 				#address-cells = <1>;
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 71f0e79..b3d0275 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -600,7 +600,6 @@
 		/* create a URB, and a buffer for it */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			netdev_err(netdev, "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
@@ -752,10 +751,8 @@
 
 	/* create a URB, and a buffer for it, and copy the data to the URB */
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(netdev, "No memory left for URBs\n");
+	if (!urb)
 		goto nomem;
-	}
 
 	buf = usb_alloc_coherent(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma);
 	if (!buf) {
@@ -1007,10 +1004,8 @@
 		dev->tx_contexts[i].echo_index = MAX_TX_URBS;
 
 	dev->intr_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->intr_urb) {
-		dev_err(&intf->dev, "Couldn't alloc intr URB\n");
+	if (!dev->intr_urb)
 		goto cleanup_candev;
-	}
 
 	dev->intr_in_buffer = kzalloc(INTR_IN_BUFFER_SIZE, GFP_KERNEL);
 	if (!dev->intr_in_buffer)
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index 784a900..be928ce 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -558,8 +558,6 @@
 		/* create a URB, and a buffer for it */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			dev_warn(dev->udev->dev.parent,
-				 "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
@@ -730,7 +728,6 @@
 	/* create a URB, and a buffer for it, and copy the data to the URB */
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!urb) {
-		netdev_err(netdev, "No memory left for URBs\n");
 		stats->tx_dropped++;
 		dev_kfree_skb(skb);
 		goto nourbmem;
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 6f0cbc3..77e3cc0 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -493,10 +493,8 @@
 
 	/* create a URB, and a buffer for it */
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(netdev, "No memory left for URB\n");
+	if (!urb)
 		goto nomem_urb;
-	}
 
 	hf = usb_alloc_coherent(dev->udev, sizeof(*hf), GFP_ATOMIC,
 				&urb->transfer_dma);
@@ -600,11 +598,8 @@
 
 			/* alloc rx urb */
 			urb = usb_alloc_urb(0, GFP_KERNEL);
-			if (!urb) {
-				netdev_err(netdev,
-					   "No memory left for URB\n");
+			if (!urb)
 				return -ENOMEM;
-			}
 
 			/* alloc rx buffer */
 			buf = usb_alloc_coherent(dev->udev,
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 6f1f3b6..d51e0c4 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -787,10 +787,8 @@
 	int err;
 
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(netdev, "No memory left for URBs\n");
+	if (!urb)
 		return -ENOMEM;
-	}
 
 	buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC);
 	if (!buf) {
@@ -1393,8 +1391,6 @@
 
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			dev_warn(dev->udev->dev.parent,
-				 "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
@@ -1670,7 +1666,6 @@
 
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!urb) {
-		netdev_err(netdev, "No memory left for URBs\n");
 		stats->tx_dropped++;
 		dev_kfree_skb(skb);
 		return NETDEV_TX_OK;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index bfb91d8..c06382c 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -399,7 +399,6 @@
 		/* create a URB, and a buffer for it, to receive usb messages */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			netdev_err(netdev, "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
@@ -454,7 +453,6 @@
 		/* create a URB and a buffer for it, to transmit usb messages */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			netdev_err(netdev, "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
@@ -651,10 +649,8 @@
 
 	/* first allocate a urb to handle the asynchronous steps */
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(dev->netdev, "no memory left for urb\n");
+	if (!urb)
 		return -ENOMEM;
-	}
 
 	/* also allocate enough space for the commands to send */
 	buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_ATOMIC);
diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index a731720..108a30e 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -623,10 +623,8 @@
 
 	/* create a URB, and a buffer for it, and copy the data to the URB */
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(netdev, "No memory left for URBs\n");
+	if (!urb)
 		goto nomem;
-	}
 
 	buf = usb_alloc_coherent(priv->udev, size, GFP_ATOMIC,
 				 &urb->transfer_dma);
@@ -748,7 +746,6 @@
 		/* create a URB, and a buffer for it */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			netdev_err(netdev, "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index bda37d3..38ee10d 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1681,7 +1681,8 @@
 	return 0;
 }
 
-struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
+struct b53_device *b53_switch_alloc(struct device *base,
+				    const struct b53_io_ops *ops,
 				    void *priv)
 {
 	struct dsa_switch *ds;
diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c
index aa87c3f..477a16b 100644
--- a/drivers/net/dsa/b53/b53_mdio.c
+++ b/drivers/net/dsa/b53/b53_mdio.c
@@ -267,7 +267,7 @@
 	return mdiobus_write_nested(bus, addr, reg, value);
 }
 
-static struct b53_io_ops b53_mdio_ops = {
+static const struct b53_io_ops b53_mdio_ops = {
 	.read8 = b53_mdio_read8,
 	.read16 = b53_mdio_read16,
 	.read32 = b53_mdio_read32,
diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
index 77ffc43..cc9e6bd 100644
--- a/drivers/net/dsa/b53/b53_mmap.c
+++ b/drivers/net/dsa/b53/b53_mmap.c
@@ -208,7 +208,7 @@
 	return 0;
 }
 
-static struct b53_io_ops b53_mmap_ops = {
+static const struct b53_io_ops b53_mmap_ops = {
 	.read8 = b53_mmap_read8,
 	.read16 = b53_mmap_read16,
 	.read32 = b53_mmap_read32,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 835a744..d268493 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -182,7 +182,8 @@
 	return dev->cpu_port;
 }
 
-struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
+struct b53_device *b53_switch_alloc(struct device *base,
+				    const struct b53_io_ops *ops,
 				    void *priv);
 
 int b53_switch_detect(struct b53_device *dev);
diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c
index 2bda0b5..f89f530 100644
--- a/drivers/net/dsa/b53/b53_spi.c
+++ b/drivers/net/dsa/b53/b53_spi.c
@@ -270,7 +270,7 @@
 	return spi_write(spi, txbuf, sizeof(txbuf));
 }
 
-static struct b53_io_ops b53_spi_ops = {
+static const struct b53_io_ops b53_spi_ops = {
 	.read8 = b53_spi_read8,
 	.read16 = b53_spi_read16,
 	.read32 = b53_spi_read32,
@@ -317,8 +317,6 @@
 static struct spi_driver b53_spi_driver = {
 	.driver = {
 		.name	= "b53-switch",
-		.bus	= &spi_bus_type,
-		.owner	= THIS_MODULE,
 	},
 	.probe	= b53_spi_probe,
 	.remove	= b53_spi_remove,
diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index 3e2d4a5..8a62b6a 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -344,7 +344,7 @@
 	return ret;
 }
 
-static struct b53_io_ops b53_srab_ops = {
+static const struct b53_io_ops b53_srab_ops = {
 	.read8 = b53_srab_read8,
 	.read16 = b53_srab_read16,
 	.read32 = b53_srab_read32,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index d1d9d3c..a230fcb 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -216,6 +216,118 @@
 	return 0;
 }
 
+static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy,
+			      int reg, u16 *val)
+{
+	int addr = phy; /* PHY device addresses start at 0x0 */
+
+	if (!chip->phy_ops)
+		return -EOPNOTSUPP;
+
+	return chip->phy_ops->read(chip, addr, reg, val);
+}
+
+static int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy,
+			       int reg, u16 val)
+{
+	int addr = phy; /* PHY device addresses start at 0x0 */
+
+	if (!chip->phy_ops)
+		return -EOPNOTSUPP;
+
+	return chip->phy_ops->write(chip, addr, reg, val);
+}
+
+static int mv88e6xxx_phy_page_get(struct mv88e6xxx_chip *chip, int phy, u8 page)
+{
+	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_PHY_PAGE))
+		return -EOPNOTSUPP;
+
+	return mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page);
+}
+
+static void mv88e6xxx_phy_page_put(struct mv88e6xxx_chip *chip, int phy)
+{
+	int err;
+
+	/* Restore PHY page Copper 0x0 for access via the registered MDIO bus */
+	err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, PHY_PAGE_COPPER);
+	if (unlikely(err)) {
+		dev_err(chip->dev, "failed to restore PHY %d page Copper (%d)\n",
+			phy, err);
+	}
+}
+
+static int mv88e6xxx_phy_page_read(struct mv88e6xxx_chip *chip, int phy,
+				   u8 page, int reg, u16 *val)
+{
+	int err;
+
+	/* There is no paging for register 22 */
+	if (reg == PHY_PAGE)
+		return -EINVAL;
+
+	err = mv88e6xxx_phy_page_get(chip, phy, page);
+	if (!err) {
+		err = mv88e6xxx_phy_read(chip, phy, reg, val);
+		mv88e6xxx_phy_page_put(chip, phy);
+	}
+
+	return err;
+}
+
+static int mv88e6xxx_phy_page_write(struct mv88e6xxx_chip *chip, int phy,
+				    u8 page, int reg, u16 val)
+{
+	int err;
+
+	/* There is no paging for register 22 */
+	if (reg == PHY_PAGE)
+		return -EINVAL;
+
+	err = mv88e6xxx_phy_page_get(chip, phy, page);
+	if (!err) {
+		err = mv88e6xxx_phy_write(chip, phy, reg, val);
+		mv88e6xxx_phy_page_put(chip, phy);
+	}
+
+	return err;
+}
+
+static int mv88e6xxx_serdes_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+{
+	return mv88e6xxx_phy_page_read(chip, ADDR_SERDES, SERDES_PAGE_FIBER,
+				       reg, val);
+}
+
+static int mv88e6xxx_serdes_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+{
+	return mv88e6xxx_phy_page_write(chip, ADDR_SERDES, SERDES_PAGE_FIBER,
+					reg, val);
+}
+
+static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg,
+			  u16 mask)
+{
+	unsigned long timeout = jiffies + HZ / 10;
+
+	while (time_before(jiffies, timeout)) {
+		u16 val;
+		int err;
+
+		err = mv88e6xxx_read(chip, addr, reg, &val);
+		if (err)
+			return err;
+
+		if (!(val & mask))
+			return 0;
+
+		usleep_range(1000, 2000);
+	}
+
+	return -ETIMEDOUT;
+}
+
 /* Indirect write to single pointer-data register with an Update bit */
 static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
 			    u16 update)
@@ -260,22 +372,6 @@
 	return mv88e6xxx_write(chip, addr, reg, val);
 }
 
-static int mv88e6xxx_mdio_read_direct(struct mv88e6xxx_chip *chip,
-				      int addr, int regnum)
-{
-	if (addr >= 0)
-		return _mv88e6xxx_reg_read(chip, addr, regnum);
-	return 0xffff;
-}
-
-static int mv88e6xxx_mdio_write_direct(struct mv88e6xxx_chip *chip,
-				       int addr, int regnum, u16 val)
-{
-	if (addr >= 0)
-		return _mv88e6xxx_reg_write(chip, addr, regnum, val);
-	return 0;
-}
-
 static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip)
 {
 	int ret;
@@ -400,34 +496,39 @@
 	chip->ppu_timer.function = mv88e6xxx_ppu_reenable_timer;
 }
 
-static int mv88e6xxx_mdio_read_ppu(struct mv88e6xxx_chip *chip, int addr,
-				   int regnum)
+static int mv88e6xxx_phy_ppu_read(struct mv88e6xxx_chip *chip, int addr,
+				  int reg, u16 *val)
 {
-	int ret;
+	int err;
 
-	ret = mv88e6xxx_ppu_access_get(chip);
-	if (ret >= 0) {
-		ret = _mv88e6xxx_reg_read(chip, addr, regnum);
+	err = mv88e6xxx_ppu_access_get(chip);
+	if (!err) {
+		err = mv88e6xxx_read(chip, addr, reg, val);
 		mv88e6xxx_ppu_access_put(chip);
 	}
 
-	return ret;
+	return err;
 }
 
-static int mv88e6xxx_mdio_write_ppu(struct mv88e6xxx_chip *chip, int addr,
-				    int regnum, u16 val)
+static int mv88e6xxx_phy_ppu_write(struct mv88e6xxx_chip *chip, int addr,
+				   int reg, u16 val)
 {
-	int ret;
+	int err;
 
-	ret = mv88e6xxx_ppu_access_get(chip);
-	if (ret >= 0) {
-		ret = _mv88e6xxx_reg_write(chip, addr, regnum, val);
+	err = mv88e6xxx_ppu_access_get(chip);
+	if (!err) {
+		err = mv88e6xxx_write(chip, addr, reg, val);
 		mv88e6xxx_ppu_access_put(chip);
 	}
 
-	return ret;
+	return err;
 }
 
+static const struct mv88e6xxx_ops mv88e6xxx_phy_ppu_ops = {
+	.read = mv88e6xxx_phy_ppu_read,
+	.write = mv88e6xxx_phy_ppu_write,
+};
+
 static bool mv88e6xxx_6065_family(struct mv88e6xxx_chip *chip)
 {
 	return chip->info->family == MV88E6XXX_FAMILY_6065;
@@ -819,130 +920,69 @@
 	mutex_unlock(&chip->reg_lock);
 }
 
-static int _mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int reg, int offset,
-			   u16 mask)
-{
-	unsigned long timeout = jiffies + HZ / 10;
-
-	while (time_before(jiffies, timeout)) {
-		int ret;
-
-		ret = _mv88e6xxx_reg_read(chip, reg, offset);
-		if (ret < 0)
-			return ret;
-		if (!(ret & mask))
-			return 0;
-
-		usleep_range(1000, 2000);
-	}
-	return -ETIMEDOUT;
-}
-
-static int mv88e6xxx_mdio_wait(struct mv88e6xxx_chip *chip)
-{
-	return _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_OP,
-			       GLOBAL2_SMI_OP_BUSY);
-}
-
 static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip)
 {
-	return _mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_ATU_OP,
-			       GLOBAL_ATU_OP_BUSY);
-}
-
-static int mv88e6xxx_mdio_read_indirect(struct mv88e6xxx_chip *chip,
-					int addr, int regnum)
-{
-	int ret;
-
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_OP,
-				   GLOBAL2_SMI_OP_22_READ | (addr << 5) |
-				   regnum);
-	if (ret < 0)
-		return ret;
-
-	ret = mv88e6xxx_mdio_wait(chip);
-	if (ret < 0)
-		return ret;
-
-	ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL2, GLOBAL2_SMI_DATA);
-
-	return ret;
-}
-
-static int mv88e6xxx_mdio_write_indirect(struct mv88e6xxx_chip *chip,
-					 int addr, int regnum, u16 val)
-{
-	int ret;
-
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_DATA, val);
-	if (ret < 0)
-		return ret;
-
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_OP,
-				   GLOBAL2_SMI_OP_22_WRITE | (addr << 5) |
-				   regnum);
-
-	return mv88e6xxx_mdio_wait(chip);
+	return mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_ATU_OP,
+			      GLOBAL_ATU_OP_BUSY);
 }
 
 static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
 			     struct ethtool_eee *e)
 {
 	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	int reg;
+	u16 reg;
+	int err;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
 		return -EOPNOTSUPP;
 
 	mutex_lock(&chip->reg_lock);
 
-	reg = mv88e6xxx_mdio_read_indirect(chip, port, 16);
-	if (reg < 0)
+	err = mv88e6xxx_phy_read(chip, port, 16, &reg);
+	if (err)
 		goto out;
 
 	e->eee_enabled = !!(reg & 0x0200);
 	e->tx_lpi_enabled = !!(reg & 0x0100);
 
-	reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS);
-	if (reg < 0)
+	err = mv88e6xxx_read(chip, REG_PORT(port), PORT_STATUS, &reg);
+	if (err)
 		goto out;
 
 	e->eee_active = !!(reg & PORT_STATUS_EEE);
-	reg = 0;
-
 out:
 	mutex_unlock(&chip->reg_lock);
-	return reg;
+
+	return err;
 }
 
 static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
 			     struct phy_device *phydev, struct ethtool_eee *e)
 {
 	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	int reg;
-	int ret;
+	u16 reg;
+	int err;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
 		return -EOPNOTSUPP;
 
 	mutex_lock(&chip->reg_lock);
 
-	ret = mv88e6xxx_mdio_read_indirect(chip, port, 16);
-	if (ret < 0)
+	err = mv88e6xxx_phy_read(chip, port, 16, &reg);
+	if (err)
 		goto out;
 
-	reg = ret & ~0x0300;
+	reg &= ~0x0300;
 	if (e->eee_enabled)
 		reg |= 0x0200;
 	if (e->tx_lpi_enabled)
 		reg |= 0x0100;
 
-	ret = mv88e6xxx_mdio_write_indirect(chip, port, 16, reg);
+	err = mv88e6xxx_phy_write(chip, port, 16, reg);
 out:
 	mutex_unlock(&chip->reg_lock);
 
-	return ret;
+	return err;
 }
 
 static int _mv88e6xxx_atu_cmd(struct mv88e6xxx_chip *chip, u16 fid, u16 cmd)
@@ -1227,8 +1267,8 @@
 
 static int _mv88e6xxx_vtu_wait(struct mv88e6xxx_chip *chip)
 {
-	return _mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_VTU_OP,
-			       GLOBAL_VTU_OP_BUSY);
+	return mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_VTU_OP,
+			      GLOBAL_VTU_OP_BUSY);
 }
 
 static int _mv88e6xxx_vtu_cmd(struct mv88e6xxx_chip *chip, u16 op)
@@ -2302,38 +2342,6 @@
 	mutex_unlock(&chip->reg_lock);
 }
 
-static int _mv88e6xxx_mdio_page_write(struct mv88e6xxx_chip *chip,
-				      int port, int page, int reg, int val)
-{
-	int ret;
-
-	ret = mv88e6xxx_mdio_write_indirect(chip, port, 0x16, page);
-	if (ret < 0)
-		goto restore_page_0;
-
-	ret = mv88e6xxx_mdio_write_indirect(chip, port, reg, val);
-restore_page_0:
-	mv88e6xxx_mdio_write_indirect(chip, port, 0x16, 0x0);
-
-	return ret;
-}
-
-static int _mv88e6xxx_mdio_page_read(struct mv88e6xxx_chip *chip,
-				     int port, int page, int reg)
-{
-	int ret;
-
-	ret = mv88e6xxx_mdio_write_indirect(chip, port, 0x16, page);
-	if (ret < 0)
-		goto restore_page_0;
-
-	ret = mv88e6xxx_mdio_read_indirect(chip, port, reg);
-restore_page_0:
-	mv88e6xxx_mdio_write_indirect(chip, port, 0x16, 0x0);
-
-	return ret;
-}
-
 static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip)
 {
 	bool ppu_active = mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU_ACTIVE);
@@ -2396,23 +2404,22 @@
 	return ret;
 }
 
-static int mv88e6xxx_power_on_serdes(struct mv88e6xxx_chip *chip)
+static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip)
 {
-	int ret;
+	u16 val;
+	int err;
 
-	ret = _mv88e6xxx_mdio_page_read(chip, REG_FIBER_SERDES,
-					PAGE_FIBER_SERDES, MII_BMCR);
-	if (ret < 0)
-		return ret;
+	/* Clear Power Down bit */
+	err = mv88e6xxx_serdes_read(chip, MII_BMCR, &val);
+	if (err)
+		return err;
 
-	if (ret & BMCR_PDOWN) {
-		ret &= ~BMCR_PDOWN;
-		ret = _mv88e6xxx_mdio_page_write(chip, REG_FIBER_SERDES,
-						 PAGE_FIBER_SERDES, MII_BMCR,
-						 ret);
+	if (val & BMCR_PDOWN) {
+		val &= ~BMCR_PDOWN;
+		err = mv88e6xxx_serdes_write(chip, MII_BMCR, val);
 	}
 
-	return ret;
+	return err;
 }
 
 static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port,
@@ -2535,7 +2542,7 @@
 	/* If this port is connected to a SerDes, make sure the SerDes is not
 	 * powered down.
 	 */
-	if (mv88e6xxx_6352_family(chip)) {
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SERDES)) {
 		ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS);
 		if (ret < 0)
 			return ret;
@@ -2543,7 +2550,7 @@
 		if ((ret == PORT_STATUS_CMODE_100BASE_X) ||
 		    (ret == PORT_STATUS_CMODE_1000BASE_X) ||
 		    (ret == PORT_STATUS_CMODE_SGMII)) {
-			ret = mv88e6xxx_power_on_serdes(chip);
+			ret = mv88e6xxx_serdes_power_on(chip);
 			if (ret < 0)
 				return ret;
 		}
@@ -2949,8 +2956,8 @@
 			break;
 
 		/* Wait for the operation to complete */
-		err = _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
-				      GLOBAL2_IRL_CMD_BUSY);
+		err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
+				     GLOBAL2_IRL_CMD_BUSY);
 		if (err)
 			break;
 	}
@@ -3004,9 +3011,9 @@
 
 static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip)
 {
-	return _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD,
-			       GLOBAL2_EEPROM_CMD_BUSY |
-			       GLOBAL2_EEPROM_CMD_RUNNING);
+	return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD,
+			      GLOBAL2_EEPROM_CMD_BUSY |
+			      GLOBAL2_EEPROM_CMD_RUNNING);
 }
 
 static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
@@ -3054,6 +3061,62 @@
 	return mv88e6xxx_g2_eeprom_cmd(chip, cmd);
 }
 
+static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip)
+{
+	return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD,
+			      GLOBAL2_SMI_PHY_CMD_BUSY);
+}
+
+static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
+{
+	int err;
+
+	err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g2_smi_phy_wait(chip);
+}
+
+static int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr,
+				     int reg, u16 *val)
+{
+	u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg;
+	int err;
+
+	err = mv88e6xxx_g2_smi_phy_wait(chip);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
+	if (err)
+		return err;
+
+	return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
+}
+
+static int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr,
+				      int reg, u16 val)
+{
+	u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg;
+	int err;
+
+	err = mv88e6xxx_g2_smi_phy_wait(chip);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
+}
+
+static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = {
+	.read = mv88e6xxx_g2_smi_phy_read,
+	.write = mv88e6xxx_g2_smi_phy_write,
+};
+
 static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
 {
 	u16 reg;
@@ -3187,84 +3250,35 @@
 	return err;
 }
 
-#ifdef CONFIG_NET_DSA_HWMON
-static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page,
-				    int reg)
-{
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	int ret;
-
-	mutex_lock(&chip->reg_lock);
-	ret = _mv88e6xxx_mdio_page_read(chip, port, page, reg);
-	mutex_unlock(&chip->reg_lock);
-
-	return ret;
-}
-
-static int mv88e6xxx_mdio_page_write(struct dsa_switch *ds, int port, int page,
-				     int reg, int val)
-{
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	int ret;
-
-	mutex_lock(&chip->reg_lock);
-	ret = _mv88e6xxx_mdio_page_write(chip, port, page, reg, val);
-	mutex_unlock(&chip->reg_lock);
-
-	return ret;
-}
-#endif
-
-static int mv88e6xxx_port_to_mdio_addr(struct mv88e6xxx_chip *chip, int port)
-{
-	if (port >= 0 && port < chip->info->num_ports)
-		return port;
-	return -EINVAL;
-}
-
-static int mv88e6xxx_mdio_read(struct mii_bus *bus, int port, int regnum)
+static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
 {
 	struct mv88e6xxx_chip *chip = bus->priv;
-	int addr = mv88e6xxx_port_to_mdio_addr(chip, port);
-	int ret;
+	u16 val;
+	int err;
 
-	if (addr < 0)
+	if (phy >= chip->info->num_ports)
 		return 0xffff;
 
 	mutex_lock(&chip->reg_lock);
-
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU))
-		ret = mv88e6xxx_mdio_read_ppu(chip, addr, regnum);
-	else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_SMI_PHY))
-		ret = mv88e6xxx_mdio_read_indirect(chip, addr, regnum);
-	else
-		ret = mv88e6xxx_mdio_read_direct(chip, addr, regnum);
-
+	err = mv88e6xxx_phy_read(chip, phy, reg, &val);
 	mutex_unlock(&chip->reg_lock);
-	return ret;
+
+	return err ? err : val;
 }
 
-static int mv88e6xxx_mdio_write(struct mii_bus *bus, int port, int regnum,
-				u16 val)
+static int mv88e6xxx_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val)
 {
 	struct mv88e6xxx_chip *chip = bus->priv;
-	int addr = mv88e6xxx_port_to_mdio_addr(chip, port);
-	int ret;
+	int err;
 
-	if (addr < 0)
+	if (phy >= chip->info->num_ports)
 		return 0xffff;
 
 	mutex_lock(&chip->reg_lock);
-
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU))
-		ret = mv88e6xxx_mdio_write_ppu(chip, addr, regnum, val);
-	else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_SMI_PHY))
-		ret = mv88e6xxx_mdio_write_indirect(chip, addr, regnum, val);
-	else
-		ret = mv88e6xxx_mdio_write_direct(chip, addr, regnum, val);
-
+	err = mv88e6xxx_phy_write(chip, phy, reg, val);
 	mutex_unlock(&chip->reg_lock);
-	return ret;
+
+	return err;
 }
 
 static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
@@ -3274,9 +3288,6 @@
 	struct mii_bus *bus;
 	int err;
 
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU))
-		mv88e6xxx_ppu_state_init(chip);
-
 	if (np)
 		chip->mdio_np = of_get_child_by_name(np, "mdio");
 
@@ -3332,44 +3343,42 @@
 static int mv88e61xx_get_temp(struct dsa_switch *ds, int *temp)
 {
 	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	u16 val;
 	int ret;
-	int val;
 
 	*temp = 0;
 
 	mutex_lock(&chip->reg_lock);
 
-	ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x16, 0x6);
+	ret = mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x6);
 	if (ret < 0)
 		goto error;
 
 	/* Enable temperature sensor */
-	ret = mv88e6xxx_mdio_read_direct(chip, 0x0, 0x1a);
+	ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val);
 	if (ret < 0)
 		goto error;
 
-	ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x1a, ret | (1 << 5));
+	ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val | (1 << 5));
 	if (ret < 0)
 		goto error;
 
 	/* Wait for temperature to stabilize */
 	usleep_range(10000, 12000);
 
-	val = mv88e6xxx_mdio_read_direct(chip, 0x0, 0x1a);
-	if (val < 0) {
-		ret = val;
+	ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val);
+	if (ret < 0)
 		goto error;
-	}
 
 	/* Disable temperature sensor */
-	ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x1a, ret & ~(1 << 5));
+	ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val & ~(1 << 5));
 	if (ret < 0)
 		goto error;
 
 	*temp = ((val & 0x1f) - 5) * 5;
 
 error:
-	mv88e6xxx_mdio_write_direct(chip, 0x0, 0x16, 0x0);
+	mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x0);
 	mutex_unlock(&chip->reg_lock);
 	return ret;
 }
@@ -3378,15 +3387,18 @@
 {
 	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
 	int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
+	u16 val;
 	int ret;
 
 	*temp = 0;
 
-	ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 27);
+	mutex_lock(&chip->reg_lock);
+	ret = mv88e6xxx_phy_page_read(chip, phy, 6, 27, &val);
+	mutex_unlock(&chip->reg_lock);
 	if (ret < 0)
 		return ret;
 
-	*temp = (ret & 0xff) - 25;
+	*temp = (val & 0xff) - 25;
 
 	return 0;
 }
@@ -3408,6 +3420,7 @@
 {
 	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
 	int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
+	u16 val;
 	int ret;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT))
@@ -3415,11 +3428,13 @@
 
 	*temp = 0;
 
-	ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26);
+	mutex_lock(&chip->reg_lock);
+	ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val);
+	mutex_unlock(&chip->reg_lock);
 	if (ret < 0)
 		return ret;
 
-	*temp = (((ret >> 8) & 0x1f) * 5) - 25;
+	*temp = (((val >> 8) & 0x1f) * 5) - 25;
 
 	return 0;
 }
@@ -3428,23 +3443,30 @@
 {
 	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
 	int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
-	int ret;
+	u16 val;
+	int err;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT))
 		return -EOPNOTSUPP;
 
-	ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26);
-	if (ret < 0)
-		return ret;
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val);
+	if (err)
+		goto unlock;
 	temp = clamp_val(DIV_ROUND_CLOSEST(temp, 5) + 5, 0, 0x1f);
-	return mv88e6xxx_mdio_page_write(ds, phy, 6, 26,
-					 (ret & 0xe0ff) | (temp << 8));
+	err = mv88e6xxx_phy_page_write(chip, phy, 6, 26,
+				       (val & 0xe0ff) | (temp << 8));
+unlock:
+	mutex_unlock(&chip->reg_lock);
+
+	return err;
 }
 
 static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm)
 {
 	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
 	int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
+	u16 val;
 	int ret;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT))
@@ -3452,11 +3474,13 @@
 
 	*alarm = false;
 
-	ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26);
+	mutex_lock(&chip->reg_lock);
+	ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val);
+	mutex_unlock(&chip->reg_lock);
 	if (ret < 0)
 		return ret;
 
-	*alarm = !!(ret & 0x40);
+	*alarm = !!(val & 0x40);
 
 	return 0;
 }
@@ -3873,6 +3897,23 @@
 	return chip;
 }
 
+static const struct mv88e6xxx_ops mv88e6xxx_phy_ops = {
+	.read = mv88e6xxx_read,
+	.write = mv88e6xxx_write,
+};
+
+static void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip)
+{
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SMI_PHY)) {
+		chip->phy_ops = &mv88e6xxx_g2_smi_phy_ops;
+	} else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) {
+		chip->phy_ops = &mv88e6xxx_phy_ppu_ops;
+		mv88e6xxx_ppu_state_init(chip);
+	} else {
+		chip->phy_ops = &mv88e6xxx_phy_ops;
+	}
+}
+
 static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
 			      struct mii_bus *bus, int sw_addr)
 {
@@ -3882,7 +3923,7 @@
 
 	if (sw_addr == 0)
 		chip->smi_ops = &mv88e6xxx_smi_single_chip_ops;
-	else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_MULTI_CHIP))
+	else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_MULTI_CHIP))
 		chip->smi_ops = &mv88e6xxx_smi_multi_chip_ops;
 	else
 		return -EINVAL;
@@ -3920,6 +3961,8 @@
 	if (err)
 		goto free;
 
+	mv88e6xxx_phy_init(chip);
+
 	err = mv88e6xxx_mdio_register(chip, NULL);
 	if (err)
 		goto free;
@@ -4021,6 +4064,8 @@
 	if (err)
 		return err;
 
+	mv88e6xxx_phy_init(chip);
+
 	chip->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_ASIS);
 	if (IS_ERR(chip->reset))
 		return PTR_ERR(chip->reset);
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index 48d6ea7..1f9bab5 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -30,9 +30,12 @@
 #define SMI_CMD_OP_45_READ_DATA_INC	((3 << 10) | SMI_CMD_BUSY)
 #define SMI_DATA		0x01
 
-/* Fiber/SERDES Registers are located at SMI address F, page 1 */
-#define REG_FIBER_SERDES	0x0f
-#define PAGE_FIBER_SERDES	0x01
+/* PHY Registers */
+#define PHY_PAGE		0x16
+#define PHY_PAGE_COPPER		0x00
+
+#define ADDR_SERDES		0x0f
+#define SERDES_PAGE_FIBER	0x01
 
 #define REG_PORT(p)		(0x10 + (p))
 #define PORT_STATUS		0x00
@@ -329,17 +332,16 @@
 #define GLOBAL2_EEPROM_DATA	0x15
 #define GLOBAL2_PTP_AVB_OP	0x16
 #define GLOBAL2_PTP_AVB_DATA	0x17
-#define GLOBAL2_SMI_OP		0x18
-#define GLOBAL2_SMI_OP_BUSY		BIT(15)
-#define GLOBAL2_SMI_OP_CLAUSE_22	BIT(12)
-#define GLOBAL2_SMI_OP_22_WRITE		((1 << 10) | GLOBAL2_SMI_OP_BUSY | \
-					 GLOBAL2_SMI_OP_CLAUSE_22)
-#define GLOBAL2_SMI_OP_22_READ		((2 << 10) | GLOBAL2_SMI_OP_BUSY | \
-					 GLOBAL2_SMI_OP_CLAUSE_22)
-#define GLOBAL2_SMI_OP_45_WRITE_ADDR	((0 << 10) | GLOBAL2_SMI_OP_BUSY)
-#define GLOBAL2_SMI_OP_45_WRITE_DATA	((1 << 10) | GLOBAL2_SMI_OP_BUSY)
-#define GLOBAL2_SMI_OP_45_READ_DATA	((2 << 10) | GLOBAL2_SMI_OP_BUSY)
-#define GLOBAL2_SMI_DATA	0x19
+#define GLOBAL2_SMI_PHY_CMD			0x18
+#define GLOBAL2_SMI_PHY_CMD_BUSY		BIT(15)
+#define GLOBAL2_SMI_PHY_CMD_MODE_22		BIT(12)
+#define GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA	((0x1 << 10) | \
+						 GLOBAL2_SMI_PHY_CMD_MODE_22 | \
+						 GLOBAL2_SMI_PHY_CMD_BUSY)
+#define GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA	((0x2 << 10) | \
+						 GLOBAL2_SMI_PHY_CMD_MODE_22 | \
+						 GLOBAL2_SMI_PHY_CMD_BUSY)
+#define GLOBAL2_SMI_PHY_DATA			0x19
 #define GLOBAL2_SCRATCH_MISC	0x1a
 #define GLOBAL2_SCRATCH_BUSY		BIT(15)
 #define GLOBAL2_SCRATCH_REGISTER_SHIFT	8
@@ -388,6 +390,21 @@
 	 */
 	MV88E6XXX_CAP_EEE,
 
+	/* Multi-chip Addressing Mode.
+	 * Some chips respond to only 2 registers of its own SMI device address
+	 * when it is non-zero, and use indirect access to internal registers.
+	 */
+	MV88E6XXX_CAP_SMI_CMD,		/* (0x00) SMI Command */
+	MV88E6XXX_CAP_SMI_DATA,		/* (0x01) SMI Data */
+
+	/* PHY Registers.
+	 */
+	MV88E6XXX_CAP_PHY_PAGE,		/* (0x16) Page Register */
+
+	/* Fiber/SERDES Registers (SMI address F).
+	 */
+	MV88E6XXX_CAP_SERDES,
+
 	/* Switch Global 2 Registers.
 	 * The device contains a second set of global 16-bit registers.
 	 */
@@ -402,12 +419,8 @@
 	MV88E6XXX_CAP_G2_POT,		/* (0x0f) Priority Override Table */
 	MV88E6XXX_CAP_G2_EEPROM_CMD,	/* (0x14) EEPROM Command */
 	MV88E6XXX_CAP_G2_EEPROM_DATA,	/* (0x15) EEPROM Data */
-
-	/* Multi-chip Addressing Mode.
-	 * Some chips require an indirect SMI access when their SMI device
-	 * address is not zero. See SMI_CMD and SMI_DATA.
-	 */
-	MV88E6XXX_CAP_MULTI_CHIP,
+	MV88E6XXX_CAP_G2_SMI_PHY_CMD,	/* (0x18) SMI PHY Command */
+	MV88E6XXX_CAP_G2_SMI_PHY_DATA,	/* (0x19) SMI PHY Data */
 
 	/* PHY Polling Unit.
 	 * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING.
@@ -415,12 +428,6 @@
 	MV88E6XXX_CAP_PPU,
 	MV88E6XXX_CAP_PPU_ACTIVE,
 
-	/* SMI PHY Command and Data registers.
-	 * This requires an indirect access to PHY registers through
-	 * GLOBAL2_SMI_OP, otherwise direct access to PHY registers is done.
-	 */
-	MV88E6XXX_CAP_SMI_PHY,
-
 	/* Per VLAN Spanning Tree Unit (STU).
 	 * The Port State database, if present, is accessed through VTU
 	 * operations and dedicated SID registers. See GLOBAL_VTU_SID.
@@ -441,6 +448,14 @@
 
 /* Bitmask of capabilities */
 #define MV88E6XXX_FLAG_EEE		BIT(MV88E6XXX_CAP_EEE)
+
+#define MV88E6XXX_FLAG_SMI_CMD		BIT(MV88E6XXX_CAP_SMI_CMD)
+#define MV88E6XXX_FLAG_SMI_DATA		BIT(MV88E6XXX_CAP_SMI_DATA)
+
+#define MV88E6XXX_FLAG_PHY_PAGE		BIT(MV88E6XXX_CAP_PHY_PAGE)
+
+#define MV88E6XXX_FLAG_SERDES		BIT(MV88E6XXX_CAP_SERDES)
+
 #define MV88E6XXX_FLAG_GLOBAL2		BIT(MV88E6XXX_CAP_GLOBAL2)
 #define MV88E6XXX_FLAG_G2_MGMT_EN_2X	BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X)
 #define MV88E6XXX_FLAG_G2_MGMT_EN_0X	BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X)
@@ -452,10 +467,11 @@
 #define MV88E6XXX_FLAG_G2_POT		BIT(MV88E6XXX_CAP_G2_POT)
 #define MV88E6XXX_FLAG_G2_EEPROM_CMD	BIT(MV88E6XXX_CAP_G2_EEPROM_CMD)
 #define MV88E6XXX_FLAG_G2_EEPROM_DATA	BIT(MV88E6XXX_CAP_G2_EEPROM_DATA)
-#define MV88E6XXX_FLAG_MULTI_CHIP	BIT(MV88E6XXX_CAP_MULTI_CHIP)
+#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD	BIT(MV88E6XXX_CAP_G2_SMI_PHY_CMD)
+#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA	BIT(MV88E6XXX_CAP_G2_SMI_PHY_DATA)
+
 #define MV88E6XXX_FLAG_PPU		BIT(MV88E6XXX_CAP_PPU)
 #define MV88E6XXX_FLAG_PPU_ACTIVE	BIT(MV88E6XXX_CAP_PPU_ACTIVE)
-#define MV88E6XXX_FLAG_SMI_PHY		BIT(MV88E6XXX_CAP_SMI_PHY)
 #define MV88E6XXX_FLAG_STU		BIT(MV88E6XXX_CAP_STU)
 #define MV88E6XXX_FLAG_TEMP		BIT(MV88E6XXX_CAP_TEMP)
 #define MV88E6XXX_FLAG_TEMP_LIMIT	BIT(MV88E6XXX_CAP_TEMP_LIMIT)
@@ -471,28 +487,43 @@
 	(MV88E6XXX_FLAG_G2_IRL_CMD |	\
 	 MV88E6XXX_FLAG_G2_IRL_DATA)
 
+/* Multi-chip Addressing Mode */
+#define MV88E6XXX_FLAGS_MULTI_CHIP	\
+	(MV88E6XXX_FLAG_SMI_CMD |	\
+	 MV88E6XXX_FLAG_SMI_DATA)
+
 /* Cross-chip Port VLAN Table */
 #define MV88E6XXX_FLAGS_PVT		\
 	(MV88E6XXX_FLAG_G2_PVT_ADDR |	\
 	 MV88E6XXX_FLAG_G2_PVT_DATA)
 
+/* Fiber/SERDES Registers at SMI address F, page 1 */
+#define MV88E6XXX_FLAGS_SERDES		\
+	(MV88E6XXX_FLAG_PHY_PAGE |	\
+	 MV88E6XXX_FLAG_SERDES)
+
+/* Indirect PHY access via Global2 SMI PHY registers */
+#define MV88E6XXX_FLAGS_SMI_PHY		\
+	(MV88E6XXX_FLAG_G2_SMI_PHY_CMD |\
+	 MV88E6XXX_FLAG_G2_SMI_PHY_DATA)
+
 #define MV88E6XXX_FLAGS_FAMILY_6095	\
 	(MV88E6XXX_FLAG_GLOBAL2 |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU |		\
-	 MV88E6XXX_FLAG_VTU)
+	 MV88E6XXX_FLAG_VTU |		\
+	 MV88E6XXX_FLAGS_MULTI_CHIP)
 
 #define MV88E6XXX_FLAGS_FAMILY_6097	\
 	(MV88E6XXX_FLAG_GLOBAL2 |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
 	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU |		\
 	 MV88E6XXX_FLAG_STU |		\
 	 MV88E6XXX_FLAG_VTU |		\
 	 MV88E6XXX_FLAGS_IRL |		\
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
 	 MV88E6XXX_FLAGS_PVT)
 
 #define MV88E6XXX_FLAGS_FAMILY_6165	\
@@ -501,17 +532,17 @@
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
 	 MV88E6XXX_FLAG_G2_SWITCH_MAC |	\
 	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_STU |		\
 	 MV88E6XXX_FLAG_TEMP |		\
 	 MV88E6XXX_FLAG_VTU |		\
 	 MV88E6XXX_FLAGS_IRL |		\
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
 	 MV88E6XXX_FLAGS_PVT)
 
 #define MV88E6XXX_FLAGS_FAMILY_6185	\
 	(MV88E6XXX_FLAG_GLOBAL2 |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU |		\
 	 MV88E6XXX_FLAG_VTU)
 
@@ -522,15 +553,15 @@
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
 	 MV88E6XXX_FLAG_G2_SWITCH_MAC |	\
 	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU_ACTIVE |	\
-	 MV88E6XXX_FLAG_SMI_PHY |	\
 	 MV88E6XXX_FLAG_TEMP |		\
 	 MV88E6XXX_FLAG_TEMP_LIMIT |	\
 	 MV88E6XXX_FLAG_VTU |		\
 	 MV88E6XXX_FLAGS_EEPROM16 |	\
 	 MV88E6XXX_FLAGS_IRL |		\
-	 MV88E6XXX_FLAGS_PVT)
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
+	 MV88E6XXX_FLAGS_PVT |		\
+	 MV88E6XXX_FLAGS_SMI_PHY)
 
 #define MV88E6XXX_FLAGS_FAMILY_6351	\
 	(MV88E6XXX_FLAG_GLOBAL2 |	\
@@ -538,14 +569,14 @@
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
 	 MV88E6XXX_FLAG_G2_SWITCH_MAC |	\
 	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU_ACTIVE |	\
-	 MV88E6XXX_FLAG_SMI_PHY |	\
 	 MV88E6XXX_FLAG_STU |		\
 	 MV88E6XXX_FLAG_TEMP |		\
 	 MV88E6XXX_FLAG_VTU |		\
 	 MV88E6XXX_FLAGS_IRL |		\
-	 MV88E6XXX_FLAGS_PVT)
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
+	 MV88E6XXX_FLAGS_PVT |		\
+	 MV88E6XXX_FLAGS_SMI_PHY)
 
 #define MV88E6XXX_FLAGS_FAMILY_6352	\
 	(MV88E6XXX_FLAG_EEE |		\
@@ -554,16 +585,17 @@
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
 	 MV88E6XXX_FLAG_G2_SWITCH_MAC |	\
 	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU_ACTIVE |	\
-	 MV88E6XXX_FLAG_SMI_PHY |	\
 	 MV88E6XXX_FLAG_STU |		\
 	 MV88E6XXX_FLAG_TEMP |		\
 	 MV88E6XXX_FLAG_TEMP_LIMIT |	\
 	 MV88E6XXX_FLAG_VTU |		\
 	 MV88E6XXX_FLAGS_EEPROM16 |	\
 	 MV88E6XXX_FLAGS_IRL |		\
-	 MV88E6XXX_FLAGS_PVT)
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
+	 MV88E6XXX_FLAGS_PVT |		\
+	 MV88E6XXX_FLAGS_SERDES |	\
+	 MV88E6XXX_FLAGS_SMI_PHY)
 
 struct mv88e6xxx_info {
 	enum mv88e6xxx_family family;
@@ -623,6 +655,7 @@
 	/* Handles automatic disabling and re-enabling of the PHY
 	 * polling unit.
 	 */
+	const struct mv88e6xxx_ops *phy_ops;
 	struct mutex		ppu_mutex;
 	int			ppu_disabled;
 	struct work_struct	ppu_work;
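
For reference, the Global2 SMI PHY command/data registers renamed above are used for indirect Clause-22 PHY accesses. Below is a minimal sketch of such a read; the accessor helpers (g2_read/g2_write/g2_wait) and the placement of the device address in bits 9:5 and the register number in bits 4:0 of the command word are assumptions made for illustration, not taken from this patch.

	/* Assumed helpers, not part of this patch */
	static int g2_read(void *chip, int reg, u16 *val);
	static int g2_write(void *chip, int reg, u16 val);
	static int g2_wait(void *chip, int reg, u16 busy_mask);

	static int example_g2_smi_phy_read(void *chip, int addr, int reg, u16 *val)
	{
		/* Assumed layout: opcode | device address (9:5) | register (4:0) */
		u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg;
		int err;

		err = g2_write(chip, GLOBAL2_SMI_PHY_CMD, cmd);
		if (err)
			return err;

		/* Wait for the BUSY bit to self-clear before reading the result */
		err = g2_wait(chip, GLOBAL2_SMI_PHY_CMD, GLOBAL2_SMI_PHY_CMD_BUSY);
		if (err)
			return err;

		return g2_read(chip, GLOBAL2_SMI_PHY_DATA, val);
	}
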
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 2ffd634..8cc7467 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -24,6 +24,7 @@
 source "drivers/net/ethernet/allwinner/Kconfig"
 source "drivers/net/ethernet/alteon/Kconfig"
 source "drivers/net/ethernet/altera/Kconfig"
+source "drivers/net/ethernet/amazon/Kconfig"
 source "drivers/net/ethernet/amd/Kconfig"
 source "drivers/net/ethernet/apm/Kconfig"
 source "drivers/net/ethernet/apple/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 1d349e9..a09423d 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -10,6 +10,7 @@
 obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/
 obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/
 obj-$(CONFIG_ALTERA_TSE) += altera/
+obj-$(CONFIG_NET_VENDOR_AMAZON) += amazon/
 obj-$(CONFIG_NET_VENDOR_AMD) += amd/
 obj-$(CONFIG_NET_XGENE) += apm/
 obj-$(CONFIG_NET_VENDOR_APPLE) += apple/
diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c
index 38eaea1..00f9ee3 100644
--- a/drivers/net/ethernet/adi/bfin_mac.c
+++ b/drivers/net/ethernet/adi/bfin_mac.c
@@ -192,8 +192,8 @@
 			goto init_error;
 
 		skb_reserve(new_skb, NET_IP_ALIGN);
-		/* Invidate the data cache of skb->data range when it is write back
-		 * cache. It will prevent overwritting the new data from DMA
+		/* Invalidate the data cache of skb->data range when it is write back
+		 * cache. It will prevent overwriting the new data from DMA
 		 */
 		blackfin_dcache_invalidate_range((unsigned long)new_skb->head,
 					 (unsigned long)new_skb->end);
@@ -1205,7 +1205,7 @@
 	}
 	/* reserve 2 bytes for RXDWA padding */
 	skb_reserve(new_skb, NET_IP_ALIGN);
-	/* Invidate the data cache of skb->data range when it is write back
+	/* Invalidate the data cache of skb->data range when it is write back
 	 * cache. It will prevent overwritting the new data from DMA
 	 */
 	blackfin_dcache_invalidate_range((unsigned long)new_skb->head,
@@ -1599,7 +1599,7 @@
 	*(__le16 *) (&(ndev->dev_addr[4])) = cpu_to_le16((u16) bfin_read_EMAC_ADDRHI());
 
 	/* probe mac */
-	/*todo: how to proble? which is revision_register */
+	/*todo: how to probe? which is revision_register */
 	bfin_write_EMAC_ADDRLO(0x12345678);
 	if (bfin_read_EMAC_ADDRLO() != 0x12345678) {
 		dev_err(&pdev->dev, "Cannot detect Blackfin on-chip ethernet MAC controller!\n");
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index bca07c5..f8df824 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1105,27 +1105,6 @@
 	struct greth_private *greth = netdev_priv(dev);
 	greth->msg_enable = value;
 }
-static int greth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct greth_private *greth = netdev_priv(dev);
-	struct phy_device *phy = greth->phy;
-
-	if (!phy)
-		return -ENODEV;
-
-	return phy_ethtool_gset(phy, cmd);
-}
-
-static int greth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct greth_private *greth = netdev_priv(dev);
-	struct phy_device *phy = greth->phy;
-
-	if (!phy)
-		return -ENODEV;
-
-	return phy_ethtool_sset(phy, cmd);
-}
 
 static int greth_get_regs_len(struct net_device *dev)
 {
@@ -1157,12 +1136,12 @@
 static const struct ethtool_ops greth_ethtool_ops = {
 	.get_msglevel		= greth_get_msglevel,
 	.set_msglevel		= greth_set_msglevel,
-	.get_settings		= greth_get_settings,
-	.set_settings		= greth_set_settings,
 	.get_drvinfo		= greth_get_drvinfo,
 	.get_regs_len           = greth_get_regs_len,
 	.get_regs               = greth_get_regs,
 	.get_link		= ethtool_op_get_link,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
 };
 
 static struct net_device_ops greth_netdev_ops = {
@@ -1224,7 +1203,7 @@
 static void greth_link_change(struct net_device *dev)
 {
 	struct greth_private *greth = netdev_priv(dev);
-	struct phy_device *phydev = greth->phy;
+	struct phy_device *phydev = dev->phydev;
 	unsigned long flags;
 	int status_change = 0;
 	u32 ctrl;
@@ -1307,7 +1286,6 @@
 	greth->link = 0;
 	greth->speed = 0;
 	greth->duplex = -1;
-	greth->phy = phy;
 
 	return 0;
 }
@@ -1325,6 +1303,7 @@
 {
 	int ret;
 	unsigned long timeout;
+	struct net_device *ndev = greth->netdev;
 
 	greth->mdio = mdiobus_alloc();
 	if (!greth->mdio) {
@@ -1349,15 +1328,16 @@
 		goto unreg_mdio;
 	}
 
-	phy_start(greth->phy);
+	phy_start(ndev->phydev);
 
 	/* If Ethernet debug link is used make autoneg happen right away */
 	if (greth->edcl && greth_edcl == 1) {
-		phy_start_aneg(greth->phy);
+		phy_start_aneg(ndev->phydev);
 		timeout = jiffies + 6*HZ;
-		while (!phy_aneg_done(greth->phy) && time_before(jiffies, timeout)) {
+		while (!phy_aneg_done(ndev->phydev) &&
+		       time_before(jiffies, timeout)) {
 		}
-		phy_read_status(greth->phy);
+		phy_read_status(ndev->phydev);
 		greth_link_change(greth->netdev);
 	}
 
@@ -1569,8 +1549,8 @@
 
 	dma_free_coherent(&of_dev->dev, 1024, greth->tx_bd_base, greth->tx_bd_base_phys);
 
-	if (greth->phy)
-		phy_stop(greth->phy);
+	if (ndev->phydev)
+		phy_stop(ndev->phydev);
 	mdiobus_unregister(greth->mdio);
 
 	unregister_netdev(ndev);
diff --git a/drivers/net/ethernet/aeroflex/greth.h b/drivers/net/ethernet/aeroflex/greth.h
index 92dd918..9c07140 100644
--- a/drivers/net/ethernet/aeroflex/greth.h
+++ b/drivers/net/ethernet/aeroflex/greth.h
@@ -123,7 +123,6 @@
 	struct napi_struct napi;
 	spinlock_t devlock;
 
-	struct phy_device *phy;
 	struct mii_bus *mdio;
 	unsigned int link;
 	unsigned int speed;
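
The greth changes above follow the common phylib conversion pattern: drop the driver-private struct phy_device pointer, rely on the one phy_connect() stores in net_device->phydev, and let the generic phy_ethtool_{get,set}_link_ksettings helpers replace the old get_settings/set_settings callbacks. A minimal sketch of that pattern follows; the "foo" names are invented for illustration, only the phylib/ethtool API calls are real.

	#include <linux/netdevice.h>
	#include <linux/ethtool.h>
	#include <linux/phy.h>
	#include <linux/err.h>

	static void foo_link_change(struct net_device *ndev)
	{
		/* React to speed/duplex/link updates via ndev->phydev here. */
	}

	static const struct ethtool_ops foo_ethtool_ops = {
		.get_link		= ethtool_op_get_link,
		.get_link_ksettings	= phy_ethtool_get_link_ksettings,
		.set_link_ksettings	= phy_ethtool_set_link_ksettings,
	};

	static int foo_connect_phy(struct net_device *ndev, const char *bus_id)
	{
		struct phy_device *phydev;

		/* phy_connect() attaches the PHY and stores it in ndev->phydev,
		 * which is what the generic ksettings helpers operate on.
		 */
		phydev = phy_connect(ndev, bus_id, foo_link_change,
				     PHY_INTERFACE_MODE_GMII);
		if (IS_ERR(phydev))
			return PTR_ERR(phydev);

		ndev->ethtool_ops = &foo_ethtool_ops;
		return 0;
	}
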
diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig
new file mode 100644
index 0000000..99b30353
--- /dev/null
+++ b/drivers/net/ethernet/amazon/Kconfig
@@ -0,0 +1,27 @@
+#
+# Amazon network device configuration
+#
+
+config NET_VENDOR_AMAZON
+	bool "Amazon Devices"
+	default y
+	---help---
+	  If you have a network (Ethernet) device belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about Amazon devices. If you say Y, you will be asked
+	  for your specific device in the following questions.
+
+if NET_VENDOR_AMAZON
+
+config ENA_ETHERNET
+	tristate "Elastic Network Adapter (ENA) support"
+	depends on (PCI_MSI && X86)
+	---help---
+	  This driver supports the Elastic Network Adapter (ENA).
+
+	  To compile this driver as a module, choose M here.
+	  The module will be called ena.
+
+endif #NET_VENDOR_AMAZON
diff --git a/drivers/net/ethernet/amazon/Makefile b/drivers/net/ethernet/amazon/Makefile
new file mode 100644
index 0000000..8e0b73f
--- /dev/null
+++ b/drivers/net/ethernet/amazon/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Amazon network device drivers.
+#
+
+obj-$(CONFIG_ENA_ETHERNET) += ena/
diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile
new file mode 100644
index 0000000..eaeeae06
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Elastic Network Adapter (ENA) device drivers.
+#
+
+obj-$(CONFIG_ENA_ETHERNET) += ena.o
+
+ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
new file mode 100644
index 0000000..a46e749
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -0,0 +1,973 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_ADMIN_H_
+#define _ENA_ADMIN_H_
+
+enum ena_admin_aq_opcode {
+	ENA_ADMIN_CREATE_SQ	= 1,
+
+	ENA_ADMIN_DESTROY_SQ	= 2,
+
+	ENA_ADMIN_CREATE_CQ	= 3,
+
+	ENA_ADMIN_DESTROY_CQ	= 4,
+
+	ENA_ADMIN_GET_FEATURE	= 8,
+
+	ENA_ADMIN_SET_FEATURE	= 9,
+
+	ENA_ADMIN_GET_STATS	= 11,
+};
+
+enum ena_admin_aq_completion_status {
+	ENA_ADMIN_SUCCESS			= 0,
+
+	ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE	= 1,
+
+	ENA_ADMIN_BAD_OPCODE			= 2,
+
+	ENA_ADMIN_UNSUPPORTED_OPCODE		= 3,
+
+	ENA_ADMIN_MALFORMED_REQUEST		= 4,
+
+	/* Additional status is provided in ACQ entry extended_status */
+	ENA_ADMIN_ILLEGAL_PARAMETER		= 5,
+
+	ENA_ADMIN_UNKNOWN_ERROR			= 6,
+};
+
+enum ena_admin_aq_feature_id {
+	ENA_ADMIN_DEVICE_ATTRIBUTES		= 1,
+
+	ENA_ADMIN_MAX_QUEUES_NUM		= 2,
+
+	ENA_ADMIN_RSS_HASH_FUNCTION		= 10,
+
+	ENA_ADMIN_STATELESS_OFFLOAD_CONFIG	= 11,
+
+	ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG	= 12,
+
+	ENA_ADMIN_MTU				= 14,
+
+	ENA_ADMIN_RSS_HASH_INPUT		= 18,
+
+	ENA_ADMIN_INTERRUPT_MODERATION		= 20,
+
+	ENA_ADMIN_AENQ_CONFIG			= 26,
+
+	ENA_ADMIN_LINK_CONFIG			= 27,
+
+	ENA_ADMIN_HOST_ATTR_CONFIG		= 28,
+
+	ENA_ADMIN_FEATURES_OPCODE_NUM		= 32,
+};
+
+enum ena_admin_placement_policy_type {
+	/* descriptors and headers are in host memory */
+	ENA_ADMIN_PLACEMENT_POLICY_HOST	= 1,
+
+	/* descriptors and headers are in device memory (a.k.a Low Latency
+	 * Queue)
+	 */
+	ENA_ADMIN_PLACEMENT_POLICY_DEV	= 3,
+};
+
+enum ena_admin_link_types {
+	ENA_ADMIN_LINK_SPEED_1G		= 0x1,
+
+	ENA_ADMIN_LINK_SPEED_2_HALF_G	= 0x2,
+
+	ENA_ADMIN_LINK_SPEED_5G		= 0x4,
+
+	ENA_ADMIN_LINK_SPEED_10G	= 0x8,
+
+	ENA_ADMIN_LINK_SPEED_25G	= 0x10,
+
+	ENA_ADMIN_LINK_SPEED_40G	= 0x20,
+
+	ENA_ADMIN_LINK_SPEED_50G	= 0x40,
+
+	ENA_ADMIN_LINK_SPEED_100G	= 0x80,
+
+	ENA_ADMIN_LINK_SPEED_200G	= 0x100,
+
+	ENA_ADMIN_LINK_SPEED_400G	= 0x200,
+};
+
+enum ena_admin_completion_policy_type {
+	/* completion queue entry for each sq descriptor */
+	ENA_ADMIN_COMPLETION_POLICY_DESC		= 0,
+
+	/* completion queue entry upon request in sq descriptor */
+	ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND	= 1,
+
+	/* current queue head pointer is updated in OS memory upon sq
+	 * descriptor request
+	 */
+	ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND	= 2,
+
+	/* current queue head pointer is updated in OS memory for each sq
+	 * descriptor
+	 */
+	ENA_ADMIN_COMPLETION_POLICY_HEAD		= 3,
+};
+
+/* basic stats return ena_admin_basic_stats while extended stats return a
+ * buffer (string format) with additional statistics per queue and per
+ * device id
+ */
+enum ena_admin_get_stats_type {
+	ENA_ADMIN_GET_STATS_TYPE_BASIC		= 0,
+
+	ENA_ADMIN_GET_STATS_TYPE_EXTENDED	= 1,
+};
+
+enum ena_admin_get_stats_scope {
+	ENA_ADMIN_SPECIFIC_QUEUE	= 0,
+
+	ENA_ADMIN_ETH_TRAFFIC		= 1,
+};
+
+struct ena_admin_aq_common_desc {
+	/* 11:0 : command_id
+	 * 15:12 : reserved12
+	 */
+	u16 command_id;
+
+	/* as appears in ena_admin_aq_opcode */
+	u8 opcode;
+
+	/* 0 : phase
+	 * 1 : ctrl_data - control buffer address valid
+	 * 2 : ctrl_data_indirect - control buffer address
+	 *    points to list of pages with addresses of control
+	 *    buffers
+	 * 7:3 : reserved3
+	 */
+	u8 flags;
+};
+
+/* used in ena_admin_aq_entry. Can point directly to control data, or to a
+ * page list chunk. Used also at the end of indirect mode page list chunks,
+ * for chaining.
+ */
+struct ena_admin_ctrl_buff_info {
+	u32 length;
+
+	struct ena_common_mem_addr address;
+};
+
+struct ena_admin_sq {
+	u16 sq_idx;
+
+	/* 4:0 : reserved
+	 * 7:5 : sq_direction - 0x1 - Tx; 0x2 - Rx
+	 */
+	u8 sq_identity;
+
+	u8 reserved1;
+};
+
+struct ena_admin_aq_entry {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	union {
+		u32 inline_data_w1[3];
+
+		struct ena_admin_ctrl_buff_info control_buffer;
+	} u;
+
+	u32 inline_data_w4[12];
+};
+
+struct ena_admin_acq_common_desc {
+	/* command identifier to associate it with the aq descriptor
+	 * 11:0 : command_id
+	 * 15:12 : reserved12
+	 */
+	u16 command;
+
+	u8 status;
+
+	/* 0 : phase
+	 * 7:1 : reserved1
+	 */
+	u8 flags;
+
+	u16 extended_status;
+
+	/* serves as a hint as to which AQ entries can be revoked */
+	u16 sq_head_indx;
+};
+
+struct ena_admin_acq_entry {
+	struct ena_admin_acq_common_desc acq_common_descriptor;
+
+	u32 response_specific_data[14];
+};
+
+struct ena_admin_aq_create_sq_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	/* 4:0 : reserved0_w1
+	 * 7:5 : sq_direction - 0x1 - Tx, 0x2 - Rx
+	 */
+	u8 sq_identity;
+
+	u8 reserved8_w1;
+
+	/* 3:0 : placement_policy - Describes where the SQ
+	 *    descriptor ring and the SQ packet headers reside:
+	 *    0x1 - descriptors and headers are in OS memory,
+	 *    0x3 - descriptors and headers are in device memory
+	 *    (a.k.a Low Latency Queue)
+	 * 6:4 : completion_policy - Describes what policy
+	 *    to use for generating a completion entry (cqe) in
+	 *    the CQ associated with this SQ: 0x0 - cqe for each
+	 *    sq descriptor, 0x1 - cqe upon request in sq
+	 *    descriptor, 0x2 - current queue head pointer is
+	 *    updated in OS memory upon sq descriptor request
+	 *    0x3 - current queue head pointer is updated in OS
+	 *    memory for each sq descriptor
+	 * 7 : reserved15_w1
+	 */
+	u8 sq_caps_2;
+
+	/* 0 : is_physically_contiguous - Describes whether the
+	 *    queue ring memory is allocated in physically
+	 *    contiguous pages or split.
+	 * 7:1 : reserved17_w1
+	 */
+	u8 sq_caps_3;
+
+	/* associated completion queue id. This CQ must be created prior to
+	 *    SQ creation
+	 */
+	u16 cq_idx;
+
+	/* submission queue depth in entries */
+	u16 sq_depth;
+
+	/* SQ physical base address in OS memory. This field should not be
+	 * used for Low Latency queues. Has to be page aligned.
+	 */
+	struct ena_common_mem_addr sq_ba;
+
+	/* specifies queue head writeback location in OS memory. Valid if
+	 * completion_policy is set to completion_policy_head_on_demand or
+	 * completion_policy_head. Has to be cache aligned
+	 */
+	struct ena_common_mem_addr sq_head_writeback;
+
+	u32 reserved0_w7;
+
+	u32 reserved0_w8;
+};
+
+enum ena_admin_sq_direction {
+	ENA_ADMIN_SQ_DIRECTION_TX	= 1,
+
+	ENA_ADMIN_SQ_DIRECTION_RX	= 2,
+};
+
+struct ena_admin_acq_create_sq_resp_desc {
+	struct ena_admin_acq_common_desc acq_common_desc;
+
+	u16 sq_idx;
+
+	u16 reserved;
+
+	/* queue doorbell address as an offset to PCIe MMIO REG BAR */
+	u32 sq_doorbell_offset;
+
+	/* low latency queue ring base address as an offset to PCIe MMIO
+	 * LLQ_MEM BAR
+	 */
+	u32 llq_descriptors_offset;
+
+	/* low latency queue headers' memory as an offset to PCIe MMIO
+	 * LLQ_MEM BAR
+	 */
+	u32 llq_headers_offset;
+};
+
+struct ena_admin_aq_destroy_sq_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	struct ena_admin_sq sq;
+};
+
+struct ena_admin_acq_destroy_sq_resp_desc {
+	struct ena_admin_acq_common_desc acq_common_desc;
+};
+
+struct ena_admin_aq_create_cq_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	/* 4:0 : reserved5
+	 * 5 : interrupt_mode_enabled - if set, cq operates
+	 *    in interrupt mode, otherwise - polling
+	 * 7:6 : reserved6
+	 */
+	u8 cq_caps_1;
+
+	/* 4:0 : cq_entry_size_words - size of CQ entry in
+	 *    32-bit words, valid values: 4, 8.
+	 * 7:5 : reserved7
+	 */
+	u8 cq_caps_2;
+
+	/* completion queue depth in # of entries. must be power of 2 */
+	u16 cq_depth;
+
+	/* msix vector assigned to this cq */
+	u32 msix_vector;
+
+	/* cq physical base address in OS memory. CQ must be physically
+	 * contiguous
+	 */
+	struct ena_common_mem_addr cq_ba;
+};
+
+struct ena_admin_acq_create_cq_resp_desc {
+	struct ena_admin_acq_common_desc acq_common_desc;
+
+	u16 cq_idx;
+
+	/* actual cq depth in number of entries */
+	u16 cq_actual_depth;
+
+	u32 numa_node_register_offset;
+
+	u32 cq_head_db_register_offset;
+
+	u32 cq_interrupt_unmask_register_offset;
+};
+
+struct ena_admin_aq_destroy_cq_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	u16 cq_idx;
+
+	u16 reserved1;
+};
+
+struct ena_admin_acq_destroy_cq_resp_desc {
+	struct ena_admin_acq_common_desc acq_common_desc;
+};
+
+/* ENA AQ Get Statistics command. Extended statistics are placed in control
+ * buffer pointed by AQ entry
+ */
+struct ena_admin_aq_get_stats_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	union {
+		/* command specific inline data */
+		u32 inline_data_w1[3];
+
+		struct ena_admin_ctrl_buff_info control_buffer;
+	} u;
+
+	/* stats type as defined in enum ena_admin_get_stats_type */
+	u8 type;
+
+	/* stats scope defined in enum ena_admin_get_stats_scope */
+	u8 scope;
+
+	u16 reserved3;
+
+	/* queue id. used when scope is specific_queue */
+	u16 queue_idx;
+
+	/* device id, value 0xFFFF means mine. only a privileged device can
+	 *    get stats of another device
+	 */
+	u16 device_id;
+};
+
+/* Basic Statistics Command. */
+struct ena_admin_basic_stats {
+	u32 tx_bytes_low;
+
+	u32 tx_bytes_high;
+
+	u32 tx_pkts_low;
+
+	u32 tx_pkts_high;
+
+	u32 rx_bytes_low;
+
+	u32 rx_bytes_high;
+
+	u32 rx_pkts_low;
+
+	u32 rx_pkts_high;
+
+	u32 rx_drops_low;
+
+	u32 rx_drops_high;
+};
+
+struct ena_admin_acq_get_stats_resp {
+	struct ena_admin_acq_common_desc acq_common_desc;
+
+	struct ena_admin_basic_stats basic_stats;
+};
+
+struct ena_admin_get_set_feature_common_desc {
+	/* 1:0 : select - 0x1 - current value; 0x3 - default
+	 *    value
+	 * 7:3 : reserved3
+	 */
+	u8 flags;
+
+	/* as appears in ena_admin_aq_feature_id */
+	u8 feature_id;
+
+	u16 reserved16;
+};
+
+struct ena_admin_device_attr_feature_desc {
+	u32 impl_id;
+
+	u32 device_version;
+
+	/* bitmap of ena_admin_aq_feature_id */
+	u32 supported_features;
+
+	u32 reserved3;
+
+	/* Indicates how many bits are used for physical address access. */
+	u32 phys_addr_width;
+
+	/* Indicates how many bits are used for virtual address access. */
+	u32 virt_addr_width;
+
+	/* unicast MAC address (in Network byte order) */
+	u8 mac_addr[6];
+
+	u8 reserved7[2];
+
+	u32 max_mtu;
+};
+
+struct ena_admin_queue_feature_desc {
+	/* including LLQs */
+	u32 max_sq_num;
+
+	u32 max_sq_depth;
+
+	u32 max_cq_num;
+
+	u32 max_cq_depth;
+
+	u32 max_llq_num;
+
+	u32 max_llq_depth;
+
+	u32 max_header_size;
+
+	/* Maximum number of descriptors, including the meta descriptor,
+	 *    allowed for a single Tx packet
+	 */
+	u16 max_packet_tx_descs;
+
+	/* Maximum number of descriptors allowed for a single Rx packet */
+	u16 max_packet_rx_descs;
+};
+
+struct ena_admin_set_feature_mtu_desc {
+	/* exclude L2 */
+	u32 mtu;
+};
+
+struct ena_admin_set_feature_host_attr_desc {
+	/* host OS info base address in OS memory. host info is 4KB of
+	 * physically contiguous memory
+	 */
+	struct ena_common_mem_addr os_info_ba;
+
+	/* host debug area base address in OS memory. debug area must be
+	 * physically contiguous
+	 */
+	struct ena_common_mem_addr debug_ba;
+
+	/* debug area size */
+	u32 debug_area_size;
+};
+
+struct ena_admin_feature_intr_moder_desc {
+	/* interrupt delay granularity in usec */
+	u16 intr_delay_resolution;
+
+	u16 reserved;
+};
+
+struct ena_admin_get_feature_link_desc {
+	/* Link speed in Mb */
+	u32 speed;
+
+	/* bit field of enum ena_admin_link types */
+	u32 supported;
+
+	/* 0 : autoneg
+	 * 1 : duplex - Full Duplex
+	 * 31:2 : reserved2
+	 */
+	u32 flags;
+};
+
+struct ena_admin_feature_aenq_desc {
+	/* bitmask for AENQ groups the device can report */
+	u32 supported_groups;
+
+	/* bitmask for AENQ groups to report */
+	u32 enabled_groups;
+};
+
+struct ena_admin_feature_offload_desc {
+	/* 0 : TX_L3_csum_ipv4
+	 * 1 : TX_L4_ipv4_csum_part - The checksum field
+	 *    should be initialized with pseudo header checksum
+	 * 2 : TX_L4_ipv4_csum_full
+	 * 3 : TX_L4_ipv6_csum_part - The checksum field
+	 *    should be initialized with pseudo header checksum
+	 * 4 : TX_L4_ipv6_csum_full
+	 * 5 : tso_ipv4
+	 * 6 : tso_ipv6
+	 * 7 : tso_ecn
+	 */
+	u32 tx;
+
+	/* Receive side supported stateless offload
+	 * 0 : RX_L3_csum_ipv4 - IPv4 checksum
+	 * 1 : RX_L4_ipv4_csum - TCP/UDP/IPv4 checksum
+	 * 2 : RX_L4_ipv6_csum - TCP/UDP/IPv6 checksum
+	 * 3 : RX_hash - Hash calculation
+	 */
+	u32 rx_supported;
+
+	u32 rx_enabled;
+};
+
+enum ena_admin_hash_functions {
+	ENA_ADMIN_TOEPLITZ	= 1,
+
+	ENA_ADMIN_CRC32		= 2,
+};
+
+struct ena_admin_feature_rss_flow_hash_control {
+	u32 keys_num;
+
+	u32 reserved;
+
+	u32 key[10];
+};
+
+struct ena_admin_feature_rss_flow_hash_function {
+	/* 7:0 : funcs - bitmask of ena_admin_hash_functions */
+	u32 supported_func;
+
+	/* 7:0 : selected_func - bitmask of
+	 *    ena_admin_hash_functions
+	 */
+	u32 selected_func;
+
+	/* initial value */
+	u32 init_val;
+};
+
+/* RSS flow hash protocols */
+enum ena_admin_flow_hash_proto {
+	ENA_ADMIN_RSS_TCP4	= 0,
+
+	ENA_ADMIN_RSS_UDP4	= 1,
+
+	ENA_ADMIN_RSS_TCP6	= 2,
+
+	ENA_ADMIN_RSS_UDP6	= 3,
+
+	ENA_ADMIN_RSS_IP4	= 4,
+
+	ENA_ADMIN_RSS_IP6	= 5,
+
+	ENA_ADMIN_RSS_IP4_FRAG	= 6,
+
+	ENA_ADMIN_RSS_NOT_IP	= 7,
+
+	ENA_ADMIN_RSS_PROTO_NUM	= 16,
+};
+
+/* RSS flow hash fields */
+enum ena_admin_flow_hash_fields {
+	/* Ethernet Dest Addr */
+	ENA_ADMIN_RSS_L2_DA	= 0,
+
+	/* Ethernet Src Addr */
+	ENA_ADMIN_RSS_L2_SA	= 1,
+
+	/* ipv4/6 Dest Addr */
+	ENA_ADMIN_RSS_L3_DA	= 2,
+
+	/* ipv4/6 Src Addr */
+	ENA_ADMIN_RSS_L3_SA	= 5,
+
+	/* tcp/udp Dest Port */
+	ENA_ADMIN_RSS_L4_DP	= 6,
+
+	/* tcp/udp Src Port */
+	ENA_ADMIN_RSS_L4_SP	= 7,
+};
+
+struct ena_admin_proto_input {
+	/* flow hash fields (bitwise according to ena_admin_flow_hash_fields) */
+	u16 fields;
+
+	u16 reserved2;
+};
+
+struct ena_admin_feature_rss_hash_control {
+	struct ena_admin_proto_input supported_fields[ENA_ADMIN_RSS_PROTO_NUM];
+
+	struct ena_admin_proto_input selected_fields[ENA_ADMIN_RSS_PROTO_NUM];
+
+	struct ena_admin_proto_input reserved2[ENA_ADMIN_RSS_PROTO_NUM];
+
+	struct ena_admin_proto_input reserved3[ENA_ADMIN_RSS_PROTO_NUM];
+};
+
+struct ena_admin_feature_rss_flow_hash_input {
+	/* supported hash input sorting
+	 * 1 : L3_sort - support swapping L3 addresses if DA is
+	 *    smaller than SA
+	 * 2 : L4_sort - support swapping L4 ports if DP is
+	 *    smaller than SP
+	 */
+	u16 supported_input_sort;
+
+	/* enabled hash input sorting
+	 * 1 : enable_L3_sort - enable swapping L3 addresses if
+	 *    DA is smaller than SA
+	 * 2 : enable_L4_sort - enable swapping L4 ports if DP
+	 *    is smaller than SP
+	 */
+	u16 enabled_input_sort;
+};
+
+enum ena_admin_os_type {
+	ENA_ADMIN_OS_LINUX	= 1,
+
+	ENA_ADMIN_OS_WIN	= 2,
+
+	ENA_ADMIN_OS_DPDK	= 3,
+
+	ENA_ADMIN_OS_FREEBSD	= 4,
+
+	ENA_ADMIN_OS_IPXE	= 5,
+};
+
+struct ena_admin_host_info {
+	/* defined in enum ena_admin_os_type */
+	u32 os_type;
+
+	/* os distribution string format */
+	u8 os_dist_str[128];
+
+	/* OS distribution numeric format */
+	u32 os_dist;
+
+	/* kernel version string format */
+	u8 kernel_ver_str[32];
+
+	/* Kernel version numeric format */
+	u32 kernel_ver;
+
+	/* 7:0 : major
+	 * 15:8 : minor
+	 * 23:16 : sub_minor
+	 */
+	u32 driver_version;
+
+	/* features bitmap */
+	u32 supported_network_features[4];
+};
+
+struct ena_admin_rss_ind_table_entry {
+	u16 cq_idx;
+
+	u16 reserved;
+};
+
+struct ena_admin_feature_rss_ind_table {
+	/* min supported table size (2^min_size) */
+	u16 min_size;
+
+	/* max supported table size (2^max_size) */
+	u16 max_size;
+
+	/* table size (2^size) */
+	u16 size;
+
+	u16 reserved;
+
+	/* index of the inline entry. 0xFFFFFFFF means invalid */
+	u32 inline_index;
+
+	/* used for updating single entry, ignored when setting the entire
+	 * table through the control buffer.
+	 */
+	struct ena_admin_rss_ind_table_entry inline_entry;
+};
+
+struct ena_admin_get_feat_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	struct ena_admin_ctrl_buff_info control_buffer;
+
+	struct ena_admin_get_set_feature_common_desc feat_common;
+
+	u32 raw[11];
+};
+
+struct ena_admin_get_feat_resp {
+	struct ena_admin_acq_common_desc acq_common_desc;
+
+	union {
+		u32 raw[14];
+
+		struct ena_admin_device_attr_feature_desc dev_attr;
+
+		struct ena_admin_queue_feature_desc max_queue;
+
+		struct ena_admin_feature_aenq_desc aenq;
+
+		struct ena_admin_get_feature_link_desc link;
+
+		struct ena_admin_feature_offload_desc offload;
+
+		struct ena_admin_feature_rss_flow_hash_function flow_hash_func;
+
+		struct ena_admin_feature_rss_flow_hash_input flow_hash_input;
+
+		struct ena_admin_feature_rss_ind_table ind_table;
+
+		struct ena_admin_feature_intr_moder_desc intr_moderation;
+	} u;
+};
+
+struct ena_admin_set_feat_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	struct ena_admin_ctrl_buff_info control_buffer;
+
+	struct ena_admin_get_set_feature_common_desc feat_common;
+
+	union {
+		u32 raw[11];
+
+		/* mtu size */
+		struct ena_admin_set_feature_mtu_desc mtu;
+
+		/* host attributes */
+		struct ena_admin_set_feature_host_attr_desc host_attr;
+
+		/* AENQ configuration */
+		struct ena_admin_feature_aenq_desc aenq;
+
+		/* rss flow hash function */
+		struct ena_admin_feature_rss_flow_hash_function flow_hash_func;
+
+		/* rss flow hash input */
+		struct ena_admin_feature_rss_flow_hash_input flow_hash_input;
+
+		/* rss indirection table */
+		struct ena_admin_feature_rss_ind_table ind_table;
+	} u;
+};
+
+struct ena_admin_set_feat_resp {
+	struct ena_admin_acq_common_desc acq_common_desc;
+
+	union {
+		u32 raw[14];
+	} u;
+};
+
+struct ena_admin_aenq_common_desc {
+	u16 group;
+
+	u16 syndrom;
+
+	/* 0 : phase */
+	u8 flags;
+
+	u8 reserved1[3];
+
+	u32 timestamp_low;
+
+	u32 timestamp_high;
+};
+
+/* asynchronous event notification groups */
+enum ena_admin_aenq_group {
+	ENA_ADMIN_LINK_CHANGE		= 0,
+
+	ENA_ADMIN_FATAL_ERROR		= 1,
+
+	ENA_ADMIN_WARNING		= 2,
+
+	ENA_ADMIN_NOTIFICATION		= 3,
+
+	ENA_ADMIN_KEEP_ALIVE		= 4,
+
+	ENA_ADMIN_AENQ_GROUPS_NUM	= 5,
+};
+
+enum ena_admin_aenq_notification_syndrom {
+	ENA_ADMIN_SUSPEND	= 0,
+
+	ENA_ADMIN_RESUME	= 1,
+};
+
+struct ena_admin_aenq_entry {
+	struct ena_admin_aenq_common_desc aenq_common_desc;
+
+	/* command specific inline data */
+	u32 inline_data_w4[12];
+};
+
+struct ena_admin_aenq_link_change_desc {
+	struct ena_admin_aenq_common_desc aenq_common_desc;
+
+	/* 0 : link_status */
+	u32 flags;
+};
+
+struct ena_admin_ena_mmio_req_read_less_resp {
+	u16 req_id;
+
+	u16 reg_off;
+
+	/* value is valid when poll is cleared */
+	u32 reg_val;
+};
+
+/* aq_common_desc */
+#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2)
+
+/* sq */
+#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5
+#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5)
+
+/* acq_common_desc */
+#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+/* aq_create_sq_cmd */
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK BIT(0)
+
+/* aq_create_cq_cmd */
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
+
+/* get_set_feature_common_desc */
+#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0)
+
+/* get_feature_link_desc */
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0)
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1)
+
+/* feature_offload_desc */
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK BIT(0)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_SHIFT 1
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_SHIFT 2
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_SHIFT 3
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK BIT(3)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_SHIFT 4
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK BIT(4)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK BIT(0)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_SHIFT 1
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_SHIFT 2
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3)
+
+/* feature_rss_flow_hash_function */
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_FUNCS_MASK GENMASK(7, 0)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_SELECTED_FUNC_MASK GENMASK(7, 0)
+
+/* feature_rss_flow_hash_input */
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_SHIFT 1
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_SHIFT 2
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_SHIFT 1
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_SHIFT 2
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_MASK BIT(2)
+
+/* host_info */
+#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0)
+#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8
+#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8)
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16)
+
+/* aenq_common_desc */
+#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+/* aenq_link_change_desc */
+#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0)
+
+#endif /*_ENA_ADMIN_H_ */
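
As a usage illustration for the bit-field masks defined at the end of this header, the sketch below decodes the link descriptor that a GET_FEATURE(ENA_ADMIN_LINK_CONFIG) command returns. The helper name and the pr_info reporting are invented for this example; only the structure and mask names come from the header.

	#include <linux/kernel.h>
	#include "ena_admin_defs.h"

	static void example_report_link(const struct ena_admin_get_feature_link_desc *link)
	{
		bool autoneg = link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK;
		bool full_duplex = link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK;

		/* speed is reported in Mb/s; flags bit 0 is autoneg, bit 1 is duplex */
		pr_info("link: %u Mb/s, %s duplex, autoneg %s\n",
			link->speed, full_duplex ? "full" : "half",
			autoneg ? "on" : "off");
	}
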
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
new file mode 100644
index 0000000..3066d9c
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -0,0 +1,2666 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ena_com.h"
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/* Timeout in micro-sec */
+#define ADMIN_CMD_TIMEOUT_US (1000000)
+
+#define ENA_ASYNC_QUEUE_DEPTH 4
+#define ENA_ADMIN_QUEUE_DEPTH 32
+
+#define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \
+		ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) \
+		| (ENA_COMMON_SPEC_VERSION_MINOR))
+
+#define ENA_CTRL_MAJOR		0
+#define ENA_CTRL_MINOR		0
+#define ENA_CTRL_SUB_MINOR	1
+
+#define MIN_ENA_CTRL_VER \
+	(((ENA_CTRL_MAJOR) << \
+	(ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
+	((ENA_CTRL_MINOR) << \
+	(ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
+	(ENA_CTRL_SUB_MINOR))
+
+#define ENA_DMA_ADDR_TO_UINT32_LOW(x)	((u32)((u64)(x)))
+#define ENA_DMA_ADDR_TO_UINT32_HIGH(x)	((u32)(((u64)(x)) >> 32))
+
+#define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************/
+
+enum ena_cmd_status {
+	ENA_CMD_SUBMITTED,
+	ENA_CMD_COMPLETED,
+	/* Abort - canceled by the driver */
+	ENA_CMD_ABORTED,
+};
+
+struct ena_comp_ctx {
+	struct completion wait_event;
+	struct ena_admin_acq_entry *user_cqe;
+	u32 comp_size;
+	enum ena_cmd_status status;
+	/* status from the device */
+	u8 comp_status;
+	u8 cmd_opcode;
+	bool occupied;
+};
+
+struct ena_com_stats_ctx {
+	struct ena_admin_aq_get_stats_cmd get_cmd;
+	struct ena_admin_acq_get_stats_resp get_resp;
+};
+
+static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev,
+				       struct ena_common_mem_addr *ena_addr,
+				       dma_addr_t addr)
+{
+	if ((addr & GENMASK_ULL(ena_dev->dma_addr_bits - 1, 0)) != addr) {
+		pr_err("dma address has more bits that the device supports\n");
+		return -EINVAL;
+	}
+
+	ena_addr->mem_addr_low = (u32)addr;
+	ena_addr->mem_addr_high = (u64)addr >> 32;
+
+	return 0;
+}
+
+static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue)
+{
+	struct ena_com_admin_sq *sq = &queue->sq;
+	u16 size = ADMIN_SQ_SIZE(queue->q_depth);
+
+	sq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &sq->dma_addr,
+					  GFP_KERNEL);
+
+	if (!sq->entries) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	sq->head = 0;
+	sq->tail = 0;
+	sq->phase = 1;
+
+	sq->db_addr = NULL;
+
+	return 0;
+}
+
+static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue)
+{
+	struct ena_com_admin_cq *cq = &queue->cq;
+	u16 size = ADMIN_CQ_SIZE(queue->q_depth);
+
+	cq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &cq->dma_addr,
+					  GFP_KERNEL);
+
+	if (!cq->entries) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	cq->head = 0;
+	cq->phase = 1;
+
+	return 0;
+}
+
+static int ena_com_admin_init_aenq(struct ena_com_dev *dev,
+				   struct ena_aenq_handlers *aenq_handlers)
+{
+	struct ena_com_aenq *aenq = &dev->aenq;
+	u32 addr_low, addr_high, aenq_caps;
+	u16 size;
+
+	dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH;
+	size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH);
+	aenq->entries = dma_zalloc_coherent(dev->dmadev, size, &aenq->dma_addr,
+					    GFP_KERNEL);
+
+	if (!aenq->entries) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	aenq->head = aenq->q_depth;
+	aenq->phase = 1;
+
+	addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
+	addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
+
+	writel(addr_low, dev->reg_bar + ENA_REGS_AENQ_BASE_LO_OFF);
+	writel(addr_high, dev->reg_bar + ENA_REGS_AENQ_BASE_HI_OFF);
+
+	aenq_caps = 0;
+	aenq_caps |= dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
+	aenq_caps |= (sizeof(struct ena_admin_aenq_entry)
+		      << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
+		     ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
+	writel(aenq_caps, dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF);
+
+	if (unlikely(!aenq_handlers)) {
+		pr_err("aenq handlers pointer is NULL\n");
+		return -EINVAL;
+	}
+
+	aenq->aenq_handlers = aenq_handlers;
+
+	return 0;
+}
+
+static inline void comp_ctxt_release(struct ena_com_admin_queue *queue,
+				     struct ena_comp_ctx *comp_ctx)
+{
+	comp_ctx->occupied = false;
+	atomic_dec(&queue->outstanding_cmds);
+}
+
+static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue,
+					  u16 command_id, bool capture)
+{
+	if (unlikely(command_id >= queue->q_depth)) {
+		pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n",
+		       command_id, queue->q_depth);
+		return NULL;
+	}
+
+	if (unlikely(queue->comp_ctx[command_id].occupied && capture)) {
+		pr_err("Completion context is occupied\n");
+		return NULL;
+	}
+
+	if (capture) {
+		atomic_inc(&queue->outstanding_cmds);
+		queue->comp_ctx[command_id].occupied = true;
+	}
+
+	return &queue->comp_ctx[command_id];
+}
+
+static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue,
+						       struct ena_admin_aq_entry *cmd,
+						       size_t cmd_size_in_bytes,
+						       struct ena_admin_acq_entry *comp,
+						       size_t comp_size_in_bytes)
+{
+	struct ena_comp_ctx *comp_ctx;
+	u16 tail_masked, cmd_id;
+	u16 queue_size_mask;
+	u16 cnt;
+
+	queue_size_mask = admin_queue->q_depth - 1;
+
+	tail_masked = admin_queue->sq.tail & queue_size_mask;
+
+	/* In case of queue FULL */
+	cnt = admin_queue->sq.tail - admin_queue->sq.head;
+	if (cnt >= admin_queue->q_depth) {
+		pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n",
+			 admin_queue->sq.tail, admin_queue->sq.head,
+			 admin_queue->q_depth);
+		admin_queue->stats.out_of_space++;
+		return ERR_PTR(-ENOSPC);
+	}
+
+	cmd_id = admin_queue->curr_cmd_id;
+
+	cmd->aq_common_descriptor.flags |= admin_queue->sq.phase &
+		ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
+
+	cmd->aq_common_descriptor.command_id |= cmd_id &
+		ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
+
+	comp_ctx = get_comp_ctxt(admin_queue, cmd_id, true);
+	if (unlikely(!comp_ctx))
+		return ERR_PTR(-EINVAL);
+
+	comp_ctx->status = ENA_CMD_SUBMITTED;
+	comp_ctx->comp_size = (u32)comp_size_in_bytes;
+	comp_ctx->user_cqe = comp;
+	comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
+
+	reinit_completion(&comp_ctx->wait_event);
+
+	memcpy(&admin_queue->sq.entries[tail_masked], cmd, cmd_size_in_bytes);
+
+	admin_queue->curr_cmd_id = (admin_queue->curr_cmd_id + 1) &
+		queue_size_mask;
+
+	admin_queue->sq.tail++;
+	admin_queue->stats.submitted_cmd++;
+
+	if (unlikely((admin_queue->sq.tail & queue_size_mask) == 0))
+		admin_queue->sq.phase = !admin_queue->sq.phase;
+
+	writel(admin_queue->sq.tail, admin_queue->sq.db_addr);
+
+	return comp_ctx;
+}
+
+static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue)
+{
+	size_t size = queue->q_depth * sizeof(struct ena_comp_ctx);
+	struct ena_comp_ctx *comp_ctx;
+	u16 i;
+
+	queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL);
+	if (unlikely(!queue->comp_ctx)) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < queue->q_depth; i++) {
+		comp_ctx = get_comp_ctxt(queue, i, false);
+		if (comp_ctx)
+			init_completion(&comp_ctx->wait_event);
+	}
+
+	return 0;
+}
+
+static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue,
+						     struct ena_admin_aq_entry *cmd,
+						     size_t cmd_size_in_bytes,
+						     struct ena_admin_acq_entry *comp,
+						     size_t comp_size_in_bytes)
+{
+	unsigned long flags;
+	struct ena_comp_ctx *comp_ctx;
+
+	spin_lock_irqsave(&admin_queue->q_lock, flags);
+	if (unlikely(!admin_queue->running_state)) {
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+		return ERR_PTR(-ENODEV);
+	}
+	comp_ctx = __ena_com_submit_admin_cmd(admin_queue, cmd,
+					      cmd_size_in_bytes,
+					      comp,
+					      comp_size_in_bytes);
+	if (unlikely(IS_ERR(comp_ctx)))
+		admin_queue->running_state = false;
+	spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+	return comp_ctx;
+}
+
+static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
+			      struct ena_com_create_io_ctx *ctx,
+			      struct ena_com_io_sq *io_sq)
+{
+	size_t size;
+	int dev_node = 0;
+
+	memset(&io_sq->desc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
+
+	io_sq->desc_entry_size =
+		(io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
+		sizeof(struct ena_eth_io_tx_desc) :
+		sizeof(struct ena_eth_io_rx_desc);
+
+	size = io_sq->desc_entry_size * io_sq->q_depth;
+
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
+		dev_node = dev_to_node(ena_dev->dmadev);
+		set_dev_node(ena_dev->dmadev, ctx->numa_node);
+		io_sq->desc_addr.virt_addr =
+			dma_zalloc_coherent(ena_dev->dmadev, size,
+					    &io_sq->desc_addr.phys_addr,
+					    GFP_KERNEL);
+		set_dev_node(ena_dev->dmadev, dev_node);
+		if (!io_sq->desc_addr.virt_addr) {
+			io_sq->desc_addr.virt_addr =
+				dma_zalloc_coherent(ena_dev->dmadev, size,
+						    &io_sq->desc_addr.phys_addr,
+						    GFP_KERNEL);
+		}
+	} else {
+		dev_node = dev_to_node(ena_dev->dmadev);
+		set_dev_node(ena_dev->dmadev, ctx->numa_node);
+		io_sq->desc_addr.virt_addr =
+			devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+		set_dev_node(ena_dev->dmadev, dev_node);
+		if (!io_sq->desc_addr.virt_addr) {
+			io_sq->desc_addr.virt_addr =
+				devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+		}
+	}
+
+	if (!io_sq->desc_addr.virt_addr) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	io_sq->tail = 0;
+	io_sq->next_to_comp = 0;
+	io_sq->phase = 1;
+
+	return 0;
+}
+
+static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
+			      struct ena_com_create_io_ctx *ctx,
+			      struct ena_com_io_cq *io_cq)
+{
+	size_t size;
+	int prev_node = 0;
+
+	memset(&io_cq->cdesc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
+
+	/* Use the basic completion descriptor for Rx */
+	io_cq->cdesc_entry_size_in_bytes =
+		(io_cq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
+		sizeof(struct ena_eth_io_tx_cdesc) :
+		sizeof(struct ena_eth_io_rx_cdesc_base);
+
+	size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
+
+	prev_node = dev_to_node(ena_dev->dmadev);
+	set_dev_node(ena_dev->dmadev, ctx->numa_node);
+	io_cq->cdesc_addr.virt_addr =
+		dma_zalloc_coherent(ena_dev->dmadev, size,
+				    &io_cq->cdesc_addr.phys_addr, GFP_KERNEL);
+	set_dev_node(ena_dev->dmadev, prev_node);
+	if (!io_cq->cdesc_addr.virt_addr) {
+		io_cq->cdesc_addr.virt_addr =
+			dma_zalloc_coherent(ena_dev->dmadev, size,
+					    &io_cq->cdesc_addr.phys_addr,
+					    GFP_KERNEL);
+	}
+
+	if (!io_cq->cdesc_addr.virt_addr) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	io_cq->phase = 1;
+	io_cq->head = 0;
+
+	return 0;
+}
+
+static void ena_com_handle_single_admin_completion(struct ena_com_admin_queue *admin_queue,
+						   struct ena_admin_acq_entry *cqe)
+{
+	struct ena_comp_ctx *comp_ctx;
+	u16 cmd_id;
+
+	cmd_id = cqe->acq_common_descriptor.command &
+		ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+
+	comp_ctx = get_comp_ctxt(admin_queue, cmd_id, false);
+	if (unlikely(!comp_ctx)) {
+		pr_err("comp_ctx is NULL. Changing the admin queue running state\n");
+		admin_queue->running_state = false;
+		return;
+	}
+
+	comp_ctx->status = ENA_CMD_COMPLETED;
+	comp_ctx->comp_status = cqe->acq_common_descriptor.status;
+
+	if (comp_ctx->user_cqe)
+		memcpy(comp_ctx->user_cqe, (void *)cqe, comp_ctx->comp_size);
+
+	if (!admin_queue->polling)
+		complete(&comp_ctx->wait_event);
+}
+
+static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_queue)
+{
+	struct ena_admin_acq_entry *cqe = NULL;
+	u16 comp_num = 0;
+	u16 head_masked;
+	u8 phase;
+
+	head_masked = admin_queue->cq.head & (admin_queue->q_depth - 1);
+	phase = admin_queue->cq.phase;
+
+	cqe = &admin_queue->cq.entries[head_masked];
+
+	/* Go over all the completions */
+	while ((cqe->acq_common_descriptor.flags &
+			ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
+		/* Do not read the rest of the completion entry before the
+		 * phase bit was validated
+		 */
+		rmb();
+		ena_com_handle_single_admin_completion(admin_queue, cqe);
+
+		head_masked++;
+		comp_num++;
+		if (unlikely(head_masked == admin_queue->q_depth)) {
+			head_masked = 0;
+			phase = !phase;
+		}
+
+		cqe = &admin_queue->cq.entries[head_masked];
+	}
+
+	admin_queue->cq.head += comp_num;
+	admin_queue->cq.phase = phase;
+	admin_queue->sq.head += comp_num;
+	admin_queue->stats.completed_cmd += comp_num;
+}
+
+static int ena_com_comp_status_to_errno(u8 comp_status)
+{
+	if (unlikely(comp_status != 0))
+		pr_err("admin command failed[%u]\n", comp_status);
+
+	if (unlikely(comp_status > ENA_ADMIN_UNKNOWN_ERROR))
+		return -EINVAL;
+
+	switch (comp_status) {
+	case ENA_ADMIN_SUCCESS:
+		return 0;
+	case ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
+		return -ENOMEM;
+	case ENA_ADMIN_UNSUPPORTED_OPCODE:
+		return -EPERM;
+	case ENA_ADMIN_BAD_OPCODE:
+	case ENA_ADMIN_MALFORMED_REQUEST:
+	case ENA_ADMIN_ILLEGAL_PARAMETER:
+	case ENA_ADMIN_UNKNOWN_ERROR:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
+						     struct ena_com_admin_queue *admin_queue)
+{
+	unsigned long flags;
+	u32 start_time;
+	int ret;
+
+	start_time = ((u32)jiffies_to_usecs(jiffies));
+
+	while (comp_ctx->status == ENA_CMD_SUBMITTED) {
+		if ((((u32)jiffies_to_usecs(jiffies)) - start_time) >
+		    ADMIN_CMD_TIMEOUT_US) {
+			pr_err("Wait for completion (polling) timeout\n");
+			/* ENA didn't have any completion */
+			spin_lock_irqsave(&admin_queue->q_lock, flags);
+			admin_queue->stats.no_completion++;
+			admin_queue->running_state = false;
+			spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+			ret = -ETIME;
+			goto err;
+		}
+
+		spin_lock_irqsave(&admin_queue->q_lock, flags);
+		ena_com_handle_admin_completion(admin_queue);
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+		msleep(100);
+	}
+
+	if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) {
+		pr_err("Command was aborted\n");
+		spin_lock_irqsave(&admin_queue->q_lock, flags);
+		admin_queue->stats.aborted_cmd++;
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+		ret = -ENODEV;
+		goto err;
+	}
+
+	WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n",
+	     comp_ctx->status);
+
+	ret = ena_com_comp_status_to_errno(comp_ctx->comp_status);
+err:
+	comp_ctxt_release(admin_queue, comp_ctx);
+	return ret;
+}
+
+static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx,
+							struct ena_com_admin_queue *admin_queue)
+{
+	unsigned long flags;
+	int ret;
+
+	wait_for_completion_timeout(&comp_ctx->wait_event,
+				    usecs_to_jiffies(ADMIN_CMD_TIMEOUT_US));
+
+	/* In case the command wasn't completed find out the root cause.
+	 * There might be 2 kinds of errors
+	 * 1) No completion (timeout reached)
+	 * 2) There is a completion but the driver didn't get any msi-x interrupt.
+	 */
+	if (unlikely(comp_ctx->status == ENA_CMD_SUBMITTED)) {
+		spin_lock_irqsave(&admin_queue->q_lock, flags);
+		ena_com_handle_admin_completion(admin_queue);
+		admin_queue->stats.no_completion++;
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+		if (comp_ctx->status == ENA_CMD_COMPLETED)
+			pr_err("The ena device have completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n",
+			       comp_ctx->cmd_opcode);
+		else
+			pr_err("The ena device doesn't send any completion for the admin cmd %d status %d\n",
+			       comp_ctx->cmd_opcode, comp_ctx->status);
+
+		admin_queue->running_state = false;
+		ret = -ETIME;
+		goto err;
+	}
+
+	ret = ena_com_comp_status_to_errno(comp_ctx->comp_status);
+err:
+	comp_ctxt_release(admin_queue, comp_ctx);
+	return ret;
+}
+
+/* This method reads a hardware device register by posting writes
+ * and waiting for the response.
+ * On timeout the function will return ENA_MMIO_READ_TIMEOUT
+ */
+static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
+{
+	struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+	volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp =
+		mmio_read->read_resp;
+	u32 mmio_read_reg, ret;
+	unsigned long flags;
+	int i;
+
+	might_sleep();
+
+	/* If readless is disabled, perform regular read */
+	if (!mmio_read->readless_supported)
+		return readl(ena_dev->reg_bar + offset);
+
+	spin_lock_irqsave(&mmio_read->lock, flags);
+	mmio_read->seq_num++;
+
+	read_resp->req_id = mmio_read->seq_num + 0xDEAD;
+	mmio_read_reg = (offset << ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
+			ENA_REGS_MMIO_REG_READ_REG_OFF_MASK;
+	mmio_read_reg |= mmio_read->seq_num &
+			ENA_REGS_MMIO_REG_READ_REQ_ID_MASK;
+
+	/* make sure read_resp->req_id gets updated before the hw can write
+	 * to it
+	 */
+	wmb();
+
+	writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+
+	for (i = 0; i < ENA_REG_READ_TIMEOUT; i++) {
+		if (read_resp->req_id == mmio_read->seq_num)
+			break;
+
+		udelay(1);
+	}
+
+	if (unlikely(i == ENA_REG_READ_TIMEOUT)) {
+		pr_err("reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n",
+		       mmio_read->seq_num, offset, read_resp->req_id,
+		       read_resp->reg_off);
+		ret = ENA_MMIO_READ_TIMEOUT;
+		goto err;
+	}
+
+	if (read_resp->reg_off != offset) {
+		pr_err("Read failure: wrong offset provided");
+		ret = ENA_MMIO_READ_TIMEOUT;
+	} else {
+		ret = read_resp->reg_val;
+	}
+err:
+	spin_unlock_irqrestore(&mmio_read->lock, flags);
+
+	return ret;
+}
+
+/* There are two ways to wait for completion.
+ * Polling mode - wait until the completion is available.
+ * Async mode - wait on a wait queue until the completion is ready
+ * (or the timeout expires).
+ * It is expected that the IRQ handler calls ena_com_handle_admin_completion
+ * to mark the completions.
+ */
+static int ena_com_wait_and_process_admin_cq(struct ena_comp_ctx *comp_ctx,
+					     struct ena_com_admin_queue *admin_queue)
+{
+	if (admin_queue->polling)
+		return ena_com_wait_and_process_admin_cq_polling(comp_ctx,
+								 admin_queue);
+
+	return ena_com_wait_and_process_admin_cq_interrupts(comp_ctx,
+							    admin_queue);
+}
+
+static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev,
+				 struct ena_com_io_sq *io_sq)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_admin_aq_destroy_sq_cmd destroy_cmd;
+	struct ena_admin_acq_destroy_sq_resp_desc destroy_resp;
+	u8 direction;
+	int ret;
+
+	memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd));
+
+	if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+		direction = ENA_ADMIN_SQ_DIRECTION_TX;
+	else
+		direction = ENA_ADMIN_SQ_DIRECTION_RX;
+
+	destroy_cmd.sq.sq_identity |= (direction <<
+		ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT) &
+		ENA_ADMIN_SQ_SQ_DIRECTION_MASK;
+
+	destroy_cmd.sq.sq_idx = io_sq->idx;
+	destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_SQ;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&destroy_cmd,
+					    sizeof(destroy_cmd),
+					    (struct ena_admin_acq_entry *)&destroy_resp,
+					    sizeof(destroy_resp));
+
+	if (unlikely(ret && (ret != -ENODEV)))
+		pr_err("failed to destroy io sq error: %d\n", ret);
+
+	return ret;
+}
+
+static void ena_com_io_queue_free(struct ena_com_dev *ena_dev,
+				  struct ena_com_io_sq *io_sq,
+				  struct ena_com_io_cq *io_cq)
+{
+	size_t size;
+
+	if (io_cq->cdesc_addr.virt_addr) {
+		size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
+
+		dma_free_coherent(ena_dev->dmadev, size,
+				  io_cq->cdesc_addr.virt_addr,
+				  io_cq->cdesc_addr.phys_addr);
+
+		io_cq->cdesc_addr.virt_addr = NULL;
+	}
+
+	if (io_sq->desc_addr.virt_addr) {
+		size = io_sq->desc_entry_size * io_sq->q_depth;
+
+		if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+			dma_free_coherent(ena_dev->dmadev, size,
+					  io_sq->desc_addr.virt_addr,
+					  io_sq->desc_addr.phys_addr);
+		else
+			devm_kfree(ena_dev->dmadev, io_sq->desc_addr.virt_addr);
+
+		io_sq->desc_addr.virt_addr = NULL;
+	}
+}
+
+static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
+				u16 exp_state)
+{
+	u32 val, i;
+
+	for (i = 0; i < timeout; i++) {
+		val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+
+		if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) {
+			pr_err("Reg read timeout occurred\n");
+			return -ETIME;
+		}
+
+		if ((val & ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
+			exp_state)
+			return 0;
+
+		/* The resolution of the timeout is 100ms */
+		msleep(100);
+	}
+
+	return -ETIME;
+}
+
+static bool ena_com_check_supported_feature_id(struct ena_com_dev *ena_dev,
+					       enum ena_admin_aq_feature_id feature_id)
+{
+	u32 feature_mask = 1 << feature_id;
+
+	/* Device attributes are always supported */
+	if ((feature_id != ENA_ADMIN_DEVICE_ATTRIBUTES) &&
+	    !(ena_dev->supported_features & feature_mask))
+		return false;
+
+	return true;
+}
+
+static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev,
+				  struct ena_admin_get_feat_resp *get_resp,
+				  enum ena_admin_aq_feature_id feature_id,
+				  dma_addr_t control_buf_dma_addr,
+				  u32 control_buff_size)
+{
+	struct ena_com_admin_queue *admin_queue;
+	struct ena_admin_get_feat_cmd get_cmd;
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) {
+		pr_info("Feature %d isn't supported\n", feature_id);
+		return -EPERM;
+	}
+
+	memset(&get_cmd, 0x0, sizeof(get_cmd));
+	admin_queue = &ena_dev->admin_queue;
+
+	get_cmd.aq_common_descriptor.opcode = ENA_ADMIN_GET_FEATURE;
+
+	if (control_buff_size)
+		get_cmd.aq_common_descriptor.flags =
+			ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+	else
+		get_cmd.aq_common_descriptor.flags = 0;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &get_cmd.control_buffer.address,
+				   control_buf_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	get_cmd.control_buffer.length = control_buff_size;
+
+	get_cmd.feat_common.feature_id = feature_id;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)
+					    &get_cmd,
+					    sizeof(get_cmd),
+					    (struct ena_admin_acq_entry *)
+					    get_resp,
+					    sizeof(*get_resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to submit get_feature command %d error: %d\n",
+		       feature_id, ret);
+
+	return ret;
+}
+
+static int ena_com_get_feature(struct ena_com_dev *ena_dev,
+			       struct ena_admin_get_feat_resp *get_resp,
+			       enum ena_admin_aq_feature_id feature_id)
+{
+	return ena_com_get_feature_ex(ena_dev,
+				      get_resp,
+				      feature_id,
+				      0,
+				      0);
+}
+
+static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+
+	rss->hash_key =
+		dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
+				    &rss->hash_key_dma_addr, GFP_KERNEL);
+
+	if (unlikely(!rss->hash_key))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void ena_com_hash_key_destroy(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+
+	if (rss->hash_key)
+		dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
+				  rss->hash_key, rss->hash_key_dma_addr);
+	rss->hash_key = NULL;
+}
+
+static int ena_com_hash_ctrl_init(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+
+	rss->hash_ctrl =
+		dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
+				    &rss->hash_ctrl_dma_addr, GFP_KERNEL);
+
+	if (unlikely(!rss->hash_ctrl))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void ena_com_hash_ctrl_destroy(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+
+	if (rss->hash_ctrl)
+		dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
+				  rss->hash_ctrl, rss->hash_ctrl_dma_addr);
+	rss->hash_ctrl = NULL;
+}
+
+static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev,
+					   u16 log_size)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_get_feat_resp get_resp;
+	size_t tbl_size;
+	int ret;
+
+	ret = ena_com_get_feature(ena_dev, &get_resp,
+				  ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG);
+	if (unlikely(ret))
+		return ret;
+
+	if ((get_resp.u.ind_table.min_size > log_size) ||
+	    (get_resp.u.ind_table.max_size < log_size)) {
+		pr_err("indirect table size doesn't fit. requested size: %d while min is:%d and max %d\n",
+		       1 << log_size, 1 << get_resp.u.ind_table.min_size,
+		       1 << get_resp.u.ind_table.max_size);
+		return -EINVAL;
+	}
+
+	tbl_size = (1ULL << log_size) *
+		sizeof(struct ena_admin_rss_ind_table_entry);
+
+	rss->rss_ind_tbl =
+		dma_zalloc_coherent(ena_dev->dmadev, tbl_size,
+				    &rss->rss_ind_tbl_dma_addr, GFP_KERNEL);
+	if (unlikely(!rss->rss_ind_tbl))
+		goto mem_err1;
+
+	tbl_size = (1ULL << log_size) * sizeof(u16);
+	rss->host_rss_ind_tbl =
+		devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL);
+	if (unlikely(!rss->host_rss_ind_tbl))
+		goto mem_err2;
+
+	rss->tbl_log_size = log_size;
+
+	return 0;
+
+mem_err2:
+	tbl_size = (1ULL << log_size) *
+		sizeof(struct ena_admin_rss_ind_table_entry);
+
+	dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl,
+			  rss->rss_ind_tbl_dma_addr);
+	rss->rss_ind_tbl = NULL;
+mem_err1:
+	rss->tbl_log_size = 0;
+	return -ENOMEM;
+}
+
+static void ena_com_indirect_table_destroy(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	size_t tbl_size = (1ULL << rss->tbl_log_size) *
+		sizeof(struct ena_admin_rss_ind_table_entry);
+
+	if (rss->rss_ind_tbl)
+		dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl,
+				  rss->rss_ind_tbl_dma_addr);
+	rss->rss_ind_tbl = NULL;
+
+	if (rss->host_rss_ind_tbl)
+		devm_kfree(ena_dev->dmadev, rss->host_rss_ind_tbl);
+	rss->host_rss_ind_tbl = NULL;
+}
+
+static int ena_com_create_io_sq(struct ena_com_dev *ena_dev,
+				struct ena_com_io_sq *io_sq, u16 cq_idx)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_admin_aq_create_sq_cmd create_cmd;
+	struct ena_admin_acq_create_sq_resp_desc cmd_completion;
+	u8 direction;
+	int ret;
+
+	memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_sq_cmd));
+
+	create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_SQ;
+
+	if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+		direction = ENA_ADMIN_SQ_DIRECTION_TX;
+	else
+		direction = ENA_ADMIN_SQ_DIRECTION_RX;
+
+	create_cmd.sq_identity |= (direction <<
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT) &
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK;
+
+	create_cmd.sq_caps_2 |= io_sq->mem_queue_type &
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK;
+
+	create_cmd.sq_caps_2 |= (ENA_ADMIN_COMPLETION_POLICY_DESC <<
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT) &
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK;
+
+	create_cmd.sq_caps_3 |=
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK;
+
+	create_cmd.cq_idx = cq_idx;
+	create_cmd.sq_depth = io_sq->q_depth;
+
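+	/* Only host-memory (non-LLQ) SQs pass a descriptor ring base address;
+	 * device-memory SQs take their descriptor area from the device memory
+	 * BAR using the offsets returned in the command completion below.
+	 */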
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
+		ret = ena_com_mem_addr_set(ena_dev,
+					   &create_cmd.sq_ba,
+					   io_sq->desc_addr.phys_addr);
+		if (unlikely(ret)) {
+			pr_err("memory address set failed\n");
+			return ret;
+		}
+	}
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&create_cmd,
+					    sizeof(create_cmd),
+					    (struct ena_admin_acq_entry *)&cmd_completion,
+					    sizeof(cmd_completion));
+	if (unlikely(ret)) {
+		pr_err("Failed to create IO SQ. error: %d\n", ret);
+		return ret;
+	}
+
+	io_sq->idx = cmd_completion.sq_idx;
+
+	io_sq->db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+		(uintptr_t)cmd_completion.sq_doorbell_offset);
+
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+		io_sq->header_addr = (u8 __iomem *)((uintptr_t)ena_dev->mem_bar
+				+ cmd_completion.llq_headers_offset);
+
+		io_sq->desc_addr.pbuf_dev_addr =
+			(u8 __iomem *)((uintptr_t)ena_dev->mem_bar +
+			cmd_completion.llq_descriptors_offset);
+	}
+
+	pr_debug("created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth);
+
+	return ret;
+}
+
+static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_com_io_sq *io_sq;
+	u16 qid;
+	int i;
+
+	for (i = 0; i < 1 << rss->tbl_log_size; i++) {
+		qid = rss->host_rss_ind_tbl[i];
+		if (qid >= ENA_TOTAL_NUM_QUEUES)
+			return -EINVAL;
+
+		io_sq = &ena_dev->io_sq_queues[qid];
+
+		if (io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX)
+			return -EINVAL;
+
+		rss->rss_ind_tbl[i].cq_idx = io_sq->idx;
+	}
+
+	return 0;
+}
+
+static int ena_com_ind_tbl_convert_from_device(struct ena_com_dev *ena_dev)
+{
+	u16 dev_idx_to_host_tbl[ENA_TOTAL_NUM_QUEUES] = { (u16)-1 };
+	struct ena_rss *rss = &ena_dev->rss;
+	u8 idx;
+	u16 i;
+
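+	/* Build a reverse lookup table: device SQ index -> host queue id */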
+	for (i = 0; i < ENA_TOTAL_NUM_QUEUES; i++)
+		dev_idx_to_host_tbl[ena_dev->io_sq_queues[i].idx] = i;
+
+	for (i = 0; i < 1 << rss->tbl_log_size; i++) {
+		if (rss->rss_ind_tbl[i].cq_idx > ENA_TOTAL_NUM_QUEUES)
+			return -EINVAL;
+		idx = (u8)rss->rss_ind_tbl[i].cq_idx;
+
+		if (dev_idx_to_host_tbl[idx] > ENA_TOTAL_NUM_QUEUES)
+			return -EINVAL;
+
+		rss->host_rss_ind_tbl[i] = dev_idx_to_host_tbl[idx];
+	}
+
+	return 0;
+}
+
+static int ena_com_init_interrupt_moderation_table(struct ena_com_dev *ena_dev)
+{
+	size_t size;
+
+	size = sizeof(struct ena_intr_moder_entry) * ENA_INTR_MAX_NUM_OF_LEVELS;
+
+	ena_dev->intr_moder_tbl =
+		devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+	if (!ena_dev->intr_moder_tbl)
+		return -ENOMEM;
+
+	ena_com_config_default_interrupt_moderation_table(ena_dev);
+
+	return 0;
+}
+
+static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev,
+						 u16 intr_delay_resolution)
+{
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+	unsigned int i;
+
+	if (!intr_delay_resolution) {
+		pr_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n");
+		intr_delay_resolution = 1;
+	}
+	ena_dev->intr_delay_resolution = intr_delay_resolution;
+
+	/* update Rx */
+	for (i = 0; i < ENA_INTR_MAX_NUM_OF_LEVELS; i++)
+		intr_moder_tbl[i].intr_moder_interval /= intr_delay_resolution;
+
+	/* update Tx */
+	ena_dev->intr_moder_tx_interval /= intr_delay_resolution;
+}
+
+/*****************************************************************************/
+/*******************************      API       ******************************/
+/*****************************************************************************/
+
+int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
+				  struct ena_admin_aq_entry *cmd,
+				  size_t cmd_size,
+				  struct ena_admin_acq_entry *comp,
+				  size_t comp_size)
+{
+	struct ena_comp_ctx *comp_ctx;
+	int ret;
+
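+	/* Submit the command, then block (either by polling or by waiting on
+	 * the completion object) until the device posts the matching
+	 * completion entry.
+	 */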
+	comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size,
+					    comp, comp_size);
+	if (unlikely(IS_ERR(comp_ctx))) {
+		pr_err("Failed to submit command [%ld]\n", PTR_ERR(comp_ctx));
+		return PTR_ERR(comp_ctx);
+	}
+
+	ret = ena_com_wait_and_process_admin_cq(comp_ctx, admin_queue);
+	if (unlikely(ret)) {
+		if (admin_queue->running_state)
+			pr_err("Failed to process command. ret = %d\n", ret);
+		else
+			pr_debug("Failed to process command. ret = %d\n", ret);
+	}
+	return ret;
+}
+
+int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
+			 struct ena_com_io_cq *io_cq)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_admin_aq_create_cq_cmd create_cmd;
+	struct ena_admin_acq_create_cq_resp_desc cmd_completion;
+	int ret;
+
+	memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_cq_cmd));
+
+	create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_CQ;
+
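+	/* The completion descriptor entry size is reported to the device in
+	 * 32-bit words, hence the division by 4.
+	 */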
+	create_cmd.cq_caps_2 |= (io_cq->cdesc_entry_size_in_bytes / 4) &
+		ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK;
+	create_cmd.cq_caps_1 |=
+		ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK;
+
+	create_cmd.msix_vector = io_cq->msix_vector;
+	create_cmd.cq_depth = io_cq->q_depth;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &create_cmd.cq_ba,
+				   io_cq->cdesc_addr.phys_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&create_cmd,
+					    sizeof(create_cmd),
+					    (struct ena_admin_acq_entry *)&cmd_completion,
+					    sizeof(cmd_completion));
+	if (unlikely(ret)) {
+		pr_err("Failed to create IO CQ. error: %d\n", ret);
+		return ret;
+	}
+
+	io_cq->idx = cmd_completion.cq_idx;
+
+	io_cq->unmask_reg = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+		cmd_completion.cq_interrupt_unmask_register_offset);
+
+	if (cmd_completion.cq_head_db_register_offset)
+		io_cq->cq_head_db_reg =
+			(u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+			cmd_completion.cq_head_db_register_offset);
+
+	if (cmd_completion.numa_node_register_offset)
+		io_cq->numa_node_cfg_reg =
+			(u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+			cmd_completion.numa_node_register_offset);
+
+	pr_debug("created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth);
+
+	return ret;
+}
+
+int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid,
+			    struct ena_com_io_sq **io_sq,
+			    struct ena_com_io_cq **io_cq)
+{
+	if (qid >= ENA_TOTAL_NUM_QUEUES) {
+		pr_err("Invalid queue number %d but the max is %d\n", qid,
+		       ENA_TOTAL_NUM_QUEUES);
+		return -EINVAL;
+	}
+
+	*io_sq = &ena_dev->io_sq_queues[qid];
+	*io_cq = &ena_dev->io_cq_queues[qid];
+
+	return 0;
+}
+
+void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_comp_ctx *comp_ctx;
+	u16 i;
+
+	if (!admin_queue->comp_ctx)
+		return;
+
+	for (i = 0; i < admin_queue->q_depth; i++) {
+		comp_ctx = get_comp_ctxt(admin_queue, i, false);
+		if (unlikely(!comp_ctx))
+			break;
+
+		comp_ctx->status = ENA_CMD_ABORTED;
+
+		complete(&comp_ctx->wait_event);
+	}
+}
+
+void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	unsigned long flags;
+
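+	/* Wait for all outstanding admin commands to complete, releasing the
+	 * queue lock while sleeping so the completion path can acquire it.
+	 */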
+	spin_lock_irqsave(&admin_queue->q_lock, flags);
+	while (atomic_read(&admin_queue->outstanding_cmds) != 0) {
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+		msleep(20);
+		spin_lock_irqsave(&admin_queue->q_lock, flags);
+	}
+	spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+}
+
+int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
+			  struct ena_com_io_cq *io_cq)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_admin_aq_destroy_cq_cmd destroy_cmd;
+	struct ena_admin_acq_destroy_cq_resp_desc destroy_resp;
+	int ret;
+
+	memset(&destroy_cmd, 0x0, sizeof(destroy_cmd));
+
+	destroy_cmd.cq_idx = io_cq->idx;
+	destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_CQ;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&destroy_cmd,
+					    sizeof(destroy_cmd),
+					    (struct ena_admin_acq_entry *)&destroy_resp,
+					    sizeof(destroy_resp));
+
+	if (unlikely(ret && (ret != -ENODEV)))
+		pr_err("Failed to destroy IO CQ. error: %d\n", ret);
+
+	return ret;
+}
+
+bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev)
+{
+	return ena_dev->admin_queue.running_state;
+}
+
+void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	unsigned long flags;
+
+	spin_lock_irqsave(&admin_queue->q_lock, flags);
+	ena_dev->admin_queue.running_state = state;
+	spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+}
+
+void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev)
+{
+	u16 depth = ena_dev->aenq.q_depth;
+
+	WARN(ena_dev->aenq.head != depth, "Invalid AENQ state\n");
+
+	/* Init head_db to mark that all entries in the queue
+	 * are initially available
+	 */
+	writel(depth, ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+}
+
+int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag)
+{
+	struct ena_com_admin_queue *admin_queue;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+	struct ena_admin_get_feat_resp get_resp;
+	int ret;
+
+	ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG);
+	if (ret) {
+		pr_info("Can't get aenq configuration\n");
+		return ret;
+	}
+
+	if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) {
+		pr_warn("Trying to set unsupported aenq events. supported flag: %x asked flag: %x\n",
+			get_resp.u.aenq.supported_groups, groups_flag);
+		return -EPERM;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+	admin_queue = &ena_dev->admin_queue;
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.aq_common_descriptor.flags = 0;
+	cmd.feat_common.feature_id = ENA_ADMIN_AENQ_CONFIG;
+	cmd.u.aenq.enabled_groups = groups_flag;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to config AENQ ret: %d\n", ret);
+
+	return ret;
+}
+
+int ena_com_get_dma_width(struct ena_com_dev *ena_dev)
+{
+	u32 caps = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF);
+	int width;
+
+	if (unlikely(caps == ENA_MMIO_READ_TIMEOUT)) {
+		pr_err("Reg read timeout occurred\n");
+		return -ETIME;
+	}
+
+	width = (caps & ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
+		ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
+
+	pr_debug("ENA dma width: %d\n", width);
+
+	if ((width < 32) || width > ENA_MAX_PHYS_ADDR_SIZE_BITS) {
+		pr_err("DMA width illegal value: %d\n", width);
+		return -EINVAL;
+	}
+
+	ena_dev->dma_addr_bits = width;
+
+	return width;
+}
+
+int ena_com_validate_version(struct ena_com_dev *ena_dev)
+{
+	u32 ver;
+	u32 ctrl_ver;
+	u32 ctrl_ver_masked;
+
+	/* Make sure the ENA version and the controller version are at least
+	 * the versions the driver expects
+	 */
+	ver = ena_com_reg_bar_read32(ena_dev, ENA_REGS_VERSION_OFF);
+	ctrl_ver = ena_com_reg_bar_read32(ena_dev,
+					  ENA_REGS_CONTROLLER_VERSION_OFF);
+
+	if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) ||
+		     (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) {
+		pr_err("Reg read timeout occurred\n");
+		return -ETIME;
+	}
+
+	pr_info("ena device version: %d.%d\n",
+		(ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >>
+			ENA_REGS_VERSION_MAJOR_VERSION_SHIFT,
+		ver & ENA_REGS_VERSION_MINOR_VERSION_MASK);
+
+	if (ver < MIN_ENA_VER) {
+		pr_err("ENA version is lower than the minimal version the driver supports\n");
+		return -1;
+	}
+
+	pr_info("ena controller version: %d.%d.%d implementation version %d\n",
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
+			ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
+			ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
+			ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
+
+	ctrl_ver_masked =
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
+
+	/* Validate the ctrl version without the implementation ID */
+	if (ctrl_ver_masked < MIN_ENA_CTRL_VER) {
+		pr_err("ENA ctrl version is lower than the minimal ctrl version the driver supports\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+void ena_com_admin_destroy(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_com_admin_cq *cq = &admin_queue->cq;
+	struct ena_com_admin_sq *sq = &admin_queue->sq;
+	struct ena_com_aenq *aenq = &ena_dev->aenq;
+	u16 size;
+
+	if (admin_queue->comp_ctx)
+		devm_kfree(ena_dev->dmadev, admin_queue->comp_ctx);
+	admin_queue->comp_ctx = NULL;
+	size = ADMIN_SQ_SIZE(admin_queue->q_depth);
+	if (sq->entries)
+		dma_free_coherent(ena_dev->dmadev, size, sq->entries,
+				  sq->dma_addr);
+	sq->entries = NULL;
+
+	size = ADMIN_CQ_SIZE(admin_queue->q_depth);
+	if (cq->entries)
+		dma_free_coherent(ena_dev->dmadev, size, cq->entries,
+				  cq->dma_addr);
+	cq->entries = NULL;
+
+	size = ADMIN_AENQ_SIZE(aenq->q_depth);
+	if (ena_dev->aenq.entries)
+		dma_free_coherent(ena_dev->dmadev, size, aenq->entries,
+				  aenq->dma_addr);
+	aenq->entries = NULL;
+}
+
+void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling)
+{
+	ena_dev->admin_queue.polling = polling;
+}
+
+int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
+	spin_lock_init(&mmio_read->lock);
+	mmio_read->read_resp =
+		dma_zalloc_coherent(ena_dev->dmadev,
+				    sizeof(*mmio_read->read_resp),
+				    &mmio_read->read_resp_dma_addr, GFP_KERNEL);
+	if (unlikely(!mmio_read->read_resp))
+		return -ENOMEM;
+
+	ena_com_mmio_reg_read_request_write_dev_addr(ena_dev);
+
+	mmio_read->read_resp->req_id = 0x0;
+	mmio_read->seq_num = 0x0;
+	mmio_read->readless_supported = true;
+
+	return 0;
+}
+
+void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, bool readless_supported)
+{
+	struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
+	mmio_read->readless_supported = readless_supported;
+}
+
+void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
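+	/* Clear the response address registers in the device before freeing
+	 * the DMA buffer, so the device can't write into freed memory.
+	 */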
+	writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF);
+	writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF);
+
+	dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp),
+			  mmio_read->read_resp, mmio_read->read_resp_dma_addr);
+
+	mmio_read->read_resp = NULL;
+}
+
+void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+	u32 addr_low, addr_high;
+
+	addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(mmio_read->read_resp_dma_addr);
+	addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(mmio_read->read_resp_dma_addr);
+
+	writel(addr_low, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF);
+	writel(addr_high, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF);
+}
+
+int ena_com_admin_init(struct ena_com_dev *ena_dev,
+		       struct ena_aenq_handlers *aenq_handlers,
+		       bool init_spinlock)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	u32 aq_caps, acq_caps, dev_sts, addr_low, addr_high;
+	int ret;
+
+	dev_sts = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+
+	if (unlikely(dev_sts == ENA_MMIO_READ_TIMEOUT)) {
+		pr_err("Reg read timeout occurred\n");
+		return -ETIME;
+	}
+
+	if (!(dev_sts & ENA_REGS_DEV_STS_READY_MASK)) {
+		pr_err("Device isn't ready, abort com init\n");
+		return -ENODEV;
+	}
+
+	admin_queue->q_depth = ENA_ADMIN_QUEUE_DEPTH;
+
+	admin_queue->q_dmadev = ena_dev->dmadev;
+	admin_queue->polling = false;
+	admin_queue->curr_cmd_id = 0;
+
+	atomic_set(&admin_queue->outstanding_cmds, 0);
+
+	if (init_spinlock)
+		spin_lock_init(&admin_queue->q_lock);
+
+	ret = ena_com_init_comp_ctxt(admin_queue);
+	if (ret)
+		goto error;
+
+	ret = ena_com_admin_init_sq(admin_queue);
+	if (ret)
+		goto error;
+
+	ret = ena_com_admin_init_cq(admin_queue);
+	if (ret)
+		goto error;
+
+	admin_queue->sq.db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+		ENA_REGS_AQ_DB_OFF);
+
+	addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->sq.dma_addr);
+	addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->sq.dma_addr);
+
+	writel(addr_low, ena_dev->reg_bar + ENA_REGS_AQ_BASE_LO_OFF);
+	writel(addr_high, ena_dev->reg_bar + ENA_REGS_AQ_BASE_HI_OFF);
+
+	addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->cq.dma_addr);
+	addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->cq.dma_addr);
+
+	writel(addr_low, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_LO_OFF);
+	writel(addr_high, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_HI_OFF);
+
+	aq_caps = 0;
+	aq_caps |= admin_queue->q_depth & ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
+	aq_caps |= (sizeof(struct ena_admin_aq_entry) <<
+			ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
+			ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
+
+	acq_caps = 0;
+	acq_caps |= admin_queue->q_depth & ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
+	acq_caps |= (sizeof(struct ena_admin_acq_entry) <<
+		ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
+		ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
+
+	writel(aq_caps, ena_dev->reg_bar + ENA_REGS_AQ_CAPS_OFF);
+	writel(acq_caps, ena_dev->reg_bar + ENA_REGS_ACQ_CAPS_OFF);
+	ret = ena_com_admin_init_aenq(ena_dev, aenq_handlers);
+	if (ret)
+		goto error;
+
+	admin_queue->running_state = true;
+
+	return 0;
+error:
+	ena_com_admin_destroy(ena_dev);
+
+	return ret;
+}
+
+int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
+			    struct ena_com_create_io_ctx *ctx)
+{
+	struct ena_com_io_sq *io_sq;
+	struct ena_com_io_cq *io_cq;
+	int ret;
+
+	if (ctx->qid >= ENA_TOTAL_NUM_QUEUES) {
+		pr_err("Qid (%d) is bigger than max num of queues (%d)\n",
+		       ctx->qid, ENA_TOTAL_NUM_QUEUES);
+		return -EINVAL;
+	}
+
+	io_sq = &ena_dev->io_sq_queues[ctx->qid];
+	io_cq = &ena_dev->io_cq_queues[ctx->qid];
+
+	memset(io_sq, 0x0, sizeof(struct ena_com_io_sq));
+	memset(io_cq, 0x0, sizeof(struct ena_com_io_cq));
+
+	/* Init CQ */
+	io_cq->q_depth = ctx->queue_size;
+	io_cq->direction = ctx->direction;
+	io_cq->qid = ctx->qid;
+
+	io_cq->msix_vector = ctx->msix_vector;
+
+	io_sq->q_depth = ctx->queue_size;
+	io_sq->direction = ctx->direction;
+	io_sq->qid = ctx->qid;
+
+	io_sq->mem_queue_type = ctx->mem_queue_type;
+
+	if (ctx->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+		/* header length is limited to 8 bits */
+		io_sq->tx_max_header_size =
+			min_t(u32, ena_dev->tx_max_header_size, SZ_256);
+
+	ret = ena_com_init_io_sq(ena_dev, ctx, io_sq);
+	if (ret)
+		goto error;
+	ret = ena_com_init_io_cq(ena_dev, ctx, io_cq);
+	if (ret)
+		goto error;
+
+	ret = ena_com_create_io_cq(ena_dev, io_cq);
+	if (ret)
+		goto error;
+
+	ret = ena_com_create_io_sq(ena_dev, io_sq, io_cq->idx);
+	if (ret)
+		goto destroy_io_cq;
+
+	return 0;
+
+destroy_io_cq:
+	ena_com_destroy_io_cq(ena_dev, io_cq);
+error:
+	ena_com_io_queue_free(ena_dev, io_sq, io_cq);
+	return ret;
+}
+
+void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid)
+{
+	struct ena_com_io_sq *io_sq;
+	struct ena_com_io_cq *io_cq;
+
+	if (qid >= ENA_TOTAL_NUM_QUEUES) {
+		pr_err("Qid (%d) is bigger than max num of queues (%d)\n", qid,
+		       ENA_TOTAL_NUM_QUEUES);
+		return;
+	}
+
+	io_sq = &ena_dev->io_sq_queues[qid];
+	io_cq = &ena_dev->io_cq_queues[qid];
+
+	ena_com_destroy_io_sq(ena_dev, io_sq);
+	ena_com_destroy_io_cq(ena_dev, io_cq);
+
+	ena_com_io_queue_free(ena_dev, io_sq, io_cq);
+}
+
+int ena_com_get_link_params(struct ena_com_dev *ena_dev,
+			    struct ena_admin_get_feat_resp *resp)
+{
+	return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG);
+}
+
+int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
+			      struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+	struct ena_admin_get_feat_resp get_resp;
+	int rc;
+
+	rc = ena_com_get_feature(ena_dev, &get_resp,
+				 ENA_ADMIN_DEVICE_ATTRIBUTES);
+	if (rc)
+		return rc;
+
+	memcpy(&get_feat_ctx->dev_attr, &get_resp.u.dev_attr,
+	       sizeof(get_resp.u.dev_attr));
+	ena_dev->supported_features = get_resp.u.dev_attr.supported_features;
+
+	rc = ena_com_get_feature(ena_dev, &get_resp,
+				 ENA_ADMIN_MAX_QUEUES_NUM);
+	if (rc)
+		return rc;
+
+	memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue,
+	       sizeof(get_resp.u.max_queue));
+	ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size;
+
+	rc = ena_com_get_feature(ena_dev, &get_resp,
+				 ENA_ADMIN_AENQ_CONFIG);
+	if (rc)
+		return rc;
+
+	memcpy(&get_feat_ctx->aenq, &get_resp.u.aenq,
+	       sizeof(get_resp.u.aenq));
+
+	rc = ena_com_get_feature(ena_dev, &get_resp,
+				 ENA_ADMIN_STATELESS_OFFLOAD_CONFIG);
+	if (rc)
+		return rc;
+
+	memcpy(&get_feat_ctx->offload, &get_resp.u.offload,
+	       sizeof(get_resp.u.offload));
+
+	return 0;
+}
+
+void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev)
+{
+	ena_com_handle_admin_completion(&ena_dev->admin_queue);
+}
+
+/* ena_com_get_specific_aenq_cb:
+ * return the handler that is relevant to the specific event group
+ */
+static ena_aenq_handler ena_com_get_specific_aenq_cb(struct ena_com_dev *dev,
+						     u16 group)
+{
+	struct ena_aenq_handlers *aenq_handlers = dev->aenq.aenq_handlers;
+
+	if ((group < ENA_MAX_HANDLERS) && aenq_handlers->handlers[group])
+		return aenq_handlers->handlers[group];
+
+	return aenq_handlers->unimplemented_handler;
+}
+
+/* ena_com_aenq_intr_handler:
+ * handles incoming AENQ events.
+ * Pop events from the queue and apply the relevant handler to each.
+ */
+void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
+{
+	struct ena_admin_aenq_entry *aenq_e;
+	struct ena_admin_aenq_common_desc *aenq_common;
+	struct ena_com_aenq *aenq  = &dev->aenq;
+	ena_aenq_handler handler_cb;
+	u16 masked_head, processed = 0;
+	u8 phase;
+
+	masked_head = aenq->head & (aenq->q_depth - 1);
+	phase = aenq->phase;
+	aenq_e = &aenq->entries[masked_head]; /* Get first entry */
+	aenq_common = &aenq_e->aenq_common_desc;
+
+	/* Go over all the events */
+	while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) ==
+	       phase) {
+		pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n",
+			 aenq_common->group, aenq_common->syndrom,
+			 (u64)aenq_common->timestamp_low +
+				 ((u64)aenq_common->timestamp_high << 32));
+
+		/* Handle specific event */
+		handler_cb = ena_com_get_specific_aenq_cb(dev,
+							  aenq_common->group);
+		handler_cb(data, aenq_e); /* call the actual event handler */
+
+		/* Get next event entry */
+		masked_head++;
+		processed++;
+
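+		/* Wrap around the ring; the expected phase bit flips on every
+		 * wrap so new entries can be told apart from stale ones.
+		 */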
+		if (unlikely(masked_head == aenq->q_depth)) {
+			masked_head = 0;
+			phase = !phase;
+		}
+		aenq_e = &aenq->entries[masked_head];
+		aenq_common = &aenq_e->aenq_common_desc;
+	}
+
+	aenq->head += processed;
+	aenq->phase = phase;
+
+	/* Don't update aenq doorbell if there weren't any processed events */
+	if (!processed)
+		return;
+
+	/* write the aenq doorbell after all AENQ descriptors were read */
+	mb();
+	writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+}
+
+int ena_com_dev_reset(struct ena_com_dev *ena_dev)
+{
+	u32 stat, timeout, cap, reset_val;
+	int rc;
+
+	stat = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+	cap = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF);
+
+	if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) ||
+		     (cap == ENA_MMIO_READ_TIMEOUT))) {
+		pr_err("Reg read32 timeout occurred\n");
+		return -ETIME;
+	}
+
+	if ((stat & ENA_REGS_DEV_STS_READY_MASK) == 0) {
+		pr_err("Device isn't ready, can't reset device\n");
+		return -EINVAL;
+	}
+
+	timeout = (cap & ENA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
+			ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
+	if (timeout == 0) {
+		pr_err("Invalid timeout value\n");
+		return -EINVAL;
+	}
+
+	/* start reset */
+	reset_val = ENA_REGS_DEV_CTL_DEV_RESET_MASK;
+	writel(reset_val, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF);
+
+	/* Write again the MMIO read request address */
+	ena_com_mmio_reg_read_request_write_dev_addr(ena_dev);
+
+	rc = wait_for_reset_state(ena_dev, timeout,
+				  ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
+	if (rc != 0) {
+		pr_err("Reset indication didn't turn on\n");
+		return rc;
+	}
+
+	/* reset done */
+	writel(0, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF);
+	rc = wait_for_reset_state(ena_dev, timeout, 0);
+	if (rc != 0) {
+		pr_err("Reset indication didn't turn off\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+static int ena_get_dev_stats(struct ena_com_dev *ena_dev,
+			     struct ena_com_stats_ctx *ctx,
+			     enum ena_admin_get_stats_type type)
+{
+	struct ena_admin_aq_get_stats_cmd *get_cmd = &ctx->get_cmd;
+	struct ena_admin_acq_get_stats_resp *get_resp = &ctx->get_resp;
+	struct ena_com_admin_queue *admin_queue;
+	int ret;
+
+	admin_queue = &ena_dev->admin_queue;
+
+	get_cmd->aq_common_descriptor.opcode = ENA_ADMIN_GET_STATS;
+	get_cmd->aq_common_descriptor.flags = 0;
+	get_cmd->type = type;
+
+	ret =  ena_com_execute_admin_command(admin_queue,
+					     (struct ena_admin_aq_entry *)get_cmd,
+					     sizeof(*get_cmd),
+					     (struct ena_admin_acq_entry *)get_resp,
+					     sizeof(*get_resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to get stats. error: %d\n", ret);
+
+	return ret;
+}
+
+int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev,
+				struct ena_admin_basic_stats *stats)
+{
+	struct ena_com_stats_ctx ctx;
+	int ret;
+
+	memset(&ctx, 0x0, sizeof(ctx));
+	ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_BASIC);
+	if (likely(ret == 0))
+		memcpy(stats, &ctx.get_resp.basic_stats,
+		       sizeof(ctx.get_resp.basic_stats));
+
+	return ret;
+}
+
+int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu)
+{
+	struct ena_com_admin_queue *admin_queue;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) {
+		pr_info("Feature %d isn't supported\n", ENA_ADMIN_MTU);
+		return -EPERM;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+	admin_queue = &ena_dev->admin_queue;
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.aq_common_descriptor.flags = 0;
+	cmd.feat_common.feature_id = ENA_ADMIN_MTU;
+	cmd.u.mtu.mtu = mtu;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to set mtu %d. error: %d\n", mtu, ret);
+
+	return ret;
+}
+
+int ena_com_get_offload_settings(struct ena_com_dev *ena_dev,
+				 struct ena_admin_feature_offload_desc *offload)
+{
+	int ret;
+	struct ena_admin_get_feat_resp resp;
+
+	ret = ena_com_get_feature(ena_dev, &resp,
+				  ENA_ADMIN_STATELESS_OFFLOAD_CONFIG);
+	if (unlikely(ret)) {
+		pr_err("Failed to get offload capabilities %d\n", ret);
+		return ret;
+	}
+
+	memcpy(offload, &resp.u.offload, sizeof(resp.u.offload));
+
+	return 0;
+}
+
+int ena_com_set_hash_function(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+	struct ena_admin_get_feat_resp get_resp;
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(ena_dev,
+						ENA_ADMIN_RSS_HASH_FUNCTION)) {
+		pr_info("Feature %d isn't supported\n",
+			ENA_ADMIN_RSS_HASH_FUNCTION);
+		return -EPERM;
+	}
+
+	/* Validate hash function is supported */
+	ret = ena_com_get_feature(ena_dev, &get_resp,
+				  ENA_ADMIN_RSS_HASH_FUNCTION);
+	if (unlikely(ret))
+		return ret;
+
+	if (!(get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func))) {
+		pr_err("Func hash %d isn't supported by device, abort\n",
+		       rss->hash_func);
+		return -EPERM;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.aq_common_descriptor.flags =
+		ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+	cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_FUNCTION;
+	cmd.u.flow_hash_func.init_val = rss->hash_init_val;
+	cmd.u.flow_hash_func.selected_func = 1 << rss->hash_func;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &cmd.control_buffer.address,
+				   rss->hash_key_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	cmd.control_buffer.length = sizeof(*rss->hash_key);
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+	if (unlikely(ret)) {
+		pr_err("Failed to set hash function %d. error: %d\n",
+		       rss->hash_func, ret);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
+			       enum ena_admin_hash_functions func,
+			       const u8 *key, u16 key_len, u32 init_val)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_get_feat_resp get_resp;
+	struct ena_admin_feature_rss_flow_hash_control *hash_key =
+		rss->hash_key;
+	int rc;
+
+	/* Make sure the key length is a multiple of DWORDs */
+	if (unlikely(key_len & 0x3))
+		return -EINVAL;
+
+	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+				    ENA_ADMIN_RSS_HASH_FUNCTION,
+				    rss->hash_key_dma_addr,
+				    sizeof(*rss->hash_key));
+	if (unlikely(rc))
+		return rc;
+
+	if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) {
+		pr_err("Flow hash function %d isn't supported\n", func);
+		return -EPERM;
+	}
+
+	switch (func) {
+	case ENA_ADMIN_TOEPLITZ:
+		if (key_len > sizeof(hash_key->key)) {
+			pr_err("key len (%hu) is bigger than the max supported (%zu)\n",
+			       key_len, sizeof(hash_key->key));
+			return -EINVAL;
+		}
+
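+		/* Store the Toeplitz key; keys_num counts the key length in
+		 * 32-bit words (hence the >> 2 below).
+		 */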
+		memcpy(hash_key->key, key, key_len);
+		rss->hash_init_val = init_val;
+		hash_key->keys_num = key_len >> 2;
+		break;
+	case ENA_ADMIN_CRC32:
+		rss->hash_init_val = init_val;
+		break;
+	default:
+		pr_err("Invalid hash function (%d)\n", func);
+		return -EINVAL;
+	}
+
+	rc = ena_com_set_hash_function(ena_dev);
+
+	/* Restore the old function */
+	if (unlikely(rc))
+		ena_com_get_hash_function(ena_dev, NULL, NULL);
+
+	return rc;
+}
+
+int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
+			      enum ena_admin_hash_functions *func,
+			      u8 *key)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_get_feat_resp get_resp;
+	struct ena_admin_feature_rss_flow_hash_control *hash_key =
+		rss->hash_key;
+	int rc;
+
+	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+				    ENA_ADMIN_RSS_HASH_FUNCTION,
+				    rss->hash_key_dma_addr,
+				    sizeof(*rss->hash_key));
+	if (unlikely(rc))
+		return rc;
+
+	rss->hash_func = get_resp.u.flow_hash_func.selected_func;
+	if (func)
+		*func = rss->hash_func;
+
+	if (key)
+		memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2);
+
+	return 0;
+}
+
+int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev,
+			  enum ena_admin_flow_hash_proto proto,
+			  u16 *fields)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_get_feat_resp get_resp;
+	int rc;
+
+	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+				    ENA_ADMIN_RSS_HASH_INPUT,
+				    rss->hash_ctrl_dma_addr,
+				    sizeof(*rss->hash_ctrl));
+	if (unlikely(rc))
+		return rc;
+
+	if (fields)
+		*fields = rss->hash_ctrl->selected_fields[proto].fields;
+
+	return 0;
+}
+
+int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(ena_dev,
+						ENA_ADMIN_RSS_HASH_INPUT)) {
+		pr_info("Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_INPUT);
+		return -EPERM;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.aq_common_descriptor.flags =
+		ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+	cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_INPUT;
+	cmd.u.flow_hash_input.enabled_input_sort =
+		ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK |
+		ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &cmd.control_buffer.address,
+				   rss->hash_ctrl_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+	cmd.control_buffer.length = sizeof(*hash_ctrl);
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+	if (unlikely(ret))
+		pr_err("Failed to set hash input. error: %d\n", ret);
+
+	return ret;
+}
+
+int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_feature_rss_hash_control *hash_ctrl =
+		rss->hash_ctrl;
+	u16 available_fields = 0;
+	int rc, i;
+
+	/* Get the supported hash input */
+	rc = ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+	if (unlikely(rc))
+		return rc;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP4].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+		ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP4].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+		ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP6].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+		ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP6].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+		ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP6].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_NOT_IP].fields =
+		ENA_ADMIN_RSS_L2_DA | ENA_ADMIN_RSS_L2_SA;
+
+	for (i = 0; i < ENA_ADMIN_RSS_PROTO_NUM; i++) {
+		available_fields = hash_ctrl->selected_fields[i].fields &
+				hash_ctrl->supported_fields[i].fields;
+		if (available_fields != hash_ctrl->selected_fields[i].fields) {
+			pr_err("hash control doesn't support all the desire configuration. proto %x supported %x selected %x\n",
+			       i, hash_ctrl->supported_fields[i].fields,
+			       hash_ctrl->selected_fields[i].fields);
+			return -EPERM;
+		}
+	}
+
+	rc = ena_com_set_hash_ctrl(ena_dev);
+
+	/* In case of failure, restore the old hash ctrl */
+	if (unlikely(rc))
+		ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+
+	return rc;
+}
+
+int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev,
+			   enum ena_admin_flow_hash_proto proto,
+			   u16 hash_fields)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl;
+	u16 supported_fields;
+	int rc;
+
+	if (proto >= ENA_ADMIN_RSS_PROTO_NUM) {
+		pr_err("Invalid proto num (%u)\n", proto);
+		return -EINVAL;
+	}
+
+	/* Get the ctrl table */
+	rc = ena_com_get_hash_ctrl(ena_dev, proto, NULL);
+	if (unlikely(rc))
+		return rc;
+
+	/* Make sure all the fields are supported */
+	supported_fields = hash_ctrl->supported_fields[proto].fields;
+	if ((hash_fields & supported_fields) != hash_fields) {
+		pr_err("proto %d doesn't support the required fields %x. supports only: %x\n",
+		       proto, hash_fields, supported_fields);
+	}
+
+	hash_ctrl->selected_fields[proto].fields = hash_fields;
+
+	rc = ena_com_set_hash_ctrl(ena_dev);
+
+	/* In case of failure, restore the old hash ctrl */
+	if (unlikely(rc))
+		ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+
+	return rc;
+}
+
+int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev,
+				      u16 entry_idx, u16 entry_value)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+
+	if (unlikely(entry_idx >= (1 << rss->tbl_log_size)))
+		return -EINVAL;
+
+	if (unlikely((entry_value > ENA_TOTAL_NUM_QUEUES)))
+		return -EINVAL;
+
+	rss->host_rss_ind_tbl[entry_idx] = entry_value;
+
+	return 0;
+}
+
+int ena_com_indirect_table_set(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(
+		    ena_dev, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG)) {
+		pr_info("Feature %d isn't supported\n",
+			ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG);
+		return -EPERM;
+	}
+
+	ret = ena_com_ind_tbl_convert_to_device(ena_dev);
+	if (ret) {
+		pr_err("Failed to convert host indirection table to device table\n");
+		return ret;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.aq_common_descriptor.flags =
+		ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+	cmd.feat_common.feature_id = ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG;
+	cmd.u.ind_table.size = rss->tbl_log_size;
+	cmd.u.ind_table.inline_index = 0xFFFFFFFF;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &cmd.control_buffer.address,
+				   rss->rss_ind_tbl_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	cmd.control_buffer.length = (1ULL << rss->tbl_log_size) *
+		sizeof(struct ena_admin_rss_ind_table_entry);
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to set indirect table. error: %d\n", ret);
+
+	return ret;
+}
+
+int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_get_feat_resp get_resp;
+	u32 tbl_size;
+	int i, rc;
+
+	tbl_size = (1ULL << rss->tbl_log_size) *
+		sizeof(struct ena_admin_rss_ind_table_entry);
+
+	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+				    ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG,
+				    rss->rss_ind_tbl_dma_addr,
+				    tbl_size);
+	if (unlikely(rc))
+		return rc;
+
+	if (!ind_tbl)
+		return 0;
+
+	rc = ena_com_ind_tbl_convert_from_device(ena_dev);
+	if (unlikely(rc))
+		return rc;
+
+	for (i = 0; i < (1 << rss->tbl_log_size); i++)
+		ind_tbl[i] = rss->host_rss_ind_tbl[i];
+
+	return 0;
+}
+
+int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size)
+{
+	int rc;
+
+	memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss));
+
+	rc = ena_com_indirect_table_allocate(ena_dev, indr_tbl_log_size);
+	if (unlikely(rc))
+		goto err_indr_tbl;
+
+	rc = ena_com_hash_key_allocate(ena_dev);
+	if (unlikely(rc))
+		goto err_hash_key;
+
+	rc = ena_com_hash_ctrl_init(ena_dev);
+	if (unlikely(rc))
+		goto err_hash_ctrl;
+
+	return 0;
+
+err_hash_ctrl:
+	ena_com_hash_key_destroy(ena_dev);
+err_hash_key:
+	ena_com_indirect_table_destroy(ena_dev);
+err_indr_tbl:
+
+	return rc;
+}
+
+void ena_com_rss_destroy(struct ena_com_dev *ena_dev)
+{
+	ena_com_indirect_table_destroy(ena_dev);
+	ena_com_hash_key_destroy(ena_dev);
+	ena_com_hash_ctrl_destroy(ena_dev);
+
+	memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss));
+}
+
+int ena_com_allocate_host_info(struct ena_com_dev *ena_dev)
+{
+	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+	host_attr->host_info =
+		dma_zalloc_coherent(ena_dev->dmadev, SZ_4K,
+				    &host_attr->host_info_dma_addr, GFP_KERNEL);
+	if (unlikely(!host_attr->host_info))
+		return -ENOMEM;
+
+	return 0;
+}
+
+int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev,
+				u32 debug_area_size)
+{
+	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+	host_attr->debug_area_virt_addr =
+		dma_zalloc_coherent(ena_dev->dmadev, debug_area_size,
+				    &host_attr->debug_area_dma_addr, GFP_KERNEL);
+	if (unlikely(!host_attr->debug_area_virt_addr)) {
+		host_attr->debug_area_size = 0;
+		return -ENOMEM;
+	}
+
+	host_attr->debug_area_size = debug_area_size;
+
+	return 0;
+}
+
+void ena_com_delete_host_info(struct ena_com_dev *ena_dev)
+{
+	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+	if (host_attr->host_info) {
+		dma_free_coherent(ena_dev->dmadev, SZ_4K, host_attr->host_info,
+				  host_attr->host_info_dma_addr);
+		host_attr->host_info = NULL;
+	}
+}
+
+void ena_com_delete_debug_area(struct ena_com_dev *ena_dev)
+{
+	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+	if (host_attr->debug_area_virt_addr) {
+		dma_free_coherent(ena_dev->dmadev, host_attr->debug_area_size,
+				  host_attr->debug_area_virt_addr,
+				  host_attr->debug_area_dma_addr);
+		host_attr->debug_area_virt_addr = NULL;
+	}
+}
+
+int ena_com_set_host_attributes(struct ena_com_dev *ena_dev)
+{
+	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+	struct ena_com_admin_queue *admin_queue;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(ena_dev,
+						ENA_ADMIN_HOST_ATTR_CONFIG)) {
+		pr_warn("Set host attribute isn't supported\n");
+		return -EPERM;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+	admin_queue = &ena_dev->admin_queue;
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.feat_common.feature_id = ENA_ADMIN_HOST_ATTR_CONFIG;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &cmd.u.host_attr.debug_ba,
+				   host_attr->debug_area_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &cmd.u.host_attr.os_info_ba,
+				   host_attr->host_info_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	cmd.u.host_attr.debug_area_size = host_attr->debug_area_size;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to set host attributes: %d\n", ret);
+
+	return ret;
+}
+
+/* Interrupt moderation */
+bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev)
+{
+	return ena_com_check_supported_feature_id(ena_dev,
+						  ENA_ADMIN_INTERRUPT_MODERATION);
+}
+
+int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
+						      u32 tx_coalesce_usecs)
+{
+	if (!ena_dev->intr_delay_resolution) {
+		pr_err("Illegal interrupt delay granularity value\n");
+		return -EFAULT;
+	}
+
+	ena_dev->intr_moder_tx_interval = tx_coalesce_usecs /
+		ena_dev->intr_delay_resolution;
+
+	return 0;
+}
+
+int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev,
+						      u32 rx_coalesce_usecs)
+{
+	if (!ena_dev->intr_delay_resolution) {
+		pr_err("Illegal interrupt delay granularity value\n");
+		return -EFAULT;
+	}
+
+	/* We use LOWEST entry of moderation table for storing
+	 * nonadaptive interrupt coalescing values
+	 */
+	ena_dev->intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval =
+		rx_coalesce_usecs / ena_dev->intr_delay_resolution;
+
+	return 0;
+}
+
+void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev)
+{
+	if (ena_dev->intr_moder_tbl)
+		devm_kfree(ena_dev->dmadev, ena_dev->intr_moder_tbl);
+	ena_dev->intr_moder_tbl = NULL;
+}
+
+int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev)
+{
+	struct ena_admin_get_feat_resp get_resp;
+	u16 delay_resolution;
+	int rc;
+
+	rc = ena_com_get_feature(ena_dev, &get_resp,
+				 ENA_ADMIN_INTERRUPT_MODERATION);
+
+	if (rc) {
+		if (rc == -EPERM) {
+			pr_info("Feature %d isn't supported\n",
+				ENA_ADMIN_INTERRUPT_MODERATION);
+			rc = 0;
+		} else {
+			pr_err("Failed to get interrupt moderation admin cmd. rc: %d\n",
+			       rc);
+		}
+
+		/* no moderation supported, disable adaptive support */
+		ena_com_disable_adaptive_moderation(ena_dev);
+		return rc;
+	}
+
+	rc = ena_com_init_interrupt_moderation_table(ena_dev);
+	if (rc)
+		goto err;
+
+	/* if moderation is supported by device we set adaptive moderation */
+	delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution;
+	ena_com_update_intr_delay_resolution(ena_dev, delay_resolution);
+	ena_com_enable_adaptive_moderation(ena_dev);
+
+	return 0;
+err:
+	ena_com_destroy_interrupt_moderation(ena_dev);
+	return rc;
+}
+
+void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev)
+{
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+	if (!intr_moder_tbl)
+		return;
+
+	intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval =
+		ENA_INTR_LOWEST_USECS;
+	intr_moder_tbl[ENA_INTR_MODER_LOWEST].pkts_per_interval =
+		ENA_INTR_LOWEST_PKTS;
+	intr_moder_tbl[ENA_INTR_MODER_LOWEST].bytes_per_interval =
+		ENA_INTR_LOWEST_BYTES;
+
+	intr_moder_tbl[ENA_INTR_MODER_LOW].intr_moder_interval =
+		ENA_INTR_LOW_USECS;
+	intr_moder_tbl[ENA_INTR_MODER_LOW].pkts_per_interval =
+		ENA_INTR_LOW_PKTS;
+	intr_moder_tbl[ENA_INTR_MODER_LOW].bytes_per_interval =
+		ENA_INTR_LOW_BYTES;
+
+	intr_moder_tbl[ENA_INTR_MODER_MID].intr_moder_interval =
+		ENA_INTR_MID_USECS;
+	intr_moder_tbl[ENA_INTR_MODER_MID].pkts_per_interval =
+		ENA_INTR_MID_PKTS;
+	intr_moder_tbl[ENA_INTR_MODER_MID].bytes_per_interval =
+		ENA_INTR_MID_BYTES;
+
+	intr_moder_tbl[ENA_INTR_MODER_HIGH].intr_moder_interval =
+		ENA_INTR_HIGH_USECS;
+	intr_moder_tbl[ENA_INTR_MODER_HIGH].pkts_per_interval =
+		ENA_INTR_HIGH_PKTS;
+	intr_moder_tbl[ENA_INTR_MODER_HIGH].bytes_per_interval =
+		ENA_INTR_HIGH_BYTES;
+
+	intr_moder_tbl[ENA_INTR_MODER_HIGHEST].intr_moder_interval =
+		ENA_INTR_HIGHEST_USECS;
+	intr_moder_tbl[ENA_INTR_MODER_HIGHEST].pkts_per_interval =
+		ENA_INTR_HIGHEST_PKTS;
+	intr_moder_tbl[ENA_INTR_MODER_HIGHEST].bytes_per_interval =
+		ENA_INTR_HIGHEST_BYTES;
+}
+
+unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev)
+{
+	return ena_dev->intr_moder_tx_interval;
+}
+
+unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev)
+{
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+	if (intr_moder_tbl)
+		return intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval;
+
+	return 0;
+}
+
+void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev,
+					enum ena_intr_moder_level level,
+					struct ena_intr_moder_entry *entry)
+{
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+	if (level >= ENA_INTR_MAX_NUM_OF_LEVELS)
+		return;
+
+	intr_moder_tbl[level].intr_moder_interval = entry->intr_moder_interval;
+	if (ena_dev->intr_delay_resolution)
+		intr_moder_tbl[level].intr_moder_interval /=
+			ena_dev->intr_delay_resolution;
+	intr_moder_tbl[level].pkts_per_interval = entry->pkts_per_interval;
+
+	/* use hardcoded value until ethtool supports bytecount parameter */
+	if (entry->bytes_per_interval != ENA_INTR_BYTE_COUNT_NOT_SUPPORTED)
+		intr_moder_tbl[level].bytes_per_interval = entry->bytes_per_interval;
+}
+
+void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev,
+				       enum ena_intr_moder_level level,
+				       struct ena_intr_moder_entry *entry)
+{
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+	if (level >= ENA_INTR_MAX_NUM_OF_LEVELS)
+		return;
+
+	entry->intr_moder_interval = intr_moder_tbl[level].intr_moder_interval;
+	if (ena_dev->intr_delay_resolution)
+		entry->intr_moder_interval *= ena_dev->intr_delay_resolution;
+	entry->pkts_per_interval =
+		intr_moder_tbl[level].pkts_per_interval;
+	entry->bytes_per_interval = intr_moder_tbl[level].bytes_per_interval;
+}
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
new file mode 100644
index 0000000..509d7b8
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -0,0 +1,1038 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_COM
+#define ENA_COM
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+#include "ena_common_defs.h"
+#include "ena_admin_defs.h"
+#include "ena_eth_io_defs.h"
+#include "ena_regs_defs.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define ENA_MAX_NUM_IO_QUEUES		128U
+/* We need two queues for each IO queue (one for Tx and one for Rx) */
+#define ENA_TOTAL_NUM_QUEUES		(2 * (ENA_MAX_NUM_IO_QUEUES))
+
+#define ENA_MAX_HANDLERS 256
+
+#define ENA_MAX_PHYS_ADDR_SIZE_BITS 48
+
+/* Unit in usec */
+#define ENA_REG_READ_TIMEOUT 200000
+
+#define ADMIN_SQ_SIZE(depth)	((depth) * sizeof(struct ena_admin_aq_entry))
+#define ADMIN_CQ_SIZE(depth)	((depth) * sizeof(struct ena_admin_acq_entry))
+#define ADMIN_AENQ_SIZE(depth)	((depth) * sizeof(struct ena_admin_aenq_entry))
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* ENA adaptive interrupt moderation settings */
+
+#define ENA_INTR_LOWEST_USECS           (0)
+#define ENA_INTR_LOWEST_PKTS            (3)
+#define ENA_INTR_LOWEST_BYTES           (2 * 1524)
+
+#define ENA_INTR_LOW_USECS              (32)
+#define ENA_INTR_LOW_PKTS               (12)
+#define ENA_INTR_LOW_BYTES              (16 * 1024)
+
+#define ENA_INTR_MID_USECS              (80)
+#define ENA_INTR_MID_PKTS               (48)
+#define ENA_INTR_MID_BYTES              (64 * 1024)
+
+#define ENA_INTR_HIGH_USECS             (128)
+#define ENA_INTR_HIGH_PKTS              (96)
+#define ENA_INTR_HIGH_BYTES             (128 * 1024)
+
+#define ENA_INTR_HIGHEST_USECS          (192)
+#define ENA_INTR_HIGHEST_PKTS           (128)
+#define ENA_INTR_HIGHEST_BYTES          (192 * 1024)
+
+#define ENA_INTR_INITIAL_TX_INTERVAL_USECS		196
+#define ENA_INTR_INITIAL_RX_INTERVAL_USECS		4
+#define ENA_INTR_DELAY_OLD_VALUE_WEIGHT			6
+#define ENA_INTR_DELAY_NEW_VALUE_WEIGHT			4
+#define ENA_INTR_MODER_LEVEL_STRIDE			2
+#define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED		0xFFFFFF
+
+enum ena_intr_moder_level {
+	ENA_INTR_MODER_LOWEST = 0,
+	ENA_INTR_MODER_LOW,
+	ENA_INTR_MODER_MID,
+	ENA_INTR_MODER_HIGH,
+	ENA_INTR_MODER_HIGHEST,
+	ENA_INTR_MAX_NUM_OF_LEVELS,
+};
+
+struct ena_intr_moder_entry {
+	unsigned int intr_moder_interval;
+	unsigned int pkts_per_interval;
+	unsigned int bytes_per_interval;
+};
+
+enum queue_direction {
+	ENA_COM_IO_QUEUE_DIRECTION_TX,
+	ENA_COM_IO_QUEUE_DIRECTION_RX
+};
+
+struct ena_com_buf {
+	dma_addr_t paddr; /**< Buffer physical address */
+	u16 len; /**< Buffer length in bytes */
+};
+
+struct ena_com_rx_buf_info {
+	u16 len;
+	u16 req_id;
+};
+
+struct ena_com_io_desc_addr {
+	u8 __iomem *pbuf_dev_addr; /* LLQ address */
+	u8 *virt_addr;
+	dma_addr_t phys_addr;
+};
+
+struct ena_com_tx_meta {
+	u16 mss;
+	u16 l3_hdr_len;
+	u16 l3_hdr_offset;
+	u16 l4_hdr_len; /* In words */
+};
+
+struct ena_com_io_cq {
+	struct ena_com_io_desc_addr cdesc_addr;
+
+	/* Interrupt unmask register */
+	u32 __iomem *unmask_reg;
+
+	/* The completion queue head doorbell register */
+	u32 __iomem *cq_head_db_reg;
+
+	/* numa configuration register (for TPH) */
+	u32 __iomem *numa_node_cfg_reg;
+
+	/* The value to write to the above register to unmask
+	 * the interrupt of this queue
+	 */
+	u32 msix_vector;
+
+	enum queue_direction direction;
+
+	/* holds the number of cdesc of the current packet */
+	u16 cur_rx_pkt_cdesc_count;
+	/* save the first cdesc idx of the current packet */
+	u16 cur_rx_pkt_cdesc_start_idx;
+
+	u16 q_depth;
+	/* Caller qid */
+	u16 qid;
+
+	/* Device queue index */
+	u16 idx;
+	u16 head;
+	u16 last_head_update;
+	u8 phase;
+	u8 cdesc_entry_size_in_bytes;
+
+} ____cacheline_aligned;
+
+struct ena_com_io_sq {
+	struct ena_com_io_desc_addr desc_addr;
+
+	u32 __iomem *db_addr;
+	u8 __iomem *header_addr;
+
+	enum queue_direction direction;
+	enum ena_admin_placement_policy_type mem_queue_type;
+
+	u32 msix_vector;
+	struct ena_com_tx_meta cached_tx_meta;
+
+	u16 q_depth;
+	u16 qid;
+
+	u16 idx;
+	u16 tail;
+	u16 next_to_comp;
+	u32 tx_max_header_size;
+	u8 phase;
+	u8 desc_entry_size;
+	u8 dma_addr_bits;
+} ____cacheline_aligned;
+
+struct ena_com_admin_cq {
+	struct ena_admin_acq_entry *entries;
+	dma_addr_t dma_addr;
+
+	u16 head;
+	u8 phase;
+};
+
+struct ena_com_admin_sq {
+	struct ena_admin_aq_entry *entries;
+	dma_addr_t dma_addr;
+
+	u32 __iomem *db_addr;
+
+	u16 head;
+	u16 tail;
+	u8 phase;
+
+};
+
+struct ena_com_stats_admin {
+	u32 aborted_cmd;
+	u32 submitted_cmd;
+	u32 completed_cmd;
+	u32 out_of_space;
+	u32 no_completion;
+};
+
+struct ena_com_admin_queue {
+	void *q_dmadev;
+	spinlock_t q_lock; /* spinlock for the admin queue */
+	struct ena_comp_ctx *comp_ctx;
+	u16 q_depth;
+	struct ena_com_admin_cq cq;
+	struct ena_com_admin_sq sq;
+
+	/* Indicate if the admin queue should poll for completion */
+	bool polling;
+
+	u16 curr_cmd_id;
+
+	/* Indicate that the ena was initialized and can
+	 * process new admin commands
+	 */
+	bool running_state;
+
+	/* Count the number of outstanding admin commands */
+	atomic_t outstanding_cmds;
+
+	struct ena_com_stats_admin stats;
+};
+
+struct ena_aenq_handlers;
+
+struct ena_com_aenq {
+	u16 head;
+	u8 phase;
+	struct ena_admin_aenq_entry *entries;
+	dma_addr_t dma_addr;
+	u16 q_depth;
+	struct ena_aenq_handlers *aenq_handlers;
+};
+
+struct ena_com_mmio_read {
+	struct ena_admin_ena_mmio_req_read_less_resp *read_resp;
+	dma_addr_t read_resp_dma_addr;
+	u16 seq_num;
+	bool readless_supported;
+	/* spin lock to ensure a single outstanding read */
+	spinlock_t lock;
+};
+
+struct ena_rss {
+	/* Indirect table */
+	u16 *host_rss_ind_tbl;
+	struct ena_admin_rss_ind_table_entry *rss_ind_tbl;
+	dma_addr_t rss_ind_tbl_dma_addr;
+	u16 tbl_log_size;
+
+	/* Hash key */
+	enum ena_admin_hash_functions hash_func;
+	struct ena_admin_feature_rss_flow_hash_control *hash_key;
+	dma_addr_t hash_key_dma_addr;
+	u32 hash_init_val;
+
+	/* Flow Control */
+	struct ena_admin_feature_rss_hash_control *hash_ctrl;
+	dma_addr_t hash_ctrl_dma_addr;
+
+};
+
+struct ena_host_attribute {
+	/* Debug area */
+	u8 *debug_area_virt_addr;
+	dma_addr_t debug_area_dma_addr;
+	u32 debug_area_size;
+
+	/* Host information */
+	struct ena_admin_host_info *host_info;
+	dma_addr_t host_info_dma_addr;
+};
+
+/* Each ena_dev is a PCI function. */
+struct ena_com_dev {
+	struct ena_com_admin_queue admin_queue;
+	struct ena_com_aenq aenq;
+	struct ena_com_io_cq io_cq_queues[ENA_TOTAL_NUM_QUEUES];
+	struct ena_com_io_sq io_sq_queues[ENA_TOTAL_NUM_QUEUES];
+	u8 __iomem *reg_bar;
+	void __iomem *mem_bar;
+	void *dmadev;
+
+	enum ena_admin_placement_policy_type tx_mem_queue_type;
+	u32 tx_max_header_size;
+	u16 stats_func; /* Selected function for extended statistic dump */
+	u16 stats_queue; /* Selected queue for extended statistic dump */
+
+	struct ena_com_mmio_read mmio_read;
+
+	struct ena_rss rss;
+	u32 supported_features;
+	u32 dma_addr_bits;
+
+	struct ena_host_attribute host_attr;
+	bool adaptive_coalescing;
+	u16 intr_delay_resolution;
+	u32 intr_moder_tx_interval;
+	struct ena_intr_moder_entry *intr_moder_tbl;
+};
+
+struct ena_com_dev_get_features_ctx {
+	struct ena_admin_queue_feature_desc max_queues;
+	struct ena_admin_device_attr_feature_desc dev_attr;
+	struct ena_admin_feature_aenq_desc aenq;
+	struct ena_admin_feature_offload_desc offload;
+};
+
+struct ena_com_create_io_ctx {
+	enum ena_admin_placement_policy_type mem_queue_type;
+	enum queue_direction direction;
+	int numa_node;
+	u32 msix_vector;
+	u16 queue_size;
+	u16 qid;
+};
+
+typedef void (*ena_aenq_handler)(void *data,
+	struct ena_admin_aenq_entry *aenq_e);
+
+/* Holds aenq handlers. Indexed by AENQ event group */
+struct ena_aenq_handlers {
+	ena_aenq_handler handlers[ENA_MAX_HANDLERS];
+	ena_aenq_handler unimplemented_handler;
+};
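+
+/* Illustrative sketch (not part of this patch): a driver would populate this
+ * table with its own callbacks, indexed by AENQ event group, e.g.:
+ *
+ *	static void my_unimplemented_handler(void *data,
+ *					     struct ena_admin_aenq_entry *aenq_e)
+ *	{
+ *		// fallback for event groups the driver doesn't handle
+ *	}
+ *
+ *	static struct ena_aenq_handlers my_aenq_handlers = {
+ *		.unimplemented_handler = my_unimplemented_handler,
+ *	};
+ *
+ * The table is then passed to ena_com_admin_init() below. The names above are
+ * placeholders.
+ */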
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/* ena_com_mmio_reg_read_request_init - Init the mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ *
+ * Initialize the register read mechanism.
+ *
+ * @note: This method must be the first stage in the initialization sequence.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ * @readless_supported: readless mode (enable/disable)
+ */
+void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev,
+				bool readless_supported);
+
+/* ena_com_mmio_reg_read_request_write_dev_addr - Write the mmio reg read return
+ * value physical address.
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev);
+
+/* ena_com_mmio_reg_read_request_destroy - Destroy the mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_admin_init - Init the admin and the async queues
+ * @ena_dev: ENA communication layer struct
+ * @aenq_handlers: Handlers to be called upon AENQ events.
+ * @init_spinlock: Indicates whether this method should init the admin spinlock
+ * or whether the spinlock was already initialized (for example, in case of FLR).
+ *
+ * Initialize the admin submission and completion queues.
+ * Initialize the asynchronous events notification queues.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_admin_init(struct ena_com_dev *ena_dev,
+		       struct ena_aenq_handlers *aenq_handlers,
+		       bool init_spinlock);
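+
+/* Illustrative init-order sketch (not part of this patch), assuming a probe
+ * context and the hypothetical my_aenq_handlers table shown above:
+ *
+ *	int rc;
+ *
+ *	rc = ena_com_mmio_reg_read_request_init(ena_dev);
+ *	if (rc)
+ *		return rc;
+ *	// the mmio register read mechanism must be set up before anything else
+ *	rc = ena_com_admin_init(ena_dev, &my_aenq_handlers, true);
+ *	if (rc)
+ *		goto err_mmio_read;
+ */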
+
+/* ena_com_admin_destroy - Destroy the admin and the async events queues.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @note: Before calling this method, the caller must validate that the device
+ * won't send any additional admin completions/aenq.
+ * To achieve that, a FLR is recommended.
+ */
+void ena_com_admin_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_dev_reset - Perform an FLR on the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_dev_reset(struct ena_com_dev *ena_dev);
+
+/* ena_com_create_io_queue - Create io queue.
+ * @ena_dev: ENA communication layer struct
+ * @ctx - create context structure
+ *
+ * Create the submission and the completion queues.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
+			    struct ena_com_create_io_ctx *ctx);
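+
+/* Illustrative sketch (not part of this patch): creating one Tx queue and
+ * fetching its handlers. msix_vector, cpu, qid and queue_size below are
+ * placeholders supplied by the caller.
+ *
+ *	struct ena_com_create_io_ctx ctx = {
+ *		.direction = ENA_COM_IO_QUEUE_DIRECTION_TX,
+ *		.mem_queue_type = ena_dev->tx_mem_queue_type,
+ *		.msix_vector = msix_vector,
+ *		.numa_node = cpu_to_node(cpu),
+ *		.qid = qid,
+ *		.queue_size = queue_size,
+ *	};
+ *	struct ena_com_io_sq *io_sq;
+ *	struct ena_com_io_cq *io_cq;
+ *	int rc;
+ *
+ *	rc = ena_com_create_io_queue(ena_dev, &ctx);
+ *	if (!rc)
+ *		rc = ena_com_get_io_handlers(ena_dev, qid, &io_sq, &io_cq);
+ */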
+
+/* ena_com_destroy_io_queue - Destroy IO queue with the queue id - qid.
+ * @ena_dev: ENA communication layer struct
+ * @qid - the caller virtual queue id.
+ */
+void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid);
+
+/* ena_com_get_io_handlers - Return the io queue handlers
+ * @ena_dev: ENA communication layer struct
+ * @qid - the caller virtual queue id.
+ * @io_sq - IO submission queue handler
+ * @io_cq - IO completion queue handler.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid,
+			    struct ena_com_io_sq **io_sq,
+			    struct ena_com_io_cq **io_cq);
+
+/* ena_com_admin_aenq_enable - Enable asynchronous event notifications
+ * @ena_dev: ENA communication layer struct
+ *
+ * After this method is called, AENQ events can be received via the AENQ.
+ */
+void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_admin_running_state - Set the state of the admin queue
+ * @ena_dev: ENA communication layer struct
+ *
+ * Change the state of the admin queue (enable/disable)
+ */
+void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state);
+
+/* ena_com_get_admin_running_state - Get the admin queue state
+ * @ena_dev: ENA communication layer struct
+ *
+ * Retrieve the state of the admin queue (enable/disable)
+ *
+ * @return - current polling mode (enable/disable)
+ */
+bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_admin_polling_mode - Set the admin completion queue polling mode
+ * @ena_dev: ENA communication layer struct
+ * @polling: Enable/Disable polling mode
+ *
+ * Set the admin completion mode.
+ */
+void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling);
+
+/* ena_com_get_ena_admin_polling_mode - Get the admin completion queue polling mode
+ * @ena_dev: ENA communication layer struct
+ *
+ * Get the admin completion mode.
+ * If polling mode is on, ena_com_execute_admin_command will poll the admin
+ * completion queue for the command's completion; otherwise it will wait on a
+ * wait event.
+ *
+ * @return state
+ */
+bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev);
+
+/* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method goes over the admin completion queue and wakes up all the
+ * pending threads that wait on the commands' wait events.
+ *
+ * @note: Should be called after MSI-X interrupt.
+ */
+void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev);
+
+/* ena_com_aenq_intr_handler - AENQ interrupt handler
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method goes over the async event notification queue and calls the
+ * proper aenq handler.
+ */
+void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data);
+
+/* ena_com_abort_admin_commands - Abort all the outstanding admin commands.
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method aborts all the outstanding admin commands.
+ * The caller should then call ena_com_wait_for_abort_completion to make sure
+ * all the commands were completed.
+ */
+void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev);
+
+/* ena_com_wait_for_abort_completion - Wait for admin commands abort.
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method waits until all the outstanding admin commands have completed.
+ */
+void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev);
+
+/* ena_com_validate_version - Validate the device parameters
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method validates that the device parameters are the same as the
+ * parameters saved in ena_dev.
+ * This method is useful after device reset, to validate that the device mac
+ * address and the device offloads are the same as before the reset.
+ *
+ * @return - 0 on success, negative value otherwise.
+ */
+int ena_com_validate_version(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_link_params - Retrieve physical link parameters.
+ * @ena_dev: ENA communication layer struct
+ * @resp: Link parameters
+ *
+ * Retrieve the physical link parameters,
+ * like speed, auto-negotiation and full duplex support.
+ *
+ * @return - 0 on Success, negative value otherwise.
+ */
+int ena_com_get_link_params(struct ena_com_dev *ena_dev,
+			    struct ena_admin_get_feat_resp *resp);
+
+/* ena_com_get_dma_width - Retrieve physical dma address width the device
+ * supports.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Retrieve the maximum physical address bits the device can handle.
+ *
+ * @return: > 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dma_width(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_aenq_config - Set aenq groups configurations
+ * @ena_dev: ENA communication layer struct
+ * @groups_flag: bit field of flags from enum ena_admin_aenq_group.
+ *
+ * Configure which aenq event group the driver would like to receive.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag);
+
+/* ena_com_get_dev_attr_feat - Get device features
+ * @ena_dev: ENA communication layer struct
+ * @get_feat_ctx: returned context that contain the get features.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
+			      struct ena_com_dev_get_features_ctx *get_feat_ctx);
+
+/* ena_com_get_dev_basic_stats - Get device basic statistics
+ * @ena_dev: ENA communication layer struct
+ * @stats: stats return value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev,
+				struct ena_admin_basic_stats *stats);
+
+/* ena_com_set_dev_mtu - Configure the device mtu.
+ * @ena_dev: ENA communication layer struct
+ * @mtu: mtu value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu);
+
+/* ena_com_get_offload_settings - Retrieve the device offloads capabilities
+ * @ena_dev: ENA communication layer struct
+ * @offload: offload return value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_offload_settings(struct ena_com_dev *ena_dev,
+				 struct ena_admin_feature_offload_desc *offload);
+
+/* ena_com_rss_init - Init RSS
+ * @ena_dev: ENA communication layer struct
+ * @log_size: indirection log size
+ *
+ * Allocate RSS/RFS resources.
+ * The caller then can configure rss using ena_com_set_hash_function,
+ * ena_com_set_hash_ctrl and ena_com_indirect_table_set.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size);
+
+/* ena_com_rss_destroy - Destroy rss
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free all the RSS/RFS resources.
+ */
+void ena_com_rss_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_fill_hash_function - Fill RSS hash function
+ * @ena_dev: ENA communication layer struct
+ * @func: The hash function (Toeplitz or crc)
+ * @key: Hash key (for toeplitz hash)
+ * @key_len: key length (max length 10 DW)
+ * @init_val: initial value for the hash function
+ *
+ * Fill the ena_dev resources with the desired hash function, hash key, key_len
+ * and key initial value (if needed by the hash function).
+ * To flush the key into the device the caller should call
+ * ena_com_set_hash_function.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
+			       enum ena_admin_hash_functions func,
+			       const u8 *key, u16 key_len, u32 init_val);
+
+/* ena_com_set_hash_function - Flush the hash function and its dependencies to
+ * the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the hash function and its dependencies (key, key length and
+ * initial value) if needed.
+ *
+ * @note: Prior to this method the caller should call ena_com_fill_hash_function
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_hash_function(struct ena_com_dev *ena_dev);
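+
+/* Illustrative RSS key setup sketch (not part of this patch). The hash
+ * function enumerator, key and sizes come from the caller; names below are
+ * placeholders.
+ *
+ *	rc = ena_com_rss_init(ena_dev, log_ind_tbl_size);
+ *	if (rc)
+ *		return rc;
+ *	rc = ena_com_fill_hash_function(ena_dev, func, key, key_len, init_val);
+ *	if (!rc)
+ *		rc = ena_com_set_hash_function(ena_dev); // flush to the device
+ */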
+
+/* ena_com_get_hash_function - Retrieve the hash function and the hash key
+ * from the device.
+ * @ena_dev: ENA communication layer struct
+ * @func: hash function
+ * @key: hash key
+ *
+ * Retrieve the hash function and the hash key from the device.
+ *
+ * @note: If the caller called ena_com_fill_hash_function but didn't flush
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
+			      enum ena_admin_hash_functions *func,
+			      u8 *key);
+
+/* ena_com_fill_hash_ctrl - Fill RSS hash control
+ * @ena_dev: ENA communication layer struct.
+ * @proto: The protocol to configure.
+ * @hash_fields: bit mask of ena_admin_flow_hash_fields
+ *
+ * Fill the ena_dev resources with the desired hash control (the ethernet
+ * fields that take part in the hash) for a specific protocol.
+ * To flush the hash control to the device, the caller should call
+ * ena_com_set_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev,
+			   enum ena_admin_flow_hash_proto proto,
+			   u16 hash_fields);
+
+/* ena_com_set_hash_ctrl - Flush the hash control resources to the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the hash control (the ethernet fields that take part in the hash)
+ *
+ * @note: Prior to this method the caller should call ena_com_fill_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_hash_ctrl - Retrieve the hash control from the device.
+ * @ena_dev: ENA communication layer struct
+ * @proto: The protocol to retrieve.
+ * @fields: bit mask of ena_admin_flow_hash_fields.
+ *
+ * Retrieve the hash control from the device.
+ *
+ * @note: If the caller called ena_com_fill_hash_ctrl but didn't flush
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev,
+			  enum ena_admin_flow_hash_proto proto,
+			  u16 *fields);
+
+/* ena_com_set_default_hash_ctrl - Set the hash control to a default
+ * configuration.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Fill the ena_dev resources with the default hash control configuration.
+ * To flush the hash control to the device, the caller should call
+ * ena_com_set_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev);
+
+/* ena_com_indirect_table_fill_entry - Fill a single entry in the RSS
+ * indirection table
+ * @ena_dev: ENA communication layer struct.
+ * @entry_idx - indirection table entry.
+ * @entry_value - redirection value
+ *
+ * Fill a single entry of the RSS indirection table in the ena_dev resources.
+ * To flush the indirection table to the device, the caller should call
+ * ena_com_indirect_table_set.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev,
+				      u16 entry_idx, u16 entry_value);
+
+/* ena_com_indirect_table_set - Flush the indirection table to the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the RSS indirection table to the device.
+ * Prior to this method the caller should call ena_com_indirect_table_fill_entry
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_set(struct ena_com_dev *ena_dev);
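+
+/* Illustrative sketch (not part of this patch): filling an indirection table
+ * of (1 << log_size) entries, spreading them over the Rx queues, and then
+ * flushing the table to the device. The redirection value computation is a
+ * placeholder; the driver chooses the actual mapping.
+ *
+ *	for (i = 0; i < (1 << log_size); i++) {
+ *		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
+ *						       i % num_rx_queues);
+ *		if (rc)
+ *			return rc;
+ *	}
+ *	rc = ena_com_indirect_table_set(ena_dev);
+ */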
+
+/* ena_com_indirect_table_get - Retrieve the indirection table from the device.
+ * @ena_dev: ENA communication layer struct
+ * @ind_tbl: indirection table
+ *
+ * Retrieve the RSS indirection table from the device.
+ *
+ * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flush
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl);
+
+/* ena_com_allocate_host_info - Allocate host info resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_allocate_host_info(struct ena_com_dev *ena_dev);
+
+/* ena_com_allocate_debug_area - Allocate debug area.
+ * @ena_dev: ENA communication layer struct
+ * @debug_area_size - debug area size.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev,
+				u32 debug_area_size);
+
+/* ena_com_delete_debug_area - Free the debug area resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free the allocated debug area.
+ */
+void ena_com_delete_debug_area(struct ena_com_dev *ena_dev);
+
+/* ena_com_delete_host_info - Free the host info resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free the allocated host info.
+ */
+void ena_com_delete_host_info(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_host_attributes - Update the device with the host
+ * attributes (debug area and host info) base address.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_host_attributes(struct ena_com_dev *ena_dev);
+
+/* ena_com_create_io_cq - Create io completion queue.
+ * @ena_dev: ENA communication layer struct
+ * @io_cq - io completion queue handler
+ *
+ * Create IO completion queue.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
+			 struct ena_com_io_cq *io_cq);
+
+/* ena_com_destroy_io_cq - Destroy io completion queue.
+ * @ena_dev: ENA communication layer struct
+ * @io_cq - io completion queue handler
+ *
+ * Destroy IO completion queue.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
+			  struct ena_com_io_cq *io_cq);
+
+/* ena_com_execute_admin_command - Execute admin command
+ * @admin_queue: admin queue.
+ * @cmd: the admin command to execute.
+ * @cmd_size: the command size.
+ * @cmd_comp: command completion return value.
+ * @cmd_comp_size: command completion size.
+ *
+ * Submit an admin command and then wait until the device returns a
+ * completion.
+ * The completion will be copied into cmd_comp.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
+				  struct ena_admin_aq_entry *cmd,
+				  size_t cmd_size,
+				  struct ena_admin_acq_entry *cmd_comp,
+				  size_t cmd_comp_size);
+
+/* ena_com_init_interrupt_moderation - Init interrupt moderation
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev);
+
+/* ena_com_destroy_interrupt_moderation - Destroy interrupt moderation resources
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev);
+
+/* ena_com_interrupt_moderation_supported - Return if interrupt moderation
+ * capability is supported by the device.
+ *
+ * @return - supported or not.
+ */
+bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev);
+
+/* ena_com_config_default_interrupt_moderation_table - Restore the interrupt
+ * moderation table back to the default parameters.
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev);
+
+/* ena_com_update_nonadaptive_moderation_interval_tx - Update the
+ * non-adaptive interval in Tx direction.
+ * @ena_dev: ENA communication layer struct
+ * @tx_coalesce_usecs: Interval in usec.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
+						      u32 tx_coalesce_usecs);
+
+/* ena_com_update_nonadaptive_moderation_interval_rx - Update the
+ * non-adaptive interval in Rx direction.
+ * @ena_dev: ENA communication layer struct
+ * @rx_coalesce_usecs: Interval in usec.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev,
+						      u32 rx_coalesce_usecs);
+
+/* ena_com_get_nonadaptive_moderation_interval_tx - Retrieve the
+ * non-adaptive interval in Tx direction.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - interval in usec
+ */
+unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_nonadaptive_moderation_interval_rx - Retrieve the
+ * non-adaptive interval in Rx direction.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - interval in usec
+ */
+unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev);
+
+/* ena_com_init_intr_moderation_entry - Update a single entry in the interrupt
+ * moderation table.
+ * @ena_dev: ENA communication layer struct
+ * @level: Interrupt moderation table level
+ * @entry: Entry value
+ *
+ * Update a single entry in the interrupt moderation table.
+ */
+void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev,
+					enum ena_intr_moder_level level,
+					struct ena_intr_moder_entry *entry);
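+
+/* Illustrative sketch (not part of this patch): overriding a single adaptive
+ * moderation level while leaving the byte threshold untouched (see the
+ * ENA_INTR_BYTE_COUNT_NOT_SUPPORTED handling in ena_com.c). The numeric
+ * values are examples only.
+ *
+ *	struct ena_intr_moder_entry entry = {
+ *		.intr_moder_interval = 64,	// usecs, example value
+ *		.pkts_per_interval = 32,	// example value
+ *		.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED,
+ *	};
+ *
+ *	ena_com_init_intr_moderation_entry(ena_dev, ENA_INTR_MODER_MID, &entry);
+ */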
+
+/* ena_com_get_intr_moderation_entry - Init ena_intr_moder_entry.
+ * @ena_dev: ENA communication layer struct
+ * @level: Interrupt moderation table level
+ * @entry: Entry to fill.
+ *
+ * Initialize the entry according to the adaptive interrupt moderation table.
+ */
+void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev,
+				       enum ena_intr_moder_level level,
+				       struct ena_intr_moder_entry *entry);
+
+static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev *ena_dev)
+{
+	return ena_dev->adaptive_coalescing;
+}
+
+static inline void ena_com_enable_adaptive_moderation(struct ena_com_dev *ena_dev)
+{
+	ena_dev->adaptive_coalescing = true;
+}
+
+static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_dev)
+{
+	ena_dev->adaptive_coalescing = false;
+}
+
+/* ena_com_calculate_interrupt_delay - Calculate new interrupt delay
+ * @ena_dev: ENA communication layer struct
+ * @pkts: Number of packets since the last update
+ * @bytes: Number of bytes received since the last update.
+ * @smoothed_interval: Returned interval
+ * @moder_tbl_idx: Current table level as input, updated to the new level on
+ * return.
+ */
+static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev,
+						     unsigned int pkts,
+						     unsigned int bytes,
+						     unsigned int *smoothed_interval,
+						     unsigned int *moder_tbl_idx)
+{
+	enum ena_intr_moder_level curr_moder_idx, new_moder_idx;
+	struct ena_intr_moder_entry *curr_moder_entry;
+	struct ena_intr_moder_entry *pred_moder_entry;
+	struct ena_intr_moder_entry *new_moder_entry;
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+	unsigned int interval;
+
+	/* We apply adaptive moderation on Rx path only.
+	 * Tx uses static interrupt moderation.
+	 */
+	if (!pkts || !bytes)
+		/* Tx interrupt, or spurious interrupt,
+		 * in both cases we just use the same delay values
+		 */
+		return;
+
+	curr_moder_idx = (enum ena_intr_moder_level)(*moder_tbl_idx);
+	if (unlikely(curr_moder_idx >= ENA_INTR_MAX_NUM_OF_LEVELS)) {
+		pr_err("Wrong moderation index %u\n", curr_moder_idx);
+		return;
+	}
+
+	curr_moder_entry = &intr_moder_tbl[curr_moder_idx];
+	new_moder_idx = curr_moder_idx;
+
+	if (curr_moder_idx == ENA_INTR_MODER_LOWEST) {
+		if ((pkts > curr_moder_entry->pkts_per_interval) ||
+		    (bytes > curr_moder_entry->bytes_per_interval))
+			new_moder_idx =
+				(enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE);
+	} else {
+		pred_moder_entry = &intr_moder_tbl[curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE];
+
+		if ((pkts <= pred_moder_entry->pkts_per_interval) ||
+		    (bytes <= pred_moder_entry->bytes_per_interval))
+			new_moder_idx =
+				(enum ena_intr_moder_level)(curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE);
+		else if ((pkts > curr_moder_entry->pkts_per_interval) ||
+			 (bytes > curr_moder_entry->bytes_per_interval)) {
+			if (curr_moder_idx != ENA_INTR_MODER_HIGHEST)
+				new_moder_idx =
+					(enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE);
+		}
+	}
+	new_moder_entry = &intr_moder_tbl[new_moder_idx];
+
+	interval = new_moder_entry->intr_moder_interval;
+	*smoothed_interval = (
+		(interval * ENA_INTR_DELAY_NEW_VALUE_WEIGHT +
+		ENA_INTR_DELAY_OLD_VALUE_WEIGHT * (*smoothed_interval)) + 5) /
+		10;
+
+	*moder_tbl_idx = new_moder_idx;
+}
+
+/* ena_com_update_intr_reg - Prepare interrupt register
+ * @intr_reg: interrupt register to update.
+ * @rx_delay_interval: Rx interval in usecs
+ * @tx_delay_interval: Tx interval in usecs
+ * @unmask: unmask enable/disable
+ *
+ * Prepare interrupt update register with the supplied parameters.
+ */
+static inline void ena_com_update_intr_reg(struct ena_eth_io_intr_reg *intr_reg,
+					   u32 rx_delay_interval,
+					   u32 tx_delay_interval,
+					   bool unmask)
+{
+	intr_reg->intr_control = 0;
+	intr_reg->intr_control |= rx_delay_interval &
+		ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK;
+
+	intr_reg->intr_control |=
+		(tx_delay_interval << ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT)
+		& ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK;
+
+	if (unmask)
+		intr_reg->intr_control |= ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK;
+}
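+
+/* Illustrative Rx-path sketch (not part of this patch): a NAPI poll routine
+ * could combine the helpers above before unmasking the interrupt
+ * (ena_com_unmask_intr() is declared in ena_eth_com.h). Variable names are
+ * placeholders.
+ *
+ *	unsigned int smoothed_interval =
+ *		ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+ *
+ *	if (ena_com_get_adaptive_moderation_enabled(ena_dev))
+ *		ena_com_calculate_interrupt_delay(ena_dev, rx_pkts, rx_bytes,
+ *						  &smoothed_interval,
+ *						  &moder_tbl_idx);
+ *
+ *	ena_com_update_intr_reg(&intr_reg, smoothed_interval,
+ *				ena_com_get_nonadaptive_moderation_interval_tx(ena_dev),
+ *				true);
+ *	ena_com_unmask_intr(io_cq, &intr_reg);
+ */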
+
+#endif /* !(ENA_COM) */
diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h
new file mode 100644
index 0000000..bb8d736
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_common_defs.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_COMMON_H_
+#define _ENA_COMMON_H_
+
+#define ENA_COMMON_SPEC_VERSION_MAJOR	0 /*  */
+#define ENA_COMMON_SPEC_VERSION_MINOR	10 /*  */
+
+/* ENA operates with 48-bit memory addresses. ena_mem_addr_t */
+struct ena_common_mem_addr {
+	u32 mem_addr_low;
+
+	u16 mem_addr_high;
+
+	/* MBZ */
+	u16 reserved16;
+};
+
+#endif /*_ENA_COMMON_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
new file mode 100644
index 0000000..539c536
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ena_eth_com.h"
+
+static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
+	struct ena_com_io_cq *io_cq)
+{
+	struct ena_eth_io_rx_cdesc_base *cdesc;
+	u16 expected_phase, head_masked;
+	u16 desc_phase;
+
+	head_masked = io_cq->head & (io_cq->q_depth - 1);
+	expected_phase = io_cq->phase;
+
+	cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr
+			+ (head_masked * io_cq->cdesc_entry_size_in_bytes));
+
+	desc_phase = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
+			ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT;
+
+	if (desc_phase != expected_phase)
+		return NULL;
+
+	return cdesc;
+}
+
+static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq)
+{
+	io_cq->head++;
+
+	/* Switch phase bit in case of wrap around */
+	if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0))
+		io_cq->phase ^= 1;
+}
+
+static inline void *get_sq_desc(struct ena_com_io_sq *io_sq)
+{
+	u16 tail_masked;
+	u32 offset;
+
+	tail_masked = io_sq->tail & (io_sq->q_depth - 1);
+
+	offset = tail_masked * io_sq->desc_entry_size;
+
+	return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset);
+}
+
+static inline void ena_com_copy_curr_sq_desc_to_dev(struct ena_com_io_sq *io_sq)
+{
+	u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1);
+	u32 offset = tail_masked * io_sq->desc_entry_size;
+
+	/* In case this queue isn't an LLQ */
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+		return;
+
+	memcpy_toio(io_sq->desc_addr.pbuf_dev_addr + offset,
+		    io_sq->desc_addr.virt_addr + offset,
+		    io_sq->desc_entry_size);
+}
+
+static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq)
+{
+	io_sq->tail++;
+
+	/* Switch phase bit in case of wrap around */
+	if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0))
+		io_sq->phase ^= 1;
+}
+
+static inline int ena_com_write_header(struct ena_com_io_sq *io_sq,
+				       u8 *head_src, u16 header_len)
+{
+	u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1);
+	u8 __iomem *dev_head_addr =
+		io_sq->header_addr + (tail_masked * io_sq->tx_max_header_size);
+
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+		return 0;
+
+	if (unlikely(!io_sq->header_addr)) {
+		pr_err("Push buffer header ptr is NULL\n");
+		return -EINVAL;
+	}
+
+	memcpy_toio(dev_head_addr, head_src, header_len);
+
+	return 0;
+}
+
+static inline struct ena_eth_io_rx_cdesc_base *
+	ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx)
+{
+	idx &= (io_cq->q_depth - 1);
+	return (struct ena_eth_io_rx_cdesc_base *)
+		((uintptr_t)io_cq->cdesc_addr.virt_addr +
+		idx * io_cq->cdesc_entry_size_in_bytes);
+}
+
+static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
+					   u16 *first_cdesc_idx)
+{
+	struct ena_eth_io_rx_cdesc_base *cdesc;
+	u16 count = 0, head_masked;
+	u32 last = 0;
+
+	do {
+		cdesc = ena_com_get_next_rx_cdesc(io_cq);
+		if (!cdesc)
+			break;
+
+		ena_com_cq_inc_head(io_cq);
+		count++;
+		last = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
+			ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT;
+	} while (!last);
+
+	if (last) {
+		*first_cdesc_idx = io_cq->cur_rx_pkt_cdesc_start_idx;
+		count += io_cq->cur_rx_pkt_cdesc_count;
+
+		head_masked = io_cq->head & (io_cq->q_depth - 1);
+
+		io_cq->cur_rx_pkt_cdesc_count = 0;
+		io_cq->cur_rx_pkt_cdesc_start_idx = head_masked;
+
+		pr_debug("ena q_id: %d packets were completed. first desc idx %u descs# %d\n",
+			 io_cq->qid, *first_cdesc_idx, count);
+	} else {
+		io_cq->cur_rx_pkt_cdesc_count += count;
+		count = 0;
+	}
+
+	return count;
+}
+
+static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq,
+					     struct ena_com_tx_ctx *ena_tx_ctx)
+{
+	int rc;
+
+	if (ena_tx_ctx->meta_valid) {
+		rc = memcmp(&io_sq->cached_tx_meta,
+			    &ena_tx_ctx->ena_meta,
+			    sizeof(struct ena_com_tx_meta));
+
+		if (unlikely(rc != 0))
+			return true;
+	}
+
+	return false;
+}
+
+static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
+							 struct ena_com_tx_ctx *ena_tx_ctx)
+{
+	struct ena_eth_io_tx_meta_desc *meta_desc = NULL;
+	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
+
+	meta_desc = get_sq_desc(io_sq);
+	memset(meta_desc, 0x0, sizeof(struct ena_eth_io_tx_meta_desc));
+
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_DESC_MASK;
+
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK;
+
+	/* bits 0-9 of the mss */
+	meta_desc->word2 |= (ena_meta->mss <<
+		ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT) &
+		ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK;
+	/* bits 10-13 of the mss */
+	meta_desc->len_ctrl |= ((ena_meta->mss >> 10) <<
+		ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT) &
+		ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK;
+
+	/* Extended meta desc */
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK;
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
+	meta_desc->len_ctrl |= (io_sq->phase <<
+		ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT) &
+		ENA_ETH_IO_TX_META_DESC_PHASE_MASK;
+
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_FIRST_MASK;
+	meta_desc->word2 |= ena_meta->l3_hdr_len &
+		ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK;
+	meta_desc->word2 |= (ena_meta->l3_hdr_offset <<
+		ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT) &
+		ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK;
+
+	meta_desc->word2 |= (ena_meta->l4_hdr_len <<
+		ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT) &
+		ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK;
+
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
+
+	/* Cache the meta desc */
+	memcpy(&io_sq->cached_tx_meta, ena_meta,
+	       sizeof(struct ena_com_tx_meta));
+
+	ena_com_copy_curr_sq_desc_to_dev(io_sq);
+	ena_com_sq_update_tail(io_sq);
+}
+
+static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx,
+					struct ena_eth_io_rx_cdesc_base *cdesc)
+{
+	ena_rx_ctx->l3_proto = cdesc->status &
+		ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK;
+	ena_rx_ctx->l4_proto =
+		(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >>
+		ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT;
+	ena_rx_ctx->l3_csum_err =
+		(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >>
+		ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT;
+	ena_rx_ctx->l4_csum_err =
+		(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >>
+		ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT;
+	ena_rx_ctx->hash = cdesc->hash;
+	ena_rx_ctx->frag =
+		(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >>
+		ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT;
+
+	pr_debug("ena_rx_ctx->l3_proto %d ena_rx_ctx->l4_proto %d\nena_rx_ctx->l3_csum_err %d ena_rx_ctx->l4_csum_err %d\nhash frag %d frag: %d cdesc_status: %x\n",
+		 ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto,
+		 ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err,
+		 ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status);
+}
+
+/*****************************************************************************/
+/*****************************     API      **********************************/
+/*****************************************************************************/
+
+int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
+		       struct ena_com_tx_ctx *ena_tx_ctx,
+		       int *nb_hw_desc)
+{
+	struct ena_eth_io_tx_desc *desc = NULL;
+	struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs;
+	void *push_header = ena_tx_ctx->push_header;
+	u16 header_len = ena_tx_ctx->header_len;
+	u16 num_bufs = ena_tx_ctx->num_bufs;
+	int total_desc, i, rc;
+	bool have_meta;
+	u64 addr_hi;
+
+	WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type");
+
+	/* num_bufs +1 for potential meta desc */
+	if (ena_com_sq_empty_space(io_sq) < (num_bufs + 1)) {
+		pr_err("Not enough space in the tx queue\n");
+		return -ENOMEM;
+	}
+
+	if (unlikely(header_len > io_sq->tx_max_header_size)) {
+		pr_err("header size is too large %d max header: %d\n",
+		       header_len, io_sq->tx_max_header_size);
+		return -EINVAL;
+	}
+
+	/* start with pushing the header (if needed) */
+	rc = ena_com_write_header(io_sq, push_header, header_len);
+	if (unlikely(rc))
+		return rc;
+
+	have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq,
+			ena_tx_ctx);
+	if (have_meta)
+		ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx);
+
+	/* If the caller doesn't want to send packets */
+	if (unlikely(!num_bufs && !header_len)) {
+		*nb_hw_desc = have_meta ? 0 : 1;
+		return 0;
+	}
+
+	desc = get_sq_desc(io_sq);
+	memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
+
+	/* Set first desc when we don't have meta descriptor */
+	if (!have_meta)
+		desc->len_ctrl |= ENA_ETH_IO_TX_DESC_FIRST_MASK;
+
+	desc->buff_addr_hi_hdr_sz |= (header_len <<
+		ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT) &
+		ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK;
+	desc->len_ctrl |= (io_sq->phase << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) &
+		ENA_ETH_IO_TX_DESC_PHASE_MASK;
+
+	desc->len_ctrl |= ENA_ETH_IO_TX_DESC_COMP_REQ_MASK;
+
+	/* Bits 0-9 */
+	desc->meta_ctrl |= (ena_tx_ctx->req_id <<
+		ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT) &
+		ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK;
+
+	desc->meta_ctrl |= (ena_tx_ctx->df <<
+		ENA_ETH_IO_TX_DESC_DF_SHIFT) &
+		ENA_ETH_IO_TX_DESC_DF_MASK;
+
+	/* Bits 10-15 */
+	desc->len_ctrl |= ((ena_tx_ctx->req_id >> 10) <<
+		ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT) &
+		ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK;
+
+	if (ena_tx_ctx->meta_valid) {
+		desc->meta_ctrl |= (ena_tx_ctx->tso_enable <<
+			ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT) &
+			ENA_ETH_IO_TX_DESC_TSO_EN_MASK;
+		desc->meta_ctrl |= ena_tx_ctx->l3_proto &
+			ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK;
+		desc->meta_ctrl |= (ena_tx_ctx->l4_proto <<
+			ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT) &
+			ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK;
+		desc->meta_ctrl |= (ena_tx_ctx->l3_csum_enable <<
+			ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT) &
+			ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK;
+		desc->meta_ctrl |= (ena_tx_ctx->l4_csum_enable <<
+			ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT) &
+			ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK;
+		desc->meta_ctrl |= (ena_tx_ctx->l4_csum_partial <<
+			ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT) &
+			ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK;
+	}
+
+	for (i = 0; i < num_bufs; i++) {
+		/* The first buffer shares the same desc as the header */
+		if (likely(i != 0)) {
+			ena_com_copy_curr_sq_desc_to_dev(io_sq);
+			ena_com_sq_update_tail(io_sq);
+
+			desc = get_sq_desc(io_sq);
+			memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
+
+			desc->len_ctrl |= (io_sq->phase <<
+				ENA_ETH_IO_TX_DESC_PHASE_SHIFT) &
+				ENA_ETH_IO_TX_DESC_PHASE_MASK;
+		}
+
+		desc->len_ctrl |= ena_bufs->len &
+			ENA_ETH_IO_TX_DESC_LENGTH_MASK;
+
+		addr_hi = ((ena_bufs->paddr &
+			GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32);
+
+		desc->buff_addr_lo = (u32)ena_bufs->paddr;
+		desc->buff_addr_hi_hdr_sz |= addr_hi &
+			ENA_ETH_IO_TX_DESC_ADDR_HI_MASK;
+		ena_bufs++;
+	}
+
+	/* set the last desc indicator */
+	desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK;
+
+	ena_com_copy_curr_sq_desc_to_dev(io_sq);
+
+	ena_com_sq_update_tail(io_sq);
+
+	total_desc = max_t(u16, num_bufs, 1);
+	total_desc += have_meta ? 1 : 0;
+
+	*nb_hw_desc = total_desc;
+	return 0;
+}
+
+int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
+		   struct ena_com_io_sq *io_sq,
+		   struct ena_com_rx_ctx *ena_rx_ctx)
+{
+	struct ena_com_rx_buf_info *ena_buf = &ena_rx_ctx->ena_bufs[0];
+	struct ena_eth_io_rx_cdesc_base *cdesc = NULL;
+	u16 cdesc_idx = 0;
+	u16 nb_hw_desc;
+	u16 i;
+
+	WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type");
+
+	nb_hw_desc = ena_com_cdesc_rx_pkt_get(io_cq, &cdesc_idx);
+	if (nb_hw_desc == 0) {
+		ena_rx_ctx->descs = nb_hw_desc;
+		return 0;
+	}
+
+	pr_debug("fetch rx packet: queue %d completed desc: %d\n", io_cq->qid,
+		 nb_hw_desc);
+
+	if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) {
+		pr_err("Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc,
+		       ena_rx_ctx->max_bufs);
+		return -ENOSPC;
+	}
+
+	for (i = 0; i < nb_hw_desc; i++) {
+		cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i);
+
+		ena_buf->len = cdesc->length;
+		ena_buf->req_id = cdesc->req_id;
+		ena_buf++;
+	}
+
+	/* Update SQ head ptr */
+	io_sq->next_to_comp += nb_hw_desc;
+
+	pr_debug("[%s][QID#%d] Updating SQ head to: %d\n", __func__, io_sq->qid,
+		 io_sq->next_to_comp);
+
+	/* Get rx flags from the last pkt */
+	ena_com_rx_set_flags(ena_rx_ctx, cdesc);
+
+	ena_rx_ctx->descs = nb_hw_desc;
+	return 0;
+}
+
+int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
+			       struct ena_com_buf *ena_buf,
+			       u16 req_id)
+{
+	struct ena_eth_io_rx_desc *desc;
+
+	WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type");
+
+	if (unlikely(ena_com_sq_empty_space(io_sq) == 0))
+		return -ENOSPC;
+
+	desc = get_sq_desc(io_sq);
+	memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc));
+
+	desc->length = ena_buf->len;
+
+	desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK;
+	desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK;
+	desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK;
+	desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK;
+
+	desc->req_id = req_id;
+
+	desc->buff_addr_lo = (u32)ena_buf->paddr;
+	desc->buff_addr_hi =
+		((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32);
+
+	ena_com_sq_update_tail(io_sq);
+
+	return 0;
+}
+
+int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id)
+{
+	u8 expected_phase, cdesc_phase;
+	struct ena_eth_io_tx_cdesc *cdesc;
+	u16 masked_head;
+
+	masked_head = io_cq->head & (io_cq->q_depth - 1);
+	expected_phase = io_cq->phase;
+
+	cdesc = (struct ena_eth_io_tx_cdesc *)
+		((uintptr_t)io_cq->cdesc_addr.virt_addr +
+		(masked_head * io_cq->cdesc_entry_size_in_bytes));
+
+	/* When the current completion descriptor phase isn't the same as the
+	 * expected phase, it means that the device hasn't updated
+	 * this completion yet.
+	 */
+	cdesc_phase = cdesc->flags & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
+	if (cdesc_phase != expected_phase)
+		return -EAGAIN;
+
+	ena_com_cq_inc_head(io_cq);
+
+	*req_id = cdesc->req_id;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
new file mode 100644
index 0000000..bb53c3a
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_ETH_COM_H_
+#define ENA_ETH_COM_H_
+
+#include "ena_com.h"
+
+/* head update threshold in units of (queue size / ENA_COMP_HEAD_THRESH) */
+#define ENA_COMP_HEAD_THRESH 4
+
+struct ena_com_tx_ctx {
+	struct ena_com_tx_meta ena_meta;
+	struct ena_com_buf *ena_bufs;
+	/* For LLQ, header buffer - pushed to the device mem space */
+	void *push_header;
+
+	enum ena_eth_io_l3_proto_index l3_proto;
+	enum ena_eth_io_l4_proto_index l4_proto;
+	u16 num_bufs;
+	u16 req_id;
+	/* For regular queue, indicate the size of the header
+	 * For LLQ, indicate the size of the pushed buffer
+	 */
+	u16 header_len;
+
+	u8 meta_valid;
+	u8 tso_enable;
+	u8 l3_csum_enable;
+	u8 l4_csum_enable;
+	u8 l4_csum_partial;
+	u8 df; /* Don't fragment */
+};
+
+struct ena_com_rx_ctx {
+	struct ena_com_rx_buf_info *ena_bufs;
+	enum ena_eth_io_l3_proto_index l3_proto;
+	enum ena_eth_io_l4_proto_index l4_proto;
+	bool l3_csum_err;
+	bool l4_csum_err;
+	/* fragmented packet */
+	bool frag;
+	u32 hash;
+	u16 descs;
+	int max_bufs;
+};
+
+int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
+		       struct ena_com_tx_ctx *ena_tx_ctx,
+		       int *nb_hw_desc);
+
+int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
+		   struct ena_com_io_sq *io_sq,
+		   struct ena_com_rx_ctx *ena_rx_ctx);
+
+int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
+			       struct ena_com_buf *ena_buf,
+			       u16 req_id);
+
+int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id);
+
+static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq,
+				       struct ena_eth_io_intr_reg *intr_reg)
+{
+	writel(intr_reg->intr_control, io_cq->unmask_reg);
+}
+
+static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
+{
+	u16 tail, next_to_comp, cnt;
+
+	next_to_comp = io_sq->next_to_comp;
+	tail = io_sq->tail;
+	cnt = tail - next_to_comp;
+
+	return io_sq->q_depth - 1 - cnt;
+}
+
+static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
+{
+	u16 tail;
+
+	tail = io_sq->tail;
+
+	pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
+		 io_sq->qid, tail);
+
+	writel(tail, io_sq->db_addr);
+
+	return 0;
+}
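+
+/* Illustrative Tx submission sketch (not part of this patch): after filling a
+ * struct ena_com_tx_ctx (buffers, header, offload flags), a driver would
+ * typically do something like:
+ *
+ *	int nb_hw_desc;
+ *	int rc;
+ *
+ *	rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
+ *	if (rc)
+ *		return rc;
+ *	// nb_hw_desc descriptors were consumed; ring the doorbell so the
+ *	// device starts processing them
+ *	ena_com_write_sq_doorbell(io_sq);
+ */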
+
+static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq)
+{
+	u16 unreported_comp, head;
+	bool need_update;
+
+	head = io_cq->head;
+	unreported_comp = head - io_cq->last_head_update;
+	need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH);
+
+	if (io_cq->cq_head_db_reg && need_update) {
+		pr_debug("Write completion queue doorbell for queue %d: head: %d\n",
+			 io_cq->qid, head);
+		writel(head, io_cq->cq_head_db_reg);
+		io_cq->last_head_update = head;
+	}
+
+	return 0;
+}
+
+static inline void ena_com_update_numa_node(struct ena_com_io_cq *io_cq,
+					    u8 numa_node)
+{
+	struct ena_eth_io_numa_node_cfg_reg numa_cfg;
+
+	if (!io_cq->numa_node_cfg_reg)
+		return;
+
+	numa_cfg.numa_cfg = (numa_node & ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK)
+		| ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK;
+
+	writel(numa_cfg.numa_cfg, io_cq->numa_node_cfg_reg);
+}
+
+static inline void ena_com_comp_ack(struct ena_com_io_sq *io_sq, u16 elem)
+{
+	io_sq->next_to_comp += elem;
+}
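+
+/* Illustrative Tx completion sketch (not part of this patch): reaping
+ * completed requests until the device has no more updated descriptors.
+ * tx_descs_of() is a hypothetical helper returning the number of descriptors
+ * the given request consumed.
+ *
+ *	u16 req_id;
+ *
+ *	while (ena_com_tx_comp_req_id_get(io_cq, &req_id) == 0) {
+ *		// look up the Tx info for req_id and free its buffers, then
+ *		// acknowledge the descriptors that request consumed:
+ *		ena_com_comp_ack(io_sq, tx_descs_of(req_id));
+ *	}
+ *	ena_com_update_dev_comp_head(io_cq);
+ */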
+
+#endif /* ENA_ETH_COM_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
new file mode 100644
index 0000000..f320c58
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
@@ -0,0 +1,416 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_ETH_IO_H_
+#define _ENA_ETH_IO_H_
+
+enum ena_eth_io_l3_proto_index {
+	ENA_ETH_IO_L3_PROTO_UNKNOWN	= 0,
+
+	ENA_ETH_IO_L3_PROTO_IPV4	= 8,
+
+	ENA_ETH_IO_L3_PROTO_IPV6	= 11,
+
+	ENA_ETH_IO_L3_PROTO_FCOE	= 21,
+
+	ENA_ETH_IO_L3_PROTO_ROCE	= 22,
+};
+
+enum ena_eth_io_l4_proto_index {
+	ENA_ETH_IO_L4_PROTO_UNKNOWN		= 0,
+
+	ENA_ETH_IO_L4_PROTO_TCP			= 12,
+
+	ENA_ETH_IO_L4_PROTO_UDP			= 13,
+
+	ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE	= 23,
+};
+
+struct ena_eth_io_tx_desc {
+	/* 15:0 : length - Buffer length in bytes. It must
+	 *    include any packet trailers that the ENA is supposed
+	 *    to update, such as the End-to-End CRC, Authentication
+	 *    GMAC, etc. It must not include the 'Push_Buffer'
+	 *    length, nor the 4 bytes appended at the end for the
+	 *    802.3 Ethernet FCS
+	 * 21:16 : req_id_hi - Request ID[15:10]
+	 * 22 : reserved22 - MBZ
+	 * 23 : meta_desc - MBZ
+	 * 24 : phase
+	 * 25 : reserved1 - MBZ
+	 * 26 : first - Indicates first descriptor in
+	 *    transaction
+	 * 27 : last - Indicates last descriptor in
+	 *    transaction
+	 * 28 : comp_req - Indicates whether completion
+	 *    should be posted, after packet is transmitted.
+	 *    Valid only for first descriptor
+	 * 30:29 : reserved29 - MBZ
+	 * 31 : reserved31 - MBZ
+	 */
+	u32 len_ctrl;
+
+	/* 3:0 : l3_proto_idx - L3 protocol. This field is
+	 *    required when l3_csum_en, l3_csum or tso_en are set.
+	 * 4 : DF - IPv4 DF. Must be 0 if the packet is IPv4 and
+	 *    the DF flag of the IPv4 header is 0, otherwise it
+	 *    must be set to 1
+	 * 6:5 : reserved5
+	 * 7 : tso_en - Enable TSO, For TCP only.
+	 * 12:8 : l4_proto_idx - L4 protocol. This field needs
+	 *    to be set when l4_csum_en or tso_en are set.
+	 * 13 : l3_csum_en - enable IPv4 header checksum.
+	 * 14 : l4_csum_en - enable TCP/UDP checksum.
+	 * 15 : ethernet_fcs_dis - when set, the controller
+	 *    will not append the 802.3 Ethernet Frame Check
+	 *    Sequence to the packet
+	 * 16 : reserved16
+	 * 17 : l4_csum_partial - L4 partial checksum. When
+	 *    set to 0, the ENA calculates the L4 checksum,
+	 *    where the Destination Address required for the
+	 *    TCP/UDP pseudo-header is taken from the actual
+	 *    packet L3 header. When set to 1, the ENA doesn't
+	 *    calculate the pseudo-header sum; the checksum
+	 *    field of the L4 header is used instead. When TSO
+	 *    is enabled, the checksum of the pseudo-header
+	 *    must not include the TCP length field. L4 partial
+	 *    checksum should be used for IPv6 packets that
+	 *    contain Routing Headers.
+	 * 20:18 : reserved18 - MBZ
+	 * 21 : reserved21 - MBZ
+	 * 31:22 : req_id_lo - Request ID[9:0]
+	 */
+	u32 meta_ctrl;
+
+	u32 buff_addr_lo;
+
+	/* address high and header size
+	 * 15:0 : addr_hi - Buffer Pointer[47:32]
+	 * 23:16 : reserved16_w2
+	 * 31:24 : header_length - Header length. For Low
+	 *    Latency Queues, this field indicates the number
+	 *    of bytes written to the headers' memory. For
+	 *    normal queues, if the packet is TCP or UDP and
+	 *    longer than max_header_size, this field should be
+	 *    set to the sum of the L4 header offset and the L4
+	 *    header size (without options); otherwise it
+	 *    should be set to 0. For both modes, this field
+	 *    must not exceed the max_header_size.
+	 *    max_header_size value is reported by the Max
+	 *    Queues Feature descriptor
+	 */
+	u32 buff_addr_hi_hdr_sz;
+};
+
+struct ena_eth_io_tx_meta_desc {
+	/* 9:0 : req_id_lo - Request ID[9:0]
+	 * 11:10 : reserved10 - MBZ
+	 * 12 : reserved12 - MBZ
+	 * 13 : reserved13 - MBZ
+	 * 14 : ext_valid - if set, the offset fields in Word 2
+	 *    are valid, as are MSS High in Word 0 and bits
+	 *    [31:24] in Word 3
+	 * 15 : reserved15
+	 * 19:16 : mss_hi
+	 * 20 : eth_meta_type - 0: Tx Metadata Descriptor, 1:
+	 *    Extended Metadata Descriptor
+	 * 21 : meta_store - Store extended metadata in queue
+	 *    cache
+	 * 22 : reserved22 - MBZ
+	 * 23 : meta_desc - MBO
+	 * 24 : phase
+	 * 25 : reserved25 - MBZ
+	 * 26 : first - Indicates first descriptor in
+	 *    transaction
+	 * 27 : last - Indicates last descriptor in
+	 *    transaction
+	 * 28 : comp_req - Indicates whether completion
+	 *    should be posted, after packet is transmitted.
+	 *    Valid only for first descriptor
+	 * 30:29 : reserved29 - MBZ
+	 * 31 : reserved31 - MBZ
+	 */
+	u32 len_ctrl;
+
+	/* 5:0 : req_id_hi
+	 * 31:6 : reserved6 - MBZ
+	 */
+	u32 word1;
+
+	/* 7:0 : l3_hdr_len
+	 * 15:8 : l3_hdr_off
+	 * 21:16 : l4_hdr_len_in_words - counts the L4 header
+	 *    length in words. there is an explicit assumption
+	 *    that L4 header appears right after L3 header and
+	 *    L4 offset is based on l3_hdr_off+l3_hdr_len
+	 * 31:22 : mss_lo
+	 */
+	u32 word2;
+
+	u32 reserved;
+};
+
+struct ena_eth_io_tx_cdesc {
+	/* Request ID[15:0] */
+	u16 req_id;
+
+	u8 status;
+
+	/* flags
+	 * 0 : phase
+	 * 7:1 : reserved1
+	 */
+	u8 flags;
+
+	u16 sub_qid;
+
+	u16 sq_head_idx;
+};
+
+struct ena_eth_io_rx_desc {
+	/* In bytes. 0 means 64KB */
+	u16 length;
+
+	/* MBZ */
+	u8 reserved2;
+
+	/* 0 : phase
+	 * 1 : reserved1 - MBZ
+	 * 2 : first - Indicates first descriptor in
+	 *    transaction
+	 * 3 : last - Indicates last descriptor in transaction
+	 * 4 : comp_req
+	 * 5 : reserved5 - MBO
+	 * 7:6 : reserved6 - MBZ
+	 */
+	u8 ctrl;
+
+	u16 req_id;
+
+	/* MBZ */
+	u16 reserved6;
+
+	u32 buff_addr_lo;
+
+	u16 buff_addr_hi;
+
+	/* MBZ */
+	u16 reserved16_w3;
+};
+
+/* 4-word format. Note: all Ethernet parsing information is valid only when
+ * last=1
+ */
+struct ena_eth_io_rx_cdesc_base {
+	/* 4:0 : l3_proto_idx
+	 * 6:5 : src_vlan_cnt
+	 * 7 : reserved7 - MBZ
+	 * 12:8 : l4_proto_idx
+	 * 13 : l3_csum_err - when set, either an L3
+	 *    checksum error was detected or the controller
+	 *    didn't validate the checksum. This bit is valid
+	 *    only when l3_proto_idx indicates an IPv4 packet
+	 * 14 : l4_csum_err - when set, either an L4
+	 *    checksum error was detected or the controller
+	 *    didn't validate the checksum. This bit is valid
+	 *    only when l4_proto_idx indicates a TCP/UDP packet
+	 *    and ipv4_frag is not set
+	 * 15 : ipv4_frag - Indicates IPv4 fragmented packet
+	 * 23:16 : reserved16
+	 * 24 : phase
+	 * 25 : l3_csum2 - second checksum engine result
+	 * 26 : first - Indicates first descriptor in
+	 *    transaction
+	 * 27 : last - Indicates last descriptor in
+	 *    transaction
+	 * 29:28 : reserved28
+	 * 30 : buffer - 0: Metadata descriptor. 1: Buffer
+	 *    Descriptor was used
+	 * 31 : reserved31
+	 */
+	u32 status;
+
+	u16 length;
+
+	u16 req_id;
+
+	/* 32-bit hash result */
+	u32 hash;
+
+	u16 sub_qid;
+
+	u16 reserved;
+};
+
+/* 8-word format */
+struct ena_eth_io_rx_cdesc_ext {
+	struct ena_eth_io_rx_cdesc_base base;
+
+	u32 buff_addr_lo;
+
+	u16 buff_addr_hi;
+
+	u16 reserved16;
+
+	u32 reserved_w6;
+
+	u32 reserved_w7;
+};
+
+struct ena_eth_io_intr_reg {
+	/* 14:0 : rx_intr_delay
+	 * 29:15 : tx_intr_delay
+	 * 30 : intr_unmask
+	 * 31 : reserved
+	 */
+	u32 intr_control;
+};
+
+struct ena_eth_io_numa_node_cfg_reg {
+	/* 7:0 : numa
+	 * 30:8 : reserved
+	 * 31 : enabled
+	 */
+	u32 numa_cfg;
+};
+
+/* tx_desc */
+#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16)
+#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23
+#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23)
+#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24
+#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26
+#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27
+#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27)
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28)
+#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0)
+#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4
+#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4)
+#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7
+#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7)
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8)
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14)
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22)
+#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24)
+
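+/* Illustrative sketch (not part of the hardware definitions): a 16-bit
+ * request id is split across the two descriptor words, e.g.
+ *
+ *	len_ctrl  |= ((req_id >> 10) << ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT) &
+ *		     ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK;
+ *	meta_ctrl |= ((req_id & 0x3ff) << ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT) &
+ *		     ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK;
+ *
+ * so bits [15:10] land in len_ctrl[21:16] and bits [9:0] in meta_ctrl[31:22].
+ */
+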
+/* tx_meta_desc */
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0)
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14)
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16)
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20)
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21)
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23)
+#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24
+#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26
+#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27
+#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27)
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28)
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8)
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16)
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22)
+
+/* tx_cdesc */
+#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0)
+
+/* rx_desc */
+#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0)
+#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2
+#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2)
+#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3
+#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3)
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4)
+
+/* rx_cdesc_base */
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0)
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14)
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15)
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25)
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27)
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30)
+
+/* intr_reg */
+#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0)
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15)
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30)
+
+/* numa_node_cfg_reg */
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0)
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31)
+
+#endif /*_ENA_ETH_IO_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
new file mode 100644
index 0000000..67b2338f
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -0,0 +1,895 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+
+#include "ena_netdev.h"
+
+struct ena_stats {
+	char name[ETH_GSTRING_LEN];
+	int stat_offset;
+};
+
+#define ENA_STAT_ENA_COM_ENTRY(stat) { \
+	.name = #stat, \
+	.stat_offset = offsetof(struct ena_com_stats_admin, stat) \
+}
+
+#define ENA_STAT_ENTRY(stat, stat_type) { \
+	.name = #stat, \
+	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
+}
+
+#define ENA_STAT_RX_ENTRY(stat) \
+	ENA_STAT_ENTRY(stat, rx)
+
+#define ENA_STAT_TX_ENTRY(stat) \
+	ENA_STAT_ENTRY(stat, tx)
+
+#define ENA_STAT_GLOBAL_ENTRY(stat) \
+	ENA_STAT_ENTRY(stat, dev)
+
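+/* For reference, ENA_STAT_TX_ENTRY(cnt) expands to
+ * { .name = "cnt", .stat_offset = offsetof(struct ena_stats_tx, cnt) },
+ * so each table below is simply a printable name paired with an offset into
+ * the corresponding statistics structure.
+ */
+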
+static const struct ena_stats ena_stats_global_strings[] = {
+	ENA_STAT_GLOBAL_ENTRY(tx_timeout),
+	ENA_STAT_GLOBAL_ENTRY(io_suspend),
+	ENA_STAT_GLOBAL_ENTRY(io_resume),
+	ENA_STAT_GLOBAL_ENTRY(wd_expired),
+	ENA_STAT_GLOBAL_ENTRY(interface_up),
+	ENA_STAT_GLOBAL_ENTRY(interface_down),
+	ENA_STAT_GLOBAL_ENTRY(admin_q_pause),
+};
+
+static const struct ena_stats ena_stats_tx_strings[] = {
+	ENA_STAT_TX_ENTRY(cnt),
+	ENA_STAT_TX_ENTRY(bytes),
+	ENA_STAT_TX_ENTRY(queue_stop),
+	ENA_STAT_TX_ENTRY(queue_wakeup),
+	ENA_STAT_TX_ENTRY(dma_mapping_err),
+	ENA_STAT_TX_ENTRY(linearize),
+	ENA_STAT_TX_ENTRY(linearize_failed),
+	ENA_STAT_TX_ENTRY(napi_comp),
+	ENA_STAT_TX_ENTRY(tx_poll),
+	ENA_STAT_TX_ENTRY(doorbells),
+	ENA_STAT_TX_ENTRY(prepare_ctx_err),
+	ENA_STAT_TX_ENTRY(missing_tx_comp),
+	ENA_STAT_TX_ENTRY(bad_req_id),
+};
+
+static const struct ena_stats ena_stats_rx_strings[] = {
+	ENA_STAT_RX_ENTRY(cnt),
+	ENA_STAT_RX_ENTRY(bytes),
+	ENA_STAT_RX_ENTRY(refil_partial),
+	ENA_STAT_RX_ENTRY(bad_csum),
+	ENA_STAT_RX_ENTRY(page_alloc_fail),
+	ENA_STAT_RX_ENTRY(skb_alloc_fail),
+	ENA_STAT_RX_ENTRY(dma_mapping_err),
+	ENA_STAT_RX_ENTRY(bad_desc_num),
+	ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
+};
+
+static const struct ena_stats ena_stats_ena_com_strings[] = {
+	ENA_STAT_ENA_COM_ENTRY(aborted_cmd),
+	ENA_STAT_ENA_COM_ENTRY(submitted_cmd),
+	ENA_STAT_ENA_COM_ENTRY(completed_cmd),
+	ENA_STAT_ENA_COM_ENTRY(out_of_space),
+	ENA_STAT_ENA_COM_ENTRY(no_completion),
+};
+
+#define ENA_STATS_ARRAY_GLOBAL	ARRAY_SIZE(ena_stats_global_strings)
+#define ENA_STATS_ARRAY_TX	ARRAY_SIZE(ena_stats_tx_strings)
+#define ENA_STATS_ARRAY_RX	ARRAY_SIZE(ena_stats_rx_strings)
+#define ENA_STATS_ARRAY_ENA_COM	ARRAY_SIZE(ena_stats_ena_com_strings)
+
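+/* Copy one 64-bit counter under the ring's u64_stats_sync seqcount: the
+ * fetch_begin/fetch_retry pair re-reads until no writer raced with the read,
+ * so the copied value is a consistent snapshot even on 32-bit machines where
+ * a 64-bit update is not atomic.
+ */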
+static void ena_safe_update_stat(u64 *src, u64 *dst,
+				 struct u64_stats_sync *syncp)
+{
+	unsigned int start;
+
+	do {
+		start = u64_stats_fetch_begin_irq(syncp);
+		*(dst) = *src;
+	} while (u64_stats_fetch_retry_irq(syncp, start));
+}
+
+static void ena_queue_stats(struct ena_adapter *adapter, u64 **data)
+{
+	const struct ena_stats *ena_stats;
+	struct ena_ring *ring;
+
+	u64 *ptr;
+	int i, j;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		/* Tx stats */
+		ring = &adapter->tx_ring[i];
+
+		for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
+			ena_stats = &ena_stats_tx_strings[j];
+
+			ptr = (u64 *)((uintptr_t)&ring->tx_stats +
+				(uintptr_t)ena_stats->stat_offset);
+
+			ena_safe_update_stat(ptr, (*data)++, &ring->syncp);
+		}
+
+		/* Rx stats */
+		ring = &adapter->rx_ring[i];
+
+		for (j = 0; j < ENA_STATS_ARRAY_RX; j++) {
+			ena_stats = &ena_stats_rx_strings[j];
+
+			ptr = (u64 *)((uintptr_t)&ring->rx_stats +
+				(uintptr_t)ena_stats->stat_offset);
+
+			ena_safe_update_stat(ptr, (*data)++, &ring->syncp);
+		}
+	}
+}
+
+static void ena_dev_admin_queue_stats(struct ena_adapter *adapter, u64 **data)
+{
+	const struct ena_stats *ena_stats;
+	u32 *ptr;
+	int i;
+
+	for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) {
+		ena_stats = &ena_stats_ena_com_strings[i];
+
+		ptr = (u32 *)((uintptr_t)&adapter->ena_dev->admin_queue.stats +
+			(uintptr_t)ena_stats->stat_offset);
+
+		*(*data)++ = *ptr;
+	}
+}
+
+static void ena_get_ethtool_stats(struct net_device *netdev,
+				  struct ethtool_stats *stats,
+				  u64 *data)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	const struct ena_stats *ena_stats;
+	u64 *ptr;
+	int i;
+
+	for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) {
+		ena_stats = &ena_stats_global_strings[i];
+
+		ptr = (u64 *)((uintptr_t)&adapter->dev_stats +
+			(uintptr_t)ena_stats->stat_offset);
+
+		ena_safe_update_stat(ptr, data++, &adapter->syncp);
+	}
+
+	ena_queue_stats(adapter, &data);
+	ena_dev_admin_queue_stats(adapter, &data);
+}
+
+int ena_get_sset_count(struct net_device *netdev, int sset)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	if (sset != ETH_SS_STATS)
+		return -EOPNOTSUPP;
+
+	return adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX)
+		+ ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM;
+}
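+
+/* Worked example of the count above: with 8 queues and the string tables
+ * defined earlier (13 Tx, 9 Rx, 7 global and 5 admin-queue entries), ethtool
+ * is told to expect 8 * (13 + 9) + 7 + 5 = 188 statistics.
+ */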
+
+static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
+{
+	const struct ena_stats *ena_stats;
+	int i, j;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		/* Tx stats */
+		for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
+			ena_stats = &ena_stats_tx_strings[j];
+
+			snprintf(*data, ETH_GSTRING_LEN,
+				 "queue_%u_tx_%s", i, ena_stats->name);
+			(*data) += ETH_GSTRING_LEN;
+		}
+		/* Rx stats */
+		for (j = 0; j < ENA_STATS_ARRAY_RX; j++) {
+			ena_stats = &ena_stats_rx_strings[j];
+
+			snprintf(*data, ETH_GSTRING_LEN,
+				 "queue_%u_rx_%s", i, ena_stats->name);
+			(*data) += ETH_GSTRING_LEN;
+		}
+	}
+}
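+
+/* The resulting ethtool strings look like "queue_0_tx_cnt", "queue_0_rx_bytes"
+ * and so on: one block of Tx strings followed by one block of Rx strings per
+ * queue.
+ */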
+
+static void ena_com_dev_strings(u8 **data)
+{
+	const struct ena_stats *ena_stats;
+	int i;
+
+	for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) {
+		ena_stats = &ena_stats_ena_com_strings[i];
+
+		snprintf(*data, ETH_GSTRING_LEN,
+			 "ena_admin_q_%s", ena_stats->name);
+		(*data) += ETH_GSTRING_LEN;
+	}
+}
+
+static void ena_get_strings(struct net_device *netdev, u32 sset, u8 *data)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	const struct ena_stats *ena_stats;
+	int i;
+
+	if (sset != ETH_SS_STATS)
+		return;
+
+	for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) {
+		ena_stats = &ena_stats_global_strings[i];
+
+		memcpy(data, ena_stats->name, ETH_GSTRING_LEN);
+		data += ETH_GSTRING_LEN;
+	}
+
+	ena_queue_strings(adapter, &data);
+	ena_com_dev_strings(&data);
+}
+
+static int ena_get_link_ksettings(struct net_device *netdev,
+				  struct ethtool_link_ksettings *link_ksettings)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct ena_admin_get_feature_link_desc *link;
+	struct ena_admin_get_feat_resp feat_resp;
+	int rc;
+
+	rc = ena_com_get_link_params(ena_dev, &feat_resp);
+	if (rc)
+		return rc;
+
+	link = &feat_resp.u.link;
+	link_ksettings->base.speed = link->speed;
+
+	if (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) {
+		ethtool_link_ksettings_add_link_mode(link_ksettings,
+						     supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(link_ksettings,
+						     advertising, Autoneg);
+	}
+
+	link_ksettings->base.autoneg =
+		(link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) ?
+		AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+	link_ksettings->base.duplex = DUPLEX_FULL;
+
+	return 0;
+}
+
+static int ena_get_coalesce(struct net_device *net_dev,
+			    struct ethtool_coalesce *coalesce)
+{
+	struct ena_adapter *adapter = netdev_priv(net_dev);
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct ena_intr_moder_entry intr_moder_entry;
+
+	if (!ena_com_interrupt_moderation_supported(ena_dev)) {
+		/* the device doesn't support interrupt moderation */
+		return -EOPNOTSUPP;
+	}
+	coalesce->tx_coalesce_usecs =
+		ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) /
+			ena_dev->intr_delay_resolution;
+	if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) {
+		coalesce->rx_coalesce_usecs =
+			ena_com_get_nonadaptive_moderation_interval_rx(ena_dev)
+			/ ena_dev->intr_delay_resolution;
+	} else {
+		ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry);
+		coalesce->rx_coalesce_usecs_low = intr_moder_entry.intr_moder_interval;
+		coalesce->rx_max_coalesced_frames_low = intr_moder_entry.pkts_per_interval;
+
+		ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry);
+		coalesce->rx_coalesce_usecs = intr_moder_entry.intr_moder_interval;
+		coalesce->rx_max_coalesced_frames = intr_moder_entry.pkts_per_interval;
+
+		ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry);
+		coalesce->rx_coalesce_usecs_high = intr_moder_entry.intr_moder_interval;
+		coalesce->rx_max_coalesced_frames_high = intr_moder_entry.pkts_per_interval;
+	}
+	coalesce->use_adaptive_rx_coalesce =
+		ena_com_get_adaptive_moderation_enabled(ena_dev);
+
+	return 0;
+}
+
+static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter)
+{
+	unsigned int val;
+	int i;
+
+	val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev);
+
+	for (i = 0; i < adapter->num_queues; i++)
+		adapter->tx_ring[i].smoothed_interval = val;
+}
+
+static int ena_set_coalesce(struct net_device *net_dev,
+			    struct ethtool_coalesce *coalesce)
+{
+	struct ena_adapter *adapter = netdev_priv(net_dev);
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct ena_intr_moder_entry intr_moder_entry;
+	int rc;
+
+	if (!ena_com_interrupt_moderation_supported(ena_dev)) {
+		/* the device doesn't support interrupt moderation */
+		return -EOPNOTSUPP;
+	}
+
+	if (coalesce->rx_coalesce_usecs_irq ||
+	    coalesce->rx_max_coalesced_frames_irq ||
+	    coalesce->tx_coalesce_usecs_irq ||
+	    coalesce->tx_max_coalesced_frames ||
+	    coalesce->tx_max_coalesced_frames_irq ||
+	    coalesce->stats_block_coalesce_usecs ||
+	    coalesce->use_adaptive_tx_coalesce ||
+	    coalesce->pkt_rate_low ||
+	    coalesce->tx_coalesce_usecs_low ||
+	    coalesce->tx_max_coalesced_frames_low ||
+	    coalesce->pkt_rate_high ||
+	    coalesce->tx_coalesce_usecs_high ||
+	    coalesce->tx_max_coalesced_frames_high ||
+	    coalesce->rate_sample_interval)
+		return -EINVAL;
+
+	rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev,
+							       coalesce->tx_coalesce_usecs);
+	if (rc)
+		return rc;
+
+	ena_update_tx_rings_intr_moderation(adapter);
+
+	if (ena_com_get_adaptive_moderation_enabled(ena_dev)) {
+		if (!coalesce->use_adaptive_rx_coalesce) {
+			ena_com_disable_adaptive_moderation(ena_dev);
+			rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
+									       coalesce->rx_coalesce_usecs);
+			return rc;
+		}
+	} else { /* was in non-adaptive mode */
+		if (coalesce->use_adaptive_rx_coalesce) {
+			ena_com_enable_adaptive_moderation(ena_dev);
+		} else {
+			rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
+									       coalesce->rx_coalesce_usecs);
+			return rc;
+		}
+	}
+
+	intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_low;
+	intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_low;
+	intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
+	ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry);
+
+	intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs;
+	intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames;
+	intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
+	ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry);
+
+	intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_high;
+	intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_high;
+	intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
+	ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry);
+
+	return 0;
+}
+
+static u32 ena_get_msglevel(struct net_device *netdev)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msg_enable;
+}
+
+static void ena_set_msglevel(struct net_device *netdev, u32 value)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	adapter->msg_enable = value;
+}
+
+static void ena_get_drvinfo(struct net_device *dev,
+			    struct ethtool_drvinfo *info)
+{
+	struct ena_adapter *adapter = netdev_priv(dev);
+
+	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
+	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
+	strlcpy(info->bus_info, pci_name(adapter->pdev),
+		sizeof(info->bus_info));
+}
+
+static void ena_get_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	struct ena_ring *tx_ring = &adapter->tx_ring[0];
+	struct ena_ring *rx_ring = &adapter->rx_ring[0];
+
+	ring->rx_max_pending = rx_ring->ring_size;
+	ring->tx_max_pending = tx_ring->ring_size;
+	ring->rx_pending = rx_ring->ring_size;
+	ring->tx_pending = tx_ring->ring_size;
+}
+
+static u32 ena_flow_hash_to_flow_type(u16 hash_fields)
+{
+	u32 data = 0;
+
+	if (hash_fields & ENA_ADMIN_RSS_L2_DA)
+		data |= RXH_L2DA;
+
+	if (hash_fields & ENA_ADMIN_RSS_L3_DA)
+		data |= RXH_IP_DST;
+
+	if (hash_fields & ENA_ADMIN_RSS_L3_SA)
+		data |= RXH_IP_SRC;
+
+	if (hash_fields & ENA_ADMIN_RSS_L4_DP)
+		data |= RXH_L4_B_2_3;
+
+	if (hash_fields & ENA_ADMIN_RSS_L4_SP)
+		data |= RXH_L4_B_0_1;
+
+	return data;
+}
+
+static u16 ena_flow_data_to_flow_hash(u32 hash_fields)
+{
+	u16 data = 0;
+
+	if (hash_fields & RXH_L2DA)
+		data |= ENA_ADMIN_RSS_L2_DA;
+
+	if (hash_fields & RXH_IP_DST)
+		data |= ENA_ADMIN_RSS_L3_DA;
+
+	if (hash_fields & RXH_IP_SRC)
+		data |= ENA_ADMIN_RSS_L3_SA;
+
+	if (hash_fields & RXH_L4_B_2_3)
+		data |= ENA_ADMIN_RSS_L4_DP;
+
+	if (hash_fields & RXH_L4_B_0_1)
+		data |= ENA_ADMIN_RSS_L4_SP;
+
+	return data;
+}
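+
+/* Example: "ethtool -N <iface> rx-flow-hash tcp4 sdfn" reaches the driver as
+ * RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3 and is translated
+ * here to ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | ENA_ADMIN_RSS_L4_SP |
+ * ENA_ADMIN_RSS_L4_DP before being programmed into the device.
+ */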
+
+static int ena_get_rss_hash(struct ena_com_dev *ena_dev,
+			    struct ethtool_rxnfc *cmd)
+{
+	enum ena_admin_flow_hash_proto proto;
+	u16 hash_fields;
+	int rc;
+
+	cmd->data = 0;
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		proto = ENA_ADMIN_RSS_TCP4;
+		break;
+	case UDP_V4_FLOW:
+		proto = ENA_ADMIN_RSS_UDP4;
+		break;
+	case TCP_V6_FLOW:
+		proto = ENA_ADMIN_RSS_TCP6;
+		break;
+	case UDP_V6_FLOW:
+		proto = ENA_ADMIN_RSS_UDP6;
+		break;
+	case IPV4_FLOW:
+		proto = ENA_ADMIN_RSS_IP4;
+		break;
+	case IPV6_FLOW:
+		proto = ENA_ADMIN_RSS_IP6;
+		break;
+	case ETHER_FLOW:
+		proto = ENA_ADMIN_RSS_NOT_IP;
+		break;
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+		return -EOPNOTSUPP;
+	default:
+		return -EINVAL;
+	}
+
+	rc = ena_com_get_hash_ctrl(ena_dev, proto, &hash_fields);
+	if (rc) {
+		/* If the device doesn't have permission, return unsupported */
+		if (rc == -EPERM)
+			rc = -EOPNOTSUPP;
+		return rc;
+	}
+
+	cmd->data = ena_flow_hash_to_flow_type(hash_fields);
+
+	return 0;
+}
+
+static int ena_set_rss_hash(struct ena_com_dev *ena_dev,
+			    struct ethtool_rxnfc *cmd)
+{
+	enum ena_admin_flow_hash_proto proto;
+	u16 hash_fields;
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		proto = ENA_ADMIN_RSS_TCP4;
+		break;
+	case UDP_V4_FLOW:
+		proto = ENA_ADMIN_RSS_UDP4;
+		break;
+	case TCP_V6_FLOW:
+		proto = ENA_ADMIN_RSS_TCP6;
+		break;
+	case UDP_V6_FLOW:
+		proto = ENA_ADMIN_RSS_UDP6;
+		break;
+	case IPV4_FLOW:
+		proto = ENA_ADMIN_RSS_IP4;
+		break;
+	case IPV6_FLOW:
+		proto = ENA_ADMIN_RSS_IP6;
+		break;
+	case ETHER_FLOW:
+		proto = ENA_ADMIN_RSS_NOT_IP;
+		break;
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+		return -EOPNOTSUPP;
+	default:
+		return -EINVAL;
+	}
+
+	hash_fields = ena_flow_data_to_flow_hash(cmd->data);
+
+	return ena_com_fill_hash_ctrl(ena_dev, proto, hash_fields);
+}
+
+static int ena_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int rc = 0;
+
+	switch (info->cmd) {
+	case ETHTOOL_SRXFH:
+		rc = ena_set_rss_hash(adapter->ena_dev, info);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+	case ETHTOOL_SRXCLSRLINS:
+	default:
+		netif_err(adapter, drv, netdev,
+			  "Command parameter %d is not supported\n", info->cmd);
+		rc = -EOPNOTSUPP;
+	}
+
+	return (rc == -EPERM) ? -EOPNOTSUPP : rc;
+}
+
+static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info,
+			 u32 *rules)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int rc = 0;
+
+	switch (info->cmd) {
+	case ETHTOOL_GRXRINGS:
+		info->data = adapter->num_queues;
+		rc = 0;
+		break;
+	case ETHTOOL_GRXFH:
+		rc = ena_get_rss_hash(adapter->ena_dev, info);
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+	case ETHTOOL_GRXCLSRULE:
+	case ETHTOOL_GRXCLSRLALL:
+	default:
+		netif_err(adapter, drv, netdev,
+			  "Command parameter %d is not supported\n", info->cmd);
+		rc = -EOPNOTSUPP;
+	}
+
+	return (rc == -EPERM) ? -EOPNOTSUPP : rc;
+}
+
+static u32 ena_get_rxfh_indir_size(struct net_device *netdev)
+{
+	return ENA_RX_RSS_TABLE_SIZE;
+}
+
+static u32 ena_get_rxfh_key_size(struct net_device *netdev)
+{
+	return ENA_HASH_KEY_SIZE;
+}
+
+static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			u8 *hfunc)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	enum ena_admin_hash_functions ena_func;
+	u8 func;
+	int rc;
+
+	rc = ena_com_indirect_table_get(adapter->ena_dev, indir);
+	if (rc)
+		return rc;
+
+	rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key);
+	if (rc)
+		return rc;
+
+	switch (ena_func) {
+	case ENA_ADMIN_TOEPLITZ:
+		func = ETH_RSS_HASH_TOP;
+		break;
+	case ENA_ADMIN_CRC32:
+		func = ETH_RSS_HASH_XOR;
+		break;
+	default:
+		netif_err(adapter, drv, netdev,
+			  "Command parameter is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (hfunc)
+		*hfunc = func;
+
+	return rc;
+}
+
+static int ena_set_rxfh(struct net_device *netdev, const u32 *indir,
+			const u8 *key, const u8 hfunc)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	enum ena_admin_hash_functions func;
+	int rc, i;
+
+	if (indir) {
+		for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
+			rc = ena_com_indirect_table_fill_entry(ena_dev,
+							       ENA_IO_RXQ_IDX(indir[i]),
+							       i);
+			if (unlikely(rc)) {
+				netif_err(adapter, drv, netdev,
+					  "Cannot fill indirect table (index is too large)\n");
+				return rc;
+			}
+		}
+
+		rc = ena_com_indirect_table_set(ena_dev);
+		if (rc) {
+			netif_err(adapter, drv, netdev,
+				  "Cannot set indirect table\n");
+			return rc == -EPERM ? -EOPNOTSUPP : rc;
+		}
+	}
+
+	switch (hfunc) {
+	case ETH_RSS_HASH_TOP:
+		func = ENA_ADMIN_TOEPLITZ;
+		break;
+	case ETH_RSS_HASH_XOR:
+		func = ENA_ADMIN_CRC32;
+		break;
+	default:
+		netif_err(adapter, drv, netdev, "Unsupported hfunc %d\n",
+			  hfunc);
+		return -EOPNOTSUPP;
+	}
+
+	if (key) {
+		rc = ena_com_fill_hash_function(ena_dev, func, key,
+						ENA_HASH_KEY_SIZE,
+						0xFFFFFFFF);
+		if (unlikely(rc)) {
+			netif_err(adapter, drv, netdev, "Cannot fill key\n");
+			return rc == -EPERM ? -EOPNOTSUPP : rc;
+		}
+	}
+
+	return 0;
+}
+
+static void ena_get_channels(struct net_device *netdev,
+			     struct ethtool_channels *channels)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	channels->max_rx = ENA_MAX_NUM_IO_QUEUES;
+	channels->max_tx = ENA_MAX_NUM_IO_QUEUES;
+	channels->max_other = 0;
+	channels->max_combined = 0;
+	channels->rx_count = adapter->num_queues;
+	channels->tx_count = adapter->num_queues;
+	channels->other_count = 0;
+	channels->combined_count = 0;
+}
+
+static int ena_get_tunable(struct net_device *netdev,
+			   const struct ethtool_tunable *tuna, void *data)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int ret = 0;
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		*(u32 *)data = adapter->rx_copybreak;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int ena_set_tunable(struct net_device *netdev,
+			   const struct ethtool_tunable *tuna,
+			   const void *data)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int ret = 0;
+	u32 len;
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		len = *(u32 *)data;
+		if (len > adapter->netdev->mtu) {
+			ret = -EINVAL;
+			break;
+		}
+		adapter->rx_copybreak = len;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static const struct ethtool_ops ena_ethtool_ops = {
+	.get_link_ksettings	= ena_get_link_ksettings,
+	.get_drvinfo		= ena_get_drvinfo,
+	.get_msglevel		= ena_get_msglevel,
+	.set_msglevel		= ena_set_msglevel,
+	.get_link		= ethtool_op_get_link,
+	.get_coalesce		= ena_get_coalesce,
+	.set_coalesce		= ena_set_coalesce,
+	.get_ringparam		= ena_get_ringparam,
+	.get_sset_count         = ena_get_sset_count,
+	.get_strings		= ena_get_strings,
+	.get_ethtool_stats      = ena_get_ethtool_stats,
+	.get_rxnfc		= ena_get_rxnfc,
+	.set_rxnfc		= ena_set_rxnfc,
+	.get_rxfh_indir_size    = ena_get_rxfh_indir_size,
+	.get_rxfh_key_size	= ena_get_rxfh_key_size,
+	.get_rxfh		= ena_get_rxfh,
+	.set_rxfh		= ena_set_rxfh,
+	.get_channels		= ena_get_channels,
+	.get_tunable		= ena_get_tunable,
+	.set_tunable		= ena_set_tunable,
+};
+
+void ena_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ena_ethtool_ops;
+}
+
+static void ena_dump_stats_ex(struct ena_adapter *adapter, u8 *buf)
+{
+	struct net_device *netdev = adapter->netdev;
+	u8 *strings_buf;
+	u64 *data_buf;
+	int strings_num;
+	int i, rc;
+
+	strings_num = ena_get_sset_count(netdev, ETH_SS_STATS);
+	if (strings_num <= 0) {
+		netif_err(adapter, drv, netdev, "Can't get stats num\n");
+		return;
+	}
+
+	strings_buf = devm_kzalloc(&adapter->pdev->dev,
+				   strings_num * ETH_GSTRING_LEN,
+				   GFP_ATOMIC);
+	if (!strings_buf) {
+		netif_err(adapter, drv, netdev,
+			  "failed to alloc strings_buf\n");
+		return;
+	}
+
+	data_buf = devm_kzalloc(&adapter->pdev->dev,
+				strings_num * sizeof(u64),
+				GFP_ATOMIC);
+	if (!data_buf) {
+		netif_err(adapter, drv, netdev,
+			  "failed to allocate data buf\n");
+		devm_kfree(&adapter->pdev->dev, strings_buf);
+		return;
+	}
+
+	ena_get_strings(netdev, ETH_SS_STATS, strings_buf);
+	ena_get_ethtool_stats(netdev, NULL, data_buf);
+
+	/* If there is a buffer, dump stats, otherwise print them to dmesg */
+	if (buf)
+		for (i = 0; i < strings_num; i++) {
+			rc = snprintf(buf, ETH_GSTRING_LEN + sizeof(u64),
+				      "%s %llu\n",
+				      strings_buf + i * ETH_GSTRING_LEN,
+				      data_buf[i]);
+			buf += rc;
+		}
+	else
+		for (i = 0; i < strings_num; i++)
+			netif_err(adapter, drv, netdev, "%s: %llu\n",
+				  strings_buf + i * ETH_GSTRING_LEN,
+				  data_buf[i]);
+
+	devm_kfree(&adapter->pdev->dev, strings_buf);
+	devm_kfree(&adapter->pdev->dev, data_buf);
+}
+
+void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf)
+{
+	if (!buf)
+		return;
+
+	ena_dump_stats_ex(adapter, buf);
+}
+
+void ena_dump_stats_to_dmesg(struct ena_adapter *adapter)
+{
+	ena_dump_stats_ex(adapter, NULL);
+}
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
new file mode 100644
index 0000000..5c536b8
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -0,0 +1,3278 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif /* CONFIG_RFS_ACCEL */
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/numa.h>
+#include <linux/pci.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+#include <linux/vmalloc.h>
+#include <net/ip.h>
+
+#include "ena_netdev.h"
+#include "ena_pci_id_tbl.h"
+
+static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n";
+
+MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
+MODULE_DESCRIPTION(DEVICE_NAME);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT  (5 * HZ)
+
+#define ENA_NAPI_BUDGET 64
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
+		NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static struct ena_aenq_handlers aenq_handlers;
+
+static struct workqueue_struct *ena_wq;
+
+MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
+
+static int ena_rss_init_default(struct ena_adapter *adapter);
+
+static void ena_tx_timeout(struct net_device *dev)
+{
+	struct ena_adapter *adapter = netdev_priv(dev);
+
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.tx_timeout++;
+	u64_stats_update_end(&adapter->syncp);
+
+	netif_err(adapter, tx_err, dev, "Transmit time out\n");
+
+	/* Change the state of the device to trigger reset */
+	set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+}
+
+static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		adapter->rx_ring[i].mtu = mtu;
+}
+
+static int ena_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct ena_adapter *adapter = netdev_priv(dev);
+	int ret;
+
+	if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
+		netif_err(adapter, drv, dev,
+			  "Invalid MTU setting. new_mtu: %d\n", new_mtu);
+
+		return -EINVAL;
+	}
+
+	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
+	if (!ret) {
+		netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
+		update_rx_ring_mtu(adapter, new_mtu);
+		dev->mtu = new_mtu;
+	} else {
+		netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
+			  new_mtu);
+	}
+
+	return ret;
+}
+
+static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
+{
+#ifdef CONFIG_RFS_ACCEL
+	u32 i;
+	int rc;
+
+	adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues);
+	if (!adapter->netdev->rx_cpu_rmap)
+		return -ENOMEM;
+	for (i = 0; i < adapter->num_queues; i++) {
+		int irq_idx = ENA_IO_IRQ_IDX(i);
+
+		rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
+				      adapter->msix_entries[irq_idx].vector);
+		if (rc) {
+			free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
+			adapter->netdev->rx_cpu_rmap = NULL;
+			return rc;
+		}
+	}
+#endif /* CONFIG_RFS_ACCEL */
+	return 0;
+}
+
+static void ena_init_io_rings_common(struct ena_adapter *adapter,
+				     struct ena_ring *ring, u16 qid)
+{
+	ring->qid = qid;
+	ring->pdev = adapter->pdev;
+	ring->dev = &adapter->pdev->dev;
+	ring->netdev = adapter->netdev;
+	ring->napi = &adapter->ena_napi[qid].napi;
+	ring->adapter = adapter;
+	ring->ena_dev = adapter->ena_dev;
+	ring->per_napi_packets = 0;
+	ring->per_napi_bytes = 0;
+	ring->cpu = 0;
+	u64_stats_init(&ring->syncp);
+}
+
+static void ena_init_io_rings(struct ena_adapter *adapter)
+{
+	struct ena_com_dev *ena_dev;
+	struct ena_ring *txr, *rxr;
+	int i;
+
+	ena_dev = adapter->ena_dev;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		txr = &adapter->tx_ring[i];
+		rxr = &adapter->rx_ring[i];
+
+		/* TX/RX common ring state */
+		ena_init_io_rings_common(adapter, txr, i);
+		ena_init_io_rings_common(adapter, rxr, i);
+
+		/* TX specific ring state */
+		txr->ring_size = adapter->tx_ring_size;
+		txr->tx_max_header_size = ena_dev->tx_max_header_size;
+		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
+		txr->sgl_size = adapter->max_tx_sgl_size;
+		txr->smoothed_interval =
+			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
+
+		/* RX specific ring state */
+		rxr->ring_size = adapter->rx_ring_size;
+		rxr->rx_copybreak = adapter->rx_copybreak;
+		rxr->sgl_size = adapter->max_rx_sgl_size;
+		rxr->smoothed_interval =
+			ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+	}
+}
+
+/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
+{
+	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
+	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
+	int size, i, node;
+
+	if (tx_ring->tx_buffer_info) {
+		netif_err(adapter, ifup,
+			  adapter->netdev, "tx_buffer_info is not NULL");
+		return -EEXIST;
+	}
+
+	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
+	node = cpu_to_node(ena_irq->cpu);
+
+	tx_ring->tx_buffer_info = vzalloc_node(size, node);
+	if (!tx_ring->tx_buffer_info) {
+		tx_ring->tx_buffer_info = vzalloc(size);
+		if (!tx_ring->tx_buffer_info)
+			return -ENOMEM;
+	}
+
+	size = sizeof(u16) * tx_ring->ring_size;
+	tx_ring->free_tx_ids = vzalloc_node(size, node);
+	if (!tx_ring->free_tx_ids) {
+		tx_ring->free_tx_ids = vzalloc(size);
+		if (!tx_ring->free_tx_ids) {
+			vfree(tx_ring->tx_buffer_info);
+			return -ENOMEM;
+		}
+	}
+
+	/* Req id ring for TX out of order completions */
+	for (i = 0; i < tx_ring->ring_size; i++)
+		tx_ring->free_tx_ids[i] = i;
+
+	/* Reset tx statistics */
+	memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+	tx_ring->cpu = ena_irq->cpu;
+	return 0;
+}
+
+/* ena_free_tx_resources - Free I/O Tx Resources per Queue
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Free all transmit software resources
+ */
+static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
+{
+	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
+
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+
+	vfree(tx_ring->free_tx_ids);
+	tx_ring->free_tx_ids = NULL;
+}
+
+/* ena_setup_all_tx_resources - allocate I/O Tx resources for all queues
+ * @adapter: private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ena_setup_all_tx_resources(struct ena_adapter *adapter)
+{
+	int i, rc = 0;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rc = ena_setup_tx_resources(adapter, i);
+		if (rc)
+			goto err_setup_tx;
+	}
+
+	return 0;
+
+err_setup_tx:
+
+	netif_err(adapter, ifup, adapter->netdev,
+		  "Tx queue %d: allocation failed\n", i);
+
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		ena_free_tx_resources(adapter, i);
+	return rc;
+}
+
+/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ */
+static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		ena_free_tx_resources(adapter, i);
+}
+
+/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ena_setup_rx_resources(struct ena_adapter *adapter,
+				  u32 qid)
+{
+	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
+	int size, node;
+
+	if (rx_ring->rx_buffer_info) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "rx_buffer_info is not NULL");
+		return -EEXIST;
+	}
+
+	/* allocate an extra element so that in the rx path
+	 * we can always prefetch rx_info + 1
+	 */
+	size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
+	node = cpu_to_node(ena_irq->cpu);
+
+	rx_ring->rx_buffer_info = vzalloc_node(size, node);
+	if (!rx_ring->rx_buffer_info) {
+		rx_ring->rx_buffer_info = vzalloc(size);
+		if (!rx_ring->rx_buffer_info)
+			return -ENOMEM;
+	}
+
+	/* Reset rx statistics */
+	memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
+
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+	rx_ring->cpu = ena_irq->cpu;
+
+	return 0;
+}
+
+/* ena_free_rx_resources - Free I/O Rx Resources
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Free all receive software resources
+ */
+static void ena_free_rx_resources(struct ena_adapter *adapter,
+				  u32 qid)
+{
+	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+}
+
+/* ena_setup_all_rx_resources - allocate I/O Rx resources for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
+{
+	int i, rc = 0;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rc = ena_setup_rx_resources(adapter, i);
+		if (rc)
+			goto err_setup_rx;
+	}
+
+	return 0;
+
+err_setup_rx:
+
+	netif_err(adapter, ifup, adapter->netdev,
+		  "Rx queue %d: allocation failed\n", i);
+
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		ena_free_rx_resources(adapter, i);
+	return rc;
+}
+
+/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ */
+static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		ena_free_rx_resources(adapter, i);
+}
+
+static inline int ena_alloc_rx_page(struct ena_ring *rx_ring,
+				    struct ena_rx_buffer *rx_info, gfp_t gfp)
+{
+	struct ena_com_buf *ena_buf;
+	struct page *page;
+	dma_addr_t dma;
+
+	/* the previously allocated page has not been consumed yet, keep it */
+	if (unlikely(rx_info->page))
+		return 0;
+
+	page = alloc_page(gfp);
+	if (unlikely(!page)) {
+		u64_stats_update_begin(&rx_ring->syncp);
+		rx_ring->rx_stats.page_alloc_fail++;
+		u64_stats_update_end(&rx_ring->syncp);
+		return -ENOMEM;
+	}
+
+	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE,
+			   DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
+		u64_stats_update_begin(&rx_ring->syncp);
+		rx_ring->rx_stats.dma_mapping_err++;
+		u64_stats_update_end(&rx_ring->syncp);
+
+		__free_page(page);
+		return -EIO;
+	}
+	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+		  "alloc page %p, rx_info %p\n", page, rx_info);
+
+	rx_info->page = page;
+	rx_info->page_offset = 0;
+	ena_buf = &rx_info->ena_buf;
+	ena_buf->paddr = dma;
+	ena_buf->len = PAGE_SIZE;
+
+	return 0;
+}
+
+static void ena_free_rx_page(struct ena_ring *rx_ring,
+			     struct ena_rx_buffer *rx_info)
+{
+	struct page *page = rx_info->page;
+	struct ena_com_buf *ena_buf = &rx_info->ena_buf;
+
+	if (unlikely(!page)) {
+		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
+			   "Trying to free unallocated buffer\n");
+		return;
+	}
+
+	dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE,
+		       DMA_FROM_DEVICE);
+
+	__free_page(page);
+	rx_info->page = NULL;
+}
+
+static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
+{
+	u16 next_to_use;
+	u32 i;
+	int rc;
+
+	next_to_use = rx_ring->next_to_use;
+
+	for (i = 0; i < num; i++) {
+		struct ena_rx_buffer *rx_info =
+			&rx_ring->rx_buffer_info[next_to_use];
+
+		rc = ena_alloc_rx_page(rx_ring, rx_info,
+				       __GFP_COLD | GFP_ATOMIC | __GFP_COMP);
+		if (unlikely(rc < 0)) {
+			netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
+				   "failed to alloc buffer for rx queue %d\n",
+				   rx_ring->qid);
+			break;
+		}
+		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
+						&rx_info->ena_buf,
+						next_to_use);
+		if (unlikely(rc)) {
+			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
+				   "failed to add buffer for rx queue %d\n",
+				   rx_ring->qid);
+			break;
+		}
+		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
+						   rx_ring->ring_size);
+	}
+
+	if (unlikely(i < num)) {
+		u64_stats_update_begin(&rx_ring->syncp);
+		rx_ring->rx_stats.refil_partial++;
+		u64_stats_update_end(&rx_ring->syncp);
+		netdev_warn(rx_ring->netdev,
+			    "refilled rx qid %d with only %d buffers (from %d)\n",
+			    rx_ring->qid, i, num);
+	}
+
+	if (likely(i)) {
+		/* Add a memory barrier to make sure the descriptors were
+		 * written before issuing the doorbell
+		 */
+		wmb();
+		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
+	}
+
+	rx_ring->next_to_use = next_to_use;
+
+	return i;
+}
+
+static void ena_free_rx_bufs(struct ena_adapter *adapter,
+			     u32 qid)
+{
+	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+	u32 i;
+
+	for (i = 0; i < rx_ring->ring_size; i++) {
+		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
+
+		if (rx_info->page)
+			ena_free_rx_page(rx_ring, rx_info);
+	}
+}
+
+/* ena_refill_all_rx_bufs - allocate Rx buffers for all queues
+ * @adapter: board private structure
+ */
+static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
+{
+	struct ena_ring *rx_ring;
+	int i, rc, bufs_num;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rx_ring = &adapter->rx_ring[i];
+		bufs_num = rx_ring->ring_size - 1;
+		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
+
+		if (unlikely(rc != bufs_num))
+			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
+				   "refilling Queue %d failed. allocated %d buffers from: %d\n",
+				   i, rc, bufs_num);
+	}
+}
+
+static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		ena_free_rx_bufs(adapter, i);
+}
+
+/* ena_free_tx_bufs - Free Tx Buffers per Queue
+ * @tx_ring: TX ring whose buffers are to be freed
+ */
+static void ena_free_tx_bufs(struct ena_ring *tx_ring)
+{
+	u32 i;
+
+	for (i = 0; i < tx_ring->ring_size; i++) {
+		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
+		struct ena_com_buf *ena_buf;
+		int nr_frags;
+		int j;
+
+		if (!tx_info->skb)
+			continue;
+
+		netdev_notice(tx_ring->netdev,
+			      "free uncompleted tx skb qid %d idx 0x%x\n",
+			      tx_ring->qid, i);
+
+		ena_buf = tx_info->bufs;
+		dma_unmap_single(tx_ring->dev,
+				 ena_buf->paddr,
+				 ena_buf->len,
+				 DMA_TO_DEVICE);
+
+		/* unmap remaining mapped pages */
+		nr_frags = tx_info->num_of_bufs - 1;
+		for (j = 0; j < nr_frags; j++) {
+			ena_buf++;
+			dma_unmap_page(tx_ring->dev,
+				       ena_buf->paddr,
+				       ena_buf->len,
+				       DMA_TO_DEVICE);
+		}
+
+		dev_kfree_skb_any(tx_info->skb);
+	}
+	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+						  tx_ring->qid));
+}
+
+static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
+{
+	struct ena_ring *tx_ring;
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		tx_ring = &adapter->tx_ring[i];
+		ena_free_tx_bufs(tx_ring);
+	}
+}
+
+static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
+{
+	u16 ena_qid;
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		ena_qid = ENA_IO_TXQ_IDX(i);
+		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
+	}
+}
+
+static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
+{
+	u16 ena_qid;
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		ena_qid = ENA_IO_RXQ_IDX(i);
+		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
+	}
+}
+
+static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
+{
+	ena_destroy_all_tx_queues(adapter);
+	ena_destroy_all_rx_queues(adapter);
+}
+
+static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
+{
+	struct ena_tx_buffer *tx_info = NULL;
+
+	if (likely(req_id < tx_ring->ring_size)) {
+		tx_info = &tx_ring->tx_buffer_info[req_id];
+		if (likely(tx_info->skb))
+			return 0;
+	}
+
+	if (tx_info)
+		netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
+			  "tx_info doesn't have valid skb\n");
+	else
+		netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
+			  "Invalid req_id: %hu\n", req_id);
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.bad_req_id++;
+	u64_stats_update_end(&tx_ring->syncp);
+
+	/* Trigger device reset */
+	set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags);
+	return -EFAULT;
+}
+
+static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
+{
+	struct netdev_queue *txq;
+	bool above_thresh;
+	u32 tx_bytes = 0;
+	u32 total_done = 0;
+	u16 next_to_clean;
+	u16 req_id;
+	int tx_pkts = 0;
+	int rc;
+
+	next_to_clean = tx_ring->next_to_clean;
+	txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
+
+	while (tx_pkts < budget) {
+		struct ena_tx_buffer *tx_info;
+		struct sk_buff *skb;
+		struct ena_com_buf *ena_buf;
+		int i, nr_frags;
+
+		rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
+						&req_id);
+		if (rc)
+			break;
+
+		rc = validate_tx_req_id(tx_ring, req_id);
+		if (rc)
+			break;
+
+		tx_info = &tx_ring->tx_buffer_info[req_id];
+		skb = tx_info->skb;
+
+		/* prefetch skb_end_pointer() to speed up skb_shinfo(skb) */
+		prefetch(&skb->end);
+
+		tx_info->skb = NULL;
+		tx_info->last_jiffies = 0;
+
+		if (likely(tx_info->num_of_bufs != 0)) {
+			ena_buf = tx_info->bufs;
+
+			dma_unmap_single(tx_ring->dev,
+					 dma_unmap_addr(ena_buf, paddr),
+					 dma_unmap_len(ena_buf, len),
+					 DMA_TO_DEVICE);
+
+			/* unmap remaining mapped pages */
+			nr_frags = tx_info->num_of_bufs - 1;
+			for (i = 0; i < nr_frags; i++) {
+				ena_buf++;
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(ena_buf, paddr),
+					       dma_unmap_len(ena_buf, len),
+					       DMA_TO_DEVICE);
+			}
+		}
+
+		netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
+			  "tx_poll: q %d skb %p completed\n", tx_ring->qid,
+			  skb);
+
+		tx_bytes += skb->len;
+		dev_kfree_skb(skb);
+		tx_pkts++;
+		total_done += tx_info->tx_descs;
+
+		tx_ring->free_tx_ids[next_to_clean] = req_id;
+		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
+						     tx_ring->ring_size);
+	}
+
+	tx_ring->next_to_clean = next_to_clean;
+	ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
+	ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
+
+	netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
+
+	netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
+		  "tx_poll: q %d done. total pkts: %d\n",
+		  tx_ring->qid, tx_pkts);
+
+	/* need to make the ring's circular update visible to
+	 * ena_start_xmit() before checking for netif_queue_stopped().
+	 */
+	smp_mb();
+
+	above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
+		ENA_TX_WAKEUP_THRESH;
+	if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
+		__netif_tx_lock(txq, smp_processor_id());
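+		/* Re-check the stop condition under the tx lock to avoid
+		 * racing with ena_start_xmit() stopping the queue.
+		 */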
+		above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
+			ENA_TX_WAKEUP_THRESH;
+		if (netif_tx_queue_stopped(txq) && above_thresh) {
+			netif_tx_wake_queue(txq);
+			u64_stats_update_begin(&tx_ring->syncp);
+			tx_ring->tx_stats.queue_wakeup++;
+			u64_stats_update_end(&tx_ring->syncp);
+		}
+		__netif_tx_unlock(txq);
+	}
+
+	tx_ring->per_napi_bytes += tx_bytes;
+	tx_ring->per_napi_packets += tx_pkts;
+
+	return tx_pkts;
+}
+
+static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
+				  struct ena_com_rx_buf_info *ena_bufs,
+				  u32 descs,
+				  u16 *next_to_clean)
+{
+	struct sk_buff *skb;
+	struct ena_rx_buffer *rx_info =
+		&rx_ring->rx_buffer_info[*next_to_clean];
+	u32 len;
+	u32 buf = 0;
+	void *va;
+
+	len = ena_bufs[0].len;
+	if (unlikely(!rx_info->page)) {
+		netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+			  "Page is NULL\n");
+		return NULL;
+	}
+
+	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+		  "rx_info %p page %p\n",
+		  rx_info, rx_info->page);
+
+	/* save virt address of first buffer */
+	va = page_address(rx_info->page) + rx_info->page_offset;
+	prefetch(va + NET_IP_ALIGN);
+
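+	/* For packets not larger than rx_copybreak the data is copied into a
+	 * freshly allocated skb, so the receive page stays mapped and can be
+	 * reused by the ring.
+	 */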
+	if (len <= rx_ring->rx_copybreak) {
+		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+						rx_ring->rx_copybreak);
+		if (unlikely(!skb)) {
+			u64_stats_update_begin(&rx_ring->syncp);
+			rx_ring->rx_stats.skb_alloc_fail++;
+			u64_stats_update_end(&rx_ring->syncp);
+			netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+				  "Failed to allocate skb\n");
+			return NULL;
+		}
+
+		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+			  "rx allocated small packet. len %d. data_len %d\n",
+			  skb->len, skb->data_len);
+
+		/* sync this buffer for CPU use */
+		dma_sync_single_for_cpu(rx_ring->dev,
+					dma_unmap_addr(&rx_info->ena_buf, paddr),
+					len,
+					DMA_FROM_DEVICE);
+		skb_copy_to_linear_data(skb, va, len);
+		dma_sync_single_for_device(rx_ring->dev,
+					   dma_unmap_addr(&rx_info->ena_buf, paddr),
+					   len,
+					   DMA_FROM_DEVICE);
+
+		skb_put(skb, len);
+		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+		*next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
+						     rx_ring->ring_size);
+		return skb;
+	}
+
+	skb = napi_get_frags(rx_ring->napi);
+	if (unlikely(!skb)) {
+		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+			  "Failed allocating skb\n");
+		u64_stats_update_begin(&rx_ring->syncp);
+		rx_ring->rx_stats.skb_alloc_fail++;
+		u64_stats_update_end(&rx_ring->syncp);
+		return NULL;
+	}
+
+	do {
+		dma_unmap_page(rx_ring->dev,
+			       dma_unmap_addr(&rx_info->ena_buf, paddr),
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
+				rx_info->page_offset, len, PAGE_SIZE);
+
+		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+			  "rx skb updated. len %d. data_len %d\n",
+			  skb->len, skb->data_len);
+
+		rx_info->page = NULL;
+		*next_to_clean =
+			ENA_RX_RING_IDX_NEXT(*next_to_clean,
+					     rx_ring->ring_size);
+		if (likely(--descs == 0))
+			break;
+		rx_info = &rx_ring->rx_buffer_info[*next_to_clean];
+		len = ena_bufs[++buf].len;
+	} while (1);
+
+	return skb;
+}
+
+/* ena_rx_checksum - indicate in skb if hw indicated a good cksum
+ * @adapter: structure containing adapter specific data
+ * @ena_rx_ctx: received packet context/metadata
+ * @skb: skb currently being received and modified
+ */
+static inline void ena_rx_checksum(struct ena_ring *rx_ring,
+				   struct ena_com_rx_ctx *ena_rx_ctx,
+				   struct sk_buff *skb)
+{
+	/* Rx csum disabled */
+	if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
+		skb->ip_summed = CHECKSUM_NONE;
+		return;
+	}
+
+	/* For fragmented packets the checksum isn't valid */
+	if (ena_rx_ctx->frag) {
+		skb->ip_summed = CHECKSUM_NONE;
+		return;
+	}
+
+	/* if IPv4 and the header checksum is reported bad */
+	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
+		     (ena_rx_ctx->l3_csum_err))) {
+		/* ipv4 checksum error */
+		skb->ip_summed = CHECKSUM_NONE;
+		u64_stats_update_begin(&rx_ring->syncp);
+		rx_ring->rx_stats.bad_csum++;
+		u64_stats_update_end(&rx_ring->syncp);
+		netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+			  "RX IPv4 header checksum error\n");
+		return;
+	}
+
+	/* if TCP/UDP */
+	if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
+		   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
+		if (unlikely(ena_rx_ctx->l4_csum_err)) {
+			/* TCP/UDP checksum error */
+			u64_stats_update_begin(&rx_ring->syncp);
+			rx_ring->rx_stats.bad_csum++;
+			u64_stats_update_end(&rx_ring->syncp);
+			netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+				  "RX L4 checksum error\n");
+			skb->ip_summed = CHECKSUM_NONE;
+			return;
+		}
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+}
+
+static void ena_set_rx_hash(struct ena_ring *rx_ring,
+			    struct ena_com_rx_ctx *ena_rx_ctx,
+			    struct sk_buff *skb)
+{
+	enum pkt_hash_types hash_type;
+
+	if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
+		if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
+			   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
+
+			hash_type = PKT_HASH_TYPE_L4;
+		else
+			hash_type = PKT_HASH_TYPE_NONE;
+
+		/* Override hash type if the packet is fragmented */
+		if (ena_rx_ctx->frag)
+			hash_type = PKT_HASH_TYPE_NONE;
+
+		skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
+	}
+}
+
+/* ena_clean_rx_irq - Cleanup RX irq
+ * @rx_ring: RX ring to clean
+ * @napi: napi handler
+ * @budget: how many packets driver is allowed to clean
+ *
+ * Returns the number of cleaned buffers.
+ */
+static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+			    u32 budget)
+{
+	u16 next_to_clean = rx_ring->next_to_clean;
+	u32 res_budget, work_done;
+
+	struct ena_com_rx_ctx ena_rx_ctx;
+	struct ena_adapter *adapter;
+	struct sk_buff *skb;
+	int refill_required;
+	int refill_threshold;
+	int rc = 0;
+	int total_len = 0;
+	int rx_copybreak_pkt = 0;
+
+	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+		  "%s qid %d\n", __func__, rx_ring->qid);
+	res_budget = budget;
+
+	do {
+		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
+		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
+		ena_rx_ctx.descs = 0;
+		rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
+				    rx_ring->ena_com_io_sq,
+				    &ena_rx_ctx);
+		if (unlikely(rc))
+			goto error;
+
+		if (unlikely(ena_rx_ctx.descs == 0))
+			break;
+
+		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+			  "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
+			  rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
+			  ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
+
+		/* allocate skb and fill it */
+		skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs,
+				 &next_to_clean);
+
+		/* exit if we failed to retrieve a buffer */
+		if (unlikely(!skb)) {
+			next_to_clean = ENA_RX_RING_IDX_ADD(next_to_clean,
+							    ena_rx_ctx.descs,
+							    rx_ring->ring_size);
+			break;
+		}
+
+		ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
+
+		ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
+
+		skb_record_rx_queue(skb, rx_ring->qid);
+
+		if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) {
+			total_len += rx_ring->ena_bufs[0].len;
+			rx_copybreak_pkt++;
+			napi_gro_receive(napi, skb);
+		} else {
+			total_len += skb->len;
+			napi_gro_frags(napi);
+		}
+
+		res_budget--;
+	} while (likely(res_budget));
+
+	work_done = budget - res_budget;
+	rx_ring->per_napi_bytes += total_len;
+	rx_ring->per_napi_packets += work_done;
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->rx_stats.bytes += total_len;
+	rx_ring->rx_stats.cnt += work_done;
+	rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
+	u64_stats_update_end(&rx_ring->syncp);
+
+	rx_ring->next_to_clean = next_to_clean;
+
+	refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
+	refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER;
+
+	/* Optimization, try to batch new rx buffers */
+	if (refill_required > refill_threshold) {
+		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
+		ena_refill_rx_bufs(rx_ring, refill_required);
+	}
+
+	return work_done;
+
+error:
+	adapter = netdev_priv(rx_ring->netdev);
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->rx_stats.bad_desc_num++;
+	u64_stats_update_end(&rx_ring->syncp);
+
+	/* Too many desc from the device. Trigger reset */
+	set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+
+	return 0;
+}
+
+inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
+				       struct ena_ring *tx_ring)
+{
+	/* We apply adaptive moderation on Rx path only.
+	 * Tx uses static interrupt moderation.
+	 */
+	ena_com_calculate_interrupt_delay(rx_ring->ena_dev,
+					  rx_ring->per_napi_packets,
+					  rx_ring->per_napi_bytes,
+					  &rx_ring->smoothed_interval,
+					  &rx_ring->moder_tbl_idx);
+
+	/* Reset per napi packets/bytes */
+	tx_ring->per_napi_packets = 0;
+	tx_ring->per_napi_bytes = 0;
+	rx_ring->per_napi_packets = 0;
+	rx_ring->per_napi_bytes = 0;
+}
+
+static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+					     struct ena_ring *rx_ring)
+{
+	int cpu = get_cpu();
+	int numa_node;
+
+	/* Check only one ring since the 2 rings are running on the same cpu */
+	if (likely(tx_ring->cpu == cpu))
+		goto out;
+
+	numa_node = cpu_to_node(cpu);
+	put_cpu();
+
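+	/* Update the device's NUMA hint so completion writes stay local to
+	 * the CPU that now runs this queue pair.
+	 */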
+	if (numa_node != NUMA_NO_NODE) {
+		ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
+		ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node);
+	}
+
+	tx_ring->cpu = cpu;
+	rx_ring->cpu = cpu;
+
+	return;
+out:
+	put_cpu();
+}
+
+static int ena_io_poll(struct napi_struct *napi, int budget)
+{
+	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+	struct ena_ring *tx_ring, *rx_ring;
+	struct ena_eth_io_intr_reg intr_reg;
+
+	u32 tx_work_done;
+	u32 rx_work_done;
+	int tx_budget;
+	int napi_comp_call = 0;
+	int ret;
+
+	tx_ring = ena_napi->tx_ring;
+	rx_ring = ena_napi->rx_ring;
+
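+	/* Tx cleanup gets its own budget, derived from the ring size; Rx uses
+	 * the napi budget supplied by the stack.
+	 */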
+	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
+		napi_complete_done(napi, 0);
+		return 0;
+	}
+
+	tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
+	rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
+
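+	/* Complete napi and unmask the interrupt only when both Tx and Rx
+	 * cleanup finished within their budgets; otherwise ask to be polled
+	 * again.
+	 */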
+	if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
+		napi_complete_done(napi, rx_work_done);
+
+		napi_comp_call = 1;
+		/* Tx and Rx share the same interrupt vector */
+		if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
+			ena_adjust_intr_moderation(rx_ring, tx_ring);
+
+		/* Update intr register: rx intr delay, tx intr delay and
+		 * interrupt unmask
+		 */
+		ena_com_update_intr_reg(&intr_reg,
+					rx_ring->smoothed_interval,
+					tx_ring->smoothed_interval,
+					true);
+
+		/* It is a shared MSI-X vector; the Tx and Rx CQs both have a
+		 * pointer to it, so we use one of them to reach the intr reg.
+		 */
+		ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+
+		ena_update_ring_numa_node(tx_ring, rx_ring);
+
+		ret = rx_work_done;
+	} else {
+		ret = budget;
+	}
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.napi_comp += napi_comp_call;
+	tx_ring->tx_stats.tx_poll++;
+	u64_stats_update_end(&tx_ring->syncp);
+
+	return ret;
+}
+
+static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)data;
+
+	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
+
+	/* Don't call the aenq handler before probe is done */
+	if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
+		ena_com_aenq_intr_handler(adapter->ena_dev, data);
+
+	return IRQ_HANDLED;
+}
+
+/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
+ * @irq: interrupt number
+ * @data: pointer to a network interface private napi device structure
+ */
+static irqreturn_t ena_intr_msix_io(int irq, void *data)
+{
+	struct ena_napi *ena_napi = data;
+
+	napi_schedule(&ena_napi->napi);
+
+	return IRQ_HANDLED;
+}
+
+static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
+{
+	int i, msix_vecs, rc;
+
+	if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
+		netif_err(adapter, probe, adapter->netdev,
+			  "Error, MSI-X is already enabled\n");
+		return -EPERM;
+	}
+
+	/* Reserve the max MSI-X vectors we might need */
+	msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
+
+	netif_dbg(adapter, probe, adapter->netdev,
+		  "trying to enable MSI-X, vectors %d\n", msix_vecs);
+
+	adapter->msix_entries = vzalloc(msix_vecs * sizeof(struct msix_entry));
+
+	if (!adapter->msix_entries)
+		return -ENOMEM;
+
+	for (i = 0; i < msix_vecs; i++)
+		adapter->msix_entries[i].entry = i;
+
+	rc = pci_enable_msix(adapter->pdev, adapter->msix_entries, msix_vecs);
+	if (rc != 0) {
+		netif_err(adapter, probe, adapter->netdev,
+			  "Failed to enable MSI-X, vectors %d rc %d\n",
+			  msix_vecs, rc);
+		return -ENOSPC;
+	}
+
+	netif_dbg(adapter, probe, adapter->netdev, "enable MSI-X, vectors %d\n",
+		  msix_vecs);
+
+	if (msix_vecs >= 1) {
+		if (ena_init_rx_cpu_rmap(adapter))
+			netif_warn(adapter, probe, adapter->netdev,
+				   "Failed to map IRQs to CPUs\n");
+	}
+
+	adapter->msix_vecs = msix_vecs;
+	set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
+
+	return 0;
+}
+
+static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
+{
+	u32 cpu;
+
+	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
+		 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
+		 pci_name(adapter->pdev));
+	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
+		ena_intr_msix_mgmnt;
+	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
+	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
+		adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
+	cpu = cpumask_first(cpu_online_mask);
+	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
+	cpumask_set_cpu(cpu,
+			&adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
+}
+
+static void ena_setup_io_intr(struct ena_adapter *adapter)
+{
+	struct net_device *netdev;
+	int irq_idx, i, cpu;
+
+	netdev = adapter->netdev;
+
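+	/* Spread the IO queue affinity hints round-robin across the online
+	 * CPUs, one vector per Tx/Rx queue pair.
+	 */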
+	for (i = 0; i < adapter->num_queues; i++) {
+		irq_idx = ENA_IO_IRQ_IDX(i);
+		cpu = i % num_online_cpus();
+
+		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
+			 "%s-Tx-Rx-%d", netdev->name, i);
+		adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
+		adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
+		adapter->irq_tbl[irq_idx].vector =
+			adapter->msix_entries[irq_idx].vector;
+		adapter->irq_tbl[irq_idx].cpu = cpu;
+
+		cpumask_set_cpu(cpu,
+				&adapter->irq_tbl[irq_idx].affinity_hint_mask);
+	}
+}
+
+static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
+{
+	unsigned long flags = 0;
+	struct ena_irq *irq;
+	int rc;
+
+	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
+	rc = request_irq(irq->vector, irq->handler, flags, irq->name,
+			 irq->data);
+	if (rc) {
+		netif_err(adapter, probe, adapter->netdev,
+			  "failed to request admin irq\n");
+		return rc;
+	}
+
+	netif_dbg(adapter, probe, adapter->netdev,
+		  "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n",
+		  irq->affinity_hint_mask.bits[0], irq->vector);
+
+	irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+
+	return rc;
+}
+
+static int ena_request_io_irq(struct ena_adapter *adapter)
+{
+	unsigned long flags = 0;
+	struct ena_irq *irq;
+	int rc = 0, i, k;
+
+	if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Failed to request I/O IRQ: MSI-X is not enabled\n");
+		return -EINVAL;
+	}
+
+	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
+		irq = &adapter->irq_tbl[i];
+		rc = request_irq(irq->vector, irq->handler, flags, irq->name,
+				 irq->data);
+		if (rc) {
+			netif_err(adapter, ifup, adapter->netdev,
+				  "Failed to request I/O IRQ. index %d rc %d\n",
+				   i, rc);
+			goto err;
+		}
+
+		netif_dbg(adapter, ifup, adapter->netdev,
+			  "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
+			  i, irq->affinity_hint_mask.bits[0], irq->vector);
+
+		irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+	}
+
+	return rc;
+
+err:
+	for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
+		irq = &adapter->irq_tbl[k];
+		free_irq(irq->vector, irq->data);
+	}
+
+	return rc;
+}
+
+static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
+{
+	struct ena_irq *irq;
+
+	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
+	synchronize_irq(irq->vector);
+	irq_set_affinity_hint(irq->vector, NULL);
+	free_irq(irq->vector, irq->data);
+}
+
+static void ena_free_io_irq(struct ena_adapter *adapter)
+{
+	struct ena_irq *irq;
+	int i;
+
+#ifdef CONFIG_RFS_ACCEL
+	if (adapter->msix_vecs >= 1) {
+		free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
+		adapter->netdev->rx_cpu_rmap = NULL;
+	}
+#endif /* CONFIG_RFS_ACCEL */
+
+	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
+		irq = &adapter->irq_tbl[i];
+		irq_set_affinity_hint(irq->vector, NULL);
+		free_irq(irq->vector, irq->data);
+	}
+}
+
+static void ena_disable_msix(struct ena_adapter *adapter)
+{
+	if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
+		pci_disable_msix(adapter->pdev);
+
+	if (adapter->msix_entries)
+		vfree(adapter->msix_entries);
+	adapter->msix_entries = NULL;
+}
+
+static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
+{
+	int i;
+
+	if (!netif_running(adapter->netdev))
+		return;
+
+	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++)
+		synchronize_irq(adapter->irq_tbl[i].vector);
+}
+
+static void ena_del_napi(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		netif_napi_del(&adapter->ena_napi[i].napi);
+}
+
+static void ena_init_napi(struct ena_adapter *adapter)
+{
+	struct ena_napi *napi;
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		napi = &adapter->ena_napi[i];
+
+		netif_napi_add(adapter->netdev,
+			       &adapter->ena_napi[i].napi,
+			       ena_io_poll,
+			       ENA_NAPI_BUDGET);
+		napi->rx_ring = &adapter->rx_ring[i];
+		napi->tx_ring = &adapter->tx_ring[i];
+		napi->qid = i;
+	}
+}
+
+static void ena_napi_disable_all(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		napi_disable(&adapter->ena_napi[i].napi);
+}
+
+static void ena_napi_enable_all(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		napi_enable(&adapter->ena_napi[i].napi);
+}
+
+static void ena_restore_ethtool_params(struct ena_adapter *adapter)
+{
+	adapter->tx_usecs = 0;
+	adapter->rx_usecs = 0;
+	adapter->tx_frames = 1;
+	adapter->rx_frames = 1;
+}
+
+/* Configure the Rx forwarding */
+static int ena_rss_configure(struct ena_adapter *adapter)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	int rc;
+
+	/* In case the RSS table wasn't initialized by probe */
+	if (!ena_dev->rss.tbl_log_size) {
+		rc = ena_rss_init_default(adapter);
+		if (rc && (rc != -EPERM)) {
+			netif_err(adapter, ifup, adapter->netdev,
+				  "Failed to init RSS rc: %d\n", rc);
+			return rc;
+		}
+	}
+
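+	/* The device returns -EPERM when a feature is not supported; that is
+	 * tolerated below rather than treated as a fatal error.
+	 */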
+	/* Set indirect table */
+	rc = ena_com_indirect_table_set(ena_dev);
+	if (unlikely(rc && rc != -EPERM))
+		return rc;
+
+	/* Configure hash function (if supported) */
+	rc = ena_com_set_hash_function(ena_dev);
+	if (unlikely(rc && (rc != -EPERM)))
+		return rc;
+
+	/* Configure hash inputs (if supported) */
+	rc = ena_com_set_hash_ctrl(ena_dev);
+	if (unlikely(rc && (rc != -EPERM)))
+		return rc;
+
+	return 0;
+}
+
+static int ena_up_complete(struct ena_adapter *adapter)
+{
+	int rc, i;
+
+	rc = ena_rss_configure(adapter);
+	if (rc)
+		return rc;
+
+	ena_init_napi(adapter);
+
+	ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
+
+	ena_refill_all_rx_bufs(adapter);
+
+	/* enable transmits */
+	netif_tx_start_all_queues(adapter->netdev);
+
+	ena_restore_ethtool_params(adapter);
+
+	ena_napi_enable_all(adapter);
+
+	/* schedule napi in case we had pending packets
+	 * left over from the last time napi was disabled
+	 */
+	for (i = 0; i < adapter->num_queues; i++)
+		napi_schedule(&adapter->ena_napi[i].napi);
+
+	return 0;
+}
+
+static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
+{
+	struct ena_com_create_io_ctx ctx = { 0 };
+	struct ena_com_dev *ena_dev;
+	struct ena_ring *tx_ring;
+	u32 msix_vector;
+	u16 ena_qid;
+	int rc;
+
+	ena_dev = adapter->ena_dev;
+
+	tx_ring = &adapter->tx_ring[qid];
+	msix_vector = ENA_IO_IRQ_IDX(qid);
+	ena_qid = ENA_IO_TXQ_IDX(qid);
+
+	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
+	ctx.qid = ena_qid;
+	ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
+	ctx.msix_vector = msix_vector;
+	ctx.queue_size = adapter->tx_ring_size;
+	ctx.numa_node = cpu_to_node(tx_ring->cpu);
+
+	rc = ena_com_create_io_queue(ena_dev, &ctx);
+	if (rc) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Failed to create I/O TX queue num %d rc: %d\n",
+			  qid, rc);
+		return rc;
+	}
+
+	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
+				     &tx_ring->ena_com_io_sq,
+				     &tx_ring->ena_com_io_cq);
+	if (rc) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
+			  qid, rc);
+		ena_com_destroy_io_queue(ena_dev, ena_qid);
+	}
+
+	ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
+	return rc;
+}
+
+static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	int rc, i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rc = ena_create_io_tx_queue(adapter, i);
+		if (rc)
+			goto create_err;
+	}
+
+	return 0;
+
+create_err:
+	while (i--)
+		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
+
+	return rc;
+}
+
+static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
+{
+	struct ena_com_dev *ena_dev;
+	struct ena_com_create_io_ctx ctx = { 0 };
+	struct ena_ring *rx_ring;
+	u32 msix_vector;
+	u16 ena_qid;
+	int rc;
+
+	ena_dev = adapter->ena_dev;
+
+	rx_ring = &adapter->rx_ring[qid];
+	msix_vector = ENA_IO_IRQ_IDX(qid);
+	ena_qid = ENA_IO_RXQ_IDX(qid);
+
+	ctx.qid = ena_qid;
+	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
+	ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+	ctx.msix_vector = msix_vector;
+	ctx.queue_size = adapter->rx_ring_size;
+	ctx.numa_node = cpu_to_node(rx_ring->cpu);
+
+	rc = ena_com_create_io_queue(ena_dev, &ctx);
+	if (rc) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Failed to create I/O RX queue num %d rc: %d\n",
+			  qid, rc);
+		return rc;
+	}
+
+	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
+				     &rx_ring->ena_com_io_sq,
+				     &rx_ring->ena_com_io_cq);
+	if (rc) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
+			  qid, rc);
+		ena_com_destroy_io_queue(ena_dev, ena_qid);
+	}
+
+	ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
+
+	return rc;
+}
+
+static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	int rc, i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rc = ena_create_io_rx_queue(adapter, i);
+		if (rc)
+			goto create_err;
+	}
+
+	return 0;
+
+create_err:
+	while (i--)
+		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
+
+	return rc;
+}
+
+static int ena_up(struct ena_adapter *adapter)
+{
+	int rc;
+
+	netdev_dbg(adapter->netdev, "%s\n", __func__);
+
+	ena_setup_io_intr(adapter);
+
+	rc = ena_request_io_irq(adapter);
+	if (rc)
+		goto err_req_irq;
+
+	/* allocate transmit descriptors */
+	rc = ena_setup_all_tx_resources(adapter);
+	if (rc)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	rc = ena_setup_all_rx_resources(adapter);
+	if (rc)
+		goto err_setup_rx;
+
+	/* Create TX queues */
+	rc = ena_create_all_io_tx_queues(adapter);
+	if (rc)
+		goto err_create_tx_queues;
+
+	/* Create RX queues */
+	rc = ena_create_all_io_rx_queues(adapter);
+	if (rc)
+		goto err_create_rx_queues;
+
+	rc = ena_up_complete(adapter);
+	if (rc)
+		goto err_up;
+
+	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
+		netif_carrier_on(adapter->netdev);
+
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.interface_up++;
+	u64_stats_update_end(&adapter->syncp);
+
+	set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+	return rc;
+
+err_up:
+	ena_destroy_all_rx_queues(adapter);
+err_create_rx_queues:
+	ena_destroy_all_tx_queues(adapter);
+err_create_tx_queues:
+	ena_free_all_io_rx_resources(adapter);
+err_setup_rx:
+	ena_free_all_io_tx_resources(adapter);
+err_setup_tx:
+	ena_free_io_irq(adapter);
+err_req_irq:
+
+	return rc;
+}
+
+static void ena_down(struct ena_adapter *adapter)
+{
+	netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
+
+	clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.interface_down++;
+	u64_stats_update_end(&adapter->syncp);
+
+	/* After this point the napi handler won't enable the tx queue */
+	ena_napi_disable_all(adapter);
+	netif_carrier_off(adapter->netdev);
+	netif_tx_disable(adapter->netdev);
+
+	/* After destroy the queue there won't be any new interrupts */
+	ena_destroy_all_io_queues(adapter);
+
+	ena_disable_io_intr_sync(adapter);
+	ena_free_io_irq(adapter);
+	ena_del_napi(adapter);
+
+	ena_free_all_tx_bufs(adapter);
+	ena_free_all_rx_bufs(adapter);
+	ena_free_all_io_tx_resources(adapter);
+	ena_free_all_io_rx_resources(adapter);
+}
+
+/* ena_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ */
+static int ena_open(struct net_device *netdev)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int rc;
+
+	/* Notify the stack of the actual queue counts. */
+	rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues);
+	if (rc) {
+		netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
+		return rc;
+	}
+
+	rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues);
+	if (rc) {
+		netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
+		return rc;
+	}
+
+	rc = ena_up(adapter);
+	if (rc)
+		return rc;
+
+	return rc;
+}
+
+/* ena_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the driver's control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ */
+static int ena_close(struct net_device *netdev)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
+
+	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+		ena_down(adapter);
+
+	return 0;
+}
+
+static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
+{
+	u32 mss = skb_shinfo(skb)->gso_size;
+	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
+	u8 l4_protocol = 0;
+
+	if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
+		ena_tx_ctx->l4_csum_enable = 1;
+		if (mss) {
+			ena_tx_ctx->tso_enable = 1;
+			ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
+			ena_tx_ctx->l4_csum_partial = 0;
+		} else {
+			ena_tx_ctx->tso_enable = 0;
+			ena_meta->l4_hdr_len = 0;
+			ena_tx_ctx->l4_csum_partial = 1;
+		}
+
+		switch (ip_hdr(skb)->version) {
+		case IPVERSION:
+			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
+			if (ip_hdr(skb)->frag_off & htons(IP_DF))
+				ena_tx_ctx->df = 1;
+			if (mss)
+				ena_tx_ctx->l3_csum_enable = 1;
+			l4_protocol = ip_hdr(skb)->protocol;
+			break;
+		case 6:
+			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
+			l4_protocol = ipv6_hdr(skb)->nexthdr;
+			break;
+		default:
+			break;
+		}
+
+		if (l4_protocol == IPPROTO_TCP)
+			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
+		else
+			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
+
+		ena_meta->mss = mss;
+		ena_meta->l3_hdr_len = skb_network_header_len(skb);
+		ena_meta->l3_hdr_offset = skb_network_offset(skb);
+		ena_tx_ctx->meta_valid = 1;
+
+	} else {
+		ena_tx_ctx->meta_valid = 0;
+	}
+}
+
+static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
+				       struct sk_buff *skb)
+{
+	int num_frags, header_len, rc;
+
+	num_frags = skb_shinfo(skb)->nr_frags;
+	header_len = skb_headlen(skb);
+
+	if (num_frags < tx_ring->sgl_size)
+		return 0;
+
+	if ((num_frags == tx_ring->sgl_size) &&
+	    (header_len < tx_ring->tx_max_header_size))
+		return 0;
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.linearize++;
+	u64_stats_update_end(&tx_ring->syncp);
+
+	rc = skb_linearize(skb);
+	if (unlikely(rc)) {
+		u64_stats_update_begin(&tx_ring->syncp);
+		tx_ring->tx_stats.linearize_failed++;
+		u64_stats_update_end(&tx_ring->syncp);
+	}
+
+	return rc;
+}
+
+/* Called with netif_tx_lock. */
+static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ena_adapter *adapter = netdev_priv(dev);
+	struct ena_tx_buffer *tx_info;
+	struct ena_com_tx_ctx ena_tx_ctx;
+	struct ena_ring *tx_ring;
+	struct netdev_queue *txq;
+	struct ena_com_buf *ena_buf;
+	void *push_hdr;
+	u32 len, last_frag;
+	u16 next_to_use;
+	u16 req_id;
+	u16 push_len;
+	u16 header_len;
+	dma_addr_t dma;
+	int qid, rc, nb_hw_desc;
+	int i = -1;
+
+	netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
+	/* Determine which tx ring the skb will be placed on */
+	qid = skb_get_queue_mapping(skb);
+	tx_ring = &adapter->tx_ring[qid];
+	txq = netdev_get_tx_queue(dev, qid);
+
+	rc = ena_check_and_linearize_skb(tx_ring, skb);
+	if (unlikely(rc))
+		goto error_drop_packet;
+
+	skb_tx_timestamp(skb);
+	len = skb_headlen(skb);
+
+	next_to_use = tx_ring->next_to_use;
+	req_id = tx_ring->free_tx_ids[next_to_use];
+	tx_info = &tx_ring->tx_buffer_info[req_id];
+	tx_info->num_of_bufs = 0;
+
+	WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
+	ena_buf = tx_info->bufs;
+	tx_info->skb = skb;
+
+	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+		/* prepare the push buffer */
+		push_len = min_t(u32, len, tx_ring->tx_max_header_size);
+		header_len = push_len;
+		push_hdr = skb->data;
+	} else {
+		push_len = 0;
+		header_len = min_t(u32, len, tx_ring->tx_max_header_size);
+		push_hdr = NULL;
+	}
+
+	netif_dbg(adapter, tx_queued, dev,
+		  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
+		  push_hdr, push_len);
+
+	if (len > push_len) {
+		dma = dma_map_single(tx_ring->dev, skb->data + push_len,
+				     len - push_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto error_report_dma_error;
+
+		ena_buf->paddr = dma;
+		ena_buf->len = len - push_len;
+
+		ena_buf++;
+		tx_info->num_of_bufs++;
+	}
+
+	last_frag = skb_shinfo(skb)->nr_frags;
+
+	for (i = 0; i < last_frag; i++) {
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		len = skb_frag_size(frag);
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len,
+				       DMA_TO_DEVICE);
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto error_report_dma_error;
+
+		ena_buf->paddr = dma;
+		ena_buf->len = len;
+		ena_buf++;
+	}
+
+	tx_info->num_of_bufs += last_frag;
+
+	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
+	ena_tx_ctx.ena_bufs = tx_info->bufs;
+	ena_tx_ctx.push_header = push_hdr;
+	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
+	ena_tx_ctx.req_id = req_id;
+	ena_tx_ctx.header_len = header_len;
+
+	/* set flags and meta data */
+	ena_tx_csum(&ena_tx_ctx, skb);
+
+	/* prepare the packet's descriptors to dma engine */
+	rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx,
+				&nb_hw_desc);
+
+	if (unlikely(rc)) {
+		netif_err(adapter, tx_queued, dev,
+			  "failed to prepare tx bufs\n");
+		u64_stats_update_begin(&tx_ring->syncp);
+		tx_ring->tx_stats.queue_stop++;
+		tx_ring->tx_stats.prepare_ctx_err++;
+		u64_stats_update_end(&tx_ring->syncp);
+		netif_tx_stop_queue(txq);
+		goto error_unmap_dma;
+	}
+
+	netdev_tx_sent_queue(txq, skb->len);
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.cnt++;
+	tx_ring->tx_stats.bytes += skb->len;
+	u64_stats_update_end(&tx_ring->syncp);
+
+	tx_info->tx_descs = nb_hw_desc;
+	tx_info->last_jiffies = jiffies;
+
+	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
+		tx_ring->ring_size);
+
+	/* This WMB is aimed to:
+	 * 1 - perform an smp barrier before reading next_to_completion
+	 * 2 - make sure the descriptors were written before triggering
+	 *     the doorbell
+	 */
+	wmb();
+
+	/* Stop the queue when no more space is available; a packet can need
+	 * up to sgl_size + 2 descriptors: one for the meta descriptor and one
+	 * for the header (if the header is larger than tx_max_header_size).
+	 */
+	if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) <
+		     (tx_ring->sgl_size + 2))) {
+		netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
+			  __func__, qid);
+
+		netif_tx_stop_queue(txq);
+		u64_stats_update_begin(&tx_ring->syncp);
+		tx_ring->tx_stats.queue_stop++;
+		u64_stats_update_end(&tx_ring->syncp);
+
+		/* There is a rare condition where this function decides to
+		 * stop the queue but meanwhile clean_tx_irq updates
+		 * next_to_completion and terminates.
+		 * The queue would then remain stopped forever.
+		 * To solve this issue this function performs an rmb, checks
+		 * the wakeup condition and wakes up the queue if needed.
+		 */
+		smp_rmb();
+
+		if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq)
+				> ENA_TX_WAKEUP_THRESH) {
+			netif_tx_wake_queue(txq);
+			u64_stats_update_begin(&tx_ring->syncp);
+			tx_ring->tx_stats.queue_wakeup++;
+			u64_stats_update_end(&tx_ring->syncp);
+		}
+	}
+
+	if (netif_xmit_stopped(txq) || !skb->xmit_more) {
+		/* trigger the dma engine */
+		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+		u64_stats_update_begin(&tx_ring->syncp);
+		tx_ring->tx_stats.doorbells++;
+		u64_stats_update_end(&tx_ring->syncp);
+	}
+
+	return NETDEV_TX_OK;
+
+error_report_dma_error:
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.dma_mapping_err++;
+	u64_stats_update_end(&tx_ring->syncp);
+	netdev_warn(adapter->netdev, "failed to map skb\n");
+
+	tx_info->skb = NULL;
+
+error_unmap_dma:
+	if (i >= 0) {
+		/* save value of frag that failed */
+		last_frag = i;
+
+		/* start back at beginning and unmap skb */
+		tx_info->skb = NULL;
+		ena_buf = tx_info->bufs;
+		dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
+				 dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
+
+		/* unmap remaining mapped pages */
+		for (i = 0; i < last_frag; i++) {
+			ena_buf++;
+			dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
+				       dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
+		}
+	}
+
+error_drop_packet:
+
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void ena_netpoll(struct net_device *netdev)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		napi_schedule(&adapter->ena_napi[i].napi);
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
+static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
+			    void *accel_priv, select_queue_fallback_t fallback)
+{
+	u16 qid;
+	/* We suspect that this is good for in-kernel network services that
+	 * want to loop an incoming skb's rx queue back to tx; for normal
+	 * user-generated traffic we will most probably not get here.
+	 */
+	if (skb_rx_queue_recorded(skb))
+		qid = skb_get_rx_queue(skb);
+	else
+		qid = fallback(dev, skb);
+
+	return qid;
+}
+
+static void ena_config_host_info(struct ena_com_dev *ena_dev)
+{
+	struct ena_admin_host_info *host_info;
+	int rc;
+
+	/* Allocate only the host info */
+	rc = ena_com_allocate_host_info(ena_dev);
+	if (rc) {
+		pr_err("Cannot allocate host info\n");
+		return;
+	}
+
+	host_info = ena_dev->host_attr.host_info;
+
+	host_info->os_type = ENA_ADMIN_OS_LINUX;
+	host_info->kernel_ver = LINUX_VERSION_CODE;
+	strncpy(host_info->kernel_ver_str, utsname()->version,
+		sizeof(host_info->kernel_ver_str) - 1);
+	host_info->os_dist = 0;
+	strncpy(host_info->os_dist_str, utsname()->release,
+		sizeof(host_info->os_dist_str) - 1);
+	host_info->driver_version =
+		(DRV_MODULE_VER_MAJOR) |
+		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
+		(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
+
+	rc = ena_com_set_host_attributes(ena_dev);
+	if (rc) {
+		if (rc == -EPERM)
+			pr_warn("Cannot set host attributes\n");
+		else
+			pr_err("Cannot set host attributes\n");
+
+		goto err;
+	}
+
+	return;
+
+err:
+	ena_com_delete_host_info(ena_dev);
+}
+
+static void ena_config_debug_area(struct ena_adapter *adapter)
+{
+	u32 debug_area_size;
+	int rc, ss_count;
+
+	ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
+	if (ss_count <= 0) {
+		netif_err(adapter, drv, adapter->netdev,
+			  "SS count is negative or zero\n");
+		return;
+	}
+
+	/* allocate 32 bytes for each string and 64 bits for the value */
+	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
+
+	rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
+	if (rc) {
+		pr_err("Cannot allocate debug area\n");
+		return;
+	}
+
+	rc = ena_com_set_host_attributes(adapter->ena_dev);
+	if (rc) {
+		if (rc == -EPERM)
+			netif_warn(adapter, drv, adapter->netdev,
+				   "Cannot set host attributes\n");
+		else
+			netif_err(adapter, drv, adapter->netdev,
+				  "Cannot set host attributes\n");
+		goto err;
+	}
+
+	return;
+err:
+	ena_com_delete_debug_area(adapter->ena_dev);
+}
+
+static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev,
+						 struct rtnl_link_stats64 *stats)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	struct ena_admin_basic_stats ena_stats;
+	int rc;
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+		return NULL;
+
+	rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats);
+	if (rc)
+		return NULL;
+
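+	/* The device reports each 64-bit counter as two 32-bit halves. */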
+	stats->tx_bytes = ((u64)ena_stats.tx_bytes_high << 32) |
+		ena_stats.tx_bytes_low;
+	stats->rx_bytes = ((u64)ena_stats.rx_bytes_high << 32) |
+		ena_stats.rx_bytes_low;
+
+	stats->rx_packets = ((u64)ena_stats.rx_pkts_high << 32) |
+		ena_stats.rx_pkts_low;
+	stats->tx_packets = ((u64)ena_stats.tx_pkts_high << 32) |
+		ena_stats.tx_pkts_low;
+
+	stats->rx_dropped = ((u64)ena_stats.rx_drops_high << 32) |
+		ena_stats.rx_drops_low;
+
+	stats->multicast = 0;
+	stats->collisions = 0;
+
+	stats->rx_length_errors = 0;
+	stats->rx_crc_errors = 0;
+	stats->rx_frame_errors = 0;
+	stats->rx_fifo_errors = 0;
+	stats->rx_missed_errors = 0;
+	stats->tx_window_errors = 0;
+
+	stats->rx_errors = 0;
+	stats->tx_errors = 0;
+
+	return stats;
+}
+
+static const struct net_device_ops ena_netdev_ops = {
+	.ndo_open		= ena_open,
+	.ndo_stop		= ena_close,
+	.ndo_start_xmit		= ena_start_xmit,
+	.ndo_select_queue	= ena_select_queue,
+	.ndo_get_stats64	= ena_get_stats64,
+	.ndo_tx_timeout		= ena_tx_timeout,
+	.ndo_change_mtu		= ena_change_mtu,
+	.ndo_set_mac_address	= NULL,
+	.ndo_validate_addr	= eth_validate_addr,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= ena_netpoll,
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+};
+
+static void ena_device_io_suspend(struct work_struct *work)
+{
+	struct ena_adapter *adapter =
+		container_of(work, struct ena_adapter, suspend_io_task);
+	struct net_device *netdev = adapter->netdev;
+
+	/* ena_napi_disable_all disables only the IO handling.
+	 * We are still subject to AENQ keep alive watchdog.
+	 */
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.io_suspend++;
+	u64_stats_update_end(&adapter->syncp);
+	ena_napi_disable_all(adapter);
+	netif_tx_lock(netdev);
+	netif_device_detach(netdev);
+	netif_tx_unlock(netdev);
+}
+
+static void ena_device_io_resume(struct work_struct *work)
+{
+	struct ena_adapter *adapter =
+		container_of(work, struct ena_adapter, resume_io_task);
+	struct net_device *netdev = adapter->netdev;
+
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.io_resume++;
+	u64_stats_update_end(&adapter->syncp);
+
+	netif_device_attach(netdev);
+	ena_napi_enable_all(adapter);
+}
+
+static int ena_device_validate_params(struct ena_adapter *adapter,
+				      struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+	struct net_device *netdev = adapter->netdev;
+	int rc;
+
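+	/* ether_addr_equal() returns true on a match, so !rc means the device
+	 * MAC differs from the one saved earlier.
+	 */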
+	rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
+			      adapter->mac_addr);
+	if (!rc) {
+		netif_err(adapter, drv, netdev,
+			  "Error, mac addresses are different\n");
+		return -EINVAL;
+	}
+
+	if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) ||
+	    (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) {
+		netif_err(adapter, drv, netdev,
+			  "Error, device doesn't support enough queues\n");
+		return -EINVAL;
+	}
+
+	if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
+		netif_err(adapter, drv, netdev,
+			  "Error, device max mtu is smaller than netdev MTU\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
+			   struct ena_com_dev_get_features_ctx *get_feat_ctx,
+			   bool *wd_state)
+{
+	struct device *dev = &pdev->dev;
+	bool readless_supported;
+	u32 aenq_groups;
+	int dma_width;
+	int rc;
+
+	rc = ena_com_mmio_reg_read_request_init(ena_dev);
+	if (rc) {
+		dev_err(dev, "failed to init mmio read less\n");
+		return rc;
+	}
+
+	/* The PCIe configuration space revision id indicates whether mmio reg
+	 * read is disabled
+	 */
+	readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
+	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
+
+	rc = ena_com_dev_reset(ena_dev);
+	if (rc) {
+		dev_err(dev, "Can not reset device\n");
+		goto err_mmio_read_less;
+	}
+
+	rc = ena_com_validate_version(ena_dev);
+	if (rc) {
+		dev_err(dev, "device version is too low\n");
+		goto err_mmio_read_less;
+	}
+
+	dma_width = ena_com_get_dma_width(ena_dev);
+	if (dma_width < 0) {
+		dev_err(dev, "Invalid dma width value %d", dma_width);
+		rc = dma_width;
+		goto err_mmio_read_less;
+	}
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+	if (rc) {
+		dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc);
+		goto err_mmio_read_less;
+	}
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+	if (rc) {
+		dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n",
+			rc);
+		goto err_mmio_read_less;
+	}
+
+	/* ENA admin level init */
+	rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
+	if (rc) {
+		dev_err(dev,
+			"Can not initialize ena admin queue with device\n");
+		goto err_mmio_read_less;
+	}
+
+	/* To enable the msix interrupts the driver needs to know the number
+	 * of queues. So the driver uses polling mode to retrieve this
+	 * information
+	 */
+	ena_com_set_admin_polling_mode(ena_dev, true);
+
+	/* Get Device Attributes */
+	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
+	if (rc) {
+		dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
+		goto err_admin_init;
+	}
+
+	/* Try to turn on all the available aenq groups */
+	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
+		BIT(ENA_ADMIN_FATAL_ERROR) |
+		BIT(ENA_ADMIN_WARNING) |
+		BIT(ENA_ADMIN_NOTIFICATION) |
+		BIT(ENA_ADMIN_KEEP_ALIVE);
+
+	aenq_groups &= get_feat_ctx->aenq.supported_groups;
+
+	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
+	if (rc) {
+		dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
+		goto err_admin_init;
+	}
+
+	*wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
+
+	ena_config_host_info(ena_dev);
+
+	return 0;
+
+err_admin_init:
+	ena_com_admin_destroy(ena_dev);
+err_mmio_read_less:
+	ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+	return rc;
+}
+
+static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
+						    int io_vectors)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct device *dev = &adapter->pdev->dev;
+	int rc;
+
+	rc = ena_enable_msix(adapter, io_vectors);
+	if (rc) {
+		dev_err(dev, "Can not reserve msix vectors\n");
+		return rc;
+	}
+
+	ena_setup_mgmnt_intr(adapter);
+
+	rc = ena_request_mgmnt_irq(adapter);
+	if (rc) {
+		dev_err(dev, "Can not setup management interrupts\n");
+		goto err_disable_msix;
+	}
+
+	ena_com_set_admin_polling_mode(ena_dev, false);
+
+	ena_com_admin_aenq_enable(ena_dev);
+
+	return 0;
+
+err_disable_msix:
+	ena_disable_msix(adapter);
+
+	return rc;
+}
+
+static void ena_fw_reset_device(struct work_struct *work)
+{
+	struct ena_com_dev_get_features_ctx get_feat_ctx;
+	struct ena_adapter *adapter =
+		container_of(work, struct ena_adapter, reset_task);
+	struct net_device *netdev = adapter->netdev;
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct pci_dev *pdev = adapter->pdev;
+	bool dev_up, wd_state;
+	int rc;
+
+	del_timer_sync(&adapter->timer_service);
+
+	rtnl_lock();
+
+	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+	ena_com_set_admin_running_state(ena_dev, false);
+
+	/* After calling ena_close the tx queues and the napi
+	 * are disabled so no one can interfere or touch the
+	 * data structures
+	 */
+	ena_close(netdev);
+
+	rc = ena_com_dev_reset(ena_dev);
+	if (rc) {
+		dev_err(&pdev->dev, "Device reset failed\n");
+		goto err;
+	}
+
+	ena_free_mgmnt_irq(adapter);
+
+	ena_disable_msix(adapter);
+
+	ena_com_abort_admin_commands(ena_dev);
+
+	ena_com_wait_for_abort_completion(ena_dev);
+
+	ena_com_admin_destroy(ena_dev);
+
+	ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+	/* Finish with the destroy part. Start the init part */
+
+	rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
+	if (rc) {
+		dev_err(&pdev->dev, "Can not initialize device\n");
+		goto err;
+	}
+	adapter->wd_state = wd_state;
+
+	rc = ena_device_validate_params(adapter, &get_feat_ctx);
+	if (rc) {
+		dev_err(&pdev->dev, "Validation of device parameters failed\n");
+		goto err_device_destroy;
+	}
+
+	rc = ena_enable_msix_and_set_admin_interrupts(adapter,
+						      adapter->num_queues);
+	if (rc) {
+		dev_err(&pdev->dev, "Enable MSI-X failed\n");
+		goto err_device_destroy;
+	}
+	/* If the interface was up before the reset, bring it up */
+	if (dev_up) {
+		rc = ena_up(adapter);
+		if (rc) {
+			dev_err(&pdev->dev, "Failed to create I/O queues\n");
+			goto err_disable_msix;
+		}
+	}
+
+	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
+
+	rtnl_unlock();
+
+	dev_err(&pdev->dev, "Device reset completed successfully\n");
+
+	return;
+err_disable_msix:
+	ena_free_mgmnt_irq(adapter);
+	ena_disable_msix(adapter);
+err_device_destroy:
+	ena_com_admin_destroy(ena_dev);
+err:
+	rtnl_unlock();
+
+	dev_err(&pdev->dev,
+		"Reset attempt failed. Can not reset the device\n");
+}
+
+static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+{
+	struct ena_tx_buffer *tx_buf;
+	unsigned long last_jiffies;
+	struct ena_ring *tx_ring;
+	int i, j, budget;
+	u32 missed_tx;
+
+	/* Make sure a concurrent process hasn't brought the device down
+	 * without us observing it before we read its state.
+	 */
+	smp_rmb();
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+		return;
+
+	budget = ENA_MONITORED_TX_QUEUES;
+
+	for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
+		tx_ring = &adapter->tx_ring[i];
+
+		for (j = 0; j < tx_ring->ring_size; j++) {
+			tx_buf = &tx_ring->tx_buffer_info[j];
+			last_jiffies = tx_buf->last_jiffies;
+			if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
+				netif_notice(adapter, tx_err, adapter->netdev,
+					     "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
+					     tx_ring->qid, j);
+
+				u64_stats_update_begin(&tx_ring->syncp);
+				missed_tx = tx_ring->tx_stats.missing_tx_comp++;
+				u64_stats_update_end(&tx_ring->syncp);
+
+				/* Clear last jiffies so the lost buffer won't
+				 * be counted twice.
+				 */
+				tx_buf->last_jiffies = 0;
+
+				if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
+					netif_err(adapter, tx_err, adapter->netdev,
+						  "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+						  missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
+					set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+				}
+			}
+		}
+
+		budget--;
+		if (!budget)
+			break;
+	}
+
+	adapter->last_monitored_tx_qid = i % adapter->num_queues;
+}
+
+/* Check for keep alive expiration */
+static void check_for_missing_keep_alive(struct ena_adapter *adapter)
+{
+	unsigned long keep_alive_expired;
+
+	if (!adapter->wd_state)
+		return;
+
+	keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies
+					   + ENA_DEVICE_KALIVE_TIMEOUT);
+	if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
+		netif_err(adapter, drv, adapter->netdev,
+			  "Keep alive watchdog timeout.\n");
+		u64_stats_update_begin(&adapter->syncp);
+		adapter->dev_stats.wd_expired++;
+		u64_stats_update_end(&adapter->syncp);
+		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+	}
+}
+
+static void check_for_admin_com_state(struct ena_adapter *adapter)
+{
+	if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
+		netif_err(adapter, drv, adapter->netdev,
+			  "ENA admin queue is not in running state!\n");
+		u64_stats_update_begin(&adapter->syncp);
+		adapter->dev_stats.admin_q_pause++;
+		u64_stats_update_end(&adapter->syncp);
+		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+	}
+}
+
+static void ena_update_host_info(struct ena_admin_host_info *host_info,
+				 struct net_device *netdev)
+{
+	host_info->supported_network_features[0] =
+		netdev->features & GENMASK_ULL(31, 0);
+	host_info->supported_network_features[1] =
+		(netdev->features & GENMASK_ULL(63, 32)) >> 32;
+}
+
+static void ena_timer_service(unsigned long data)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)data;
+	u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
+	struct ena_admin_host_info *host_info =
+		adapter->ena_dev->host_attr.host_info;
+
+	check_for_missing_keep_alive(adapter);
+
+	check_for_admin_com_state(adapter);
+
+	check_for_missing_tx_completions(adapter);
+
+	if (debug_area)
+		ena_dump_stats_to_buf(adapter, debug_area);
+
+	if (host_info)
+		ena_update_host_info(host_info, adapter->netdev);
+
+	if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+		netif_err(adapter, drv, adapter->netdev,
+			  "Trigger reset is on\n");
+		ena_dump_stats_to_dmesg(adapter);
+		queue_work(ena_wq, &adapter->reset_task);
+		return;
+	}
+
+	/* Reset the timer */
+	mod_timer(&adapter->timer_service, jiffies + HZ);
+}
+
+static int ena_calc_io_queue_num(struct pci_dev *pdev,
+				 struct ena_com_dev *ena_dev,
+				 struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+	int io_sq_num, io_queue_num;
+
+	/* In case of LLQ use the llq number in the get feature cmd */
+	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+		io_sq_num = get_feat_ctx->max_queues.max_llq_num;
+
+		if (io_sq_num == 0) {
+			dev_err(&pdev->dev,
+				"Trying to use LLQ but llq_num is 0. Falling back to regular queues\n");
+
+			ena_dev->tx_mem_queue_type =
+				ENA_ADMIN_PLACEMENT_POLICY_HOST;
+			io_sq_num = get_feat_ctx->max_queues.max_sq_num;
+		}
+	} else {
+		io_sq_num = get_feat_ctx->max_queues.max_sq_num;
+	}
+
+	io_queue_num = min_t(int, num_possible_cpus(), ENA_MAX_NUM_IO_QUEUES);
+	io_queue_num = min_t(int, io_queue_num, io_sq_num);
+	io_queue_num = min_t(int, io_queue_num,
+			     get_feat_ctx->max_queues.max_cq_num);
+	/* 1 IRQ for mgmnt and 1 IRQ for each IO Tx/Rx queue pair */
+	io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1);
+	if (unlikely(!io_queue_num)) {
+		dev_err(&pdev->dev, "The device doesn't have io queues\n");
+		return -EFAULT;
+	}
+
+	return io_queue_num;
+}
+
+static int ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
+			     struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+	bool has_mem_bar;
+
+	has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
+
+	/* Enable push mode if device supports LLQ */
+	if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0))
+		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
+	else
+		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+
+	return 0;
+}
+
+static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
+				 struct net_device *netdev)
+{
+	netdev_features_t dev_features = 0;
+
+	/* Set offload features */
+	if (feat->offload.tx &
+		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
+		dev_features |= NETIF_F_IP_CSUM;
+
+	if (feat->offload.tx &
+		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
+		dev_features |= NETIF_F_IPV6_CSUM;
+
+	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
+		dev_features |= NETIF_F_TSO;
+
+	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
+		dev_features |= NETIF_F_TSO6;
+
+	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
+		dev_features |= NETIF_F_TSO_ECN;
+
+	if (feat->offload.rx_supported &
+		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
+		dev_features |= NETIF_F_RXCSUM;
+
+	if (feat->offload.rx_supported &
+		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
+		dev_features |= NETIF_F_RXCSUM;
+
+	netdev->features =
+		dev_features |
+		NETIF_F_SG |
+		NETIF_F_NTUPLE |
+		NETIF_F_RXHASH |
+		NETIF_F_HIGHDMA;
+
+	netdev->hw_features |= netdev->features;
+	netdev->vlan_features |= netdev->features;
+}
+
+static void ena_set_conf_feat_params(struct ena_adapter *adapter,
+				     struct ena_com_dev_get_features_ctx *feat)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	/* Copy mac address */
+	if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
+		eth_hw_addr_random(netdev);
+		ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
+	} else {
+		ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
+		ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
+	}
+
+	/* Set offload features */
+	ena_set_dev_offloads(feat, netdev);
+
+	adapter->max_mtu = feat->dev_attr.max_mtu;
+}
+
+static int ena_rss_init_default(struct ena_adapter *adapter)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct device *dev = &adapter->pdev->dev;
+	int rc, i;
+	u32 val;
+
+	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
+	if (unlikely(rc)) {
+		dev_err(dev, "Cannot init indirect table\n");
+		goto err_rss_init;
+	}
+
+	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
+		val = ethtool_rxfh_indir_default(i, adapter->num_queues);
+		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
+						       ENA_IO_RXQ_IDX(val));
+		if (unlikely(rc && (rc != -EPERM))) {
+			dev_err(dev, "Cannot fill indirect table\n");
+			goto err_fill_indir;
+		}
+	}
+
+	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
+					ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
+	if (unlikely(rc && (rc != -EPERM))) {
+		dev_err(dev, "Cannot fill hash function\n");
+		goto err_fill_indir;
+	}
+
+	rc = ena_com_set_default_hash_ctrl(ena_dev);
+	if (unlikely(rc && (rc != -EPERM))) {
+		dev_err(dev, "Cannot fill hash control\n");
+		goto err_fill_indir;
+	}
+
+	return 0;
+
+err_fill_indir:
+	ena_com_rss_destroy(ena_dev);
+err_rss_init:
+
+	return rc;
+}
+
+static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
+{
+	int release_bars;
+
+	release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
+	pci_release_selected_regions(pdev, release_bars);
+}
+
+static int ena_calc_queue_size(struct pci_dev *pdev,
+			       struct ena_com_dev *ena_dev,
+			       u16 *max_tx_sgl_size,
+			       u16 *max_rx_sgl_size,
+			       struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+	u32 queue_size = ENA_DEFAULT_RING_SIZE;
+
+	queue_size = min_t(u32, queue_size,
+			   get_feat_ctx->max_queues.max_cq_depth);
+	queue_size = min_t(u32, queue_size,
+			   get_feat_ctx->max_queues.max_sq_depth);
+
+	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+		queue_size = min_t(u32, queue_size,
+				   get_feat_ctx->max_queues.max_llq_depth);
+
+	queue_size = rounddown_pow_of_two(queue_size);
+
+	if (unlikely(!queue_size)) {
+		dev_err(&pdev->dev, "Invalid queue size\n");
+		return -EFAULT;
+	}
+
+	*max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+				 get_feat_ctx->max_queues.max_packet_tx_descs);
+	*max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+				 get_feat_ctx->max_queues.max_packet_rx_descs);
+
+	return queue_size;
+}
+
+/* ena_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ena_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * ena_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuration of the adapter private structure,
+ * and a hardware reset occur.
+ */
+static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct ena_com_dev_get_features_ctx get_feat_ctx;
+	static int version_printed;
+	struct net_device *netdev;
+	struct ena_adapter *adapter;
+	struct ena_com_dev *ena_dev = NULL;
+	static int adapters_found;
+	int io_queue_num, bars, rc;
+	int queue_size;
+	u16 tx_sgl_size = 0;
+	u16 rx_sgl_size = 0;
+	bool wd_state;
+
+	dev_dbg(&pdev->dev, "%s\n", __func__);
+
+	if (version_printed++ == 0)
+		dev_info(&pdev->dev, "%s", version);
+
+	rc = pci_enable_device_mem(pdev);
+	if (rc) {
+		dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
+		return rc;
+	}
+
+	pci_set_master(pdev);
+
+	ena_dev = vzalloc(sizeof(*ena_dev));
+	if (!ena_dev) {
+		rc = -ENOMEM;
+		goto err_disable_device;
+	}
+
+	bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
+	rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+	if (rc) {
+		dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
+			rc);
+		goto err_free_ena_dev;
+	}
+
+	ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR),
+				   pci_resource_len(pdev, ENA_REG_BAR));
+	if (!ena_dev->reg_bar) {
+		dev_err(&pdev->dev, "failed to remap regs bar\n");
+		rc = -EFAULT;
+		goto err_free_region;
+	}
+
+	ena_dev->dmadev = &pdev->dev;
+
+	rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
+	if (rc) {
+		dev_err(&pdev->dev, "ena device init failed\n");
+		if (rc == -ETIME)
+			rc = -EPROBE_DEFER;
+		goto err_free_region;
+	}
+
+	rc = ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
+	if (rc) {
+		dev_err(&pdev->dev, "Invalid module param(push_mode)\n");
+		goto err_device_destroy;
+	}
+
+	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+		ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR),
+					      pci_resource_len(pdev, ENA_MEM_BAR));
+		if (!ena_dev->mem_bar) {
+			rc = -EFAULT;
+			goto err_device_destroy;
+		}
+	}
+
+	/* Initial TX interrupt delay. Assumes 1 usec granularity.
+	 * Updated during device initialization with the real granularity.
+	 */
+	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
+	io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx);
+	queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size,
+					 &rx_sgl_size, &get_feat_ctx);
+	if ((queue_size <= 0) || (io_queue_num <= 0)) {
+		rc = -EFAULT;
+		goto err_device_destroy;
+	}
+
+	dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n",
+		 io_queue_num, queue_size);
+
+	/* dev zeroed in init_etherdev */
+	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num);
+	if (!netdev) {
+		dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
+		rc = -ENOMEM;
+		goto err_device_destroy;
+	}
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	adapter = netdev_priv(netdev);
+	pci_set_drvdata(pdev, adapter);
+
+	adapter->ena_dev = ena_dev;
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+
+	ena_set_conf_feat_params(adapter, &get_feat_ctx);
+
+	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+	adapter->tx_ring_size = queue_size;
+	adapter->rx_ring_size = queue_size;
+
+	adapter->max_tx_sgl_size = tx_sgl_size;
+	adapter->max_rx_sgl_size = rx_sgl_size;
+
+	adapter->num_queues = io_queue_num;
+	adapter->last_monitored_tx_qid = 0;
+
+	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
+	adapter->wd_state = wd_state;
+
+	snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
+
+	rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
+	if (rc) {
+		dev_err(&pdev->dev,
+			"Failed to query interrupt moderation feature\n");
+		goto err_netdev_destroy;
+	}
+	ena_init_io_rings(adapter);
+
+	netdev->netdev_ops = &ena_netdev_ops;
+	netdev->watchdog_timeo = TX_TIMEOUT;
+	ena_set_ethtool_ops(netdev);
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	u64_stats_init(&adapter->syncp);
+
+	rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
+	if (rc) {
+		dev_err(&pdev->dev,
+			"Failed to enable and set the admin interrupts\n");
+		goto err_worker_destroy;
+	}
+	rc = ena_rss_init_default(adapter);
+	if (rc && (rc != -EPERM)) {
+		dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
+		goto err_free_msix;
+	}
+
+	ena_config_debug_area(adapter);
+
+	memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
+
+	netif_carrier_off(netdev);
+
+	rc = register_netdev(netdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Cannot register net device\n");
+		goto err_rss;
+	}
+
+	INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend);
+	INIT_WORK(&adapter->resume_io_task, ena_device_io_resume);
+	INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
+
+	adapter->last_keep_alive_jiffies = jiffies;
+
+	init_timer(&adapter->timer_service);
+	adapter->timer_service.expires = round_jiffies(jiffies + HZ);
+	adapter->timer_service.function = ena_timer_service;
+	adapter->timer_service.data = (unsigned long)adapter;
+
+	add_timer(&adapter->timer_service);
+
+	dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n",
+		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
+		 netdev->dev_addr, io_queue_num);
+
+	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+
+	adapters_found++;
+
+	return 0;
+
+err_rss:
+	ena_com_delete_debug_area(ena_dev);
+	ena_com_rss_destroy(ena_dev);
+err_free_msix:
+	ena_com_dev_reset(ena_dev);
+	ena_free_mgmnt_irq(adapter);
+	ena_disable_msix(adapter);
+err_worker_destroy:
+	ena_com_destroy_interrupt_moderation(ena_dev);
+	del_timer(&adapter->timer_service);
+	cancel_work_sync(&adapter->suspend_io_task);
+	cancel_work_sync(&adapter->resume_io_task);
+err_netdev_destroy:
+	free_netdev(netdev);
+err_device_destroy:
+	ena_com_delete_host_info(ena_dev);
+	ena_com_admin_destroy(ena_dev);
+err_free_region:
+	ena_release_bars(ena_dev, pdev);
+err_free_ena_dev:
+	vfree(ena_dev);
+err_disable_device:
+	pci_disable_device(pdev);
+	return rc;
+}
+
+/*****************************************************************************/
+static int ena_sriov_configure(struct pci_dev *dev, int numvfs)
+{
+	int rc;
+
+	if (numvfs > 0) {
+		rc = pci_enable_sriov(dev, numvfs);
+		if (rc != 0) {
+			dev_err(&dev->dev,
+				"pci_enable_sriov failed to enable %d VFs, error %d\n",
+				numvfs, rc);
+			return rc;
+		}
+
+		return numvfs;
+	}
+
+	if (numvfs == 0) {
+		pci_disable_sriov(dev);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/* ena_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ena_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ */
+static void ena_remove(struct pci_dev *pdev)
+{
+	struct ena_adapter *adapter = pci_get_drvdata(pdev);
+	struct ena_com_dev *ena_dev;
+	struct net_device *netdev;
+
+	if (!adapter)
+		/* This device didn't load properly and its resources are
+		 * already released, so there is nothing to do
+		 */
+		return;
+
+	ena_dev = adapter->ena_dev;
+	netdev = adapter->netdev;
+
+#ifdef CONFIG_RFS_ACCEL
+	if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
+		free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+		netdev->rx_cpu_rmap = NULL;
+	}
+#endif /* CONFIG_RFS_ACCEL */
+
+	unregister_netdev(netdev);
+	del_timer_sync(&adapter->timer_service);
+
+	cancel_work_sync(&adapter->reset_task);
+
+	cancel_work_sync(&adapter->suspend_io_task);
+
+	cancel_work_sync(&adapter->resume_io_task);
+
+	ena_com_dev_reset(ena_dev);
+
+	ena_free_mgmnt_irq(adapter);
+
+	ena_disable_msix(adapter);
+
+	free_netdev(netdev);
+
+	ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+	ena_com_abort_admin_commands(ena_dev);
+
+	ena_com_wait_for_abort_completion(ena_dev);
+
+	ena_com_admin_destroy(ena_dev);
+
+	ena_com_rss_destroy(ena_dev);
+
+	ena_com_delete_debug_area(ena_dev);
+
+	ena_com_delete_host_info(ena_dev);
+
+	ena_release_bars(ena_dev, pdev);
+
+	pci_disable_device(pdev);
+
+	ena_com_destroy_interrupt_moderation(ena_dev);
+
+	vfree(ena_dev);
+}
+
+static struct pci_driver ena_pci_driver = {
+	.name		= DRV_MODULE_NAME,
+	.id_table	= ena_pci_tbl,
+	.probe		= ena_probe,
+	.remove		= ena_remove,
+	.sriov_configure = ena_sriov_configure,
+};
+
+static int __init ena_init(void)
+{
+	pr_info("%s", version);
+
+	ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
+	if (!ena_wq) {
+		pr_err("Failed to create workqueue\n");
+		return -ENOMEM;
+	}
+
+	return pci_register_driver(&ena_pci_driver);
+}
+
+static void __exit ena_cleanup(void)
+{
+	pci_unregister_driver(&ena_pci_driver);
+
+	if (ena_wq) {
+		destroy_workqueue(ena_wq);
+		ena_wq = NULL;
+	}
+}
+
+/******************************************************************************
+ ******************************** AENQ Handlers *******************************
+ *****************************************************************************/
+/* ena_update_on_link_change:
+ * Notify the network interface about the change in link status
+ */
+static void ena_update_on_link_change(void *adapter_data,
+				      struct ena_admin_aenq_entry *aenq_e)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+	struct ena_admin_aenq_link_change_desc *aenq_desc =
+		(struct ena_admin_aenq_link_change_desc *)aenq_e;
+	int status = aenq_desc->flags &
+		ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
+
+	if (status) {
+		netdev_dbg(adapter->netdev, "%s\n", __func__);
+		set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
+		netif_carrier_on(adapter->netdev);
+	} else {
+		clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
+		netif_carrier_off(adapter->netdev);
+	}
+}
+
+static void ena_keep_alive_wd(void *adapter_data,
+			      struct ena_admin_aenq_entry *aenq_e)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+
+	adapter->last_keep_alive_jiffies = jiffies;
+}
+
+static void ena_notification(void *adapter_data,
+			     struct ena_admin_aenq_entry *aenq_e)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+
+	WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
+	     "Invalid group(%x) expected %x\n",
+	     aenq_e->aenq_common_desc.group,
+	     ENA_ADMIN_NOTIFICATION);
+
+	switch (aenq_e->aenq_common_desc.syndrom) {
+	case ENA_ADMIN_SUSPEND:
+		/* Suspend just the IO queues.
+		 * We deliberately don't suspend admin so the timer and
+		 * the keep_alive events should remain.
+		 */
+		queue_work(ena_wq, &adapter->suspend_io_task);
+		break;
+	case ENA_ADMIN_RESUME:
+		queue_work(ena_wq, &adapter->resume_io_task);
+		break;
+	default:
+		netif_err(adapter, drv, adapter->netdev,
+			  "Invalid aenq notification link state %d\n",
+			  aenq_e->aenq_common_desc.syndrom);
+	}
+}
+
+/* This handler will be called for unknown event groups or unimplemented handlers */
+static void unimplemented_aenq_handler(void *data,
+				       struct ena_admin_aenq_entry *aenq_e)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)data;
+
+	netif_err(adapter, drv, adapter->netdev,
+		  "Unknown event was received or event with unimplemented handler\n");
+}
+
+static struct ena_aenq_handlers aenq_handlers = {
+	.handlers = {
+		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
+		[ENA_ADMIN_NOTIFICATION] = ena_notification,
+		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
+	},
+	.unimplemented_handler = unimplemented_aenq_handler
+};
+
+module_init(ena_init);
+module_exit(ena_cleanup);
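The IO queue count chosen in ena_calc_io_queue_num() above is simply the smallest of the CPU count, the device's SQ/CQ limits and the available MSI-X vectors (minus the one reserved for management). A minimal stand-alone sketch of the same arithmetic, using made-up limit values rather than anything read from real hardware, is:

#include <stdio.h>

/* Hypothetical limits standing in for num_possible_cpus(), the device's
 * get-feature response and pci_msix_vec_count(); none of these values
 * come from real ENA hardware.
 */
#define MAX_NUM_IO_QUEUES	128

static int min_int(int a, int b)
{
	return a < b ? a : b;
}

static int calc_io_queue_num(int possible_cpus, int max_io_sq_num,
			     int max_cq_num, int msix_vec_count)
{
	int n = min_int(possible_cpus, MAX_NUM_IO_QUEUES);

	n = min_int(n, max_io_sq_num);
	n = min_int(n, max_cq_num);
	/* one MSI-X vector is reserved for management (admin + AENQ) */
	n = min_int(n, msix_vec_count - 1);

	return n > 0 ? n : -1;	/* the driver returns -EFAULT in this case */
}

int main(void)
{
	/* e.g. 16 CPUs, 32 SQs, 32 CQs and 9 MSI-X vectors -> 8 IO queues */
	printf("%d\n", calc_io_queue_num(16, 32, 32, 9));
	return 0;
}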
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
new file mode 100644
index 0000000..69d7e9e
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -0,0 +1,324 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_H
+#define ENA_H
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include "ena_com.h"
+#include "ena_eth_com.h"
+
+#define DRV_MODULE_VER_MAJOR	1
+#define DRV_MODULE_VER_MINOR	0
+#define DRV_MODULE_VER_SUBMINOR 2
+
+#define DRV_MODULE_NAME		"ena"
+#ifndef DRV_MODULE_VERSION
+#define DRV_MODULE_VERSION \
+	__stringify(DRV_MODULE_VER_MAJOR) "."	\
+	__stringify(DRV_MODULE_VER_MINOR) "."	\
+	__stringify(DRV_MODULE_VER_SUBMINOR)
+#endif
+
+#define DEVICE_NAME	"Elastic Network Adapter (ENA)"
+
+/* 1 for AENQ + ADMIN */
+#define ENA_MAX_MSIX_VEC(io_queues)	(1 + (io_queues))
+
+#define ENA_REG_BAR			0
+#define ENA_MEM_BAR			2
+#define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR))
+
+#define ENA_DEFAULT_RING_SIZE	(1024)
+
+#define ENA_TX_WAKEUP_THRESH		(MAX_SKB_FRAGS + 2)
+#define ENA_DEFAULT_RX_COPYBREAK	(128 - NET_IP_ALIGN)
+
+/* limit the buffer size to 600 bytes to handle MTU changes from very
+ * small to very large, in which case the number of buffers per packet
+ * could exceed ENA_PKT_MAX_BUFS
+ */
+#define ENA_DEFAULT_MIN_RX_BUFF_ALLOC_SIZE 600
+
+#define ENA_MIN_MTU		128
+
+#define ENA_NAME_MAX_LEN	20
+#define ENA_IRQNAME_SIZE	40
+
+#define ENA_PKT_MAX_BUFS	19
+
+#define ENA_RX_RSS_TABLE_LOG_SIZE  7
+#define ENA_RX_RSS_TABLE_SIZE	(1 << ENA_RX_RSS_TABLE_LOG_SIZE)
+
+#define ENA_HASH_KEY_SIZE	40
+
+/* The number of tx packet completions that will be handled each NAPI poll
+ * cycle is ring_size / ENA_TX_POLL_BUDGET_DIVIDER.
+ */
+#define ENA_TX_POLL_BUDGET_DIVIDER	4
+
+/* Refill Rx queue when number of available descriptors is below
+ * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER
+ */
+#define ENA_RX_REFILL_THRESH_DIVIDER	8
+
+/* Number of queues to check for missing TX completions per timer service */
+#define ENA_MONITORED_TX_QUEUES	4
+/* Max timeout packets before device reset */
+#define MAX_NUM_OF_TIMEOUTED_PACKETS 32
+
+#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
+
+#define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
+#define ENA_RX_RING_IDX_ADD(idx, n, ring_size) \
+	(((idx) + (n)) & ((ring_size) - 1))
+
+#define ENA_IO_TXQ_IDX(q)	(2 * (q))
+#define ENA_IO_RXQ_IDX(q)	(2 * (q) + 1)
+
+#define ENA_MGMNT_IRQ_IDX		0
+#define ENA_IO_IRQ_FIRST_IDX		1
+#define ENA_IO_IRQ_IDX(q)		(ENA_IO_IRQ_FIRST_IDX + (q))
+
+/* The ENA device should send a keep-alive message every second.
+ * We wait for 3 seconds just to be on the safe side.
+ */
+#define ENA_DEVICE_KALIVE_TIMEOUT	(3 * HZ)
+
+#define ENA_MMIO_DISABLE_REG_READ	BIT(0)
+
+struct ena_irq {
+	irq_handler_t handler;
+	void *data;
+	int cpu;
+	u32 vector;
+	cpumask_t affinity_hint_mask;
+	char name[ENA_IRQNAME_SIZE];
+};
+
+struct ena_napi {
+	struct napi_struct napi ____cacheline_aligned;
+	struct ena_ring *tx_ring;
+	struct ena_ring *rx_ring;
+	u32 qid;
+};
+
+struct ena_tx_buffer {
+	struct sk_buff *skb;
+	/* num of ena desc for this specific skb
+	 * (includes data desc and metadata desc)
+	 */
+	u32 tx_descs;
+	/* num of buffers used by this skb */
+	u32 num_of_bufs;
+	/* Save the last jiffies to detect missing tx packets */
+	unsigned long last_jiffies;
+	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
+} ____cacheline_aligned;
+
+struct ena_rx_buffer {
+	struct sk_buff *skb;
+	struct page *page;
+	u32 page_offset;
+	struct ena_com_buf ena_buf;
+} ____cacheline_aligned;
+
+struct ena_stats_tx {
+	u64 cnt;
+	u64 bytes;
+	u64 queue_stop;
+	u64 prepare_ctx_err;
+	u64 queue_wakeup;
+	u64 dma_mapping_err;
+	u64 linearize;
+	u64 linearize_failed;
+	u64 napi_comp;
+	u64 tx_poll;
+	u64 doorbells;
+	u64 missing_tx_comp;
+	u64 bad_req_id;
+};
+
+struct ena_stats_rx {
+	u64 cnt;
+	u64 bytes;
+	u64 refil_partial;
+	u64 bad_csum;
+	u64 page_alloc_fail;
+	u64 skb_alloc_fail;
+	u64 dma_mapping_err;
+	u64 bad_desc_num;
+	u64 rx_copybreak_pkt;
+};
+
+struct ena_ring {
+	/* Holds the free request ids for TX out-of-order completions */
+	u16 *free_tx_ids;
+	union {
+		struct ena_tx_buffer *tx_buffer_info;
+		struct ena_rx_buffer *rx_buffer_info;
+	};
+
+	/* cache ptr to avoid using the adapter */
+	struct device *dev;
+	struct pci_dev *pdev;
+	struct napi_struct *napi;
+	struct net_device *netdev;
+	struct ena_com_dev *ena_dev;
+	struct ena_adapter *adapter;
+	struct ena_com_io_cq *ena_com_io_cq;
+	struct ena_com_io_sq *ena_com_io_sq;
+
+	u16 next_to_use;
+	u16 next_to_clean;
+	u16 rx_copybreak;
+	u16 qid;
+	u16 mtu;
+	u16 sgl_size;
+
+	/* The maximum header length the device can handle */
+	u8 tx_max_header_size;
+
+	/* cpu for TPH */
+	int cpu;
+	/* number of tx/rx_buffer_info's entries */
+	int ring_size;
+
+	enum ena_admin_placement_policy_type tx_mem_queue_type;
+
+	struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
+	u32  smoothed_interval;
+	u32  per_napi_packets;
+	u32  per_napi_bytes;
+	enum ena_intr_moder_level moder_tbl_idx;
+	struct u64_stats_sync syncp;
+	union {
+		struct ena_stats_tx tx_stats;
+		struct ena_stats_rx rx_stats;
+	};
+} ____cacheline_aligned;
+
+struct ena_stats_dev {
+	u64 tx_timeout;
+	u64 io_suspend;
+	u64 io_resume;
+	u64 wd_expired;
+	u64 interface_up;
+	u64 interface_down;
+	u64 admin_q_pause;
+};
+
+enum ena_flags_t {
+	ENA_FLAG_DEVICE_RUNNING,
+	ENA_FLAG_DEV_UP,
+	ENA_FLAG_LINK_UP,
+	ENA_FLAG_MSIX_ENABLED,
+	ENA_FLAG_TRIGGER_RESET
+};
+
+/* adapter specific private data structure */
+struct ena_adapter {
+	struct ena_com_dev *ena_dev;
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct pci_dev *pdev;
+
+	/* RX packets shorter than this length will be copied to the
+	 * skb header
+	 */
+	u32 rx_copybreak;
+	u32 max_mtu;
+
+	int num_queues;
+
+	struct msix_entry *msix_entries;
+	int msix_vecs;
+
+	u32 tx_usecs, rx_usecs; /* interrupt moderation */
+	u32 tx_frames, rx_frames; /* interrupt moderation */
+
+	u32 tx_ring_size;
+	u32 rx_ring_size;
+
+	u32 msg_enable;
+
+	u16 max_tx_sgl_size;
+	u16 max_rx_sgl_size;
+
+	u8 mac_addr[ETH_ALEN];
+
+	char name[ENA_NAME_MAX_LEN];
+
+	unsigned long flags;
+	/* TX */
+	struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
+		____cacheline_aligned_in_smp;
+
+	/* RX */
+	struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES]
+		____cacheline_aligned_in_smp;
+
+	struct ena_napi ena_napi[ENA_MAX_NUM_IO_QUEUES];
+
+	struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];
+
+	/* timer service */
+	struct work_struct reset_task;
+	struct work_struct suspend_io_task;
+	struct work_struct resume_io_task;
+	struct timer_list timer_service;
+
+	bool wd_state;
+	unsigned long last_keep_alive_jiffies;
+
+	struct u64_stats_sync syncp;
+	struct ena_stats_dev dev_stats;
+
+	/* last queue index that was checked for uncompleted tx packets */
+	u32 last_monitored_tx_qid;
+};
+
+void ena_set_ethtool_ops(struct net_device *netdev);
+
+void ena_dump_stats_to_dmesg(struct ena_adapter *adapter);
+
+void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf);
+
+int ena_get_sset_count(struct net_device *netdev, int sset);
+
+#endif /* !(ENA_H) */
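The ring index and queue index macros defined above rely on the ring size being a power of two; a short self-contained check of their behaviour (the definitions are repeated locally so the sketch builds outside the kernel tree) looks like this:

#include <assert.h>

/* Repeated locally; ring_size must be a power of two for the mask-based
 * wrap-around to be correct.
 */
#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
#define ENA_IO_TXQ_IDX(q)	(2 * (q))
#define ENA_IO_RXQ_IDX(q)	(2 * (q) + 1)

int main(void)
{
	unsigned int ring_size = 1024;

	/* advancing past the last descriptor wraps back to index 0 */
	assert(ENA_TX_RING_IDX_NEXT(1023, ring_size) == 0);
	assert(ENA_TX_RING_IDX_NEXT(5, ring_size) == 6);

	/* queue pair q maps to an even TX and an odd RX device queue index */
	assert(ENA_IO_TXQ_IDX(3) == 6);
	assert(ENA_IO_RXQ_IDX(3) == 7);

	return 0;
}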
diff --git a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h
new file mode 100644
index 0000000..f80d2a4
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_PCI_ID_TBL_H_
+#define ENA_PCI_ID_TBL_H_
+
+#ifndef PCI_VENDOR_ID_AMAZON
+#define PCI_VENDOR_ID_AMAZON 0x1d0f
+#endif
+
+#ifndef PCI_DEV_ID_ENA_PF
+#define PCI_DEV_ID_ENA_PF	0x0ec2
+#endif
+
+#ifndef PCI_DEV_ID_ENA_LLQ_PF
+#define PCI_DEV_ID_ENA_LLQ_PF	0x1ec2
+#endif
+
+#ifndef PCI_DEV_ID_ENA_VF
+#define PCI_DEV_ID_ENA_VF	0xec20
+#endif
+
+#ifndef PCI_DEV_ID_ENA_LLQ_VF
+#define PCI_DEV_ID_ENA_LLQ_VF	0xec21
+#endif
+
+#define ENA_PCI_ID_TABLE_ENTRY(devid) \
+	{PCI_DEVICE(PCI_VENDOR_ID_AMAZON, devid)},
+
+static const struct pci_device_id ena_pci_tbl[] = {
+	ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_PF)
+	ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_PF)
+	ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_VF)
+	ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_VF)
+	{ }
+};
+
+#endif /* ENA_PCI_ID_TBL_H_ */
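The table above only sets the vendor/device pair and leaves everything else wildcarded. A simplified user-space sketch of how such a table is walked when matching a device (the struct below is a stand-in, not the real struct pci_device_id) could look like:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for struct pci_device_id: only the two fields the
 * ENA table actually sets are modelled.
 */
struct pci_id {
	unsigned int vendor;
	unsigned int device;
};

#define PCI_VENDOR_ID_AMAZON	0x1d0f
#define PCI_DEV_ID_ENA_PF	0x0ec2
#define PCI_DEV_ID_ENA_VF	0xec20

static const struct pci_id ena_ids[] = {
	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF },
	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF },
	{ 0, 0 }	/* terminator, like the empty entry in ena_pci_tbl */
};

static bool id_matches(unsigned int vendor, unsigned int device)
{
	const struct pci_id *id;

	for (id = ena_ids; id->vendor; id++)
		if (id->vendor == vendor && id->device == device)
			return true;

	return false;
}

int main(void)
{
	printf("%d\n", id_matches(0x1d0f, 0x0ec2));	/* 1: ENA PF */
	printf("%d\n", id_matches(0x8086, 0x10d3));	/* 0: not an ENA device */
	return 0;
}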
diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
new file mode 100644
index 0000000..26097a2
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_REGS_H_
+#define _ENA_REGS_H_
+
+/* ena_registers offsets */
+#define ENA_REGS_VERSION_OFF		0x0
+#define ENA_REGS_CONTROLLER_VERSION_OFF		0x4
+#define ENA_REGS_CAPS_OFF		0x8
+#define ENA_REGS_CAPS_EXT_OFF		0xc
+#define ENA_REGS_AQ_BASE_LO_OFF		0x10
+#define ENA_REGS_AQ_BASE_HI_OFF		0x14
+#define ENA_REGS_AQ_CAPS_OFF		0x18
+#define ENA_REGS_ACQ_BASE_LO_OFF		0x20
+#define ENA_REGS_ACQ_BASE_HI_OFF		0x24
+#define ENA_REGS_ACQ_CAPS_OFF		0x28
+#define ENA_REGS_AQ_DB_OFF		0x2c
+#define ENA_REGS_ACQ_TAIL_OFF		0x30
+#define ENA_REGS_AENQ_CAPS_OFF		0x34
+#define ENA_REGS_AENQ_BASE_LO_OFF		0x38
+#define ENA_REGS_AENQ_BASE_HI_OFF		0x3c
+#define ENA_REGS_AENQ_HEAD_DB_OFF		0x40
+#define ENA_REGS_AENQ_TAIL_OFF		0x44
+#define ENA_REGS_INTR_MASK_OFF		0x4c
+#define ENA_REGS_DEV_CTL_OFF		0x54
+#define ENA_REGS_DEV_STS_OFF		0x58
+#define ENA_REGS_MMIO_REG_READ_OFF		0x5c
+#define ENA_REGS_MMIO_RESP_LO_OFF		0x60
+#define ENA_REGS_MMIO_RESP_HI_OFF		0x64
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF		0x68
+
+/* version register */
+#define ENA_REGS_VERSION_MINOR_VERSION_MASK		0xff
+#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT		8
+#define ENA_REGS_VERSION_MAJOR_VERSION_MASK		0xff00
+
+/* controller_version register */
+#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK		0xff
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT		8
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK		0xff00
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT		16
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK		0xff0000
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT		24
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK		0xff000000
+
+/* caps register */
+#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK		0x1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT		1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK		0x3e
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT		8
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK		0xff00
+
+/* aq_caps register */
+#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK		0xffff
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT		16
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK		0xffff0000
+
+/* acq_caps register */
+#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK		0xffff
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT		16
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK		0xffff0000
+
+/* aenq_caps register */
+#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK		0xffff
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT		16
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK		0xffff0000
+
+/* dev_ctl register */
+#define ENA_REGS_DEV_CTL_DEV_RESET_MASK		0x1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT		1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK		0x2
+#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT		2
+#define ENA_REGS_DEV_CTL_QUIESCENT_MASK		0x4
+#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT		3
+#define ENA_REGS_DEV_CTL_IO_RESUME_MASK		0x8
+
+/* dev_sts register */
+#define ENA_REGS_DEV_STS_READY_MASK		0x1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT		1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK		0x2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT		2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK		0x4
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT		3
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK		0x8
+#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT		4
+#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK		0x10
+#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT		5
+#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK		0x20
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT		6
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK		0x40
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT		7
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK		0x80
+
+/* mmio_reg_read register */
+#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK		0xffff
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT		16
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK		0xffff0000
+
+/* rss_ind_entry_update register */
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK		0xffff
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT		16
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK		0xffff0000
+
+#endif /*_ENA_REGS_H_ */
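All of these definitions follow the usual mask/shift pattern: a field is extracted as (value & MASK) >> SHIFT. A tiny example decoding the version register (the register value below is invented for illustration):

#include <stdint.h>
#include <stdio.h>

/* Mask/shift values copied from the register definitions above. */
#define ENA_REGS_VERSION_MINOR_VERSION_MASK	0xff
#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT	8
#define ENA_REGS_VERSION_MAJOR_VERSION_MASK	0xff00

int main(void)
{
	uint32_t ver = 0x0102;	/* invented value meaning version 1.2 */
	uint32_t major = (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >>
			 ENA_REGS_VERSION_MAJOR_VERSION_SHIFT;
	uint32_t minor = ver & ENA_REGS_VERSION_MINOR_VERSION_MASK;

	printf("ENA device version: %u.%u\n", major, minor);
	return 0;
}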
diff --git a/drivers/net/ethernet/apm/xgene/Kconfig b/drivers/net/ethernet/apm/xgene/Kconfig
index 300e3b5..afccb03 100644
--- a/drivers/net/ethernet/apm/xgene/Kconfig
+++ b/drivers/net/ethernet/apm/xgene/Kconfig
@@ -4,6 +4,7 @@
 	depends on ARCH_XGENE || COMPILE_TEST
 	select PHYLIB
 	select MDIO_XGENE
+	select GPIOLIB
 	help
 	  This is the Ethernet driver for the on-chip ethernet interface on the
 	  APM X-Gene SoC.
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
index 472c0fb..23d72af 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
@@ -32,12 +32,19 @@
 		SET_VAL(SB_HDRLEN, len);
 }
 
-static void xgene_cle_idt_to_hw(u32 dstqid, u32 fpsel,
+static void xgene_cle_idt_to_hw(struct xgene_enet_pdata *pdata,
+				u32 dstqid, u32 fpsel,
 				u32 nfpsel, u32 *idt_reg)
 {
-	*idt_reg =  SET_VAL(IDT_DSTQID, dstqid) |
-		    SET_VAL(IDT_FPSEL, fpsel) |
-		    SET_VAL(IDT_NFPSEL, nfpsel);
+	if (pdata->enet_id == XGENE_ENET1) {
+		*idt_reg = SET_VAL(IDT_DSTQID, dstqid) |
+			   SET_VAL(IDT_FPSEL1, fpsel)  |
+			   SET_VAL(IDT_NFPSEL1, nfpsel);
+	} else {
+		*idt_reg = SET_VAL(IDT_DSTQID, dstqid) |
+			   SET_VAL(IDT_FPSEL, fpsel)   |
+			   SET_VAL(IDT_NFPSEL, nfpsel);
+	}
 }
 
 static void xgene_cle_dbptr_to_hw(struct xgene_enet_pdata *pdata,
@@ -344,7 +351,7 @@
 		nfpsel = 0;
 		idt_reg = 0;
 
-		xgene_cle_idt_to_hw(dstqid, fpsel, nfpsel, &idt_reg);
+		xgene_cle_idt_to_hw(pdata, dstqid, fpsel, nfpsel, &idt_reg);
 		ret = xgene_cle_dram_wr(&pdata->cle, &idt_reg, 1, i,
 					RSS_IDT, CLE_CMD_WR);
 		if (ret)
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
index 33c5f6b..9ac9f8e 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
@@ -196,9 +196,13 @@
 #define IDT_DSTQID_POS		0
 #define IDT_DSTQID_LEN		12
 #define IDT_FPSEL_POS		12
-#define IDT_FPSEL_LEN		4
-#define IDT_NFPSEL_POS		16
-#define IDT_NFPSEL_LEN		4
+#define IDT_FPSEL_LEN		5
+#define IDT_NFPSEL_POS		17
+#define IDT_NFPSEL_LEN		5
+#define IDT_FPSEL1_POS		12
+#define IDT_FPSEL1_LEN		4
+#define IDT_NFPSEL1_POS		16
+#define IDT_NFPSEL1_LEN		4
 
 struct xgene_cle_ptree_branch {
 	bool valid;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 18bb955..321fb19 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -761,18 +761,18 @@
 	if (dev->of_node) {
 		for (i = 0 ; i < 2; i++) {
 			np = of_parse_phandle(dev->of_node, "phy-handle", i);
-			if (np)
+
+			if (!np)
+				continue;
+
+			phy_dev = of_phy_connect(ndev, np,
+						 &xgene_enet_adjust_link,
+						 0, pdata->phy_mode);
+			of_node_put(np);
+			if (phy_dev)
 				break;
 		}
 
-		if (!np) {
-			netdev_dbg(ndev, "No phy-handle found in DT\n");
-			return -ENODEV;
-		}
-
-		phy_dev = of_phy_connect(ndev, np, &xgene_enet_adjust_link,
-					 0, pdata->phy_mode);
-		of_node_put(np);
 		if (!phy_dev) {
 			netdev_err(ndev, "Could not connect to PHY\n");
 			return -ENODEV;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
index 179a44d..8a8d055 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
@@ -124,6 +124,12 @@
 #define MAC_READ_REG_OFFSET		0x0c
 #define MAC_COMMAND_DONE_REG_OFFSET	0x10
 
+#define PCS_ADDR_REG_OFFSET		0x00
+#define PCS_COMMAND_REG_OFFSET		0x04
+#define PCS_WRITE_REG_OFFSET		0x08
+#define PCS_READ_REG_OFFSET		0x0c
+#define PCS_COMMAND_DONE_REG_OFFSET	0x10
+
 #define MII_MGMT_CONFIG_ADDR		0x20
 #define MII_MGMT_COMMAND_ADDR		0x24
 #define MII_MGMT_ADDRESS_ADDR		0x28
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index d1d6b5e..b8b9495 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -19,6 +19,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/gpio.h>
 #include "xgene_enet_main.h"
 #include "xgene_enet_hw.h"
 #include "xgene_enet_sgmac.h"
@@ -72,7 +73,6 @@
 		skb = netdev_alloc_skb_ip_align(ndev, len);
 		if (unlikely(!skb))
 			return -ENOMEM;
-		buf_pool->rx_skb[tail] = skb;
 
 		dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE);
 		if (dma_mapping_error(dev, dma_addr)) {
@@ -81,6 +81,8 @@
 			return -EINVAL;
 		}
 
+		buf_pool->rx_skb[tail] = skb;
+
 		raw_desc->m1 = cpu_to_le64(SET_VAL(DATAADDR, dma_addr) |
 					   SET_VAL(BUFDATALEN, bufdatalen) |
 					   SET_BIT(COHERENT));
@@ -102,12 +104,21 @@
 
 static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool)
 {
+	struct device *dev = ndev_to_dev(buf_pool->ndev);
+	struct xgene_enet_raw_desc16 *raw_desc;
+	dma_addr_t dma_addr;
 	int i;
 
 	/* Free up the buffers held by hardware */
 	for (i = 0; i < buf_pool->slots; i++) {
-		if (buf_pool->rx_skb[i])
+		if (buf_pool->rx_skb[i]) {
 			dev_kfree_skb_any(buf_pool->rx_skb[i]);
+
+			raw_desc = &buf_pool->raw_desc16[i];
+			dma_addr = GET_VAL(DATAADDR, le64_to_cpu(raw_desc->m1));
+			dma_unmap_single(dev, dma_addr, XGENE_ENET_MAX_MTU,
+					 DMA_FROM_DEVICE);
+		}
 	}
 }
 
@@ -452,7 +463,6 @@
 			       struct xgene_enet_raw_desc *raw_desc)
 {
 	struct net_device *ndev;
-	struct xgene_enet_pdata *pdata;
 	struct device *dev;
 	struct xgene_enet_desc_ring *buf_pool;
 	u32 datalen, skb_index;
@@ -461,7 +471,6 @@
 	int ret = 0;
 
 	ndev = rx_ring->ndev;
-	pdata = netdev_priv(ndev);
 	dev = ndev_to_dev(rx_ring->ndev);
 	buf_pool = rx_ring->buf_pool;
 
@@ -1312,6 +1321,18 @@
 	return 0;
 }
 
+static void xgene_enet_gpiod_get(struct xgene_enet_pdata *pdata)
+{
+	struct device *dev = &pdata->pdev->dev;
+
+	if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII)
+		return;
+
+	pdata->sfp_rdy = gpiod_get(dev, "rxlos", GPIOD_IN);
+	if (IS_ERR(pdata->sfp_rdy))
+		pdata->sfp_rdy = gpiod_get(dev, "sfp", GPIOD_IN);
+}
+
 static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
 {
 	struct platform_device *pdev;
@@ -1401,6 +1422,8 @@
 	if (ret)
 		return ret;
 
+	xgene_enet_gpiod_get(pdata);
+
 	pdata->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pdata->clk)) {
 		/* Firmware may have set up the clock already. */
@@ -1425,6 +1448,7 @@
 	} else {
 		pdata->mcx_mac_addr = base_addr + BLOCK_AXG_MAC_OFFSET;
 		pdata->mcx_mac_csr_addr = base_addr + BLOCK_AXG_MAC_CSR_OFFSET;
+		pdata->pcs_addr = base_addr + BLOCK_PCS_OFFSET;
 	}
 	pdata->rx_buff_cnt = NUM_PKT_BUF;
 
@@ -1454,10 +1478,8 @@
 		buf_pool = pdata->rx_ring[i]->buf_pool;
 		xgene_enet_init_bufpool(buf_pool);
 		ret = xgene_enet_refill_bufpool(buf_pool, pdata->rx_buff_cnt);
-		if (ret) {
-			xgene_enet_delete_desc_rings(pdata);
-			return ret;
-		}
+		if (ret)
+			goto err;
 	}
 
 	dst_ring_num = xgene_enet_dst_ring_num(pdata->rx_ring[0]);
@@ -1474,7 +1496,7 @@
 		ret = pdata->cle_ops->cle_init(pdata);
 		if (ret) {
 			netdev_err(ndev, "Preclass Tree init error\n");
-			return ret;
+			goto err;
 		}
 	} else {
 		pdata->port_ops->cle_bypass(pdata, dst_ring_num, buf_pool->id);
@@ -1484,6 +1506,10 @@
 	pdata->mac_ops->init(pdata);
 
 	return ret;
+
+err:
+	xgene_enet_delete_desc_rings(pdata);
+	return ret;
 }
 
 static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata)
@@ -1631,8 +1657,8 @@
 	}
 #endif
 	if (!pdata->enet_id) {
-		free_netdev(ndev);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err;
 	}
 
 	ret = xgene_enet_get_resources(pdata);
@@ -1655,7 +1681,7 @@
 
 	ret = xgene_enet_init_hw(pdata);
 	if (ret)
-		goto err_netdev;
+		goto err;
 
 	link_state = pdata->mac_ops->link_state;
 	if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) {
@@ -1665,21 +1691,32 @@
 			ret = xgene_enet_mdio_config(pdata);
 		else
 			INIT_DELAYED_WORK(&pdata->link_work, link_state);
+
+		if (ret)
+			goto err1;
 	}
-	if (ret)
-		goto err;
 
 	xgene_enet_napi_add(pdata);
 	ret = register_netdev(ndev);
 	if (ret) {
 		netdev_err(ndev, "Failed to register netdev\n");
-		goto err;
+		goto err2;
 	}
 
 	return 0;
 
-err_netdev:
-	unregister_netdev(ndev);
+err2:
+	/*
+	 * If necessary, free_netdev() will call netif_napi_del() and undo
+	 * the effects of xgene_enet_napi_add()'s calls to netif_napi_add().
+	 */
+
+	if (pdata->mdio_driver)
+		xgene_enet_phy_disconnect(pdata);
+	else if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
+		xgene_enet_mdio_remove(pdata);
+err1:
+	xgene_enet_delete_desc_rings(pdata);
 err:
 	free_netdev(ndev);
 	return ret;
@@ -1688,11 +1725,9 @@
 static int xgene_enet_remove(struct platform_device *pdev)
 {
 	struct xgene_enet_pdata *pdata;
-	const struct xgene_mac_ops *mac_ops;
 	struct net_device *ndev;
 
 	pdata = platform_get_drvdata(pdev);
-	mac_ops = pdata->mac_ops;
 	ndev = pdata->ndev;
 
 	rtnl_lock();
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
index 217546e..b339fc1 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
@@ -196,6 +196,7 @@
 	void __iomem *mcx_mac_addr;
 	void __iomem *mcx_mac_csr_addr;
 	void __iomem *base_addr;
+	void __iomem *pcs_addr;
 	void __iomem *ring_csr_addr;
 	void __iomem *ring_cmd_addr;
 	int phy_mode;
@@ -216,6 +217,7 @@
 	u8 tx_delay;
 	u8 rx_delay;
 	bool mdio_driver;
+	struct gpio_desc *sfp_rdy;
 };
 
 struct xgene_indirect_ctl {
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
index 9c6ad0d..d672e71 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
@@ -18,6 +18,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/of_gpio.h>
+#include <linux/gpio.h>
 #include "xgene_enet_main.h"
 #include "xgene_enet_hw.h"
 #include "xgene_enet_xgmac.h"
@@ -84,6 +86,21 @@
 			   wr_addr);
 }
 
+static void xgene_enet_wr_pcs(struct xgene_enet_pdata *pdata,
+			      u32 wr_addr, u32 wr_data)
+{
+	void __iomem *addr, *wr, *cmd, *cmd_done;
+
+	addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET;
+	wr = pdata->pcs_addr + PCS_WRITE_REG_OFFSET;
+	cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET;
+	cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET;
+
+	if (!xgene_enet_wr_indirect(addr, wr, cmd, cmd_done, wr_addr, wr_data))
+		netdev_err(pdata->ndev, "PCS write failed, addr: %04x\n",
+			   wr_addr);
+}
+
 static void xgene_enet_rd_csr(struct xgene_enet_pdata *pdata,
 			      u32 offset, u32 *val)
 {
@@ -122,6 +139,7 @@
 
 	return true;
 }
+
 static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata,
 			      u32 rd_addr, u32 *rd_data)
 {
@@ -137,6 +155,21 @@
 			   rd_addr);
 }
 
+static void xgene_enet_rd_pcs(struct xgene_enet_pdata *pdata,
+			      u32 rd_addr, u32 *rd_data)
+{
+	void __iomem *addr, *rd, *cmd, *cmd_done;
+
+	addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET;
+	rd = pdata->pcs_addr + PCS_READ_REG_OFFSET;
+	cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET;
+	cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET;
+
+	if (!xgene_enet_rd_indirect(addr, rd, cmd, cmd_done, rd_addr, rd_data))
+		netdev_err(pdata->ndev, "PCS read failed, addr: %04x\n",
+			   rd_addr);
+}
+
 static int xgene_enet_ecc_init(struct xgene_enet_pdata *pdata)
 {
 	struct net_device *ndev = pdata->ndev;
@@ -171,6 +204,15 @@
 	xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_0, 0);
 }
 
+static void xgene_pcs_reset(struct xgene_enet_pdata *pdata)
+{
+	u32 data;
+
+	xgene_enet_rd_pcs(pdata, PCS_CONTROL_1, &data);
+	xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data | PCS_CTRL_PCS_RST);
+	xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data & ~PCS_CTRL_PCS_RST);
+}
+
 static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata)
 {
 	u32 addr0, addr1;
@@ -216,12 +258,12 @@
 	data |= CFG_RSIF_FPBUFF_TIMEOUT_EN;
 	xgene_enet_wr_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, data);
 
-	xgene_enet_wr_csr(pdata, XG_CFG_BYPASS_ADDR, RESUME_TX);
-	xgene_enet_wr_csr(pdata, XGENET_RX_DV_GATE_REG_0_ADDR, 0);
 	xgene_enet_rd_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, &data);
 	data |= BIT(12);
 	xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, data);
 	xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_1_ADDR, 0x82);
+	xgene_enet_wr_csr(pdata, XGENET_RX_DV_GATE_REG_0_ADDR, 0);
+	xgene_enet_wr_csr(pdata, XG_CFG_BYPASS_ADDR, RESUME_TX);
 }
 
 static void xgene_xgmac_rx_enable(struct xgene_enet_pdata *pdata)
@@ -359,14 +401,17 @@
 {
 	struct xgene_enet_pdata *pdata = container_of(to_delayed_work(work),
 					 struct xgene_enet_pdata, link_work);
+	struct gpio_desc *sfp_rdy = pdata->sfp_rdy;
 	struct net_device *ndev = pdata->ndev;
 	u32 link_status, poll_interval;
 
 	link_status = xgene_enet_link_status(pdata);
+	if (link_status && !IS_ERR(sfp_rdy) && !gpiod_get_value(sfp_rdy))
+		link_status = 0;
+
 	if (link_status) {
 		if (!netif_carrier_ok(ndev)) {
 			netif_carrier_on(ndev);
-			xgene_xgmac_init(pdata);
 			xgene_xgmac_rx_enable(pdata);
 			xgene_xgmac_tx_enable(pdata);
 			netdev_info(ndev, "Link is Up - 10Gbps\n");
@@ -380,6 +425,8 @@
 			netdev_info(ndev, "Link is Down\n");
 		}
 		poll_interval = PHY_POLL_LINK_OFF;
+
+		xgene_pcs_reset(pdata);
 	}
 
 	schedule_delayed_work(&pdata->link_work, poll_interval);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
index f1ea485..360ccbd 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
@@ -24,6 +24,7 @@
 #define X2_BLOCK_ETH_MAC_CSR_OFFSET	0x3000
 #define BLOCK_AXG_MAC_OFFSET		0x0800
 #define BLOCK_AXG_MAC_CSR_OFFSET	0x2000
+#define BLOCK_PCS_OFFSET		0x3800
 
 #define XGENET_CONFIG_REG_ADDR		0x20
 #define XGENET_SRST_ADDR		0x00
@@ -72,6 +73,9 @@
 #define XG_MCX_ICM_CONFIG0_REG_0_ADDR	0x00e0
 #define XG_MCX_ICM_CONFIG2_REG_0_ADDR	0x00e8
 
+#define PCS_CONTROL_1			0x0000
+#define PCS_CTRL_PCS_RST		BIT(15)
+
 extern const struct xgene_mac_ops xgene_xgmac_ops;
 extern const struct xgene_port_ops xgene_xgport_ops;
 
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index 771cc26..f9df4b5a 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -54,9 +54,7 @@
  * Global variables
  */
 static u32 bnad_rxqs_per_cq = 2;
-static u32 bna_id;
-static struct mutex bnad_list_mutex;
-static LIST_HEAD(bnad_list);
+static atomic_t bna_id;
 static const u8 bnad_bcast_addr[] __aligned(2) =
 	{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -76,23 +74,6 @@
 	(_res_info)->res_u.mem_info.len = (_size);		\
 } while (0)
 
-static void
-bnad_add_to_list(struct bnad *bnad)
-{
-	mutex_lock(&bnad_list_mutex);
-	list_add_tail(&bnad->list_entry, &bnad_list);
-	bnad->id = bna_id++;
-	mutex_unlock(&bnad_list_mutex);
-}
-
-static void
-bnad_remove_from_list(struct bnad *bnad)
-{
-	mutex_lock(&bnad_list_mutex);
-	list_del(&bnad->list_entry);
-	mutex_unlock(&bnad_list_mutex);
-}
-
 /*
  * Reinitialize completions in CQ, once Rx is taken down
  */
@@ -3573,14 +3554,12 @@
 {
 	spin_lock_init(&bnad->bna_lock);
 	mutex_init(&bnad->conf_mutex);
-	mutex_init(&bnad_list_mutex);
 }
 
 static void
 bnad_lock_uninit(struct bnad *bnad)
 {
 	mutex_destroy(&bnad->conf_mutex);
-	mutex_destroy(&bnad_list_mutex);
 }
 
 /* PCI Initialization */
@@ -3653,7 +3632,7 @@
 	}
 	bnad = netdev_priv(netdev);
 	bnad_lock_init(bnad);
-	bnad_add_to_list(bnad);
+	bnad->id = atomic_inc_return(&bna_id) - 1;
 
 	mutex_lock(&bnad->conf_mutex);
 	/*
@@ -3807,7 +3786,6 @@
 	bnad_pci_uninit(pdev);
 unlock_mutex:
 	mutex_unlock(&bnad->conf_mutex);
-	bnad_remove_from_list(bnad);
 	bnad_lock_uninit(bnad);
 	free_netdev(netdev);
 	return err;
@@ -3845,7 +3823,6 @@
 	bnad_disable_msix(bnad);
 	bnad_pci_uninit(pdev);
 	mutex_unlock(&bnad->conf_mutex);
-	bnad_remove_from_list(bnad);
 	bnad_lock_uninit(bnad);
 	/* Remove the debugfs node for this bnad */
 	kfree(bnad->regdata);
diff --git a/drivers/net/ethernet/brocade/bna/bnad.h b/drivers/net/ethernet/brocade/bna/bnad.h
index f4ed816..46f7b84 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.h
+++ b/drivers/net/ethernet/brocade/bna/bnad.h
@@ -288,7 +288,6 @@
 struct bnad {
 	struct net_device	*netdev;
 	u32			id;
-	struct list_head	list_entry;
 
 	/* Data path */
 	struct bnad_tx_info tx_info[BNAD_MAX_TX];
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 89c0cfa..dbce938 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -541,6 +541,14 @@
 	}
 }
 
+static inline void macb_set_addr(struct macb_dma_desc *desc, dma_addr_t addr)
+{
+	desc->addr = (u32)addr;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	desc->addrh = (u32)(addr >> 32);
+#endif
+}
+
 static void macb_tx_error_task(struct work_struct *work)
 {
 	struct macb_queue	*queue = container_of(work, struct macb_queue,
@@ -621,14 +629,17 @@
 
 	/* Set end of TX queue */
 	desc = macb_tx_desc(queue, 0);
-	desc->addr = 0;
+	macb_set_addr(desc, 0);
 	desc->ctrl = MACB_BIT(TX_USED);
 
 	/* Make descriptor updates visible to hardware */
 	wmb();
 
 	/* Reinitialize the TX desc queue */
-	queue_writel(queue, TBQP, queue->tx_ring_dma);
+	queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32));
+#endif
 	/* Make TX ring reflect state of hardware */
 	queue->tx_head = 0;
 	queue->tx_tail = 0;
@@ -750,7 +761,7 @@
 
 			if (entry == RX_RING_SIZE - 1)
 				paddr |= MACB_BIT(RX_WRAP);
-			bp->rx_ring[entry].addr = paddr;
+			macb_set_addr(&(bp->rx_ring[entry]), paddr);
 			bp->rx_ring[entry].ctrl = 0;
 
 			/* properly align Ethernet header */
@@ -798,7 +809,9 @@
 	int			count = 0;
 
 	while (count < budget) {
-		u32 addr, ctrl;
+		u32 ctrl;
+		dma_addr_t addr;
+		bool rxused;
 
 		entry = macb_rx_ring_wrap(bp->rx_tail);
 		desc = &bp->rx_ring[entry];
@@ -806,10 +819,14 @@
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
-		addr = desc->addr;
+		rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false;
+		addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		addr |= ((u64)(desc->addrh) << 32);
+#endif
 		ctrl = desc->ctrl;
 
-		if (!(addr & MACB_BIT(RX_USED)))
+		if (!rxused)
 			break;
 
 		bp->rx_tail++;
@@ -835,7 +852,6 @@
 		netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len);
 
 		skb_put(skb, len);
-		addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr));
 		dma_unmap_single(&bp->pdev->dev, addr,
 				 bp->rx_buffer_size, DMA_FROM_DEVICE);
 
@@ -1299,7 +1315,7 @@
 			ctrl |= MACB_BIT(TX_WRAP);
 
 		/* Set TX buffer descriptor */
-		desc->addr = tx_skb->mapping;
+		macb_set_addr(desc, tx_skb->mapping);
 		/* desc->addr must be visible to hardware before clearing
 		 * 'TX_USED' bit in desc->ctrl.
 		 */
@@ -1422,6 +1438,9 @@
 
 		desc = &bp->rx_ring[i];
 		addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		addr |= ((u64)(desc->addrh) << 32);
+#endif
 		dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size,
 				 DMA_FROM_DEVICE);
 		dev_kfree_skb_any(skb);
@@ -1547,7 +1566,7 @@
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		for (i = 0; i < TX_RING_SIZE; i++) {
-			queue->tx_ring[i].addr = 0;
+			macb_set_addr(&(queue->tx_ring[i]), 0);
 			queue->tx_ring[i].ctrl = MACB_BIT(TX_USED);
 		}
 		queue->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
@@ -1694,6 +1713,10 @@
 			dmacfg |= GEM_BIT(TXCOEN);
 		else
 			dmacfg &= ~GEM_BIT(TXCOEN);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		dmacfg |= GEM_BIT(ADDR64);
+#endif
 		netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n",
 			   dmacfg);
 		gem_writel(bp, DMACFG, dmacfg);
@@ -1739,9 +1762,15 @@
 	macb_configure_dma(bp);
 
 	/* Initialize TX and RX buffers */
-	macb_writel(bp, RBQP, bp->rx_ring_dma);
+	macb_writel(bp, RBQP, (u32)(bp->rx_ring_dma));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	macb_writel(bp, RBQPH, (u32)(bp->rx_ring_dma >> 32));
+#endif
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		queue_writel(queue, TBQP, queue->tx_ring_dma);
+		queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32));
+#endif
 
 		/* Enable interrupts */
 		queue_writel(queue, IER,
@@ -2379,6 +2408,9 @@
 			queue->IDR  = GEM_IDR(hw_q - 1);
 			queue->IMR  = GEM_IMR(hw_q - 1);
 			queue->TBQP = GEM_TBQP(hw_q - 1);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+			queue->TBQPH = GEM_TBQPH(hw_q - 1);
+#endif
 		} else {
 			/* queue0 uses legacy registers */
 			queue->ISR  = MACB_ISR;
@@ -2386,6 +2418,9 @@
 			queue->IDR  = MACB_IDR;
 			queue->IMR  = MACB_IMR;
 			queue->TBQP = MACB_TBQP;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+			queue->TBQPH = MACB_TBQPH;
+#endif
 		}
 
 		/* get irq: here we use the linux queue index, not the hardware
@@ -2935,6 +2970,11 @@
 		bp->wol |= MACB_WOL_HAS_MAGIC_PACKET;
 	device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET);
 
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	if (GEM_BFEXT(DBWDEF, gem_readl(bp, DCFG1)) > GEM_DBW32)
+		dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+#endif
+
 	spin_lock_init(&bp->lock);
 
 	/* setup capabilities */
@@ -2945,7 +2985,7 @@
 	dev->irq = platform_get_irq(pdev, 0);
 	if (dev->irq < 0) {
 		err = dev->irq;
-		goto err_disable_clocks;
+		goto err_out_free_netdev;
 	}
 
 	mac = of_get_mac_address(np);
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index b6fcf10..aa3aeec 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -66,6 +66,8 @@
 #define MACB_USRIO		0x00c0
 #define MACB_WOL		0x00c4
 #define MACB_MID		0x00fc
+#define MACB_TBQPH		0x04C8
+#define MACB_RBQPH		0x04D4
 
 /* GEM register offsets. */
 #define GEM_NCFGR		0x0004 /* Network Config */
@@ -139,6 +141,7 @@
 
 #define GEM_ISR(hw_q)		(0x0400 + ((hw_q) << 2))
 #define GEM_TBQP(hw_q)		(0x0440 + ((hw_q) << 2))
+#define GEM_TBQPH(hw_q)		(0x04C8)
 #define GEM_RBQP(hw_q)		(0x0480 + ((hw_q) << 2))
 #define GEM_IER(hw_q)		(0x0600 + ((hw_q) << 2))
 #define GEM_IDR(hw_q)		(0x0620 + ((hw_q) << 2))
@@ -249,6 +252,8 @@
 #define GEM_RXBS_SIZE		8
 #define GEM_DDRP_OFFSET		24 /* disc_when_no_ahb */
 #define GEM_DDRP_SIZE		1
+#define GEM_ADDR64_OFFSET	30 /* Address bus width - 64b or 32b */
+#define GEM_ADDR64_SIZE		1
 
 
 /* Bitfields in NSR */
@@ -474,6 +479,10 @@
 struct macb_dma_desc {
 	u32	addr;
 	u32	ctrl;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	u32     addrh;
+	u32     resvd;
+#endif
 };
 
 /* DMA descriptor bitfields */
@@ -777,6 +786,7 @@
 	unsigned int		IDR;
 	unsigned int		IMR;
 	unsigned int		TBQP;
+	unsigned int		TBQPH;
 
 	unsigned int		tx_head, tx_tail;
 	struct macb_dma_desc	*tx_ring;
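With CONFIG_ARCH_DMA_ADDR_T_64BIT the descriptor grows an addrh word and macb_set_addr() splits the DMA address across the two words. A stand-alone sketch of that split (field names mirror the kernel structure, the address value is arbitrary):

#include <stdint.h>
#include <stdio.h>

/* Minimal model of the 64-bit-capable descriptor layout added above. */
struct dma_desc {
	uint32_t addr;	/* low 32 bits of the buffer address */
	uint32_t ctrl;
	uint32_t addrh;	/* high 32 bits of the buffer address */
	uint32_t resvd;
};

static void set_addr(struct dma_desc *desc, uint64_t addr)
{
	desc->addr  = (uint32_t)addr;
	desc->addrh = (uint32_t)(addr >> 32);
}

int main(void)
{
	struct dma_desc d = { 0 };

	set_addr(&d, 0x0000000912345678ULL);	/* arbitrary 44-bit address */
	printf("addr=0x%08x addrh=0x%08x\n", d.addr, d.addrh);
	return 0;
}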
diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig
index 0ef232d..e1b78b5 100644
--- a/drivers/net/ethernet/cavium/Kconfig
+++ b/drivers/net/ethernet/cavium/Kconfig
@@ -36,10 +36,20 @@
 	depends on 64BIT
 	select PHYLIB
 	select MDIO_THUNDER
+	select THUNDER_NIC_RGX
 	---help---
 	  This driver supports programming and controlling of MAC
 	  interface from NIC physical function driver.
 
+config	THUNDER_NIC_RGX
+	tristate "Thunder MAC interface driver (RGX)"
+	depends on 64BIT
+	select PHYLIB
+	select MDIO_THUNDER
+	---help---
+	  This driver supports configuring the XCV block of the RGX
+	  interface present on the CN81XX chip.
+
 config LIQUIDIO
 	tristate "Cavium LiquidIO support"
 	depends on 64BIT
diff --git a/drivers/net/ethernet/cavium/thunder/Makefile b/drivers/net/ethernet/cavium/thunder/Makefile
index 5c4615c..6b4d4ad 100644
--- a/drivers/net/ethernet/cavium/thunder/Makefile
+++ b/drivers/net/ethernet/cavium/thunder/Makefile
@@ -2,6 +2,7 @@
 # Makefile for Cavium's Thunder ethernet device
 #
 
+obj-$(CONFIG_THUNDER_NIC_RGX) += thunder_xcv.o
 obj-$(CONFIG_THUNDER_NIC_BGX) += thunder_bgx.o
 obj-$(CONFIG_THUNDER_NIC_PF) += nicpf.o
 obj-$(CONFIG_THUNDER_NIC_VF) += nicvf.o
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 83025bb..dd63f96 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -20,6 +20,17 @@
 #define	PCI_DEVICE_ID_THUNDER_NIC_VF		0xA034
 #define	PCI_DEVICE_ID_THUNDER_BGX		0xA026
 
+/* Subsystem device IDs */
+#define PCI_SUBSYS_DEVID_88XX_NIC_PF		0xA11E
+#define PCI_SUBSYS_DEVID_81XX_NIC_PF		0xA21E
+#define PCI_SUBSYS_DEVID_83XX_NIC_PF		0xA31E
+
+#define PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF	0xA11E
+#define PCI_SUBSYS_DEVID_88XX_NIC_VF		0xA134
+#define PCI_SUBSYS_DEVID_81XX_NIC_VF		0xA234
+#define PCI_SUBSYS_DEVID_83XX_NIC_VF		0xA334
+
+
 /* PCI BAR nos */
 #define	PCI_CFG_REG_BAR_NUM		0
 #define	PCI_MSIX_REG_BAR_NUM		4
@@ -41,40 +52,8 @@
 /* Max pkinds */
 #define	NIC_MAX_PKIND			16
 
-/* Rx Channels */
-/* Receive channel configuration in TNS bypass mode
- * Below is configuration in TNS bypass mode
- * BGX0-LMAC0-CHAN0 - VNIC CHAN0
- * BGX0-LMAC1-CHAN0 - VNIC CHAN16
- * ...
- * BGX1-LMAC0-CHAN0 - VNIC CHAN128
- * ...
- * BGX1-LMAC3-CHAN0 - VNIC CHAN174
- */
-#define	NIC_INTF_COUNT			2  /* Interfaces btw VNIC and TNS/BGX */
-#define	NIC_CHANS_PER_INF		128
-#define	NIC_MAX_CHANS			(NIC_INTF_COUNT * NIC_CHANS_PER_INF)
-#define	NIC_CPI_COUNT			2048 /* No of channel parse indices */
-
-/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */
-#define NIC_MAX_BGX			MAX_BGX_PER_CN88XX
-#define	NIC_CPI_PER_BGX			(NIC_CPI_COUNT / NIC_MAX_BGX)
-#define	NIC_MAX_CPI_PER_LMAC		64 /* Max when CPI_ALG is IP diffserv */
-#define	NIC_RSSI_PER_BGX		(NIC_RSSI_COUNT / NIC_MAX_BGX)
-
-/* Tx scheduling */
-#define	NIC_MAX_TL4			1024
-#define	NIC_MAX_TL4_SHAPERS		256 /* 1 shaper for 4 TL4s */
-#define	NIC_MAX_TL3			256
-#define	NIC_MAX_TL3_SHAPERS		64  /* 1 shaper for 4 TL3s */
-#define	NIC_MAX_TL2			64
-#define	NIC_MAX_TL2_SHAPERS		2  /* 1 shaper for 32 TL2s */
-#define	NIC_MAX_TL1			2
-
-/* TNS bypass mode */
-#define	NIC_TL2_PER_BGX			32
-#define	NIC_TL4_PER_BGX			(NIC_MAX_TL4 / NIC_MAX_BGX)
-#define	NIC_TL4_PER_LMAC		(NIC_MAX_TL4 / NIC_CHANS_PER_INF)
+/* Max when CPI_ALG is IP diffserv */
+#define	NIC_MAX_CPI_PER_LMAC		64
 
 /* NIC VF Interrupts */
 #define	NICVF_INTR_CQ			0
@@ -148,7 +127,6 @@
 	struct	napi_struct napi;
 };
 
-#define	NIC_RSSI_COUNT			4096 /* Total no of RSS indices */
 #define NIC_MAX_RSS_HASH_BITS		8
 #define NIC_MAX_RSS_IDR_TBL_SIZE	(1 << NIC_MAX_RSS_HASH_BITS)
 #define RSS_HASH_KEY_SIZE		5 /* 320 bit key */
@@ -273,6 +251,7 @@
 	struct net_device	*netdev;
 	struct pci_dev		*pdev;
 	void __iomem		*reg_base;
+#define	MAX_QUEUES_PER_QSET			8
 	struct queue_set	*qs;
 	struct nicvf_cq_poll	*napi[8];
 	u8			vf_id;
@@ -368,6 +347,7 @@
 #define	NIC_MBOX_MSG_PNICVF_PTR		0x14	/* Get primary qset nicvf ptr */
 #define	NIC_MBOX_MSG_SNICVF_PTR		0x15	/* Send sqet nicvf ptr to PVF */
 #define	NIC_MBOX_MSG_LOOPBACK		0x16	/* Set interface in loopback */
+#define	NIC_MBOX_MSG_RESET_STAT_COUNTER 0x17	/* Reset statistics counters */
 #define	NIC_MBOX_MSG_CFG_DONE		0xF0	/* VF configuration done */
 #define	NIC_MBOX_MSG_SHUTDOWN		0xF1	/* VF is being shutdown */
 
@@ -484,6 +464,31 @@
 	bool  enable;
 };
 
+/* Reset statistics counters */
+struct reset_stat_cfg {
+	u8    msg;
+	/* Bitmap to select NIC_PF_VNIC(vf_id)_RX_STAT(0..13) */
+	u16   rx_stat_mask;
+	/* Bitmap to select NIC_PF_VNIC(vf_id)_TX_STAT(0..4) */
+	u8    tx_stat_mask;
+	/* Bitmap to select NIC_PF_QS(0..127)_RQ(0..7)_STAT(0..1)
+	 * bit14, bit15 NIC_PF_QS(vf_id)_RQ7_STAT(0..1)
+	 * bit12, bit13 NIC_PF_QS(vf_id)_RQ6_STAT(0..1)
+	 * ..
+	 * bit2, bit3 NIC_PF_QS(vf_id)_RQ1_STAT(0..1)
+	 * bit0, bit1 NIC_PF_QS(vf_id)_RQ0_STAT(0..1)
+	 */
+	u16   rq_stat_mask;
+	/* Bitmap to select NIC_PF_QS(0..127)_SQ(0..7)_STAT(0..1)
+	 * bit14, bit15 NIC_PF_QS(vf_id)_SQ7_STAT(0..1)
+	 * bit12, bit13 NIC_PF_QS(vf_id)_SQ6_STAT(0..1)
+	 * ..
+	 * bit2, bit3 NIC_PF_QS(vf_id)_SQ1_STAT(0..1)
+	 * bit0, bit1 NIC_PF_QS(vf_id)_SQ0_STAT(0..1)
+	 */
+	u16   sq_stat_mask;
+};
+
 /* 128 bit shared memory between PF and each VF */
 union nic_mbx {
 	struct { u8 msg; }	msg;
@@ -501,6 +506,7 @@
 	struct sqs_alloc        sqs_alloc;
 	struct nicvf_ptr	nicvf;
 	struct set_loopback	lbk;
+	struct reset_stat_cfg	reset_stat;
 };
 
 #define NIC_NODE_ID_MASK	0x03
@@ -514,7 +520,14 @@
 
 static inline bool pass1_silicon(struct pci_dev *pdev)
 {
-	return pdev->revision < 8;
+	return (pdev->revision < 8) &&
+		(pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF);
+}
+
+static inline bool pass2_silicon(struct pci_dev *pdev)
+{
+	return (pdev->revision >= 8) &&
+		(pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF);
 }
 
 int nicvf_set_real_num_queues(struct net_device *netdev,
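
As a sketch of the bitmap layout documented in struct reset_stat_cfg above
(bit 2*q selects RQ(q)_STAT0 and bit 2*q+1 selects RQ(q)_STAT1), a VF
clearing both counters of its first four receive queues could fill the
message as below; the helper name is hypothetical and BIT() comes from
<linux/bits.h>:

static void example_reset_rq_stats(struct reset_stat_cfg *cfg)
{
	int q;

	cfg->msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
	cfg->rx_stat_mask = 0;
	cfg->tx_stat_mask = 0;
	cfg->sq_stat_mask = 0;
	cfg->rq_stat_mask = 0;
	for (q = 0; q < 4; q++) {
		cfg->rq_stat_mask |= BIT(q * 2);	/* RQ(q)_STAT0 */
		cfg->rq_stat_mask |= BIT(q * 2 + 1);	/* RQ(q)_STAT1 */
	}
}
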
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 16ed203..25618d2 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -20,8 +20,25 @@
 #define DRV_NAME	"thunder-nic"
 #define DRV_VERSION	"1.0"
 
+struct hw_info {
+	u8		bgx_cnt;
+	u8		chans_per_lmac;
+	u8		chans_per_bgx; /* Rx/Tx chans */
+	u8		chans_per_rgx;
+	u8		chans_per_lbk;
+	u16		cpi_cnt;
+	u16		rssi_cnt;
+	u16		rss_ind_tbl_size;
+	u16		tl4_cnt;
+	u16		tl3_cnt;
+	u8		tl2_cnt;
+	u8		tl1_cnt;
+	bool		tl1_per_bgx; /* TL1 per BGX or per LMAC */
+};
+
 struct nicpf {
 	struct pci_dev		*pdev;
+	struct hw_info          *hw;
 	u8			node;
 	unsigned int		flags;
 	u8			num_vf_en;      /* No of VF enabled */
@@ -36,22 +53,22 @@
 #define	NIC_SET_VF_LMAC_MAP(bgx, lmac)	(((bgx & 0xF) << 4) | (lmac & 0xF))
 #define	NIC_GET_BGX_FROM_VF_LMAC_MAP(map)	((map >> 4) & 0xF)
 #define	NIC_GET_LMAC_FROM_VF_LMAC_MAP(map)	(map & 0xF)
-	u8			vf_lmac_map[MAX_LMAC];
+	u8			*vf_lmac_map;
 	struct delayed_work     dwork;
 	struct workqueue_struct *check_link;
-	u8			link[MAX_LMAC];
-	u8			duplex[MAX_LMAC];
-	u32			speed[MAX_LMAC];
+	u8			*link;
+	u8			*duplex;
+	u32			*speed;
 	u16			cpi_base[MAX_NUM_VFS_SUPPORTED];
 	u16			rssi_base[MAX_NUM_VFS_SUPPORTED];
-	u16			rss_ind_tbl_size;
 	bool			mbx_lock[MAX_NUM_VFS_SUPPORTED];
 
 	/* MSI-X */
 	bool			msix_enabled;
 	u8			num_vec;
-	struct msix_entry	msix_entries[NIC_PF_MSIX_VECTORS];
+	struct msix_entry	*msix_entries;
 	bool			irq_allocated[NIC_PF_MSIX_VECTORS];
+	char			irq_name[NIC_PF_MSIX_VECTORS][20];
 };
 
 /* Supported devices */
@@ -89,9 +106,22 @@
 /* PF -> VF mailbox communication APIs */
 static void nic_enable_mbx_intr(struct nicpf *nic)
 {
-	/* Enable mailbox interrupt for all 128 VFs */
-	nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, ~0ull);
-	nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), ~0ull);
+	int vf_cnt = pci_sriov_get_totalvfs(nic->pdev);
+
+#define INTR_MASK(vfs) ((vfs < 64) ? (BIT_ULL(vfs) - 1) : (~0ull))
+
+	/* Clear it, to avoid spurious interrupts (if any) */
+	nic_reg_write(nic, NIC_PF_MAILBOX_INT, INTR_MASK(vf_cnt));
+
+	/* Enable mailbox interrupt for all VFs */
+	nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, INTR_MASK(vf_cnt));
+	/* One mailbox intr enable reg per 64 VFs */
+	if (vf_cnt > 64) {
+		nic_reg_write(nic, NIC_PF_MAILBOX_INT + sizeof(u64),
+			      INTR_MASK(vf_cnt - 64));
+		nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64),
+			      INTR_MASK(vf_cnt - 64));
+	}
 }
 
 static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg)
@@ -144,7 +174,7 @@
 
 	mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE;
 
-	if (vf < MAX_LMAC) {
+	if (vf < nic->num_vf_en) {
 		bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
 		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
 
@@ -155,7 +185,7 @@
 	mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false;
 	mbx.nic_cfg.node_id = nic->node;
 
-	mbx.nic_cfg.loopback_supported = vf < MAX_LMAC;
+	mbx.nic_cfg.loopback_supported = vf < nic->num_vf_en;
 
 	nic_send_msg_to_vf(nic, vf, &mbx);
 }
@@ -248,14 +278,22 @@
 /* Set minimum transmit packet size */
 static void nic_set_tx_pkt_pad(struct nicpf *nic, int size)
 {
-	int lmac;
+	int lmac, max_lmac;
+	u16 sdevid;
 	u64 lmac_cfg;
 
 	/* Max value that can be set is 60 */
 	if (size > 60)
 		size = 60;
 
-	for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) {
+	pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+	/* 81xx's RGX has only one LMAC */
+	if (sdevid == PCI_SUBSYS_DEVID_81XX_NIC_PF)
+		max_lmac = ((nic->hw->bgx_cnt - 1) * MAX_LMAC_PER_BGX) + 1;
+	else
+		max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX;
+
+	for (lmac = 0; lmac < max_lmac; lmac++) {
 		lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3));
 		lmac_cfg &= ~(0xF << 2);
 		lmac_cfg |= ((size / 4) << 2);
@@ -275,7 +313,7 @@
 
 	nic->num_vf_en = 0;
 
-	for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) {
+	for (bgx = 0; bgx < nic->hw->bgx_cnt; bgx++) {
 		if (!(bgx_map & (1 << bgx)))
 			continue;
 		lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
@@ -295,28 +333,125 @@
 			nic_reg_write(nic,
 				      NIC_PF_LMAC_0_7_CREDIT + (lmac * 8),
 				      lmac_credit);
+
+		/* On CN81XX there are only 8 VFs but the max possible
+		 * number of interfaces is 9.
+		 */
+		if (nic->num_vf_en >= pci_sriov_get_totalvfs(nic->pdev)) {
+			nic->num_vf_en = pci_sriov_get_totalvfs(nic->pdev);
+			break;
+		}
 	}
 }
 
+static void nic_free_lmacmem(struct nicpf *nic)
+{
+	kfree(nic->vf_lmac_map);
+	kfree(nic->link);
+	kfree(nic->duplex);
+	kfree(nic->speed);
+}
+
+static int nic_get_hw_info(struct nicpf *nic)
+{
+	u8 max_lmac;
+	u16 sdevid;
+	struct hw_info *hw = nic->hw;
+
+	pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+
+	switch (sdevid) {
+	case PCI_SUBSYS_DEVID_88XX_NIC_PF:
+		hw->bgx_cnt = MAX_BGX_PER_CN88XX;
+		hw->chans_per_lmac = 16;
+		hw->chans_per_bgx = 128;
+		hw->cpi_cnt = 2048;
+		hw->rssi_cnt = 4096;
+		hw->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
+		hw->tl3_cnt = 256;
+		hw->tl2_cnt = 64;
+		hw->tl1_cnt = 2;
+		hw->tl1_per_bgx = true;
+		break;
+	case PCI_SUBSYS_DEVID_81XX_NIC_PF:
+		hw->bgx_cnt = MAX_BGX_PER_CN81XX;
+		hw->chans_per_lmac = 8;
+		hw->chans_per_bgx = 32;
+		hw->chans_per_rgx = 8;
+		hw->chans_per_lbk = 24;
+		hw->cpi_cnt = 512;
+		hw->rssi_cnt = 256;
+		hw->rss_ind_tbl_size = 32; /* Max RSSI / Max interfaces */
+		hw->tl3_cnt = 64;
+		hw->tl2_cnt = 16;
+		hw->tl1_cnt = 10;
+		hw->tl1_per_bgx = false;
+		break;
+	case PCI_SUBSYS_DEVID_83XX_NIC_PF:
+		hw->bgx_cnt = MAX_BGX_PER_CN83XX;
+		hw->chans_per_lmac = 8;
+		hw->chans_per_bgx = 32;
+		hw->chans_per_lbk = 64;
+		hw->cpi_cnt = 2048;
+		hw->rssi_cnt = 1024;
+		hw->rss_ind_tbl_size = 64; /* Max RSSI / Max interfaces */
+		hw->tl3_cnt = 256;
+		hw->tl2_cnt = 64;
+		hw->tl1_cnt = 18;
+		hw->tl1_per_bgx = false;
+		break;
+	}
+	hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev);
+
+	/* Allocate memory for LMAC tracking elements */
+	max_lmac = hw->bgx_cnt * MAX_LMAC_PER_BGX;
+	nic->vf_lmac_map = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
+	if (!nic->vf_lmac_map)
+		goto error;
+	nic->link = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
+	if (!nic->link)
+		goto error;
+	nic->duplex = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
+	if (!nic->duplex)
+		goto error;
+	nic->speed = kmalloc_array(max_lmac, sizeof(u32), GFP_KERNEL);
+	if (!nic->speed)
+		goto error;
+	return 0;
+
+error:
+	nic_free_lmacmem(nic);
+	return -ENOMEM;
+}
+
 #define BGX0_BLOCK 8
 #define BGX1_BLOCK 9
 
-static void nic_init_hw(struct nicpf *nic)
+static int nic_init_hw(struct nicpf *nic)
 {
-	int i;
+	int i, err;
 	u64 cqm_cfg;
 
+	/* Get HW capability info */
+	err = nic_get_hw_info(nic);
+	if (err)
+		return err;
+
 	/* Enable NIC HW block */
 	nic_reg_write(nic, NIC_PF_CFG, 0x3);
 
 	/* Enable backpressure */
 	nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03);
 
-	/* Disable TNS mode on both interfaces */
-	nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
-		      (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK);
-	nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
-		      (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK);
+	/* TNS and TNS bypass modes are present only on 88xx */
+	if (nic->pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF) {
+		/* Disable TNS mode on both interfaces */
+		nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
+			      (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK);
+		nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
+			      (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK);
+	}
+
 	nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG,
 		      (1ULL << 63) | BGX0_BLOCK);
 	nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8),
@@ -346,11 +481,14 @@
 	cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG);
 	if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL)
 		nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL);
+
+	return 0;
 }
 
 /* Channel parse index configuration */
 static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
 {
+	struct hw_info *hw = nic->hw;
 	u32 vnic, bgx, lmac, chan;
 	u32 padd, cpi_count = 0;
 	u64 cpi_base, cpi, rssi_base, rssi;
@@ -360,9 +498,9 @@
 	bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
 	lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
 
-	chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
-	cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX);
-	rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX);
+	chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
+	cpi_base = vnic * NIC_MAX_CPI_PER_LMAC;
+	rssi_base = vnic * hw->rss_ind_tbl_size;
 
 	/* Rx channel configuration */
 	nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3),
@@ -434,7 +572,7 @@
 	msg = (u64 *)&mbx;
 
 	mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
-	mbx.rss_size.ind_tbl_size = nic->rss_ind_tbl_size;
+	mbx.rss_size.ind_tbl_size = nic->hw->rss_ind_tbl_size;
 	nic_send_msg_to_vf(nic, vf, &mbx);
 }
 
@@ -481,7 +619,7 @@
 /* 4 level transmit side scheduler configutation
  * for TNS bypass mode
  *
- * Sample configuration for SQ0
+ * Sample configuration for SQ0 on 88xx
  * VNIC0-SQ0 -> TL4(0)   -> TL3[0]   -> TL2[0]  -> TL1[0] -> BGX0
  * VNIC1-SQ0 -> TL4(8)   -> TL3[2]   -> TL2[0]  -> TL1[0] -> BGX0
  * VNIC2-SQ0 -> TL4(16)  -> TL3[4]   -> TL2[1]  -> TL1[0] -> BGX0
@@ -494,6 +632,7 @@
 static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
 			       struct sq_cfg_msg *sq)
 {
+	struct hw_info *hw = nic->hw;
 	u32 bgx, lmac, chan;
 	u32 tl2, tl3, tl4;
 	u32 rr_quantum;
@@ -512,21 +651,28 @@
 	/* 24 bytes for FCS, IPG and preamble */
 	rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
 
-	if (!sq->sqs_mode) {
-		tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
-	} else {
-		for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
-			if (nic->vf_sqs[pqs_vnic][svf] == vnic)
-				break;
+	/* On 88xx TL4s 0-511 transmit via BGX0 and
+	 * TL4s 512-1023 transmit via BGX1.
+	 */
+	if (hw->tl1_per_bgx) {
+		tl4 = bgx * (hw->tl4_cnt / hw->bgx_cnt);
+		if (!sq->sqs_mode) {
+			tl4 += (lmac * MAX_QUEUES_PER_QSET);
+		} else {
+			for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
+				if (nic->vf_sqs[pqs_vnic][svf] == vnic)
+					break;
+			}
+			tl4 += (MAX_LMAC_PER_BGX * MAX_QUEUES_PER_QSET);
+			tl4 += (lmac * MAX_QUEUES_PER_QSET * MAX_SQS_PER_VF);
+			tl4 += (svf * MAX_QUEUES_PER_QSET);
 		}
-		tl4 = (MAX_LMAC_PER_BGX * NIC_TL4_PER_LMAC);
-		tl4 += (lmac * NIC_TL4_PER_LMAC * MAX_SQS_PER_VF);
-		tl4 += (svf * NIC_TL4_PER_LMAC);
-		tl4 += (bgx * NIC_TL4_PER_BGX);
+	} else {
+		tl4 = (vnic * MAX_QUEUES_PER_QSET);
 	}
 	tl4 += sq_idx;
 
-	tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3);
+	tl3 = tl4 / (hw->tl4_cnt / hw->tl3_cnt);
 	nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
 		      ((u64)vnic << NIC_QS_ID_SHIFT) |
 		      ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4);
@@ -534,8 +680,19 @@
 		      ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum);
 
 	nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum);
-	chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
-	nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
+
+	/* On 88xx channels 0-127 are for BGX0 and
+	 * channels 128-255 are for BGX1.
+	 *
+	 * On 81xx/83xx the TL3_CHAN reg should be configured with the channel
+	 * within the LMAC, i.e. 0-7, and not the actual channel number as on 88xx
+	 */
+	chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
+	if (hw->tl1_per_bgx)
+		nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
+	else
+		nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), 0);
+
 	/* Enable backpressure on the channel */
 	nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1);
 
@@ -544,6 +701,16 @@
 	nic_reg_write(nic, NIC_PF_TL2_0_63_CFG | (tl2 << 3), rr_quantum);
 	/* No priorities as of now */
 	nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00);
+
+	/* Unlike 88xx, where TL2s 0-31 transmit to TL1 '0' and the rest to
+	 * TL1 '1', on 81xx/83xx each TL2 needs to be configured to transmit
+	 * to one of the possible LMACs.
+	 *
+	 * This register doesn't exist on 88xx.
+	 */
+	if (!hw->tl1_per_bgx)
+		nic_reg_write(nic, NIC_PF_TL2_LMAC | (tl2 << 3),
+			      lmac + (bgx * MAX_LMAC_PER_BGX));
 }
 
 /* Send primary nicvf pointer to secondary QS's VF */
@@ -615,7 +782,7 @@
 {
 	int bgx_idx, lmac_idx;
 
-	if (lbk->vf_id > MAX_LMAC)
+	if (lbk->vf_id >= nic->num_vf_en)
 		return -1;
 
 	bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
@@ -626,6 +793,67 @@
 	return 0;
 }
 
+/* Reset statistics counters */
+static int nic_reset_stat_counters(struct nicpf *nic,
+				   int vf, struct reset_stat_cfg *cfg)
+{
+	int i, stat, qnum;
+	u64 reg_addr;
+
+	for (i = 0; i < RX_STATS_ENUM_LAST; i++) {
+		if (cfg->rx_stat_mask & BIT(i)) {
+			reg_addr = NIC_PF_VNIC_0_127_RX_STAT_0_13 |
+				   (vf << NIC_QS_ID_SHIFT) |
+				   (i << 3);
+			nic_reg_write(nic, reg_addr, 0);
+		}
+	}
+
+	for (i = 0; i < TX_STATS_ENUM_LAST; i++) {
+		if (cfg->tx_stat_mask & BIT(i)) {
+			reg_addr = NIC_PF_VNIC_0_127_TX_STAT_0_4 |
+				   (vf << NIC_QS_ID_SHIFT) |
+				   (i << 3);
+			nic_reg_write(nic, reg_addr, 0);
+		}
+	}
+
+	for (i = 0; i <= 15; i++) {
+		qnum = i >> 1;
+		stat = i & 1 ? 1 : 0;
+		reg_addr = (vf << NIC_QS_ID_SHIFT) |
+			   (qnum << NIC_Q_NUM_SHIFT) | (stat << 3);
+		if (cfg->rq_stat_mask & BIT(i)) {
+			reg_addr |= NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1;
+			nic_reg_write(nic, reg_addr, 0);
+		}
+		if (cfg->sq_stat_mask & BIT(i)) {
+			reg_addr |= NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1;
+			nic_reg_write(nic, reg_addr, 0);
+		}
+	}
+	return 0;
+}
+
+static void nic_enable_tunnel_parsing(struct nicpf *nic, int vf)
+{
+	u64 prot_def = (IPV6_PROT << 32) | (IPV4_PROT << 16) | ET_PROT;
+	u64 vxlan_prot_def = (IPV6_PROT_DEF << 32) |
+			      (IPV4_PROT_DEF) << 16 | ET_PROT_DEF;
+
+	/* Configure tunnel parsing parameters */
+	nic_reg_write(nic, NIC_PF_RX_GENEVE_DEF,
+		      (1ULL << 63 | UDP_GENEVE_PORT_NUM));
+	nic_reg_write(nic, NIC_PF_RX_GENEVE_PROT_DEF,
+		      ((7ULL << 61) | prot_def));
+	nic_reg_write(nic, NIC_PF_RX_NVGRE_PROT_DEF,
+		      ((7ULL << 61) | prot_def));
+	nic_reg_write(nic, NIC_PF_RX_VXLAN_DEF_0_1,
+		      ((1ULL << 63) | UDP_VXLAN_PORT_NUM));
+	nic_reg_write(nic, NIC_PF_RX_VXLAN_PROT_DEF,
+		      ((0xfULL << 60) | vxlan_prot_def));
+}
+
 static void nic_enable_vf(struct nicpf *nic, int vf, bool enable)
 {
 	int bgx, lmac;
@@ -664,18 +892,17 @@
 		mbx_addr += sizeof(u64);
 	}
 
-	dev_dbg(&nic->pdev->dev, "%s: Mailbox msg %d from VF%d\n",
+	dev_dbg(&nic->pdev->dev, "%s: Mailbox msg 0x%02x from VF%d\n",
 		__func__, mbx.msg.msg, vf);
 	switch (mbx.msg.msg) {
 	case NIC_MBOX_MSG_READY:
 		nic_mbx_send_ready(nic, vf);
-		if (vf < MAX_LMAC) {
+		if (vf < nic->num_vf_en) {
 			nic->link[vf] = 0;
 			nic->duplex[vf] = 0;
 			nic->speed[vf] = 0;
 		}
-		ret = 1;
-		break;
+		goto unlock;
 	case NIC_MBOX_MSG_QS_CFG:
 		reg_addr = NIC_PF_QSET_0_127_CFG |
 			   (mbx.qs.num << NIC_QS_ID_SHIFT);
@@ -693,6 +920,15 @@
 			   (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
 			   (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
 		nic_reg_write(nic, reg_addr, mbx.rq.cfg);
+		/* Enable CQE_RX2_S extension in CQE_RX descriptor.
+		 * It is appended by default on 81xx/83xx chips; for
+		 * consistency, enable the same on 88xx pass2, where it
+		 * was introduced.
+		 */
+		if (pass2_silicon(nic->pdev))
+			nic_reg_write(nic, NIC_PF_RX_CFG, 0x01);
+		if (!pass1_silicon(nic->pdev))
+			nic_enable_tunnel_parsing(nic, vf);
 		break;
 	case NIC_MBOX_MSG_RQ_BP_CFG:
 		reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG |
@@ -717,8 +953,10 @@
 		nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq);
 		break;
 	case NIC_MBOX_MSG_SET_MAC:
-		if (vf >= nic->num_vf_en)
+		if (vf >= nic->num_vf_en) {
+			ret = -1; /* NACK */
 			break;
+		}
 		lmac = mbx.mac.vf_id;
 		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
 		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
@@ -767,25 +1005,38 @@
 	case NIC_MBOX_MSG_LOOPBACK:
 		ret = nic_config_loopback(nic, &mbx.lbk);
 		break;
+	case NIC_MBOX_MSG_RESET_STAT_COUNTER:
+		ret = nic_reset_stat_counters(nic, vf, &mbx.reset_stat);
+		break;
 	default:
 		dev_err(&nic->pdev->dev,
 			"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
 		break;
 	}
 
-	if (!ret)
+	if (!ret) {
 		nic_mbx_send_ack(nic, vf);
-	else if (mbx.msg.msg != NIC_MBOX_MSG_READY)
+	} else if (mbx.msg.msg != NIC_MBOX_MSG_READY) {
+		dev_err(&nic->pdev->dev, "NACK for MBOX 0x%02x from VF %d\n",
+			mbx.msg.msg, vf);
 		nic_mbx_send_nack(nic, vf);
+	}
 unlock:
 	nic->mbx_lock[vf] = false;
 }
 
-static void nic_mbx_intr_handler (struct nicpf *nic, int mbx)
+static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
 {
+	struct nicpf *nic = (struct nicpf *)nic_irq;
+	int mbx;
 	u64 intr;
 	u8  vf, vf_per_mbx_reg = 64;
 
+	if (irq == nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector)
+		mbx = 0;
+	else
+		mbx = 1;
+
 	intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3));
 	dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr);
 	for (vf = 0; vf < vf_per_mbx_reg; vf++) {
@@ -797,23 +1048,6 @@
 			nic_clear_mbx_intr(nic, vf, mbx);
 		}
 	}
-}
-
-static irqreturn_t nic_mbx0_intr_handler (int irq, void *nic_irq)
-{
-	struct nicpf *nic = (struct nicpf *)nic_irq;
-
-	nic_mbx_intr_handler(nic, 0);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t nic_mbx1_intr_handler (int irq, void *nic_irq)
-{
-	struct nicpf *nic = (struct nicpf *)nic_irq;
-
-	nic_mbx_intr_handler(nic, 1);
-
 	return IRQ_HANDLED;
 }
 
@@ -821,7 +1055,13 @@
 {
 	int i, ret;
 
-	nic->num_vec = NIC_PF_MSIX_VECTORS;
+	nic->num_vec = pci_msix_vec_count(nic->pdev);
+
+	nic->msix_entries = kmalloc_array(nic->num_vec,
+					  sizeof(struct msix_entry),
+					  GFP_KERNEL);
+	if (!nic->msix_entries)
+		return -ENOMEM;
 
 	for (i = 0; i < nic->num_vec; i++)
 		nic->msix_entries[i].entry = i;
@@ -829,8 +1069,9 @@
 	ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec);
 	if (ret) {
 		dev_err(&nic->pdev->dev,
-			"Request for #%d msix vectors failed\n",
-			   nic->num_vec);
+			"Request for #%d msix vectors failed, returned %d\n",
+			   nic->num_vec, ret);
+		kfree(nic->msix_entries);
 		return ret;
 	}
 
@@ -842,6 +1083,7 @@
 {
 	if (nic->msix_enabled) {
 		pci_disable_msix(nic->pdev);
+		kfree(nic->msix_entries);
 		nic->msix_enabled = 0;
 		nic->num_vec = 0;
 	}
@@ -860,27 +1102,26 @@
 
 static int nic_register_interrupts(struct nicpf *nic)
 {
-	int ret;
+	int i, ret;
 
 	/* Enable MSI-X */
 	ret = nic_enable_msix(nic);
 	if (ret)
 		return ret;
 
-	/* Register mailbox interrupt handlers */
-	ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector,
-			  nic_mbx0_intr_handler, 0, "NIC Mbox0", nic);
-	if (ret)
-		goto fail;
+	/* Register mailbox interrupt handler */
+	for (i = NIC_PF_INTR_ID_MBOX0; i < nic->num_vec; i++) {
+		sprintf(nic->irq_name[i],
+			"NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0));
 
-	nic->irq_allocated[NIC_PF_INTR_ID_MBOX0] = true;
+		ret = request_irq(nic->msix_entries[i].vector,
+				  nic_mbx_intr_handler, 0,
+				  nic->irq_name[i], nic);
+		if (ret)
+			goto fail;
 
-	ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX1].vector,
-			  nic_mbx1_intr_handler, 0, "NIC Mbox1", nic);
-	if (ret)
-		goto fail;
-
-	nic->irq_allocated[NIC_PF_INTR_ID_MBOX1] = true;
+		nic->irq_allocated[i] = true;
+	}
 
 	/* Enable mailbox interrupt */
 	nic_enable_mbx_intr(nic);
@@ -889,6 +1130,7 @@
 fail:
 	dev_err(&nic->pdev->dev, "Request irq failed\n");
 	nic_free_all_interrupts(nic);
+	nic_disable_msix(nic);
 	return ret;
 }
 
@@ -903,6 +1145,12 @@
 	int pos, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE;
 	u16 total_vf;
 
+	/* Secondary Qsets are needed only if the CPU count is
+	 * more than MAX_QUEUES_PER_QSET.
+	 */
+	if (num_online_cpus() <= MAX_QUEUES_PER_QSET)
+		return 0;
+
 	/* Check if its a multi-node environment */
 	if (nr_node_ids > 1)
 		sqs_per_vf = MAX_SQS_PER_VF;
@@ -1008,6 +1256,12 @@
 	if (!nic)
 		return -ENOMEM;
 
+	nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL);
+	if (!nic->hw) {
+		devm_kfree(dev, nic);
+		return -ENOMEM;
+	}
+
 	pci_set_drvdata(pdev, nic);
 
 	nic->pdev = pdev;
@@ -1047,13 +1301,12 @@
 
 	nic->node = nic_get_node_id(pdev);
 
-	nic_set_lmac_vf_mapping(nic);
-
 	/* Initialize hardware */
-	nic_init_hw(nic);
+	err = nic_init_hw(nic);
+	if (err)
+		goto err_release_regions;
 
-	/* Set RSS TBL size for each VF */
-	nic->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
+	nic_set_lmac_vf_mapping(nic);
 
 	/* Register interrupts */
 	err = nic_register_interrupts(nic);
@@ -1086,6 +1339,9 @@
 err_release_regions:
 	pci_release_regions(pdev);
 err_disable_device:
+	nic_free_lmacmem(nic);
+	devm_kfree(dev, nic->hw);
+	devm_kfree(dev, nic);
 	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 	return err;
@@ -1106,6 +1362,11 @@
 
 	nic_unregister_interrupts(nic);
 	pci_release_regions(pdev);
+
+	nic_free_lmacmem(nic);
+	devm_kfree(&pdev->dev, nic->hw);
+	devm_kfree(&pdev->dev, nic);
+
 	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 }
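
Two quick checks on the arithmetic introduced above, read from the code
itself: INTR_MASK(8) = BIT_ULL(8) - 1 = 0xFF, so on CN81XX with its 8 VFs
only the eight valid mailbox interrupt bits get enabled, while with 128 VFs
the first register gets ~0ull and the second gets INTR_MASK(128 - 64) =
~0ull, matching the old behaviour of enabling all 128. Likewise, on 88xx
hw->tl4_cnt = MAX_QUEUES_PER_QSET * 128 VFs = 1024 and hw->bgx_cnt = 2, so
bgx * (hw->tl4_cnt / hw->bgx_cnt) places BGX0's TL4s at 0-511 and BGX1's at
512-1023, as described in the comment in nic_tx_channel_cfg().
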
diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h
index afb10e3..db9c632 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_reg.h
+++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h
@@ -36,6 +36,20 @@
 #define   NIC_PF_MAILBOX_ENA_W1C		(0x0450)
 #define   NIC_PF_MAILBOX_ENA_W1S		(0x0470)
 #define   NIC_PF_RX_ETYPE_0_7			(0x0500)
+#define   NIC_PF_RX_GENEVE_DEF			(0x0580)
+#define    UDP_GENEVE_PORT_NUM				0x17C1ULL
+#define   NIC_PF_RX_GENEVE_PROT_DEF		(0x0588)
+#define    IPV6_PROT					0x86DDULL
+#define    IPV4_PROT					0x800ULL
+#define    ET_PROT					0x6558ULL
+#define   NIC_PF_RX_NVGRE_PROT_DEF		(0x0598)
+#define   NIC_PF_RX_VXLAN_DEF_0_1		(0x05A0)
+#define    UDP_VXLAN_PORT_NUM				0x12B5
+#define   NIC_PF_RX_VXLAN_PROT_DEF		(0x05B0)
+#define    IPV6_PROT_DEF				0x2ULL
+#define    IPV4_PROT_DEF				0x1ULL
+#define    ET_PROT_DEF					0x3ULL
+#define   NIC_PF_RX_CFG				(0x05D0)
 #define   NIC_PF_PKIND_0_15_CFG			(0x0600)
 #define   NIC_PF_ECC0_FLIP0			(0x1000)
 #define   NIC_PF_ECC1_FLIP0			(0x1008)
@@ -103,6 +117,7 @@
 #define   NIC_PF_SW_SYNC_RX_DONE		(0x490008)
 #define   NIC_PF_TL2_0_63_CFG			(0x500000)
 #define   NIC_PF_TL2_0_63_PRI			(0x520000)
+#define   NIC_PF_TL2_LMAC			(0x540000)
 #define   NIC_PF_TL2_0_63_SH_STATUS		(0x580000)
 #define   NIC_PF_TL3A_0_63_CFG			(0x5F0000)
 #define   NIC_PF_TL3_0_255_CFG			(0x600000)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index a19e73f..06c014e 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -29,10 +29,20 @@
 static const struct pci_device_id nicvf_id_table[] = {
 	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
 			 PCI_DEVICE_ID_THUNDER_NIC_VF,
-			 PCI_VENDOR_ID_CAVIUM, 0xA134) },
+			 PCI_VENDOR_ID_CAVIUM,
+			 PCI_SUBSYS_DEVID_88XX_NIC_VF) },
 	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
 			 PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
-			 PCI_VENDOR_ID_CAVIUM, 0xA11E) },
+			 PCI_VENDOR_ID_CAVIUM,
+			 PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) },
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+			 PCI_DEVICE_ID_THUNDER_NIC_VF,
+			 PCI_VENDOR_ID_CAVIUM,
+			 PCI_SUBSYS_DEVID_81XX_NIC_VF) },
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+			 PCI_DEVICE_ID_THUNDER_NIC_VF,
+			 PCI_VENDOR_ID_CAVIUM,
+			 PCI_SUBSYS_DEVID_83XX_NIC_VF) },
 	{ 0, }  /* end of table */
 };
 
@@ -134,15 +144,19 @@
 
 	/* Wait for previous message to be acked, timeout 2sec */
 	while (!nic->pf_acked) {
-		if (nic->pf_nacked)
+		if (nic->pf_nacked) {
+			netdev_err(nic->netdev,
+				   "PF NACK to mbox msg 0x%02x from VF%d\n",
+				   (mbx->msg.msg & 0xFF), nic->vf_id);
 			return -EINVAL;
+		}
 		msleep(sleep);
 		if (nic->pf_acked)
 			break;
 		timeout -= sleep;
 		if (!timeout) {
 			netdev_err(nic->netdev,
-				   "PF didn't ack to mbox msg %d from VF%d\n",
+				   "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
 				   (mbx->msg.msg & 0xFF), nic->vf_id);
 			return -EBUSY;
 		}
@@ -352,13 +366,7 @@
 
 	rss->enable = true;
 
-	/* Using the HW reset value for now */
-	rss->key[0] = 0xFEED0BADFEED0BADULL;
-	rss->key[1] = 0xFEED0BADFEED0BADULL;
-	rss->key[2] = 0xFEED0BADFEED0BADULL;
-	rss->key[3] = 0xFEED0BADFEED0BADULL;
-	rss->key[4] = 0xFEED0BADFEED0BADULL;
-
+	netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64));
 	nicvf_set_rss_key(nic);
 
 	rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
@@ -507,7 +515,8 @@
 
 static void nicvf_snd_pkt_handler(struct net_device *netdev,
 				  struct cmp_queue *cq,
-				  struct cqe_send_t *cqe_tx, int cqe_type)
+				  struct cqe_send_t *cqe_tx,
+				  int cqe_type, int budget)
 {
 	struct sk_buff *skb = NULL;
 	struct nicvf *nic = netdev_priv(netdev);
@@ -531,7 +540,7 @@
 	if (skb) {
 		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
 		prefetch(skb);
-		dev_consume_skb_any(skb);
+		napi_consume_skb(skb, budget);
 		sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
 	} else {
 		/* In case of HW TSO, HW sends a CQE for each segment of a TSO
@@ -686,7 +695,8 @@
 		break;
 		case CQE_TYPE_SEND:
 			nicvf_snd_pkt_handler(netdev, cq,
-					      (void *)cq_desc, CQE_TYPE_SEND);
+					      (void *)cq_desc, CQE_TYPE_SEND,
+					      budget);
 			tx_done++;
 		break;
 		case CQE_TYPE_INVALID:
@@ -928,16 +938,19 @@
 	int vector;
 
 	for_each_cq_irq(irq)
-		sprintf(nic->irq_name[irq], "NICVF%d CQ%d",
-			nic->vf_id, irq);
+		sprintf(nic->irq_name[irq], "%s-rxtx-%d",
+			nic->pnicvf->netdev->name,
+			nicvf_netdev_qidx(nic, irq));
 
 	for_each_sq_irq(irq)
-		sprintf(nic->irq_name[irq], "NICVF%d SQ%d",
-			nic->vf_id, irq - NICVF_INTR_ID_SQ);
+		sprintf(nic->irq_name[irq], "%s-sq-%d",
+			nic->pnicvf->netdev->name,
+			nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ));
 
 	for_each_rbdr_irq(irq)
-		sprintf(nic->irq_name[irq], "NICVF%d RBDR%d",
-			nic->vf_id, irq - NICVF_INTR_ID_RBDR);
+		sprintf(nic->irq_name[irq], "%s-rbdr-%d",
+			nic->pnicvf->netdev->name,
+			nic->sqs_mode ? (nic->sqs_id + 1) : 0);
 
 	/* Register CQ interrupts */
 	for (irq = 0; irq < nic->qs->cq_cnt; irq++) {
@@ -961,8 +974,9 @@
 	}
 
 	/* Register QS error interrupt */
-	sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR],
-		"NICVF%d Qset error", nic->vf_id);
+	sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d",
+		nic->pnicvf->netdev->name,
+		nic->sqs_mode ? (nic->sqs_id + 1) : 0);
 	irq = NICVF_INTR_ID_QS_ERR;
 	ret = request_irq(nic->msix_entries[irq].vector,
 			  nicvf_qs_err_intr_handler,
@@ -1191,7 +1205,7 @@
 	}
 
 	/* Check if we got MAC address from PF or else generate a radom MAC */
-	if (is_zero_ether_addr(netdev->dev_addr)) {
+	if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) {
 		eth_hw_addr_random(netdev);
 		nicvf_hw_set_mac_addr(nic, netdev);
 	}
@@ -1527,14 +1541,13 @@
 		goto err_release_regions;
 	}
 
-	qcount = MAX_CMP_QUEUES_PER_QS;
+	qcount = netif_get_num_default_rss_queues();
 
 	/* Restrict multiqset support only for host bound VFs */
 	if (pdev->is_virtfn) {
 		/* Set max number of queues per VF */
-		qcount = roundup(num_online_cpus(), MAX_CMP_QUEUES_PER_QS);
-		qcount = min(qcount,
-			     (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
+		qcount = min_t(int, num_online_cpus(),
+			       (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
 	}
 
 	netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
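
A small consistency check on the RSS key change above: RSS_HASH_KEY_SIZE is
5 (the "320 bit key" comment in nic.h), so netdev_rss_key_fill() is asked
for 5 * sizeof(u64) = 40 bytes, i.e. the full 320-bit key, now taken from
the kernel's per-boot random RSS key instead of the fixed 0xFEED0BAD
pattern.
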
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 0ff8e60..7d90856 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -479,6 +479,16 @@
 					      NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
 }
 
+static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
+{
+	union nic_mbx mbx = {};
+
+	/* Reset all RXQ's stats */
+	mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
+	mbx.reset_stat.rq_stat_mask = 0xFFFF;
+	nicvf_send_msg_to_pf(nic, &mbx);
+}
+
 /* Configures receive queue */
 static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 				   int qidx, bool enable)
@@ -762,10 +772,10 @@
 	nic->qs = qs;
 
 	/* Set count of each queue */
-	qs->rbdr_cnt = RBDR_CNT;
-	qs->rq_cnt = RCV_QUEUE_CNT;
-	qs->sq_cnt = SND_QUEUE_CNT;
-	qs->cq_cnt = CMP_QUEUE_CNT;
+	qs->rbdr_cnt = DEFAULT_RBDR_CNT;
+	qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus());
+	qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus());
+	qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt);
 
 	/* Set queue lengths */
 	qs->rbdr_len = RCV_BUF_COUNT;
@@ -812,6 +822,11 @@
 		nicvf_free_resources(nic);
 	}
 
+	/* Reset RXQ stats.
+	 * SQ stats will be reset automatically once the SQ is reset.
+	 */
+	nicvf_reset_rcv_queue_stats(nic);
+
 	return 0;
 }
 
@@ -1184,13 +1199,23 @@
 	int frag;
 	int payload_len = 0;
 	struct sk_buff *skb = NULL;
-	struct sk_buff *skb_frag = NULL;
-	struct sk_buff *prev_frag = NULL;
+	struct page *page;
+	int offset;
 	u16 *rb_lens = NULL;
 	u64 *rb_ptrs = NULL;
 
 	rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
-	rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
+	/* On all chips except 88xx pass1, CQE_RX2_S is added to
+	 * CQE_RX at word6, hence the buffer pointers move by one word.
+	 *
+	 * Use the existing 'hw_tso' flag, which is set for all chips
+	 * except 88xx pass1, instead of an additional cache line
+	 * access (or miss) from reading the pci dev's revision.
+	 */
+	if (!nic->hw_tso)
+		rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
+	else
+		rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
 
 	netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n",
 		   __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
@@ -1208,22 +1233,10 @@
 			skb_put(skb, payload_len);
 		} else {
 			/* Add fragments */
-			skb_frag = nicvf_rb_ptr_to_skb(nic, *rb_ptrs,
-						       payload_len);
-			if (!skb_frag) {
-				dev_kfree_skb(skb);
-				return NULL;
-			}
-
-			if (!skb_shinfo(skb)->frag_list)
-				skb_shinfo(skb)->frag_list = skb_frag;
-			else
-				prev_frag->next = skb_frag;
-
-			prev_frag = skb_frag;
-			skb->len += payload_len;
-			skb->data_len += payload_len;
-			skb_frag->len = payload_len;
+			page = virt_to_page(phys_to_virt(*rb_ptrs));
+			offset = phys_to_virt(*rb_ptrs) - page_address(page);
+			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+					offset, payload_len, RCV_FRAG_LEN);
 		}
 		/* Next buffer pointer */
 		rb_ptrs++;
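
The page/offset derivation used by the new fragment path above, pulled out
as a standalone sketch (the helper name is illustrative; the driver open
codes this in nicvf_get_rcv_skb()):

static inline void example_add_rcv_frag(struct sk_buff *skb, u64 buf_phys,
					int payload_len)
{
	void *va = phys_to_virt(buf_phys);
	struct page *page = virt_to_page(va);
	int offset = va - page_address(page);

	/* attach the buffer as a page fragment, RCV_FRAG_LEN is its truesize */
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
			offset, payload_len, RCV_FRAG_LEN);
}
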
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 6673e11..869f338 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -57,10 +57,7 @@
 #define CMP_QUEUE_SIZE6		6ULL /* 64K entries */
 
 /* Default queue count per QS, its lengths and threshold values */
-#define RBDR_CNT		1
-#define RCV_QUEUE_CNT		8
-#define SND_QUEUE_CNT		8
-#define CMP_QUEUE_CNT		8 /* Max of RCV and SND qcount */
+#define DEFAULT_RBDR_CNT	1
 
 #define SND_QSIZE		SND_QUEUE_SIZE2
 #define SND_QUEUE_LEN		(1ULL << (SND_QSIZE + 10))
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 63a39ac..8bbaedb 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -28,6 +28,9 @@
 	struct bgx		*bgx;
 	int			dmac;
 	u8			mac[ETH_ALEN];
+	u8                      lmac_type;
+	u8                      lane_to_sds;
+	bool                    use_training;
 	bool			link_up;
 	int			lmacid; /* ID within BGX */
 	int			lmacid_bd; /* ID on board */
@@ -43,14 +46,13 @@
 
 struct bgx {
 	u8			bgx_id;
-	u8			qlm_mode;
 	struct	lmac		lmac[MAX_LMAC_PER_BGX];
 	int			lmac_count;
-	int                     lmac_type;
-	int                     lane_to_sds;
-	int			use_training;
+	u8			max_lmac;
 	void __iomem		*reg_base;
 	struct pci_dev		*pdev;
+	bool                    is_dlm;
+	bool                    is_rgx;
 };
 
 static struct bgx *bgx_vnic[MAX_BGX_THUNDER];
@@ -61,6 +63,7 @@
 /* Supported devices */
 static const struct pci_device_id bgx_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_BGX) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_RGX) },
 	{ 0, }  /* end of table */
 };
 
@@ -124,8 +127,8 @@
 	int i;
 	unsigned map = 0;
 
-	for (i = 0; i < MAX_BGX_PER_CN88XX; i++) {
-		if (bgx_vnic[(node * MAX_BGX_PER_CN88XX) + i])
+	for (i = 0; i < MAX_BGX_PER_NODE; i++) {
+		if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i])
 			map |= (1 << i);
 	}
 
@@ -138,7 +141,7 @@
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 	if (bgx)
 		return bgx->lmac_count;
 
@@ -153,7 +156,7 @@
 	struct bgx *bgx;
 	struct lmac *lmac;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 	if (!bgx)
 		return;
 
@@ -166,7 +169,7 @@
 
 const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 
 	if (bgx)
 		return bgx->lmac[lmacid].mac;
@@ -177,7 +180,7 @@
 
 void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 
 	if (!bgx)
 		return;
@@ -188,11 +191,13 @@
 
 void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	struct lmac *lmac;
 	u64 cfg;
 
 	if (!bgx)
 		return;
+	lmac = &bgx->lmac[lmacid];
 
 	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
 	if (enable)
@@ -200,6 +205,9 @@
 	else
 		cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN);
 	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+	if (bgx->is_rgx)
+		xcv_setup_link(enable ? lmac->link_up : 0, lmac->last_speed);
 }
 EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
 
@@ -266,9 +274,12 @@
 
 	port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG);
 
-	/* renable lmac */
+	/* Re-enable lmac */
 	cmr_cfg |= CMR_EN;
 	bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg);
+
+	if (bgx->is_rgx && (cmr_cfg & (CMR_PKT_RX_EN | CMR_PKT_TX_EN)))
+		xcv_setup_link(lmac->link_up, lmac->last_speed);
 }
 
 static void bgx_lmac_handler(struct net_device *netdev)
@@ -314,7 +325,7 @@
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 	if (!bgx)
 		return 0;
 
@@ -328,7 +339,7 @@
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 	if (!bgx)
 		return 0;
 
@@ -356,7 +367,7 @@
 	struct lmac *lmac;
 	u64    cfg;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 	if (!bgx)
 		return;
 
@@ -379,8 +390,9 @@
 }
 EXPORT_SYMBOL(bgx_lmac_internal_loopback);
 
-static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid)
+static int bgx_lmac_sgmii_init(struct bgx *bgx, struct lmac *lmac)
 {
+	int lmacid = lmac->lmacid;
 	u64 cfg;
 
 	bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_THRESH, 0x30);
@@ -409,18 +421,29 @@
 	cfg |= (PCS_MRX_CTL_RST_AN | PCS_MRX_CTL_AN_EN);
 	bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, cfg);
 
-	if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS,
-			 PCS_MRX_STATUS_AN_CPT, false)) {
-		dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n");
-		return -1;
+	if (lmac->lmac_type == BGX_MODE_QSGMII) {
+		/* Disable disparity check for QSGMII */
+		cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL);
+		cfg &= ~PCS_MISC_CTL_DISP_EN;
+		bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL, cfg);
+		return 0;
+	}
+
+	if (lmac->lmac_type == BGX_MODE_SGMII) {
+		if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS,
+				 PCS_MRX_STATUS_AN_CPT, false)) {
+			dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n");
+			return -1;
+		}
 	}
 
 	return 0;
 }
 
-static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type)
+static int bgx_lmac_xaui_init(struct bgx *bgx, struct lmac *lmac)
 {
 	u64 cfg;
+	int lmacid = lmac->lmacid;
 
 	/* Reset SPU */
 	bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET);
@@ -436,12 +459,14 @@
 
 	bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER);
 	/* Set interleaved running disparity for RXAUI */
-	if (bgx->lmac_type != BGX_MODE_RXAUI)
-		bgx_reg_modify(bgx, lmacid,
-			       BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS);
-	else
+	if (lmac->lmac_type == BGX_MODE_RXAUI)
 		bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL,
-			       SPU_MISC_CTL_RX_DIS | SPU_MISC_CTL_INTLV_RDISP);
+			       SPU_MISC_CTL_INTLV_RDISP);
+
+	/* Clear receive packet disable */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL);
+	cfg &= ~SPU_MISC_CTL_RX_DIS;
+	bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg);
 
 	/* clear all interrupts */
 	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_INT);
@@ -451,7 +476,7 @@
 	cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
 	bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg);
 
-	if (bgx->use_training) {
+	if (lmac->use_training) {
 		bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LP_CUP, 0x00);
 		bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_CUP, 0x00);
 		bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_REP, 0x00);
@@ -474,9 +499,9 @@
 	bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_CONTROL, cfg);
 
 	cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_ADV);
-	if (bgx->lmac_type == BGX_MODE_10G_KR)
+	if (lmac->lmac_type == BGX_MODE_10G_KR)
 		cfg |= (1 << 23);
-	else if (bgx->lmac_type == BGX_MODE_40G_KR)
+	else if (lmac->lmac_type == BGX_MODE_40G_KR)
 		cfg |= (1 << 24);
 	else
 		cfg &= ~((1 << 23) | (1 << 24));
@@ -511,11 +536,10 @@
 {
 	struct bgx *bgx = lmac->bgx;
 	int lmacid = lmac->lmacid;
-	int lmac_type = bgx->lmac_type;
+	int lmac_type = lmac->lmac_type;
 	u64 cfg;
 
-	bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS);
-	if (bgx->use_training) {
+	if (lmac->use_training) {
 		cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
 		if (!(cfg & (1ull << 13))) {
 			cfg = (1ull << 13) | (1ull << 14);
@@ -556,7 +580,7 @@
 			       BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT);
 	if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
 		dev_err(&bgx->pdev->dev, "Receive fault, retry training\n");
-		if (bgx->use_training) {
+		if (lmac->use_training) {
 			cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
 			if (!(cfg & (1ull << 13))) {
 				cfg = (1ull << 13) | (1ull << 14);
@@ -584,11 +608,6 @@
 		return -1;
 	}
 
-	/* Clear receive packet disable */
-	cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL);
-	cfg &= ~SPU_MISC_CTL_RX_DIS;
-	bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg);
-
 	/* Check for MAC RX faults */
 	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_CTL);
 	/* 0 - Link is okay, 1 - Local fault, 2 - Remote fault */
@@ -599,7 +618,7 @@
 	/* Rx local/remote fault seen.
 	 * Do lmac reinit to see if condition recovers
 	 */
-	bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type);
+	bgx_lmac_xaui_init(bgx, lmac);
 
 	return -1;
 }
@@ -623,7 +642,7 @@
 	if ((spu_link & SPU_STATUS1_RCV_LNK) &&
 	    !(smu_link & SMU_RX_CTL_STATUS)) {
 		lmac->link_up = 1;
-		if (lmac->bgx->lmac_type == BGX_MODE_XLAUI)
+		if (lmac->lmac_type == BGX_MODE_XLAUI)
 			lmac->last_speed = 40000;
 		else
 			lmac->last_speed = 10000;
@@ -649,6 +668,16 @@
 	queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2);
 }
 
+static int phy_interface_mode(u8 lmac_type)
+{
+	if (lmac_type == BGX_MODE_QSGMII)
+		return PHY_INTERFACE_MODE_QSGMII;
+	if (lmac_type == BGX_MODE_RGMII)
+		return PHY_INTERFACE_MODE_RGMII;
+
+	return PHY_INTERFACE_MODE_SGMII;
+}
+
 static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 {
 	struct lmac *lmac;
@@ -657,13 +686,15 @@
 	lmac = &bgx->lmac[lmacid];
 	lmac->bgx = bgx;
 
-	if (bgx->lmac_type == BGX_MODE_SGMII) {
+	if ((lmac->lmac_type == BGX_MODE_SGMII) ||
+	    (lmac->lmac_type == BGX_MODE_QSGMII) ||
+	    (lmac->lmac_type == BGX_MODE_RGMII)) {
 		lmac->is_sgmii = 1;
-		if (bgx_lmac_sgmii_init(bgx, lmacid))
+		if (bgx_lmac_sgmii_init(bgx, lmac))
 			return -1;
 	} else {
 		lmac->is_sgmii = 0;
-		if (bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type))
+		if (bgx_lmac_xaui_init(bgx, lmac))
 			return -1;
 	}
 
@@ -685,10 +716,10 @@
 	/* Restore default cfg, incase low level firmware changed it */
 	bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03);
 
-	if ((bgx->lmac_type != BGX_MODE_XFI) &&
-	    (bgx->lmac_type != BGX_MODE_XLAUI) &&
-	    (bgx->lmac_type != BGX_MODE_40G_KR) &&
-	    (bgx->lmac_type != BGX_MODE_10G_KR)) {
+	if ((lmac->lmac_type != BGX_MODE_XFI) &&
+	    (lmac->lmac_type != BGX_MODE_XLAUI) &&
+	    (lmac->lmac_type != BGX_MODE_40G_KR) &&
+	    (lmac->lmac_type != BGX_MODE_10G_KR)) {
 		if (!lmac->phydev)
 			return -ENODEV;
 
@@ -696,7 +727,7 @@
 
 		if (phy_connect_direct(&lmac->netdev, lmac->phydev,
 				       bgx_lmac_handler,
-				       PHY_INTERFACE_MODE_SGMII))
+				       phy_interface_mode(lmac->lmac_type)))
 			return -ENODEV;
 
 		phy_start_aneg(lmac->phydev);
@@ -753,76 +784,19 @@
 
 	bgx_flush_dmac_addrs(bgx, lmacid);
 
-	if ((bgx->lmac_type != BGX_MODE_XFI) &&
-	    (bgx->lmac_type != BGX_MODE_XLAUI) &&
-	    (bgx->lmac_type != BGX_MODE_40G_KR) &&
-	    (bgx->lmac_type != BGX_MODE_10G_KR) && lmac->phydev)
+	if ((lmac->lmac_type != BGX_MODE_XFI) &&
+	    (lmac->lmac_type != BGX_MODE_XLAUI) &&
+	    (lmac->lmac_type != BGX_MODE_40G_KR) &&
+	    (lmac->lmac_type != BGX_MODE_10G_KR) && lmac->phydev)
 		phy_disconnect(lmac->phydev);
 
 	lmac->phydev = NULL;
 }
 
-static void bgx_set_num_ports(struct bgx *bgx)
-{
-	u64 lmac_count;
-
-	switch (bgx->qlm_mode) {
-	case QLM_MODE_SGMII:
-		bgx->lmac_count = 4;
-		bgx->lmac_type = BGX_MODE_SGMII;
-		bgx->lane_to_sds = 0;
-		break;
-	case QLM_MODE_XAUI_1X4:
-		bgx->lmac_count = 1;
-		bgx->lmac_type = BGX_MODE_XAUI;
-		bgx->lane_to_sds = 0xE4;
-			break;
-	case QLM_MODE_RXAUI_2X2:
-		bgx->lmac_count = 2;
-		bgx->lmac_type = BGX_MODE_RXAUI;
-		bgx->lane_to_sds = 0xE4;
-			break;
-	case QLM_MODE_XFI_4X1:
-		bgx->lmac_count = 4;
-		bgx->lmac_type = BGX_MODE_XFI;
-		bgx->lane_to_sds = 0;
-		break;
-	case QLM_MODE_XLAUI_1X4:
-		bgx->lmac_count = 1;
-		bgx->lmac_type = BGX_MODE_XLAUI;
-		bgx->lane_to_sds = 0xE4;
-		break;
-	case QLM_MODE_10G_KR_4X1:
-		bgx->lmac_count = 4;
-		bgx->lmac_type = BGX_MODE_10G_KR;
-		bgx->lane_to_sds = 0;
-		bgx->use_training = 1;
-		break;
-	case QLM_MODE_40G_KR4_1X4:
-		bgx->lmac_count = 1;
-		bgx->lmac_type = BGX_MODE_40G_KR;
-		bgx->lane_to_sds = 0xE4;
-		bgx->use_training = 1;
-		break;
-	default:
-		bgx->lmac_count = 0;
-		break;
-	}
-
-	/* Check if low level firmware has programmed LMAC count
-	 * based on board type, if yes consider that otherwise
-	 * the default static values
-	 */
-	lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7;
-	if (lmac_count != 4)
-		bgx->lmac_count = lmac_count;
-}
-
 static void bgx_init_hw(struct bgx *bgx)
 {
 	int i;
-
-	bgx_set_num_ports(bgx);
+	struct lmac *lmac;
 
 	bgx_reg_modify(bgx, 0, BGX_CMR_GLOBAL_CFG, CMR_GLOBAL_CFG_FCS_STRIP);
 	if (bgx_reg_read(bgx, 0, BGX_CMR_BIST_STATUS))
@@ -830,17 +804,9 @@
 
 	/* Set lmac type and lane2serdes mapping */
 	for (i = 0; i < bgx->lmac_count; i++) {
-		if (bgx->lmac_type == BGX_MODE_RXAUI) {
-			if (i)
-				bgx->lane_to_sds = 0x0e;
-			else
-				bgx->lane_to_sds = 0x04;
-			bgx_reg_write(bgx, i, BGX_CMRX_CFG,
-				      (bgx->lmac_type << 8) | bgx->lane_to_sds);
-			continue;
-		}
+		lmac = &bgx->lmac[i];
 		bgx_reg_write(bgx, i, BGX_CMRX_CFG,
-			      (bgx->lmac_type << 8) | (bgx->lane_to_sds + i));
+			      (lmac->lmac_type << 8) | lmac->lane_to_sds);
 		bgx->lmac[i].lmacid_bd = lmac_count;
 		lmac_count++;
 	}
@@ -863,55 +829,212 @@
 		bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00);
 }
 
-static void bgx_get_qlm_mode(struct bgx *bgx)
+static u8 bgx_get_lane2sds_cfg(struct bgx *bgx, struct lmac *lmac)
+{
+	return (u8)(bgx_reg_read(bgx, lmac->lmacid, BGX_CMRX_CFG) & 0xFF);
+}
+
+static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid)
 {
 	struct device *dev = &bgx->pdev->dev;
-	int lmac_type;
-	int train_en;
+	struct lmac *lmac;
+	char str[20];
+	u8 dlm;
 
-	/* Read LMAC0 type to figure out QLM mode
-	 * This is configured by low level firmware
-	 */
-	lmac_type = bgx_reg_read(bgx, 0, BGX_CMRX_CFG);
-	lmac_type = (lmac_type >> 8) & 0x07;
+	if (lmacid > bgx->max_lmac)
+		return;
 
-	train_en = bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) &
-				SPU_PMD_CRTL_TRAIN_EN;
+	lmac = &bgx->lmac[lmacid];
+	dlm = (lmacid / 2) + (bgx->bgx_id * 2);
+	if (!bgx->is_dlm)
+		sprintf(str, "BGX%d QLM mode", bgx->bgx_id);
+	else
+		sprintf(str, "BGX%d DLM%d mode", bgx->bgx_id, dlm);
 
-	switch (lmac_type) {
+	switch (lmac->lmac_type) {
 	case BGX_MODE_SGMII:
-		bgx->qlm_mode = QLM_MODE_SGMII;
-		dev_info(dev, "BGX%d QLM mode: SGMII\n", bgx->bgx_id);
+		dev_info(dev, "%s: SGMII\n", (char *)str);
 		break;
 	case BGX_MODE_XAUI:
-		bgx->qlm_mode = QLM_MODE_XAUI_1X4;
-		dev_info(dev, "BGX%d QLM mode: XAUI\n", bgx->bgx_id);
+		dev_info(dev, "%s: XAUI\n", (char *)str);
 		break;
 	case BGX_MODE_RXAUI:
-		bgx->qlm_mode = QLM_MODE_RXAUI_2X2;
-		dev_info(dev, "BGX%d QLM mode: RXAUI\n", bgx->bgx_id);
+		dev_info(dev, "%s: RXAUI\n", (char *)str);
 		break;
 	case BGX_MODE_XFI:
-		if (!train_en) {
-			bgx->qlm_mode = QLM_MODE_XFI_4X1;
-			dev_info(dev, "BGX%d QLM mode: XFI\n", bgx->bgx_id);
-		} else {
-			bgx->qlm_mode = QLM_MODE_10G_KR_4X1;
-			dev_info(dev, "BGX%d QLM mode: 10G_KR\n", bgx->bgx_id);
-		}
+		if (!lmac->use_training)
+			dev_info(dev, "%s: XFI\n", (char *)str);
+		else
+			dev_info(dev, "%s: 10G_KR\n", (char *)str);
 		break;
 	case BGX_MODE_XLAUI:
-		if (!train_en) {
-			bgx->qlm_mode = QLM_MODE_XLAUI_1X4;
-			dev_info(dev, "BGX%d QLM mode: XLAUI\n", bgx->bgx_id);
-		} else {
-			bgx->qlm_mode = QLM_MODE_40G_KR4_1X4;
-			dev_info(dev, "BGX%d QLM mode: 40G_KR4\n", bgx->bgx_id);
-		}
+		if (!lmac->use_training)
+			dev_info(dev, "%s: XLAUI\n", (char *)str);
+		else
+			dev_info(dev, "%s: 40G_KR4\n", (char *)str);
+		break;
+	case BGX_MODE_QSGMII:
+		if ((lmacid == 0) &&
+		    (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid))
+			return;
+		if ((lmacid == 2) &&
+		    (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid))
+			return;
+		dev_info(dev, "%s: QSGMII\n", (char *)str);
+		break;
+	case BGX_MODE_RGMII:
+		dev_info(dev, "%s: RGMII\n", (char *)str);
+		break;
+	case BGX_MODE_INVALID:
+		/* Nothing to do */
+		break;
+	}
+}
+
+static void lmac_set_lane2sds(struct bgx *bgx, struct lmac *lmac)
+{
+	switch (lmac->lmac_type) {
+	case BGX_MODE_SGMII:
+	case BGX_MODE_XFI:
+		lmac->lane_to_sds = lmac->lmacid;
+		break;
+	case BGX_MODE_XAUI:
+	case BGX_MODE_XLAUI:
+	case BGX_MODE_RGMII:
+		lmac->lane_to_sds = 0xE4;
+		break;
+	case BGX_MODE_RXAUI:
+		lmac->lane_to_sds = (lmac->lmacid) ? 0xE : 0x4;
+		break;
+	case BGX_MODE_QSGMII:
+		/* There is no way to determine whether DLM0/2 or DLM1/3
+		 * is configured for QSGMII, as the bootloader will
+		 * configure all LMACs, so take whatever is configured
+		 * by low level firmware.
+		 */
+		lmac->lane_to_sds = bgx_get_lane2sds_cfg(bgx, lmac);
 		break;
 	default:
-		bgx->qlm_mode = QLM_MODE_SGMII;
-		dev_info(dev, "BGX%d QLM default mode: SGMII\n", bgx->bgx_id);
+		lmac->lane_to_sds = 0;
+		break;
+	}
+}
+
+static void lmac_set_training(struct bgx *bgx, struct lmac *lmac, int lmacid)
+{
+	if ((lmac->lmac_type != BGX_MODE_10G_KR) &&
+	    (lmac->lmac_type != BGX_MODE_40G_KR)) {
+		lmac->use_training = 0;
+		return;
+	}
+
+	lmac->use_training = bgx_reg_read(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL) &
+							SPU_PMD_CRTL_TRAIN_EN;
+}
+
+static void bgx_set_lmac_config(struct bgx *bgx, u8 idx)
+{
+	struct lmac *lmac;
+	struct lmac *olmac;
+	u64 cmr_cfg;
+	u8 lmac_type;
+	u8 lane_to_sds;
+
+	lmac = &bgx->lmac[idx];
+
+	if (!bgx->is_dlm || bgx->is_rgx) {
+		/* Read LMAC0 type to figure out QLM mode
+		 * This is configured by low level firmware
+		 */
+		cmr_cfg = bgx_reg_read(bgx, 0, BGX_CMRX_CFG);
+		lmac->lmac_type = (cmr_cfg >> 8) & 0x07;
+		if (bgx->is_rgx)
+			lmac->lmac_type = BGX_MODE_RGMII;
+		lmac_set_training(bgx, lmac, 0);
+		lmac_set_lane2sds(bgx, lmac);
+		return;
+	}
+
+	/* On 81xx a BGX can be split across 2 DLMs;
+	 * firmware programs the lmac_type of LMAC0 and LMAC2
+	 */
+	if ((idx == 0) || (idx == 2)) {
+		cmr_cfg = bgx_reg_read(bgx, idx, BGX_CMRX_CFG);
+		lmac_type = (u8)((cmr_cfg >> 8) & 0x07);
+		lane_to_sds = (u8)(cmr_cfg & 0xFF);
+		/* Check if config is not reset value */
+		if ((lmac_type == 0) && (lane_to_sds == 0xE4))
+			lmac->lmac_type = BGX_MODE_INVALID;
+		else
+			lmac->lmac_type = lmac_type;
+		lmac_set_training(bgx, lmac, lmac->lmacid);
+		lmac_set_lane2sds(bgx, lmac);
+
+		/* Set LMAC type of other lmac on same DLM i.e LMAC 1/3 */
+		olmac = &bgx->lmac[idx + 1];
+		olmac->lmac_type = lmac->lmac_type;
+		lmac_set_training(bgx, olmac, olmac->lmacid);
+		lmac_set_lane2sds(bgx, olmac);
+	}
+}
+
+static bool is_dlm0_in_bgx_mode(struct bgx *bgx)
+{
+	struct lmac *lmac;
+
+	if (!bgx->is_dlm)
+		return true;
+
+	lmac = &bgx->lmac[0];
+	if (lmac->lmac_type == BGX_MODE_INVALID)
+		return false;
+
+	return true;
+}
+
+static void bgx_get_qlm_mode(struct bgx *bgx)
+{
+	struct lmac *lmac;
+	struct lmac *lmac01;
+	struct lmac *lmac23;
+	u8  idx;
+
+	/* Init all LMAC's type to invalid */
+	for (idx = 0; idx < bgx->max_lmac; idx++) {
+		lmac = &bgx->lmac[idx];
+		lmac->lmacid = idx;
+		lmac->lmac_type = BGX_MODE_INVALID;
+		lmac->use_training = false;
+	}
+
+	/* It is assumed that low level firmware sets this value */
+	bgx->lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7;
+	if (bgx->lmac_count > bgx->max_lmac)
+		bgx->lmac_count = bgx->max_lmac;
+
+	for (idx = 0; idx < bgx->max_lmac; idx++)
+		bgx_set_lmac_config(bgx, idx);
+
+	if (!bgx->is_dlm || bgx->is_rgx) {
+		bgx_print_qlm_mode(bgx, 0);
+		return;
+	}
+
+	if (bgx->lmac_count) {
+		bgx_print_qlm_mode(bgx, 0);
+		bgx_print_qlm_mode(bgx, 2);
+	}
+
+	/* If DLM0 is not in BGX mode then LMAC0/1 have
+	 * to be configured with serdes lanes of DLM1
+	 */
+	if (is_dlm0_in_bgx_mode(bgx) || (bgx->lmac_count > 2))
+		return;
+	for (idx = 0; idx < bgx->lmac_count; idx++) {
+		lmac01 = &bgx->lmac[idx];
+		lmac23 = &bgx->lmac[idx + 2];
+		lmac01->lmac_type = lmac23->lmac_type;
+		lmac01->lane_to_sds = lmac23->lane_to_sds;
 	}
 }
 
@@ -1042,7 +1165,7 @@
 		}
 
 		lmac++;
-		if (lmac == MAX_LMAC_PER_BGX) {
+		if (lmac == bgx->max_lmac) {
 			of_node_put(node);
 			break;
 		}
@@ -1087,6 +1210,7 @@
 	struct device *dev = &pdev->dev;
 	struct bgx *bgx = NULL;
 	u8 lmac;
+	u16 sdevid;
 
 	bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL);
 	if (!bgx)
@@ -1115,10 +1239,30 @@
 		err = -ENOMEM;
 		goto err_release_regions;
 	}
-	bgx->bgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1;
-	bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_CN88XX;
 
-	bgx_vnic[bgx->bgx_id] = bgx;
+	pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid);
+	if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) {
+		bgx->bgx_id =
+		    (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1;
+		bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE;
+		bgx->max_lmac = MAX_LMAC_PER_BGX;
+		bgx_vnic[bgx->bgx_id] = bgx;
+	} else {
+		bgx->is_rgx = true;
+		bgx->max_lmac = 1;
+		bgx->bgx_id = MAX_BGX_PER_CN81XX - 1;
+		bgx_vnic[bgx->bgx_id] = bgx;
+		xcv_init_hw();
+	}
+
+	/* On 81xx all BGXs are DLM based, while on 83xx there are 3 QLM based
+	 * BGXs and one BGX, i.e. BGX2, that can be split across 2 DLMs.
+	 */
+	pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
+	if ((sdevid == PCI_SUBSYS_DEVID_81XX_BGX) ||
+	    ((sdevid == PCI_SUBSYS_DEVID_83XX_BGX) && (bgx->bgx_id == 2)))
+		bgx->is_dlm = true;
+
 	bgx_get_qlm_mode(bgx);
 
 	err = bgx_init_phy(bgx);
@@ -1133,6 +1277,8 @@
 		if (err) {
 			dev_err(dev, "BGX%d failed to enable lmac%d\n",
 				bgx->bgx_id, lmac);
+			while (lmac)
+				bgx_lmac_disable(bgx, --lmac);
 			goto err_enable;
 		}
 	}
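
A note on the lane_to_sds values set in lmac_set_lane2sds(), assuming the
usual two-bits-per-lane encoding of the LANE_TO_SDS field in BGX_CMRX_CFG:
0xE4 (binary 11 10 01 00) is the identity mapping used by the 4-lane modes,
while the RXAUI values 0x4 and 0xE map LMAC0 to serdes lanes 0-1 and LMAC1
to serdes lanes 2-3, matching the per-LMAC values the removed
bgx_set_num_ports()/bgx_init_hw() code used to program.
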
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 42010d2..d59c71e 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -9,8 +9,20 @@
 #ifndef THUNDER_BGX_H
 #define THUNDER_BGX_H
 
-#define    MAX_BGX_THUNDER			8 /* Max 4 nodes, 2 per node */
+/* PCI device ID */
+#define	PCI_DEVICE_ID_THUNDER_BGX		0xA026
+#define	PCI_DEVICE_ID_THUNDER_RGX		0xA054
+
+/* Subsystem device IDs */
+#define PCI_SUBSYS_DEVID_88XX_BGX		0xA126
+#define PCI_SUBSYS_DEVID_81XX_BGX		0xA226
+#define PCI_SUBSYS_DEVID_83XX_BGX		0xA326
+
+#define    MAX_BGX_THUNDER			8 /* Max 2 nodes, 4 per node */
 #define    MAX_BGX_PER_CN88XX			2
+#define    MAX_BGX_PER_CN81XX			3 /* 2 BGXs + 1 RGX */
+#define    MAX_BGX_PER_CN83XX			4
+#define    MAX_BGX_PER_NODE			4
 #define    MAX_LMAC_PER_BGX			4
 #define    MAX_BGX_CHANS_PER_LMAC		16
 #define    MAX_DMAC_PER_LMAC			8
@@ -18,8 +30,6 @@
 
 #define    MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE	2
 
-#define    MAX_LMAC	(MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX)
-
 /* Registers */
 #define BGX_CMRX_CFG			0x00
 #define  CMR_PKT_TX_EN				BIT_ULL(13)
@@ -136,6 +146,7 @@
 #define BGX_GMP_PCS_ANX_AN_RESULTS	0x30020
 #define BGX_GMP_PCS_SGM_AN_ADV		0x30068
 #define BGX_GMP_PCS_MISCX_CTL		0x30078
+#define  PCS_MISC_CTL_DISP_EN			BIT_ULL(13)
 #define  PCS_MISC_CTL_GMX_ENO			BIT_ULL(11)
 #define  PCS_MISC_CTL_SAMP_PT_MASK	0x7Full
 #define BGX_GMP_GMI_PRTX_CFG		0x38020
@@ -194,6 +205,9 @@
 void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status);
 void bgx_lmac_internal_loopback(int node, int bgx_idx,
 				int lmac_idx, bool enable);
+void xcv_init_hw(void);
+void xcv_setup_link(bool link_up, int link_speed);
+
 u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx);
 u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx);
 #define BGX_RX_STATS_COUNT 11
@@ -213,16 +227,9 @@
 	BGX_MODE_XLAUI = 4, /* 4 lanes, 10.3125 Gbaud */
 	BGX_MODE_10G_KR = 3,/* 1 lane, 10.3125 Gbaud */
 	BGX_MODE_40G_KR = 4,/* 4 lanes, 10.3125 Gbaud */
-};
-
-enum qlm_mode {
-	QLM_MODE_SGMII,         /* SGMII, each lane independent */
-	QLM_MODE_XAUI_1X4,      /* 1 XAUI or DXAUI, 4 lanes */
-	QLM_MODE_RXAUI_2X2,     /* 2 RXAUI, 2 lanes each */
-	QLM_MODE_XFI_4X1,       /* 4 XFI, 1 lane each */
-	QLM_MODE_XLAUI_1X4,     /* 1 XLAUI, 4 lanes each */
-	QLM_MODE_10G_KR_4X1,    /* 4 10GBASE-KR, 1 lane each */
-	QLM_MODE_40G_KR4_1X4,   /* 1 40GBASE-KR4, 4 lanes each */
+	BGX_MODE_RGMII = 5,
+	BGX_MODE_QSGMII = 6,
+	BGX_MODE_INVALID = 7,
 };
 
 #endif /* THUNDER_BGX_H */
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
new file mode 100644
index 0000000..67befed
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+
+#include "nic.h"
+#include "thunder_bgx.h"
+
+#define DRV_NAME	"thunder-xcv"
+#define DRV_VERSION	"1.0"
+
+/* Register offsets */
+#define XCV_RESET		0x00
+#define   PORT_EN		BIT_ULL(63)
+#define   CLK_RESET		BIT_ULL(15)
+#define   DLL_RESET		BIT_ULL(11)
+#define   COMP_EN		BIT_ULL(7)
+#define   TX_PKT_RESET		BIT_ULL(3)
+#define   TX_DATA_RESET		BIT_ULL(2)
+#define   RX_PKT_RESET		BIT_ULL(1)
+#define   RX_DATA_RESET		BIT_ULL(0)
+#define XCV_DLL_CTL		0x10
+#define   CLKRX_BYP		BIT_ULL(23)
+#define   CLKTX_BYP		BIT_ULL(15)
+#define XCV_COMP_CTL		0x20
+#define   DRV_BYP		BIT_ULL(63)
+#define XCV_CTL			0x30
+#define XCV_INT			0x40
+#define XCV_INT_W1S		0x48
+#define XCV_INT_ENA_W1C		0x50
+#define XCV_INT_ENA_W1S		0x58
+#define XCV_INBND_STATUS	0x80
+#define XCV_BATCH_CRD_RET	0x100
+
+struct xcv {
+	void __iomem		*reg_base;
+	struct pci_dev		*pdev;
+};
+
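+/* The driver keeps a single global XCV instance, filled in by xcv_probe() */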
+static struct xcv *xcv;
+
+/* Supported devices */
+static const struct pci_device_id xcv_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xA056) },
+	{ 0, }  /* end of table */
+};
+
+MODULE_AUTHOR("Cavium Inc");
+MODULE_DESCRIPTION("Cavium Thunder RGX/XCV Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, xcv_id_table);
+
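+/* One-time XCV block init: take the DLL and clock tree out of reset,
+ * set up DLL bypass, then enable compensation and the port.
+ */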
+void xcv_init_hw(void)
+{
+	u64  cfg;
+
+	/* Take DLL out of reset */
+	cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+	cfg &= ~DLL_RESET;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+	/* Take clock tree out of reset */
+	cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+	cfg &= ~CLK_RESET;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+	/* Wait for DLL to lock */
+	msleep(1);
+
+	/* Configure DLL - enable or bypass
+	 * TX no bypass, RX bypass
+	 */
+	cfg = readq_relaxed(xcv->reg_base + XCV_DLL_CTL);
+	cfg &= ~0xFF03;
+	cfg |= CLKRX_BYP;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_DLL_CTL);
+
+	/* Enable compensation controller and force the
+	 * write to be visible to HW by reading it back.
+	 */
+	cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+	cfg |= COMP_EN;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+	readq_relaxed(xcv->reg_base + XCV_RESET);
+	/* Wait for compensation state machine to lock */
+	msleep(10);
+
+	/* enable the XCV block */
+	cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+	cfg |= PORT_EN;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+	cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+	cfg |= CLK_RESET;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+}
+EXPORT_SYMBOL(xcv_init_hw);
+
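+/* Program the XCV datapath for the current RGMII link state and, when the
+ * link is up, the negotiated speed (10/100/1000 Mbps).
+ */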
+void xcv_setup_link(bool link_up, int link_speed)
+{
+	u64  cfg;
+	int speed = 2;
+
+	if (!xcv) {
+		pr_err("XCV init not done, probe may have failed\n");
+		return;
+	}
+
+	if (link_speed == 100)
+		speed = 1;
+	else if (link_speed == 10)
+		speed = 0;
+
+	if (link_up) {
+		/* set operating speed */
+		cfg = readq_relaxed(xcv->reg_base + XCV_CTL);
+		cfg &= ~0x03;
+		cfg |= speed;
+		writeq_relaxed(cfg, xcv->reg_base + XCV_CTL);
+
+		/* Reset datapaths */
+		cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+		cfg |= TX_DATA_RESET | RX_DATA_RESET;
+		writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+		/* Enable the packet flow */
+		cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+		cfg |= TX_PKT_RESET | RX_PKT_RESET;
+		writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+		/* Return credits to RGX */
+		writeq_relaxed(0x01, xcv->reg_base + XCV_BATCH_CRD_RET);
+	} else {
+		/* Disable packet flow */
+		cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+		cfg &= ~(TX_PKT_RESET | RX_PKT_RESET);
+		writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+		readq_relaxed(xcv->reg_base + XCV_RESET);
+	}
+}
+EXPORT_SYMBOL(xcv_setup_link);
+
+static int xcv_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	int err;
+	struct device *dev = &pdev->dev;
+
+	xcv = devm_kzalloc(dev, sizeof(struct xcv), GFP_KERNEL);
+	if (!xcv)
+		return -ENOMEM;
+	xcv->pdev = pdev;
+
+	pci_set_drvdata(pdev, xcv);
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(dev, "Failed to enable PCI device\n");
+		goto err_kfree;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		dev_err(dev, "PCI request regions failed 0x%x\n", err);
+		goto err_disable_device;
+	}
+
+	/* MAP configuration registers */
+	xcv->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+	if (!xcv->reg_base) {
+		dev_err(dev, "XCV: Cannot map CSR memory space, aborting\n");
+		err = -ENOMEM;
+		goto err_release_regions;
+	}
+
+	return 0;
+
+err_release_regions:
+	pci_release_regions(pdev);
+err_disable_device:
+	pci_disable_device(pdev);
+err_kfree:
+	devm_kfree(dev, xcv);
+	xcv = NULL;
+	return err;
+}
+
+static void xcv_remove(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	if (xcv) {
+		devm_kfree(dev, xcv);
+		xcv = NULL;
+	}
+
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static struct pci_driver xcv_driver = {
+	.name = DRV_NAME,
+	.id_table = xcv_id_table,
+	.probe = xcv_probe,
+	.remove = xcv_remove,
+};
+
+static int __init xcv_init_module(void)
+{
+	pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
+
+	return pci_register_driver(&xcv_driver);
+}
+
+static void __exit xcv_cleanup_module(void)
+{
+	pci_unregister_driver(&xcv_driver);
+}
+
+module_init(xcv_init_module);
+module_exit(xcv_cleanup_module);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 2e2aa9f..bcfa512 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1521,4 +1521,7 @@
 void t4_idma_monitor(struct adapter *adapter,
 		     struct sge_idma_monitor_state *idma,
 		     int hz, int ticks);
+int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
+		      unsigned int naddr, u8 *addr);
+
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index c45de49..2bb804c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3078,6 +3078,26 @@
 	return ret;
 }
 
+#ifdef CONFIG_PCI_IOV
+static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+
+	/* verify MAC addr is valid */
+	if (!is_valid_ether_addr(mac)) {
+		dev_err(pi->adapter->pdev_dev,
+			"Invalid Ethernet address %pM for VF %d\n",
+			mac, vf);
+		return -EINVAL;
+	}
+
+	dev_info(pi->adapter->pdev_dev,
+		 "Setting MAC %pM on VF %d\n", mac, vf);
+	return t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
+}
+#endif
+
 static int cxgb_set_mac_addr(struct net_device *dev, void *p)
 {
 	int ret;
@@ -3136,7 +3156,27 @@
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll        = cxgb_busy_poll,
 #endif
+};
 
+static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
+#ifdef CONFIG_PCI_IOV
+	.ndo_set_vf_mac       = cxgb_set_vf_mac,
+#endif
+};
+
+static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+	struct adapter *adapter = netdev2adap(dev);
+
+	strlcpy(info->driver, cxgb4_driver_name, sizeof(info->driver));
+	strlcpy(info->version, cxgb4_driver_version,
+		sizeof(info->version));
+	strlcpy(info->bus_info, pci_name(adapter->pdev),
+		sizeof(info->bus_info));
+}
+
+static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
+	.get_drvinfo       = get_drvinfo,
 };
 
 void t4_fatal_err(struct adapter *adap)
@@ -4836,19 +4876,12 @@
 #ifdef CONFIG_PCI_IOV
 static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
 {
+	struct adapter *adap = pci_get_drvdata(pdev);
 	int err = 0;
 	int current_vfs = pci_num_vf(pdev);
 	u32 pcie_fw;
-	void __iomem *regs;
 
-	regs = pci_ioremap_bar(pdev, 0);
-	if (!regs) {
-		dev_err(&pdev->dev, "cannot map device registers\n");
-		return -ENOMEM;
-	}
-
-	pcie_fw = readl(regs + PCIE_FW_A);
-	iounmap(regs);
+	pcie_fw = readl(adap->regs + PCIE_FW_A);
 	/* Check if cxgb4 is the MASTER and fw is initialized */
 	if (!(pcie_fw & PCIE_FW_INIT_F) ||
 	    !(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
@@ -4875,6 +4908,8 @@
 	 */
 	if (!num_vfs) {
 		pci_disable_sriov(pdev);
+		if (adap->port[0]->reg_state == NETREG_REGISTERED)
+			unregister_netdev(adap->port[0]);
 		return num_vfs;
 	}
 
@@ -4882,6 +4917,12 @@
 		err = pci_enable_sriov(pdev, num_vfs);
 		if (err)
 			return err;
+
+		if (adap->port[0]->reg_state == NETREG_UNINITIALIZED) {
+			err = register_netdev(adap->port[0]);
+			if (err < 0)
+				pr_info("Unable to register VF mgmt netdev\n");
+		}
 	}
 	return num_vfs;
 }
@@ -4893,9 +4934,14 @@
 	struct port_info *pi;
 	bool highdma = false;
 	struct adapter *adapter = NULL;
+	struct net_device *netdev;
+#ifdef CONFIG_PCI_IOV
+	char name[IFNAMSIZ];
+#endif
 	void __iomem *regs;
 	u32 whoami, pl_rev;
 	enum chip_type chip;
+	static int adap_idx = 1;
 
 	printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
 
@@ -4930,7 +4976,9 @@
 	func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ?
 		SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
 	if (func != ent->driver_data) {
+#ifndef CONFIG_PCI_IOV
 		iounmap(regs);
+#endif
 		pci_disable_device(pdev);
 		pci_save_state(pdev);        /* to restore SR-IOV later */
 		goto sriov;
@@ -4962,6 +5010,7 @@
 		err = -ENOMEM;
 		goto out_unmap_bar0;
 	}
+	adap_idx++;
 
 	adapter->workq = create_singlethread_workqueue("cxgb4");
 	if (!adapter->workq) {
@@ -5048,8 +5097,6 @@
 			      T6_STATMODE_V(0)));
 
 	for_each_port(adapter, i) {
-		struct net_device *netdev;
-
 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
 					   MAX_ETH_QSETS);
 		if (!netdev) {
@@ -5217,6 +5264,7 @@
 		attach_ulds(adapter);
 
 	print_adapter_info(adapter);
+	return 0;
 
 sriov:
 #ifdef CONFIG_PCI_IOV
@@ -5230,9 +5278,58 @@
 				 "instantiated %u virtual functions\n",
 				 num_vf[func]);
 	}
-#endif
+
+	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+	if (!adapter) {
+		err = -ENOMEM;
+		goto free_pci_region;
+	}
+
+	snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap_idx, func);
+	netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, ether_setup);
+	if (!netdev) {
+		err = -ENOMEM;
+		goto free_adapter;
+	}
+
+	adapter->pdev = pdev;
+	adapter->pdev_dev = &pdev->dev;
+	adapter->name = pci_name(pdev);
+	adapter->mbox = func;
+	adapter->pf = func;
+	adapter->regs = regs;
+	adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
+				    (sizeof(struct mbox_cmd) *
+				     T4_OS_LOG_MBOX_CMDS),
+				    GFP_KERNEL);
+	if (!adapter->mbox_log) {
+		err = -ENOMEM;
+		goto free_netdevice;
+	}
+	pi = netdev_priv(netdev);
+	pi->adapter = adapter;
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+	pci_set_drvdata(pdev, adapter);
+
+	adapter->port[0] = netdev;
+	netdev->netdev_ops = &cxgb4_mgmt_netdev_ops;
+	netdev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
+
 	return 0;
 
+ free_netdevice:
+	free_netdev(adapter->port[0]);
+ free_adapter:
+	kfree(adapter);
+ free_pci_region:
+	iounmap(regs);
+	pci_disable_sriov(pdev);
+	pci_release_regions(pdev);
+	return err;
+#else
+	return 0;
+#endif
+
  out_free_dev:
 	free_some_resources(adapter);
  out_unmap_bar:
@@ -5258,12 +5355,12 @@
 {
 	struct adapter *adapter = pci_get_drvdata(pdev);
 
-#ifdef CONFIG_PCI_IOV
-	pci_disable_sriov(pdev);
+	if (!adapter) {
+		pci_release_regions(pdev);
+		return;
+	}
 
-#endif
-
-	if (adapter) {
+	if (adapter->pf == 4) {
 		int i;
 
 		/* Tear down per-adapter Work Queue first since it can contain
@@ -5312,8 +5409,18 @@
 		kfree(adapter->mbox_log);
 		synchronize_rcu();
 		kfree(adapter);
-	} else
+	}
+#ifdef CONFIG_PCI_IOV
+	else {
+		if (adapter->port[0]->reg_state == NETREG_REGISTERED)
+			unregister_netdev(adapter->port[0]);
+		free_netdev(adapter->port[0]);
+		iounmap(adapter->regs);
+		kfree(adapter);
+		pci_disable_sriov(pdev);
 		pci_release_regions(pdev);
+	}
+#endif
 }
 
 static struct pci_driver cxgb4_driver = {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index dc92c80..2a476cc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -8264,3 +8264,44 @@
 		t4_sge_decode_idma_state(adapter, idma->idma_state[i]);
 	}
 }
+
+/**
+ *	t4_set_vf_mac_acl - Set MAC address for the specified VF
+ *	@adapter: The adapter
+ *	@vf: one of the VFs instantiated by the specified PF
+ *	@naddr: the number of MAC addresses
+ *	@addr: the MAC address(es) to be set to the specified VF
+ */
+int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
+		      unsigned int naddr, u8 *addr)
+{
+	struct fw_acl_mac_cmd cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) |
+				    FW_CMD_REQUEST_F |
+				    FW_CMD_WRITE_F |
+				    FW_ACL_MAC_CMD_PFN_V(adapter->pf) |
+				    FW_ACL_MAC_CMD_VFN_V(vf));
+
+	/* Note: Do not enable the ACL */
+	cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+	cmd.nmac = naddr;
+
+	switch (adapter->pf) {
+	case 3:
+		memcpy(cmd.macaddr3, addr, sizeof(cmd.macaddr3));
+		break;
+	case 2:
+		memcpy(cmd.macaddr2, addr, sizeof(cmd.macaddr2));
+		break;
+	case 1:
+		memcpy(cmd.macaddr1, addr, sizeof(cmd.macaddr1));
+		break;
+	case 0:
+		memcpy(cmd.macaddr0, addr, sizeof(cmd.macaddr0));
+		break;
+	}
+
+	return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index e116bb8..f2951bf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2777,6 +2777,7 @@
 	struct adapter *adapter;
 	struct port_info *pi;
 	struct net_device *netdev;
+	unsigned int pf;
 
 	/*
 	 * Print our driver banner the first time we're called to initialize a
@@ -2903,8 +2904,11 @@
 	 * Allocate our "adapter ports" and stitch everything together.
 	 */
 	pmask = adapter->params.vfres.pmask;
+	pf = t4vf_get_pf_from_vf(adapter);
 	for_each_port(adapter, pidx) {
 		int port_id, viid;
+		u8 mac[ETH_ALEN];
+		unsigned int naddr = 1;
 
 		/*
 		 * We simplistically allocate our virtual interfaces
@@ -2975,6 +2979,26 @@
 				pidx);
 			goto err_free_dev;
 		}
+
+		err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac);
+		if (err) {
+			dev_err(&pdev->dev,
+				"unable to determine MAC ACL address, "
+				"continuing anyway (status %d)\n", err);
+		} else if (naddr && adapter->params.vfres.nvi == 1) {
+			struct sockaddr addr;
+
+			ether_addr_copy(addr.sa_data, mac);
+			err = cxgb4vf_set_mac_addr(netdev, &addr);
+			if (err) {
+				dev_err(&pdev->dev,
+					"unable to set MAC address %pM\n",
+					mac);
+				goto err_free_dev;
+			}
+			dev_info(&pdev->dev,
+				 "Using assigned MAC ACL: %pM\n", mac);
+		}
 	}
 
 	/* See what interrupts we'll be using.  If we've been configured to
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index 8ee5414..8067424 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@ -347,6 +347,7 @@
 			u64 *pbar2_qoffset,
 			unsigned int *pbar2_qid);
 
+unsigned int t4vf_get_pf_from_vf(struct adapter *);
 int t4vf_get_sge_params(struct adapter *);
 int t4vf_get_vpd_params(struct adapter *);
 int t4vf_get_dev_params(struct adapter *);
@@ -381,5 +382,7 @@
 
 int t4vf_handle_fw_rpl(struct adapter *, const __be64 *);
 int t4vf_prep_adapter(struct adapter *);
+int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
+			unsigned int *naddr, u8 *addr);
 
 #endif /* __T4VF_COMMON_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index 427bfa7..879f4c5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -639,6 +639,15 @@
 	return 0;
 }
 
+unsigned int t4vf_get_pf_from_vf(struct adapter *adapter)
+{
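+	/* Read PL_VF_WHOAMI and extract the PF number backing this VF */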
+	u32 whoami;
+
+	whoami = t4_read_reg(adapter, T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A);
+	return (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
+			SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami));
+}
+
 /**
  *	t4vf_get_sge_params - retrieve adapter Scatter gather Engine parameters
  *	@adapter: the adapter
@@ -716,7 +725,6 @@
 	 * read.
 	 */
 	if (!is_t4(adapter->params.chip)) {
-		u32 whoami;
 		unsigned int pf, s_hps, s_qpp;
 
 		params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_REG) |
@@ -740,11 +748,7 @@
 		 * register we just read. Do it once here so other code in
 		 * the driver can just use it.
 		 */
-		whoami = t4_read_reg(adapter,
-				     T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A);
-		pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
-			SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
-
+		pf = t4vf_get_pf_from_vf(adapter);
 		s_hps = (HOSTPAGESIZEPF0_S +
 			 (HOSTPAGESIZEPF1_S - HOSTPAGESIZEPF0_S) * pf);
 		sge_params->sge_vf_hps =
@@ -1807,3 +1811,50 @@
 
 	return 0;
 }
+
+/**
+ *	t4vf_get_vf_mac_acl - Get the MAC address to be set to
+ *			      the VI of this VF.
+ *	@adapter: The adapter
+ *	@pf: The pf associated with vf
+ *	@naddr: the number of ACL MAC addresses returned in addr
+ *	@addr: Placeholder for MAC addresses
+ *
+ *	Find the MAC address to be set to the VF's VI. The requested MAC address
+ *	Find the MAC address to be set to the VF's VI. The requested MAC
+ *	address is provided by the host OS via the ndo_set_vf_mac callback
+ *	in the PF driver.
+int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
+			unsigned int *naddr, u8 *addr)
+{
+	struct fw_acl_mac_cmd cmd;
+	int ret;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) |
+				    FW_CMD_REQUEST_F |
+				    FW_CMD_READ_F);
+	cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+	ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &cmd);
+	if (ret)
+		return ret;
+
+	if (cmd.nmac < *naddr)
+		*naddr = cmd.nmac;
+
+	switch (pf) {
+	case 3:
+		memcpy(addr, cmd.macaddr3, sizeof(cmd.macaddr3));
+		break;
+	case 2:
+		memcpy(addr, cmd.macaddr2, sizeof(cmd.macaddr2));
+		break;
+	case 1:
+		memcpy(addr, cmd.macaddr1, sizeof(cmd.macaddr1));
+		break;
+	case 0:
+		memcpy(addr, cmd.macaddr0, sizeof(cmd.macaddr0));
+		break;
+	}
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index f0e9e2e..6620fc8 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -1966,7 +1966,7 @@
     } else if (lp->setup_f == HASH_PERF) {   /* Hash Filtering */
 	netdev_for_each_mc_addr(ha, dev) {
 		crc = ether_crc_le(ETH_ALEN, ha->addr);
-		hashcode = crc & HASH_BITS;  /* hashcode is 9 LSb of CRC */
+		hashcode = crc & DE4X5_HASH_BITS;  /* hashcode is 9 LSb of CRC */
 
 		byte = hashcode >> 3;        /* bit[3-8] -> byte in filter */
 		bit = 1 << (hashcode & 0x07);/* bit[0-2] -> bit in byte */
@@ -5043,7 +5043,7 @@
 	    *(pa + i) = dev->dev_addr[i];                 /* Host address */
 	    if (i & 0x01) pa += 2;
 	}
-	*(lp->setup_frame + (HASH_TABLE_LEN >> 3) - 3) = 0x80;
+	*(lp->setup_frame + (DE4X5_HASH_TABLE_LEN >> 3) - 3) = 0x80;
     } else {
 	for (i=0; i<ETH_ALEN; i++) { /* Host address */
 	    *(pa + (i&1)) = dev->dev_addr[i];
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.h b/drivers/net/ethernet/dec/tulip/de4x5.h
index ec756eb..1bfdc9b 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.h
+++ b/drivers/net/ethernet/dec/tulip/de4x5.h
@@ -860,8 +860,8 @@
 #define PCI  0
 #define EISA 1
 
-#define HASH_TABLE_LEN   512       /* Bits */
-#define HASH_BITS        0x01ff    /* 9 LS bits */
+#define DE4X5_HASH_TABLE_LEN   512       /* Bits */
+#define DE4X5_HASH_BITS        0x01ff    /* 9 LS bits */
 
 #define SETUP_FRAME_LEN  192       /* Bytes */
 #define IMPERF_PA_OFFSET 156       /* Bytes */
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 4555e04..86780b5 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -508,6 +508,10 @@
 	u16 lso_mss;	/* MSS for LSO */
 };
 
+struct be_eth_addr {
+	unsigned char mac[ETH_ALEN];
+};
+
 struct be_adapter {
 	struct pci_dev *pdev;
 	struct net_device *netdev;
@@ -523,7 +527,7 @@
 	struct be_dma_mem mbox_mem_alloced;
 
 	struct be_mcc_obj mcc_obj;
-	spinlock_t mcc_lock;	/* For serializing mcc cmds to BE card */
+	struct mutex mcc_lock;	/* For serializing mcc cmds to BE card */
 	spinlock_t mcc_cq_lock;
 
 	u16 cfg_num_rx_irqs;		/* configured via set-channels */
@@ -570,9 +574,15 @@
 	int if_handle;		/* Used to configure filtering */
 	u32 if_flags;		/* Interface filtering flags */
 	u32 *pmac_id;		/* MAC addr handle used by BE card */
+	struct be_eth_addr *uc_list;/* list of uc-addrs programmed (not perm) */
 	u32 uc_macs;		/* Count of secondary UC MAC programmed */
+	struct be_eth_addr *mc_list;/* list of mcast addrs programmed */
+	u32 mc_count;
 	unsigned long vids[BITS_TO_LONGS(VLAN_N_VID)];
 	u16 vlans_added;
+	bool update_uc_list;
+	bool update_mc_list;
+	struct mutex rx_filter_lock;/* For protecting vids[] & mc/uc_list[] */
 
 	u32 beacon_state;	/* for set_phys_id */
 
@@ -626,6 +636,15 @@
 	u8 phy_state; /* state of sfp optics (functional, faulted, etc.,) */
 };
 
+/* Used for deferred FW config cmds. Add fields to this struct as required */
+struct be_cmd_work {
+	struct work_struct work;
+	struct be_adapter *adapter;
+	union {
+		__be16 vxlan_port;
+	} info;
+};
+
 #define be_physfn(adapter)		(!adapter->virtfn)
 #define be_virtfn(adapter)		(adapter->virtfn)
 #define sriov_enabled(adapter)		(adapter->flags &	\
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 2cc1175..fa11a5a 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -571,7 +571,7 @@
 /* Wait till no more pending mcc requests are present */
 static int be_mcc_wait_compl(struct be_adapter *adapter)
 {
-#define mcc_timeout		120000 /* 12s timeout */
+#define mcc_timeout		12000 /* 12s timeout */
 	int i, status = 0;
 	struct be_mcc_obj *mcc_obj = &adapter->mcc_obj;
 
@@ -585,7 +585,7 @@
 
 		if (atomic_read(&mcc_obj->q.used) == 0)
 			break;
-		udelay(100);
+		usleep_range(500, 1000);
 	}
 	if (i == mcc_timeout) {
 		dev_err(&adapter->pdev->dev, "FW not responding\n");
@@ -863,7 +863,7 @@
 static int be_cmd_lock(struct be_adapter *adapter)
 {
 	if (use_mcc(adapter)) {
-		spin_lock_bh(&adapter->mcc_lock);
+		mutex_lock(&adapter->mcc_lock);
 		return 0;
 	} else {
 		return mutex_lock_interruptible(&adapter->mbox_lock);
@@ -874,7 +874,7 @@
 static void be_cmd_unlock(struct be_adapter *adapter)
 {
 	if (use_mcc(adapter))
-		spin_unlock_bh(&adapter->mcc_lock);
+		return mutex_unlock(&adapter->mcc_lock);
 	else
 		return mutex_unlock(&adapter->mbox_lock);
 }
@@ -1044,7 +1044,7 @@
 	struct be_cmd_req_mac_query *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1073,7 +1073,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1085,7 +1085,7 @@
 	struct be_cmd_req_pmac_add *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1110,7 +1110,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	 if (status == MCC_STATUS_UNAUTHORIZED_REQUEST)
 		status = -EPERM;
@@ -1128,7 +1128,7 @@
 	if (pmac_id == -1)
 		return 0;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1148,7 +1148,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1411,7 +1411,7 @@
 	struct be_dma_mem *q_mem = &rxq->dma_mem;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1441,7 +1441,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1505,7 +1505,7 @@
 	struct be_cmd_req_q_destroy *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1522,7 +1522,7 @@
 	q->created = false;
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1590,7 +1590,7 @@
 	struct be_cmd_req_hdr *hdr;
 	int status = 0;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1618,7 +1618,7 @@
 	adapter->stats_cmd_sent = true;
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1634,7 +1634,7 @@
 			    CMD_SUBSYSTEM_ETH))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1657,7 +1657,7 @@
 	adapter->stats_cmd_sent = true;
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1694,7 +1694,7 @@
 	struct be_cmd_req_link_status *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	if (link_status)
 		*link_status = LINK_DOWN;
@@ -1733,7 +1733,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1744,7 +1744,7 @@
 	struct be_cmd_req_get_cntl_addnl_attribs *req;
 	int status = 0;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1759,7 +1759,7 @@
 
 	status = be_mcc_notify(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1808,7 +1808,7 @@
 	if (!get_fat_cmd.va)
 		return -ENOMEM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	while (total_size) {
 		buf_size = min(total_size, (u32)60*1024);
@@ -1848,7 +1848,7 @@
 err:
 	dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size,
 			  get_fat_cmd.va, get_fat_cmd.dma);
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1859,7 +1859,7 @@
 	struct be_cmd_req_get_fw_version *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1882,7 +1882,7 @@
 			sizeof(adapter->fw_on_flash));
 	}
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1896,7 +1896,7 @@
 	struct be_cmd_req_modify_eq_delay *req;
 	int status = 0, i;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1919,7 +1919,7 @@
 
 	status = be_mcc_notify(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1946,7 +1946,7 @@
 	struct be_cmd_req_vlan_config *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1968,7 +1968,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1979,7 +1979,7 @@
 	struct be_cmd_req_rx_filter *req = mem->va;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1996,8 +1996,7 @@
 	req->if_flags = (value == ON) ? req->if_flags_mask : 0;
 
 	if (flags & BE_IF_FLAGS_MULTICAST) {
-		struct netdev_hw_addr *ha;
-		int i = 0;
+		int i;
 
 		/* Reset mcast promisc mode if already set by setting mask
 		 * and not setting flags field
@@ -2005,14 +2004,15 @@
 		req->if_flags_mask |=
 			cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS &
 				    be_if_cap_flags(adapter));
-		req->mcast_num = cpu_to_le32(netdev_mc_count(adapter->netdev));
-		netdev_for_each_mc_addr(ha, adapter->netdev)
-			memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN);
+		req->mcast_num = cpu_to_le32(adapter->mc_count);
+		for (i = 0; i < adapter->mc_count; i++)
+			ether_addr_copy(req->mcast_mac[i].byte,
+					adapter->mc_list[i].mac);
 	}
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2043,7 +2043,7 @@
 			    CMD_SUBSYSTEM_COMMON))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2063,7 +2063,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	if (base_status(status) == MCC_STATUS_FEATURE_NOT_SUPPORTED)
 		return  -EOPNOTSUPP;
@@ -2082,7 +2082,7 @@
 			    CMD_SUBSYSTEM_COMMON))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2105,7 +2105,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2186,7 +2186,7 @@
 	if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
 		return 0;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2211,7 +2211,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2223,7 +2223,7 @@
 	struct be_cmd_req_enable_disable_beacon *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2244,7 +2244,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2255,7 +2255,7 @@
 	struct be_cmd_req_get_beacon_state *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2279,7 +2279,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2303,7 +2303,7 @@
 		return -ENOMEM;
 	}
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2325,7 +2325,7 @@
 		memcpy(data, resp->page_data, PAGE_DATA_LEN);
 	}
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
 	return status;
 }
@@ -2342,7 +2342,7 @@
 	void *ctxt = NULL;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 	adapter->flash_status = 0;
 
 	wrb = wrb_from_mccq(adapter);
@@ -2384,7 +2384,7 @@
 	if (status)
 		goto err_unlock;
 
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
 					 msecs_to_jiffies(60000)))
@@ -2403,7 +2403,7 @@
 	return status;
 
 err_unlock:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2457,7 +2457,7 @@
 	struct be_mcc_wrb *wrb;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2475,7 +2475,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2488,7 +2488,7 @@
 	struct lancer_cmd_resp_read_object *resp;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2522,7 +2522,7 @@
 	}
 
 err_unlock:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2534,7 +2534,7 @@
 	struct be_cmd_write_flashrom *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 	adapter->flash_status = 0;
 
 	wrb = wrb_from_mccq(adapter);
@@ -2559,7 +2559,7 @@
 	if (status)
 		goto err_unlock;
 
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
 					 msecs_to_jiffies(40000)))
@@ -2570,7 +2570,7 @@
 	return status;
 
 err_unlock:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2581,7 +2581,7 @@
 	struct be_mcc_wrb *wrb;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2608,7 +2608,7 @@
 		memcpy(flashed_crc, req->crc, 4);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3192,7 +3192,7 @@
 	struct be_cmd_req_acpi_wol_magic_config *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3209,7 +3209,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3224,7 +3224,7 @@
 			    CMD_SUBSYSTEM_LOWLEVEL))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3247,7 +3247,7 @@
 	if (status)
 		goto err_unlock;
 
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
 					 msecs_to_jiffies(SET_LB_MODE_TIMEOUT)))
@@ -3256,7 +3256,7 @@
 	return status;
 
 err_unlock:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3273,7 +3273,7 @@
 			    CMD_SUBSYSTEM_LOWLEVEL))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3299,7 +3299,7 @@
 	if (status)
 		goto err;
 
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	wait_for_completion(&adapter->et_cmd_compl);
 	resp = embedded_payload(wrb);
@@ -3307,7 +3307,7 @@
 
 	return status;
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3323,7 +3323,7 @@
 			    CMD_SUBSYSTEM_LOWLEVEL))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3357,7 +3357,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3368,7 +3368,7 @@
 	struct be_cmd_req_seeprom_read *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3384,7 +3384,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3399,7 +3399,7 @@
 			    CMD_SUBSYSTEM_COMMON))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3444,7 +3444,7 @@
 	}
 	dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3454,7 +3454,7 @@
 	struct be_cmd_req_set_qos *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3474,7 +3474,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3581,7 +3581,7 @@
 	struct be_cmd_req_get_fn_privileges *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3613,7 +3613,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3625,7 +3625,7 @@
 	struct be_cmd_req_set_fn_privileges *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3645,7 +3645,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3677,7 +3677,7 @@
 		return -ENOMEM;
 	}
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3741,7 +3741,7 @@
 	}
 
 out:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size,
 			  get_mac_list_cmd.va, get_mac_list_cmd.dma);
 	return status;
@@ -3801,7 +3801,7 @@
 	if (!cmd.va)
 		return -ENOMEM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3823,7 +3823,7 @@
 
 err:
 	dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3859,7 +3859,7 @@
 			    CMD_SUBSYSTEM_COMMON))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3900,7 +3900,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3914,7 +3914,7 @@
 	int status;
 	u16 vid;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3961,7 +3961,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4156,7 +4156,7 @@
 	struct be_cmd_req_set_ext_fat_caps *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4172,7 +4172,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4650,7 +4650,7 @@
 	if (iface == 0xFFFFFFFF)
 		return -1;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4667,7 +4667,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4701,7 +4701,7 @@
 	struct be_cmd_resp_get_iface_list *resp;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4722,7 +4722,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4816,7 +4816,7 @@
 	if (BEx_chip(adapter))
 		return 0;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4834,7 +4834,7 @@
 	req->enable = 1;
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4905,7 +4905,7 @@
 	struct be_cmd_req_set_ll_link *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4931,7 +4931,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4964,7 +4964,7 @@
 	struct be_cmd_resp_hdr *resp;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4987,7 +4987,7 @@
 	memcpy(wrb_payload, resp, sizeof(*resp) + resp->response_length);
 	be_dws_le_to_cpu(wrb_payload, sizeof(*resp) + resp->response_length);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 EXPORT_SYMBOL(be_roce_mcc_cmd);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 874c753..f7584d4 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -53,6 +53,10 @@
 	{ 0 }
 };
 MODULE_DEVICE_TABLE(pci, be_dev_ids);
+
+/* Workqueue used by all functions for deferring cmd calls to the adapter */
+struct workqueue_struct *be_wq;
+
 /* UE Status Low CSR */
 static const char * const ue_status_low_desc[] = {
 	"CEV",
@@ -1420,13 +1424,18 @@
 	u16 num = 0, i = 0;
 	int status = 0;
 
-	/* No need to further configure vids if in promiscuous mode */
-	if (be_in_all_promisc(adapter))
+	/* No need to change the VLAN state if the I/F is in promiscuous mode */
+	if (adapter->netdev->flags & IFF_PROMISC)
 		return 0;
 
 	if (adapter->vlans_added > be_max_vlans(adapter))
 		return be_set_vlan_promisc(adapter);
 
+	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
+		status = be_clear_vlan_promisc(adapter);
+		if (status)
+			return status;
+	}
 	/* Construct VLAN Table to give to HW */
 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
 		vids[num++] = cpu_to_le16(i);
@@ -1439,8 +1448,6 @@
 		    addl_status(status) ==
 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
 			return be_set_vlan_promisc(adapter);
-	} else if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
-		status = be_clear_vlan_promisc(adapter);
 	}
 	return status;
 }
@@ -1450,46 +1457,45 @@
 	struct be_adapter *adapter = netdev_priv(netdev);
 	int status = 0;
 
+	mutex_lock(&adapter->rx_filter_lock);
+
 	/* Packets with VID 0 are always received by Lancer by default */
 	if (lancer_chip(adapter) && vid == 0)
-		return status;
+		goto done;
 
 	if (test_bit(vid, adapter->vids))
-		return status;
+		goto done;
 
 	set_bit(vid, adapter->vids);
 	adapter->vlans_added++;
 
 	status = be_vid_config(adapter);
-	if (status) {
-		adapter->vlans_added--;
-		clear_bit(vid, adapter->vids);
-	}
-
+done:
+	mutex_unlock(&adapter->rx_filter_lock);
 	return status;
 }
 
 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
+	int status = 0;
+
+	mutex_lock(&adapter->rx_filter_lock);
 
 	/* Packets with VID 0 are always received by Lancer by default */
 	if (lancer_chip(adapter) && vid == 0)
-		return 0;
+		goto done;
 
 	if (!test_bit(vid, adapter->vids))
-		return 0;
+		goto done;
 
 	clear_bit(vid, adapter->vids);
 	adapter->vlans_added--;
 
-	return be_vid_config(adapter);
-}
-
-static void be_clear_all_promisc(struct be_adapter *adapter)
-{
-	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, OFF);
-	adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
+	status = be_vid_config(adapter);
+done:
+	mutex_unlock(&adapter->rx_filter_lock);
+	return status;
 }
 
 static void be_set_all_promisc(struct be_adapter *adapter)
@@ -1510,75 +1516,207 @@
 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
 }
 
-static void be_set_mc_list(struct be_adapter *adapter)
+static void be_set_uc_promisc(struct be_adapter *adapter)
 {
 	int status;
 
-	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
+	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
+		return;
+
+	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
 	if (!status)
-		adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
-	else
+		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
+}
+
+static void be_clear_uc_promisc(struct be_adapter *adapter)
+{
+	int status;
+
+	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
+		return;
+
+	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
+	if (!status)
+		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
+}
+
+/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
+ * We use a single callback function for both sync and unsync. We really don't
+ * add/remove addresses through this callback. But, we use it to detect changes
+ * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
+ */
+static int be_uc_list_update(struct net_device *netdev,
+			     const unsigned char *addr)
+{
+	struct be_adapter *adapter = netdev_priv(netdev);
+
+	adapter->update_uc_list = true;
+	return 0;
+}
+
+static int be_mc_list_update(struct net_device *netdev,
+			     const unsigned char *addr)
+{
+	struct be_adapter *adapter = netdev_priv(netdev);
+
+	adapter->update_mc_list = true;
+	return 0;
+}
+
+static void be_set_mc_list(struct be_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct netdev_hw_addr *ha;
+	bool mc_promisc = false;
+	int status;
+
+	netif_addr_lock_bh(netdev);
+	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
+
+	if (netdev->flags & IFF_PROMISC) {
+		adapter->update_mc_list = false;
+	} else if (netdev->flags & IFF_ALLMULTI ||
+		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
+		/* Enable multicast promisc if num configured exceeds
+		 * what we support
+		 */
+		mc_promisc = true;
+		adapter->update_mc_list = false;
+	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
+		/* Update mc-list unconditionally if the iface was previously
+		 * in mc-promisc mode and now is out of that mode.
+		 */
+		adapter->update_mc_list = true;
+	}
+
+	if (adapter->update_mc_list) {
+		int i = 0;
+
+		/* cache the mc-list in adapter */
+		netdev_for_each_mc_addr(ha, netdev) {
+			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
+			i++;
+		}
+		adapter->mc_count = netdev_mc_count(netdev);
+	}
+	netif_addr_unlock_bh(netdev);
+
+	if (mc_promisc) {
 		be_set_mc_promisc(adapter);
+	} else if (adapter->update_mc_list) {
+		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
+		if (!status)
+			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
+		else
+			be_set_mc_promisc(adapter);
+
+		adapter->update_mc_list = false;
+	}
+}
+
+static void be_clear_mc_list(struct be_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	__dev_mc_unsync(netdev, NULL);
+	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
+	adapter->mc_count = 0;
 }
 
 static void be_set_uc_list(struct be_adapter *adapter)
 {
+	struct net_device *netdev = adapter->netdev;
 	struct netdev_hw_addr *ha;
-	int i = 1; /* First slot is claimed by the Primary MAC */
+	bool uc_promisc = false;
+	int curr_uc_macs = 0, i;
 
-	for (; adapter->uc_macs > 0; adapter->uc_macs--, i++)
-		be_cmd_pmac_del(adapter, adapter->if_handle,
-				adapter->pmac_id[i], 0);
+	netif_addr_lock_bh(netdev);
+	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
 
-	if (netdev_uc_count(adapter->netdev) > be_max_uc(adapter)) {
-		be_set_all_promisc(adapter);
-		return;
+	if (netdev->flags & IFF_PROMISC) {
+		adapter->update_uc_list = false;
+	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
+		uc_promisc = true;
+		adapter->update_uc_list = false;
+	}  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
+		/* Update uc-list unconditionally if the iface was previously
+		 * in uc-promisc mode and now is out of that mode.
+		 */
+		adapter->update_uc_list = true;
 	}
 
-	netdev_for_each_uc_addr(ha, adapter->netdev) {
-		adapter->uc_macs++; /* First slot is for Primary MAC */
-		be_cmd_pmac_add(adapter, (u8 *)ha->addr, adapter->if_handle,
-				&adapter->pmac_id[adapter->uc_macs], 0);
+	if (adapter->update_uc_list) {
+		i = 1; /* First slot is claimed by the Primary MAC */
+
+		/* cache the uc-list in adapter array */
+		netdev_for_each_uc_addr(ha, netdev) {
+			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
+			i++;
+		}
+		curr_uc_macs = netdev_uc_count(netdev);
+	}
+	netif_addr_unlock_bh(netdev);
+
+	if (uc_promisc) {
+		be_set_uc_promisc(adapter);
+	} else if (adapter->update_uc_list) {
+		be_clear_uc_promisc(adapter);
+
+		for (i = 0; i < adapter->uc_macs; i++)
+			be_cmd_pmac_del(adapter, adapter->if_handle,
+					adapter->pmac_id[i + 1], 0);
+
+		for (i = 0; i < curr_uc_macs; i++)
+			be_cmd_pmac_add(adapter, adapter->uc_list[i].mac,
+					adapter->if_handle,
+					&adapter->pmac_id[i + 1], 0);
+		adapter->uc_macs = curr_uc_macs;
+		adapter->update_uc_list = false;
 	}
 }
 
 static void be_clear_uc_list(struct be_adapter *adapter)
 {
+	struct net_device *netdev = adapter->netdev;
 	int i;
 
-	for (i = 1; i < (adapter->uc_macs + 1); i++)
+	__dev_uc_unsync(netdev, NULL);
+	for (i = 0; i < adapter->uc_macs; i++)
 		be_cmd_pmac_del(adapter, adapter->if_handle,
-				adapter->pmac_id[i], 0);
+				adapter->pmac_id[i + 1], 0);
 	adapter->uc_macs = 0;
 }
 
-static void be_set_rx_mode(struct net_device *netdev)
+static void __be_set_rx_mode(struct be_adapter *adapter)
 {
-	struct be_adapter *adapter = netdev_priv(netdev);
+	struct net_device *netdev = adapter->netdev;
+
+	mutex_lock(&adapter->rx_filter_lock);
 
 	if (netdev->flags & IFF_PROMISC) {
-		be_set_all_promisc(adapter);
-		return;
+		if (!be_in_all_promisc(adapter))
+			be_set_all_promisc(adapter);
+	} else if (be_in_all_promisc(adapter)) {
+		/* We need to re-program the vlan-list or clear
+		 * vlan-promisc mode (if needed) when the interface
+		 * comes out of promisc mode.
+		 */
+		be_vid_config(adapter);
 	}
 
-	/* Interface was previously in promiscuous mode; disable it */
-	if (be_in_all_promisc(adapter)) {
-		be_clear_all_promisc(adapter);
-		if (adapter->vlans_added)
-			be_vid_config(adapter);
-	}
-
-	/* Enable multicast promisc if num configured exceeds what we support */
-	if (netdev->flags & IFF_ALLMULTI ||
-	    netdev_mc_count(netdev) > be_max_mc(adapter)) {
-		be_set_mc_promisc(adapter);
-		return;
-	}
-
-	if (netdev_uc_count(netdev) != adapter->uc_macs)
-		be_set_uc_list(adapter);
-
+	be_set_uc_list(adapter);
 	be_set_mc_list(adapter);
+
+	mutex_unlock(&adapter->rx_filter_lock);
+}
+
+static void be_work_set_rx_mode(struct work_struct *work)
+{
+	struct be_cmd_work *cmd_work =
+				container_of(work, struct be_cmd_work, work);
+
+	__be_set_rx_mode(cmd_work->adapter);
+	kfree(cmd_work);
 }
 
 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
@@ -3429,6 +3567,7 @@
 			adapter->pmac_id[0], 0);
 
 	be_clear_uc_list(adapter);
+	be_clear_mc_list(adapter);
 
 	/* The IFACE flags are enabled in the open path and cleared
 	 * in the close path. When a VF gets detached from the host and
@@ -3462,6 +3601,11 @@
 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
 		return 0;
 
+	/* Before attempting cleanup ensure all the pending cmds in the
+	 * config_wq have finished execution
+	 */
+	flush_workqueue(be_wq);
+
 	be_disable_if_filters(adapter);
 
 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
@@ -3586,7 +3730,7 @@
 	if (adapter->vlans_added)
 		be_vid_config(adapter);
 
-	be_set_rx_mode(adapter->netdev);
+	__be_set_rx_mode(adapter);
 
 	return 0;
 }
@@ -3860,6 +4004,20 @@
 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
 }
 
+static void be_if_destroy(struct be_adapter *adapter)
+{
+	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
+
+	kfree(adapter->pmac_id);
+	adapter->pmac_id = NULL;
+
+	kfree(adapter->mc_list);
+	adapter->mc_list = NULL;
+
+	kfree(adapter->uc_list);
+	adapter->uc_list = NULL;
+}
+
 static int be_clear(struct be_adapter *adapter)
 {
 	struct pci_dev *pdev = adapter->pdev;
@@ -3867,6 +4025,8 @@
 
 	be_cancel_worker(adapter);
 
+	flush_workqueue(be_wq);
+
 	if (sriov_enabled(adapter))
 		be_vf_clear(adapter);
 
@@ -3884,10 +4044,8 @@
 	}
 
 	be_disable_vxlan_offloads(adapter);
-	kfree(adapter->pmac_id);
-	adapter->pmac_id = NULL;
 
-	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
+	be_if_destroy(adapter);
 
 	be_clear_queues(adapter);
 
@@ -4341,7 +4499,7 @@
 
 static void be_schedule_worker(struct be_adapter *adapter)
 {
-	schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
+	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
 }
 
@@ -4393,6 +4551,22 @@
 	u32 cap_flags = be_if_cap_flags(adapter);
 	int status;
 
+	/* alloc required memory for other filtering fields */
+	adapter->pmac_id = kcalloc(be_max_uc(adapter),
+				   sizeof(*adapter->pmac_id), GFP_KERNEL);
+	if (!adapter->pmac_id)
+		return -ENOMEM;
+
+	adapter->mc_list = kcalloc(be_max_mc(adapter),
+				   sizeof(*adapter->mc_list), GFP_KERNEL);
+	if (!adapter->mc_list)
+		return -ENOMEM;
+
+	adapter->uc_list = kcalloc(be_max_uc(adapter),
+				   sizeof(*adapter->uc_list), GFP_KERNEL);
+	if (!adapter->uc_list)
+		return -ENOMEM;
+
 	if (adapter->cfg_num_rx_irqs == 1)
 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
 
@@ -4401,7 +4575,10 @@
 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
 				  &adapter->if_handle, 0);
 
-	return status;
+	if (status)
+		return status;
+
+	return 0;
 }
 
 int be_update_queues(struct be_adapter *adapter)
@@ -4530,11 +4707,6 @@
 	if (status)
 		goto err;
 
-	adapter->pmac_id = kcalloc(be_max_uc(adapter),
-				   sizeof(*adapter->pmac_id), GFP_KERNEL);
-	if (!adapter->pmac_id)
-		return -ENOMEM;
-
 	status = be_msix_enable(adapter);
 	if (status)
 		goto err;
@@ -4728,6 +4900,23 @@
 				       0, 0, nlflags, filter_mask, NULL);
 }
 
+static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
+					 void (*func)(struct work_struct *))
+{
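+	/* Allocate a work item bound to @func for deferred execution on be_wq */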
+	struct be_cmd_work *work;
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work) {
+		dev_err(&adapter->pdev->dev,
+			"be_work memory allocation failed\n");
+		return NULL;
+	}
+
+	INIT_WORK(&work->work, func);
+	work->adapter = adapter;
+	return work;
+}
+
 /* VxLAN offload Notes:
  *
  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
@@ -4742,23 +4931,19 @@
  * adds more than one port, disable offloads and don't re-enable them again
  * until after all the tunnels are removed.
  */
-static void be_add_vxlan_port(struct net_device *netdev,
-			      struct udp_tunnel_info *ti)
+static void be_work_add_vxlan_port(struct work_struct *work)
 {
-	struct be_adapter *adapter = netdev_priv(netdev);
+	struct be_cmd_work *cmd_work =
+				container_of(work, struct be_cmd_work, work);
+	struct be_adapter *adapter = cmd_work->adapter;
+	struct net_device *netdev = adapter->netdev;
 	struct device *dev = &adapter->pdev->dev;
-	__be16 port = ti->port;
+	__be16 port = cmd_work->info.vxlan_port;
 	int status;
 
-	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-		return;
-
-	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
-		return;
-
 	if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
 		adapter->vxlan_port_aliases++;
-		return;
+		goto done;
 	}
 
 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
@@ -4770,7 +4955,7 @@
 	}
 
 	if (adapter->vxlan_port_count++ >= 1)
-		return;
+		goto done;
 
 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
 				     OP_CONVERT_NORMAL_TO_TUNNEL);
@@ -4795,29 +4980,26 @@
 
 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
 		 be16_to_cpu(port));
-	return;
+	goto done;
 err:
 	be_disable_vxlan_offloads(adapter);
+done:
+	kfree(cmd_work);
 }
 
-static void be_del_vxlan_port(struct net_device *netdev,
-			      struct udp_tunnel_info *ti)
+static void be_work_del_vxlan_port(struct work_struct *work)
 {
-	struct be_adapter *adapter = netdev_priv(netdev);
-	__be16 port = ti->port;
-
-	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-		return;
-
-	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
-		return;
+	struct be_cmd_work *cmd_work =
+				container_of(work, struct be_cmd_work, work);
+	struct be_adapter *adapter = cmd_work->adapter;
+	__be16 port = cmd_work->info.vxlan_port;
 
 	if (adapter->vxlan_port != port)
 		goto done;
 
 	if (adapter->vxlan_port_aliases) {
 		adapter->vxlan_port_aliases--;
-		return;
+		goto out;
 	}
 
 	be_disable_vxlan_offloads(adapter);
@@ -4827,6 +5009,40 @@
 		 be16_to_cpu(port));
 done:
 	adapter->vxlan_port_count--;
+out:
+	kfree(cmd_work);
+}
+
+static void be_cfg_vxlan_port(struct net_device *netdev,
+			      struct udp_tunnel_info *ti,
+			      void (*func)(struct work_struct *))
+{
+	struct be_adapter *adapter = netdev_priv(netdev);
+	struct be_cmd_work *cmd_work;
+
+	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+		return;
+
+	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
+		return;
+
+	cmd_work = be_alloc_work(adapter, func);
+	if (cmd_work) {
+		cmd_work->info.vxlan_port = ti->port;
+		queue_work(be_wq, &cmd_work->work);
+	}
+}
+
+static void be_del_vxlan_port(struct net_device *netdev,
+			      struct udp_tunnel_info *ti)
+{
+	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
+}
+
+static void be_add_vxlan_port(struct net_device *netdev,
+			      struct udp_tunnel_info *ti)
+{
+	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
 }
 
 static netdev_features_t be_features_check(struct sk_buff *skb,
@@ -4891,6 +5107,16 @@
 	return 0;
 }
 
+static void be_set_rx_mode(struct net_device *dev)
+{
+	struct be_adapter *adapter = netdev_priv(dev);
+	struct be_cmd_work *work;
+
+	work = be_alloc_work(adapter, be_work_set_rx_mode);
+	if (work)
+		queue_work(be_wq, &work->work);
+}
+
 static const struct net_device_ops be_netdev_ops = {
 	.ndo_open		= be_open,
 	.ndo_stop		= be_close,
@@ -5116,7 +5342,7 @@
 
 reschedule:
 	adapter->work_counter++;
-	schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
+	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
 }
 
 static void be_unmap_pci_bars(struct be_adapter *adapter)
@@ -5256,7 +5482,8 @@
 	}
 
 	mutex_init(&adapter->mbox_lock);
-	spin_lock_init(&adapter->mcc_lock);
+	mutex_init(&adapter->mcc_lock);
+	mutex_init(&adapter->rx_filter_lock);
 	spin_lock_init(&adapter->mcc_cq_lock);
 	init_completion(&adapter->et_cmd_compl);
 
@@ -5712,6 +5939,12 @@
 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
 	}
 
+	be_wq = create_singlethread_workqueue("be_wq");
+	if (!be_wq) {
+		pr_warn(DRV_NAME " : workqueue creation failed\n");
+		return -1;
+	}
+
 	return pci_register_driver(&be_driver);
 }
 module_init(be_init_module);
@@ -5719,5 +5952,8 @@
 static void __exit be_exit_module(void)
 {
 	pci_unregister_driver(&be_driver);
+
+	if (be_wq)
+		destroy_workqueue(be_wq);
 }
 module_exit(be_exit_module);
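
The be2net hunks above push VxLAN port add/del and rx-mode programming onto a driver-owned workqueue: each request is a small be_cmd_work that embeds a work_struct plus its payload, the handler recovers the request with container_of() and frees it when done, and teardown flushes the queue first. A minimal userspace sketch of that embed-and-recover pattern (all names below are illustrative, not the driver's API):

/* A request struct embeds a generic "work" member; the handler
 * recovers the outer struct from a pointer to that member.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_item {
	void (*func)(struct work_item *w);
};

struct vxlan_cmd {
	struct work_item work;	/* embedded, like struct be_cmd_work */
	unsigned short port;	/* per-command payload */
};

static void handle_add_port(struct work_item *w)
{
	/* recover the outer command from the embedded member */
	struct vxlan_cmd *cmd = container_of(w, struct vxlan_cmd, work);

	printf("adding VxLAN port %u\n", (unsigned)cmd->port);
	free(cmd);		/* the handler owns and frees the request */
}

int main(void)
{
	struct vxlan_cmd *cmd = calloc(1, sizeof(*cmd));

	if (!cmd)
		return 1;
	cmd->work.func = handle_add_port;
	cmd->port = 4789;
	cmd->work.func(&cmd->work);	/* a real workqueue would run this later */
	return 0;
}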
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 01f7e81..fb5c638 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2887,7 +2887,7 @@
  * this kind of feature?).
  */
 
-#define HASH_BITS	6		/* #bits in hash */
+#define FEC_HASH_BITS	6		/* #bits in hash */
 #define CRC32_POLY	0xEDB88320
 
 static void set_multicast_list(struct net_device *ndev)
@@ -2935,10 +2935,10 @@
 			}
 		}
 
-		/* only upper 6 bits (HASH_BITS) are used
+		/* only upper 6 bits (FEC_HASH_BITS) are used
 		 * which point to specific bit in the hash registers
 		 */
-		hash = (crc >> (32 - HASH_BITS)) & 0x3f;
+		hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f;
 
 		if (hash > 31) {
 			tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
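
The FEC hunk above only renames HASH_BITS to FEC_HASH_BITS, but the surrounding logic is the usual multicast hash filter: a CRC-32 of the MAC address whose top six bits select one bit in a pair of 32-bit hash registers. A small runnable sketch of that computation (the register split is an assumption for illustration; only the hash derivation mirrors the code above):

#include <stdio.h>
#include <stdint.h>

#define CRC32_POLY	0xEDB88320u
#define FEC_HASH_BITS	6

/* bitwise reflected CRC-32 over the 6-byte MAC address */
static uint32_t crc32_mac(const uint8_t mac[6])
{
	uint32_t crc = 0xFFFFFFFFu;
	int i, bit;

	for (i = 0; i < 6; i++) {
		crc ^= mac[i];
		for (bit = 0; bit < 8; bit++)
			crc = (crc >> 1) ^ (CRC32_POLY & -(crc & 1u));
	}
	return crc;
}

int main(void)
{
	const uint8_t mac[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0xfb };
	uint32_t hash_high = 0, hash_low = 0;

	/* only the upper FEC_HASH_BITS bits of the CRC form the index */
	uint32_t hash = (crc32_mac(mac) >> (32 - FEC_HASH_BITS)) & 0x3f;

	if (hash > 31)
		hash_high |= 1u << (hash - 32);	/* assumed "high" register */
	else
		hash_low |= 1u << hash;		/* assumed "low" register */

	printf("hash=%u high=%08x low=%08x\n",
	       (unsigned)hash, (unsigned)hash_high, (unsigned)hash_low);
	return 0;
}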
diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index 7b8fe86..e03b30c 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -271,11 +271,8 @@
 		goto err_ioremap;
 	}
 
-	if (of_get_property(pdev->dev.of_node,
-			    "little-endian", NULL))
-		priv->is_little_endian = true;
-	else
-		priv->is_little_endian = false;
+	priv->is_little_endian = of_property_read_bool(pdev->dev.of_node,
+						       "little-endian");
 
 	ret = of_mdiobus_register(bus, np);
 	if (ret) {
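
The xgmac_mdio hunk replaces an open-coded of_get_property() presence test with of_property_read_bool(), which returns true exactly when the property exists. A hedged sketch of that helper in use (demo_* names are illustrative):

#include <linux/of.h>

/* presence-only DT flags need no open-coded of_get_property() test */
static bool demo_is_little_endian(struct device_node *np)
{
	return of_property_read_bool(np, "little-endian");
}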
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 5c8afe1..a834774 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -684,8 +684,7 @@
 	if (!phy || IS_ERR(phy))
 		return -EIO;
 
-	if (mdio->irq)
-		phy->irq = mdio->irq[addr];
+	phy->irq = mdio->irq[addr];
 
 	/* All data is now stored in the phy struct;
 	 * register it
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d41c28d..8e4252d 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -399,7 +399,6 @@
 	u16 rx_ring_size;
 
 	struct mii_bus *mii_bus;
-	struct phy_device *phy_dev;
 	phy_interface_t phy_interface;
 	struct device_node *phy_node;
 	unsigned int link;
@@ -2651,6 +2650,7 @@
 	u32 cause_rx_tx;
 	int rx_queue;
 	struct mvneta_port *pp = netdev_priv(napi->dev);
+	struct net_device *ndev = pp->dev;
 	struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
 
 	if (!netif_running(pp->dev)) {
@@ -2668,7 +2668,7 @@
 				(MVNETA_CAUSE_PHY_STATUS_CHANGE |
 				 MVNETA_CAUSE_LINK_CHANGE |
 				 MVNETA_CAUSE_PSC_SYNC_CHANGE))) {
-			mvneta_fixed_link_update(pp, pp->phy_dev);
+			mvneta_fixed_link_update(pp, ndev->phydev);
 		}
 	}
 
@@ -2963,6 +2963,7 @@
 static void mvneta_start_dev(struct mvneta_port *pp)
 {
 	int cpu;
+	struct net_device *ndev = pp->dev;
 
 	mvneta_max_rx_size_set(pp, pp->pkt_size);
 	mvneta_txq_max_tx_size_set(pp, pp->pkt_size);
@@ -2985,15 +2986,16 @@
 		    MVNETA_CAUSE_LINK_CHANGE |
 		    MVNETA_CAUSE_PSC_SYNC_CHANGE);
 
-	phy_start(pp->phy_dev);
+	phy_start(ndev->phydev);
 	netif_tx_start_all_queues(pp->dev);
 }
 
 static void mvneta_stop_dev(struct mvneta_port *pp)
 {
 	unsigned int cpu;
+	struct net_device *ndev = pp->dev;
 
-	phy_stop(pp->phy_dev);
+	phy_stop(ndev->phydev);
 
 	for_each_online_cpu(cpu) {
 		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
@@ -3166,7 +3168,7 @@
 static void mvneta_adjust_link(struct net_device *ndev)
 {
 	struct mvneta_port *pp = netdev_priv(ndev);
-	struct phy_device *phydev = pp->phy_dev;
+	struct phy_device *phydev = ndev->phydev;
 	int status_change = 0;
 
 	if (phydev->link) {
@@ -3244,7 +3246,6 @@
 	phy_dev->supported &= PHY_GBIT_FEATURES;
 	phy_dev->advertising = phy_dev->supported;
 
-	pp->phy_dev = phy_dev;
 	pp->link    = 0;
 	pp->duplex  = 0;
 	pp->speed   = 0;
@@ -3254,8 +3255,9 @@
 
 static void mvneta_mdio_remove(struct mvneta_port *pp)
 {
-	phy_disconnect(pp->phy_dev);
-	pp->phy_dev = NULL;
+	struct net_device *ndev = pp->dev;
+
+	phy_disconnect(ndev->phydev);
 }
 
 /* Electing a CPU must be done in an atomic way: it should be done
@@ -3495,42 +3497,30 @@
 
 static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	struct mvneta_port *pp = netdev_priv(dev);
-
-	if (!pp->phy_dev)
+	if (!dev->phydev)
 		return -ENOTSUPP;
 
-	return phy_mii_ioctl(pp->phy_dev, ifr, cmd);
+	return phy_mii_ioctl(dev->phydev, ifr, cmd);
 }
 
 /* Ethtool methods */
 
-/* Get settings (phy address, speed) for ethtools */
-int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+/* Set link ksettings (phy address, speed) for ethtools */
+int mvneta_ethtool_set_link_ksettings(struct net_device *ndev,
+				      const struct ethtool_link_ksettings *cmd)
 {
-	struct mvneta_port *pp = netdev_priv(dev);
-
-	if (!pp->phy_dev)
-		return -ENODEV;
-
-	return phy_ethtool_gset(pp->phy_dev, cmd);
-}
-
-/* Set settings (phy address, speed) for ethtools */
-int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct mvneta_port *pp = netdev_priv(dev);
-	struct phy_device *phydev = pp->phy_dev;
+	struct mvneta_port *pp = netdev_priv(ndev);
+	struct phy_device *phydev = ndev->phydev;
 
 	if (!phydev)
 		return -ENODEV;
 
-	if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) {
+	if ((cmd->base.autoneg == AUTONEG_ENABLE) != pp->use_inband_status) {
 		u32 val;
 
-		mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE);
+		mvneta_set_autoneg(pp, cmd->base.autoneg == AUTONEG_ENABLE);
 
-		if (cmd->autoneg == AUTONEG_DISABLE) {
+		if (cmd->base.autoneg == AUTONEG_DISABLE) {
 			val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
 			val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED |
 				 MVNETA_GMAC_CONFIG_GMII_SPEED |
@@ -3547,17 +3537,17 @@
 			mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
 		}
 
-		pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE);
+		pp->use_inband_status = (cmd->base.autoneg == AUTONEG_ENABLE);
 		netdev_info(pp->dev, "autoneg status set to %i\n",
 			    pp->use_inband_status);
 
-		if (netif_running(dev)) {
+		if (netif_running(ndev)) {
 			mvneta_port_down(pp);
 			mvneta_port_up(pp);
 		}
 	}
 
-	return phy_ethtool_sset(pp->phy_dev, cmd);
+	return phy_ethtool_ksettings_set(ndev->phydev, cmd);
 }
 
 /* Set interrupt coalescing for ethtools */
@@ -3821,8 +3811,6 @@
 
 const struct ethtool_ops mvneta_eth_tool_ops = {
 	.get_link       = ethtool_op_get_link,
-	.get_settings   = mvneta_ethtool_get_settings,
-	.set_settings   = mvneta_ethtool_set_settings,
 	.set_coalesce   = mvneta_ethtool_set_coalesce,
 	.get_coalesce   = mvneta_ethtool_get_coalesce,
 	.get_drvinfo    = mvneta_ethtool_get_drvinfo,
@@ -3835,6 +3823,8 @@
 	.get_rxnfc	= mvneta_ethtool_get_rxnfc,
 	.get_rxfh	= mvneta_ethtool_get_rxfh,
 	.set_rxfh	= mvneta_ethtool_set_rxfh,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = mvneta_ethtool_set_link_ksettings,
 };
 
 /* Initialize hw */
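
The mvneta hunks drop the driver's private phy_dev pointer in favor of the phylib-managed net_device->phydev and move from the legacy get_settings/set_settings ethtool hooks to the link_ksettings pair, letting the generic phy_ethtool_get_link_ksettings() helper serve reads directly. A hedged kernel-style sketch of that wiring, not the driver's code (demo_* names are illustrative):

#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/phy.h>

/* "set" keeps a driver hook for private reconfiguration, then hands
 * the rest to phylib; "get" can use the generic helper unmodified.
 */
static int demo_set_link_ksettings(struct net_device *ndev,
				   const struct ethtool_link_ksettings *cmd)
{
	if (!ndev->phydev)
		return -ENODEV;

	/* driver-specific reconfiguration would go here */

	return phy_ethtool_ksettings_set(ndev->phydev, cmd);
}

static const struct ethtool_ops demo_ethtool_ops = {
	.get_link	    = ethtool_op_get_link,
	.get_link_ksettings = phy_ethtool_get_link_ksettings,
	.set_link_ksettings = demo_set_link_ksettings,
};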
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index f160954..1801fd8 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -369,18 +369,17 @@
 	int ret = eth_mac_addr(dev, p);
 	struct mtk_mac *mac = netdev_priv(dev);
 	const char *macaddr = dev->dev_addr;
-	unsigned long flags;
 
 	if (ret)
 		return ret;
 
-	spin_lock_irqsave(&mac->hw->page_lock, flags);
+	spin_lock_bh(&mac->hw->page_lock);
 	mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
 		MTK_GDMA_MAC_ADRH(mac->id));
 	mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
 		(macaddr[4] << 8) | macaddr[5],
 		MTK_GDMA_MAC_ADRL(mac->id));
-	spin_unlock_irqrestore(&mac->hw->page_lock, flags);
+	spin_unlock_bh(&mac->hw->page_lock);
 
 	return 0;
 }
@@ -764,7 +763,6 @@
 	struct mtk_eth *eth = mac->hw;
 	struct mtk_tx_ring *ring = &eth->tx_ring;
 	struct net_device_stats *stats = &dev->stats;
-	unsigned long flags;
 	bool gso = false;
 	int tx_num;
 
@@ -772,14 +770,14 @@
 	 * however we have 2 queues running on the same ring so we need to lock
 	 * the ring access
 	 */
-	spin_lock_irqsave(&eth->page_lock, flags);
+	spin_lock(&eth->page_lock);
 
 	tx_num = mtk_cal_txd_req(skb);
 	if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
 		mtk_stop_queue(eth);
 		netif_err(eth, tx_queued, dev,
 			  "Tx Ring full when queue awake!\n");
-		spin_unlock_irqrestore(&eth->page_lock, flags);
+		spin_unlock(&eth->page_lock);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -804,12 +802,12 @@
 	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
 		mtk_stop_queue(eth);
 
-	spin_unlock_irqrestore(&eth->page_lock, flags);
+	spin_unlock(&eth->page_lock);
 
 	return NETDEV_TX_OK;
 
 drop:
-	spin_unlock_irqrestore(&eth->page_lock, flags);
+	spin_unlock(&eth->page_lock);
 	stats->tx_dropped++;
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
@@ -1363,16 +1361,15 @@
 
 static void mtk_stop_dma(struct mtk_eth *eth, u32 glo_cfg)
 {
-	unsigned long flags;
 	u32 val;
 	int i;
 
 	/* stop the dma engine */
-	spin_lock_irqsave(&eth->page_lock, flags);
+	spin_lock_bh(&eth->page_lock);
 	val = mtk_r32(eth, glo_cfg);
 	mtk_w32(eth, val & ~(MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN),
 		glo_cfg);
-	spin_unlock_irqrestore(&eth->page_lock, flags);
+	spin_unlock_bh(&eth->page_lock);
 
 	/* wait for dma stop */
 	for (i = 0; i < 10; i++) {
@@ -1912,7 +1909,6 @@
 	netif_napi_del(&eth->tx_napi);
 	netif_napi_del(&eth->rx_napi);
 	mtk_cleanup(eth);
-	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }
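
The mtk_eth_soc hunks relax the page_lock from irqsave/irqrestore to BH-level protection: the lock is only contended between process context and the TX path, and ndo_start_xmit() already runs with bottom halves disabled, so a plain spin_lock() suffices there. A hedged sketch of that locking split (demo_* names are illustrative, not the driver's):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_ring_lock);

static void demo_process_context_path(void)
{
	spin_lock_bh(&demo_ring_lock);	/* blocks softirq TX on this CPU */
	/* reprogram the MAC address / stop DMA */
	spin_unlock_bh(&demo_ring_lock);
}

static void demo_start_xmit_path(void)
{
	spin_lock(&demo_ring_lock);	/* BHs are already disabled here */
	/* queue the skb on the TX ring */
	spin_unlock(&demo_ring_lock);
}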
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 1c35f37..5476927 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -377,9 +377,8 @@
 	}
 }
 
-u32 qed_cxt_get_proto_cid_count(struct qed_hwfn		*p_hwfn,
-				enum protocol_type	type,
-				u32			*vf_cid)
+u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn,
+				enum protocol_type type, u32 *vf_cid)
 {
 	if (vf_cid)
 		*vf_cid = p_hwfn->p_cxt_mngr->conn_cfg[type].cids_per_vf;
@@ -405,10 +404,10 @@
 	return cnt;
 }
 
-static void
-qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn,
-			    enum protocol_type proto,
-			    u8 seg, u8 seg_type, u32 count, bool has_fl)
+static void qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn,
+					enum protocol_type proto,
+					u8 seg,
+					u8 seg_type, u32 count, bool has_fl)
 {
 	struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
 	struct qed_tid_seg *p_seg = &p_mngr->conn_cfg[proto].tid_seg[seg];
@@ -420,8 +419,7 @@
 
 static void qed_ilt_cli_blk_fill(struct qed_ilt_client_cfg *p_cli,
 				 struct qed_ilt_cli_blk *p_blk,
-				 u32 start_line, u32 total_size,
-				 u32 elem_size)
+				 u32 start_line, u32 total_size, u32 elem_size)
 {
 	u32 ilt_size = ILT_PAGE_IN_BYTES(p_cli->p_size.val);
 
@@ -448,8 +446,7 @@
 		p_cli->first.val = *p_line;
 
 	p_cli->active = true;
-	*p_line += DIV_ROUND_UP(p_blk->total_size,
-				p_blk->real_size_in_page);
+	*p_line += DIV_ROUND_UP(p_blk->total_size, p_blk->real_size_in_page);
 	p_cli->last.val = *p_line - 1;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_ILT,
@@ -926,12 +923,9 @@
 		void *p_virt;
 		u32 size;
 
-		size = min_t(u32, sz_left,
-			     p_blk->real_size_in_page);
+		size = min_t(u32, sz_left, p_blk->real_size_in_page);
 		p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
-					    size,
-					    &p_phys,
-					    GFP_KERNEL);
+					    size, &p_phys, GFP_KERNEL);
 		if (!p_virt)
 			return -ENOMEM;
 		memset(p_virt, 0, size);
@@ -976,7 +970,7 @@
 		for (j = 0; j < ILT_CLI_PF_BLOCKS; j++) {
 			p_blk = &clients[i].pf_blks[j];
 			rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, 0);
-			if (rc != 0)
+			if (rc)
 				goto ilt_shadow_fail;
 		}
 		for (k = 0; k < p_mngr->vf_count; k++) {
@@ -985,7 +979,7 @@
 
 				p_blk = &clients[i].vf_blks[j];
 				rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, lines);
-				if (rc != 0)
+				if (rc)
 					goto ilt_shadow_fail;
 			}
 		}
@@ -1672,7 +1666,7 @@
 		     p_hwfn->rel_pf_id * NUM_TASK_PF_SEGMENTS + i);
 
 		STORE_RT_REG_AGG(p_hwfn, rt_reg, cfg_word);
-		active_seg_mask |= (tm_iids.pf_tids[i] ? (1 << i) : 0);
+		active_seg_mask |= (tm_iids.pf_tids[i] ? BIT(i) : 0);
 
 		tm_offset += tm_iids.pf_tids[i];
 	}
@@ -1702,8 +1696,7 @@
 }
 
 int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn,
-			enum protocol_type type,
-			u32 *p_cid)
+			enum protocol_type type, u32 *p_cid)
 {
 	struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
 	u32 rel_cid;
@@ -1717,8 +1710,7 @@
 				      p_mngr->acquired[type].max_count);
 
 	if (rel_cid >= p_mngr->acquired[type].max_count) {
-		DP_NOTICE(p_hwfn, "no CID available for protocol %d\n",
-			  type);
+		DP_NOTICE(p_hwfn, "no CID available for protocol %d\n", type);
 		return -EINVAL;
 	}
 
@@ -1730,8 +1722,7 @@
 }
 
 static bool qed_cxt_test_cid_acquired(struct qed_hwfn *p_hwfn,
-				      u32 cid,
-				      enum protocol_type *p_type)
+				      u32 cid, enum protocol_type *p_type)
 {
 	struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
 	struct qed_cid_acquired_map *p_map;
@@ -1763,8 +1754,7 @@
 	return true;
 }
 
-void qed_cxt_release_cid(struct qed_hwfn *p_hwfn,
-			 u32 cid)
+void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid)
 {
 	struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
 	enum protocol_type type;
@@ -1781,8 +1771,7 @@
 	__clear_bit(rel_cid, p_mngr->acquired[type].cid_map);
 }
 
-int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn,
-			 struct qed_cxt_info *p_info)
+int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, struct qed_cxt_info *p_info)
 {
 	struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
 	u32 conn_cxt_size, hw_p_size, cxts_per_p, line;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 226cb08..b900dfb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -1968,6 +1968,7 @@
 
 	if (!dcbx_info->operational.ieee) {
 		DP_INFO(hwfn, "DCBX is not enabled/operational in IEEE mode\n");
+		kfree(dcbx_info);
 		return -EINVAL;
 	}
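
The qed_dcbx hunk plugs a leak: dcbx_info was allocated earlier in the function, so the early -EINVAL return must free it too. A minimal userspace sketch of the single-exit cleanup pattern that avoids this class of leak (names are illustrative):

#include <errno.h>
#include <stdlib.h>

struct dcbx_demo_info {
	int operational_ieee;
};

static int demo_get_ieee_config(int ieee_enabled)
{
	struct dcbx_demo_info *info = calloc(1, sizeof(*info));
	int rc = 0;

	if (!info)
		return -ENOMEM;

	info->operational_ieee = ieee_enabled;
	if (!info->operational_ieee) {
		/* the early-exit path still reaches the free below */
		rc = -EINVAL;
		goto out;
	}

	/* ... copy operational parameters back to the caller ... */
out:
	free(info);
	return rc;
}

int main(void)
{
	return demo_get_ieee_config(0) == -EINVAL ? 0 : 1;
}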
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 0e4f4a9..8117ddf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -35,8 +35,7 @@
 #include "qed_sriov.h"
 #include "qed_vf.h"
 
-static spinlock_t qm_lock;
-static bool qm_lock_init = false;
+static DEFINE_SPINLOCK(qm_lock);
 
 /* API common to all protocols */
 enum BAR_ID {
@@ -44,8 +43,7 @@
 	BAR_ID_1        /* Used for doorbells */
 };
 
-static u32 qed_hw_bar_size(struct qed_hwfn	*p_hwfn,
-			   enum BAR_ID		bar_id)
+static u32 qed_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id)
 {
 	u32 bar_reg = (bar_id == BAR_ID_0 ?
 		       PGLUE_B_REG_PF_BAR0_SIZE : PGLUE_B_REG_PF_BAR1_SIZE);
@@ -70,8 +68,7 @@
 	}
 }
 
-void qed_init_dp(struct qed_dev *cdev,
-		 u32 dp_module, u8 dp_level)
+void qed_init_dp(struct qed_dev *cdev, u32 dp_module, u8 dp_level)
 {
 	u32 i;
 
@@ -543,8 +540,7 @@
 	cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL);
 	if (!cdev->reset_stats) {
 		DP_NOTICE(cdev, "Failed to allocate reset statistics\n");
-		rc = -ENOMEM;
-		goto alloc_err;
+		goto alloc_no_mem;
 	}
 
 	return 0;
@@ -605,9 +601,8 @@
 
 	/* Make sure notification is not set before initiating final cleanup */
 	if (REG_RD(p_hwfn, addr)) {
-		DP_NOTICE(
-			p_hwfn,
-			"Unexpected; Found final cleanup notification before initiating final cleanup\n");
+		DP_NOTICE(p_hwfn,
+			  "Unexpected; Found final cleanup notification before initiating final cleanup\n");
 		REG_WR(p_hwfn, addr, 0);
 	}
 
@@ -701,17 +696,14 @@
 				continue;
 
 			qed_init_cau_sb_entry(p_hwfn, &sb_entry,
-					      p_block->function_id,
-					      0, 0);
-			STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2,
-					 sb_entry);
+					      p_block->function_id, 0, 0);
+			STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2, sb_entry);
 		}
 	}
 }
 
 static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
-			      struct qed_ptt *p_ptt,
-			      int hw_mode)
+			      struct qed_ptt *p_ptt, int hw_mode)
 {
 	struct qed_qm_info *qm_info = &p_hwfn->qm_info;
 	struct qed_qm_common_rt_init_params params;
@@ -759,7 +751,7 @@
 	qed_port_unpretend(p_hwfn, p_ptt);
 
 	rc = qed_init_run(p_hwfn, p_ptt, PHASE_ENGINE, ANY_PHASE_ID, hw_mode);
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	qed_wr(p_hwfn, p_ptt, PSWRQ2_REG_L2P_VALIDATE_VFID, 0);
@@ -788,13 +780,12 @@
 }
 
 static int qed_hw_init_port(struct qed_hwfn *p_hwfn,
-			    struct qed_ptt *p_ptt,
-			    int hw_mode)
+			    struct qed_ptt *p_ptt, int hw_mode)
 {
 	int rc = 0;
 
 	rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode);
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	if (hw_mode & (1 << MODE_MF_SI)) {
@@ -848,7 +839,7 @@
 	qed_int_igu_init_rt(p_hwfn);
 
 	/* Set VLAN in NIG if needed */
-	if (hw_mode & (1 << MODE_MF_SD)) {
+	if (hw_mode & BIT(MODE_MF_SD)) {
 		DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "Configuring LLH_FUNC_TAG\n");
 		STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET, 1);
 		STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET,
@@ -856,7 +847,7 @@
 	}
 
 	/* Enable classification by MAC if needed */
-	if (hw_mode & (1 << MODE_MF_SI)) {
+	if (hw_mode & BIT(MODE_MF_SI)) {
 		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
 			   "Configuring TAGMAC_CLS_TYPE\n");
 		STORE_RT_REG(p_hwfn,
@@ -871,7 +862,7 @@
 
 	/* Cleanup chip from previous driver if such remains exist */
 	rc = qed_final_cleanup(p_hwfn, p_ptt, rel_pf_id, false);
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	/* PF Init sequence */
@@ -950,8 +941,7 @@
 	/* Read shadow of current MFW mailbox */
 	qed_mcp_read_mb(p_hwfn, p_main_ptt);
 	memcpy(p_hwfn->mcp_info->mfw_mb_shadow,
-	       p_hwfn->mcp_info->mfw_mb_cur,
-	       p_hwfn->mcp_info->mfw_mb_length);
+	       p_hwfn->mcp_info->mfw_mb_cur, p_hwfn->mcp_info->mfw_mb_length);
 }
 
 int qed_hw_init(struct qed_dev *cdev,
@@ -971,7 +961,7 @@
 
 	if (IS_PF(cdev)) {
 		rc = qed_init_fw_data(cdev, bin_fw_data);
-		if (rc != 0)
+		if (rc)
 			return rc;
 	}
 
@@ -988,8 +978,7 @@
 
 		qed_calc_hw_mode(p_hwfn);
 
-		rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt,
-				      &load_code);
+		rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt, &load_code);
 		if (rc) {
 			DP_NOTICE(p_hwfn, "Failed sending LOAD_REQ command\n");
 			return rc;
@@ -1004,11 +993,6 @@
 		p_hwfn->first_on_engine = (load_code ==
 					   FW_MSG_CODE_DRV_LOAD_ENGINE);
 
-		if (!qm_lock_init) {
-			spin_lock_init(&qm_lock);
-			qm_lock_init = true;
-		}
-
 		switch (load_code) {
 		case FW_MSG_CODE_DRV_LOAD_ENGINE:
 			rc = qed_hw_init_common(p_hwfn, p_hwfn->p_main_ptt,
@@ -1071,9 +1055,8 @@
 }
 
 #define QED_HW_STOP_RETRY_LIMIT (10)
-static inline void qed_hw_timers_stop(struct qed_dev *cdev,
-				      struct qed_hwfn *p_hwfn,
-				      struct qed_ptt *p_ptt)
+static void qed_hw_timers_stop(struct qed_dev *cdev,
+			       struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	int i;
 
@@ -1084,8 +1067,7 @@
 	for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) {
 		if ((!qed_rd(p_hwfn, p_ptt,
 			     TM_REG_PF_SCAN_ACTIVE_CONN)) &&
-		    (!qed_rd(p_hwfn, p_ptt,
-			     TM_REG_PF_SCAN_ACTIVE_TASK)))
+		    (!qed_rd(p_hwfn, p_ptt, TM_REG_PF_SCAN_ACTIVE_TASK)))
 			break;
 
 		/* Dependent on number of connection/tasks, possibly
@@ -1190,8 +1172,7 @@
 		}
 
 		DP_VERBOSE(p_hwfn,
-			   NETIF_MSG_IFDOWN,
-			   "Shutting down the fastpath\n");
+			   NETIF_MSG_IFDOWN, "Shutting down the fastpath\n");
 
 		qed_wr(p_hwfn, p_ptt,
 		       NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x1);
@@ -1219,14 +1200,13 @@
 	       NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x0);
 }
 
-static int qed_reg_assert(struct qed_hwfn *hwfn,
-			  struct qed_ptt *ptt, u32 reg,
-			  bool expected)
+static int qed_reg_assert(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u32 reg, bool expected)
 {
-	u32 assert_val = qed_rd(hwfn, ptt, reg);
+	u32 assert_val = qed_rd(p_hwfn, p_ptt, reg);
 
 	if (assert_val != expected) {
-		DP_NOTICE(hwfn, "Value at address 0x%x != 0x%08x\n",
+		DP_NOTICE(p_hwfn, "Value at address 0x%08x != 0x%08x\n",
 			  reg, expected);
 		return -EINVAL;
 	}
@@ -1306,8 +1286,7 @@
 
 	/* Clean Previous errors if such exist */
 	qed_wr(p_hwfn, p_hwfn->p_main_ptt,
-	       PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR,
-	       1 << p_hwfn->abs_pf_id);
+	       PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, 1 << p_hwfn->abs_pf_id);
 
 	/* enable internal target-read */
 	qed_wr(p_hwfn, p_hwfn->p_main_ptt,
@@ -1317,7 +1296,8 @@
 static void get_function_id(struct qed_hwfn *p_hwfn)
 {
 	/* ME Register */
-	p_hwfn->hw_info.opaque_fid = (u16)REG_RD(p_hwfn, PXP_PF_ME_OPAQUE_ADDR);
+	p_hwfn->hw_info.opaque_fid = (u16) REG_RD(p_hwfn,
+						  PXP_PF_ME_OPAQUE_ADDR);
 
 	p_hwfn->hw_info.concrete_fid = REG_RD(p_hwfn, PXP_PF_ME_CONCRETE_ADDR);
 
@@ -1326,6 +1306,10 @@
 				      PXP_CONCRETE_FID_PFID);
 	p_hwfn->port_id = GET_FIELD(p_hwfn->hw_info.concrete_fid,
 				    PXP_CONCRETE_FID_PORT);
+
+	DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE,
+		   "Read ME register: Concrete 0x%08x Opaque 0x%04x\n",
+		   p_hwfn->hw_info.concrete_fid, p_hwfn->hw_info.opaque_fid);
 }
 
 static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
@@ -1417,8 +1401,7 @@
 	return 0;
 }
 
-static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt)
+static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg;
 	u32 port_cfg_addr, link_temp, nvm_cfg_addr, device_capabilities;
@@ -1472,8 +1455,7 @@
 		p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_1X25G;
 		break;
 	default:
-		DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n",
-			  core_cfg);
+		DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n", core_cfg);
 		break;
 	}
 
@@ -1484,11 +1466,11 @@
 	link_temp = qed_rd(p_hwfn, p_ptt,
 			   port_cfg_addr +
 			   offsetof(struct nvm_cfg1_port, speed_cap_mask));
-	link->speed.advertised_speeds =
-		link_temp & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK;
+	link_temp &= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK;
+	link->speed.advertised_speeds = link_temp;
 
-	p_hwfn->mcp_info->link_capabilities.speed_capabilities =
-						link->speed.advertised_speeds;
+	link_temp = link->speed.advertised_speeds;
+	p_hwfn->mcp_info->link_capabilities.speed_capabilities = link_temp;
 
 	link_temp = qed_rd(p_hwfn, p_ptt,
 			   port_cfg_addr +
@@ -1517,8 +1499,7 @@
 		link->speed.forced_speed = 100000;
 		break;
 	default:
-		DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n",
-			  link_temp);
+		DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n", link_temp);
 	}
 
 	link_temp &= NVM_CFG1_PORT_DRV_FLOW_CONTROL_MASK;
@@ -1628,10 +1609,10 @@
 
 	DP_VERBOSE(p_hwfn,
 		   NETIF_MSG_PROBE,
-		   "PF [rel_id %d, abs_id %d] within the %d enabled functions on the engine\n",
+		   "PF [rel_id %d, abs_id %d] occupies index %d within the %d enabled functions on the engine\n",
 		   p_hwfn->rel_pf_id,
 		   p_hwfn->abs_pf_id,
-		   p_hwfn->num_funcs_on_engine);
+		   p_hwfn->enabled_func_idx, p_hwfn->num_funcs_on_engine);
 }
 
 static int
@@ -1703,10 +1684,9 @@
 	u32 tmp;
 
 	/* Read Vendor Id / Device Id */
-	pci_read_config_word(cdev->pdev, PCI_VENDOR_ID,
-			     &cdev->vendor_id);
-	pci_read_config_word(cdev->pdev, PCI_DEVICE_ID,
-			     &cdev->device_id);
+	pci_read_config_word(cdev->pdev, PCI_VENDOR_ID, &cdev->vendor_id);
+	pci_read_config_word(cdev->pdev, PCI_DEVICE_ID, &cdev->device_id);
+
 	cdev->chip_num = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt,
 				     MISCS_REG_CHIP_NUM);
 	cdev->chip_rev = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt,
@@ -1782,7 +1762,7 @@
 	/* First hwfn learns basic information, e.g., number of hwfns */
 	if (!p_hwfn->my_id) {
 		rc = qed_get_dev_info(p_hwfn->cdev);
-		if (rc != 0)
+		if (rc)
 			goto err1;
 	}
 
@@ -2183,8 +2163,7 @@
 	return 0;
 }
 
-int qed_fw_vport(struct qed_hwfn *p_hwfn,
-		 u8 src_id, u8 *dst_id)
+int qed_fw_vport(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id)
 {
 	if (src_id >= RESC_NUM(p_hwfn, QED_VPORT)) {
 		u8 min, max;
@@ -2203,8 +2182,7 @@
 	return 0;
 }
 
-int qed_fw_rss_eng(struct qed_hwfn *p_hwfn,
-		   u8 src_id, u8 *dst_id)
+int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id)
 {
 	if (src_id >= RESC_NUM(p_hwfn, QED_RSS_ENG)) {
 		u8 min, max;
@@ -2386,8 +2364,7 @@
  * 3. total_req_min_rate [all vports min rate sum] shouldn't exceed min_pf_rate.
  */
 static int qed_init_wfq_param(struct qed_hwfn *p_hwfn,
-			      u16 vport_id, u32 req_rate,
-			      u32 min_pf_rate)
+			      u16 vport_id, u32 req_rate, u32 min_pf_rate)
 {
 	u32 total_req_min_rate = 0, total_left_rate = 0, left_rate_per_vp = 0;
 	int non_requested_count = 0, req_count = 0, i, num_vports;
@@ -2471,7 +2448,7 @@
 
 	rc = qed_init_wfq_param(p_hwfn, vp_id, rate, p_link->min_pf_rate);
 
-	if (rc == 0)
+	if (!rc)
 		qed_configure_wfq_for_all_vports(p_hwfn, p_ptt,
 						 p_link->min_pf_rate);
 	else
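
The qed_dev hunks replace the runtime spin_lock_init() of qm_lock, guarded by a qm_lock_init flag, with a compile-time DEFINE_SPINLOCK(), so the lock is valid from the first use without any initialization bookkeeping. A hedged sketch of the idiom (demo_* names are illustrative):

#include <linux/spinlock.h>

/* before: static spinlock_t demo_lock; plus a "was it initialized" flag */
static DEFINE_SPINLOCK(demo_lock);	/* usable from the very first call */

static void demo_critical_section(void)
{
	spin_lock(&demo_lock);
	/* ... shared queue-manager state ... */
	spin_unlock(&demo_lock);
}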
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index e178853..8ebdc79 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -44,8 +44,7 @@
 
 int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn)
 {
-	struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool),
-					      GFP_KERNEL);
+	struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool), GFP_KERNEL);
 	int i;
 
 	if (!p_pool)
@@ -113,16 +112,14 @@
 	return NULL;
 }
 
-void qed_ptt_release(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt)
+void qed_ptt_release(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	spin_lock_bh(&p_hwfn->p_ptt_pool->lock);
 	list_add(&p_ptt->list_entry, &p_hwfn->p_ptt_pool->free_list);
 	spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
 }
 
-u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn,
-			struct qed_ptt *p_ptt)
+u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	/* The HW is using DWORDS and we need to translate it to Bytes */
 	return le32_to_cpu(p_ptt->pxp.offset) << 2;
@@ -141,8 +138,7 @@
 }
 
 void qed_ptt_set_win(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt,
-		     u32 new_hw_addr)
+		     struct qed_ptt *p_ptt, u32 new_hw_addr)
 {
 	u32 prev_hw_addr;
 
@@ -166,8 +162,7 @@
 }
 
 static u32 qed_set_ptt(struct qed_hwfn *p_hwfn,
-		       struct qed_ptt *p_ptt,
-		       u32 hw_addr)
+		       struct qed_ptt *p_ptt, u32 hw_addr)
 {
 	u32 win_hw_addr = qed_ptt_get_hw_addr(p_hwfn, p_ptt);
 	u32 offset;
@@ -224,10 +219,7 @@
 
 static void qed_memcpy_hw(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt,
-			  void *addr,
-			  u32 hw_addr,
-			  size_t n,
-			  bool to_device)
+			  void *addr, u32 hw_addr, size_t n, bool to_device)
 {
 	u32 dw_count, *host_addr, hw_offset;
 	size_t quota, done = 0;
@@ -259,8 +251,7 @@
 }
 
 void qed_memcpy_from(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt,
-		     void *dest, u32 hw_addr, size_t n)
+		     struct qed_ptt *p_ptt, void *dest, u32 hw_addr, size_t n)
 {
 	DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
 		   "hw_addr 0x%x, dest %p hw_addr 0x%x, size %lu\n",
@@ -270,8 +261,7 @@
 }
 
 void qed_memcpy_to(struct qed_hwfn *p_hwfn,
-		   struct qed_ptt *p_ptt,
-		   u32 hw_addr, void *src, size_t n)
+		   struct qed_ptt *p_ptt, u32 hw_addr, void *src, size_t n)
 {
 	DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
 		   "hw_addr 0x%x, hw_addr 0x%x, src %p size %lu\n",
@@ -280,9 +270,7 @@
 	qed_memcpy_hw(p_hwfn, p_ptt, src, hw_addr, n, true);
 }
 
-void qed_fid_pretend(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt,
-		     u16 fid)
+void qed_fid_pretend(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 fid)
 {
 	u16 control = 0;
 
@@ -309,8 +297,7 @@
 }
 
 void qed_port_pretend(struct qed_hwfn *p_hwfn,
-		      struct qed_ptt *p_ptt,
-		      u8 port_id)
+		      struct qed_ptt *p_ptt, u8 port_id)
 {
 	u16 control = 0;
 
@@ -326,8 +313,7 @@
 	       *(u32 *)&p_ptt->pxp.pretend);
 }
 
-void qed_port_unpretend(struct qed_hwfn *p_hwfn,
-			struct qed_ptt *p_ptt)
+void qed_port_unpretend(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u16 control = 0;
 
@@ -429,28 +415,27 @@
 	return DMAE_REG_GO_C0 + (idx << 2);
 }
 
-static int
-qed_dmae_post_command(struct qed_hwfn *p_hwfn,
-		      struct qed_ptt *p_ptt)
+static int qed_dmae_post_command(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt)
 {
-	struct dmae_cmd *command = p_hwfn->dmae_info.p_dmae_cmd;
+	struct dmae_cmd *p_command = p_hwfn->dmae_info.p_dmae_cmd;
 	u8 idx_cmd = p_hwfn->dmae_info.channel, i;
 	int qed_status = 0;
 
 	/* verify address is not NULL */
-	if ((((command->dst_addr_lo == 0) && (command->dst_addr_hi == 0)) ||
-	     ((command->src_addr_lo == 0) && (command->src_addr_hi == 0)))) {
+	if ((((!p_command->dst_addr_lo) && (!p_command->dst_addr_hi)) ||
+	     ((!p_command->src_addr_lo) && (!p_command->src_addr_hi)))) {
 		DP_NOTICE(p_hwfn,
 			  "source or destination address 0 idx_cmd=%d\n"
 			  "opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n",
-			   idx_cmd,
-			   le32_to_cpu(command->opcode),
-			   le16_to_cpu(command->opcode_b),
-			   le16_to_cpu(command->length_dw),
-			   le32_to_cpu(command->src_addr_hi),
-			   le32_to_cpu(command->src_addr_lo),
-			   le32_to_cpu(command->dst_addr_hi),
-			   le32_to_cpu(command->dst_addr_lo));
+			  idx_cmd,
+			  le32_to_cpu(p_command->opcode),
+			  le16_to_cpu(p_command->opcode_b),
+			  le16_to_cpu(p_command->length_dw),
+			  le32_to_cpu(p_command->src_addr_hi),
+			  le32_to_cpu(p_command->src_addr_lo),
+			  le32_to_cpu(p_command->dst_addr_hi),
+			  le32_to_cpu(p_command->dst_addr_lo));
 
 		return -EINVAL;
 	}
@@ -459,13 +444,13 @@
 		   NETIF_MSG_HW,
 		   "Posting DMAE command [idx %d]: opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n",
 		   idx_cmd,
-		   le32_to_cpu(command->opcode),
-		   le16_to_cpu(command->opcode_b),
-		   le16_to_cpu(command->length_dw),
-		   le32_to_cpu(command->src_addr_hi),
-		   le32_to_cpu(command->src_addr_lo),
-		   le32_to_cpu(command->dst_addr_hi),
-		   le32_to_cpu(command->dst_addr_lo));
+		   le32_to_cpu(p_command->opcode),
+		   le16_to_cpu(p_command->opcode_b),
+		   le16_to_cpu(p_command->length_dw),
+		   le32_to_cpu(p_command->src_addr_hi),
+		   le32_to_cpu(p_command->src_addr_lo),
+		   le32_to_cpu(p_command->dst_addr_hi),
+		   le32_to_cpu(p_command->dst_addr_lo));
 
 	/* Copy the command to DMAE - need to do it before every call
 	 * for source/dest address no reset.
@@ -475,7 +460,7 @@
 	 */
 	for (i = 0; i < DMAE_CMD_SIZE; i++) {
 		u32 data = (i < DMAE_CMD_SIZE_TO_FILL) ?
-			   *(((u32 *)command) + i) : 0;
+			   *(((u32 *)p_command) + i) : 0;
 
 		qed_wr(p_hwfn, p_ptt,
 		       DMAE_REG_CMD_MEM +
@@ -483,9 +468,7 @@
 		       (i * sizeof(u32)), data);
 	}
 
-	qed_wr(p_hwfn, p_ptt,
-	       qed_dmae_idx_to_go_cmd(idx_cmd),
-	       DMAE_GO_VALUE);
+	qed_wr(p_hwfn, p_ptt, qed_dmae_idx_to_go_cmd(idx_cmd), DMAE_GO_VALUE);
 
 	return qed_status;
 }
@@ -498,9 +481,7 @@
 	u32 **p_comp = &p_hwfn->dmae_info.p_completion_word;
 
 	*p_comp = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
-				     sizeof(u32),
-				     p_addr,
-				     GFP_KERNEL);
+				     sizeof(u32), p_addr, GFP_KERNEL);
 	if (!*p_comp) {
 		DP_NOTICE(p_hwfn, "Failed to allocate `p_completion_word'\n");
 		goto err;
@@ -543,8 +524,7 @@
 		p_phys = p_hwfn->dmae_info.completion_word_phys_addr;
 		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
 				  sizeof(u32),
-				  p_hwfn->dmae_info.p_completion_word,
-				  p_phys);
+				  p_hwfn->dmae_info.p_completion_word, p_phys);
 		p_hwfn->dmae_info.p_completion_word = NULL;
 	}
 
@@ -552,8 +532,7 @@
 		p_phys = p_hwfn->dmae_info.dmae_cmd_phys_addr;
 		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
 				  sizeof(struct dmae_cmd),
-				  p_hwfn->dmae_info.p_dmae_cmd,
-				  p_phys);
+				  p_hwfn->dmae_info.p_dmae_cmd, p_phys);
 		p_hwfn->dmae_info.p_dmae_cmd = NULL;
 	}
 
@@ -571,9 +550,7 @@
 
 static int qed_dmae_operation_wait(struct qed_hwfn *p_hwfn)
 {
-	u32 wait_cnt = 0;
-	u32 wait_cnt_limit = 10000;
-
+	u32 wait_cnt_limit = 10000, wait_cnt = 0;
 	int qed_status = 0;
 
 	barrier();
@@ -606,7 +583,7 @@
 					  u64 dst_addr,
 					  u8 src_type,
 					  u8 dst_type,
-					  u32 length)
+					  u32 length_dw)
 {
 	dma_addr_t phys = p_hwfn->dmae_info.intermediate_buffer_phys_addr;
 	struct dmae_cmd *cmd = p_hwfn->dmae_info.p_dmae_cmd;
@@ -624,7 +601,7 @@
 		cmd->src_addr_lo = cpu_to_le32(lower_32_bits(phys));
 		memcpy(&p_hwfn->dmae_info.p_intermediate_buffer[0],
 		       (void *)(uintptr_t)src_addr,
-		       length * sizeof(u32));
+		       length_dw * sizeof(u32));
 		break;
 	default:
 		return -EINVAL;
@@ -645,7 +622,7 @@
 		return -EINVAL;
 	}
 
-	cmd->length_dw = cpu_to_le16((u16)length);
+	cmd->length_dw = cpu_to_le16((u16)length_dw);
 
 	qed_dmae_post_command(p_hwfn, p_ptt);
 
@@ -654,16 +631,14 @@
 	if (qed_status) {
 		DP_NOTICE(p_hwfn,
 			  "qed_dmae_host2grc: Wait Failed. source_addr 0x%llx, grc_addr 0x%llx, size_in_dwords 0x%x\n",
-			  src_addr,
-			  dst_addr,
-			  length);
+			  src_addr, dst_addr, length_dw);
 		return qed_status;
 	}
 
 	if (dst_type == QED_DMAE_ADDRESS_HOST_VIRT)
 		memcpy((void *)(uintptr_t)(dst_addr),
 		       &p_hwfn->dmae_info.p_intermediate_buffer[0],
-		       length * sizeof(u32));
+		       length_dw * sizeof(u32));
 
 	return 0;
 }
@@ -730,10 +705,7 @@
 		if (qed_status) {
 			DP_NOTICE(p_hwfn,
 				  "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
-				  qed_status,
-				  src_addr,
-				  dst_addr,
-				  length_cur);
+				  qed_status, src_addr, dst_addr, length_cur);
 			break;
 		}
 	}
@@ -743,10 +715,7 @@
 
 int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt,
-		      u64 source_addr,
-		      u32 grc_addr,
-		      u32 size_in_dwords,
-		      u32 flags)
+		  u64 source_addr, u32 grc_addr, u32 size_in_dwords, u32 flags)
 {
 	u32 grc_addr_in_dw = grc_addr / sizeof(u32);
 	struct qed_dmae_params params;
@@ -768,9 +737,10 @@
 	return rc;
 }
 
-int
-qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr,
-		  dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
+int qed_dmae_grc2host(struct qed_hwfn *p_hwfn,
+		      struct qed_ptt *p_ptt,
+		      u32 grc_addr,
+		      dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
 {
 	u32 grc_addr_in_dw = grc_addr / sizeof(u32);
 	struct qed_dmae_params params;
@@ -791,12 +761,11 @@
 	return rc;
 }
 
-int
-qed_dmae_host2host(struct qed_hwfn *p_hwfn,
-		   struct qed_ptt *p_ptt,
-		   dma_addr_t source_addr,
-		   dma_addr_t dest_addr,
-		   u32 size_in_dwords, struct qed_dmae_params *p_params)
+int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
+		       struct qed_ptt *p_ptt,
+		       dma_addr_t source_addr,
+		       dma_addr_t dest_addr,
+		       u32 size_in_dwords, struct qed_dmae_params *p_params)
 {
 	int rc;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
index 9866a20..b7a4b27 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
@@ -59,17 +59,14 @@
 		p_hwfn->rt_data.b_valid[i] = false;
 }
 
-void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn,
-			   u32 rt_offset,
-			   u32 val)
+void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn, u32 rt_offset, u32 val)
 {
 	p_hwfn->rt_data.init_val[rt_offset] = val;
 	p_hwfn->rt_data.b_valid[rt_offset] = true;
 }
 
 void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn,
-			   u32 rt_offset, u32 *p_val,
-			   size_t size)
+			   u32 rt_offset, u32 *p_val, size_t size)
 {
 	size_t i;
 
@@ -81,10 +78,7 @@
 
 static int qed_init_rt(struct qed_hwfn	*p_hwfn,
 		       struct qed_ptt *p_ptt,
-		       u32 addr,
-		       u16 rt_offset,
-		       u16 size,
-		       bool b_must_dmae)
+		       u32 addr, u16 rt_offset, u16 size, bool b_must_dmae)
 {
 	u32 *p_init_val = &p_hwfn->rt_data.init_val[rt_offset];
 	bool *p_valid = &p_hwfn->rt_data.b_valid[rt_offset];
@@ -102,8 +96,7 @@
 		 * simply write the data instead of using dmae.
 		 */
 		if (!b_must_dmae) {
-			qed_wr(p_hwfn, p_ptt, addr + (i << 2),
-			       p_init_val[i]);
+			qed_wr(p_hwfn, p_ptt, addr + (i << 2), p_init_val[i]);
 			continue;
 		}
 
@@ -115,7 +108,7 @@
 		rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 				       (uintptr_t)(p_init_val + i),
 				       addr + (i << 2), segment, 0);
-		if (rc != 0)
+		if (rc)
 			return rc;
 
 		/* Jump over the entire segment, including invalid entry */
@@ -182,9 +175,7 @@
 
 static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
 			      struct qed_ptt *p_ptt,
-			      u32 addr,
-			      u32 fill,
-			      u32 fill_count)
+			      u32 addr, u32 fill, u32 fill_count)
 {
 	static u32 zero_buffer[DMAE_MAX_RW_SIZE];
 
@@ -199,15 +190,12 @@
 
 	return qed_dmae_host2grc(p_hwfn, p_ptt,
 				 (uintptr_t)(&zero_buffer[0]),
-				 addr, fill_count,
-				 QED_DMAE_FLAG_RW_REPL_SRC);
+				 addr, fill_count, QED_DMAE_FLAG_RW_REPL_SRC);
 }
 
 static void qed_init_fill(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt,
-			  u32 addr,
-			  u32 fill,
-			  u32 fill_count)
+			  u32 addr, u32 fill, u32 fill_count)
 {
 	u32 i;
 
@@ -218,12 +206,12 @@
 static int qed_init_cmd_array(struct qed_hwfn *p_hwfn,
 			      struct qed_ptt *p_ptt,
 			      struct init_write_op *cmd,
-			      bool b_must_dmae,
-			      bool b_can_dmae)
+			      bool b_must_dmae, bool b_can_dmae)
 {
+	u32 dmae_array_offset = le32_to_cpu(cmd->args.array_offset);
 	u32 data = le32_to_cpu(cmd->data);
 	u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2;
-	u32 dmae_array_offset = le32_to_cpu(cmd->args.array_offset);
+
 	u32 offset, output_len, input_len, max_size;
 	struct qed_dev *cdev = p_hwfn->cdev;
 	union init_array_hdr *hdr;
@@ -233,8 +221,7 @@
 
 	array_data = cdev->fw_data->arr_data;
 
-	hdr = (union init_array_hdr *)(array_data +
-				       dmae_array_offset);
+	hdr = (union init_array_hdr *)(array_data + dmae_array_offset);
 	data = le32_to_cpu(hdr->raw.data);
 	switch (GET_FIELD(data, INIT_ARRAY_RAW_HDR_TYPE)) {
 	case INIT_ARR_ZIPPED:
@@ -290,13 +277,12 @@
 /* init_ops write command */
 static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn,
 			   struct qed_ptt *p_ptt,
-			   struct init_write_op *cmd,
-			   bool b_can_dmae)
+			   struct init_write_op *p_cmd, bool b_can_dmae)
 {
-	u32 data = le32_to_cpu(cmd->data);
-	u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2;
+	u32 data = le32_to_cpu(p_cmd->data);
 	bool b_must_dmae = GET_FIELD(data, INIT_WRITE_OP_WIDE_BUS);
-	union init_write_args *arg = &cmd->args;
+	u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2;
+	union init_write_args *arg = &p_cmd->args;
 	int rc = 0;
 
 	/* Sanitize */
@@ -309,20 +295,18 @@
 
 	switch (GET_FIELD(data, INIT_WRITE_OP_SOURCE)) {
 	case INIT_SRC_INLINE:
-		qed_wr(p_hwfn, p_ptt, addr,
-		       le32_to_cpu(arg->inline_val));
+		data = le32_to_cpu(p_cmd->args.inline_val);
+		qed_wr(p_hwfn, p_ptt, addr, data);
 		break;
 	case INIT_SRC_ZEROS:
-		if (b_must_dmae ||
-		    (b_can_dmae && (le32_to_cpu(arg->zeros_count) >= 64)))
-			rc = qed_init_fill_dmae(p_hwfn, p_ptt, addr, 0,
-						le32_to_cpu(arg->zeros_count));
+		data = le32_to_cpu(p_cmd->args.zeros_count);
+		if (b_must_dmae || (b_can_dmae && (data >= 64)))
+			rc = qed_init_fill_dmae(p_hwfn, p_ptt, addr, 0, data);
 		else
-			qed_init_fill(p_hwfn, p_ptt, addr, 0,
-				      le32_to_cpu(arg->zeros_count));
+			qed_init_fill(p_hwfn, p_ptt, addr, 0, data);
 		break;
 	case INIT_SRC_ARRAY:
-		rc = qed_init_cmd_array(p_hwfn, p_ptt, cmd,
+		rc = qed_init_cmd_array(p_hwfn, p_ptt, p_cmd,
 					b_must_dmae, b_can_dmae);
 		break;
 	case INIT_SRC_RUNTIME:
@@ -353,8 +337,7 @@
 
 /* init_ops read/poll commands */
 static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn,
-			    struct qed_ptt *p_ptt,
-			    struct init_read_op *cmd)
+			    struct qed_ptt *p_ptt, struct init_read_op *cmd)
 {
 	bool (*comp_check)(u32 val, u32 expected_val);
 	u32 delay = QED_INIT_POLL_PERIOD_US, val;
@@ -412,35 +395,33 @@
 }
 
 static u8 qed_init_cmd_mode_match(struct qed_hwfn *p_hwfn,
-				  u16 *offset,
-				  int modes)
+				  u16 *p_offset, int modes)
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
 	const u8 *modes_tree_buf;
 	u8 arg1, arg2, tree_val;
 
 	modes_tree_buf = cdev->fw_data->modes_tree_buf;
-	tree_val = modes_tree_buf[(*offset)++];
+	tree_val = modes_tree_buf[(*p_offset)++];
 	switch (tree_val) {
 	case INIT_MODE_OP_NOT:
-		return qed_init_cmd_mode_match(p_hwfn, offset, modes) ^ 1;
+		return qed_init_cmd_mode_match(p_hwfn, p_offset, modes) ^ 1;
 	case INIT_MODE_OP_OR:
-		arg1	= qed_init_cmd_mode_match(p_hwfn, offset, modes);
-		arg2	= qed_init_cmd_mode_match(p_hwfn, offset, modes);
+		arg1 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
+		arg2 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
 		return arg1 | arg2;
 	case INIT_MODE_OP_AND:
-		arg1	= qed_init_cmd_mode_match(p_hwfn, offset, modes);
-		arg2	= qed_init_cmd_mode_match(p_hwfn, offset, modes);
+		arg1 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
+		arg2 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
 		return arg1 & arg2;
 	default:
 		tree_val -= MAX_INIT_MODE_OPS;
-		return (modes & (1 << tree_val)) ? 1 : 0;
+		return (modes & BIT(tree_val)) ? 1 : 0;
 	}
 }
 
 static u32 qed_init_cmd_mode(struct qed_hwfn *p_hwfn,
-			     struct init_if_mode_op *p_cmd,
-			     int modes)
+			     struct init_if_mode_op *p_cmd, int modes)
 {
 	u16 offset = le16_to_cpu(p_cmd->modes_buf_offset);
 
@@ -453,8 +434,7 @@
 
 static u32 qed_init_cmd_phase(struct qed_hwfn *p_hwfn,
 			      struct init_if_phase_op *p_cmd,
-			      u32 phase,
-			      u32 phase_id)
+			      u32 phase, u32 phase_id)
 {
 	u32 data = le32_to_cpu(p_cmd->phase_data);
 	u32 op_data = le32_to_cpu(p_cmd->op_data);
@@ -468,10 +448,7 @@
 }
 
 int qed_init_run(struct qed_hwfn *p_hwfn,
-		 struct qed_ptt *p_ptt,
-		 int phase,
-		 int phase_id,
-		 int modes)
+		 struct qed_ptt *p_ptt, int phase, int phase_id, int modes)
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
 	u32 cmd_num, num_init_ops;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 8fa50fa..61ec973 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -1775,10 +1775,9 @@
 };
 
 static inline u16 qed_attn_update_idx(struct qed_hwfn *p_hwfn,
-				      struct qed_sb_attn_info   *p_sb_desc)
+				      struct qed_sb_attn_info *p_sb_desc)
 {
-	u16     rc = 0;
-	u16     index;
+	u16 rc = 0, index;
 
 	/* Make certain HW write took effect */
 	mmiowb();
@@ -1802,15 +1801,13 @@
  *  @param asserted_bits newly asserted bits
  *  @return int
  */
-static int qed_int_assertion(struct qed_hwfn *p_hwfn,
-			     u16 asserted_bits)
+static int qed_int_assertion(struct qed_hwfn *p_hwfn, u16 asserted_bits)
 {
 	struct qed_sb_attn_info *sb_attn_sw = p_hwfn->p_sb_attn;
 	u32 igu_mask;
 
 	/* Mask the source of the attention in the IGU */
-	igu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-			  IGU_REG_ATTENTION_ENABLE);
+	igu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE);
 	DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "IGU mask: 0x%08x --> 0x%08x\n",
 		   igu_mask, igu_mask & ~(asserted_bits & ATTN_BITS_MASKABLE));
 	igu_mask &= ~(asserted_bits & ATTN_BITS_MASKABLE);
@@ -2041,7 +2038,7 @@
 			struct aeu_invert_reg_bit *p_bit = &p_aeu->bits[j];
 
 			if ((p_bit->flags & ATTENTION_PARITY) &&
-			    !!(parities & (1 << bit_idx)))
+			    !!(parities & BIT(bit_idx)))
 				qed_int_deassertion_parity(p_hwfn, p_bit,
 							   bit_idx);
 
@@ -2114,8 +2111,7 @@
 				    ~((u32)deasserted_bits));
 
 	/* Unmask deasserted attentions in IGU */
-	aeu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-			  IGU_REG_ATTENTION_ENABLE);
+	aeu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE);
 	aeu_mask |= (deasserted_bits & ATTN_BITS_MASKABLE);
 	qed_wr(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE, aeu_mask);
 
@@ -2160,8 +2156,7 @@
 			index, attn_bits, attn_acks, asserted_bits,
 			deasserted_bits, p_sb_attn_sw->known_attn);
 	} else if (asserted_bits == 0x100) {
-		DP_INFO(p_hwfn,
-			"MFW indication via attention\n");
+		DP_INFO(p_hwfn, "MFW indication via attention\n");
 	} else {
 		DP_VERBOSE(p_hwfn, NETIF_MSG_INTR,
 			   "MFW indication [deassertion]\n");
@@ -2173,18 +2168,14 @@
 			return rc;
 	}
 
-	if (deasserted_bits) {
+	if (deasserted_bits)
 		rc = qed_int_deassertion(p_hwfn, deasserted_bits);
-		if (rc)
-			return rc;
-	}
 
 	return rc;
 }
 
 static void qed_sb_ack_attn(struct qed_hwfn *p_hwfn,
-			    void __iomem *igu_addr,
-			    u32 ack_cons)
+			    void __iomem *igu_addr, u32 ack_cons)
 {
 	struct igu_prod_cons_update igu_ack = { 0 };
 
@@ -2242,9 +2233,8 @@
 
 	/* Gather Interrupts/Attentions information */
 	if (!sb_info->sb_virt) {
-		DP_ERR(
-			p_hwfn->cdev,
-			"Interrupt Status block is NULL - cannot check for new interrupts!\n");
+		DP_ERR(p_hwfn->cdev,
+		       "Interrupt Status block is NULL - cannot check for new interrupts!\n");
 	} else {
 		u32 tmp_index = sb_info->sb_ack;
 
@@ -2255,9 +2245,8 @@
 	}
 
 	if (!sb_attn || !sb_attn->sb_attn) {
-		DP_ERR(
-			p_hwfn->cdev,
-			"Attentions Status block is NULL - cannot check for new attentions!\n");
+		DP_ERR(p_hwfn->cdev,
+		       "Attentions Status block is NULL - cannot check for new attentions!\n");
 	} else {
 		u16 tmp_index = sb_attn->index;
 
@@ -2313,8 +2302,7 @@
 	if (p_sb->sb_attn)
 		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
 				  SB_ATTN_ALIGNED_SIZE(p_hwfn),
-				  p_sb->sb_attn,
-				  p_sb->sb_phys);
+				  p_sb->sb_attn, p_sb->sb_phys);
 	kfree(p_sb);
 }
 
@@ -2337,8 +2325,7 @@
 
 static void qed_int_sb_attn_init(struct qed_hwfn *p_hwfn,
 				 struct qed_ptt *p_ptt,
-				 void *sb_virt_addr,
-				 dma_addr_t sb_phy_addr)
+				 void *sb_virt_addr, dma_addr_t sb_phy_addr)
 {
 	struct qed_sb_attn_info *sb_info = p_hwfn->p_sb_attn;
 	int i, j, k;
@@ -2378,8 +2365,8 @@
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
 	struct qed_sb_attn_info *p_sb;
-	void *p_virt;
 	dma_addr_t p_phys = 0;
+	void *p_virt;
 
 	/* SB struct */
 	p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL);
@@ -2412,9 +2399,7 @@
 
 void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn,
 			   struct cau_sb_entry *p_sb_entry,
-			   u8 pf_id,
-			   u16 vf_number,
-			   u8 vf_valid)
+			   u8 pf_id, u16 vf_number, u8 vf_valid)
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
 	u32 cau_state;
@@ -2428,12 +2413,6 @@
 	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET0, 0x7F);
 	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET1, 0x7F);
 
-	/* setting the time resultion to a fixed value ( = 1) */
-	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES0,
-		  QED_CAU_DEF_RX_TIMER_RES);
-	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES1,
-		  QED_CAU_DEF_TX_TIMER_RES);
-
 	cau_state = CAU_HC_DISABLE_STATE;
 
 	if (cdev->int_coalescing_mode == QED_COAL_MODE_ENABLE) {
@@ -2468,9 +2447,7 @@
 void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
 			 struct qed_ptt *p_ptt,
 			 dma_addr_t sb_phys,
-			 u16 igu_sb_id,
-			 u16 vf_number,
-			 u8 vf_valid)
+			 u16 igu_sb_id, u16 vf_number, u8 vf_valid)
 {
 	struct cau_sb_entry sb_entry;
 
@@ -2514,8 +2491,7 @@
 			timer_res = 2;
 		timeset = (u8)(p_hwfn->cdev->rx_coalesce_usecs >> timer_res);
 		qed_int_cau_conf_pi(p_hwfn, p_ptt, igu_sb_id, RX_PI,
-				    QED_COAL_RX_STATE_MACHINE,
-				    timeset);
+				    QED_COAL_RX_STATE_MACHINE, timeset);
 
 		if (p_hwfn->cdev->tx_coalesce_usecs <= 0x7F)
 			timer_res = 0;
@@ -2541,8 +2517,7 @@
 			 u8 timeset)
 {
 	struct cau_pi_entry pi_entry;
-	u32 sb_offset;
-	u32 pi_offset;
+	u32 sb_offset, pi_offset;
 
 	if (IS_VF(p_hwfn->cdev))
 		return;
@@ -2569,8 +2544,7 @@
 }
 
 void qed_int_sb_setup(struct qed_hwfn *p_hwfn,
-		      struct qed_ptt *p_ptt,
-		      struct qed_sb_info *sb_info)
+		      struct qed_ptt *p_ptt, struct qed_sb_info *sb_info)
 {
 	/* zero status block and ack counter */
 	sb_info->sb_ack = 0;
@@ -2590,8 +2564,7 @@
  *
  * @return u16
  */
-static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn,
-			     u16 sb_id)
+static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id)
 {
 	u16 igu_sb_id;
 
@@ -2603,8 +2576,12 @@
 	else
 		igu_sb_id = qed_vf_get_igu_sb_id(p_hwfn, sb_id);
 
-	DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "SB [%s] index is 0x%04x\n",
-		   (sb_id == QED_SP_SB_ID) ? "DSB" : "non-DSB", igu_sb_id);
+	if (sb_id == QED_SP_SB_ID)
+		DP_VERBOSE(p_hwfn, NETIF_MSG_INTR,
+			   "Slowpath SB index in IGU is 0x%04x\n", igu_sb_id);
+	else
+		DP_VERBOSE(p_hwfn, NETIF_MSG_INTR,
+			   "SB [%04x] <--> IGU SB [%04x]\n", sb_id, igu_sb_id);
 
 	return igu_sb_id;
 }
@@ -2612,9 +2589,7 @@
 int qed_int_sb_init(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt,
 		    struct qed_sb_info *sb_info,
-		    void *sb_virt_addr,
-		    dma_addr_t sb_phy_addr,
-		    u16 sb_id)
+		    void *sb_virt_addr, dma_addr_t sb_phy_addr, u16 sb_id)
 {
 	sb_info->sb_virt = sb_virt_addr;
 	sb_info->sb_phys = sb_phy_addr;
@@ -2650,8 +2625,7 @@
 }
 
 int qed_int_sb_release(struct qed_hwfn *p_hwfn,
-		       struct qed_sb_info *sb_info,
-		       u16 sb_id)
+		       struct qed_sb_info *sb_info, u16 sb_id)
 {
 	if (sb_id == QED_SP_SB_ID) {
 		DP_ERR(p_hwfn, "Do Not free sp sb using this function");
@@ -2685,8 +2659,7 @@
 	kfree(p_sb);
 }
 
-static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt)
+static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_sb_sp_info *p_sb;
 	dma_addr_t p_phys = 0;
@@ -2721,9 +2694,7 @@
 
 int qed_int_register_cb(struct qed_hwfn *p_hwfn,
 			qed_int_comp_cb_t comp_cb,
-			void *cookie,
-			u8 *sb_idx,
-			__le16 **p_fw_cons)
+			void *cookie, u8 *sb_idx, __le16 **p_fw_cons)
 {
 	struct qed_sb_sp_info *p_sp_sb = p_hwfn->p_sp_sb;
 	int rc = -ENOMEM;
@@ -2764,8 +2735,7 @@
 }
 
 void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn,
-			    struct qed_ptt *p_ptt,
-			    enum qed_int_mode int_mode)
+			    struct qed_ptt *p_ptt, enum qed_int_mode int_mode)
 {
 	u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN | IGU_PF_CONF_ATTN_BIT_EN;
 
@@ -2809,7 +2779,7 @@
 	qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_MASK_ATTN_IGU, 0xff);
 	if ((int_mode != QED_INT_MODE_INTA) || IS_LEAD_HWFN(p_hwfn)) {
 		rc = qed_slowpath_irq_req(p_hwfn);
-		if (rc != 0) {
+		if (rc) {
 			DP_NOTICE(p_hwfn, "Slowpath IRQ request failed\n");
 			return -EINVAL;
 		}
@@ -2822,8 +2792,7 @@
 	return rc;
 }
 
-void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn,
-			     struct qed_ptt *p_ptt)
+void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	p_hwfn->b_int_enabled = 0;
 
@@ -2950,13 +2919,11 @@
 					p_hwfn->hw_info.opaque_fid, b_set);
 }
 
-static u32 qed_int_igu_read_cam_block(struct qed_hwfn	*p_hwfn,
-				      struct qed_ptt	*p_ptt,
-				      u16		sb_id)
+static u32 qed_int_igu_read_cam_block(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt, u16 sb_id)
 {
 	u32 val = qed_rd(p_hwfn, p_ptt,
-			 IGU_REG_MAPPING_MEMORY +
-			 sizeof(u32) * sb_id);
+			 IGU_REG_MAPPING_MEMORY + sizeof(u32) * sb_id);
 	struct qed_igu_block *p_block;
 
 	p_block = &p_hwfn->hw_info.p_igu_info->igu_map.igu_blocks[sb_id];
@@ -2983,8 +2950,7 @@
 	return val;
 }
 
-int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn,
-			 struct qed_ptt *p_ptt)
+int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_igu_info *p_igu_info;
 	u32 val, min_vf = 0, max_vf = 0;
@@ -3104,22 +3070,19 @@
  */
 void qed_int_igu_init_rt(struct qed_hwfn *p_hwfn)
 {
-	u32 igu_pf_conf = 0;
-
-	igu_pf_conf |= IGU_PF_CONF_FUNC_EN;
+	u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN;
 
 	STORE_RT_REG(p_hwfn, IGU_REG_PF_CONFIGURATION_RT_OFFSET, igu_pf_conf);
 }
 
 u64 qed_int_igu_read_sisr_reg(struct qed_hwfn *p_hwfn)
 {
-	u64 intr_status = 0;
-	u32 intr_status_lo = 0;
-	u32 intr_status_hi = 0;
 	u32 lsb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_LSB_UPPER -
 			       IGU_CMD_INT_ACK_BASE;
 	u32 msb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_MSB_UPPER -
 			       IGU_CMD_INT_ACK_BASE;
+	u32 intr_status_hi = 0, intr_status_lo = 0;
+	u64 intr_status = 0;
 
 	intr_status_lo = REG_RD(p_hwfn,
 				GTT_BAR0_MAP_REG_IGU_CMD +
@@ -3153,8 +3116,7 @@
 	kfree(p_hwfn->sp_dpc);
 }
 
-int qed_int_alloc(struct qed_hwfn *p_hwfn,
-		  struct qed_ptt *p_ptt)
+int qed_int_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	int rc = 0;
 
@@ -3169,10 +3131,9 @@
 		return rc;
 	}
 	rc = qed_int_sb_attn_alloc(p_hwfn, p_ptt);
-	if (rc) {
+	if (rc)
 		DP_ERR(p_hwfn->cdev, "Failed to allocate sb attn mem\n");
-		return rc;
-	}
+
 	return rc;
 }
 
@@ -3183,8 +3144,7 @@
 	qed_int_sp_dpc_free(p_hwfn);
 }
 
-void qed_int_setup(struct qed_hwfn *p_hwfn,
-		   struct qed_ptt *p_ptt)
+void qed_int_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	qed_int_sb_setup(p_hwfn, p_ptt, &p_hwfn->p_sp_sb->sb_info);
 	qed_int_sb_attn_setup(p_hwfn, p_ptt);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 401e738..c823c46 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -52,7 +52,7 @@
 	u16 rx_mode = 0;
 
 	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	memset(&init_data, 0, sizeof(init_data));
@@ -80,8 +80,7 @@
 	p_ramrod->rx_mode.state = cpu_to_le16(rx_mode);
 
 	/* TPA related fields */
-	memset(&p_ramrod->tpa_param, 0,
-	       sizeof(struct eth_vport_tpa_param));
+	memset(&p_ramrod->tpa_param, 0, sizeof(struct eth_vport_tpa_param));
 
 	p_ramrod->tpa_param.max_buff_num = p_params->max_buffers_per_cqe;
 
@@ -306,14 +305,14 @@
 	memset(&p_ramrod->approx_mcast.bins, 0,
 	       sizeof(p_ramrod->approx_mcast.bins));
 
-	if (p_params->update_approx_mcast_flg) {
-		p_ramrod->common.update_approx_mcast_flg = 1;
-		for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) {
-			u32 *p_bins = (u32 *)p_params->bins;
-			__le32 val = cpu_to_le32(p_bins[i]);
+	if (!p_params->update_approx_mcast_flg)
+		return;
 
-			p_ramrod->approx_mcast.bins[i] = val;
-		}
+	p_ramrod->common.update_approx_mcast_flg = 1;
+	for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) {
+		u32 *p_bins = (u32 *)p_params->bins;
+
+		p_ramrod->approx_mcast.bins[i] = cpu_to_le32(p_bins[i]);
 	}
 }
 
@@ -336,7 +335,7 @@
 	}
 
 	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	memset(&init_data, 0, sizeof(init_data));
@@ -361,8 +360,8 @@
 	p_cmn->tx_active_flg = p_params->vport_active_tx_flg;
 	p_cmn->update_tx_active_flg = p_params->update_vport_active_tx_flg;
 	p_cmn->accept_any_vlan = p_params->accept_any_vlan;
-	p_cmn->update_accept_any_vlan_flg =
-			p_params->update_accept_any_vlan_flg;
+	val = p_params->update_accept_any_vlan_flg;
+	p_cmn->update_accept_any_vlan_flg = val;
 
 	p_cmn->inner_vlan_removal_en = p_params->inner_vlan_removal_flg;
 	val = p_params->update_inner_vlan_removal_flg;
@@ -411,7 +410,7 @@
 		return qed_vf_pf_vport_stop(p_hwfn);
 
 	rc = qed_fw_vport(p_hwfn, vport_id, &abs_vport_id);
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	memset(&init_data, 0, sizeof(init_data));
@@ -476,7 +475,7 @@
 
 		rc = qed_sp_vport_update(p_hwfn, &vport_update_params,
 					 comp_mode, p_comp_data);
-		if (rc != 0) {
+		if (rc) {
 			DP_ERR(cdev, "Update rx_mode failed %d\n", rc);
 			return rc;
 		}
@@ -511,7 +510,7 @@
 int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 				u16 opaque_fid,
 				u32 cid,
-				struct qed_queue_start_common_params *params,
+				struct qed_queue_start_common_params *p_params,
 				u8 stats_id,
 				u16 bd_max_bytes,
 				dma_addr_t bd_chain_phys_addr,
@@ -526,23 +525,23 @@
 	int rc = -EINVAL;
 
 	/* Store information for the stop */
-	p_rx_cid		= &p_hwfn->p_rx_cids[params->queue_id];
-	p_rx_cid->cid		= cid;
-	p_rx_cid->opaque_fid	= opaque_fid;
-	p_rx_cid->vport_id	= params->vport_id;
+	p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
+	p_rx_cid->cid = cid;
+	p_rx_cid->opaque_fid = opaque_fid;
+	p_rx_cid->vport_id = p_params->vport_id;
 
-	rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_vport_id);
-	if (rc != 0)
+	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
+	if (rc)
 		return rc;
 
-	rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_rx_q_id);
-	if (rc != 0)
+	rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_rx_q_id);
+	if (rc)
 		return rc;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_SP,
 		   "opaque_fid=0x%x, cid=0x%x, rx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n",
-		   opaque_fid, cid, params->queue_id, params->vport_id,
-		   params->sb);
+		   opaque_fid,
+		   cid, p_params->queue_id, p_params->vport_id, p_params->sb);
 
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
@@ -558,24 +557,25 @@
 
 	p_ramrod = &p_ent->ramrod.rx_queue_start;
 
-	p_ramrod->sb_id			= cpu_to_le16(params->sb);
-	p_ramrod->sb_index		= params->sb_idx;
-	p_ramrod->vport_id		= abs_vport_id;
-	p_ramrod->stats_counter_id	= stats_id;
-	p_ramrod->rx_queue_id		= cpu_to_le16(abs_rx_q_id);
-	p_ramrod->complete_cqe_flg	= 0;
-	p_ramrod->complete_event_flg	= 1;
+	p_ramrod->sb_id = cpu_to_le16(p_params->sb);
+	p_ramrod->sb_index = p_params->sb_idx;
+	p_ramrod->vport_id = abs_vport_id;
+	p_ramrod->stats_counter_id = stats_id;
+	p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+	p_ramrod->complete_cqe_flg = 0;
+	p_ramrod->complete_event_flg = 1;
 
-	p_ramrod->bd_max_bytes	= cpu_to_le16(bd_max_bytes);
+	p_ramrod->bd_max_bytes = cpu_to_le16(bd_max_bytes);
 	DMA_REGPAIR_LE(p_ramrod->bd_base, bd_chain_phys_addr);
 
-	p_ramrod->num_of_pbl_pages	= cpu_to_le16(cqe_pbl_size);
+	p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size);
 	DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr);
 
-	p_ramrod->vf_rx_prod_index = params->vf_qid;
-	if (params->vf_qid)
+	p_ramrod->vf_rx_prod_index = p_params->vf_qid;
+	if (p_params->vf_qid)
 		DP_VERBOSE(p_hwfn, QED_MSG_SP,
-			   "Queue is meant for VF rxq[%04x]\n", params->vf_qid);
+			   "Queue is meant for VF rxq[%04x]\n",
+			   p_params->vf_qid);
 
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
@@ -583,7 +583,7 @@
 static int
 qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
 			  u16 opaque_fid,
-			  struct qed_queue_start_common_params *params,
+			  struct qed_queue_start_common_params *p_params,
 			  u16 bd_max_bytes,
 			  dma_addr_t bd_chain_phys_addr,
 			  dma_addr_t cqe_pbl_addr,
@@ -597,20 +597,20 @@
 
 	if (IS_VF(p_hwfn->cdev)) {
 		return qed_vf_pf_rxq_start(p_hwfn,
-					   params->queue_id,
-					   params->sb,
-					   params->sb_idx,
+					   p_params->queue_id,
+					   p_params->sb,
+					   (u8)p_params->sb_idx,
 					   bd_max_bytes,
 					   bd_chain_phys_addr,
 					   cqe_pbl_addr, cqe_pbl_size, pp_prod);
 	}
 
-	rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_l2_queue);
-	if (rc != 0)
+	rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_l2_queue);
+	if (rc)
 		return rc;
 
-	rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_stats_id);
-	if (rc != 0)
+	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id);
+	if (rc)
 		return rc;
 
 	*pp_prod = (u8 __iomem *)p_hwfn->regview +
@@ -622,9 +622,8 @@
 			  (u32 *)(&init_prod_val));
 
 	/* Allocate a CID for the queue */
-	p_rx_cid = &p_hwfn->p_rx_cids[params->queue_id];
-	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH,
-				 &p_rx_cid->cid);
+	p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
+	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_rx_cid->cid);
 	if (rc) {
 		DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
 		return rc;
@@ -634,14 +633,14 @@
 	rc = qed_sp_eth_rxq_start_ramrod(p_hwfn,
 					 opaque_fid,
 					 p_rx_cid->cid,
-					 params,
+					 p_params,
 					 abs_stats_id,
 					 bd_max_bytes,
 					 bd_chain_phys_addr,
 					 cqe_pbl_addr,
 					 cqe_pbl_size);
 
-	if (rc != 0)
+	if (rc)
 		qed_sp_release_queue_cid(p_hwfn, p_rx_cid);
 
 	return rc;
@@ -788,21 +787,20 @@
 	if (rc)
 		return rc;
 
-	p_ramrod		= &p_ent->ramrod.tx_queue_start;
-	p_ramrod->vport_id	= abs_vport_id;
+	p_ramrod = &p_ent->ramrod.tx_queue_start;
+	p_ramrod->vport_id = abs_vport_id;
 
-	p_ramrod->sb_id			= cpu_to_le16(p_params->sb);
-	p_ramrod->sb_index		= p_params->sb_idx;
-	p_ramrod->stats_counter_id	= stats_id;
+	p_ramrod->sb_id = cpu_to_le16(p_params->sb);
+	p_ramrod->sb_index = p_params->sb_idx;
+	p_ramrod->stats_counter_id = stats_id;
 
-	p_ramrod->queue_zone_id		= cpu_to_le16(abs_tx_q_id);
-	p_ramrod->pbl_size		= cpu_to_le16(pbl_size);
+	p_ramrod->queue_zone_id = cpu_to_le16(abs_tx_q_id);
+
+	p_ramrod->pbl_size = cpu_to_le16(pbl_size);
 	DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, pbl_addr);
 
-	pq_id			= qed_get_qm_pq(p_hwfn,
-						PROTOCOLID_ETH,
-						p_pq_params);
-	p_ramrod->qm_pq_id	= cpu_to_le16(pq_id);
+	pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, p_pq_params);
+	p_ramrod->qm_pq_id = cpu_to_le16(pq_id);
 
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
@@ -836,8 +834,7 @@
 	memset(&pq_params, 0, sizeof(pq_params));
 
 	/* Allocate a CID for the queue */
-	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH,
-				 &p_tx_cid->cid);
+	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_tx_cid->cid);
 	if (rc) {
 		DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
 		return rc;
@@ -896,8 +893,7 @@
 	return qed_sp_release_queue_cid(p_hwfn, p_tx_cid);
 }
 
-static enum eth_filter_action
-qed_filter_action(enum qed_filter_opcode opcode)
+static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode)
 {
 	enum eth_filter_action action = MAX_ETH_FILTER_ACTION;
 
@@ -1033,19 +1029,19 @@
 		p_first_filter->vni = cpu_to_le32(p_filter_cmd->vni);
 
 	if (p_filter_cmd->opcode == QED_FILTER_MOVE) {
-		p_second_filter->type		= p_first_filter->type;
-		p_second_filter->mac_msb	= p_first_filter->mac_msb;
-		p_second_filter->mac_mid	= p_first_filter->mac_mid;
-		p_second_filter->mac_lsb	= p_first_filter->mac_lsb;
-		p_second_filter->vlan_id	= p_first_filter->vlan_id;
-		p_second_filter->vni		= p_first_filter->vni;
+		p_second_filter->type = p_first_filter->type;
+		p_second_filter->mac_msb = p_first_filter->mac_msb;
+		p_second_filter->mac_mid = p_first_filter->mac_mid;
+		p_second_filter->mac_lsb = p_first_filter->mac_lsb;
+		p_second_filter->vlan_id = p_first_filter->vlan_id;
+		p_second_filter->vni = p_first_filter->vni;
 
 		p_first_filter->action = ETH_FILTER_ACTION_REMOVE;
 
 		p_first_filter->vport_id = vport_to_remove_from;
 
-		p_second_filter->action		= ETH_FILTER_ACTION_ADD;
-		p_second_filter->vport_id	= vport_to_add_to;
+		p_second_filter->action = ETH_FILTER_ACTION_ADD;
+		p_second_filter->vport_id = vport_to_add_to;
 	} else if (p_filter_cmd->opcode == QED_FILTER_REPLACE) {
 		p_first_filter->vport_id = vport_to_add_to;
 		memcpy(p_second_filter, p_first_filter,
@@ -1086,7 +1082,7 @@
 	rc = qed_filter_ucast_common(p_hwfn, opaque_fid, p_filter_cmd,
 				     &p_ramrod, &p_ent,
 				     comp_mode, p_comp_data);
-	if (rc != 0) {
+	if (rc) {
 		DP_ERR(p_hwfn, "Uni. filter command failed %d\n", rc);
 		return rc;
 	}
@@ -1094,10 +1090,8 @@
 	p_header->assert_on_error = p_filter_cmd->assert_on_error;
 
 	rc = qed_spq_post(p_hwfn, p_ent, NULL);
-	if (rc != 0) {
-		DP_ERR(p_hwfn,
-		       "Unicast filter ADD command failed %d\n",
-		       rc);
+	if (rc) {
+		DP_ERR(p_hwfn, "Unicast filter ADD command failed %d\n", rc);
 		return rc;
 	}
 
@@ -1136,15 +1130,10 @@
  * Return:
  ******************************************************************************/
 static u32 qed_calc_crc32c(u8 *crc32_packet,
-			   u32 crc32_length,
-			   u32 crc32_seed,
-			   u8 complement)
+			   u32 crc32_length, u32 crc32_seed, u8 complement)
 {
-	u32 byte = 0;
-	u32 bit = 0;
-	u8 msb = 0;
-	u8 current_byte = 0;
-	u32 crc32_result = crc32_seed;
+	u32 byte = 0, bit = 0, crc32_result = crc32_seed;
+	u8 msb = 0, current_byte = 0;
 
 	if ((!crc32_packet) ||
 	    (crc32_length == 0) ||
@@ -1164,9 +1153,7 @@
 	return crc32_result;
 }
 
-static inline u32 qed_crc32c_le(u32 seed,
-				u8 *mac,
-				u32 len)
+static u32 qed_crc32c_le(u32 seed, u8 *mac, u32 len)
 {
 	u32 packet_buf[2] = { 0 };
 
@@ -1196,17 +1183,14 @@
 	u8 abs_vport_id = 0;
 	int rc, i;
 
-	if (p_filter_cmd->opcode == QED_FILTER_ADD) {
+	if (p_filter_cmd->opcode == QED_FILTER_ADD)
 		rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_add_to,
 				  &abs_vport_id);
-		if (rc)
-			return rc;
-	} else {
+	else
 		rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_remove_from,
 				  &abs_vport_id);
-		if (rc)
-			return rc;
-	}
+	if (rc)
+		return rc;
 
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
@@ -1244,11 +1228,11 @@
 
 		/* Convert to correct endianness */
 		for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) {
+			struct vport_update_ramrod_mcast *p_ramrod_bins;
 			u32 *p_bins = (u32 *)bins;
-			struct vport_update_ramrod_mcast *approx_mcast;
 
-			approx_mcast = &p_ramrod->approx_mcast;
-			approx_mcast->bins[i] = cpu_to_le32(p_bins[i]);
+			p_ramrod_bins = &p_ramrod->approx_mcast;
+			p_ramrod_bins->bins[i] = cpu_to_le32(p_bins[i]);
 		}
 	}
 
@@ -1286,8 +1270,7 @@
 		rc = qed_sp_eth_filter_mcast(p_hwfn,
 					     opaque_fid,
 					     p_filter_cmd,
-					     comp_mode,
-					     p_comp_data);
+					     comp_mode, p_comp_data);
 	}
 	return rc;
 }
@@ -1314,9 +1297,8 @@
 		rc = qed_sp_eth_filter_ucast(p_hwfn,
 					     opaque_fid,
 					     p_filter_cmd,
-					     comp_mode,
-					     p_comp_data);
-		if (rc != 0)
+					     comp_mode, p_comp_data);
+		if (rc)
 			break;
 	}
 
@@ -1590,8 +1572,7 @@
 	}
 }
 
-void qed_get_vport_stats(struct qed_dev *cdev,
-			 struct qed_eth_stats *stats)
+void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
 {
 	u32 i;
 
@@ -1766,8 +1747,7 @@
 	return 0;
 }
 
-static int qed_stop_vport(struct qed_dev *cdev,
-			  u8 vport_id)
+static int qed_stop_vport(struct qed_dev *cdev, u8 vport_id)
 {
 	int rc, i;
 
@@ -1775,8 +1755,7 @@
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
 		rc = qed_sp_vport_stop(p_hwfn,
-				       p_hwfn->hw_info.opaque_fid,
-				       vport_id);
+				       p_hwfn->hw_info.opaque_fid, vport_id);
 
 		if (rc) {
 			DP_ERR(cdev, "Failed to stop VPORT\n");
@@ -1801,10 +1780,8 @@
 
 	/* Translate protocol params into sp params */
 	sp_params.vport_id = params->vport_id;
-	sp_params.update_vport_active_rx_flg =
-		params->update_vport_active_flg;
-	sp_params.update_vport_active_tx_flg =
-		params->update_vport_active_flg;
+	sp_params.update_vport_active_rx_flg = params->update_vport_active_flg;
+	sp_params.update_vport_active_tx_flg = params->update_vport_active_flg;
 	sp_params.vport_active_rx_flg = params->vport_active_flg;
 	sp_params.vport_active_tx_flg = params->vport_active_flg;
 	sp_params.update_tx_switching_flg = params->update_tx_switching_flg;
@@ -1817,8 +1794,7 @@
 	 * We need to re-fix the rss values per engine for CMT.
 	 */
 	if (cdev->num_hwfns > 1 && params->update_rss_flg) {
-		struct qed_update_vport_rss_params *rss =
-			&params->rss_params;
+		struct qed_update_vport_rss_params *rss = &params->rss_params;
 		int k, max = 0;
 
 		/* Find largest entry, since it's possible RSS needs to
@@ -1861,8 +1837,8 @@
 		       QED_RSS_IND_TABLE_SIZE * sizeof(u16));
 		memcpy(sp_rss_params.rss_key, params->rss_params.rss_key,
 		       QED_RSS_KEY_SIZE * sizeof(u32));
+		sp_params.rss_params = &sp_rss_params;
 	}
-	sp_params.rss_params = &sp_rss_params;
 
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
@@ -1893,8 +1869,8 @@
 			 u16 cqe_pbl_size,
 			 void __iomem **pp_prod)
 {
-	int rc, hwfn_index;
 	struct qed_hwfn *p_hwfn;
+	int rc, hwfn_index;
 
 	hwfn_index = params->rss_id % cdev->num_hwfns;
 	p_hwfn = &cdev->hwfns[hwfn_index];
@@ -1935,8 +1911,7 @@
 
 	rc = qed_sp_eth_rx_queue_stop(p_hwfn,
 				      params->rx_queue_id / cdev->num_hwfns,
-				      params->eq_completion_only,
-				      false);
+				      params->eq_completion_only, false);
 	if (rc) {
 		DP_ERR(cdev, "Failed to stop RXQ#%d\n", params->rx_queue_id);
 		return rc;
@@ -2047,11 +2022,11 @@
 
 	memset(&accept_flags, 0, sizeof(accept_flags));
 
-	accept_flags.update_rx_mode_config	= 1;
-	accept_flags.update_tx_mode_config	= 1;
-	accept_flags.rx_accept_filter		= QED_ACCEPT_UCAST_MATCHED |
-						  QED_ACCEPT_MCAST_MATCHED |
-						  QED_ACCEPT_BCAST;
+	accept_flags.update_rx_mode_config = 1;
+	accept_flags.update_tx_mode_config = 1;
+	accept_flags.rx_accept_filter = QED_ACCEPT_UCAST_MATCHED |
+					QED_ACCEPT_MCAST_MATCHED |
+					QED_ACCEPT_BCAST;
 	accept_flags.tx_accept_filter = QED_ACCEPT_UCAST_MATCHED |
 					QED_ACCEPT_MCAST_MATCHED |
 					QED_ACCEPT_BCAST;
@@ -2072,9 +2047,8 @@
 	struct qed_filter_ucast ucast;
 
 	if (!params->vlan_valid && !params->mac_valid) {
-		DP_NOTICE(
-			cdev,
-			"Tried configuring a unicast filter, but both MAC and VLAN are not set\n");
+		DP_NOTICE(cdev,
+			  "Tried configuring a unicast filter, but both MAC and VLAN are not set\n");
 		return -EINVAL;
 	}
 
@@ -2135,8 +2109,7 @@
 	for (i = 0; i < mcast.num_mc_addrs; i++)
 		ether_addr_copy(mcast.mac[i], params->mac[i]);
 
-	return qed_filter_mcast_cmd(cdev, &mcast,
-				    QED_SPQ_MODE_CB, NULL);
+	return qed_filter_mcast_cmd(cdev, &mcast, QED_SPQ_MODE_CB, NULL);
 }
 
 static int qed_configure_filter(struct qed_dev *cdev,
@@ -2153,15 +2126,13 @@
 		accept_flags = params->filter.accept_flags;
 		return qed_configure_filter_rx_mode(cdev, accept_flags);
 	default:
-		DP_NOTICE(cdev, "Unknown filter type %d\n",
-			  (int)params->type);
+		DP_NOTICE(cdev, "Unknown filter type %d\n", (int)params->type);
 		return -EINVAL;
 	}
 }
 
 static int qed_fp_cqe_completion(struct qed_dev *dev,
-				 u8 rss_id,
-				 struct eth_slow_path_rx_cqe *cqe)
+				 u8 rss_id, struct eth_slow_path_rx_cqe *cqe)
 {
 	return qed_eth_cqe_completion(&dev->hwfns[rss_id % dev->num_hwfns],
 				      cqe);
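
Both multicast paths in qed_l2.c above end up copying the host-order approx-match
bins into little-endian ramrod words. A minimal standalone sketch of that
conversion; NUM_DEMO_BINS and the function name are illustrative stand-ins for
ETH_MULTICAST_MAC_BINS_IN_REGS and the driver's in-place loops:

#include <linux/types.h>
#include <asm/byteorder.h>

#define NUM_DEMO_BINS 8	/* stand-in for ETH_MULTICAST_MAC_BINS_IN_REGS */

/* Copy host-order approx-match bins into little-endian ramrod words. */
static void demo_copy_mcast_bins(__le32 *dst, const u32 *p_bins)
{
	int i;

	for (i = 0; i < NUM_DEMO_BINS; i++)
		dst[i] = cpu_to_le32(p_bins[i]);
}
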
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index c7dc34b..54976cc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -51,8 +51,6 @@
 
 static int __init qed_init(void)
 {
-	pr_notice("qed_init called\n");
-
 	pr_info("%s", version);
 
 	return 0;
@@ -106,8 +104,7 @@
 /* Performs PCI initializations as well as initializing PCI-related parameters
  * in the device structure. Returns 0 in case of success.
  */
-static int qed_init_pci(struct qed_dev *cdev,
-			struct pci_dev *pdev)
+static int qed_init_pci(struct qed_dev *cdev, struct pci_dev *pdev)
 {
 	u8 rev_id;
 	int rc;
@@ -263,8 +260,7 @@
 }
 
 /* Sets the requested power state */
-static int qed_set_power_state(struct qed_dev *cdev,
-			       pci_power_t state)
+static int qed_set_power_state(struct qed_dev *cdev, pci_power_t state)
 {
 	if (!cdev)
 		return -ENODEV;
@@ -366,8 +362,8 @@
 		DP_NOTICE(cdev,
 			  "Trying to enable MSI-X with less vectors (%d out of %d)\n",
 			  cnt, int_params->in.num_vectors);
-		rc = pci_enable_msix_exact(cdev->pdev,
-					   int_params->msix_table, cnt);
+		rc = pci_enable_msix_exact(cdev->pdev, int_params->msix_table,
+					   cnt);
 		if (!rc)
 			rc = cnt;
 	}
@@ -439,6 +435,11 @@
 	}
 
 out:
+	if (!rc)
+		DP_INFO(cdev, "Using %s interrupts\n",
+			int_params->out.int_mode == QED_INT_MODE_INTA ?
+			"INTa" : int_params->out.int_mode == QED_INT_MODE_MSI ?
+			"MSI" : "MSIX");
 	cdev->int_coalescing_mode = QED_COAL_MODE_ENABLE;
 
 	return rc;
@@ -514,19 +515,18 @@
 int qed_slowpath_irq_req(struct qed_hwfn *hwfn)
 {
 	struct qed_dev *cdev = hwfn->cdev;
+	u32 int_mode;
 	int rc = 0;
 	u8 id;
 
-	if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
+	int_mode = cdev->int_params.out.int_mode;
+	if (int_mode == QED_INT_MODE_MSIX) {
 		id = hwfn->my_id;
 		snprintf(hwfn->name, NAME_SIZE, "sp-%d-%02x:%02x.%02x",
 			 id, cdev->pdev->bus->number,
 			 PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id);
 		rc = request_irq(cdev->int_params.msix_table[id].vector,
 				 qed_msix_sp_int, 0, hwfn->name, hwfn->sp_dpc);
-		if (!rc)
-			DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP),
-				   "Requested slowpath MSI-X\n");
 	} else {
 		unsigned long flags = 0;
 
@@ -541,6 +541,13 @@
 				 flags, cdev->name, cdev);
 	}
 
+	if (rc)
+		DP_NOTICE(cdev, "request_irq failed, rc = %d\n", rc);
+	else
+		DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP),
+			   "Requested slowpath %s\n",
+			   (int_mode == QED_INT_MODE_MSIX) ? "MSI-X" : "IRQ");
+
 	return rc;
 }
 
@@ -974,8 +981,7 @@
 }
 
 static u32 qed_sb_release(struct qed_dev *cdev,
-			  struct qed_sb_info *sb_info,
-			  u16 sb_id)
+			  struct qed_sb_info *sb_info, u16 sb_id)
 {
 	struct qed_hwfn *p_hwfn;
 	int hwfn_index;
@@ -1025,20 +1031,23 @@
 		link_params->speed.autoneg = params->autoneg;
 	if (params->override_flags & QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS) {
 		link_params->speed.advertised_speeds = 0;
-		if ((params->adv_speeds & SUPPORTED_1000baseT_Half) ||
-		    (params->adv_speeds & SUPPORTED_1000baseT_Full))
+		if ((params->adv_speeds & QED_LM_1000baseT_Half_BIT) ||
+		    (params->adv_speeds & QED_LM_1000baseT_Full_BIT))
 			link_params->speed.advertised_speeds |=
-				NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
-		if (params->adv_speeds & SUPPORTED_10000baseKR_Full)
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		if (params->adv_speeds & QED_LM_10000baseKR_Full_BIT)
 			link_params->speed.advertised_speeds |=
-				NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
-		if (params->adv_speeds & SUPPORTED_40000baseLR4_Full)
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+		if (params->adv_speeds & QED_LM_25000baseKR_Full_BIT)
 			link_params->speed.advertised_speeds |=
-				NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
-		if (params->adv_speeds & 0)
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
+		if (params->adv_speeds & QED_LM_40000baseLR4_Full_BIT)
 			link_params->speed.advertised_speeds |=
-				NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G;
-		if (params->adv_speeds & 0)
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
+		if (params->adv_speeds & QED_LM_50000baseKR2_Full_BIT)
+			link_params->speed.advertised_speeds |=
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G;
+		if (params->adv_speeds & QED_LM_100000baseKR4_Full_BIT)
 			link_params->speed.advertised_speeds |=
 			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G;
 	}
@@ -1168,50 +1177,56 @@
 		if_link->link_up = true;
 
 	/* TODO - at the moment assume supported and advertised speed equal */
-	if_link->supported_caps = SUPPORTED_FIBRE;
+	if_link->supported_caps = QED_LM_FIBRE_BIT;
 	if (params.speed.autoneg)
-		if_link->supported_caps |= SUPPORTED_Autoneg;
+		if_link->supported_caps |= QED_LM_Autoneg_BIT;
 	if (params.pause.autoneg ||
 	    (params.pause.forced_rx && params.pause.forced_tx))
-		if_link->supported_caps |= SUPPORTED_Asym_Pause;
+		if_link->supported_caps |= QED_LM_Asym_Pause_BIT;
 	if (params.pause.autoneg || params.pause.forced_rx ||
 	    params.pause.forced_tx)
-		if_link->supported_caps |= SUPPORTED_Pause;
+		if_link->supported_caps |= QED_LM_Pause_BIT;
 
 	if_link->advertised_caps = if_link->supported_caps;
 	if (params.speed.advertised_speeds &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
-		if_link->advertised_caps |= SUPPORTED_1000baseT_Half |
-					   SUPPORTED_1000baseT_Full;
+		if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT |
+		    QED_LM_1000baseT_Full_BIT;
 	if (params.speed.advertised_speeds &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
-		if_link->advertised_caps |= SUPPORTED_10000baseKR_Full;
+		if_link->advertised_caps |= QED_LM_10000baseKR_Full_BIT;
 	if (params.speed.advertised_speeds &
-		NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
-		if_link->advertised_caps |= SUPPORTED_40000baseLR4_Full;
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+		if_link->advertised_caps |= QED_LM_25000baseKR_Full_BIT;
 	if (params.speed.advertised_speeds &
-		NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
-		if_link->advertised_caps |= 0;
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+		if_link->advertised_caps |= QED_LM_40000baseLR4_Full_BIT;
+	if (params.speed.advertised_speeds &
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+		if_link->advertised_caps |= QED_LM_50000baseKR2_Full_BIT;
 	if (params.speed.advertised_speeds &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
-		if_link->advertised_caps |= 0;
+		if_link->advertised_caps |= QED_LM_100000baseKR4_Full_BIT;
 
 	if (link_caps.speed_capabilities &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
-		if_link->supported_caps |= SUPPORTED_1000baseT_Half |
-					   SUPPORTED_1000baseT_Full;
+		if_link->supported_caps |= QED_LM_1000baseT_Half_BIT |
+		    QED_LM_1000baseT_Full_BIT;
 	if (link_caps.speed_capabilities &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
-		if_link->supported_caps |= SUPPORTED_10000baseKR_Full;
+		if_link->supported_caps |= QED_LM_10000baseKR_Full_BIT;
 	if (link_caps.speed_capabilities &
-		NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
-		if_link->supported_caps |= SUPPORTED_40000baseLR4_Full;
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+		if_link->supported_caps |= QED_LM_25000baseKR_Full_BIT;
 	if (link_caps.speed_capabilities &
-		NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
-		if_link->supported_caps |= 0;
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+		if_link->supported_caps |= QED_LM_40000baseLR4_Full_BIT;
+	if (link_caps.speed_capabilities &
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+		if_link->supported_caps |= QED_LM_50000baseKR2_Full_BIT;
 	if (link_caps.speed_capabilities &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
-		if_link->supported_caps |= 0;
+		if_link->supported_caps |= QED_LM_100000baseKR4_Full_BIT;
 
 	if (link.link_up)
 		if_link->speed = link.speed;
@@ -1231,33 +1246,29 @@
 		if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE;
 
 	/* Link partner capabilities */
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_1G_HD)
-		if_link->lp_caps |= SUPPORTED_1000baseT_Half;
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_1G_FD)
-		if_link->lp_caps |= SUPPORTED_1000baseT_Full;
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_10G)
-		if_link->lp_caps |= SUPPORTED_10000baseKR_Full;
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_40G)
-		if_link->lp_caps |= SUPPORTED_40000baseLR4_Full;
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_50G)
-		if_link->lp_caps |= 0;
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_100G)
-		if_link->lp_caps |= 0;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_HD)
+		if_link->lp_caps |= QED_LM_1000baseT_Half_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_FD)
+		if_link->lp_caps |= QED_LM_1000baseT_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_10G)
+		if_link->lp_caps |= QED_LM_10000baseKR_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_25G)
+		if_link->lp_caps |= QED_LM_25000baseKR_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_40G)
+		if_link->lp_caps |= QED_LM_40000baseLR4_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_50G)
+		if_link->lp_caps |= QED_LM_50000baseKR2_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_100G)
+		if_link->lp_caps |= QED_LM_100000baseKR4_Full_BIT;
 
 	if (link.an_complete)
-		if_link->lp_caps |= SUPPORTED_Autoneg;
+		if_link->lp_caps |= QED_LM_Autoneg_BIT;
 
 	if (link.partner_adv_pause)
-		if_link->lp_caps |= SUPPORTED_Pause;
+		if_link->lp_caps |= QED_LM_Pause_BIT;
 	if (link.partner_adv_pause == QED_LINK_PARTNER_ASYMMETRIC_PAUSE ||
 	    link.partner_adv_pause == QED_LINK_PARTNER_BOTH_PAUSE)
-		if_link->lp_caps |= SUPPORTED_Asym_Pause;
+		if_link->lp_caps |= QED_LM_Asym_Pause_BIT;
 }
 
 static void qed_get_current_link(struct qed_dev *cdev,
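
The supported/advertised capability translation above tests one NVM_CFG1 speed
mask per QED_LM_* bit. A table-driven sketch of the same translation, using the
mask and bit names introduced by this diff (illustrative only, not how
qed_main.c is actually structured):

/* ARRAY_SIZE() comes from <linux/kernel.h> */
static const struct {
	u32 nvm_mask;
	u32 lm_caps;
} demo_speed_map[] = {
	{ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G,
	  QED_LM_1000baseT_Half_BIT | QED_LM_1000baseT_Full_BIT },
	{ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G,
	  QED_LM_10000baseKR_Full_BIT },
	{ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G,
	  QED_LM_25000baseKR_Full_BIT },
	{ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G,
	  QED_LM_40000baseLR4_Full_BIT },
	{ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G,
	  QED_LM_50000baseKR2_Full_BIT },
	{ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G,
	  QED_LM_100000baseKR4_Full_BIT },
};

/* Translate an NVM speed-capability word into QED_LM_* capability bits. */
static u32 demo_nvm_speeds_to_lm_caps(u32 speeds)
{
	u32 caps = 0;
	int i;

	for (i = 0; i < ARRAY_SIZE(demo_speed_map); i++)
		if (speeds & demo_speed_map[i].nvm_mask)
			caps |= demo_speed_map[i].lm_caps;

	return caps;
}
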
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index a240f26..88b448b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -54,8 +54,7 @@
 	return true;
 }
 
-void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn,
-			   struct qed_ptt *p_ptt)
+void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
 					PUBLIC_PORT);
@@ -68,8 +67,7 @@
 		   p_hwfn->mcp_info->port_addr, MFW_PORT(p_hwfn));
 }
 
-void qed_mcp_read_mb(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt)
+void qed_mcp_read_mb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 length = MFW_DRV_MSG_MAX_DWORDS(p_hwfn->mcp_info->mfw_mb_length);
 	u32 tmp, i;
@@ -99,8 +97,7 @@
 	return 0;
 }
 
-static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn,
-				struct qed_ptt *p_ptt)
+static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_mcp_info *p_info = p_hwfn->mcp_info;
 	u32 drv_mb_offsize, mfw_mb_offsize;
@@ -143,8 +140,7 @@
 	return 0;
 }
 
-int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt)
+int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_mcp_info *p_info;
 	u32 size;
@@ -165,9 +161,7 @@
 
 	size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32);
 	p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL);
-	p_info->mfw_mb_shadow =
-		kzalloc(sizeof(u32) * MFW_DRV_MSG_MAX_DWORDS(
-				p_info->mfw_mb_length), GFP_KERNEL);
+	p_info->mfw_mb_shadow = kzalloc(size, GFP_KERNEL);
 	if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr)
 		goto err;
 
@@ -189,8 +183,7 @@
  * access is achieved by setting a blocking flag, which causes competing
  * contexts' attempts to send their mailboxes to fail.
  */
-static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn,
-			   u32 cmd)
+static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, u32 cmd)
 {
 	spin_lock_bh(&p_hwfn->mcp_info->lock);
 
@@ -221,15 +214,13 @@
 	return 0;
 }
 
-static void qed_mcp_mb_unlock(struct qed_hwfn	*p_hwfn,
-			      u32		cmd)
+static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn, u32 cmd)
 {
 	if (cmd != DRV_MSG_CODE_LOAD_REQ && cmd != DRV_MSG_CODE_UNLOAD_REQ)
 		spin_unlock_bh(&p_hwfn->mcp_info->lock);
 }
 
-int qed_mcp_reset(struct qed_hwfn *p_hwfn,
-		  struct qed_ptt *p_ptt)
+int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 seq = ++p_hwfn->mcp_info->drv_mb_seq;
 	u8 delay = CHIP_MCP_RESP_ITER_US;
@@ -326,7 +317,8 @@
 		*o_mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param);
 	} else {
 		/* FW BUG! */
-		DP_ERR(p_hwfn, "MFW failed to respond!\n");
+		DP_ERR(p_hwfn, "MFW failed to respond [cmd 0x%x param 0x%x]\n",
+		       cmd, param);
 		*o_mcp_resp = 0;
 		rc = -EAGAIN;
 	}
@@ -342,7 +334,7 @@
 
 	/* MCP not initialized */
 	if (!qed_mcp_is_init(p_hwfn)) {
-		DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
+		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
 		return -EBUSY;
 	}
 
@@ -399,8 +391,7 @@
 }
 
 int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt,
-		     u32 *p_load_code)
+		     struct qed_ptt *p_ptt, u32 *p_load_code)
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
 	struct qed_mcp_mb_params mb_params;
@@ -527,8 +518,7 @@
 		   "Received transceiver state update [0x%08x] from mfw [Addr 0x%x]\n",
 		   transceiver_state,
 		   (u32)(p_hwfn->mcp_info->port_addr +
-			 offsetof(struct public_port,
-				  transceiver_data)));
+			  offsetof(struct public_port, transceiver_data)));
 
 	transceiver_state = GET_FIELD(transceiver_state,
 				      ETH_TRANSCEIVER_STATE);
@@ -540,8 +530,7 @@
 }
 
 static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
-				       struct qed_ptt *p_ptt,
-				       bool b_reset)
+				       struct qed_ptt *p_ptt, bool b_reset)
 {
 	struct qed_mcp_link_state *p_link;
 	u8 max_bw, min_bw;
@@ -557,8 +546,7 @@
 			   "Received link update [0x%08x] from mfw [Addr 0x%x]\n",
 			   status,
 			   (u32)(p_hwfn->mcp_info->port_addr +
-				 offsetof(struct public_port,
-					  link_status)));
+				 offsetof(struct public_port, link_status)));
 	} else {
 		DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
 			   "Resetting link indications\n");
@@ -635,6 +623,9 @@
 		(status & LINK_STATUS_LINK_PARTNER_20G_CAPABLE) ?
 		QED_LINK_PARTNER_SPEED_20G : 0;
 	p_link->partner_adv_speed |=
+		(status & LINK_STATUS_LINK_PARTNER_25G_CAPABLE) ?
+		QED_LINK_PARTNER_SPEED_25G : 0;
+	p_link->partner_adv_speed |=
 		(status & LINK_STATUS_LINK_PARTNER_40G_CAPABLE) ?
 		QED_LINK_PARTNER_SPEED_40G : 0;
 	p_link->partner_adv_speed |=
@@ -752,8 +743,7 @@
 
 static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn,
 				  struct qed_ptt *p_ptt,
-				  struct public_func *p_data,
-				  int pfid)
+				  struct public_func *p_data, int pfid)
 {
 	u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
 					PUBLIC_FUNC);
@@ -763,8 +753,7 @@
 
 	memset(p_data, 0, sizeof(*p_data));
 
-	size = min_t(u32, sizeof(*p_data),
-		     QED_SECTION_SIZE(mfw_path_offsize));
+	size = min_t(u32, sizeof(*p_data), QED_SECTION_SIZE(mfw_path_offsize));
 	for (i = 0; i < size / sizeof(u32); i++)
 		((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt,
 					    func_addr + (i << 2));
@@ -799,15 +788,13 @@
 	return -EINVAL;
 }
 
-static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn,
-			      struct qed_ptt *p_ptt)
+static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_mcp_function_info *p_info;
 	struct public_func shmem_info;
 	u32 resp = 0, param = 0;
 
-	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
-			       MCP_PF_ID(p_hwfn));
+	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
 
 	qed_read_pf_bandwidth(p_hwfn, &shmem_info);
 
@@ -940,8 +927,7 @@
 	return 0;
 }
 
-int qed_mcp_get_media_type(struct qed_dev *cdev,
-			   u32 *p_media_type)
+int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
 {
 	struct qed_hwfn *p_hwfn = &cdev->hwfns[0];
 	struct qed_ptt  *p_ptt;
@@ -950,7 +936,7 @@
 		return -EINVAL;
 
 	if (!qed_mcp_is_init(p_hwfn)) {
-		DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
+		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
 		return -EBUSY;
 	}
 
@@ -1003,15 +989,13 @@
 	struct qed_mcp_function_info *info;
 	struct public_func shmem_info;
 
-	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
-			       MCP_PF_ID(p_hwfn));
+	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
 	info = &p_hwfn->mcp_info->func_info;
 
 	info->pause_on_host = (shmem_info.config &
 			       FUNC_MF_CFG_PAUSE_ON_HOST_RING) ? 1 : 0;
 
-	if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info,
-				    &info->protocol)) {
+	if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, &info->protocol)) {
 		DP_ERR(p_hwfn, "Unknown personality %08x\n",
 		       (u32)(shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK));
 		return -EINVAL;
@@ -1072,15 +1056,13 @@
 	return &p_hwfn->mcp_info->link_capabilities;
 }
 
-int qed_mcp_drain(struct qed_hwfn *p_hwfn,
-		  struct qed_ptt *p_ptt)
+int qed_mcp_drain(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 resp = 0, param = 0;
 	int rc;
 
 	rc = qed_mcp_cmd(p_hwfn, p_ptt,
-			 DRV_MSG_CODE_NIG_DRAIN, 1000,
-			 &resp, &param);
+			 DRV_MSG_CODE_NIG_DRAIN, 1000, &resp, &param);
 
 	/* Wait for the drain to complete before returning */
 	msleep(1020);
@@ -1089,8 +1071,7 @@
 }
 
 int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn,
-			   struct qed_ptt *p_ptt,
-			   u32 *p_flash_size)
+			   struct qed_ptt *p_ptt, u32 *p_flash_size)
 {
 	u32 flash_size;
 
@@ -1168,8 +1149,8 @@
 	return rc;
 }
 
-int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-		    enum qed_led_mode mode)
+int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
+		    struct qed_ptt *p_ptt, enum qed_led_mode mode)
 {
 	u32 resp = 0, param = 0, drv_mb_param;
 	int rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 7f319aa..013d1b9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -60,9 +60,10 @@
 #define QED_LINK_PARTNER_SPEED_1G_FD    BIT(1)
 #define QED_LINK_PARTNER_SPEED_10G      BIT(2)
 #define QED_LINK_PARTNER_SPEED_20G      BIT(3)
-#define QED_LINK_PARTNER_SPEED_40G      BIT(4)
-#define QED_LINK_PARTNER_SPEED_50G      BIT(5)
-#define QED_LINK_PARTNER_SPEED_100G     BIT(6)
+#define QED_LINK_PARTNER_SPEED_25G      BIT(4)
+#define QED_LINK_PARTNER_SPEED_40G      BIT(5)
+#define QED_LINK_PARTNER_SPEED_50G      BIT(6)
+#define QED_LINK_PARTNER_SPEED_100G     BIT(7)
 	u32     partner_adv_speed;
 
 	bool    partner_tx_flow_ctrl_en;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index a52f3fc..2888eb0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -25,9 +25,7 @@
 
 int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 			struct qed_spq_entry **pp_ent,
-			u8 cmd,
-			u8 protocol,
-			struct qed_sp_init_data *p_data)
+			u8 cmd, u8 protocol, struct qed_sp_init_data *p_data)
 {
 	u32 opaque_cid = p_data->opaque_fid << 16 | p_data->cid;
 	struct qed_spq_entry *p_ent = NULL;
@@ -38,7 +36,7 @@
 
 	rc = qed_spq_get_entry(p_hwfn, pp_ent);
 
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	p_ent = *pp_ent;
@@ -321,8 +319,7 @@
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
 				 COMMON_RAMROD_PF_START,
-				 PROTOCOLID_COMMON,
-				 &init_data);
+				 PROTOCOLID_COMMON, &init_data);
 	if (rc)
 		return rc;
 
@@ -356,8 +353,7 @@
 	DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr,
 		       p_hwfn->p_consq->chain.pbl.p_phys_table);
 
-	qed_tunn_set_pf_start_params(p_hwfn, p_tunn,
-				     &p_ramrod->tunnel_config);
+	qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config);
 
 	if (IS_MF_SI(p_hwfn))
 		p_ramrod->allow_npar_tx_switching = allow_npar_tx_switch;
@@ -389,8 +385,7 @@
 
 	DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
 		   "Setting event_ring_sb [id %04x index %02x], outer_tag [%d]\n",
-		   sb, sb_index,
-		   p_ramrod->outer_tag);
+		   sb, sb_index, p_ramrod->outer_tag);
 
 	rc = qed_spq_post(p_hwfn, p_ent, NULL);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index d73456e..0265a32 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -41,8 +41,7 @@
 ***************************************************************************/
 static void qed_spq_blocking_cb(struct qed_hwfn *p_hwfn,
 				void *cookie,
-				union event_ring_data *data,
-				u8 fw_return_code)
+				union event_ring_data *data, u8 fw_return_code)
 {
 	struct qed_spq_comp_done *comp_done;
 
@@ -109,9 +108,8 @@
 /***************************************************************************
 * SPQ entries inner API
 ***************************************************************************/
-static int
-qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
-		   struct qed_spq_entry *p_ent)
+static int qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
+			      struct qed_spq_entry *p_ent)
 {
 	p_ent->flags = 0;
 
@@ -189,8 +187,7 @@
 }
 
 static int qed_spq_hw_post(struct qed_hwfn *p_hwfn,
-			   struct qed_spq *p_spq,
-			   struct qed_spq_entry *p_ent)
+			   struct qed_spq *p_spq, struct qed_spq_entry *p_ent)
 {
 	struct qed_chain *p_chain = &p_hwfn->p_spq->chain;
 	u16 echo = qed_chain_get_prod_idx(p_chain);
@@ -255,8 +252,7 @@
 /***************************************************************************
 * EQ API
 ***************************************************************************/
-void qed_eq_prod_update(struct qed_hwfn *p_hwfn,
-			u16 prod)
+void qed_eq_prod_update(struct qed_hwfn *p_hwfn, u16 prod)
 {
 	u32 addr = GTT_BAR0_MAP_REG_USDM_RAM +
 		   USTORM_EQE_CONS_OFFSET(p_hwfn->rel_pf_id);
@@ -267,9 +263,7 @@
 	mmiowb();
 }
 
-int qed_eq_completion(struct qed_hwfn *p_hwfn,
-		      void *cookie)
-
+int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie)
 {
 	struct qed_eq *p_eq = cookie;
 	struct qed_chain *p_chain = &p_eq->chain;
@@ -323,8 +317,7 @@
 	return rc;
 }
 
-struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn,
-			    u16 num_elem)
+struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem)
 {
 	struct qed_eq *p_eq;
 
@@ -348,11 +341,8 @@
 	}
 
 	/* register EQ completion on the SP SB */
-	qed_int_register_cb(p_hwfn,
-			    qed_eq_completion,
-			    p_eq,
-			    &p_eq->eq_sb_index,
-			    &p_eq->p_fw_cons);
+	qed_int_register_cb(p_hwfn, qed_eq_completion,
+			    p_eq, &p_eq->eq_sb_index, &p_eq->p_fw_cons);
 
 	return p_eq;
 
@@ -361,14 +351,12 @@
 	return NULL;
 }
 
-void qed_eq_setup(struct qed_hwfn *p_hwfn,
-		  struct qed_eq *p_eq)
+void qed_eq_setup(struct qed_hwfn *p_hwfn, struct qed_eq *p_eq)
 {
 	qed_chain_reset(&p_eq->chain);
 }
 
-void qed_eq_free(struct qed_hwfn *p_hwfn,
-		 struct qed_eq *p_eq)
+void qed_eq_free(struct qed_hwfn *p_hwfn, struct qed_eq *p_eq)
 {
 	if (!p_eq)
 		return;
@@ -379,10 +367,9 @@
 /***************************************************************************
 * CQE API - manipulate EQ functionality
 ***************************************************************************/
-static int qed_cqe_completion(
-	struct qed_hwfn *p_hwfn,
-	struct eth_slow_path_rx_cqe *cqe,
-	enum protocol_type protocol)
+static int qed_cqe_completion(struct qed_hwfn *p_hwfn,
+			      struct eth_slow_path_rx_cqe *cqe,
+			      enum protocol_type protocol)
 {
 	if (IS_VF(p_hwfn->cdev))
 		return 0;
@@ -463,8 +450,7 @@
 	u32 capacity;
 
 	/* SPQ struct */
-	p_spq =
-		kzalloc(sizeof(struct qed_spq), GFP_KERNEL);
+	p_spq = kzalloc(sizeof(struct qed_spq), GFP_KERNEL);
 	if (!p_spq) {
 		DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_spq'\n");
 		return -ENOMEM;
@@ -525,9 +511,7 @@
 	kfree(p_spq);
 }
 
-int
-qed_spq_get_entry(struct qed_hwfn *p_hwfn,
-		  struct qed_spq_entry **pp_ent)
+int qed_spq_get_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent)
 {
 	struct qed_spq *p_spq = p_hwfn->p_spq;
 	struct qed_spq_entry *p_ent = NULL;
@@ -538,14 +522,15 @@
 	if (list_empty(&p_spq->free_pool)) {
 		p_ent = kzalloc(sizeof(*p_ent), GFP_ATOMIC);
 		if (!p_ent) {
+			DP_NOTICE(p_hwfn,
+				  "Failed to allocate an SPQ entry for a pending ramrod\n");
 			rc = -ENOMEM;
 			goto out_unlock;
 		}
 		p_ent->queue = &p_spq->unlimited_pending;
 	} else {
 		p_ent = list_first_entry(&p_spq->free_pool,
-					 struct qed_spq_entry,
-					 list);
+					 struct qed_spq_entry, list);
 		list_del(&p_ent->list);
 		p_ent->queue = &p_spq->pending;
 	}
@@ -564,8 +549,7 @@
 	list_add_tail(&p_ent->list, &p_hwfn->p_spq->free_pool);
 }
 
-void qed_spq_return_entry(struct qed_hwfn *p_hwfn,
-			  struct qed_spq_entry *p_ent)
+void qed_spq_return_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent)
 {
 	spin_lock_bh(&p_hwfn->p_spq->lock);
 	__qed_spq_return_entry(p_hwfn, p_ent);
@@ -586,10 +570,9 @@
  *
  * @return int
  */
-static int
-qed_spq_add_entry(struct qed_hwfn *p_hwfn,
-		  struct qed_spq_entry *p_ent,
-		  enum spq_priority priority)
+static int qed_spq_add_entry(struct qed_hwfn *p_hwfn,
+			     struct qed_spq_entry *p_ent,
+			     enum spq_priority priority)
 {
 	struct qed_spq *p_spq = p_hwfn->p_spq;
 
@@ -604,8 +587,7 @@
 			struct qed_spq_entry *p_en2;
 
 			p_en2 = list_first_entry(&p_spq->free_pool,
-						 struct qed_spq_entry,
-						 list);
+						 struct qed_spq_entry, list);
 			list_del(&p_en2->list);
 
 			/* Copy the ring element physical pointer to the new
@@ -655,8 +637,7 @@
 * Posting new Ramrods
 ***************************************************************************/
 static int qed_spq_post_list(struct qed_hwfn *p_hwfn,
-			     struct list_head *head,
-			     u32 keep_reserve)
+			     struct list_head *head, u32 keep_reserve)
 {
 	struct qed_spq *p_spq = p_hwfn->p_spq;
 	int rc;
@@ -690,8 +671,7 @@
 			break;
 
 		p_ent = list_first_entry(&p_spq->unlimited_pending,
-					 struct qed_spq_entry,
-					 list);
+					 struct qed_spq_entry, list);
 		if (!p_ent)
 			return -EINVAL;
 
@@ -705,8 +685,7 @@
 }
 
 int qed_spq_post(struct qed_hwfn *p_hwfn,
-		 struct qed_spq_entry *p_ent,
-		 u8 *fw_return_code)
+		 struct qed_spq_entry *p_ent, u8 *fw_return_code)
 {
 	int rc = 0;
 	struct qed_spq *p_spq = p_hwfn ? p_hwfn->p_spq : NULL;
@@ -803,8 +782,7 @@
 		return -EINVAL;
 
 	spin_lock_bh(&p_spq->lock);
-	list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending,
-				 list) {
+	list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, list) {
 		if (p_ent->elem.hdr.echo == echo) {
 			u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE;
 
@@ -846,15 +824,22 @@
 
 	if (!found) {
 		DP_NOTICE(p_hwfn,
-			  "Failed to find an entry this EQE completes\n");
+			  "Failed to find an entry this EQE [echo %04x] completes\n",
+			  le16_to_cpu(echo));
 		return -EEXIST;
 	}
 
-	DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Complete: func %p cookie %p)\n",
+	DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+		   "Complete EQE [echo %04x]: func %p cookie %p)\n",
+		   le16_to_cpu(echo),
 		   p_ent->comp_cb.function, p_ent->comp_cb.cookie);
 	if (found->comp_cb.function)
 		found->comp_cb.function(p_hwfn, found->comp_cb.cookie, p_data,
 					fw_return_code);
+	else
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_SPQ,
+			   "Got a completion without a callback function\n");
 
 	if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) ||
 	    (found->queue == &p_spq->unlimited_pending))
@@ -901,14 +886,12 @@
 	return NULL;
 }
 
-void qed_consq_setup(struct qed_hwfn *p_hwfn,
-		     struct qed_consq *p_consq)
+void qed_consq_setup(struct qed_hwfn *p_hwfn, struct qed_consq *p_consq)
 {
 	qed_chain_reset(&p_consq->chain);
 }
 
-void qed_consq_free(struct qed_hwfn *p_hwfn,
-		    struct qed_consq *p_consq)
+void qed_consq_free(struct qed_hwfn *p_hwfn, struct qed_consq *p_consq)
 {
 	if (!p_consq)
 		return;
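
The qed_spq.c changes above also touch the completion path that matches an EQE
against the pending ramrod list by its echo value and then dispatches the
callback registered with that entry. A generic sketch of this
callback-on-completion pattern; the names and types are illustrative, not the
qed API:

#include <linux/list.h>
#include <linux/types.h>
#include <linux/errno.h>

struct demo_comp_cb {
	void (*function)(void *cookie, u8 fw_return_code);
	void *cookie;
};

struct demo_spq_entry {
	struct list_head list;
	__le16 echo;
	struct demo_comp_cb comp_cb;
};

/* Find the pending entry this EQE completes (by echo) and run its callback. */
static int demo_spq_complete(struct list_head *pending, __le16 echo, u8 fw_ret)
{
	struct demo_spq_entry *ent, *tmp;

	list_for_each_entry_safe(ent, tmp, pending, list) {
		if (ent->echo != echo)
			continue;

		list_del(&ent->list);
		if (ent->comp_cb.function)
			ent->comp_cb.function(ent->comp_cb.cookie, fw_ret);
		return 0;
	}

	return -EEXIST;	/* mirrors the "failed to find an entry" case above */
}
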
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 15399da..51e4c90 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -699,7 +699,7 @@
 				&qzone_id);
 
 		reg_addr = PSWHST_REG_ZONE_PERMISSION_TABLE + qzone_id * 4;
-		val = enable ? (vf->abs_vf_id | (1 << 8)) : 0;
+		val = enable ? (vf->abs_vf_id | BIT(8)) : 0;
 		qed_wr(p_hwfn, p_ptt, reg_addr, val);
 	}
 }
@@ -1090,13 +1090,13 @@
 
 	/* Prepare response for all extended tlvs if they are found by PF */
 	for (i = 0; i < QED_IOV_VP_UPDATE_MAX; i++) {
-		if (!(tlvs_mask & (1 << i)))
+		if (!(tlvs_mask & BIT(i)))
 			continue;
 
 		resp = qed_add_tlv(p_hwfn, &p_mbx->offset,
 				   qed_iov_vport_to_tlv(p_hwfn, i), size);
 
-		if (tlvs_accepted & (1 << i))
+		if (tlvs_accepted & BIT(i))
 			resp->hdr.status = status;
 		else
 			resp->hdr.status = PFVF_STATUS_NOT_SUPPORTED;
@@ -1334,8 +1334,7 @@
 	pfdev_info->fw_minor = FW_MINOR_VERSION;
 	pfdev_info->fw_rev = FW_REVISION_VERSION;
 	pfdev_info->fw_eng = FW_ENGINEERING_VERSION;
-	pfdev_info->minor_fp_hsi = min_t(u8,
-					 ETH_HSI_VER_MINOR,
+	pfdev_info->minor_fp_hsi = min_t(u8, ETH_HSI_VER_MINOR,
 					 req->vfdev_info.eth_fp_hsi_minor);
 	pfdev_info->os_type = VFPF_ACQUIRE_OS_LINUX;
 	qed_mcp_get_mfw_ver(p_hwfn, p_ptt, &pfdev_info->mfw_ver, NULL);
@@ -1438,14 +1437,11 @@
 
 		filter.type = QED_FILTER_VLAN;
 		filter.vlan = p_vf->shadow_config.vlans[i].vid;
-		DP_VERBOSE(p_hwfn,
-			   QED_MSG_IOV,
+		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
 			   "Reconfiguring VLAN [0x%04x] for VF [%04x]\n",
 			   filter.vlan, p_vf->relative_vf_id);
-		rc = qed_sp_eth_filter_ucast(p_hwfn,
-					     p_vf->opaque_fid,
-					     &filter,
-					     QED_SPQ_MODE_CB, NULL);
+		rc = qed_sp_eth_filter_ucast(p_hwfn, p_vf->opaque_fid,
+					     &filter, QED_SPQ_MODE_CB, NULL);
 		if (rc) {
 			DP_NOTICE(p_hwfn,
 				  "Failed to configure VLAN [%04x] to VF [%04x]\n",
@@ -1463,7 +1459,7 @@
 {
 	int rc = 0;
 
-	if ((events & (1 << VLAN_ADDR_FORCED)) &&
+	if ((events & BIT(VLAN_ADDR_FORCED)) &&
 	    !(p_vf->configured_features & (1 << VLAN_ADDR_FORCED)))
 		rc = qed_iov_reconfigure_unicast_vlan(p_hwfn, p_vf);
 
@@ -1479,7 +1475,7 @@
 	if (!p_vf->vport_instance)
 		return -EINVAL;
 
-	if (events & (1 << MAC_ADDR_FORCED)) {
+	if (events & BIT(MAC_ADDR_FORCED)) {
 		/* Since there's no way [currently] of removing the MAC,
 		 * we can always assume this means we need to force it.
 		 */
@@ -1502,7 +1498,7 @@
 		p_vf->configured_features |= 1 << MAC_ADDR_FORCED;
 	}
 
-	if (events & (1 << VLAN_ADDR_FORCED)) {
+	if (events & BIT(VLAN_ADDR_FORCED)) {
 		struct qed_sp_vport_update_params vport_update;
 		u8 removal;
 		int i;
@@ -1572,7 +1568,7 @@
 		if (filter.vlan)
 			p_vf->configured_features |= 1 << VLAN_ADDR_FORCED;
 		else
-			p_vf->configured_features &= ~(1 << VLAN_ADDR_FORCED);
+			p_vf->configured_features &= ~BIT(VLAN_ADDR_FORCED);
 	}
 
 	/* If forced features are terminated, we need to configure the shadow
@@ -1619,8 +1615,7 @@
 
 		qed_int_cau_conf_sb(p_hwfn, p_ptt,
 				    start->sb_addr[sb_id],
-				    vf->igu_sbs[sb_id],
-				    vf->abs_vf_id, 1);
+				    vf->igu_sbs[sb_id], vf->abs_vf_id, 1);
 	}
 	qed_iov_enable_vf_traffic(p_hwfn, p_ptt, vf);
 
@@ -1632,7 +1627,7 @@
 	 * vfs that would still be fine, since they passed '0' as padding].
 	 */
 	p_bitmap = &vf_info->bulletin.p_virt->valid_bitmap;
-	if (!(*p_bitmap & (1 << VFPF_BULLETIN_UNTAGGED_DEFAULT_FORCED))) {
+	if (!(*p_bitmap & BIT(VFPF_BULLETIN_UNTAGGED_DEFAULT_FORCED))) {
 		u8 vf_req = start->only_untagged;
 
 		vf_info->bulletin.p_virt->default_only_untagged = vf_req;
@@ -1652,7 +1647,7 @@
 	params.mtu = vf->mtu;
 
 	rc = qed_sp_eth_vport_start(p_hwfn, &params);
-	if (rc != 0) {
+	if (rc) {
 		DP_ERR(p_hwfn,
 		       "qed_iov_vf_mbx_start_vport returned error %d\n", rc);
 		status = PFVF_STATUS_FAILURE;
@@ -1679,7 +1674,7 @@
 	vf->spoof_chk = false;
 
 	rc = qed_sp_vport_stop(p_hwfn, vf->opaque_fid, vf->vport_id);
-	if (rc != 0) {
+	if (rc) {
 		DP_ERR(p_hwfn, "qed_iov_vf_mbx_stop_vport returned error %d\n",
 		       rc);
 		status = PFVF_STATUS_FAILURE;
@@ -2045,7 +2040,7 @@
 	p_vf->shadow_config.inner_vlan_removal = p_vlan_tlv->remove_vlan;
 
 	/* Ignore the VF request if we're forcing a vlan */
-	if (!(p_vf->configured_features & (1 << VLAN_ADDR_FORCED))) {
+	if (!(p_vf->configured_features & BIT(VLAN_ADDR_FORCED))) {
 		p_data->update_inner_vlan_removal_flg = 1;
 		p_data->inner_vlan_removal_flg = p_vlan_tlv->remove_vlan;
 	}
@@ -2340,7 +2335,7 @@
 	/* In forced mode, we're willing to remove entries - but we don't add
 	 * new ones.
 	 */
-	if (p_vf->bulletin.p_virt->valid_bitmap & (1 << VLAN_ADDR_FORCED))
+	if (p_vf->bulletin.p_virt->valid_bitmap & BIT(VLAN_ADDR_FORCED))
 		return 0;
 
 	if (p_params->opcode == QED_FILTER_ADD ||
@@ -2374,7 +2369,7 @@
 	int i;
 
 	/* If we're in forced-mode, we don't allow any change */
-	if (p_vf->bulletin.p_virt->valid_bitmap & (1 << MAC_ADDR_FORCED))
+	if (p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))
 		return 0;
 
 	/* First remove entries and then add new ones */
@@ -2509,7 +2504,7 @@
 	}
 
 	/* Determine if the unicast filtering is acceptable by the PF */
-	if ((p_bulletin->valid_bitmap & (1 << VLAN_ADDR_FORCED)) &&
+	if ((p_bulletin->valid_bitmap & BIT(VLAN_ADDR_FORCED)) &&
 	    (params.type == QED_FILTER_VLAN ||
 	     params.type == QED_FILTER_MAC_VLAN)) {
 		/* Once VLAN is forced or PVID is set, do not allow
@@ -2521,7 +2516,7 @@
 		goto out;
 	}
 
-	if ((p_bulletin->valid_bitmap & (1 << MAC_ADDR_FORCED)) &&
+	if ((p_bulletin->valid_bitmap & BIT(MAC_ADDR_FORCED)) &&
 	    (params.type == QED_FILTER_MAC ||
 	     params.type == QED_FILTER_MAC_VLAN)) {
 		if (!ether_addr_equal(p_bulletin->mac, params.mac) ||
@@ -2749,7 +2744,7 @@
 		/* Mark VF for ack and clean pending state */
 		if (p_vf->state == VF_RESET)
 			p_vf->state = VF_STOPPED;
-		ack_vfs[vfid / 32] |= (1 << (vfid % 32));
+		ack_vfs[vfid / 32] |= BIT((vfid % 32));
 		p_hwfn->pf_iov_info->pending_flr[rel_vf_id / 64] &=
 		    ~(1ULL << (rel_vf_id % 64));
 		p_hwfn->pf_iov_info->pending_events[rel_vf_id / 64] &=
@@ -2805,7 +2800,7 @@
 			continue;
 
 		vfid = p_vf->abs_vf_id;
-		if ((1 << (vfid % 32)) & p_disabled_vfs[vfid / 32]) {
+		if (BIT((vfid % 32)) & p_disabled_vfs[vfid / 32]) {
 			u64 *p_flr = p_hwfn->pf_iov_info->pending_flr;
 			u16 rel_vf_id = p_vf->relative_vf_id;
 
@@ -3064,8 +3059,7 @@
 
 	vf_info->bulletin.p_virt->valid_bitmap |= feature;
 	/* Forced MAC will disable MAC_ADDR */
-	vf_info->bulletin.p_virt->valid_bitmap &=
-				~(1 << VFPF_BULLETIN_MAC_ADDR);
+	vf_info->bulletin.p_virt->valid_bitmap &= ~BIT(VFPF_BULLETIN_MAC_ADDR);
 
 	qed_iov_configure_vport_forced(p_hwfn, vf_info, feature);
 }
@@ -3163,7 +3157,7 @@
 	if (!p_vf || !p_vf->bulletin.p_virt)
 		return NULL;
 
-	if (!(p_vf->bulletin.p_virt->valid_bitmap & (1 << MAC_ADDR_FORCED)))
+	if (!(p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED)))
 		return NULL;
 
 	return p_vf->bulletin.p_virt->mac;
@@ -3177,7 +3171,7 @@
 	if (!p_vf || !p_vf->bulletin.p_virt)
 		return 0;
 
-	if (!(p_vf->bulletin.p_virt->valid_bitmap & (1 << VLAN_ADDR_FORCED)))
+	if (!(p_vf->bulletin.p_virt->valid_bitmap & BIT(VLAN_ADDR_FORCED)))
 		return 0;
 
 	return p_vf->bulletin.p_virt->pvid;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index f8492ca..427e043 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -249,78 +249,150 @@
 	return (!!(edev->dev_info.common.num_hwfns > 1)) << QEDE_PRI_FLAG_CMT;
 }
 
-static int qede_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+struct qede_link_mode_mapping {
+	u32 qed_link_mode;
+	u32 ethtool_link_mode;
+};
+
+static const struct qede_link_mode_mapping qed_lm_map[] = {
+	{QED_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
+	{QED_LM_Autoneg_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
+	{QED_LM_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT},
+	{QED_LM_Pause_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
+	{QED_LM_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Half_BIT},
+	{QED_LM_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
+	{QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT},
+	{QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT},
+	{QED_LM_40000baseLR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT},
+	{QED_LM_50000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT},
+	{QED_LM_100000baseKR4_Full_BIT,
+	 ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT},
+};
+
+#define QEDE_DRV_TO_ETHTOOL_CAPS(caps, lk_ksettings, name)	\
+{								\
+	int i;							\
+								\
+	for (i = 0; i < QED_LM_COUNT; i++) {			\
+		if ((caps) & (qed_lm_map[i].qed_link_mode))	\
+			__set_bit(qed_lm_map[i].ethtool_link_mode,\
+				  lk_ksettings->link_modes.name); \
+	}							\
+}
+
+#define QEDE_ETHTOOL_TO_DRV_CAPS(caps, lk_ksettings, name)	\
+{								\
+	int i;							\
+								\
+	for (i = 0; i < QED_LM_COUNT; i++) {			\
+		if (test_bit(qed_lm_map[i].ethtool_link_mode,	\
+			     lk_ksettings->link_modes.name))	\
+			caps |= qed_lm_map[i].qed_link_mode;	\
+	}							\
+}
+
+static int qede_get_link_ksettings(struct net_device *dev,
+				   struct ethtool_link_ksettings *cmd)
 {
+	struct ethtool_link_settings *base = &cmd->base;
 	struct qede_dev *edev = netdev_priv(dev);
 	struct qed_link_output current_link;
 
 	memset(&current_link, 0, sizeof(current_link));
 	edev->ops->common->get_link(edev->cdev, &current_link);
 
-	cmd->supported = current_link.supported_caps;
-	cmd->advertising = current_link.advertised_caps;
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	QEDE_DRV_TO_ETHTOOL_CAPS(current_link.supported_caps, cmd, supported)
+
+	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+	QEDE_DRV_TO_ETHTOOL_CAPS(current_link.advertised_caps, cmd, advertising)
+
+	ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising);
+	QEDE_DRV_TO_ETHTOOL_CAPS(current_link.lp_caps, cmd, lp_advertising)
+
 	if ((edev->state == QEDE_STATE_OPEN) && (current_link.link_up)) {
-		ethtool_cmd_speed_set(cmd, current_link.speed);
-		cmd->duplex = current_link.duplex;
+		base->speed = current_link.speed;
+		base->duplex = current_link.duplex;
 	} else {
-		cmd->duplex = DUPLEX_UNKNOWN;
-		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+		base->speed = SPEED_UNKNOWN;
+		base->duplex = DUPLEX_UNKNOWN;
 	}
-	cmd->port = current_link.port;
-	cmd->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE :
-						AUTONEG_DISABLE;
-	cmd->lp_advertising = current_link.lp_caps;
+	base->port = current_link.port;
+	base->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE :
+			AUTONEG_DISABLE;
 
 	return 0;
 }
 
-static int qede_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int qede_set_link_ksettings(struct net_device *dev,
+				   const struct ethtool_link_ksettings *cmd)
 {
+	const struct ethtool_link_settings *base = &cmd->base;
 	struct qede_dev *edev = netdev_priv(dev);
 	struct qed_link_output current_link;
 	struct qed_link_params params;
-	u32 speed;
 
 	if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) {
-		DP_INFO(edev,
-			"Link settings are not allowed to be changed\n");
+		DP_INFO(edev, "Link settings are not allowed to be changed\n");
 		return -EOPNOTSUPP;
 	}
-
 	memset(&current_link, 0, sizeof(current_link));
 	memset(&params, 0, sizeof(params));
 	edev->ops->common->get_link(edev->cdev, &current_link);
 
-	speed = ethtool_cmd_speed(cmd);
 	params.override_flags |= QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS;
 	params.override_flags |= QED_LINK_OVERRIDE_SPEED_AUTONEG;
-	if (cmd->autoneg == AUTONEG_ENABLE) {
+	if (base->autoneg == AUTONEG_ENABLE) {
 		params.autoneg = true;
 		params.forced_speed = 0;
-		params.adv_speeds = cmd->advertising;
-	} else { /* forced speed */
+		QEDE_ETHTOOL_TO_DRV_CAPS(params.adv_speeds, cmd, advertising)
+	} else {		/* forced speed */
 		params.override_flags |= QED_LINK_OVERRIDE_SPEED_FORCED_SPEED;
 		params.autoneg = false;
-		params.forced_speed = speed;
-		switch (speed) {
+		params.forced_speed = base->speed;
+		switch (base->speed) {
 		case SPEED_10000:
 			if (!(current_link.supported_caps &
-			    SUPPORTED_10000baseKR_Full)) {
+			      QED_LM_10000baseKR_Full_BIT)) {
 				DP_INFO(edev, "10G speed not supported\n");
 				return -EINVAL;
 			}
-			params.adv_speeds = SUPPORTED_10000baseKR_Full;
+			params.adv_speeds = QED_LM_10000baseKR_Full_BIT;
+			break;
+		case SPEED_25000:
+			if (!(current_link.supported_caps &
+			      QED_LM_25000baseKR_Full_BIT)) {
+				DP_INFO(edev, "25G speed not supported\n");
+				return -EINVAL;
+			}
+			params.adv_speeds = QED_LM_25000baseKR_Full_BIT;
 			break;
 		case SPEED_40000:
 			if (!(current_link.supported_caps &
-			    SUPPORTED_40000baseLR4_Full)) {
+			      QED_LM_40000baseLR4_Full_BIT)) {
 				DP_INFO(edev, "40G speed not supported\n");
 				return -EINVAL;
 			}
-			params.adv_speeds = SUPPORTED_40000baseLR4_Full;
+			params.adv_speeds = QED_LM_40000baseLR4_Full_BIT;
+			break;
+		case SPEED_50000:
+			if (!(current_link.supported_caps &
+			      QED_LM_50000baseKR2_Full_BIT)) {
+				DP_INFO(edev, "50G speed not supported\n");
+				return -EINVAL;
+			}
+			params.adv_speeds = QED_LM_50000baseKR2_Full_BIT;
+			break;
+		case SPEED_100000:
+			if (!(current_link.supported_caps &
+			      QED_LM_100000baseKR4_Full_BIT)) {
+				DP_INFO(edev, "100G speed not supported\n");
+				return -EINVAL;
+			}
+			params.adv_speeds = QED_LM_100000baseKR4_Full_BIT;
 			break;
 		default:
-			DP_INFO(edev, "Unsupported speed %u\n", speed);
+			DP_INFO(edev, "Unsupported speed %u\n", base->speed);
 			return -EINVAL;
 		}
 	}
@@ -368,8 +440,7 @@
 {
 	struct qede_dev *edev = netdev_priv(ndev);
 
-	return ((u32)edev->dp_level << QED_LOG_LEVEL_SHIFT) |
-	       edev->dp_module;
+	return ((u32)edev->dp_level << QED_LOG_LEVEL_SHIFT) | edev->dp_module;
 }
 
 static void qede_set_msglevel(struct net_device *ndev, u32 level)
@@ -393,8 +464,7 @@
 	struct qed_link_params link_params;
 
 	if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) {
-		DP_INFO(edev,
-			"Link settings are not allowed to be changed\n");
+		DP_INFO(edev, "Link settings are not allowed to be changed\n");
 		return -EOPNOTSUPP;
 	}
 
@@ -1228,8 +1298,8 @@
 }
 
 static const struct ethtool_ops qede_ethtool_ops = {
-	.get_settings = qede_get_settings,
-	.set_settings = qede_set_settings,
+	.get_link_ksettings = qede_get_link_ksettings,
+	.set_link_ksettings = qede_set_link_ksettings,
 	.get_drvinfo = qede_get_drvinfo,
 	.get_msglevel = qede_get_msglevel,
 	.set_msglevel = qede_set_msglevel,
@@ -1260,7 +1330,7 @@
 };
 
 static const struct ethtool_ops qede_vf_ethtool_ops = {
-	.get_settings = qede_get_settings,
+	.get_link_ksettings = qede_get_link_ksettings,
 	.get_drvinfo = qede_get_drvinfo,
 	.get_msglevel = qede_get_msglevel,
 	.set_msglevel = qede_set_msglevel,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index e4bd02e..5ce8a3c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -222,7 +222,7 @@
 {
 	int ret;
 
-	pr_notice("qede_init: %s\n", version);
+	pr_info("qede_init: %s\n", version);
 
 	qed_ops = qed_get_eth_ops();
 	if (!qed_ops) {
@@ -253,7 +253,8 @@
 
 static void __exit qede_cleanup(void)
 {
-	pr_notice("qede_cleanup called\n");
+	if (debug & QED_LOG_INFO_MASK)
+		pr_info("qede_cleanup called\n");
 
 	unregister_netdevice_notifier(&qede_netdev_notifier);
 	pci_unregister_driver(&qede_pci_driver);
@@ -270,8 +271,7 @@
 
 /* Unmap the data and free skb */
 static int qede_free_tx_pkt(struct qede_dev *edev,
-			    struct qede_tx_queue *txq,
-			    int *len)
+			    struct qede_tx_queue *txq, int *len)
 {
 	u16 idx = txq->sw_tx_cons & NUM_TX_BDS_MAX;
 	struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
@@ -329,8 +329,7 @@
 static void qede_free_failed_tx_pkt(struct qede_dev *edev,
 				    struct qede_tx_queue *txq,
 				    struct eth_tx_1st_bd *first_bd,
-				    int nbd,
-				    bool data_split)
+				    int nbd, bool data_split)
 {
 	u16 idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
 	struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
@@ -339,8 +338,7 @@
 
 	/* Return prod to its position before this skb was handled */
 	qed_chain_set_prod(&txq->tx_pbl,
-			   le16_to_cpu(txq->tx_db.data.bd_prod),
-			   first_bd);
+			   le16_to_cpu(txq->tx_db.data.bd_prod), first_bd);
 
 	first_bd = (struct eth_tx_1st_bd *)qed_chain_produce(&txq->tx_pbl);
 
@@ -366,8 +364,7 @@
 
 	/* Return again prod to its position before this skb was handled */
 	qed_chain_set_prod(&txq->tx_pbl,
-			   le16_to_cpu(txq->tx_db.data.bd_prod),
-			   first_bd);
+			   le16_to_cpu(txq->tx_db.data.bd_prod), first_bd);
 
 	/* Free skb */
 	dev_kfree_skb_any(skb);
@@ -376,8 +373,7 @@
 }
 
 static u32 qede_xmit_type(struct qede_dev *edev,
-			  struct sk_buff *skb,
-			  int *ipv6_ext)
+			  struct sk_buff *skb, int *ipv6_ext)
 {
 	u32 rc = XMIT_L4_CSUM;
 	__be16 l3_proto;
@@ -434,15 +430,13 @@
 }
 
 static int map_frag_to_bd(struct qede_dev *edev,
-			  skb_frag_t *frag,
-			  struct eth_tx_bd *bd)
+			  skb_frag_t *frag, struct eth_tx_bd *bd)
 {
 	dma_addr_t mapping;
 
 	/* Map skb non-linear frag data for DMA */
 	mapping = skb_frag_dma_map(&edev->pdev->dev, frag, 0,
-				   skb_frag_size(frag),
-				   DMA_TO_DEVICE);
+				   skb_frag_size(frag), DMA_TO_DEVICE);
 	if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
 		DP_NOTICE(edev, "Unable to map frag - dropping packet\n");
 		return -ENOMEM;
@@ -504,9 +498,8 @@
 }
 
 /* Main transmit function */
-static
-netdev_tx_t qede_start_xmit(struct sk_buff *skb,
-			    struct net_device *ndev)
+static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
+				   struct net_device *ndev)
 {
 	struct qede_dev *edev = netdev_priv(ndev);
 	struct netdev_queue *netdev_txq;
@@ -530,8 +523,7 @@
 	txq = QEDE_TX_QUEUE(edev, txq_index);
 	netdev_txq = netdev_get_tx_queue(ndev, txq_index);
 
-	WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) <
-			       (MAX_SKB_FRAGS + 1));
+	WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1));
 
 	xmit_type = qede_xmit_type(edev, skb, &ipv6_ext);
 
@@ -761,8 +753,7 @@
 	return hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl);
 }
 
-static int qede_tx_int(struct qede_dev *edev,
-		       struct qede_tx_queue *txq)
+static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	struct netdev_queue *netdev_txq;
 	u16 hw_bd_cons;
@@ -960,8 +951,7 @@
 
 static u32 qede_get_rxhash(struct qede_dev *edev,
 			   u8 bitfields,
-			   __le32 rss_hash,
-			   enum pkt_hash_types *rxhash_type)
+			   __le32 rss_hash, enum pkt_hash_types *rxhash_type)
 {
 	enum rss_hash_type htype;
 
@@ -990,12 +980,10 @@
 
 static inline void qede_skb_receive(struct qede_dev *edev,
 				    struct qede_fastpath *fp,
-				    struct sk_buff *skb,
-				    u16 vlan_tag)
+				    struct sk_buff *skb, u16 vlan_tag)
 {
 	if (vlan_tag)
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
-				       vlan_tag);
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 
 	napi_gro_receive(&fp->napi, skb);
 }
@@ -1018,8 +1006,7 @@
 
 static int qede_fill_frag_skb(struct qede_dev *edev,
 			      struct qede_rx_queue *rxq,
-			      u8 tpa_agg_index,
-			      u16 len_on_bd)
+			      u8 tpa_agg_index, u16 len_on_bd)
 {
 	struct sw_rx_data *current_bd = &rxq->sw_rx_ring[rxq->sw_rx_cons &
 							 NUM_RX_BDS_MAX];
@@ -1467,7 +1454,7 @@
 		skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
 		if (unlikely(!skb)) {
 			DP_NOTICE(edev,
-				  "Build_skb failed, dropping incoming packet\n");
+				  "skb allocation failed, dropping incoming packet\n");
 			qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num);
 			rxq->rx_alloc_errors++;
 			goto next_cqe;
@@ -1575,8 +1562,7 @@
 		skb->protocol = eth_type_trans(skb, edev->ndev);
 
 		rx_hash = qede_get_rxhash(edev, fp_cqe->bitfields,
-					  fp_cqe->rss_hash,
-					  &rxhash_type);
+					  fp_cqe->rss_hash, &rxhash_type);
 
 		skb_set_hash(skb, rx_hash, rxhash_type);
 
@@ -1787,9 +1773,9 @@
 	edev->stats.tx_mac_ctrl_frames = stats.tx_mac_ctrl_frames;
 }
 
-static struct rtnl_link_stats64 *qede_get_stats64(
-			    struct net_device *dev,
-			    struct rtnl_link_stats64 *stats)
+static
+struct rtnl_link_stats64 *qede_get_stats64(struct net_device *dev,
+					   struct rtnl_link_stats64 *stats)
 {
 	struct qede_dev *edev = netdev_priv(dev);
 
@@ -2103,8 +2089,7 @@
 		}
 
 		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
-			   "marked vlan %d as non-configured\n",
-			   vlan->vid);
+			   "marked vlan %d as non-configured\n", vlan->vid);
 	}
 
 	edev->accept_any_vlan = false;
@@ -2146,7 +2131,7 @@
 
 		edev->vxlan_dst_port = t_port;
 
-		DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d",
+		DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d\n",
 			   t_port);
 
 		set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags);
@@ -2157,7 +2142,7 @@
 
 		edev->geneve_dst_port = t_port;
 
-		DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d",
+		DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d\n",
 			   t_port);
 		set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags);
 		break;
@@ -2181,7 +2166,7 @@
 
 		edev->vxlan_dst_port = 0;
 
-		DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d",
+		DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d\n",
 			   t_port);
 
 		set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags);
@@ -2192,7 +2177,7 @@
 
 		edev->geneve_dst_port = 0;
 
-		DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d",
+		DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d\n",
 			   t_port);
 		set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags);
 		break;
@@ -2237,15 +2222,13 @@
 static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
 					    struct pci_dev *pdev,
 					    struct qed_dev_eth_info *info,
-					    u32 dp_module,
-					    u8 dp_level)
+					    u32 dp_module, u8 dp_level)
 {
 	struct net_device *ndev;
 	struct qede_dev *edev;
 
 	ndev = alloc_etherdev_mqs(sizeof(*edev),
-				  info->num_queues,
-				  info->num_queues);
+				  info->num_queues, info->num_queues);
 	if (!ndev) {
 		pr_err("etherdev allocation failed\n");
 		return NULL;
@@ -2261,6 +2244,9 @@
 	edev->q_num_rx_buffers = NUM_RX_BDS_DEF;
 	edev->q_num_tx_buffers = NUM_TX_BDS_DEF;
 
+	DP_INFO(edev, "Allocated netdev with %d tx queues and %d rx queues\n",
+		info->num_queues, info->num_queues);
+
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 
 	memset(&edev->stats, 0, sizeof(edev->stats));
@@ -2453,7 +2439,7 @@
 			bool is_vf, enum qede_probe_mode mode)
 {
 	struct qed_probe_params probe_params;
-	struct qed_slowpath_params params;
+	struct qed_slowpath_params sp_params;
 	struct qed_dev_eth_info dev_info;
 	struct qede_dev *edev;
 	struct qed_dev *cdev;
@@ -2476,14 +2462,14 @@
 	qede_update_pf_params(cdev);
 
 	/* Start the Slowpath-process */
-	memset(&params, 0, sizeof(struct qed_slowpath_params));
-	params.int_mode = QED_INT_MODE_MSIX;
-	params.drv_major = QEDE_MAJOR_VERSION;
-	params.drv_minor = QEDE_MINOR_VERSION;
-	params.drv_rev = QEDE_REVISION_VERSION;
-	params.drv_eng = QEDE_ENGINEERING_VERSION;
-	strlcpy(params.name, "qede LAN", QED_DRV_VER_STR_SIZE);
-	rc = qed_ops->common->slowpath_start(cdev, &params);
+	memset(&sp_params, 0, sizeof(sp_params));
+	sp_params.int_mode = QED_INT_MODE_MSIX;
+	sp_params.drv_major = QEDE_MAJOR_VERSION;
+	sp_params.drv_minor = QEDE_MINOR_VERSION;
+	sp_params.drv_rev = QEDE_REVISION_VERSION;
+	sp_params.drv_eng = QEDE_ENGINEERING_VERSION;
+	strlcpy(sp_params.name, "qede LAN", QED_DRV_VER_STR_SIZE);
+	rc = qed_ops->common->slowpath_start(cdev, &sp_params);
 	if (rc) {
 		pr_notice("Cannot start slowpath\n");
 		goto err1;
@@ -2586,7 +2572,7 @@
 	qed_ops->common->slowpath_stop(cdev);
 	qed_ops->common->remove(cdev);
 
-	pr_notice("Ending successfully qede_remove\n");
+	dev_info(&pdev->dev, "Ending qede_remove successfully\n");
 }
 
 static void qede_remove(struct pci_dev *pdev)
@@ -2634,16 +2620,14 @@
 
 /* This function allocates fast-path status block memory */
 static int qede_alloc_mem_sb(struct qede_dev *edev,
-			     struct qed_sb_info *sb_info,
-			     u16 sb_id)
+			     struct qed_sb_info *sb_info, u16 sb_id)
 {
 	struct status_block *sb_virt;
 	dma_addr_t sb_phys;
 	int rc;
 
 	sb_virt = dma_alloc_coherent(&edev->pdev->dev,
-				     sizeof(*sb_virt),
-				     &sb_phys, GFP_KERNEL);
+				     sizeof(*sb_virt), &sb_phys, GFP_KERNEL);
 	if (!sb_virt) {
 		DP_ERR(edev, "Status block allocation failed\n");
 		return -ENOMEM;
@@ -2675,16 +2659,15 @@
 		data = rx_buf->data;
 
 		dma_unmap_page(&edev->pdev->dev,
-			       rx_buf->mapping,
-			       PAGE_SIZE, DMA_FROM_DEVICE);
+			       rx_buf->mapping, PAGE_SIZE, DMA_FROM_DEVICE);
 
 		rx_buf->data = NULL;
 		__free_page(data);
 	}
 }
 
-static void qede_free_sge_mem(struct qede_dev *edev,
-			      struct qede_rx_queue *rxq) {
+static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
+{
 	int i;
 
 	if (edev->gro_disable)
@@ -2703,8 +2686,7 @@
 	}
 }
 
-static void qede_free_mem_rxq(struct qede_dev *edev,
-			      struct qede_rx_queue *rxq)
+static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
 	qede_free_sge_mem(edev, rxq);
 
@@ -2726,9 +2708,6 @@
 	struct eth_rx_bd *rx_bd;
 	dma_addr_t mapping;
 	struct page *data;
-	u16 rx_buf_size;
-
-	rx_buf_size = rxq->rx_buf_size;
 
 	data = alloc_pages(GFP_ATOMIC, 0);
 	if (unlikely(!data)) {
@@ -2763,8 +2742,7 @@
 	return 0;
 }
 
-static int qede_alloc_sge_mem(struct qede_dev *edev,
-			      struct qede_rx_queue *rxq)
+static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
 	dma_addr_t mapping;
 	int i;
@@ -2811,15 +2789,14 @@
 }
 
 /* This function allocates all memory needed per Rx queue */
-static int qede_alloc_mem_rxq(struct qede_dev *edev,
-			      struct qede_rx_queue *rxq)
+static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
 	int i, rc, size;
 
 	rxq->num_rx_buffers = edev->q_num_rx_buffers;
 
-	rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD +
-			   edev->ndev->mtu;
+	rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + edev->ndev->mtu;
+
 	if (rxq->rx_buf_size > PAGE_SIZE)
 		rxq->rx_buf_size = PAGE_SIZE;
 
@@ -2873,8 +2850,7 @@
 	return rc;
 }
 
-static void qede_free_mem_txq(struct qede_dev *edev,
-			      struct qede_tx_queue *txq)
+static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	/* Free the parallel SW ring */
 	kfree(txq->sw_tx_ring);
@@ -2884,8 +2860,7 @@
 }
 
 /* This function allocates all memory needed per Tx queue */
-static int qede_alloc_mem_txq(struct qede_dev *edev,
-			      struct qede_tx_queue *txq)
+static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	int size, rc;
 	union eth_tx_bd_types *p_virt;
@@ -2917,8 +2892,7 @@
 }
 
 /* This function frees all memory of a single fp */
-static void qede_free_mem_fp(struct qede_dev *edev,
-			     struct qede_fastpath *fp)
+static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 {
 	int tc;
 
@@ -2933,8 +2907,7 @@
 /* This function allocates all memory needed for a single fp (i.e. an entity
  * which contains status block, one rx queue and multiple per-TC tx queues.
  */
-static int qede_alloc_mem_fp(struct qede_dev *edev,
-			     struct qede_fastpath *fp)
+static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 {
 	int rc, tc;
 
@@ -3146,8 +3119,7 @@
 }
 
 static int qede_drain_txq(struct qede_dev *edev,
-			  struct qede_tx_queue *txq,
-			  bool allow_drain)
+			  struct qede_tx_queue *txq, bool allow_drain)
 {
 	int rc, cnt = 1000;
 
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 1e1cc0f..d4809ad 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1876,6 +1876,20 @@
 	return 0;
 }
 
+static void ravb_set_config_mode(struct net_device *ndev)
+{
+	struct ravb_private *priv = netdev_priv(ndev);
+
+	if (priv->chip_id == RCAR_GEN2) {
+		ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
+		/* Set CSEL value */
+		ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
+	} else {
+		ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG |
+			    CCC_GAC | CCC_CSEL_HPB);
+	}
+}
+
 static int ravb_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -1978,14 +1992,7 @@
 	ndev->ethtool_ops = &ravb_ethtool_ops;
 
 	/* Set AVB config mode */
-	if (chip_id == RCAR_GEN2) {
-		ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
-		/* Set CSEL value */
-		ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
-	} else {
-		ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG |
-			    CCC_GAC | CCC_CSEL_HPB);
-	}
+	ravb_set_config_mode(ndev);
 
 	/* Set GTI value */
 	error = ravb_set_gti(ndev);
@@ -2097,6 +2104,54 @@
 }
 
 #ifdef CONFIG_PM
+static int ravb_suspend(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	int ret = 0;
+
+	if (netif_running(ndev)) {
+		netif_device_detach(ndev);
+		ret = ravb_close(ndev);
+	}
+
+	return ret;
+}
+
+static int ravb_resume(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct ravb_private *priv = netdev_priv(ndev);
+	int ret = 0;
+
+	/* All registers have been reset to default values.
+	 * Restore all registers which were set up at probe time and
+	 * reopen the device if it was running before the system suspended.
+	 */
+
+	/* Set AVB config mode */
+	ravb_set_config_mode(ndev);
+
+	/* Set GTI value */
+	ret = ravb_set_gti(ndev);
+	if (ret)
+		return ret;
+
+	/* Request GTI loading */
+	ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
+
+	/* Restore descriptor base address table */
+	ravb_write(ndev, priv->desc_bat_dma, DBAT);
+
+	if (netif_running(ndev)) {
+		ret = ravb_open(ndev);
+		if (ret < 0)
+			return ret;
+		netif_device_attach(ndev);
+	}
+
+	return ret;
+}
+
 static int ravb_runtime_nop(struct device *dev)
 {
 	/* Runtime PM callback shared between ->runtime_suspend()
@@ -2110,6 +2165,7 @@
 }
 
 static const struct dev_pm_ops ravb_dev_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(ravb_suspend, ravb_resume)
 	SET_RUNTIME_PM_OPS(ravb_runtime_nop, ravb_runtime_nop, NULL)
 };
 
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 799d58d..1f8240a 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1723,7 +1723,7 @@
 static void sh_eth_adjust_link(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
-	struct phy_device *phydev = mdp->phydev;
+	struct phy_device *phydev = ndev->phydev;
 	int new_state = 0;
 
 	if (phydev->link) {
@@ -1800,51 +1800,48 @@
 
 	phy_attached_info(phydev);
 
-	mdp->phydev = phydev;
-
 	return 0;
 }
 
 /* PHY control start function */
 static int sh_eth_phy_start(struct net_device *ndev)
 {
-	struct sh_eth_private *mdp = netdev_priv(ndev);
 	int ret;
 
 	ret = sh_eth_phy_init(ndev);
 	if (ret)
 		return ret;
 
-	phy_start(mdp->phydev);
+	phy_start(ndev->phydev);
 
 	return 0;
 }
 
-static int sh_eth_get_settings(struct net_device *ndev,
-			       struct ethtool_cmd *ecmd)
+static int sh_eth_get_link_ksettings(struct net_device *ndev,
+				     struct ethtool_link_ksettings *cmd)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	unsigned long flags;
 	int ret;
 
-	if (!mdp->phydev)
+	if (!ndev->phydev)
 		return -ENODEV;
 
 	spin_lock_irqsave(&mdp->lock, flags);
-	ret = phy_ethtool_gset(mdp->phydev, ecmd);
+	ret = phy_ethtool_ksettings_get(ndev->phydev, cmd);
 	spin_unlock_irqrestore(&mdp->lock, flags);
 
 	return ret;
 }
 
-static int sh_eth_set_settings(struct net_device *ndev,
-			       struct ethtool_cmd *ecmd)
+static int sh_eth_set_link_ksettings(struct net_device *ndev,
+				     const struct ethtool_link_ksettings *cmd)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	unsigned long flags;
 	int ret;
 
-	if (!mdp->phydev)
+	if (!ndev->phydev)
 		return -ENODEV;
 
 	spin_lock_irqsave(&mdp->lock, flags);
@@ -1852,11 +1849,11 @@
 	/* disable tx and rx */
 	sh_eth_rcv_snd_disable(ndev);
 
-	ret = phy_ethtool_sset(mdp->phydev, ecmd);
+	ret = phy_ethtool_ksettings_set(ndev->phydev, cmd);
 	if (ret)
 		goto error_exit;
 
-	if (ecmd->duplex == DUPLEX_FULL)
+	if (cmd->base.duplex == DUPLEX_FULL)
 		mdp->duplex = 1;
 	else
 		mdp->duplex = 0;
@@ -2067,11 +2064,11 @@
 	unsigned long flags;
 	int ret;
 
-	if (!mdp->phydev)
+	if (!ndev->phydev)
 		return -ENODEV;
 
 	spin_lock_irqsave(&mdp->lock, flags);
-	ret = phy_start_aneg(mdp->phydev);
+	ret = phy_start_aneg(ndev->phydev);
 	spin_unlock_irqrestore(&mdp->lock, flags);
 
 	return ret;
@@ -2198,8 +2195,6 @@
 }
 
 static const struct ethtool_ops sh_eth_ethtool_ops = {
-	.get_settings	= sh_eth_get_settings,
-	.set_settings	= sh_eth_set_settings,
 	.get_regs_len	= sh_eth_get_regs_len,
 	.get_regs	= sh_eth_get_regs,
 	.nway_reset	= sh_eth_nway_reset,
@@ -2211,6 +2206,8 @@
 	.get_sset_count     = sh_eth_get_sset_count,
 	.get_ringparam	= sh_eth_get_ringparam,
 	.set_ringparam	= sh_eth_set_ringparam,
+	.get_link_ksettings = sh_eth_get_link_ksettings,
+	.set_link_ksettings = sh_eth_set_link_ksettings,
 };
 
 /* network device open function */
@@ -2408,10 +2405,9 @@
 	sh_eth_dev_exit(ndev);
 
 	/* PHY Disconnect */
-	if (mdp->phydev) {
-		phy_stop(mdp->phydev);
-		phy_disconnect(mdp->phydev);
-		mdp->phydev = NULL;
+	if (ndev->phydev) {
+		phy_stop(ndev->phydev);
+		phy_disconnect(ndev->phydev);
 	}
 
 	free_irq(ndev->irq, ndev);
@@ -2429,8 +2425,7 @@
 /* ioctl to device function */
 static int sh_eth_do_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
 {
-	struct sh_eth_private *mdp = netdev_priv(ndev);
-	struct phy_device *phydev = mdp->phydev;
+	struct phy_device *phydev = ndev->phydev;
 
 	if (!netif_running(ndev))
 		return -EINVAL;
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index c62380e..d050f37 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -518,7 +518,6 @@
 	/* MII transceiver section. */
 	u32 phy_id;			/* PHY ID */
 	struct mii_bus *mii_bus;	/* MDIO bus control */
-	struct phy_device *phydev;	/* PHY device control */
 	int link;
 	phy_interface_t phy_interface;
 	int msg_enable;
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index f658fee..b8c9f18 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -177,7 +177,7 @@
 
 static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
 {
-	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_OUT_LEN);
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V2_OUT_LEN);
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	size_t outlen;
 	int rc;
@@ -188,7 +188,7 @@
 			  outbuf, sizeof(outbuf), &outlen);
 	if (rc)
 		return rc;
-	if (outlen < sizeof(outbuf)) {
+	if (outlen < MC_CMD_GET_CAPABILITIES_OUT_LEN) {
 		netif_err(efx, drv, efx->net_dev,
 			  "unable to read datapath firmware capabilities\n");
 		return -EIO;
@@ -197,6 +197,12 @@
 	nic_data->datapath_caps =
 		MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1);
 
+	if (outlen >= MC_CMD_GET_CAPABILITIES_V2_OUT_LEN)
+		nic_data->datapath_caps2 = MCDI_DWORD(outbuf,
+				GET_CAPABILITIES_V2_OUT_FLAGS2);
+	else
+		nic_data->datapath_caps2 = 0;
+
 	/* record the DPCPU firmware IDs to determine VEB vswitching support.
 	 */
 	nic_data->rx_dpcpu_fw_id =
@@ -227,6 +233,116 @@
 	return rc > 0 ? rc : -ERANGE;
 }
 
+static int efx_ef10_get_timer_workarounds(struct efx_nic *efx)
+{
+	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	unsigned int implemented;
+	unsigned int enabled;
+	int rc;
+
+	nic_data->workaround_35388 = false;
+	nic_data->workaround_61265 = false;
+
+	rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled);
+
+	if (rc == -ENOSYS) {
+		/* Firmware without GET_WORKAROUNDS - not a problem. */
+		rc = 0;
+	} else if (rc == 0) {
+		/* Bug61265 workaround is always enabled if implemented. */
+		if (enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG61265)
+			nic_data->workaround_61265 = true;
+
+		if (enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG35388) {
+			nic_data->workaround_35388 = true;
+		} else if (implemented & MC_CMD_GET_WORKAROUNDS_OUT_BUG35388) {
+			/* Workaround is implemented but not enabled.
+			 * Try to enable it.
+			 */
+			rc = efx_mcdi_set_workaround(efx,
+						     MC_CMD_WORKAROUND_BUG35388,
+						     true, NULL);
+			if (rc == 0)
+				nic_data->workaround_35388 = true;
+			/* If we failed to set the workaround just carry on. */
+			rc = 0;
+		}
+	}
+
+	netif_dbg(efx, probe, efx->net_dev,
+		  "workaround for bug 35388 is %sabled\n",
+		  nic_data->workaround_35388 ? "en" : "dis");
+	netif_dbg(efx, probe, efx->net_dev,
+		  "workaround for bug 61265 is %sabled\n",
+		  nic_data->workaround_61265 ? "en" : "dis");
+
+	return rc;
+}
+
+static void efx_ef10_process_timer_config(struct efx_nic *efx,
+					  const efx_dword_t *data)
+{
+	unsigned int max_count;
+
+	if (EFX_EF10_WORKAROUND_61265(efx)) {
+		efx->timer_quantum_ns = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_STEP_NS);
+		efx->timer_max_ns = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_MAX_NS);
+	} else if (EFX_EF10_WORKAROUND_35388(efx)) {
+		efx->timer_quantum_ns = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_NS_PER_COUNT);
+		max_count = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_MAX_COUNT);
+		efx->timer_max_ns = max_count * efx->timer_quantum_ns;
+	} else {
+		efx->timer_quantum_ns = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_NS_PER_COUNT);
+		max_count = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_MAX_COUNT);
+		efx->timer_max_ns = max_count * efx->timer_quantum_ns;
+	}
+
+	netif_dbg(efx, probe, efx->net_dev,
+		  "got timer properties from MC: quantum %u ns; max %u ns\n",
+		  efx->timer_quantum_ns, efx->timer_max_ns);
+}
+
+static int efx_ef10_get_timer_config(struct efx_nic *efx)
+{
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN);
+	int rc;
+
+	rc = efx_ef10_get_timer_workarounds(efx);
+	if (rc)
+		return rc;
+
+	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_EVQ_TMR_PROPERTIES, NULL, 0,
+				outbuf, sizeof(outbuf), NULL);
+
+	if (rc == 0) {
+		efx_ef10_process_timer_config(efx, outbuf);
+	} else if (rc == -ENOSYS || rc == -EPERM) {
+		/* Not available - fall back to Huntington defaults. */
+		unsigned int quantum;
+
+		rc = efx_ef10_get_sysclk_freq(efx);
+		if (rc < 0)
+			return rc;
+
+		quantum = 1536000 / rc; /* 1536 cycles */
+		efx->timer_quantum_ns = quantum;
+		efx->timer_max_ns = efx->type->timer_period_max * quantum;
+		rc = 0;
+	} else {
+		efx_mcdi_display_error(efx, MC_CMD_GET_EVQ_TMR_PROPERTIES,
+				       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN,
+				       NULL, 0, rc);
+	}
+
+	return rc;
+}
+
 static int efx_ef10_get_mac_address_pf(struct efx_nic *efx, u8 *mac_address)
 {
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_MAC_ADDRESSES_OUT_LEN);
@@ -527,33 +643,10 @@
 	if (rc)
 		goto fail5;
 
-	rc = efx_ef10_get_sysclk_freq(efx);
+	rc = efx_ef10_get_timer_config(efx);
 	if (rc < 0)
 		goto fail5;
-	efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */
 
-	/* Check whether firmware supports bug 35388 workaround.
-	 * First try to enable it, then if we get EPERM, just
-	 * ask if it's already enabled
-	 */
-	rc = efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG35388, true, NULL);
-	if (rc == 0) {
-		nic_data->workaround_35388 = true;
-	} else if (rc == -EPERM) {
-		unsigned int enabled;
-
-		rc = efx_mcdi_get_workarounds(efx, NULL, &enabled);
-		if (rc)
-			goto fail3;
-		nic_data->workaround_35388 = enabled &
-			MC_CMD_GET_WORKAROUNDS_OUT_BUG35388;
-	} else if (rc != -ENOSYS && rc != -ENOENT) {
-		goto fail5;
-	}
-	netif_dbg(efx, probe, efx->net_dev,
-		  "workaround for bug 35388 is %sabled\n",
-		  nic_data->workaround_35388 ? "en" : "dis");
-
 	rc = efx_mcdi_mon_probe(efx);
 	if (rc && rc != -EPERM)
 		goto fail5;
@@ -1743,27 +1837,43 @@
 static void efx_ef10_push_irq_moderation(struct efx_channel *channel)
 {
 	struct efx_nic *efx = channel->efx;
-	unsigned int mode, value;
+	unsigned int mode, usecs;
 	efx_dword_t timer_cmd;
 
-	if (channel->irq_moderation) {
+	if (channel->irq_moderation_us) {
 		mode = 3;
-		value = channel->irq_moderation - 1;
+		usecs = channel->irq_moderation_us;
 	} else {
 		mode = 0;
-		value = 0;
+		usecs = 0;
 	}
 
-	if (EFX_EF10_WORKAROUND_35388(efx)) {
+	if (EFX_EF10_WORKAROUND_61265(efx)) {
+		MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_EVQ_TMR_IN_LEN);
+		unsigned int ns = usecs * 1000;
+
+		MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_INSTANCE,
+			       channel->channel);
+		MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_LOAD_REQ_NS, ns);
+		MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_RELOAD_REQ_NS, ns);
+		MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_MODE, mode);
+
+		efx_mcdi_rpc_async(efx, MC_CMD_SET_EVQ_TMR,
+				   inbuf, sizeof(inbuf), 0, NULL, 0);
+	} else if (EFX_EF10_WORKAROUND_35388(efx)) {
+		unsigned int ticks = efx_usecs_to_ticks(efx, usecs);
+
 		EFX_POPULATE_DWORD_3(timer_cmd, ERF_DD_EVQ_IND_TIMER_FLAGS,
 				     EFE_DD_EVQ_IND_TIMER_FLAGS,
 				     ERF_DD_EVQ_IND_TIMER_MODE, mode,
-				     ERF_DD_EVQ_IND_TIMER_VAL, value);
+				     ERF_DD_EVQ_IND_TIMER_VAL, ticks);
 		efx_writed_page(efx, &timer_cmd, ER_DD_EVQ_INDIRECT,
 				channel->channel);
 	} else {
+		unsigned int ticks = efx_usecs_to_ticks(efx, usecs);
+
 		EFX_POPULATE_DWORD_2(timer_cmd, ERF_DZ_TC_TIMER_MODE, mode,
-				     ERF_DZ_TC_TIMER_VAL, value);
+				     ERF_DZ_TC_TIMER_VAL, ticks);
 		efx_writed_page(efx, &timer_cmd, ER_DZ_EVQ_TMR,
 				channel->channel);
 	}
@@ -2535,13 +2645,12 @@
 static int efx_ef10_ev_init(struct efx_channel *channel)
 {
 	MCDI_DECLARE_BUF(inbuf,
-			 MC_CMD_INIT_EVQ_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
-						EFX_BUF_SIZE));
-	MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_OUT_LEN);
+			 MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
+						   EFX_BUF_SIZE));
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN);
 	size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE;
 	struct efx_nic *efx = channel->efx;
 	struct efx_ef10_nic_data *nic_data;
-	bool supports_rx_merge;
 	size_t inlen, outlen;
 	unsigned int enabled, implemented;
 	dma_addr_t dma_addr;
@@ -2549,9 +2658,6 @@
 	int i;
 
 	nic_data = efx->nic_data;
-	supports_rx_merge =
-		!!(nic_data->datapath_caps &
-		   1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
 
 	/* Fill event queue with all ones (i.e. empty events) */
 	memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
@@ -2560,11 +2666,6 @@
 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel);
 	/* INIT_EVQ expects index in vector table, not absolute */
 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel);
-	MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
-			      INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
-			      INIT_EVQ_IN_FLAG_RX_MERGE, 1,
-			      INIT_EVQ_IN_FLAG_TX_MERGE, 1,
-			      INIT_EVQ_IN_FLAG_CUT_THRU, !supports_rx_merge);
 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE,
 		       MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS);
 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0);
@@ -2573,6 +2674,27 @@
 		       MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS);
 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0);
 
+	if (nic_data->datapath_caps2 &
+	    1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN) {
+		/* Use the new generic approach to specifying event queue
+		 * configuration, requesting lower latency or higher throughput.
+		 * The options that actually get used appear in the output.
+		 */
+		MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS,
+				      INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1,
+				      INIT_EVQ_V2_IN_FLAG_TYPE,
+				      MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO);
+	} else {
+		bool cut_thru = !(nic_data->datapath_caps &
+			1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
+
+		MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
+				      INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
+				      INIT_EVQ_IN_FLAG_RX_MERGE, 1,
+				      INIT_EVQ_IN_FLAG_TX_MERGE, 1,
+				      INIT_EVQ_IN_FLAG_CUT_THRU, cut_thru);
+	}
+
 	dma_addr = channel->eventq.buf.dma_addr;
 	for (i = 0; i < entries; ++i) {
 		MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr);
@@ -2583,6 +2705,13 @@
 
 	rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen,
 			  outbuf, sizeof(outbuf), &outlen);
+
+	if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN)
+		netif_dbg(efx, drv, efx->net_dev,
+			  "Channel %d using event queue flags %08x\n",
+			  channel->channel,
+			  MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS));
+
 	/* IRQ return is ignored */
 	if (channel->channel || rc)
 		return rc;
@@ -2590,8 +2719,8 @@
 	/* Successfully created event queue on channel 0 */
 	rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled);
 	if (rc == -ENOSYS) {
-		/* GET_WORKAROUNDS was implemented before the bug26807
-		 * workaround, thus the latter must be unavailable in this fw
+		/* GET_WORKAROUNDS was implemented before this workaround,
+		 * thus it must be unavailable in this firmware.
 		 */
 		nic_data->workaround_26807 = false;
 		rc = 0;
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 14b821b..f3826ae 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -281,6 +281,27 @@
  * NAPI guarantees serialisation of polls of the same device, which
  * provides the guarantee required by efx_process_channel().
  */
+static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
+{
+	int step = efx->irq_mod_step_us;
+
+	if (channel->irq_mod_score < irq_adapt_low_thresh) {
+		if (channel->irq_moderation_us > step) {
+			channel->irq_moderation_us -= step;
+			efx->type->push_irq_moderation(channel);
+		}
+	} else if (channel->irq_mod_score > irq_adapt_high_thresh) {
+		if (channel->irq_moderation_us <
+		    efx->irq_rx_moderation_us) {
+			channel->irq_moderation_us += step;
+			efx->type->push_irq_moderation(channel);
+		}
+	}
+
+	channel->irq_count = 0;
+	channel->irq_mod_score = 0;
+}
+
 static int efx_poll(struct napi_struct *napi, int budget)
 {
 	struct efx_channel *channel =
@@ -301,22 +322,7 @@
 		if (efx_channel_has_rx_queue(channel) &&
 		    efx->irq_rx_adaptive &&
 		    unlikely(++channel->irq_count == 1000)) {
-			if (unlikely(channel->irq_mod_score <
-				     irq_adapt_low_thresh)) {
-				if (channel->irq_moderation > 1) {
-					channel->irq_moderation -= 1;
-					efx->type->push_irq_moderation(channel);
-				}
-			} else if (unlikely(channel->irq_mod_score >
-					    irq_adapt_high_thresh)) {
-				if (channel->irq_moderation <
-				    efx->irq_rx_moderation) {
-					channel->irq_moderation += 1;
-					efx->type->push_irq_moderation(channel);
-				}
-			}
-			channel->irq_count = 0;
-			channel->irq_mod_score = 0;
+			efx_update_irq_mod(efx, channel);
 		}
 
 		efx_filter_rfs_expire(channel);
@@ -1703,6 +1709,7 @@
 	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
 
 	/* Initialise the interrupt moderation settings */
+	efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
 	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
 				true);
 
@@ -1949,14 +1956,21 @@
  * Interrupt moderation
  *
  **************************************************************************/
-
-static unsigned int irq_mod_ticks(unsigned int usecs, unsigned int quantum_ns)
+unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs)
 {
 	if (usecs == 0)
 		return 0;
-	if (usecs * 1000 < quantum_ns)
+	if (usecs * 1000 < efx->timer_quantum_ns)
 		return 1; /* never round down to 0 */
-	return usecs * 1000 / quantum_ns;
+	return usecs * 1000 / efx->timer_quantum_ns;
+}
+
+unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks)
+{
+	/* We must round up when converting ticks to microseconds
+	 * because we round down when converting the other way.
+	 */
+	return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
 }
 
 /* Set interrupt moderation parameters */
@@ -1965,21 +1979,16 @@
 			    bool rx_may_override_tx)
 {
 	struct efx_channel *channel;
-	unsigned int irq_mod_max = DIV_ROUND_UP(efx->type->timer_period_max *
-						efx->timer_quantum_ns,
-						1000);
-	unsigned int tx_ticks;
-	unsigned int rx_ticks;
+	unsigned int timer_max_us;
 
 	EFX_ASSERT_RESET_SERIALISED(efx);
 
-	if (tx_usecs > irq_mod_max || rx_usecs > irq_mod_max)
+	timer_max_us = efx->timer_max_ns / 1000;
+
+	if (tx_usecs > timer_max_us || rx_usecs > timer_max_us)
 		return -EINVAL;
 
-	tx_ticks = irq_mod_ticks(tx_usecs, efx->timer_quantum_ns);
-	rx_ticks = irq_mod_ticks(rx_usecs, efx->timer_quantum_ns);
-
-	if (tx_ticks != rx_ticks && efx->tx_channel_offset == 0 &&
+	if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 &&
 	    !rx_may_override_tx) {
 		netif_err(efx, drv, efx->net_dev, "Channels are shared. "
 			  "RX and TX IRQ moderation must be equal\n");
@@ -1987,12 +1996,12 @@
 	}
 
 	efx->irq_rx_adaptive = rx_adaptive;
-	efx->irq_rx_moderation = rx_ticks;
+	efx->irq_rx_moderation_us = rx_usecs;
 	efx_for_each_channel(channel, efx) {
 		if (efx_channel_has_rx_queue(channel))
-			channel->irq_moderation = rx_ticks;
+			channel->irq_moderation_us = rx_usecs;
 		else if (efx_channel_has_tx_queues(channel))
-			channel->irq_moderation = tx_ticks;
+			channel->irq_moderation_us = tx_usecs;
 	}
 
 	return 0;
@@ -2001,26 +2010,21 @@
 void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
 			    unsigned int *rx_usecs, bool *rx_adaptive)
 {
-	/* We must round up when converting ticks to microseconds
-	 * because we round down when converting the other way.
-	 */
-
 	*rx_adaptive = efx->irq_rx_adaptive;
-	*rx_usecs = DIV_ROUND_UP(efx->irq_rx_moderation *
-				 efx->timer_quantum_ns,
-				 1000);
+	*rx_usecs = efx->irq_rx_moderation_us;
 
 	/* If channels are shared between RX and TX, so is IRQ
 	 * moderation.  Otherwise, IRQ moderation is the same for all
 	 * TX channels and is not adaptive.
 	 */
-	if (efx->tx_channel_offset == 0)
+	if (efx->tx_channel_offset == 0) {
 		*tx_usecs = *rx_usecs;
-	else
-		*tx_usecs = DIV_ROUND_UP(
-			efx->channel[efx->tx_channel_offset]->irq_moderation *
-			efx->timer_quantum_ns,
-			1000);
+	} else {
+		struct efx_channel *tx_channel;
+
+		tx_channel = efx->channel[efx->tx_channel_offset];
+		*tx_usecs = tx_channel->irq_moderation_us;
+	}
 }
 
 /**************************************************************************
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index c3ae739..342ae16 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -204,6 +204,8 @@
 
 /* Global */
 void efx_schedule_reset(struct efx_nic *efx, enum reset_type type);
+unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs);
+unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks);
 int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
 			    unsigned int rx_usecs, bool rx_adaptive,
 			    bool rx_may_override_tx);
diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c
index d790cb8..1a70926 100644
--- a/drivers/net/ethernet/sfc/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon.c
@@ -378,12 +378,15 @@
 	struct efx_nic *efx = channel->efx;
 
 	/* Set timer register */
-	if (channel->irq_moderation) {
+	if (channel->irq_moderation_us) {
+		unsigned int ticks;
+
+		ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us);
 		EFX_POPULATE_DWORD_2(timer_cmd,
 				     FRF_AB_TC_TIMER_MODE,
 				     FFE_BB_TIMER_MODE_INT_HLDOFF,
 				     FRF_AB_TC_TIMER_VAL,
-				     channel->irq_moderation - 1);
+				     ticks - 1);
 	} else {
 		EFX_POPULATE_DWORD_2(timer_cmd,
 				     FRF_AB_TC_TIMER_MODE,
@@ -2373,6 +2376,8 @@
 			     EFX_MAX_CHANNELS);
 	efx->max_tx_channels = efx->max_channels;
 	efx->timer_quantum_ns = 4968; /* 621 cycles */
+	efx->timer_max_ns = efx->type->timer_period_max *
+			    efx->timer_quantum_ns;
 
 	/* Initialise I2C adapter */
 	board = falcon_board(efx);
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index d28e7dd..9fbc12a 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -548,7 +548,10 @@
 		efx_mcdi_display_error(efx, async->cmd, async->inlen, errbuf,
 				       err_len, rc);
 	}
-	async->complete(efx, async->cookie, rc, outbuf, data_len);
+
+	if (async->complete)
+		async->complete(efx, async->cookie, rc, outbuf,
+				min(async->outlen, data_len));
 	kfree(async);
 
 	efx_mcdi_release(mcdi);
diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h
index c9a5b00..ccceafc 100644
--- a/drivers/net/ethernet/sfc/mcdi_pcol.h
+++ b/drivers/net/ethernet/sfc/mcdi_pcol.h
@@ -2645,16 +2645,20 @@
 #define          MC_CMD_POLL_BIST_MEM_BUS_MC 0x0
 /* enum: CSR IREG bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_CSR 0x1
-/* enum: RX DPCPU bus. */
+/* enum: RX0 DPCPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_RX 0x2
 /* enum: TX0 DPCPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_TX0 0x3
 /* enum: TX1 DPCPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_TX1 0x4
-/* enum: RX DICPU bus. */
+/* enum: RX0 DICPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DICPU_RX 0x5
 /* enum: TX DICPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DICPU_TX 0x6
+/* enum: RX1 DPCPU bus. */
+#define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_RX1 0x7
+/* enum: RX1 DICPU bus. */
+#define          MC_CMD_POLL_BIST_MEM_BUS_DICPU_RX1 0x8
 /* Pattern written to RAM / register */
 #define       MC_CMD_POLL_BIST_OUT_MEM_EXPECT_OFST 16
 /* Actual value read from RAM / register */
@@ -3612,6 +3616,8 @@
 #define        MC_CMD_NVRAM_INFO_OUT_PROTECTED_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_TLV_LBN 1
 #define        MC_CMD_NVRAM_INFO_OUT_TLV_WIDTH 1
+#define        MC_CMD_NVRAM_INFO_OUT_CMAC_LBN 6
+#define        MC_CMD_NVRAM_INFO_OUT_CMAC_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_A_B_LBN 7
 #define        MC_CMD_NVRAM_INFO_OUT_A_B_WIDTH 1
 #define       MC_CMD_NVRAM_INFO_OUT_PHYSDEV_OFST 16
@@ -4389,6 +4395,8 @@
  * the command will fail with MC_CMD_ERR_FILTERS_PRESENT.
  */
 #define          MC_CMD_WORKAROUND_BUG26807 0x6
+/* enum: Bug 61265 work around (broken EVQ TMR writes). */
+#define          MC_CMD_WORKAROUND_BUG61265 0x7
 /* 0 = disable the workaround indicated by TYPE; any non-zero value = enable
  * the workaround
  */
@@ -4413,7 +4421,6 @@
  * (GET_PHY_CFG_OUT_MEDIA_TYPE); the valid 'page number' input values, and the
  * output data, are interpreted on a per-type basis. For SFP+: PAGE=0 or 1
  * returns a 128-byte block read from module I2C address 0xA0 offset 0 or 0x80.
- * Anything else: currently undefined. Locks required: None. Return code: 0.
  */
 #define MC_CMD_GET_PHY_MEDIA_INFO 0x4b
 
@@ -5479,6 +5486,8 @@
 #define        LICENSED_V3_FEATURES_TX_SNIFF_WIDTH 1
 #define        LICENSED_V3_FEATURES_PROXY_FILTER_OPS_LBN 8
 #define        LICENSED_V3_FEATURES_PROXY_FILTER_OPS_WIDTH 1
+#define        LICENSED_V3_FEATURES_EVENT_CUT_THROUGH_LBN 9
+#define        LICENSED_V3_FEATURES_EVENT_CUT_THROUGH_WIDTH 1
 #define       LICENSED_V3_FEATURES_MASK_LBN 0
 #define       LICENSED_V3_FEATURES_MASK_WIDTH 64
 
@@ -5634,6 +5643,109 @@
 /* Only valid if INTRFLAG was true */
 #define       MC_CMD_INIT_EVQ_OUT_IRQ_OFST 0
 
+/* MC_CMD_INIT_EVQ_V2_IN msgrequest */
+#define    MC_CMD_INIT_EVQ_V2_IN_LENMIN 44
+#define    MC_CMD_INIT_EVQ_V2_IN_LENMAX 548
+#define    MC_CMD_INIT_EVQ_V2_IN_LEN(num) (36+8*(num))
+/* Size, in entries */
+#define       MC_CMD_INIT_EVQ_V2_IN_SIZE_OFST 0
+/* Desired instance. Must be set to a specific instance, which is a function
+ * local queue index.
+ */
+#define       MC_CMD_INIT_EVQ_V2_IN_INSTANCE_OFST 4
+/* The initial timer value. The load value is ignored if the timer mode is DIS.
+ */
+#define       MC_CMD_INIT_EVQ_V2_IN_TMR_LOAD_OFST 8
+/* The reload value is ignored in one-shot modes */
+#define       MC_CMD_INIT_EVQ_V2_IN_TMR_RELOAD_OFST 12
+/* tbd */
+#define       MC_CMD_INIT_EVQ_V2_IN_FLAGS_OFST 16
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INTERRUPTING_LBN 0
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INTERRUPTING_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RPTR_DOS_LBN 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RPTR_DOS_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INT_ARMD_LBN 2
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INT_ARMD_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_CUT_THRU_LBN 3
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_CUT_THRU_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RX_MERGE_LBN 4
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TX_MERGE_LBN 5
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_USE_TIMER_LBN 6
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_USE_TIMER_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_LBN 7
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_WIDTH 4
+/* enum: All initialisation flags specified by host. */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_MANUAL 0x0
+/* enum: MEDFORD only. Certain initialisation flags specified by host may be
+ * over-ridden by firmware based on licenses and firmware variant in order to
+ * provide the lowest latency achievable. See
+ * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags.
+ */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_LOW_LATENCY 0x1
+/* enum: MEDFORD only. Certain initialisation flags specified by host may be
+ * over-ridden by firmware based on licenses and firmware variant in order to
+ * provide the best throughput achievable. See
+ * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags.
+ */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_THROUGHPUT 0x2
+/* enum: MEDFORD only. Certain initialisation flags may be over-ridden by
+ * firmware based on licenses and firmware variant. See
+ * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags.
+ */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO 0x3
+#define       MC_CMD_INIT_EVQ_V2_IN_TMR_MODE_OFST 20
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_MODE_DIS 0x0
+/* enum: Immediate */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_IMMED_START 0x1
+/* enum: Triggered */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_TRIG_START 0x2
+/* enum: Hold-off */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_INT_HLDOFF 0x3
+/* Target EVQ for wakeups if in wakeup mode. */
+#define       MC_CMD_INIT_EVQ_V2_IN_TARGET_EVQ_OFST 24
+/* Target interrupt if in interrupting mode (note union with target EVQ). Use
+ * MC_CMD_RESOURCE_INSTANCE_ANY unless a specific one required for test
+ * purposes.
+ */
+#define       MC_CMD_INIT_EVQ_V2_IN_IRQ_NUM_OFST 24
+/* Event Counter Mode. */
+#define       MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_OFST 28
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_DIS 0x0
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_RX 0x1
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_TX 0x2
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_RXTX 0x3
+/* Event queue packet count threshold. */
+#define       MC_CMD_INIT_EVQ_V2_IN_COUNT_THRSHLD_OFST 32
+/* 64-bit address of 4k of 4k-aligned host memory buffer */
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_OFST 36
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_LEN 8
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_LO_OFST 36
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_HI_OFST 40
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_MINNUM 1
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_MAXNUM 64
+
+/* MC_CMD_INIT_EVQ_V2_OUT msgresponse */
+#define    MC_CMD_INIT_EVQ_V2_OUT_LEN 8
+/* Only valid if INTRFLAG was true */
+#define       MC_CMD_INIT_EVQ_V2_OUT_IRQ_OFST 0
+/* Actual configuration applied on the card */
+#define       MC_CMD_INIT_EVQ_V2_OUT_FLAGS_OFST 4
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_CUT_THRU_LBN 0
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_CUT_THRU_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RX_MERGE_LBN 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_TX_MERGE_LBN 2
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_TX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RXQ_FORCE_EV_MERGING_LBN 3
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RXQ_FORCE_EV_MERGING_WIDTH 1
+
 /* QUEUE_CRC_MODE structuredef */
 #define    QUEUE_CRC_MODE_LEN 1
 #define       QUEUE_CRC_MODE_MODE_LBN 0
@@ -5697,8 +5809,8 @@
 #define        MC_CMD_INIT_RXQ_IN_FLAG_PREFIX_WIDTH 1
 #define        MC_CMD_INIT_RXQ_IN_FLAG_DISABLE_SCATTER_LBN 9
 #define        MC_CMD_INIT_RXQ_IN_FLAG_DISABLE_SCATTER_WIDTH 1
-#define        MC_CMD_INIT_RXQ_IN_FLAG_FORCE_EV_MERGING_LBN 10
-#define        MC_CMD_INIT_RXQ_IN_FLAG_FORCE_EV_MERGING_WIDTH 1
+#define        MC_CMD_INIT_RXQ_IN_UNUSED_LBN 10
+#define        MC_CMD_INIT_RXQ_IN_UNUSED_WIDTH 1
 /* Owner ID to use if in buffer mode (zero if physical) */
 #define       MC_CMD_INIT_RXQ_IN_OWNER_ID_OFST 20
 /* The port ID associated with the v-adaptor which should contain this DMAQ. */
@@ -7854,6 +7966,20 @@
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_EVENT_CUT_THROUGH_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_CUT_THROUGH_LBN 4
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_CUT_THROUGH_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_VFIFO_ULL_MODE_LBN 5
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_VFIFO_ULL_MODE_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN 6
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN 7
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_LBN 8
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TIMESTAMP_LBN 9
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TIMESTAMP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_LBN 10
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_LBN 11
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
@@ -7910,6 +8036,288 @@
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_SIZE_PIO_BUFF_OFST 70
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_SIZE_PIO_BUFF_LEN 2
 
+/* MC_CMD_GET_CAPABILITIES_V3_OUT msgresponse */
+#define    MC_CMD_GET_CAPABILITIES_V3_OUT_LEN 73
+/* First word of flags. */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_FLAGS1_OFST 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VPORT_RECONFIGURE_LBN 3
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VPORT_RECONFIGURE_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_STRIPING_LBN 4
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_STRIPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_QUERY_LBN 5
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_QUERY_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_PORT_VLAN_RESTRICT_LBN 6
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_PORT_VLAN_RESTRICT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_DRV_ATTACH_PREBOOT_LBN 7
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_DRV_ATTACH_PREBOOT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_FORCE_EVENT_MERGING_LBN 8
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_FORCE_EVENT_MERGING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_SET_MAC_ENHANCED_LBN 9
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_SET_MAC_ENHANCED_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_UNKNOWN_UCAST_DST_FILTER_ALWAYS_MULTI_RECIPIENT_LBN 10
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_UNKNOWN_UCAST_DST_FILTER_ALWAYS_MULTI_RECIPIENT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED_LBN 11
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_SECURITY_FILTERING_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_SECURITY_FILTERING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_ADDITIONAL_RSS_MODES_LBN 13
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_ADDITIONAL_RSS_MODES_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_QBB_LBN 14
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_QBB_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_VAR_BUFFERS_LBN 15
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_VAR_BUFFERS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_RSS_LIMITED_LBN 16
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_RSS_LIMITED_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_LBN 17
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_INCLUDE_FCS_LBN 18
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_INCLUDE_FCS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VLAN_INSERTION_LBN 19
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VLAN_INSERTION_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_VLAN_STRIPPING_LBN 20
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_VLAN_STRIPPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_LBN 21
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_0_LBN 22
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_0_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_14_LBN 23
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_14_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_TIMESTAMP_LBN 24
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_TIMESTAMP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_BATCHING_LBN 25
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_BATCHING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MCAST_FILTER_CHAINING_LBN 26
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MCAST_FILTER_CHAINING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_PM_AND_RXDP_COUNTERS_LBN 27
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_PM_AND_RXDP_COUNTERS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DISABLE_SCATTER_LBN 28
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DISABLE_SCATTER_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MCAST_UDP_LOOPBACK_LBN 29
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MCAST_UDP_LOOPBACK_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_LBN 30
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VXLAN_NVGRE_LBN 31
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VXLAN_NVGRE_WIDTH 1
+/* RxDPCPU firmware id. */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_OFST 4
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_LEN 2
+/* enum: Standard RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP  0x0
+/* enum: Low latency RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY  0x1
+/* enum: Packed stream RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM  0x2
+/* enum: BIST RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST  0x10a
+/* enum: RXDP Test firmware image 1 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH  0x101
+/* enum: RXDP Test firmware image 2 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD  0x102
+/* enum: RXDP Test firmware image 3 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST  0x103
+/* enum: RXDP Test firmware image 4 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE  0x104
+/* enum: RXDP Test firmware image 5 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE  0x105
+/* enum: RXDP Test firmware image 6 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS  0x106
+/* enum: RXDP Test firmware image 7 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT  0x107
+/* enum: RXDP Test firmware image 8 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL  0x108
+/* enum: RXDP Test firmware image 9 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY  0x10b
+/* TxDPCPU firmware id. */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_OFST 6
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_LEN 2
+/* enum: Standard TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP  0x0
+/* enum: Low latency TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY  0x1
+/* enum: High packet rate TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE  0x3
+/* enum: BIST TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST  0x12d
+/* enum: TXDP Test firmware image 1 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT  0x101
+/* enum: TXDP Test firmware image 2 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS  0x102
+/* enum: TXDP CSR bus test firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR  0x103
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_OFST 8
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_LEN 2
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_LBN 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_WIDTH 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_TYPE_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_TYPE_WIDTH 4
+/* enum: reserved value - do not use (may indicate alternative interpretation
+ * of REV field in future)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED  0x0
+/* enum: Trivial RX PD firmware for early Huntington development (Huntington
+ * development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT  0x1
+/* enum: RX PD firmware with approximately Siena-compatible behaviour
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT  0x2
+/* enum: Virtual switching (full feature) RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH  0x3
+/* enum: siena_compat variant RX PD firmware using PM rather than MAC
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+/* enum: Low latency RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY  0x5
+/* enum: Packed stream RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM  0x6
+/* enum: RX PD firmware handling layer 2 only for high packet rate performance
+ * tests (Medford development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
+/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+/* enum: RX PD firmware parsing but not filtering network overlay tunnel
+ * encapsulations (Medford development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY  0xf
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_OFST 10
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_LEN 2
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_LBN 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_WIDTH 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_TYPE_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_TYPE_WIDTH 4
+/* enum: reserved value - do not use (may indicate alternative interpretation
+ * of REV field in future)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED  0x0
+/* enum: Trivial TX PD firmware for early Huntington development (Huntington
+ * development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT  0x1
+/* enum: TX PD firmware with approximately Siena-compatible behaviour
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT  0x2
+/* enum: Virtual switching (full feature) TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH  0x3
+/* enum: siena_compat variant TX PD firmware using PM rather than MAC
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY  0x5 /* enum */
+/* enum: TX PD firmware handling layer 2 only for high packet rate performance
+ * tests (Medford development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
+/* enum: TX PD firmware for GUE parsing prototype (Medford development only) */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+/* Hardware capabilities of NIC */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_OFST 12
+/* Licensed capabilities */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_LICENSE_CAPABILITIES_OFST 16
+/* Second word of flags. Not present on older firmware (check the length). */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_FLAGS2_OFST 20
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_LBN 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_ENCAP_LBN 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_ENCAP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVQ_TIMER_CTRL_LBN 2
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVQ_TIMER_CTRL_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVENT_CUT_THROUGH_LBN 3
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVENT_CUT_THROUGH_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_CUT_THROUGH_LBN 4
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_CUT_THROUGH_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VFIFO_ULL_MODE_LBN 5
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VFIFO_ULL_MODE_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN 6
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MAC_STATS_40G_TX_SIZE_BINS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_INIT_EVQ_V2_LBN 7
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_INIT_EVQ_V2_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_TIMESTAMPING_LBN 8
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_TIMESTAMPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TIMESTAMP_LBN 9
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TIMESTAMP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_LBN 10
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_LBN 11
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_WIDTH 1
+/* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
+ * on older firmware (check the length).
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_N_CONTEXTS_OFST 24
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_N_CONTEXTS_LEN 2
+/* One byte per PF containing the number of the external port assigned to this
+ * PF, indexed by PF number. Special values indicate that a PF is either not
+ * present or not assigned.
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_OFST 26
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
+/* enum: The caller is not permitted to access information on this PF. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED  0xff
+/* enum: PF does not exist. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT  0xfe
+/* enum: PF does exist but is not assigned to any external port. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED  0xfd
+/* enum: This value indicates that the PF is assigned, but it cannot be expressed
+ * in this field. It is intended for a possible future situation where a more
+ * complex scheme of PFs to ports mapping is being used. The future driver
+ * should look for a new field supporting the new scheme. The current/old
+ * driver should treat this value as PF_NOT_ASSIGNED.
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT  0xfc
+/* One byte per PF containing the number of its VFs, indexed by PF number. A
+ * special value indicates that a PF is not present.
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_OFST 42
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_LEN 1
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_NUM 16
+/* enum: The caller is not permitted to access information on this PF. */
+/*               MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED  0xff */
+/* enum: PF does not exist. */
+/*               MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT  0xfe */
+/* Number of VIs available for each external port */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_OFST 58
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_LEN 2
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_NUM 4
+/* Size of RX descriptor cache expressed as binary logarithm. The actual size
+ * equals (2 ^ RX_DESC_CACHE_SIZE).
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DESC_CACHE_SIZE_OFST 66
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DESC_CACHE_SIZE_LEN 1
+/* Size of TX descriptor cache expressed as binary logarithm. The actual size
+ * equals (2 ^ TX_DESC_CACHE_SIZE).
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DESC_CACHE_SIZE_OFST 67
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DESC_CACHE_SIZE_LEN 1
+/* Total number of available PIO buffers */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_PIO_BUFFS_OFST 68
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_PIO_BUFFS_LEN 2
+/* Size of a single PIO buffer */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_OFST 70
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_LEN 2
+/* On chips later than Medford the amount of address space assigned to each VI
+ * is configurable. This is a global setting that the driver must query to
+ * discover the VI to address mapping. Cut-through PIO (CTPIO) is not available
+ * with 8k VI windows.
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_OFST 72
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_LEN 1
+/* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k.
+ * CTPIO is not mapped.
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K   0x0
+/* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K  0x1
+/* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K  0x2
+
 
 /***********************************/
 /* MC_CMD_V2_EXTN
@@ -9026,7 +9434,7 @@
  */
 #define MC_CMD_GET_RXDP_CONFIG 0xc2
 
-#define MC_CMD_0xc2_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0xc2_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_GET_RXDP_CONFIG_IN msgrequest */
 #define    MC_CMD_GET_RXDP_CONFIG_IN_LEN 0
@@ -10125,7 +10533,9 @@
  * that this operation returns a zero-length response
  */
 #define          MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE  0x0
-/* enum: report counts of installed licenses */
+/* enum: report counts of installed licenses. Returns EAGAIN if license
+ * processing (updating) has been started but not yet completed.
+ */
 #define          MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE  0x1
 
 /* MC_CMD_LICENSING_V3_OUT msgresponse */
@@ -10763,6 +11173,8 @@
 #define          MC_CMD_GET_WORKAROUNDS_OUT_BUG42008 0x20
 /* enum: Bug 26807 features present in firmware (multicast filter chaining) */
 #define          MC_CMD_GET_WORKAROUNDS_OUT_BUG26807 0x40
+/* enum: Bug 61265 work around (broken EVQ TMR writes). */
+#define          MC_CMD_GET_WORKAROUNDS_OUT_BUG61265 0x80
 
 
 /***********************************/
@@ -11280,22 +11692,110 @@
 #define MC_CMD_0x118_PRIVILEGE_CTG SRIOV_CTG_ADMIN
 
 /* MC_CMD_RX_BALANCING_IN msgrequest */
-#define    MC_CMD_RX_BALANCING_IN_LEN 4
+#define    MC_CMD_RX_BALANCING_IN_LEN 16
 /* The RX port whose upconverter table will be modified */
 #define       MC_CMD_RX_BALANCING_IN_PORT_OFST 0
-#define       MC_CMD_RX_BALANCING_IN_PORT_LEN 1
 /* The VLAN priority associated to the table index and vFIFO */
-#define       MC_CMD_RX_BALANCING_IN_PRIORITY_OFST 1
-#define       MC_CMD_RX_BALANCING_IN_PRIORITY_LEN 1
+#define       MC_CMD_RX_BALANCING_IN_PRIORITY_OFST 4
 /* The resulting bit of SRC^DST for indexing the table */
-#define       MC_CMD_RX_BALANCING_IN_SRC_DST_OFST 2
-#define       MC_CMD_RX_BALANCING_IN_SRC_DST_LEN 1
+#define       MC_CMD_RX_BALANCING_IN_SRC_DST_OFST 8
 /* The RX engine to which the vFIFO in the table entry will point to */
-#define       MC_CMD_RX_BALANCING_IN_ENG_OFST 3
-#define       MC_CMD_RX_BALANCING_IN_ENG_LEN 1
+#define       MC_CMD_RX_BALANCING_IN_ENG_OFST 12
 
 /* MC_CMD_RX_BALANCING_OUT msgresponse */
 #define    MC_CMD_RX_BALANCING_OUT_LEN 0
 
+/***********************************/
+/* MC_CMD_SET_EVQ_TMR
+ * Update the timer load, timer reload and timer mode values for a given EVQ.
+ * The requested timer values (in TMR_LOAD_REQ_NS and TMR_RELOAD_REQ_NS) will
+ * be rounded up to the granularity supported by the hardware, then truncated
+ * to the range supported by the hardware. The resulting value after the
+ * rounding and truncation will be returned to the caller (in TMR_LOAD_ACT_NS
+ * and TMR_RELOAD_ACT_NS).
+ */
+#define MC_CMD_SET_EVQ_TMR 0x120
+
+#define MC_CMD_0x120_PRIVILEGE_CTG SRIOV_CTG_GENERAL
+
+/* MC_CMD_SET_EVQ_TMR_IN msgrequest */
+#define    MC_CMD_SET_EVQ_TMR_IN_LEN 16
+/* Function-relative queue instance */
+#define       MC_CMD_SET_EVQ_TMR_IN_INSTANCE_OFST 0
+/* Requested value for timer load (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_IN_TMR_LOAD_REQ_NS_OFST 4
+/* Requested value for timer reload (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_IN_TMR_RELOAD_REQ_NS_OFST 8
+/* Timer mode. Meanings as per EVQ_TMR_REG.TC_TIMER_VAL */
+#define       MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_OFST 12
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS  0x0 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START  0x1 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START  0x2 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF  0x3 /* enum */
+
+/* MC_CMD_SET_EVQ_TMR_OUT msgresponse */
+#define    MC_CMD_SET_EVQ_TMR_OUT_LEN 8
+/* Actual value for timer load (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_OUT_TMR_LOAD_ACT_NS_OFST 0
+/* Actual value for timer reload (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_OUT_TMR_RELOAD_ACT_NS_OFST 4
+
+
+/***********************************/
+/* MC_CMD_GET_EVQ_TMR_PROPERTIES
+ * Query properties about the event queue timers.
+ */
+#define MC_CMD_GET_EVQ_TMR_PROPERTIES 0x122
+
+#define MC_CMD_0x122_PRIVILEGE_CTG SRIOV_CTG_GENERAL
+
+/* MC_CMD_GET_EVQ_TMR_PROPERTIES_IN msgrequest */
+#define    MC_CMD_GET_EVQ_TMR_PROPERTIES_IN_LEN 0
+
+/* MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT msgresponse */
+#define    MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN 36
+/* Reserved for future use. */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_FLAGS_OFST 0
+/* For timers updated via writes to EVQ_TMR_REG, this is the time interval (in
+ * nanoseconds) for each increment of the timer load/reload count. The
+ * requested duration of a timer is this value multiplied by the timer
+ * load/reload count.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_NS_PER_COUNT_OFST 4
+/* For timers updated via writes to EVQ_TMR_REG, this is the maximum value
+ * allowed for timer load/reload counts.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_MAX_COUNT_OFST 8
+/* For timers updated via writes to EVQ_TMR_REG, timer load/reload counts not a
+ * multiple of this step size will be rounded in an implementation defined
+ * manner.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_STEP_OFST 12
+/* Maximum timer duration (in nanoseconds) for timers updated via MCDI. Only
+ * meaningful if MC_CMD_SET_EVQ_TMR is implemented.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_MAX_NS_OFST 16
+/* Timer durations requested via MCDI that are not a multiple of this step size
+ * will be rounded up. Only meaningful if MC_CMD_SET_EVQ_TMR is implemented.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_STEP_NS_OFST 20
+/* For timers updated using the bug35388 workaround, this is the time interval
+ * (in nanoseconds) for each increment of the timer load/reload count. The
+ * requested duration of a timer is this value multiplied by the timer
+ * load/reload count. This field is only meaningful if the bug35388 workaround
+ * is enabled.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_NS_PER_COUNT_OFST 24
+/* For timers updated using the bug35388 workaround, this is the maximum value
+ * allowed for timer load/reload counts. This field is only meaningful if the
+ * bug35388 workaround is enabled.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_MAX_COUNT_OFST 28
+/* For timers updated using the bug35388 workaround, timer load/reload counts
+ * not a multiple of this step size will be rounded in an implementation
+ * defined manner. This field is only meaningful if the bug35388 workaround is
+ * enabled.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_STEP_OFST 32
 
 #endif /* MCDI_PCOL_H */
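
The GET_CAPABILITIES_V3 response above encodes each boolean capability as an LBN/WIDTH bit-field pair inside the FLAGS1/FLAGS2 dwords, and the variable-length tail means FLAGS2 and later fields must be length-checked before use. A minimal standalone sketch of how a consumer might test one of these bits (mcdi_v3_flag() is a hypothetical stand-in for the driver's own MCDI response accessors; only the macro names and the length-check convention come from the header above):

/*
 * Standalone sketch only: extract one capability flag from the FLAGS2
 * dword of a GET_CAPABILITIES_V3 response buffer.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool mcdi_v3_flag(const uint8_t *outbuf, size_t outlen,
			 size_t ofst, unsigned int lbn)
{
	uint32_t dword;

	/* FLAGS2 and later fields are absent on older firmware: check length. */
	if (outlen < ofst + 4)
		return false;

	/* MCDI responses are little-endian; assemble the 32-bit flags word. */
	dword = (uint32_t)outbuf[ofst] |
		((uint32_t)outbuf[ofst + 1] << 8) |
		((uint32_t)outbuf[ofst + 2] << 16) |
		((uint32_t)outbuf[ofst + 3] << 24);

	return (dword >> lbn) & 1;	/* every flag here has WIDTH 1 */
}

/*
 * Example use (values per the definitions above): does the datapath
 * support FATSOv2?
 *
 *	tso_v2 = mcdi_v3_flag(outbuf, outlen,
 *			      MC_CMD_GET_CAPABILITIES_V3_OUT_FLAGS2_OFST,
 *			      MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_LBN);
 */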
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 9ff062a..13b7f52 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -392,7 +392,7 @@
  * @eventq_init: Event queue initialised flag
  * @enabled: Channel enabled indicator
  * @irq: IRQ number (MSI and MSI-X only)
- * @irq_moderation: IRQ moderation value (in hardware ticks)
+ * @irq_moderation_us: IRQ moderation value (in microseconds)
  * @napi_dev: Net device used with NAPI
  * @napi_str: NAPI control structure
  * @state: state for NAPI vs busy polling
@@ -433,7 +433,7 @@
 	bool eventq_init;
 	bool enabled;
 	int irq;
-	unsigned int irq_moderation;
+	unsigned int irq_moderation_us;
 	struct net_device *napi_dev;
 	struct napi_struct napi_str;
 #ifdef CONFIG_NET_RX_BUSY_POLL
@@ -810,8 +810,10 @@
  * @membase: Memory BAR value
  * @interrupt_mode: Interrupt mode
  * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds
+ * @timer_max_ns: Interrupt timer maximum value, in nanoseconds
  * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues
- * @irq_rx_moderation: IRQ moderation time for RX event queues
+ * @irq_rx_mod_step_us: Step size for IRQ moderation for RX event queues
+ * @irq_rx_moderation_us: IRQ moderation time for RX event queues
  * @msg_enable: Log message enable flags
  * @state: Device state number (%STATE_*). Serialised by the rtnl_lock.
  * @reset_pending: Bitmask for pending resets
@@ -940,8 +942,10 @@
 
 	enum efx_int_mode interrupt_mode;
 	unsigned int timer_quantum_ns;
+	unsigned int timer_max_ns;
 	bool irq_rx_adaptive;
-	unsigned int irq_rx_moderation;
+	unsigned int irq_mod_step_us;
+	unsigned int irq_rx_moderation_us;
 	u32 msg_enable;
 
 	enum nic_state state;
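
Note the switch from @irq_moderation in hardware ticks to @irq_moderation_us in microseconds: conversion to ticks now happens only where the hardware is actually programmed, using @timer_quantum_ns. A hedged sketch of the helpers this implies (the real efx_usecs_to_ticks() used in the siena.c hunk further down is not shown in this diff; the bodies below are an illustrative guess):

/*
 * Illustrative guess only.  Assumes efx->timer_quantum_ns is the
 * duration of one moderation timer tick.
 */
static inline unsigned int efx_usecs_to_ticks(struct efx_nic *efx,
					      unsigned int usecs)
{
	if (usecs == 0)
		return 0;
	if (usecs * 1000 < efx->timer_quantum_ns)
		return 1;	/* don't round a non-zero request down to zero */
	return usecs * 1000 / efx->timer_quantum_ns;
}

static inline unsigned int efx_ticks_to_usecs(struct efx_nic *efx,
					      unsigned int ticks)
{
	/* rounds down; callers treat this as a best-effort report */
	return ticks * efx->timer_quantum_ns / 1000;
}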
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 96944c3..d8b1694 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -507,10 +507,13 @@
  * @stats: Hardware statistics
  * @workaround_35388: Flag: firmware supports workaround for bug 35388
  * @workaround_26807: Flag: firmware supports workaround for bug 26807
+ * @workaround_61265: Flag: firmware supports workaround for bug 61265
  * @must_check_datapath_caps: Flag: @datapath_caps needs to be revalidated
  *	after MC reboot
  * @datapath_caps: Capabilities of datapath firmware (FLAGS1 field of
  *	%MC_CMD_GET_CAPABILITIES response)
+ * @datapath_caps2: Further capabilities of datapath firmware (FLAGS2 field of
+ *	%MC_CMD_GET_CAPABILITIES response)
  * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU
  * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU
  * @vport_id: The function's vport ID, only relevant for PFs
@@ -540,8 +543,10 @@
 	u64 stats[EF10_STAT_COUNT];
 	bool workaround_35388;
 	bool workaround_26807;
+	bool workaround_61265;
 	bool must_check_datapath_caps;
 	u32 datapath_caps;
+	u32 datapath_caps2;
 	unsigned int rx_dpcpu_fw_id;
 	unsigned int tx_dpcpu_fw_id;
 	unsigned int vport_id;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index c771e0a..dd204d9 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1306,7 +1306,7 @@
 {
 	struct efx_nic *efx = channel->efx;
 
-	channel->irq_moderation = 0;
+	channel->irq_moderation_us = 0;
 	channel->rx_queue.core_index = 0;
 
 	return efx_ptp_probe(efx, channel);
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 2219b54..04ed1b4 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -34,19 +34,24 @@
 
 static void siena_push_irq_moderation(struct efx_channel *channel)
 {
+	struct efx_nic *efx = channel->efx;
 	efx_dword_t timer_cmd;
 
-	if (channel->irq_moderation)
+	if (channel->irq_moderation_us) {
+		unsigned int ticks;
+
+		ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us);
 		EFX_POPULATE_DWORD_2(timer_cmd,
 				     FRF_CZ_TC_TIMER_MODE,
 				     FFE_CZ_TIMER_MODE_INT_HLDOFF,
 				     FRF_CZ_TC_TIMER_VAL,
-				     channel->irq_moderation - 1);
-	else
+				     ticks - 1);
+	} else {
 		EFX_POPULATE_DWORD_2(timer_cmd,
 				     FRF_CZ_TC_TIMER_MODE,
 				     FFE_CZ_TIMER_MODE_DIS,
 				     FRF_CZ_TC_TIMER_VAL, 0);
+	}
 	efx_writed_page_locked(channel->efx, &timer_cmd, FR_BZ_TIMER_COMMAND_P0,
 			       channel->channel);
 }
@@ -222,6 +227,9 @@
 	efx->timer_quantum_ns =
 		(caps & (1 << MC_CMD_CAPABILITIES_TURBO_ACTIVE_LBN)) ?
 		3072 : 6144; /* 768 cycles */
+	efx->timer_max_ns = efx->type->timer_period_max *
+			    efx->timer_quantum_ns;
+
 	return rc;
 }
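
As a worked example of the new @timer_max_ns value set just above (illustrative numbers only; timer_period_max is NIC-type specific and its real value is not shown in this hunk):

/*
 * Illustrative arithmetic only: how timer_max_ns bounds a moderation
 * request.  8191 below is an assumed example value, not from this patch.
 */
static unsigned int example_clamp_moderation_ns(unsigned int req_us)
{
	unsigned int quantum_ns = 3072;		/* turbo mode, per the hunk above */
	unsigned int period_max = 8191;		/* assumed example value */
	unsigned int timer_max_ns = period_max * quantum_ns;	/* ~25.2 ms */
	unsigned int req_ns = req_us * 1000;

	return req_ns > timer_max_ns ? timer_max_ns : req_ns;
}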
 
diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h
index 2310b75..351cd14 100644
--- a/drivers/net/ethernet/sfc/workarounds.h
+++ b/drivers/net/ethernet/sfc/workarounds.h
@@ -50,4 +50,8 @@
 #define EFX_WORKAROUND_35388(efx)					\
 	(efx_nic_rev(efx) == EFX_REV_HUNT_A0 && EFX_EF10_WORKAROUND_35388(efx))
 
+/* Moderation timer access must go through MCDI */
+#define EFX_EF10_WORKAROUND_61265(efx)					\
+	(((struct efx_ef10_nic_data *)efx->nic_data)->workaround_61265)
+
 #endif /* EFX_WORKAROUNDS_H */
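
The new EFX_EF10_WORKAROUND_61265() check lets the EF10 code route moderation updates through the MC_CMD_SET_EVQ_TMR command added earlier instead of writing the event-queue timer register directly. A sketch of that decision, with hypothetical helper names (only the workaround macro, efx_usecs_to_ticks() and the MCDI command name come from this series):

/*
 * Sketch only: how the workaround flag would gate moderation writes on
 * EF10.  efx_mcdi_set_evq_tmr() and efx_write_evq_tmr_reg() are
 * hypothetical helpers.
 */
static int efx_ef10_push_moderation(struct efx_channel *channel,
				    unsigned int usecs)
{
	struct efx_nic *efx = channel->efx;

	if (EFX_EF10_WORKAROUND_61265(efx)) {
		/* Broken EVQ TMR writes: program the timer via MCDI
		 * (MC_CMD_SET_EVQ_TMR), which also returns the value the
		 * firmware actually applied after rounding and clamping.
		 */
		return efx_mcdi_set_evq_tmr(channel, usecs * 1000);	/* hypothetical */
	}

	/* Otherwise convert to ticks and write the timer register directly. */
	efx_write_evq_tmr_reg(channel, efx_usecs_to_ticks(efx, usecs));	/* hypothetical */
	return 0;
}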
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index f85d605..421ebda 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -140,9 +140,10 @@
 #define CPSW_CMINTMAX_INTVL	(1000 / CPSW_CMINTMIN_CNT)
 #define CPSW_CMINTMIN_INTVL	((1000 / CPSW_CMINTMAX_CNT) + 1)
 
-#define cpsw_slave_index(priv)				\
-		((priv->data.dual_emac) ? priv->emac_port :	\
-		priv->data.active_slave)
+#define cpsw_slave_index(cpsw, priv)				\
+		((cpsw->data.dual_emac) ? priv->emac_port :	\
+		cpsw->data.active_slave)
+#define IRQ_NUM			2
 
 static int debug_level;
 module_param(debug_level, int, 0);
@@ -363,38 +364,39 @@
 	__raw_writel(val, slave->regs + offset);
 }
 
-struct cpsw_priv {
-	struct platform_device		*pdev;
-	struct net_device		*ndev;
-	struct napi_struct		napi_rx;
-	struct napi_struct		napi_tx;
+struct cpsw_common {
 	struct device			*dev;
 	struct cpsw_platform_data	data;
+	struct napi_struct		napi_rx;
+	struct napi_struct		napi_tx;
 	struct cpsw_ss_regs __iomem	*regs;
 	struct cpsw_wr_regs __iomem	*wr_regs;
 	u8 __iomem			*hw_stats;
 	struct cpsw_host_regs __iomem	*host_port_regs;
-	u32				msg_enable;
 	u32				version;
 	u32				coal_intvl;
 	u32				bus_freq_mhz;
 	int				rx_packet_max;
-	struct clk			*clk;
-	u8				mac_addr[ETH_ALEN];
 	struct cpsw_slave		*slaves;
 	struct cpdma_ctlr		*dma;
 	struct cpdma_chan		*txch, *rxch;
 	struct cpsw_ale			*ale;
-	bool				rx_pause;
-	bool				tx_pause;
 	bool				quirk_irq;
 	bool				rx_irq_disabled;
 	bool				tx_irq_disabled;
-	/* snapshot of IRQ numbers */
-	u32 irqs_table[4];
-	u32 num_irqs;
-	struct cpts *cpts;
+	u32 irqs_table[IRQ_NUM];
+	struct cpts			*cpts;
+};
+
+struct cpsw_priv {
+	struct net_device		*ndev;
+	struct device			*dev;
+	u32				msg_enable;
+	u8				mac_addr[ETH_ALEN];
+	bool				rx_pause;
+	bool				tx_pause;
 	u32 emac_port;
+	struct cpsw_common *cpsw;
 };
 
 struct cpsw_stats {
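
With this split, state shared by both EMAC ports (DMA channels, ALE, register maps, NAPI contexts, IRQs) lives in struct cpsw_common, while each net_device's struct cpsw_priv keeps only per-port data plus a back-pointer. An illustrative sketch of the access pattern the rest of the patch converts to (cpsw_example_ndo() is a made-up callback; ndev_to_cpsw() and cpsw_slave_index() are the helpers changed elsewhere in this patch):

/*
 * Illustrative pattern only: per-port state stays in struct cpsw_priv,
 * shared switch state is reached through priv->cpsw.
 */
static int cpsw_example_ndo(struct net_device *ndev)
{
	struct cpsw_priv *priv = netdev_priv(ndev);	/* per-port */
	struct cpsw_common *cpsw = priv->cpsw;		/* shared */
	int slave_no = cpsw_slave_index(cpsw, priv);

	/* shared resources: cpsw->dma, cpsw->ale, cpsw->wr_regs, cpsw->slaves */
	/* per-port data:    priv->mac_addr, priv->emac_port, priv->msg_enable */
	return cpsw->slaves[slave_no].phy ? 0 : -EOPNOTSUPP;
}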
@@ -485,78 +487,71 @@
 
 #define CPSW_STATS_LEN	ARRAY_SIZE(cpsw_gstrings_stats)
 
-#define napi_to_priv(napi)	container_of(napi, struct cpsw_priv, napi)
+#define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw)
+#define napi_to_cpsw(napi)	container_of(napi, struct cpsw_common, napi)
 #define for_each_slave(priv, func, arg...)				\
 	do {								\
 		struct cpsw_slave *slave;				\
+		struct cpsw_common *cpsw = (priv)->cpsw;		\
 		int n;							\
-		if (priv->data.dual_emac)				\
-			(func)((priv)->slaves + priv->emac_port, ##arg);\
+		if (cpsw->data.dual_emac)				\
+			(func)((cpsw)->slaves + priv->emac_port, ##arg);\
 		else							\
-			for (n = (priv)->data.slaves,			\
-					slave = (priv)->slaves;		\
+			for (n = cpsw->data.slaves,			\
+					slave = cpsw->slaves;		\
 					n; n--)				\
 				(func)(slave++, ##arg);			\
 	} while (0)
-#define cpsw_get_slave_ndev(priv, __slave_no__)				\
-	((__slave_no__ < priv->data.slaves) ?				\
-		priv->slaves[__slave_no__].ndev : NULL)
-#define cpsw_get_slave_priv(priv, __slave_no__)				\
-	(((__slave_no__ < priv->data.slaves) &&				\
-		(priv->slaves[__slave_no__].ndev)) ?			\
-		netdev_priv(priv->slaves[__slave_no__].ndev) : NULL)	\
 
-#define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb)		\
+#define cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb)		\
 	do {								\
-		if (!priv->data.dual_emac)				\
+		if (!cpsw->data.dual_emac)				\
 			break;						\
 		if (CPDMA_RX_SOURCE_PORT(status) == 1) {		\
-			ndev = cpsw_get_slave_ndev(priv, 0);		\
-			priv = netdev_priv(ndev);			\
+			ndev = cpsw->slaves[0].ndev;			\
 			skb->dev = ndev;				\
 		} else if (CPDMA_RX_SOURCE_PORT(status) == 2) {		\
-			ndev = cpsw_get_slave_ndev(priv, 1);		\
-			priv = netdev_priv(ndev);			\
+			ndev = cpsw->slaves[1].ndev;			\
 			skb->dev = ndev;				\
 		}							\
 	} while (0)
-#define cpsw_add_mcast(priv, addr)					\
+#define cpsw_add_mcast(cpsw, priv, addr)				\
 	do {								\
-		if (priv->data.dual_emac) {				\
-			struct cpsw_slave *slave = priv->slaves +	\
+		if (cpsw->data.dual_emac) {				\
+			struct cpsw_slave *slave = cpsw->slaves +	\
 						priv->emac_port;	\
-			int slave_port = cpsw_get_slave_port(priv,	\
+			int slave_port = cpsw_get_slave_port(		\
 						slave->slave_num);	\
-			cpsw_ale_add_mcast(priv->ale, addr,		\
+			cpsw_ale_add_mcast(cpsw->ale, addr,		\
 				1 << slave_port | ALE_PORT_HOST,	\
 				ALE_VLAN, slave->port_vlan, 0);		\
 		} else {						\
-			cpsw_ale_add_mcast(priv->ale, addr,		\
+			cpsw_ale_add_mcast(cpsw->ale, addr,		\
 				ALE_ALL_PORTS,				\
 				0, 0, 0);				\
 		}							\
 	} while (0)
 
-static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num)
+static inline int cpsw_get_slave_port(u32 slave_num)
 {
 	return slave_num + 1;
 }
 
 static void cpsw_set_promiscious(struct net_device *ndev, bool enable)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_ale *ale = priv->ale;
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+	struct cpsw_ale *ale = cpsw->ale;
 	int i;
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		bool flag = false;
 
 		/* Enabling promiscuous mode for one interface will be
 		 * common for both interfaces as they share
 		 * the same hardware resource.
 		 */
-		for (i = 0; i < priv->data.slaves; i++)
-			if (priv->slaves[i].ndev->flags & IFF_PROMISC)
+		for (i = 0; i < cpsw->data.slaves; i++)
+			if (cpsw->slaves[i].ndev->flags & IFF_PROMISC)
 				flag = true;
 
 		if (!enable && flag) {
@@ -579,7 +574,7 @@
 			unsigned long timeout = jiffies + HZ;
 
 			/* Disable Learn for all ports (host is port 0 and slaves are port 1 and up) */
-			for (i = 0; i <= priv->data.slaves; i++) {
+			for (i = 0; i <= cpsw->data.slaves; i++) {
 				cpsw_ale_control_set(ale, i,
 						     ALE_PORT_NOLEARN, 1);
 				cpsw_ale_control_set(ale, i,
@@ -606,7 +601,7 @@
 			cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0);
 
 			/* Enable Learn for all ports (host is port 0 and slaves are port 1 and up) */
-			for (i = 0; i <= priv->data.slaves; i++) {
+			for (i = 0; i <= cpsw->data.slaves; i++) {
 				cpsw_ale_control_set(ale, i,
 						     ALE_PORT_NOLEARN, 0);
 				cpsw_ale_control_set(ale, i,
@@ -620,17 +615,18 @@
 static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 	int vid;
 
-	if (priv->data.dual_emac)
-		vid = priv->slaves[priv->emac_port].port_vlan;
+	if (cpsw->data.dual_emac)
+		vid = cpsw->slaves[priv->emac_port].port_vlan;
 	else
-		vid = priv->data.default_vlan;
+		vid = cpsw->data.default_vlan;
 
 	if (ndev->flags & IFF_PROMISC) {
 		/* Enable promiscuous mode */
 		cpsw_set_promiscious(ndev, true);
-		cpsw_ale_set_allmulti(priv->ale, IFF_ALLMULTI);
+		cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI);
 		return;
 	} else {
 		/* Disable promiscuous mode */
@@ -638,36 +634,36 @@
 	}
 
 	/* Restore allmulti on vlans if necessary */
-	cpsw_ale_set_allmulti(priv->ale, priv->ndev->flags & IFF_ALLMULTI);
+	cpsw_ale_set_allmulti(cpsw->ale, priv->ndev->flags & IFF_ALLMULTI);
 
 	/* Clear all mcast from ALE */
-	cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS, vid);
+	cpsw_ale_flush_multicast(cpsw->ale, ALE_ALL_PORTS, vid);
 
 	if (!netdev_mc_empty(ndev)) {
 		struct netdev_hw_addr *ha;
 
 		/* program multicast address list into ALE register */
 		netdev_for_each_mc_addr(ha, ndev) {
-			cpsw_add_mcast(priv, (u8 *)ha->addr);
+			cpsw_add_mcast(cpsw, priv, (u8 *)ha->addr);
 		}
 	}
 }
 
-static void cpsw_intr_enable(struct cpsw_priv *priv)
+static void cpsw_intr_enable(struct cpsw_common *cpsw)
 {
-	__raw_writel(0xFF, &priv->wr_regs->tx_en);
-	__raw_writel(0xFF, &priv->wr_regs->rx_en);
+	__raw_writel(0xFF, &cpsw->wr_regs->tx_en);
+	__raw_writel(0xFF, &cpsw->wr_regs->rx_en);
 
-	cpdma_ctlr_int_ctrl(priv->dma, true);
+	cpdma_ctlr_int_ctrl(cpsw->dma, true);
 	return;
 }
 
-static void cpsw_intr_disable(struct cpsw_priv *priv)
+static void cpsw_intr_disable(struct cpsw_common *cpsw)
 {
-	__raw_writel(0, &priv->wr_regs->tx_en);
-	__raw_writel(0, &priv->wr_regs->rx_en);
+	__raw_writel(0, &cpsw->wr_regs->tx_en);
+	__raw_writel(0, &cpsw->wr_regs->rx_en);
 
-	cpdma_ctlr_int_ctrl(priv->dma, false);
+	cpdma_ctlr_int_ctrl(cpsw->dma, false);
 	return;
 }
 
@@ -675,14 +671,14 @@
 {
 	struct sk_buff		*skb = token;
 	struct net_device	*ndev = skb->dev;
-	struct cpsw_priv	*priv = netdev_priv(ndev);
+	struct cpsw_common	*cpsw = ndev_to_cpsw(ndev);
 
 	/* Check whether the queue is stopped due to stalled tx dma; if so,
 	 * restart it now that we have free descriptors for tx
 	 */
 	if (unlikely(netif_queue_stopped(ndev)))
 		netif_wake_queue(ndev);
-	cpts_tx_timestamp(priv->cpts, skb);
+	cpts_tx_timestamp(cpsw->cpts, skb);
 	ndev->stats.tx_packets++;
 	ndev->stats.tx_bytes += len;
 	dev_kfree_skb_any(skb);
@@ -693,19 +689,19 @@
 	struct sk_buff		*skb = token;
 	struct sk_buff		*new_skb;
 	struct net_device	*ndev = skb->dev;
-	struct cpsw_priv	*priv = netdev_priv(ndev);
 	int			ret = 0;
+	struct cpsw_common	*cpsw = ndev_to_cpsw(ndev);
 
-	cpsw_dual_emac_src_port_detect(status, priv, ndev, skb);
+	cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb);
 
 	if (unlikely(status < 0) || unlikely(!netif_running(ndev))) {
 		bool ndev_status = false;
-		struct cpsw_slave *slave = priv->slaves;
+		struct cpsw_slave *slave = cpsw->slaves;
 		int n;
 
-		if (priv->data.dual_emac) {
+		if (cpsw->data.dual_emac) {
 			/* In dual emac mode check for all interfaces */
-			for (n = priv->data.slaves; n; n--, slave++)
+			for (n = cpsw->data.slaves; n; n--, slave++)
 				if (netif_running(slave->ndev))
 					ndev_status = true;
 		}
@@ -726,10 +722,10 @@
 		return;
 	}
 
-	new_skb = netdev_alloc_skb_ip_align(ndev, priv->rx_packet_max);
+	new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max);
 	if (new_skb) {
 		skb_put(skb, len);
-		cpts_rx_timestamp(priv->cpts, skb);
+		cpts_rx_timestamp(cpsw->cpts, skb);
 		skb->protocol = eth_type_trans(skb, ndev);
 		netif_receive_skb(skb);
 		ndev->stats.rx_bytes += len;
@@ -741,83 +737,77 @@
 	}
 
 requeue:
-	ret = cpdma_chan_submit(priv->rxch, new_skb, new_skb->data,
-			skb_tailroom(new_skb), 0);
+	ret = cpdma_chan_submit(cpsw->rxch, new_skb, new_skb->data,
+				skb_tailroom(new_skb), 0);
 	if (WARN_ON(ret < 0))
 		dev_kfree_skb_any(new_skb);
 }
 
 static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id)
 {
-	struct cpsw_priv *priv = dev_id;
+	struct cpsw_common *cpsw = dev_id;
 
-	writel(0, &priv->wr_regs->tx_en);
-	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
+	writel(0, &cpsw->wr_regs->tx_en);
+	cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_TX);
 
-	if (priv->quirk_irq) {
-		disable_irq_nosync(priv->irqs_table[1]);
-		priv->tx_irq_disabled = true;
+	if (cpsw->quirk_irq) {
+		disable_irq_nosync(cpsw->irqs_table[1]);
+		cpsw->tx_irq_disabled = true;
 	}
 
-	napi_schedule(&priv->napi_tx);
+	napi_schedule(&cpsw->napi_tx);
 	return IRQ_HANDLED;
 }
 
 static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id)
 {
-	struct cpsw_priv *priv = dev_id;
+	struct cpsw_common *cpsw = dev_id;
 
-	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
-	writel(0, &priv->wr_regs->rx_en);
+	cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX);
+	writel(0, &cpsw->wr_regs->rx_en);
 
-	if (priv->quirk_irq) {
-		disable_irq_nosync(priv->irqs_table[0]);
-		priv->rx_irq_disabled = true;
+	if (cpsw->quirk_irq) {
+		disable_irq_nosync(cpsw->irqs_table[0]);
+		cpsw->rx_irq_disabled = true;
 	}
 
-	napi_schedule(&priv->napi_rx);
+	napi_schedule(&cpsw->napi_rx);
 	return IRQ_HANDLED;
 }
 
 static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
 {
-	struct cpsw_priv	*priv = napi_to_priv(napi_tx);
+	struct cpsw_common	*cpsw = napi_to_cpsw(napi_tx);
 	int			num_tx;
 
-	num_tx = cpdma_chan_process(priv->txch, budget);
+	num_tx = cpdma_chan_process(cpsw->txch, budget);
 	if (num_tx < budget) {
 		napi_complete(napi_tx);
-		writel(0xff, &priv->wr_regs->tx_en);
-		if (priv->quirk_irq && priv->tx_irq_disabled) {
-			priv->tx_irq_disabled = false;
-			enable_irq(priv->irqs_table[1]);
+		writel(0xff, &cpsw->wr_regs->tx_en);
+		if (cpsw->quirk_irq && cpsw->tx_irq_disabled) {
+			cpsw->tx_irq_disabled = false;
+			enable_irq(cpsw->irqs_table[1]);
 		}
 	}
 
-	if (num_tx)
-		cpsw_dbg(priv, intr, "poll %d tx pkts\n", num_tx);
-
 	return num_tx;
 }
 
 static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget)
 {
-	struct cpsw_priv	*priv = napi_to_priv(napi_rx);
+	struct cpsw_common	*cpsw = napi_to_cpsw(napi_rx);
 	int			num_rx;
 
-	num_rx = cpdma_chan_process(priv->rxch, budget);
+	num_rx = cpdma_chan_process(cpsw->rxch, budget);
 	if (num_rx < budget) {
 		napi_complete(napi_rx);
-		writel(0xff, &priv->wr_regs->rx_en);
-		if (priv->quirk_irq && priv->rx_irq_disabled) {
-			priv->rx_irq_disabled = false;
-			enable_irq(priv->irqs_table[0]);
+		writel(0xff, &cpsw->wr_regs->rx_en);
+		if (cpsw->quirk_irq && cpsw->rx_irq_disabled) {
+			cpsw->rx_irq_disabled = false;
+			enable_irq(cpsw->irqs_table[0]);
 		}
 	}
 
-	if (num_rx)
-		cpsw_dbg(priv, intr, "poll %d rx pkts\n", num_rx);
-
 	return num_rx;
 }
 
@@ -850,17 +840,18 @@
 	struct phy_device	*phy = slave->phy;
 	u32			mac_control = 0;
 	u32			slave_port;
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	if (!phy)
 		return;
 
-	slave_port = cpsw_get_slave_port(priv, slave->slave_num);
+	slave_port = cpsw_get_slave_port(slave->slave_num);
 
 	if (phy->link) {
-		mac_control = priv->data.mac_control;
+		mac_control = cpsw->data.mac_control;
 
 		/* enable forwarding */
-		cpsw_ale_control_set(priv->ale, slave_port,
+		cpsw_ale_control_set(cpsw->ale, slave_port,
 				     ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
 
 		if (phy->speed == 1000)
@@ -884,7 +875,7 @@
 	} else {
 		mac_control = 0;
 		/* disable forwarding */
-		cpsw_ale_control_set(priv->ale, slave_port,
+		cpsw_ale_control_set(cpsw->ale, slave_port,
 				     ALE_PORT_STATE, ALE_PORT_STATE_DISABLE);
 	}
 
@@ -916,9 +907,9 @@
 static int cpsw_get_coalesce(struct net_device *ndev,
 				struct ethtool_coalesce *coal)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
-	coal->rx_coalesce_usecs = priv->coal_intvl;
+	coal->rx_coalesce_usecs = cpsw->coal_intvl;
 	return 0;
 }
 
@@ -931,11 +922,12 @@
 	u32 prescale = 0;
 	u32 addnl_dvdr = 1;
 	u32 coal_intvl = 0;
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	coal_intvl = coal->rx_coalesce_usecs;
 
-	int_ctrl =  readl(&priv->wr_regs->int_control);
-	prescale = priv->bus_freq_mhz * 4;
+	int_ctrl =  readl(&cpsw->wr_regs->int_control);
+	prescale = cpsw->bus_freq_mhz * 4;
 
 	if (!coal->rx_coalesce_usecs) {
 		int_ctrl &= ~(CPSW_INTPRESCALE_MASK | CPSW_INTPACEEN);
@@ -963,27 +955,18 @@
 	}
 
 	num_interrupts = (1000 * addnl_dvdr) / coal_intvl;
-	writel(num_interrupts, &priv->wr_regs->rx_imax);
-	writel(num_interrupts, &priv->wr_regs->tx_imax);
+	writel(num_interrupts, &cpsw->wr_regs->rx_imax);
+	writel(num_interrupts, &cpsw->wr_regs->tx_imax);
 
 	int_ctrl |= CPSW_INTPACEEN;
 	int_ctrl &= (~CPSW_INTPRESCALE_MASK);
 	int_ctrl |= (prescale & CPSW_INTPRESCALE_MASK);
 
 update_return:
-	writel(int_ctrl, &priv->wr_regs->int_control);
+	writel(int_ctrl, &cpsw->wr_regs->int_control);
 
 	cpsw_notice(priv, timer, "Set coalesce to %d usecs.\n", coal_intvl);
-	if (priv->data.dual_emac) {
-		int i;
-
-		for (i = 0; i < priv->data.slaves; i++) {
-			priv = netdev_priv(priv->slaves[i].ndev);
-			priv->coal_intvl = coal_intvl;
-		}
-	} else {
-		priv->coal_intvl = coal_intvl;
-	}
+	cpsw->coal_intvl = coal_intvl;
 
 	return 0;
 }
@@ -1017,21 +1000,21 @@
 static void cpsw_get_ethtool_stats(struct net_device *ndev,
 				    struct ethtool_stats *stats, u64 *data)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
 	struct cpdma_chan_stats rx_stats;
 	struct cpdma_chan_stats tx_stats;
 	u32 val;
 	u8 *p;
 	int i;
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
 	/* Collect Davinci CPDMA stats for Rx and Tx Channel */
-	cpdma_chan_get_stats(priv->rxch, &rx_stats);
-	cpdma_chan_get_stats(priv->txch, &tx_stats);
+	cpdma_chan_get_stats(cpsw->rxch, &rx_stats);
+	cpdma_chan_get_stats(cpsw->txch, &tx_stats);
 
 	for (i = 0; i < CPSW_STATS_LEN; i++) {
 		switch (cpsw_gstrings_stats[i].type) {
 		case CPSW_STATS:
-			val = readl(priv->hw_stats +
+			val = readl(cpsw->hw_stats +
 				    cpsw_gstrings_stats[i].stat_offset);
 			data[i] = val;
 			break;
@@ -1051,52 +1034,48 @@
 	}
 }
 
-static int cpsw_common_res_usage_state(struct cpsw_priv *priv)
+static int cpsw_common_res_usage_state(struct cpsw_common *cpsw)
 {
 	u32 i;
 	u32 usage_count = 0;
 
-	if (!priv->data.dual_emac)
+	if (!cpsw->data.dual_emac)
 		return 0;
 
-	for (i = 0; i < priv->data.slaves; i++)
-		if (priv->slaves[i].open_stat)
+	for (i = 0; i < cpsw->data.slaves; i++)
+		if (cpsw->slaves[i].open_stat)
 			usage_count++;
 
 	return usage_count;
 }
 
-static inline int cpsw_tx_packet_submit(struct net_device *ndev,
-			struct cpsw_priv *priv, struct sk_buff *skb)
+static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv,
+					struct sk_buff *skb)
 {
-	if (!priv->data.dual_emac)
-		return cpdma_chan_submit(priv->txch, skb, skb->data,
-				  skb->len, 0);
+	struct cpsw_common *cpsw = priv->cpsw;
 
-	if (ndev == cpsw_get_slave_ndev(priv, 0))
-		return cpdma_chan_submit(priv->txch, skb, skb->data,
-				  skb->len, 1);
-	else
-		return cpdma_chan_submit(priv->txch, skb, skb->data,
-				  skb->len, 2);
+	return cpdma_chan_submit(cpsw->txch, skb, skb->data, skb->len,
+				 priv->emac_port + cpsw->data.dual_emac);
 }
 
 static inline void cpsw_add_dual_emac_def_ale_entries(
 		struct cpsw_priv *priv, struct cpsw_slave *slave,
 		u32 slave_port)
 {
+	struct cpsw_common *cpsw = priv->cpsw;
 	u32 port_mask = 1 << slave_port | ALE_PORT_HOST;
 
-	if (priv->version == CPSW_VERSION_1)
+	if (cpsw->version == CPSW_VERSION_1)
 		slave_write(slave, slave->port_vlan, CPSW1_PORT_VLAN);
 	else
 		slave_write(slave, slave->port_vlan, CPSW2_PORT_VLAN);
-	cpsw_ale_add_vlan(priv->ale, slave->port_vlan, port_mask,
+	cpsw_ale_add_vlan(cpsw->ale, slave->port_vlan, port_mask,
 			  port_mask, port_mask, 0);
-	cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+	cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
 			   port_mask, ALE_VLAN, slave->port_vlan, 0);
-	cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
-		HOST_PORT_NUM, ALE_VLAN | ALE_SECURE, slave->port_vlan);
+	cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr,
+			   HOST_PORT_NUM, ALE_VLAN |
+			   ALE_SECURE, slave->port_vlan);
 }
 
 static void soft_reset_slave(struct cpsw_slave *slave)
@@ -1110,13 +1089,14 @@
 static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 {
 	u32 slave_port;
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	soft_reset_slave(slave);
 
 	/* setup priority mapping */
 	__raw_writel(RX_PRIORITY_MAPPING, &slave->sliver->rx_pri_map);
 
-	switch (priv->version) {
+	switch (cpsw->version) {
 	case CPSW_VERSION_1:
 		slave_write(slave, TX_PRIORITY_MAPPING, CPSW1_TX_PRI_MAP);
 		break;
@@ -1128,17 +1108,17 @@
 	}
 
 	/* setup max packet size, and mac address */
-	__raw_writel(priv->rx_packet_max, &slave->sliver->rx_maxlen);
+	__raw_writel(cpsw->rx_packet_max, &slave->sliver->rx_maxlen);
 	cpsw_set_slave_mac(slave, priv);
 
 	slave->mac_control = 0;	/* no link yet */
 
-	slave_port = cpsw_get_slave_port(priv, slave->slave_num);
+	slave_port = cpsw_get_slave_port(slave->slave_num);
 
-	if (priv->data.dual_emac)
+	if (cpsw->data.dual_emac)
 		cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port);
 	else
-		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+		cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
 				   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
 
 	if (slave->data->phy_node) {
@@ -1168,81 +1148,83 @@
 	phy_start(slave->phy);
 
 	/* Configure GMII_SEL register */
-	cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface, slave->slave_num);
+	cpsw_phy_sel(cpsw->dev, slave->phy->interface, slave->slave_num);
 }
 
 static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
 {
-	const int vlan = priv->data.default_vlan;
+	struct cpsw_common *cpsw = priv->cpsw;
+	const int vlan = cpsw->data.default_vlan;
 	u32 reg;
 	int i;
 	int unreg_mcast_mask;
 
-	reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN :
+	reg = (cpsw->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN :
 	       CPSW2_PORT_VLAN;
 
-	writel(vlan, &priv->host_port_regs->port_vlan);
+	writel(vlan, &cpsw->host_port_regs->port_vlan);
 
-	for (i = 0; i < priv->data.slaves; i++)
-		slave_write(priv->slaves + i, vlan, reg);
+	for (i = 0; i < cpsw->data.slaves; i++)
+		slave_write(cpsw->slaves + i, vlan, reg);
 
 	if (priv->ndev->flags & IFF_ALLMULTI)
 		unreg_mcast_mask = ALE_ALL_PORTS;
 	else
 		unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2;
 
-	cpsw_ale_add_vlan(priv->ale, vlan, ALE_ALL_PORTS,
+	cpsw_ale_add_vlan(cpsw->ale, vlan, ALE_ALL_PORTS,
 			  ALE_ALL_PORTS, ALE_ALL_PORTS,
 			  unreg_mcast_mask);
 }
 
 static void cpsw_init_host_port(struct cpsw_priv *priv)
 {
-	u32 control_reg;
 	u32 fifo_mode;
+	u32 control_reg;
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	/* soft reset the controller and initialize ale */
-	soft_reset("cpsw", &priv->regs->soft_reset);
-	cpsw_ale_start(priv->ale);
+	soft_reset("cpsw", &cpsw->regs->soft_reset);
+	cpsw_ale_start(cpsw->ale);
 
 	/* switch to vlan unaware mode */
-	cpsw_ale_control_set(priv->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
+	cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
 			     CPSW_ALE_VLAN_AWARE);
-	control_reg = readl(&priv->regs->control);
+	control_reg = readl(&cpsw->regs->control);
 	control_reg |= CPSW_VLAN_AWARE;
-	writel(control_reg, &priv->regs->control);
-	fifo_mode = (priv->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
+	writel(control_reg, &cpsw->regs->control);
+	fifo_mode = (cpsw->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
 		     CPSW_FIFO_NORMAL_MODE;
-	writel(fifo_mode, &priv->host_port_regs->tx_in_ctl);
+	writel(fifo_mode, &cpsw->host_port_regs->tx_in_ctl);
 
 	/* setup host port priority mapping */
 	__raw_writel(CPDMA_TX_PRIORITY_MAP,
-		     &priv->host_port_regs->cpdma_tx_pri_map);
-	__raw_writel(0, &priv->host_port_regs->cpdma_rx_chan_map);
+		     &cpsw->host_port_regs->cpdma_tx_pri_map);
+	__raw_writel(0, &cpsw->host_port_regs->cpdma_rx_chan_map);
 
-	cpsw_ale_control_set(priv->ale, HOST_PORT_NUM,
+	cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM,
 			     ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
 
-	if (!priv->data.dual_emac) {
-		cpsw_ale_add_ucast(priv->ale, priv->mac_addr, HOST_PORT_NUM,
+	if (!cpsw->data.dual_emac) {
+		cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM,
 				   0, 0);
-		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+		cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
 				   ALE_PORT_HOST, 0, 0, ALE_MCAST_FWD_2);
 	}
 }
 
-static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv)
+static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw)
 {
 	u32 slave_port;
 
-	slave_port = cpsw_get_slave_port(priv, slave->slave_num);
+	slave_port = cpsw_get_slave_port(slave->slave_num);
 
 	if (!slave->phy)
 		return;
 	phy_stop(slave->phy);
 	phy_disconnect(slave->phy);
 	slave->phy = NULL;
-	cpsw_ale_control_set(priv->ale, slave_port,
+	cpsw_ale_control_set(cpsw->ale, slave_port,
 			     ALE_PORT_STATE, ALE_PORT_STATE_DISABLE);
 	soft_reset_slave(slave);
 }
@@ -1250,78 +1232,78 @@
 static int cpsw_ndo_open(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 	int i, ret;
 	u32 reg;
 
-	ret = pm_runtime_get_sync(&priv->pdev->dev);
+	ret = pm_runtime_get_sync(cpsw->dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(&priv->pdev->dev);
+		pm_runtime_put_noidle(cpsw->dev);
 		return ret;
 	}
 
-	if (!cpsw_common_res_usage_state(priv))
-		cpsw_intr_disable(priv);
+	if (!cpsw_common_res_usage_state(cpsw))
+		cpsw_intr_disable(cpsw);
 	netif_carrier_off(ndev);
 
-	reg = priv->version;
+	reg = cpsw->version;
 
 	dev_info(priv->dev, "initializing cpsw version %d.%d (%d)\n",
 		 CPSW_MAJOR_VERSION(reg), CPSW_MINOR_VERSION(reg),
 		 CPSW_RTL_VERSION(reg));
 
 	/* initialize host and slave ports */
-	if (!cpsw_common_res_usage_state(priv))
+	if (!cpsw_common_res_usage_state(cpsw))
 		cpsw_init_host_port(priv);
 	for_each_slave(priv, cpsw_slave_open, priv);
 
 	/* Add default VLAN */
-	if (!priv->data.dual_emac)
+	if (!cpsw->data.dual_emac)
 		cpsw_add_default_vlan(priv);
 	else
-		cpsw_ale_add_vlan(priv->ale, priv->data.default_vlan,
+		cpsw_ale_add_vlan(cpsw->ale, cpsw->data.default_vlan,
 				  ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0);
 
-	if (!cpsw_common_res_usage_state(priv)) {
-		struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(priv, 0);
+	if (!cpsw_common_res_usage_state(cpsw)) {
 		int buf_num;
 
 		/* setup tx dma to fixed prio and zero offset */
-		cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1);
-		cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0);
+		cpdma_control_set(cpsw->dma, CPDMA_TX_PRIO_FIXED, 1);
+		cpdma_control_set(cpsw->dma, CPDMA_RX_BUFFER_OFFSET, 0);
 
 		/* disable priority elevation */
-		__raw_writel(0, &priv->regs->ptype);
+		__raw_writel(0, &cpsw->regs->ptype);
 
 		/* enable statistics collection only on all ports */
-		__raw_writel(0x7, &priv->regs->stat_port_en);
+		__raw_writel(0x7, &cpsw->regs->stat_port_en);
 
 		/* Enable internal fifo flow control */
-		writel(0x7, &priv->regs->flow_control);
+		writel(0x7, &cpsw->regs->flow_control);
 
-		napi_enable(&priv_sl0->napi_rx);
-		napi_enable(&priv_sl0->napi_tx);
+		napi_enable(&cpsw->napi_rx);
+		napi_enable(&cpsw->napi_tx);
 
-		if (priv_sl0->tx_irq_disabled) {
-			priv_sl0->tx_irq_disabled = false;
-			enable_irq(priv->irqs_table[1]);
+		if (cpsw->tx_irq_disabled) {
+			cpsw->tx_irq_disabled = false;
+			enable_irq(cpsw->irqs_table[1]);
 		}
 
-		if (priv_sl0->rx_irq_disabled) {
-			priv_sl0->rx_irq_disabled = false;
-			enable_irq(priv->irqs_table[0]);
+		if (cpsw->rx_irq_disabled) {
+			cpsw->rx_irq_disabled = false;
+			enable_irq(cpsw->irqs_table[0]);
 		}
 
-		buf_num = cpdma_chan_get_rx_buf_num(priv->dma);
+		buf_num = cpdma_chan_get_rx_buf_num(cpsw->dma);
 		for (i = 0; i < buf_num; i++) {
 			struct sk_buff *skb;
 
 			ret = -ENOMEM;
 			skb = __netdev_alloc_skb_ip_align(priv->ndev,
-					priv->rx_packet_max, GFP_KERNEL);
+					cpsw->rx_packet_max, GFP_KERNEL);
 			if (!skb)
 				goto err_cleanup;
-			ret = cpdma_chan_submit(priv->rxch, skb, skb->data,
-					skb_tailroom(skb), 0);
+			ret = cpdma_chan_submit(cpsw->rxch, skb, skb->data,
+						skb_tailroom(skb), 0);
 			if (ret < 0) {
 				kfree_skb(skb);
 				goto err_cleanup;
@@ -1333,32 +1315,32 @@
 		 */
 		cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i);
 
-		if (cpts_register(&priv->pdev->dev, priv->cpts,
-				  priv->data.cpts_clock_mult,
-				  priv->data.cpts_clock_shift))
+		if (cpts_register(cpsw->dev, cpsw->cpts,
+				  cpsw->data.cpts_clock_mult,
+				  cpsw->data.cpts_clock_shift))
 			dev_err(priv->dev, "error registering cpts device\n");
 
 	}
 
 	/* Enable Interrupt pacing if configured */
-	if (priv->coal_intvl != 0) {
+	if (cpsw->coal_intvl != 0) {
 		struct ethtool_coalesce coal;
 
-		coal.rx_coalesce_usecs = priv->coal_intvl;
+		coal.rx_coalesce_usecs = cpsw->coal_intvl;
 		cpsw_set_coalesce(ndev, &coal);
 	}
 
-	cpdma_ctlr_start(priv->dma);
-	cpsw_intr_enable(priv);
+	cpdma_ctlr_start(cpsw->dma);
+	cpsw_intr_enable(cpsw);
 
-	if (priv->data.dual_emac)
-		priv->slaves[priv->emac_port].open_stat = true;
+	if (cpsw->data.dual_emac)
+		cpsw->slaves[priv->emac_port].open_stat = true;
 	return 0;
 
 err_cleanup:
-	cpdma_ctlr_stop(priv->dma);
-	for_each_slave(priv, cpsw_slave_stop, priv);
-	pm_runtime_put_sync(&priv->pdev->dev);
+	cpdma_ctlr_stop(cpsw->dma);
+	for_each_slave(priv, cpsw_slave_stop, cpsw);
+	pm_runtime_put_sync(cpsw->dev);
 	netif_carrier_off(priv->ndev);
 	return ret;
 }
@@ -1366,25 +1348,24 @@
 static int cpsw_ndo_stop(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	cpsw_info(priv, ifdown, "shutting down cpsw device\n");
 	netif_stop_queue(priv->ndev);
 	netif_carrier_off(priv->ndev);
 
-	if (cpsw_common_res_usage_state(priv) <= 1) {
-		struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(priv, 0);
-
-		napi_disable(&priv_sl0->napi_rx);
-		napi_disable(&priv_sl0->napi_tx);
-		cpts_unregister(priv->cpts);
-		cpsw_intr_disable(priv);
-		cpdma_ctlr_stop(priv->dma);
-		cpsw_ale_stop(priv->ale);
+	if (cpsw_common_res_usage_state(cpsw) <= 1) {
+		napi_disable(&cpsw->napi_rx);
+		napi_disable(&cpsw->napi_tx);
+		cpts_unregister(cpsw->cpts);
+		cpsw_intr_disable(cpsw);
+		cpdma_ctlr_stop(cpsw->dma);
+		cpsw_ale_stop(cpsw->ale);
 	}
-	for_each_slave(priv, cpsw_slave_stop, priv);
-	pm_runtime_put_sync(&priv->pdev->dev);
-	if (priv->data.dual_emac)
-		priv->slaves[priv->emac_port].open_stat = false;
+	for_each_slave(priv, cpsw_slave_stop, cpsw);
+	pm_runtime_put_sync(cpsw->dev);
+	if (cpsw->data.dual_emac)
+		cpsw->slaves[priv->emac_port].open_stat = false;
 	return 0;
 }
 
@@ -1393,6 +1374,7 @@
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
 	int ret;
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	netif_trans_update(ndev);
 
@@ -1403,12 +1385,12 @@
 	}
 
 	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-				priv->cpts->tx_enable)
+				cpsw->cpts->tx_enable)
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
 	skb_tx_timestamp(skb);
 
-	ret = cpsw_tx_packet_submit(ndev, priv, skb);
+	ret = cpsw_tx_packet_submit(priv, skb);
 	if (unlikely(ret != 0)) {
 		cpsw_err(priv, tx_err, "desc submit failed\n");
 		goto fail;
@@ -1417,7 +1399,7 @@
 	/* If there is no more tx desc left free then we need to
 	 * tell the kernel to stop sending us tx frames.
 	 */
-	if (unlikely(!cpdma_check_free_tx_desc(priv->txch)))
+	if (unlikely(!cpdma_check_free_tx_desc(cpsw->txch)))
 		netif_stop_queue(ndev);
 
 	return NETDEV_TX_OK;
@@ -1429,12 +1411,12 @@
 
 #ifdef CONFIG_TI_CPTS
 
-static void cpsw_hwtstamp_v1(struct cpsw_priv *priv)
+static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
 {
-	struct cpsw_slave *slave = &priv->slaves[priv->data.active_slave];
+	struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave];
 	u32 ts_en, seq_id;
 
-	if (!priv->cpts->tx_enable && !priv->cpts->rx_enable) {
+	if (!cpsw->cpts->tx_enable && !cpsw->cpts->rx_enable) {
 		slave_write(slave, 0, CPSW1_TS_CTL);
 		return;
 	}
@@ -1442,10 +1424,10 @@
 	seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588;
 	ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS;
 
-	if (priv->cpts->tx_enable)
+	if (cpsw->cpts->tx_enable)
 		ts_en |= CPSW_V1_TS_TX_EN;
 
-	if (priv->cpts->rx_enable)
+	if (cpsw->cpts->rx_enable)
 		ts_en |= CPSW_V1_TS_RX_EN;
 
 	slave_write(slave, ts_en, CPSW1_TS_CTL);
@@ -1455,32 +1437,33 @@
 static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
 {
 	struct cpsw_slave *slave;
+	struct cpsw_common *cpsw = priv->cpsw;
 	u32 ctrl, mtype;
 
-	if (priv->data.dual_emac)
-		slave = &priv->slaves[priv->emac_port];
+	if (cpsw->data.dual_emac)
+		slave = &cpsw->slaves[priv->emac_port];
 	else
-		slave = &priv->slaves[priv->data.active_slave];
+		slave = &cpsw->slaves[cpsw->data.active_slave];
 
 	ctrl = slave_read(slave, CPSW2_CONTROL);
-	switch (priv->version) {
+	switch (cpsw->version) {
 	case CPSW_VERSION_2:
 		ctrl &= ~CTRL_V2_ALL_TS_MASK;
 
-		if (priv->cpts->tx_enable)
+		if (cpsw->cpts->tx_enable)
 			ctrl |= CTRL_V2_TX_TS_BITS;
 
-		if (priv->cpts->rx_enable)
+		if (cpsw->cpts->rx_enable)
 			ctrl |= CTRL_V2_RX_TS_BITS;
 		break;
 	case CPSW_VERSION_3:
 	default:
 		ctrl &= ~CTRL_V3_ALL_TS_MASK;
 
-		if (priv->cpts->tx_enable)
+		if (cpsw->cpts->tx_enable)
 			ctrl |= CTRL_V3_TX_TS_BITS;
 
-		if (priv->cpts->rx_enable)
+		if (cpsw->cpts->rx_enable)
 			ctrl |= CTRL_V3_RX_TS_BITS;
 		break;
 	}
@@ -1489,18 +1472,19 @@
 
 	slave_write(slave, mtype, CPSW2_TS_SEQ_MTYPE);
 	slave_write(slave, ctrl, CPSW2_CONTROL);
-	__raw_writel(ETH_P_1588, &priv->regs->ts_ltype);
+	__raw_writel(ETH_P_1588, &cpsw->regs->ts_ltype);
 }
 
 static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 {
 	struct cpsw_priv *priv = netdev_priv(dev);
-	struct cpts *cpts = priv->cpts;
 	struct hwtstamp_config cfg;
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpts *cpts = cpsw->cpts;
 
-	if (priv->version != CPSW_VERSION_1 &&
-	    priv->version != CPSW_VERSION_2 &&
-	    priv->version != CPSW_VERSION_3)
+	if (cpsw->version != CPSW_VERSION_1 &&
+	    cpsw->version != CPSW_VERSION_2 &&
+	    cpsw->version != CPSW_VERSION_3)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
@@ -1540,9 +1524,9 @@
 
 	cpts->tx_enable = cfg.tx_type == HWTSTAMP_TX_ON;
 
-	switch (priv->version) {
+	switch (cpsw->version) {
 	case CPSW_VERSION_1:
-		cpsw_hwtstamp_v1(priv);
+		cpsw_hwtstamp_v1(cpsw);
 		break;
 	case CPSW_VERSION_2:
 	case CPSW_VERSION_3:
@@ -1557,13 +1541,13 @@
 
 static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
 {
-	struct cpsw_priv *priv = netdev_priv(dev);
-	struct cpts *cpts = priv->cpts;
+	struct cpsw_common *cpsw = ndev_to_cpsw(dev);
+	struct cpts *cpts = cpsw->cpts;
 	struct hwtstamp_config cfg;
 
-	if (priv->version != CPSW_VERSION_1 &&
-	    priv->version != CPSW_VERSION_2 &&
-	    priv->version != CPSW_VERSION_3)
+	if (cpsw->version != CPSW_VERSION_1 &&
+	    cpsw->version != CPSW_VERSION_2 &&
+	    cpsw->version != CPSW_VERSION_3)
 		return -EOPNOTSUPP;
 
 	cfg.flags = 0;
@@ -1579,7 +1563,8 @@
 static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 {
 	struct cpsw_priv *priv = netdev_priv(dev);
-	int slave_no = cpsw_slave_index(priv);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
 
 	if (!netif_running(dev))
 		return -EINVAL;
@@ -1593,27 +1578,29 @@
 #endif
 	}
 
-	if (!priv->slaves[slave_no].phy)
+	if (!cpsw->slaves[slave_no].phy)
 		return -EOPNOTSUPP;
-	return phy_mii_ioctl(priv->slaves[slave_no].phy, req, cmd);
+	return phy_mii_ioctl(cpsw->slaves[slave_no].phy, req, cmd);
 }
 
 static void cpsw_ndo_tx_timeout(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	cpsw_err(priv, tx_err, "transmit timeout, restarting dma\n");
 	ndev->stats.tx_errors++;
-	cpsw_intr_disable(priv);
-	cpdma_chan_stop(priv->txch);
-	cpdma_chan_start(priv->txch);
-	cpsw_intr_enable(priv);
+	cpsw_intr_disable(cpsw);
+	cpdma_chan_stop(cpsw->txch);
+	cpdma_chan_start(cpsw->txch);
+	cpsw_intr_enable(cpsw);
 }
 
 static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
 	struct sockaddr *addr = (struct sockaddr *)p;
+	struct cpsw_common *cpsw = priv->cpsw;
 	int flags = 0;
 	u16 vid = 0;
 	int ret;
@@ -1621,27 +1608,27 @@
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	ret = pm_runtime_get_sync(&priv->pdev->dev);
+	ret = pm_runtime_get_sync(cpsw->dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(&priv->pdev->dev);
+		pm_runtime_put_noidle(cpsw->dev);
 		return ret;
 	}
 
-	if (priv->data.dual_emac) {
-		vid = priv->slaves[priv->emac_port].port_vlan;
+	if (cpsw->data.dual_emac) {
+		vid = cpsw->slaves[priv->emac_port].port_vlan;
 		flags = ALE_VLAN;
 	}
 
-	cpsw_ale_del_ucast(priv->ale, priv->mac_addr, HOST_PORT_NUM,
+	cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM,
 			   flags, vid);
-	cpsw_ale_add_ucast(priv->ale, addr->sa_data, HOST_PORT_NUM,
+	cpsw_ale_add_ucast(cpsw->ale, addr->sa_data, HOST_PORT_NUM,
 			   flags, vid);
 
 	memcpy(priv->mac_addr, addr->sa_data, ETH_ALEN);
 	memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
 	for_each_slave(priv, cpsw_set_slave_mac, priv);
 
-	pm_runtime_put(&priv->pdev->dev);
+	pm_runtime_put(cpsw->dev);
 
 	return 0;
 }
@@ -1649,12 +1636,12 @@
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void cpsw_ndo_poll_controller(struct net_device *ndev)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
-	cpsw_intr_disable(priv);
-	cpsw_rx_interrupt(priv->irqs_table[0], priv);
-	cpsw_tx_interrupt(priv->irqs_table[1], priv);
-	cpsw_intr_enable(priv);
+	cpsw_intr_disable(cpsw);
+	cpsw_rx_interrupt(cpsw->irqs_table[0], cpsw);
+	cpsw_tx_interrupt(cpsw->irqs_table[1], cpsw);
+	cpsw_intr_enable(cpsw);
 }
 #endif
 
@@ -1664,8 +1651,9 @@
 	int ret;
 	int unreg_mcast_mask = 0;
 	u32 port_mask;
+	struct cpsw_common *cpsw = priv->cpsw;
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		port_mask = (1 << (priv->emac_port + 1)) | ALE_PORT_HOST;
 
 		if (priv->ndev->flags & IFF_ALLMULTI)
@@ -1679,27 +1667,27 @@
 			unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2;
 	}
 
-	ret = cpsw_ale_add_vlan(priv->ale, vid, port_mask, 0, port_mask,
+	ret = cpsw_ale_add_vlan(cpsw->ale, vid, port_mask, 0, port_mask,
 				unreg_mcast_mask);
 	if (ret != 0)
 		return ret;
 
-	ret = cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
+	ret = cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr,
 				 HOST_PORT_NUM, ALE_VLAN, vid);
 	if (ret != 0)
 		goto clean_vid;
 
-	ret = cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+	ret = cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
 				 port_mask, ALE_VLAN, vid, 0);
 	if (ret != 0)
 		goto clean_vlan_ucast;
 	return 0;
 
 clean_vlan_ucast:
-	cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
+	cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr,
 			   HOST_PORT_NUM, ALE_VLAN, vid);
 clean_vid:
-	cpsw_ale_del_vlan(priv->ale, vid, 0);
+	cpsw_ale_del_vlan(cpsw->ale, vid, 0);
 	return ret;
 }
 
@@ -1707,26 +1695,27 @@
 				    __be16 proto, u16 vid)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 	int ret;
 
-	if (vid == priv->data.default_vlan)
+	if (vid == cpsw->data.default_vlan)
 		return 0;
 
-	ret = pm_runtime_get_sync(&priv->pdev->dev);
+	ret = pm_runtime_get_sync(cpsw->dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(&priv->pdev->dev);
+		pm_runtime_put_noidle(cpsw->dev);
 		return ret;
 	}
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		/* In dual EMAC, reserved VLAN id should not be used for
 		 * creating VLAN interfaces as this can break the dual
 		 * EMAC port separation
 		 */
 		int i;
 
-		for (i = 0; i < priv->data.slaves; i++) {
-			if (vid == priv->slaves[i].port_vlan)
+		for (i = 0; i < cpsw->data.slaves; i++) {
+			if (vid == cpsw->slaves[i].port_vlan)
 				return -EINVAL;
 		}
 	}
@@ -1734,7 +1723,7 @@
 	dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid);
 	ret = cpsw_add_vlan_ale_entry(priv, vid);
 
-	pm_runtime_put(&priv->pdev->dev);
+	pm_runtime_put(cpsw->dev);
 	return ret;
 }
 
@@ -1742,39 +1731,40 @@
 				     __be16 proto, u16 vid)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 	int ret;
 
-	if (vid == priv->data.default_vlan)
+	if (vid == cpsw->data.default_vlan)
 		return 0;
 
-	ret = pm_runtime_get_sync(&priv->pdev->dev);
+	ret = pm_runtime_get_sync(cpsw->dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(&priv->pdev->dev);
+		pm_runtime_put_noidle(cpsw->dev);
 		return ret;
 	}
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		int i;
 
-		for (i = 0; i < priv->data.slaves; i++) {
-			if (vid == priv->slaves[i].port_vlan)
+		for (i = 0; i < cpsw->data.slaves; i++) {
+			if (vid == cpsw->slaves[i].port_vlan)
 				return -EINVAL;
 		}
 	}
 
 	dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid);
-	ret = cpsw_ale_del_vlan(priv->ale, vid, 0);
+	ret = cpsw_ale_del_vlan(cpsw->ale, vid, 0);
 	if (ret != 0)
 		return ret;
 
-	ret = cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
+	ret = cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr,
 				 HOST_PORT_NUM, ALE_VLAN, vid);
 	if (ret != 0)
 		return ret;
 
-	ret = cpsw_ale_del_mcast(priv->ale, priv->ndev->broadcast,
+	ret = cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast,
 				 0, ALE_VLAN, vid);
-	pm_runtime_put(&priv->pdev->dev);
+	pm_runtime_put(cpsw->dev);
 	return ret;
 }
 
@@ -1797,31 +1787,32 @@
 
 static int cpsw_get_regs_len(struct net_device *ndev)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
-	return priv->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32);
+	return cpsw->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32);
 }
 
 static void cpsw_get_regs(struct net_device *ndev,
 			  struct ethtool_regs *regs, void *p)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
 	u32 *reg = p;
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
 	/* update CPSW IP version */
-	regs->version = priv->version;
+	regs->version = cpsw->version;
 
-	cpsw_ale_dump(priv->ale, reg);
+	cpsw_ale_dump(cpsw->ale, reg);
 }
 
 static void cpsw_get_drvinfo(struct net_device *ndev,
 			     struct ethtool_drvinfo *info)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+	struct platform_device	*pdev = to_platform_device(cpsw->dev);
 
 	strlcpy(info->driver, "cpsw", sizeof(info->driver));
 	strlcpy(info->version, "1.0", sizeof(info->version));
-	strlcpy(info->bus_info, priv->pdev->name, sizeof(info->bus_info));
+	strlcpy(info->bus_info, pdev->name, sizeof(info->bus_info));
 }
 
 static u32 cpsw_get_msglevel(struct net_device *ndev)
@@ -1840,7 +1831,7 @@
 			    struct ethtool_ts_info *info)
 {
 #ifdef CONFIG_TI_CPTS
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
 	info->so_timestamping =
 		SOF_TIMESTAMPING_TX_HARDWARE |
@@ -1849,7 +1840,7 @@
 		SOF_TIMESTAMPING_RX_SOFTWARE |
 		SOF_TIMESTAMPING_SOFTWARE |
 		SOF_TIMESTAMPING_RAW_HARDWARE;
-	info->phc_index = priv->cpts->phc_index;
+	info->phc_index = cpsw->cpts->phc_index;
 	info->tx_types =
 		(1 << HWTSTAMP_TX_OFF) |
 		(1 << HWTSTAMP_TX_ON);
@@ -1872,10 +1863,11 @@
 			     struct ethtool_cmd *ecmd)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
-	int slave_no = cpsw_slave_index(priv);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
 
-	if (priv->slaves[slave_no].phy)
-		return phy_ethtool_gset(priv->slaves[slave_no].phy, ecmd);
+	if (cpsw->slaves[slave_no].phy)
+		return phy_ethtool_gset(cpsw->slaves[slave_no].phy, ecmd);
 	else
 		return -EOPNOTSUPP;
 }
@@ -1883,10 +1875,11 @@
 static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
-	int slave_no = cpsw_slave_index(priv);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
 
-	if (priv->slaves[slave_no].phy)
-		return phy_ethtool_sset(priv->slaves[slave_no].phy, ecmd);
+	if (cpsw->slaves[slave_no].phy)
+		return phy_ethtool_sset(cpsw->slaves[slave_no].phy, ecmd);
 	else
 		return -EOPNOTSUPP;
 }
@@ -1894,22 +1887,24 @@
 static void cpsw_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
-	int slave_no = cpsw_slave_index(priv);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
 
 	wol->supported = 0;
 	wol->wolopts = 0;
 
-	if (priv->slaves[slave_no].phy)
-		phy_ethtool_get_wol(priv->slaves[slave_no].phy, wol);
+	if (cpsw->slaves[slave_no].phy)
+		phy_ethtool_get_wol(cpsw->slaves[slave_no].phy, wol);
 }
 
 static int cpsw_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
-	int slave_no = cpsw_slave_index(priv);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
 
-	if (priv->slaves[slave_no].phy)
-		return phy_ethtool_set_wol(priv->slaves[slave_no].phy, wol);
+	if (cpsw->slaves[slave_no].phy)
+		return phy_ethtool_set_wol(cpsw->slaves[slave_no].phy, wol);
 	else
 		return -EOPNOTSUPP;
 }
@@ -1940,12 +1935,13 @@
 static int cpsw_ethtool_op_begin(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 	int ret;
 
-	ret = pm_runtime_get_sync(&priv->pdev->dev);
+	ret = pm_runtime_get_sync(cpsw->dev);
 	if (ret < 0) {
 		cpsw_err(priv, drv, "ethtool begin failed %d\n", ret);
-		pm_runtime_put_noidle(&priv->pdev->dev);
+		pm_runtime_put_noidle(cpsw->dev);
 	}
 
 	return ret;
@@ -1956,7 +1952,7 @@
 	struct cpsw_priv *priv = netdev_priv(ndev);
 	int ret;
 
-	ret = pm_runtime_put(&priv->pdev->dev);
+	ret = pm_runtime_put(priv->cpsw->dev);
 	if (ret < 0)
 		cpsw_err(priv, drv, "ethtool complete failed %d\n", ret);
 }
@@ -1984,12 +1980,12 @@
 	.complete	= cpsw_ethtool_op_complete,
 };
 
-static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv,
+static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw,
 			    u32 slave_reg_ofs, u32 sliver_reg_ofs)
 {
-	void __iomem		*regs = priv->regs;
+	void __iomem		*regs = cpsw->regs;
 	int			slave_num = slave->slave_num;
-	struct cpsw_slave_data	*data = priv->data.slave_data + slave_num;
+	struct cpsw_slave_data	*data = cpsw->data.slave_data + slave_num;
 
 	slave->data	= data;
 	slave->regs	= regs + slave_reg_ofs;
@@ -2160,71 +2156,50 @@
 	return 0;
 }
 
-static int cpsw_probe_dual_emac(struct platform_device *pdev,
-				struct cpsw_priv *priv)
+static int cpsw_probe_dual_emac(struct cpsw_priv *priv)
 {
-	struct cpsw_platform_data	*data = &priv->data;
+	struct cpsw_common		*cpsw = priv->cpsw;
+	struct cpsw_platform_data	*data = &cpsw->data;
 	struct net_device		*ndev;
 	struct cpsw_priv		*priv_sl2;
-	int ret = 0, i;
+	int ret = 0;
 
 	ndev = alloc_etherdev(sizeof(struct cpsw_priv));
 	if (!ndev) {
-		dev_err(&pdev->dev, "cpsw: error allocating net_device\n");
+		dev_err(cpsw->dev, "cpsw: error allocating net_device\n");
 		return -ENOMEM;
 	}
 
 	priv_sl2 = netdev_priv(ndev);
-	priv_sl2->data = *data;
-	priv_sl2->pdev = pdev;
+	priv_sl2->cpsw = cpsw;
 	priv_sl2->ndev = ndev;
 	priv_sl2->dev  = &ndev->dev;
 	priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
-	priv_sl2->rx_packet_max = max(rx_packet_max, 128);
 
 	if (is_valid_ether_addr(data->slave_data[1].mac_addr)) {
 		memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr,
 			ETH_ALEN);
-		dev_info(&pdev->dev, "cpsw: Detected MACID = %pM\n", priv_sl2->mac_addr);
+		dev_info(cpsw->dev, "cpsw: Detected MACID = %pM\n",
+			 priv_sl2->mac_addr);
 	} else {
 		random_ether_addr(priv_sl2->mac_addr);
-		dev_info(&pdev->dev, "cpsw: Random MACID = %pM\n", priv_sl2->mac_addr);
+		dev_info(cpsw->dev, "cpsw: Random MACID = %pM\n",
+			 priv_sl2->mac_addr);
 	}
 	memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN);
 
-	priv_sl2->slaves = priv->slaves;
-	priv_sl2->clk = priv->clk;
-
-	priv_sl2->coal_intvl = 0;
-	priv_sl2->bus_freq_mhz = priv->bus_freq_mhz;
-
-	priv_sl2->regs = priv->regs;
-	priv_sl2->host_port_regs = priv->host_port_regs;
-	priv_sl2->wr_regs = priv->wr_regs;
-	priv_sl2->hw_stats = priv->hw_stats;
-	priv_sl2->dma = priv->dma;
-	priv_sl2->txch = priv->txch;
-	priv_sl2->rxch = priv->rxch;
-	priv_sl2->ale = priv->ale;
 	priv_sl2->emac_port = 1;
-	priv->slaves[1].ndev = ndev;
-	priv_sl2->cpts = priv->cpts;
-	priv_sl2->version = priv->version;
-
-	for (i = 0; i < priv->num_irqs; i++) {
-		priv_sl2->irqs_table[i] = priv->irqs_table[i];
-		priv_sl2->num_irqs = priv->num_irqs;
-	}
+	cpsw->slaves[1].ndev = ndev;
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
 	ndev->ethtool_ops = &cpsw_ethtool_ops;
 
 	/* register the network device */
-	SET_NETDEV_DEV(ndev, &pdev->dev);
+	SET_NETDEV_DEV(ndev, cpsw->dev);
 	ret = register_netdev(ndev);
 	if (ret) {
-		dev_err(&pdev->dev, "cpsw: error registering net device\n");
+		dev_err(cpsw->dev, "cpsw: error registering net device\n");
 		free_netdev(ndev);
 		ret = -ENODEV;
 	}
@@ -2272,6 +2247,7 @@
 
 static int cpsw_probe(struct platform_device *pdev)
 {
+	struct clk			*clk;
 	struct cpsw_platform_data	*data;
 	struct net_device		*ndev;
 	struct cpsw_priv		*priv;
@@ -2282,9 +2258,13 @@
 	const struct of_device_id	*of_id;
 	struct gpio_descs		*mode;
 	u32 slave_offset, sliver_offset, slave_size;
+	struct cpsw_common		*cpsw;
 	int ret = 0, i;
 	int irq;
 
+	cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL);
+	cpsw->dev = &pdev->dev;
+
 	ndev = alloc_etherdev(sizeof(struct cpsw_priv));
 	if (!ndev) {
 		dev_err(&pdev->dev, "error allocating net_device\n");
@@ -2293,13 +2273,13 @@
 
 	platform_set_drvdata(pdev, ndev);
 	priv = netdev_priv(ndev);
-	priv->pdev = pdev;
+	priv->cpsw = cpsw;
 	priv->ndev = ndev;
 	priv->dev  = &ndev->dev;
 	priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
-	priv->rx_packet_max = max(rx_packet_max, 128);
-	priv->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
-	if (!priv->cpts) {
+	cpsw->rx_packet_max = max(rx_packet_max, 128);
+	cpsw->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
+	if (!cpsw->cpts) {
 		dev_err(&pdev->dev, "error allocating cpts\n");
 		ret = -ENOMEM;
 		goto clean_ndev_ret;
@@ -2320,12 +2300,12 @@
 	/* Select default pin state */
 	pinctrl_pm_select_default_state(&pdev->dev);
 
-	if (cpsw_probe_dt(&priv->data, pdev)) {
+	if (cpsw_probe_dt(&cpsw->data, pdev)) {
 		dev_err(&pdev->dev, "cpsw: platform data missing\n");
 		ret = -ENODEV;
 		goto clean_runtime_disable_ret;
 	}
-	data = &priv->data;
+	data = &cpsw->data;
 
 	if (is_valid_ether_addr(data->slave_data[0].mac_addr)) {
 		memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN);
@@ -2337,27 +2317,26 @@
 
 	memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
 
-	priv->slaves = devm_kzalloc(&pdev->dev,
+	cpsw->slaves = devm_kzalloc(&pdev->dev,
 				    sizeof(struct cpsw_slave) * data->slaves,
 				    GFP_KERNEL);
-	if (!priv->slaves) {
+	if (!cpsw->slaves) {
 		ret = -ENOMEM;
 		goto clean_runtime_disable_ret;
 	}
 	for (i = 0; i < data->slaves; i++)
-		priv->slaves[i].slave_num = i;
+		cpsw->slaves[i].slave_num = i;
 
-	priv->slaves[0].ndev = ndev;
+	cpsw->slaves[0].ndev = ndev;
 	priv->emac_port = 0;
 
-	priv->clk = devm_clk_get(&pdev->dev, "fck");
-	if (IS_ERR(priv->clk)) {
+	clk = devm_clk_get(&pdev->dev, "fck");
+	if (IS_ERR(clk)) {
 		dev_err(priv->dev, "fck is not found\n");
 		ret = -ENODEV;
 		goto clean_runtime_disable_ret;
 	}
-	priv->coal_intvl = 0;
-	priv->bus_freq_mhz = clk_get_rate(priv->clk) / 1000000;
+	cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000;
 
 	ss_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ss_regs = devm_ioremap_resource(&pdev->dev, ss_res);
@@ -2365,7 +2344,7 @@
 		ret = PTR_ERR(ss_regs);
 		goto clean_runtime_disable_ret;
 	}
-	priv->regs = ss_regs;
+	cpsw->regs = ss_regs;
 
 	/* Need to enable clocks with runtime PM api to access module
 	 * registers
@@ -2375,24 +2354,24 @@
 		pm_runtime_put_noidle(&pdev->dev);
 		goto clean_runtime_disable_ret;
 	}
-	priv->version = readl(&priv->regs->id_ver);
+	cpsw->version = readl(&cpsw->regs->id_ver);
 	pm_runtime_put_sync(&pdev->dev);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	priv->wr_regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(priv->wr_regs)) {
-		ret = PTR_ERR(priv->wr_regs);
+	cpsw->wr_regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(cpsw->wr_regs)) {
+		ret = PTR_ERR(cpsw->wr_regs);
 		goto clean_runtime_disable_ret;
 	}
 
 	memset(&dma_params, 0, sizeof(dma_params));
 	memset(&ale_params, 0, sizeof(ale_params));
 
-	switch (priv->version) {
+	switch (cpsw->version) {
 	case CPSW_VERSION_1:
-		priv->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
-		priv->cpts->reg      = ss_regs + CPSW1_CPTS_OFFSET;
-		priv->hw_stats	     = ss_regs + CPSW1_HW_STATS;
+		cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
+		cpsw->cpts->reg      = ss_regs + CPSW1_CPTS_OFFSET;
+		cpsw->hw_stats	     = ss_regs + CPSW1_HW_STATS;
 		dma_params.dmaregs   = ss_regs + CPSW1_CPDMA_OFFSET;
 		dma_params.txhdp     = ss_regs + CPSW1_STATERAM_OFFSET;
 		ale_params.ale_regs  = ss_regs + CPSW1_ALE_OFFSET;
@@ -2404,9 +2383,9 @@
 	case CPSW_VERSION_2:
 	case CPSW_VERSION_3:
 	case CPSW_VERSION_4:
-		priv->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
-		priv->cpts->reg      = ss_regs + CPSW2_CPTS_OFFSET;
-		priv->hw_stats	     = ss_regs + CPSW2_HW_STATS;
+		cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
+		cpsw->cpts->reg      = ss_regs + CPSW2_CPTS_OFFSET;
+		cpsw->hw_stats	     = ss_regs + CPSW2_HW_STATS;
 		dma_params.dmaregs   = ss_regs + CPSW2_CPDMA_OFFSET;
 		dma_params.txhdp     = ss_regs + CPSW2_STATERAM_OFFSET;
 		ale_params.ale_regs  = ss_regs + CPSW2_ALE_OFFSET;
@@ -2417,13 +2396,14 @@
 			(u32 __force) ss_res->start + CPSW2_BD_OFFSET;
 		break;
 	default:
-		dev_err(priv->dev, "unknown version 0x%08x\n", priv->version);
+		dev_err(priv->dev, "unknown version 0x%08x\n", cpsw->version);
 		ret = -ENODEV;
 		goto clean_runtime_disable_ret;
 	}
-	for (i = 0; i < priv->data.slaves; i++) {
-		struct cpsw_slave *slave = &priv->slaves[i];
-		cpsw_slave_init(slave, priv, slave_offset, sliver_offset);
+	for (i = 0; i < cpsw->data.slaves; i++) {
+		struct cpsw_slave *slave = &cpsw->slaves[i];
+
+		cpsw_slave_init(slave, cpsw, slave_offset, sliver_offset);
 		slave_offset  += slave_size;
 		sliver_offset += SLIVER_SIZE;
 	}
@@ -2443,19 +2423,19 @@
 	dma_params.has_ext_regs		= true;
 	dma_params.desc_hw_addr         = dma_params.desc_mem_phys;
 
-	priv->dma = cpdma_ctlr_create(&dma_params);
-	if (!priv->dma) {
+	cpsw->dma = cpdma_ctlr_create(&dma_params);
+	if (!cpsw->dma) {
 		dev_err(priv->dev, "error initializing dma\n");
 		ret = -ENOMEM;
 		goto clean_runtime_disable_ret;
 	}
 
-	priv->txch = cpdma_chan_create(priv->dma, tx_chan_num(0),
+	cpsw->txch = cpdma_chan_create(cpsw->dma, tx_chan_num(0),
 				       cpsw_tx_handler);
-	priv->rxch = cpdma_chan_create(priv->dma, rx_chan_num(0),
+	cpsw->rxch = cpdma_chan_create(cpsw->dma, rx_chan_num(0),
 				       cpsw_rx_handler);
 
-	if (WARN_ON(!priv->txch || !priv->rxch)) {
+	if (WARN_ON(!cpsw->txch || !cpsw->rxch)) {
 		dev_err(priv->dev, "error initializing dma channels\n");
 		ret = -ENOMEM;
 		goto clean_dma_ret;
@@ -2466,8 +2446,8 @@
 	ale_params.ale_entries		= data->ale_entries;
 	ale_params.ale_ports		= data->slaves;
 
-	priv->ale = cpsw_ale_create(&ale_params);
-	if (!priv->ale) {
+	cpsw->ale = cpsw_ale_create(&ale_params);
+	if (!cpsw->ale) {
 		dev_err(priv->dev, "error initializing ale engine\n");
 		ret = -ENODEV;
 		goto clean_dma_ret;
@@ -2484,7 +2464,7 @@
 	if (of_id) {
 		pdev->id_entry = of_id->data;
 		if (pdev->id_entry->driver_data)
-			priv->quirk_irq = true;
+			cpsw->quirk_irq = true;
 	}
 
 	/* Grab RX and TX IRQs. Note that we also have RX_THRESHOLD and
@@ -2502,9 +2482,9 @@
 		goto clean_ale_ret;
 	}
 
-	priv->irqs_table[0] = irq;
+	cpsw->irqs_table[0] = irq;
 	ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt,
-			       0, dev_name(&pdev->dev), priv);
+			       0, dev_name(&pdev->dev), cpsw);
 	if (ret < 0) {
 		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
 		goto clean_ale_ret;
@@ -2517,21 +2497,20 @@
 		goto clean_ale_ret;
 	}
 
-	priv->irqs_table[1] = irq;
+	cpsw->irqs_table[1] = irq;
 	ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt,
-			       0, dev_name(&pdev->dev), priv);
+			       0, dev_name(&pdev->dev), cpsw);
 	if (ret < 0) {
 		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
 		goto clean_ale_ret;
 	}
-	priv->num_irqs = 2;
 
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
 	ndev->ethtool_ops = &cpsw_ethtool_ops;
-	netif_napi_add(ndev, &priv->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
-	netif_tx_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
+	netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
+	netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
 
 	/* register the network device */
 	SET_NETDEV_DEV(ndev, &pdev->dev);
@@ -2545,8 +2524,8 @@
 	cpsw_notice(priv, probe, "initialized device (regs %pa, irq %d)\n",
 		    &ss_res->start, ndev->irq);
 
-	if (priv->data.dual_emac) {
-		ret = cpsw_probe_dual_emac(pdev, priv);
+	if (cpsw->data.dual_emac) {
+		ret = cpsw_probe_dual_emac(priv);
 		if (ret) {
 			cpsw_err(priv, probe, "error probe slave 2 emac interface\n");
 			goto clean_ale_ret;
@@ -2556,9 +2535,9 @@
 	return 0;
 
 clean_ale_ret:
-	cpsw_ale_destroy(priv->ale);
+	cpsw_ale_destroy(cpsw->ale);
 clean_dma_ret:
-	cpdma_ctlr_destroy(priv->dma);
+	cpdma_ctlr_destroy(cpsw->dma);
 clean_runtime_disable_ret:
 	pm_runtime_disable(&pdev->dev);
 clean_ndev_ret:
@@ -2569,7 +2548,7 @@
 static int cpsw_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 	int ret;
 
 	ret = pm_runtime_get_sync(&pdev->dev);
@@ -2578,17 +2557,17 @@
 		return ret;
 	}
 
-	if (priv->data.dual_emac)
-		unregister_netdev(cpsw_get_slave_ndev(priv, 1));
+	if (cpsw->data.dual_emac)
+		unregister_netdev(cpsw->slaves[1].ndev);
 	unregister_netdev(ndev);
 
-	cpsw_ale_destroy(priv->ale);
-	cpdma_ctlr_destroy(priv->dma);
+	cpsw_ale_destroy(cpsw->ale);
+	cpdma_ctlr_destroy(cpsw->dma);
 	of_platform_depopulate(&pdev->dev);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-	if (priv->data.dual_emac)
-		free_netdev(cpsw_get_slave_ndev(priv, 1));
+	if (cpsw->data.dual_emac)
+		free_netdev(cpsw->slaves[1].ndev);
 	free_netdev(ndev);
 	return 0;
 }
@@ -2598,14 +2577,14 @@
 {
 	struct platform_device	*pdev = to_platform_device(dev);
 	struct net_device	*ndev = platform_get_drvdata(pdev);
-	struct cpsw_priv	*priv = netdev_priv(ndev);
+	struct cpsw_common	*cpsw = ndev_to_cpsw(ndev);
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		int i;
 
-		for (i = 0; i < priv->data.slaves; i++) {
-			if (netif_running(priv->slaves[i].ndev))
-				cpsw_ndo_stop(priv->slaves[i].ndev);
+		for (i = 0; i < cpsw->data.slaves; i++) {
+			if (netif_running(cpsw->slaves[i].ndev))
+				cpsw_ndo_stop(cpsw->slaves[i].ndev);
 		}
 	} else {
 		if (netif_running(ndev))
@@ -2613,7 +2592,7 @@
 	}
 
 	/* Select sleep pin state */
-	pinctrl_pm_select_sleep_state(&pdev->dev);
+	pinctrl_pm_select_sleep_state(dev);
 
 	return 0;
 }
@@ -2622,17 +2601,17 @@
 {
 	struct platform_device	*pdev = to_platform_device(dev);
 	struct net_device	*ndev = platform_get_drvdata(pdev);
-	struct cpsw_priv	*priv = netdev_priv(ndev);
+	struct cpsw_common	*cpsw = netdev_priv(ndev);
 
 	/* Select default pin state */
-	pinctrl_pm_select_default_state(&pdev->dev);
+	pinctrl_pm_select_default_state(dev);
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		int i;
 
-		for (i = 0; i < priv->data.slaves; i++) {
-			if (netif_running(priv->slaves[i].ndev))
-				cpsw_ndo_open(priv->slaves[i].ndev);
+		for (i = 0; i < cpsw->data.slaves; i++) {
+			if (netif_running(cpsw->slaves[i].ndev))
+				cpsw_ndo_open(cpsw->slaves[i].ndev);
 		}
 	} else {
 		if (netif_running(ndev))
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 19e5f32..cf72b33 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -86,7 +86,7 @@
 	void __iomem		*iomap;		/* ioremap map */
 	void			*cpumap;	/* dma_alloc map */
 	int			desc_size, mem_size;
-	int			num_desc, used_desc;
+	int			num_desc;
 	struct device		*dev;
 	struct gen_pool		*gen_pool;
 };
@@ -148,7 +148,10 @@
 	if (!pool)
 		return;
 
-	WARN_ON(pool->used_desc);
+	WARN(gen_pool_size(pool->gen_pool) != gen_pool_avail(pool->gen_pool),
+	     "cpdma_desc_pool size %d != avail %d",
+	     gen_pool_size(pool->gen_pool),
+	     gen_pool_avail(pool->gen_pool));
 	if (pool->cpumap)
 		dma_free_coherent(pool->dev, pool->mem_size, pool->cpumap,
 				  pool->phys);
@@ -232,21 +235,14 @@
 static struct cpdma_desc __iomem *
 cpdma_desc_alloc(struct cpdma_desc_pool *pool)
 {
-	struct cpdma_desc __iomem *desc = NULL;
-
-	desc = (struct cpdma_desc __iomem *)gen_pool_alloc(pool->gen_pool,
-							   pool->desc_size);
-	if (desc)
-		pool->used_desc++;
-
-	return desc;
+	return (struct cpdma_desc __iomem *)
+		gen_pool_alloc(pool->gen_pool, pool->desc_size);
 }
 
 static void cpdma_desc_free(struct cpdma_desc_pool *pool,
 			    struct cpdma_desc __iomem *desc, int num_desc)
 {
 	gen_pool_free(pool->gen_pool, (unsigned long)desc, pool->desc_size);
-	pool->used_desc--;
 }
 
 struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params)
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 727a79f..2d6fc9a 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -597,14 +597,14 @@
 }
 
 /**
- * hash_add - Hash function to add mac addr from hash table
+ * emac_hash_add - Hash function to add mac addr to hash table
  * @priv: The DaVinci EMAC private adapter structure
  * @mac_addr: mac address to delete from hash table
  *
  * Adds mac address to the internal hash table
  *
  */
-static int hash_add(struct emac_priv *priv, u8 *mac_addr)
+static int emac_hash_add(struct emac_priv *priv, u8 *mac_addr)
 {
 	struct device *emac_dev = &priv->ndev->dev;
 	u32 rc = 0;
@@ -613,7 +613,7 @@
 
 	if (hash_value >= EMAC_NUM_MULTICAST_BITS) {
 		if (netif_msg_drv(priv)) {
-			dev_err(emac_dev, "DaVinci EMAC: hash_add(): Invalid "\
+			dev_err(emac_dev, "DaVinci EMAC: emac_hash_add(): Invalid "\
 				"Hash %08x, should not be greater than %08x",
 				hash_value, (EMAC_NUM_MULTICAST_BITS - 1));
 		}
@@ -639,14 +639,14 @@
 }
 
 /**
- * hash_del - Hash function to delete mac addr from hash table
+ * emac_hash_del - Hash function to delete mac addr from hash table
  * @priv: The DaVinci EMAC private adapter structure
  * @mac_addr: mac address to delete from hash table
  *
  * Removes mac address from the internal hash table
  *
  */
-static int hash_del(struct emac_priv *priv, u8 *mac_addr)
+static int emac_hash_del(struct emac_priv *priv, u8 *mac_addr)
 {
 	u32 hash_value;
 	u32 hash_bit;
@@ -696,10 +696,10 @@
 
 	switch (action) {
 	case EMAC_MULTICAST_ADD:
-		update = hash_add(priv, mac_addr);
+		update = emac_hash_add(priv, mac_addr);
 		break;
 	case EMAC_MULTICAST_DEL:
-		update = hash_del(priv, mac_addr);
+		update = emac_hash_del(priv, mac_addr);
 		break;
 	case EMAC_ALL_MULTI_SET:
 		update = 1;
diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index 9006877..e46b1eb 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -97,7 +97,6 @@
 static struct platform_driver fjes_driver = {
 	.driver = {
 		.name = DRV_NAME,
-		.owner = THIS_MODULE,
 	},
 	.probe = fjes_probe,
 	.remove = fjes_remove,
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 591af71..fa7b1e4 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -490,6 +490,7 @@
 			u64 sriov:1;
 			u64 ieee8021q:1;
 			u64 correlation_id:1;
+			u64 teaming:1;
 		};
 	};
 } __packed;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 410fb8e8..8078bc2 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -468,9 +468,13 @@
 	init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
 	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
 
-	if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5)
+	if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
 		init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
 
+		/* Teaming bit is needed to receive link speed updates */
+		init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
+	}
+
 	ret = vmbus_sendpacket(device->channel, init_packet,
 				sizeof(struct nvsp_message),
 				(unsigned long)init_packet,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 3ba29fc..eb2c122 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -579,19 +579,32 @@
 	struct netvsc_reconfig *event;
 	unsigned long flags;
 
-	/* Handle link change statuses only */
+	net = hv_get_drvdata(device_obj);
+
+	if (!net)
+		return;
+
+	ndev_ctx = netdev_priv(net);
+
+	/* Update the physical link speed when changing to another vSwitch */
+	if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
+		u32 speed;
+
+		speed = *(u32 *)((void *)indicate + indicate->
+				 status_buf_offset) / 10000;
+		ndev_ctx->speed = speed;
+		return;
+	}
+
+	/* Handle these link change statuses below */
 	if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE &&
 	    indicate->status != RNDIS_STATUS_MEDIA_CONNECT &&
 	    indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT)
 		return;
 
-	net = hv_get_drvdata(device_obj);
-
-	if (!net || net->reg_state != NETREG_REGISTERED)
+	if (net->reg_state != NETREG_REGISTERED)
 		return;
 
-	ndev_ctx = netdev_priv(net);
-
 	event = kzalloc(sizeof(*event), GFP_ATOMIC);
 	if (!event)
 		return;
@@ -1337,6 +1350,8 @@
 
 	netif_carrier_off(net);
 
+	netvsc_init_settings(net);
+
 	net_device_ctx = netdev_priv(net);
 	net_device_ctx->device_ctx = dev;
 	net_device_ctx->msg_enable = netif_msg_init(debug, default_msg);
@@ -1398,8 +1413,6 @@
 	netif_set_real_num_tx_queues(net, nvdev->num_chn);
 	netif_set_real_num_rx_queues(net, nvdev->num_chn);
 
-	netvsc_init_settings(net);
-
 	ret = register_netdev(net);
 	if (ret != 0) {
 		pr_err("Unable to register netdev.\n");
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 8e830f7..dd3b335 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -752,6 +752,28 @@
 	return ret;
 }
 
+static int rndis_filter_query_link_speed(struct rndis_device *dev)
+{
+	u32 size = sizeof(u32);
+	u32 link_speed;
+	struct net_device_context *ndc;
+	int ret;
+
+	ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED,
+					&link_speed, &size);
+
+	if (!ret) {
+		ndc = netdev_priv(dev->ndev);
+
+		/* The link speed reported by the host is in units of 100 bps,
+		 * so we convert it to Mbps here.
+		 */
+		ndc->speed = link_speed / 10000;
+	}
+
+	return ret;
+}
+
 int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
 {
 	struct rndis_request *request;
@@ -1044,6 +1066,8 @@
 	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
 		return 0;
 
+	rndis_filter_query_link_speed(rndis_device);
+
 	/* vRSS setup */
 	memset(&rsscap, 0, rsscap_size);
 	ret = rndis_filter_query_device(rndis_device,
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 47a6434..d66133bf 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -307,6 +307,18 @@
 	  This module provides a driver for the MDIO busses found in the
 	  APM X-Gene SoC's.
 
+config MICROSEMI_PHY
+    tristate "Drivers for the Microsemi PHYs"
+    ---help---
+      Currently supports the VSC8531 and VSC8541 PHYs
+
+config XILINX_GMII2RGMII
+       tristate "Xilinx GMII2RGMII converter driver"
+       ---help---
+         This driver supports the Xilinx GMII to RGMII IP core. It provides
+         the Reduced Gigabit Media Independent Interface (RGMII) between
+         Ethernet physical media devices and the Gigabit Ethernet controller.
+
 endif # PHYLIB
 
 config MICREL_KS8995MA
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 534dfa7..73d65ce 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -11,6 +11,7 @@
 obj-$(CONFIG_LXT_PHY)		+= lxt.o
 obj-$(CONFIG_QSEMI_PHY)		+= qsemi.o
 obj-$(CONFIG_SMSC_PHY)		+= smsc.o
+obj-$(CONFIG_MICROSEMI_PHY) += mscc.o
 obj-$(CONFIG_TERANETICS_PHY)	+= teranetics.o
 obj-$(CONFIG_VITESSE_PHY)	+= vitesse.o
 obj-$(CONFIG_BCM_NET_PHYLIB)	+= bcm-phy-lib.o
@@ -49,3 +50,4 @@
 obj-$(CONFIG_INTEL_XWAY_PHY)	+= intel-xway.o
 obj-$(CONFIG_MDIO_HISI_FEMAC)	+= mdio-hisi-femac.o
 obj-$(CONFIG_MDIO_XGENE)	+= mdio-xgene.o
+obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o
diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c
new file mode 100644
index 0000000..ad33390
--- /dev/null
+++ b/drivers/net/phy/mscc.c
@@ -0,0 +1,161 @@
+/*
+ * Driver for Microsemi VSC85xx PHYs
+ *
+ * Author: Nagaraju Lakkaraju
+ * License: Dual MIT/GPL
+ * Copyright (c) 2016 Microsemi Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mdio.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+
+enum rgmii_rx_clock_delay {
+       RGMII_RX_CLK_DELAY_0_2_NS = 0,
+       RGMII_RX_CLK_DELAY_0_8_NS = 1,
+       RGMII_RX_CLK_DELAY_1_1_NS = 2,
+       RGMII_RX_CLK_DELAY_1_7_NS = 3,
+       RGMII_RX_CLK_DELAY_2_0_NS = 4,
+       RGMII_RX_CLK_DELAY_2_3_NS = 5,
+       RGMII_RX_CLK_DELAY_2_6_NS = 6,
+       RGMII_RX_CLK_DELAY_3_4_NS = 7
+};
+
+#define MII_VSC85XX_INT_MASK              25
+#define MII_VSC85XX_INT_MASK_MASK         0xa000
+#define MII_VSC85XX_INT_STATUS            26
+
+#define MSCC_EXT_PAGE_ACCESS              31
+#define MSCC_PHY_PAGE_STANDARD            0x0000 /* Standard registers */
+#define MSCC_PHY_PAGE_EXTENDED_2          0x0002 /* Extended reg - page 2 */
+
+/* Extended Page 2 Registers */
+#define MSCC_PHY_RGMII_CNTL                       20
+#define RGMII_RX_CLK_DELAY_MASK                   0x0070
+#define RGMII_RX_CLK_DELAY_POS            4
+
+/* Microsemi PHY IDs */
+#define PHY_ID_VSC8531                            0x00070570
+#define PHY_ID_VSC8541                            0x00070770
+
+static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page)
+{
+       int rc;
+
+       rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page);
+       return rc;
+}
+
+static int vsc85xx_default_config(struct phy_device *phydev)
+{
+       int rc;
+       u16 reg_val;
+
+       mutex_lock(&phydev->lock);
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2);
+       if (rc != 0)
+               goto out_unlock;
+
+       reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL);
+       reg_val &= ~(RGMII_RX_CLK_DELAY_MASK);
+       reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS);
+       phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val);
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+
+out_unlock:
+       mutex_unlock(&phydev->lock);
+
+       return rc;
+}
+
+static int vsc85xx_config_init(struct phy_device *phydev)
+{
+       int rc;
+
+       rc = vsc85xx_default_config(phydev);
+       if (rc)
+               return rc;
+       rc = genphy_config_init(phydev);
+
+       return rc;
+}
+
+static int vsc85xx_ack_interrupt(struct phy_device *phydev)
+{
+       int rc = 0;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+               rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
+
+       return (rc < 0) ? rc : 0;
+}
+
+static int vsc85xx_config_intr(struct phy_device *phydev)
+{
+       int rc;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+               rc = phy_write(phydev, MII_VSC85XX_INT_MASK,
+                                  MII_VSC85XX_INT_MASK_MASK);
+       } else {
+               rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0);
+               if (rc < 0)
+                       return rc;
+               rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
+       }
+
+       return rc;
+}
+
+/* Microsemi VSC85xx PHYs */
+static struct phy_driver vsc85xx_driver[] = {
+{
+       .phy_id                 = PHY_ID_VSC8531,
+       .name                   = "Microsemi VSC8531",
+       .phy_id_mask    = 0xfffffff0,
+       .features               = PHY_GBIT_FEATURES,
+       .flags                  = PHY_HAS_INTERRUPT,
+       .soft_reset             = &genphy_soft_reset,
+       .config_init    = &vsc85xx_config_init,
+       .config_aneg    = &genphy_config_aneg,
+       .aneg_done              = &genphy_aneg_done,
+       .read_status    = &genphy_read_status,
+       .ack_interrupt  = &vsc85xx_ack_interrupt,
+       .config_intr    = &vsc85xx_config_intr,
+       .suspend                = &genphy_suspend,
+       .resume                 = &genphy_resume,
+},
+{
+       .phy_id                 = PHY_ID_VSC8541,
+       .name                   = "Microsemi VSC8541 SyncE",
+       .phy_id_mask    = 0xfffffff0,
+       .features               = PHY_GBIT_FEATURES,
+       .flags                  = PHY_HAS_INTERRUPT,
+       .soft_reset             = &genphy_soft_reset,
+       .config_init    = &vsc85xx_config_init,
+       .config_aneg    = &genphy_config_aneg,
+       .aneg_done              = &genphy_aneg_done,
+       .read_status    = &genphy_read_status,
+       .ack_interrupt  = &vsc85xx_ack_interrupt,
+       .config_intr    = &vsc85xx_config_intr,
+       .suspend                = &genphy_suspend,
+       .resume                 = &genphy_resume,
+}
+
+};
+
+module_phy_driver(vsc85xx_driver);
+
+static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
+       { PHY_ID_VSC8531, 0xfffffff0, },
+       { PHY_ID_VSC8541, 0xfffffff0, },
+       { }
+};
+
+MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl);
+
+MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver");
+MODULE_AUTHOR("Nagaraju Lakkaraju");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
new file mode 100644
index 0000000..cad6e19
--- /dev/null
+++ b/drivers/net/phy/xilinx_gmii2rgmii.c
@@ -0,0 +1,109 @@
+/* Xilinx GMII2RGMII Converter driver
+ *
+ * Copyright (C) 2016 Xilinx, Inc.
+ *
+ * Author: Kedareswara rao Appana <appanad@xilinx.com>
+ *
+ * Description:
+ * This driver is developed for Xilinx GMII2RGMII Converter
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mii.h>
+#include <linux/mdio.h>
+#include <linux/phy.h>
+#include <linux/of_mdio.h>
+
+#define XILINX_GMII2RGMII_REG		0x10
+#define XILINX_GMII2RGMII_SPEED_MASK	(BMCR_SPEED1000 | BMCR_SPEED100)
+
+struct gmii2rgmii {
+	struct phy_device *phy_dev;
+	struct phy_driver *phy_drv;
+	struct phy_driver conv_phy_drv;
+	int addr;
+};
+
+static int xgmiitorgmii_read_status(struct phy_device *phydev)
+{
+	struct gmii2rgmii *priv = phydev->priv;
+	u16 val = 0;
+
+	priv->phy_drv->read_status(phydev);
+
+	val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG);
+	val &= XILINX_GMII2RGMII_SPEED_MASK;
+
+	if (phydev->speed == SPEED_1000)
+		val |= BMCR_SPEED1000;
+	else if (phydev->speed == SPEED_100)
+		val |= BMCR_SPEED100;
+	else
+		val |= BMCR_SPEED10;
+
+	mdiobus_write(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG, val);
+
+	return 0;
+}
+
+int xgmiitorgmii_probe(struct mdio_device *mdiodev)
+{
+	struct device *dev = &mdiodev->dev;
+	struct device_node *np = dev->of_node, *phy_node;
+	struct gmii2rgmii *priv;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	phy_node = of_parse_phandle(np, "phy-handle", 0);
+	if (!phy_node) {
+		dev_err(dev, "Couldn't parse phy-handle\n");
+		return -ENODEV;
+	}
+
+	priv->phy_dev = of_phy_find_device(phy_node);
+	if (!priv->phy_dev) {
+		dev_info(dev, "Couldn't find phydev\n");
+		return -EPROBE_DEFER;
+	}
+
+	priv->addr = mdiodev->addr;
+	priv->phy_drv = priv->phy_dev->drv;
+	memcpy(&priv->conv_phy_drv, priv->phy_dev->drv,
+	       sizeof(struct phy_driver));
+	priv->conv_phy_drv.read_status = xgmiitorgmii_read_status;
+	priv->phy_dev->priv = priv;
+	priv->phy_dev->drv = &priv->conv_phy_drv;
+
+	return 0;
+}
+
+static const struct of_device_id xgmiitorgmii_of_match[] = {
+	{ .compatible = "xlnx,gmii-to-rgmii-1.0" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, xgmiitorgmii_of_match);
+
+static struct mdio_driver xgmiitorgmii_driver = {
+	.probe	= xgmiitorgmii_probe,
+	.mdiodrv.driver = {
+		.name = "xgmiitorgmii",
+		.of_match_table = xgmiitorgmii_of_match,
+	},
+};
+
+mdio_module_driver(xgmiitorgmii_driver);
+
+MODULE_DESCRIPTION("Xilinx GMII2RGMII converter driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index f226db4..70cfa06 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1103,6 +1103,15 @@
 	}
 
 	conf.file = file;
+
+	/* Don't use device name generated by the rtnetlink layer when ifname
+	 * isn't specified. Let ppp_dev_configure() set the device name using
+	 * the PPP unit identifier as suffix (i.e. ppp<unit_id>). This allows
+	 * userspace to infer the device name using the PPPIOCGUNIT ioctl.
+	 */
+	if (!tb[IFLA_IFNAME])
+		conf.ifname_is_set = false;
+
 	err = ppp_dev_configure(src_net, dev, &conf);
 
 out_unlock:
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index ae0905e..1951b10 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -37,6 +37,7 @@
 #include <net/icmp.h>
 #include <net/route.h>
 #include <net/gre.h>
+#include <net/pptp.h>
 
 #include <linux/uaccess.h>
 
@@ -53,41 +54,6 @@
 static const struct ppp_channel_ops pptp_chan_ops;
 static const struct proto_ops pptp_ops;
 
-#define PPP_LCP_ECHOREQ 0x09
-#define PPP_LCP_ECHOREP 0x0A
-#define SC_RCV_BITS	(SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
-
-#define MISSING_WINDOW 20
-#define WRAPPED(curseq, lastseq)\
-	((((curseq) & 0xffffff00) == 0) &&\
-	(((lastseq) & 0xffffff00) == 0xffffff00))
-
-#define PPTP_GRE_PROTO  0x880B
-#define PPTP_GRE_VER    0x1
-
-#define PPTP_GRE_FLAG_C	0x80
-#define PPTP_GRE_FLAG_R	0x40
-#define PPTP_GRE_FLAG_K	0x20
-#define PPTP_GRE_FLAG_S	0x10
-#define PPTP_GRE_FLAG_A	0x80
-
-#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C)
-#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R)
-#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K)
-#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S)
-#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A)
-
-#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
-struct pptp_gre_header {
-	u8  flags;
-	u8  ver;
-	__be16 protocol;
-	__be16 payload_len;
-	__be16 call_id;
-	__be32 seq;
-	__be32 ack;
-} __packed;
-
 static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr)
 {
 	struct pppox_sock *sock;
@@ -240,16 +206,14 @@
 	skb_push(skb, header_len);
 	hdr = (struct pptp_gre_header *)(skb->data);
 
-	hdr->flags       = PPTP_GRE_FLAG_K;
-	hdr->ver         = PPTP_GRE_VER;
-	hdr->protocol    = htons(PPTP_GRE_PROTO);
-	hdr->call_id     = htons(opt->dst_addr.call_id);
+	hdr->gre_hd.flags = GRE_KEY | GRE_VERSION_1 | GRE_SEQ;
+	hdr->gre_hd.protocol = GRE_PROTO_PPP;
+	hdr->call_id = htons(opt->dst_addr.call_id);
 
-	hdr->flags      |= PPTP_GRE_FLAG_S;
-	hdr->seq         = htonl(++opt->seq_sent);
+	hdr->seq = htonl(++opt->seq_sent);
 	if (opt->ack_sent != seq_recv)	{
 		/* send ack with this message */
-		hdr->ver |= PPTP_GRE_FLAG_A;
+		hdr->gre_hd.flags |= GRE_ACK;
 		hdr->ack  = htonl(seq_recv);
 		opt->ack_sent = seq_recv;
 	}
@@ -312,7 +276,7 @@
 	headersize  = sizeof(*header);
 
 	/* test if acknowledgement present */
-	if (PPTP_GRE_IS_A(header->ver)) {
+	if (GRE_IS_ACK(header->gre_hd.flags)) {
 		__u32 ack;
 
 		if (!pskb_may_pull(skb, headersize))
@@ -320,7 +284,7 @@
 		header = (struct pptp_gre_header *)(skb->data);
 
 		/* ack in different place if S = 0 */
-		ack = PPTP_GRE_IS_S(header->flags) ? header->ack : header->seq;
+		ack = GRE_IS_SEQ(header->gre_hd.flags) ? header->ack : header->seq;
 
 		ack = ntohl(ack);
 
@@ -333,7 +297,7 @@
 		headersize -= sizeof(header->ack);
 	}
 	/* test if payload present */
-	if (!PPTP_GRE_IS_S(header->flags))
+	if (!GRE_IS_SEQ(header->gre_hd.flags))
 		goto drop;
 
 	payload_len = ntohs(header->payload_len);
@@ -394,11 +358,11 @@
 
 	header = (struct pptp_gre_header *)skb->data;
 
-	if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol for PPTP */
-		PPTP_GRE_IS_C(header->flags) ||                /* flag C should be clear */
-		PPTP_GRE_IS_R(header->flags) ||                /* flag R should be clear */
-		!PPTP_GRE_IS_K(header->flags) ||               /* flag K should be set */
-		(header->flags&0xF) != 0)                      /* routing and recursion ctrl = 0 */
+	if (header->gre_hd.protocol != GRE_PROTO_PPP || /* PPTP-GRE protocol for PPTP */
+		GRE_IS_CSUM(header->gre_hd.flags) ||    /* flag CSUM should be clear */
+		GRE_IS_ROUTING(header->gre_hd.flags) || /* flag ROUTING should be clear */
+		!GRE_IS_KEY(header->gre_hd.flags) ||    /* flag KEY should be set */
+		(header->gre_hd.flags & GRE_FLAGS))     /* flag Recursion Ctrl should be clear */
 		/* if invalid, discard this packet */
 		goto drop;
 
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 4b44586..c5544d3 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2300,10 +2300,8 @@
 	serial->rx_data_length = rx_size;
 	for (i = 0; i < serial->num_rx_urbs; i++) {
 		serial->rx_urb[i] = usb_alloc_urb(0, GFP_KERNEL);
-		if (!serial->rx_urb[i]) {
-			dev_err(dev, "Could not allocate urb?\n");
+		if (!serial->rx_urb[i])
 			goto exit;
-		}
 		serial->rx_urb[i]->transfer_buffer = NULL;
 		serial->rx_urb[i]->transfer_buffer_length = 0;
 		serial->rx_data[i] = kzalloc(serial->rx_data_length,
@@ -2314,10 +2312,8 @@
 
 	/* TX, allocate urb and initialize */
 	serial->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!serial->tx_urb) {
-		dev_err(dev, "Could not allocate urb?\n");
+	if (!serial->tx_urb)
 		goto exit;
-	}
 	serial->tx_urb->transfer_buffer = NULL;
 	serial->tx_urb->transfer_buffer_length = 0;
 	/* prepare our TX buffer */
@@ -2555,20 +2551,16 @@
 	/* start allocating */
 	for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) {
 		hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL);
-		if (!hso_net->mux_bulk_rx_urb_pool[i]) {
-			dev_err(&interface->dev, "Could not allocate rx urb\n");
+		if (!hso_net->mux_bulk_rx_urb_pool[i])
 			goto exit;
-		}
 		hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE,
 							   GFP_KERNEL);
 		if (!hso_net->mux_bulk_rx_buf_pool[i])
 			goto exit;
 	}
 	hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!hso_net->mux_bulk_tx_urb) {
-		dev_err(&interface->dev, "Could not allocate tx urb\n");
+	if (!hso_net->mux_bulk_tx_urb)
 		goto exit;
-	}
 	hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL);
 	if (!hso_net->mux_bulk_tx_buf)
 		goto exit;
@@ -2787,10 +2779,8 @@
 	}
 
 	mux->shared_intr_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!mux->shared_intr_urb) {
-		dev_err(&interface->dev, "Could not allocate intr urb?\n");
+	if (!mux->shared_intr_urb)
 		goto exit;
-	}
 	mux->shared_intr_buf =
 		kzalloc(le16_to_cpu(mux->intr_endp->wMaxPacketSize),
 			GFP_KERNEL);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 6a9d474..432b8a3 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -3002,10 +3002,8 @@
 
 gso_skb:
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netif_dbg(dev, tx_err, dev->net, "no urb\n");
+	if (!urb)
 		goto drop;
-	}
 
 	entry = (struct skb_data *)skb->cb;
 	entry->urb = urb;
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 3bfb592..d5071e3 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -2062,11 +2062,8 @@
 		   cmd, reqtype, value, index, size);
 
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(dev->net, "Error allocating URB in"
-			   " %s!\n", __func__);
+	if (!urb)
 		goto fail;
-	}
 
 	if (data) {
 		buf = kmemdup(data, size, GFP_ATOMIC);
diff --git a/drivers/net/wimax/i2400m/usb-notif.c b/drivers/net/wimax/i2400m/usb-notif.c
index fc1355d..5d429f8 100644
--- a/drivers/net/wimax/i2400m/usb-notif.c
+++ b/drivers/net/wimax/i2400m/usb-notif.c
@@ -206,7 +206,6 @@
 	i2400mu->notif_urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!i2400mu->notif_urb) {
 		ret = -ENOMEM;
-		dev_err(dev, "notification: cannot allocate URB\n");
 		goto error_alloc_urb;
 	}
 	epd = usb_get_epd(i2400mu->usb_iface,
diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c
index 8aded24..7a60d2e 100644
--- a/drivers/net/wireless/ath/ar5523/ar5523.c
+++ b/drivers/net/wireless/ath/ar5523/ar5523.c
@@ -706,10 +706,8 @@
 
 		data->ar = ar;
 		data->urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!data->urb) {
-			ar5523_err(ar, "could not allocate rx data urb\n");
+		if (!data->urb)
 			goto err;
-		}
 		list_add_tail(&data->list, &ar->rx_data_free);
 		atomic_inc(&ar->rx_data_free_cnt);
 	}
@@ -824,7 +822,6 @@
 
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			ar5523_err(ar, "Failed to allocate TX urb\n");
 			ieee80211_free_txskb(ar->hw, skb);
 			continue;
 		}
@@ -949,10 +946,8 @@
 	init_completion(&cmd->done);
 
 	cmd->urb_tx = usb_alloc_urb(0, GFP_KERNEL);
-	if (!cmd->urb_tx) {
-		ar5523_err(ar, "could not allocate urb\n");
+	if (!cmd->urb_tx)
 		return -ENOMEM;
-	}
 	cmd->buf_tx = usb_alloc_coherent(ar->dev, AR5523_MAX_TXCMDSZ,
 					 GFP_KERNEL,
 					 &cmd->urb_tx->transfer_dma);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index 98b15a9..fa26619 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1099,15 +1099,11 @@
 	devinfo->tx_freecount = ntxq;
 
 	devinfo->ctl_urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!devinfo->ctl_urb) {
-		brcmf_err("usb_alloc_urb (ctl) failed\n");
+	if (!devinfo->ctl_urb)
 		goto error;
-	}
 	devinfo->bulk_urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!devinfo->bulk_urb) {
-		brcmf_err("usb_alloc_urb (bulk) failed\n");
+	if (!devinfo->bulk_urb)
 		goto error;
-	}
 
 	return &devinfo->bus_pub;
 
diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
index 56f109b..bca6935 100644
--- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
+++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
@@ -1613,10 +1613,8 @@
 			}
 
 			upriv->read_urb = usb_alloc_urb(0, GFP_KERNEL);
-			if (!upriv->read_urb) {
-				err("No free urbs available");
+			if (!upriv->read_urb)
 				goto error;
-			}
 			if (le16_to_cpu(ep->wMaxPacketSize) != 64)
 				pr_warn("bulk in: wMaxPacketSize!= 64\n");
 			if (ep->bEndpointAddress != (2 | USB_DIR_IN))
diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
index 799a2ef..e0ade40 100644
--- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c
+++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
@@ -198,22 +198,16 @@
 	}
 
 	cardp->rx_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!cardp->rx_urb) {
-		lbtf_deb_usbd(&udev->dev, "Rx URB allocation failed\n");
+	if (!cardp->rx_urb)
 		goto dealloc;
-	}
 
 	cardp->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!cardp->tx_urb) {
-		lbtf_deb_usbd(&udev->dev, "Tx URB allocation failed\n");
+	if (!cardp->tx_urb)
 		goto dealloc;
-	}
 
 	cardp->cmd_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!cardp->cmd_urb) {
-		lbtf_deb_usbd(&udev->dev, "Cmd URB allocation failed\n");
+	if (!cardp->cmd_urb)
 		goto dealloc;
-	}
 
 	cardp->ep_out_buf = kmalloc(MRVDRV_ETH_TX_PACKET_BUFFER_SIZE,
 				    GFP_KERNEL);
diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c
index 0857575..3bd04f5 100644
--- a/drivers/net/wireless/marvell/mwifiex/usb.c
+++ b/drivers/net/wireless/marvell/mwifiex/usb.c
@@ -657,11 +657,8 @@
 	card->tx_cmd.ep = card->tx_cmd_ep;
 
 	card->tx_cmd.urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!card->tx_cmd.urb) {
-		mwifiex_dbg(adapter, ERROR,
-			    "tx_cmd.urb allocation failed\n");
+	if (!card->tx_cmd.urb)
 		return -ENOMEM;
-	}
 
 	for (i = 0; i < MWIFIEX_TX_DATA_PORT; i++) {
 		port = &card->port[i];
@@ -677,11 +674,8 @@
 			port->tx_data_list[j].ep = port->tx_data_ep;
 			port->tx_data_list[j].urb =
 					usb_alloc_urb(0, GFP_KERNEL);
-			if (!port->tx_data_list[j].urb) {
-				mwifiex_dbg(adapter, ERROR,
-					    "urb allocation failed\n");
+			if (!port->tx_data_list[j].urb)
 				return -ENOMEM;
-			}
 		}
 	}
 
@@ -697,10 +691,8 @@
 	card->rx_cmd.ep = card->rx_cmd_ep;
 
 	card->rx_cmd.urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!card->rx_cmd.urb) {
-		mwifiex_dbg(adapter, ERROR, "rx_cmd.urb allocation failed\n");
+	if (!card->rx_cmd.urb)
 		return -ENOMEM;
-	}
 
 	card->rx_cmd.skb = dev_alloc_skb(MWIFIEX_RX_CMD_BUF_SIZE);
 	if (!card->rx_cmd.skb)
@@ -714,11 +706,8 @@
 		card->rx_data_list[i].ep = card->rx_data_ep;
 
 		card->rx_data_list[i].urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!card->rx_data_list[i].urb) {
-			mwifiex_dbg(adapter, ERROR,
-				    "rx_data_list[] urb allocation failed\n");
+		if (!card->rx_data_list[i].urb)
 			return -1;
-		}
 		if (mwifiex_usb_submit_rx_urb(&card->rx_data_list[i],
 					      MWIFIEX_RX_DATA_BUF_SIZE))
 			return -1;
diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c
index 41617b7..32aa5c1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/usb.c
+++ b/drivers/net/wireless/realtek/rtlwifi/usb.c
@@ -739,11 +739,8 @@
 	for (i = 0; i < rtlusb->rx_urb_num; i++) {
 		err = -ENOMEM;
 		urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!urb) {
-			RT_TRACE(rtlpriv, COMP_USB, DBG_EMERG,
-				 "Failed to alloc URB!!\n");
+		if (!urb)
 			goto err_out;
-		}
 
 		err = _rtl_prep_rx_urb(hw, rtlusb, urb, GFP_KERNEL);
 		if (err < 0) {
@@ -907,15 +904,12 @@
 static struct urb *_rtl_usb_tx_urb_setup(struct ieee80211_hw *hw,
 				struct sk_buff *skb, u32 ep_num)
 {
-	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_usb *rtlusb = rtl_usbdev(rtl_usbpriv(hw));
 	struct urb *_urb;
 
 	WARN_ON(NULL == skb);
 	_urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!_urb) {
-		RT_TRACE(rtlpriv, COMP_USB, DBG_EMERG,
-			 "Can't allocate URB for bulk out!\n");
 		kfree_skb(skb);
 		return NULL;
 	}
diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c
index e13a4ab..1fde9c8 100644
--- a/drivers/staging/octeon/ethernet-mdio.c
+++ b/drivers/staging/octeon/ethernet-mdio.c
@@ -34,48 +34,23 @@
 	strlcpy(info->bus_info, "Builtin", sizeof(info->bus_info));
 }
 
-static int cvm_oct_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
-	if (priv->phydev)
-		return phy_ethtool_gset(priv->phydev, cmd);
-
-	return -EINVAL;
-}
-
-static int cvm_oct_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (priv->phydev)
-		return phy_ethtool_sset(priv->phydev, cmd);
-
-	return -EINVAL;
-}
-
 static int cvm_oct_nway_reset(struct net_device *dev)
 {
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (priv->phydev)
-		return phy_start_aneg(priv->phydev);
+	if (dev->phydev)
+		return phy_start_aneg(dev->phydev);
 
 	return -EINVAL;
 }
 
 const struct ethtool_ops cvm_oct_ethtool_ops = {
 	.get_drvinfo = cvm_oct_get_drvinfo,
-	.get_settings = cvm_oct_get_settings,
-	.set_settings = cvm_oct_set_settings,
 	.nway_reset = cvm_oct_nway_reset,
 	.get_link = ethtool_op_get_link,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 /**
@@ -88,15 +63,13 @@
  */
 int cvm_oct_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	if (!priv->phydev)
+	if (!dev->phydev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(priv->phydev, rq, cmd);
+	return phy_mii_ioctl(dev->phydev, rq, cmd);
 }
 
 void cvm_oct_note_carrier(struct octeon_ethernet *priv,
@@ -119,9 +92,9 @@
 	cvmx_helper_link_info_t link_info;
 
 	link_info.u64		= 0;
-	link_info.s.link_up	= priv->phydev->link ? 1 : 0;
-	link_info.s.full_duplex = priv->phydev->duplex ? 1 : 0;
-	link_info.s.speed	= priv->phydev->speed;
+	link_info.s.link_up	= dev->phydev->link ? 1 : 0;
+	link_info.s.full_duplex = dev->phydev->duplex ? 1 : 0;
+	link_info.s.speed	= dev->phydev->speed;
 	priv->link_info		= link_info.u64;
 
 	/*
@@ -130,8 +103,8 @@
 	if (priv->poll)
 		priv->poll(dev);
 
-	if (priv->last_link != priv->phydev->link) {
-		priv->last_link = priv->phydev->link;
+	if (priv->last_link != dev->phydev->link) {
+		priv->last_link = dev->phydev->link;
 		cvmx_helper_link_set(priv->port, link_info);
 		cvm_oct_note_carrier(priv, link_info);
 	}
@@ -151,9 +124,8 @@
 
 	priv->poll = NULL;
 
-	if (priv->phydev)
-		phy_disconnect(priv->phydev);
-	priv->phydev = NULL;
+	if (dev->phydev)
+		phy_disconnect(dev->phydev);
 
 	if (priv->last_link) {
 		link_info.u64 = 0;
@@ -176,6 +148,7 @@
 {
 	struct octeon_ethernet *priv = netdev_priv(dev);
 	struct device_node *phy_node;
+	struct phy_device *phydev = NULL;
 
 	if (!priv->of_node)
 		goto no_phy;
@@ -193,14 +166,14 @@
 	if (!phy_node)
 		goto no_phy;
 
-	priv->phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0,
-				      PHY_INTERFACE_MODE_GMII);
+	phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0,
+				PHY_INTERFACE_MODE_GMII);
 
-	if (!priv->phydev)
+	if (!phydev)
 		return -ENODEV;
 
 	priv->last_link = 0;
-	phy_start_aneg(priv->phydev);
+	phy_start_aneg(phydev);
 
 	return 0;
 no_phy:
diff --git a/drivers/staging/octeon/ethernet-rgmii.c b/drivers/staging/octeon/ethernet-rgmii.c
index 91b148c..48846df 100644
--- a/drivers/staging/octeon/ethernet-rgmii.c
+++ b/drivers/staging/octeon/ethernet-rgmii.c
@@ -145,7 +145,7 @@
 	if (ret)
 		return ret;
 
-	if (priv->phydev) {
+	if (dev->phydev) {
 		/*
 		 * In phydev mode, we need still periodic polling for the
 		 * preamble error checking, and we also need to call this
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index e9cd5f2..45d5763 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -457,10 +457,8 @@
 
 void cvm_oct_common_uninit(struct net_device *dev)
 {
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
-	if (priv->phydev)
-		phy_disconnect(priv->phydev);
+	if (dev->phydev)
+		phy_disconnect(dev->phydev);
 }
 
 int cvm_oct_common_open(struct net_device *dev,
@@ -484,10 +482,10 @@
 	if (octeon_is_simulation())
 		return 0;
 
-	if (priv->phydev) {
-		int r = phy_read_status(priv->phydev);
+	if (dev->phydev) {
+		int r = phy_read_status(dev->phydev);
 
-		if (r == 0 && priv->phydev->link == 0)
+		if (r == 0 && dev->phydev->link == 0)
 			netif_carrier_off(dev);
 		cvm_oct_adjust_link(dev);
 	} else {
diff --git a/drivers/staging/octeon/octeon-ethernet.h b/drivers/staging/octeon/octeon-ethernet.h
index 6275c15..d533aef 100644
--- a/drivers/staging/octeon/octeon-ethernet.h
+++ b/drivers/staging/octeon/octeon-ethernet.h
@@ -40,7 +40,6 @@
 	struct sk_buff_head tx_free_list[16];
 	/* Device statistics */
 	struct net_device_stats stats;
-	struct phy_device *phydev;
 	unsigned int last_speed;
 	unsigned int last_link;
 	/* Last negotiated link state */
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index c633476..bca66d8 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -390,6 +390,8 @@
 	atomic_set(&ent->count, 1);
 	spin_lock_init(&ent->pde_unload_lock);
 	INIT_LIST_HEAD(&ent->pde_openers);
+	proc_set_user(ent, (*parent)->uid, (*parent)->gid);
+
 out:
 	return ent;
 }
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index c8bbc68..7ae6b1d 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -21,6 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/mount.h>
 #include <linux/nsproxy.h>
+#include <linux/uidgid.h>
 #include <net/net_namespace.h>
 #include <linux/seq_file.h>
 
@@ -185,6 +186,8 @@
 static __net_init int proc_net_ns_init(struct net *net)
 {
 	struct proc_dir_entry *netd, *net_statd;
+	kuid_t uid;
+	kgid_t gid;
 	int err;
 
 	err = -ENOMEM;
@@ -199,6 +202,16 @@
 	netd->parent = &proc_root;
 	memcpy(netd->name, "net", 4);
 
+	uid = make_kuid(net->user_ns, 0);
+	if (!uid_valid(uid))
+		uid = netd->uid;
+
+	gid = make_kgid(net->user_ns, 0);
+	if (!gid_valid(gid))
+		gid = netd->gid;
+
+	proc_set_user(netd, uid, gid);
+
 	err = -EEXIST;
 	net_statd = proc_net_mkdir(net, "stat", netd);
 	if (!net_statd)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1b93650..2ed3d71 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -430,6 +430,7 @@
 static struct inode *proc_sys_make_inode(struct super_block *sb,
 		struct ctl_table_header *head, struct ctl_table *table)
 {
+	struct ctl_table_root *root = head->root;
 	struct inode *inode;
 	struct proc_inode *ei;
 
@@ -457,6 +458,10 @@
 		if (is_empty_dir(head))
 			make_empty_dir_inode(inode);
 	}
+
+	if (root->set_ownership)
+		root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
+
 out:
 	return inode;
 }
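The root->set_ownership() call added above lets a sysctl root override the uid/gid of the inodes it creates; the hook's prototype is added to struct ctl_table_root further down in include/linux/sysctl.h. A rough sketch of an implementation follows, assuming for illustration that the opener's user namespace is the right one (the networking code instead derives the namespace from its own containing structures):

    #include <linux/cred.h>
    #include <linux/sysctl.h>
    #include <linux/uidgid.h>
    #include <linux/user_namespace.h>

    /* Illustrative only: present the table as owned by root of a user
     * namespace, keeping the defaults when the id does not map.
     */
    static void example_set_ownership(struct ctl_table_header *head,
                                      struct ctl_table *table,
                                      kuid_t *uid, kgid_t *gid)
    {
            /* Assumption for this sketch: use the opener's user namespace. */
            struct user_namespace *user_ns = current_user_ns();
            kuid_t ns_root_uid = make_kuid(user_ns, 0);
            kgid_t ns_root_gid = make_kgid(user_ns, 0);

            if (uid_valid(ns_root_uid))
                    *uid = ns_root_uid;
            if (gid_valid(ns_root_gid))
                    *gid = ns_root_gid;
    }
    /* Wired up via a ctl_table_root's .set_ownership member. */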
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 984f73b..a4414a1 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -497,6 +497,23 @@
 	return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
 }
 
+/**
+ * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
+ * @task: the task to be tested
+ * @ancestor: possible ancestor of @task's cgroup
+ *
+ * Tests whether @task's cgroup on the default hierarchy is a descendant of @ancestor.

+ * It follows all the same rules as cgroup_is_descendant, and only applies
+ * to the default hierarchy.
+ */
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+					       struct cgroup *ancestor)
+{
+	struct css_set *cset = task_css_set(task);
+
+	return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
+}
+
 /* no synchronization, the result can only be used as a hint */
 static inline bool cgroup_is_populated(struct cgroup *cgrp)
 {
@@ -557,6 +574,7 @@
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
+struct cgroup;
 
 static inline void css_put(struct cgroup_subsys_state *css) {}
 static inline int cgroup_attach_task_all(struct task_struct *from,
@@ -574,6 +592,11 @@
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+					       struct cgroup *ancestor)
+{
+	return true;
+}
 #endif /* !CONFIG_CGROUPS */
 
 /*
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3a788bf..794bb07 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -52,6 +52,7 @@
 #include <uapi/linux/netdevice.h>
 #include <uapi/linux/if_bonding.h>
 #include <uapi/linux/pkt_cls.h>
+#include <linux/hashtable.h>
 
 struct netpoll_info;
 struct device;
@@ -1800,6 +1801,9 @@
 	unsigned int		num_tx_queues;
 	unsigned int		real_num_tx_queues;
 	struct Qdisc		*qdisc;
+#ifdef CONFIG_NET_SCHED
+	DECLARE_HASHTABLE	(qdisc_hash, 4);
+#endif
 	unsigned long		tx_queue_len;
 	spinlock_t		tx_global_lock;
 	int			watchdog_timeo;
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index d6c4177..3ed7d20 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -276,6 +276,21 @@
 	QED_PROTOCOL_ISCSI,
 };
 
+enum qed_link_mode_bits {
+	QED_LM_FIBRE_BIT = BIT(0),
+	QED_LM_Autoneg_BIT = BIT(1),
+	QED_LM_Asym_Pause_BIT = BIT(2),
+	QED_LM_Pause_BIT = BIT(3),
+	QED_LM_1000baseT_Half_BIT = BIT(4),
+	QED_LM_1000baseT_Full_BIT = BIT(5),
+	QED_LM_10000baseKR_Full_BIT = BIT(6),
+	QED_LM_25000baseKR_Full_BIT = BIT(7),
+	QED_LM_40000baseLR4_Full_BIT = BIT(8),
+	QED_LM_50000baseKR2_Full_BIT = BIT(9),
+	QED_LM_100000baseKR4_Full_BIT = BIT(10),
+	QED_LM_COUNT = 11
+};
+
 struct qed_link_params {
 	bool	link_up;
 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 697e160..d82cb60 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -25,6 +25,7 @@
 #include <linux/rcupdate.h>
 #include <linux/wait.h>
 #include <linux/rbtree.h>
+#include <linux/uidgid.h>
 #include <uapi/linux/sysctl.h>
 
 /* For the /proc/sys support */
@@ -157,6 +158,9 @@
 	struct ctl_table_set default_set;
 	struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
 					   struct nsproxy *namespaces);
+	void (*set_ownership)(struct ctl_table_header *head,
+			      struct ctl_table *table,
+			      kuid_t *uid, kgid_t *gid);
 	int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
 };
 
diff --git a/include/net/gre.h b/include/net/gre.h
index 73ea256..d25d836 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -7,7 +7,15 @@
 struct gre_base_hdr {
 	__be16 flags;
 	__be16 protocol;
-};
+} __packed;
+
+struct gre_full_hdr {
+	struct gre_base_hdr fixed_header;
+	__be16 csum;
+	__be16 reserved1;
+	__be32 key;
+	__be32 seq;
+} __packed;
 #define GRE_HEADER_SECTION 4
 
 #define GREPROTO_CISCO		0
diff --git a/include/net/kcm.h b/include/net/kcm.h
index 2840b58..2a89658 100644
--- a/include/net/kcm.h
+++ b/include/net/kcm.h
@@ -13,6 +13,7 @@
 
 #include <linux/skbuff.h>
 #include <net/sock.h>
+#include <net/strparser.h>
 #include <uapi/linux/kcm.h>
 
 extern unsigned int kcm_net_id;
@@ -21,16 +22,8 @@
 #define KCM_STATS_INCR(stat) ((stat)++)
 
 struct kcm_psock_stats {
-	unsigned long long rx_msgs;
-	unsigned long long rx_bytes;
 	unsigned long long tx_msgs;
 	unsigned long long tx_bytes;
-	unsigned int rx_aborts;
-	unsigned int rx_mem_fail;
-	unsigned int rx_need_more_hdr;
-	unsigned int rx_msg_too_big;
-	unsigned int rx_msg_timeouts;
-	unsigned int rx_bad_hdr_len;
 	unsigned long long reserved;
 	unsigned long long unreserved;
 	unsigned int tx_aborts;
@@ -64,13 +57,6 @@
 	struct sk_buff *last_skb;
 };
 
-struct kcm_rx_msg {
-	int full_len;
-	int accum_len;
-	int offset;
-	int early_eaten;
-};
-
 /* Socket structure for KCM client sockets */
 struct kcm_sock {
 	struct sock sk;
@@ -87,6 +73,7 @@
 	struct work_struct tx_work;
 	struct list_head wait_psock_list;
 	struct sk_buff *seq_skb;
+	u32 tx_stopped : 1;
 
 	/* Don't use bit fields here, these are set under different locks */
 	bool tx_wait;
@@ -104,11 +91,11 @@
 /* Structure for an attached lower socket */
 struct kcm_psock {
 	struct sock *sk;
+	struct strparser strp;
 	struct kcm_mux *mux;
 	int index;
 
 	u32 tx_stopped : 1;
-	u32 rx_stopped : 1;
 	u32 done : 1;
 	u32 unattaching : 1;
 
@@ -121,18 +108,12 @@
 	struct kcm_psock_stats stats;
 
 	/* Receive */
-	struct sk_buff *rx_skb_head;
-	struct sk_buff **rx_skb_nextp;
-	struct sk_buff *ready_rx_msg;
 	struct list_head psock_ready_list;
-	struct work_struct rx_work;
-	struct delayed_work rx_delayed_work;
 	struct bpf_prog *bpf_prog;
 	struct kcm_sock *rx_kcm;
 	unsigned long long saved_rx_bytes;
 	unsigned long long saved_rx_msgs;
-	struct timer_list rx_msg_timer;
-	unsigned int rx_need_bytes;
+	struct sk_buff *ready_rx_msg;
 
 	/* Transmit */
 	struct kcm_sock *tx_kcm;
@@ -146,6 +127,7 @@
 	struct mutex mutex;
 	struct kcm_psock_stats aggregate_psock_stats;
 	struct kcm_mux_stats aggregate_mux_stats;
+	struct strp_aggr_stats aggregate_strp_stats;
 	struct list_head mux_list;
 	int count;
 };
@@ -163,6 +145,7 @@
 
 	struct kcm_mux_stats stats;
 	struct kcm_psock_stats aggregate_psock_stats;
+	struct strp_aggr_stats aggregate_strp_stats;
 
 	/* Receive */
 	spinlock_t rx_lock ____cacheline_aligned_in_smp;
@@ -190,14 +173,6 @@
 	/* Save psock statistics in the mux when psock is being unattached. */
 
 #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat)
-	SAVE_PSOCK_STATS(rx_msgs);
-	SAVE_PSOCK_STATS(rx_bytes);
-	SAVE_PSOCK_STATS(rx_aborts);
-	SAVE_PSOCK_STATS(rx_mem_fail);
-	SAVE_PSOCK_STATS(rx_need_more_hdr);
-	SAVE_PSOCK_STATS(rx_msg_too_big);
-	SAVE_PSOCK_STATS(rx_msg_timeouts);
-	SAVE_PSOCK_STATS(rx_bad_hdr_len);
 	SAVE_PSOCK_STATS(tx_msgs);
 	SAVE_PSOCK_STATS(tx_bytes);
 	SAVE_PSOCK_STATS(reserved);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index d061ffe..7adf438 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -40,7 +40,6 @@
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_rules_ops	*rules_ops;
 	bool			fib_has_custom_rules;
-	struct fib_table __rcu	*fib_local;
 	struct fib_table __rcu	*fib_main;
 	struct fib_table __rcu	*fib_default;
 #endif
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 7caa99b..cd334c9 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -90,8 +90,8 @@
 void qdisc_get_default(char *id, size_t len);
 int qdisc_set_default(const char *id);
 
-void qdisc_list_add(struct Qdisc *q);
-void qdisc_list_del(struct Qdisc *q);
+void qdisc_hash_add(struct Qdisc *q);
+void qdisc_hash_del(struct Qdisc *q);
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
 struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
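With struct Qdisc gaining an hlist_node (sch_generic.h hunk below) and net_device a 16-bucket qdisc_hash table (netdevice.h hunk above), qdisc_hash_add/qdisc_hash_del replace the old per-device list. A minimal sketch of the handle-keyed pattern with the linux/hashtable.h API; the helper names are illustrative, not the net/sched implementation:

    #include <linux/hashtable.h>
    #include <linux/netdevice.h>
    #include <net/sch_generic.h>

    /* Illustrative only: store and find qdiscs by handle.
     * Assumes CONFIG_NET_SCHED so dev->qdisc_hash exists.
     */
    static void example_qdisc_hash_add(struct net_device *dev, struct Qdisc *q)
    {
            hash_add_rcu(dev->qdisc_hash, &q->hash, q->handle);
    }

    static struct Qdisc *example_qdisc_find(struct net_device *dev, u32 handle)
    {
            struct Qdisc *q;

            hash_for_each_possible_rcu(dev->qdisc_hash, q, hash, handle)
                    if (q->handle == handle)
                            return q;

            return NULL;
    }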
diff --git a/include/net/pptp.h b/include/net/pptp.h
new file mode 100644
index 0000000..92e9f1f
--- /dev/null
+++ b/include/net/pptp.h
@@ -0,0 +1,23 @@
+#ifndef _NET_PPTP_H
+#define _NET_PPTP_H
+
+#define PPP_LCP_ECHOREQ 0x09
+#define PPP_LCP_ECHOREP 0x0A
+#define SC_RCV_BITS     (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
+
+#define MISSING_WINDOW 20
+#define WRAPPED(curseq, lastseq)\
+	((((curseq) & 0xffffff00) == 0) &&\
+	(((lastseq) & 0xffffff00) == 0xffffff00))
+
+#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
+struct pptp_gre_header {
+	struct gre_base_hdr gre_hd;
+	__be16 payload_len;
+	__be16 call_id;
+	__be32 seq;
+	__be32 ack;
+} __packed;
+
+#endif
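A hedged sketch of how a receiver might sanity-check this header, using the GRE flag helpers and version/protocol constants that the if_tunnel.h hunk below adds; the function itself is illustrative, not the pptp driver code:

    #include <linux/if_tunnel.h>
    #include <net/gre.h>
    #include <net/pptp.h>

    /* PPTP carries PPP in enhanced GRE v1: keyed (call id), no checksum or
     * source routing.  Illustrative check only.
     */
    static bool example_pptp_gre_ok(const struct pptp_gre_header *h)
    {
            if ((h->gre_hd.flags & GRE_VERSION) != GRE_VERSION_1)
                    return false;
            if (h->gre_hd.protocol != GRE_PROTO_PPP)
                    return false;
            if (!GRE_IS_KEY(h->gre_hd.flags))
                    return false;
            if (GRE_IS_CSUM(h->gre_hd.flags) || GRE_IS_ROUTING(h->gre_hd.flags))
                    return false;
            return true;
    }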
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 909aff2..0d50177 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -61,7 +61,7 @@
 	u32			limit;
 	const struct Qdisc_ops	*ops;
 	struct qdisc_size_table	__rcu *stab;
-	struct list_head	list;
+	struct hlist_node       hash;
 	u32			handle;
 	u32			parent;
 	void			*u32_node;
diff --git a/include/net/strparser.h b/include/net/strparser.h
new file mode 100644
index 0000000..fdb3d67
--- /dev/null
+++ b/include/net/strparser.h
@@ -0,0 +1,145 @@
+/*
+ * Stream Parser
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef __NET_STRPARSER_H_
+#define __NET_STRPARSER_H_
+
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
+#define STRP_STATS_ADD(stat, count) ((stat) += (count))
+#define STRP_STATS_INCR(stat) ((stat)++)
+
+struct strp_stats {
+	unsigned long long rx_msgs;
+	unsigned long long rx_bytes;
+	unsigned int rx_mem_fail;
+	unsigned int rx_need_more_hdr;
+	unsigned int rx_msg_too_big;
+	unsigned int rx_msg_timeouts;
+	unsigned int rx_bad_hdr_len;
+};
+
+struct strp_aggr_stats {
+	unsigned long long rx_msgs;
+	unsigned long long rx_bytes;
+	unsigned int rx_mem_fail;
+	unsigned int rx_need_more_hdr;
+	unsigned int rx_msg_too_big;
+	unsigned int rx_msg_timeouts;
+	unsigned int rx_bad_hdr_len;
+	unsigned int rx_aborts;
+	unsigned int rx_interrupted;
+	unsigned int rx_unrecov_intr;
+};
+
+struct strparser;
+
+/* Callbacks are called with lock held for the attached socket */
+struct strp_callbacks {
+	int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
+	void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
+	int (*read_sock_done)(struct strparser *strp, int err);
+	void (*abort_parser)(struct strparser *strp, int err);
+};
+
+struct strp_rx_msg {
+	int full_len;
+	int offset;
+};
+
+static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb)
+{
+	return (struct strp_rx_msg *)((void *)skb->cb +
+		offsetof(struct qdisc_skb_cb, data));
+}
+
+/* Structure for an attached lower socket */
+struct strparser {
+	struct sock *sk;
+
+	u32 rx_stopped : 1;
+	u32 rx_paused : 1;
+	u32 rx_aborted : 1;
+	u32 rx_interrupted : 1;
+	u32 rx_unrecov_intr : 1;
+
+	struct sk_buff **rx_skb_nextp;
+	struct timer_list rx_msg_timer;
+	struct sk_buff *rx_skb_head;
+	unsigned int rx_need_bytes;
+	struct delayed_work rx_delayed_work;
+	struct work_struct rx_work;
+	struct strp_stats stats;
+	struct strp_callbacks cb;
+};
+
+/* Must be called with lock held for attached socket */
+static inline void strp_pause(struct strparser *strp)
+{
+	strp->rx_paused = 1;
+}
+
+/* May be called without holding lock for attached socket */
+static inline void strp_unpause(struct strparser *strp)
+{
+	strp->rx_paused = 0;
+}
+
+static inline void save_strp_stats(struct strparser *strp,
+				   struct strp_aggr_stats *agg_stats)
+{
+	/* Save psock statistics in the mux when psock is being unattached. */
+
+#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat +=		\
+				 strp->stats._stat)
+	SAVE_PSOCK_STATS(rx_msgs);
+	SAVE_PSOCK_STATS(rx_bytes);
+	SAVE_PSOCK_STATS(rx_mem_fail);
+	SAVE_PSOCK_STATS(rx_need_more_hdr);
+	SAVE_PSOCK_STATS(rx_msg_too_big);
+	SAVE_PSOCK_STATS(rx_msg_timeouts);
+	SAVE_PSOCK_STATS(rx_bad_hdr_len);
+#undef SAVE_PSOCK_STATS
+
+	if (strp->rx_aborted)
+		agg_stats->rx_aborts++;
+	if (strp->rx_interrupted)
+		agg_stats->rx_interrupted++;
+	if (strp->rx_unrecov_intr)
+		agg_stats->rx_unrecov_intr++;
+}
+
+static inline void aggregate_strp_stats(struct strp_aggr_stats *stats,
+					struct strp_aggr_stats *agg_stats)
+{
+#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat)
+	SAVE_PSOCK_STATS(rx_msgs);
+	SAVE_PSOCK_STATS(rx_bytes);
+	SAVE_PSOCK_STATS(rx_mem_fail);
+	SAVE_PSOCK_STATS(rx_need_more_hdr);
+	SAVE_PSOCK_STATS(rx_msg_too_big);
+	SAVE_PSOCK_STATS(rx_msg_timeouts);
+	SAVE_PSOCK_STATS(rx_bad_hdr_len);
+	SAVE_PSOCK_STATS(rx_aborts);
+	SAVE_PSOCK_STATS(rx_interrupted);
+	SAVE_PSOCK_STATS(rx_unrecov_intr);
+#undef SAVE_PSOCK_STATS
+}
+
+void strp_done(struct strparser *strp);
+void strp_stop(struct strparser *strp);
+void strp_check_rcv(struct strparser *strp);
+int strp_init(struct strparser *strp, struct sock *csk,
+	      struct strp_callbacks *cb);
+void strp_tcp_data_ready(struct strparser *strp);
+
+#endif /* __NET_STRPARSER_H_ */
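A minimal consumer sketch for this interface, assuming the attached TCP socket is locked by the caller as the comment above requires. The parse_msg callback here treats the first two bytes of each message as a big-endian length header; the return convention (full message length on success, 0 for "need more data", negative for error) is inferred from the stats names above and the KCM conversion, not spelled out in this header:

    #include <linux/skbuff.h>
    #include <net/strparser.h>

    /* Illustrative callbacks; not part of the strparser core. */
    static int example_parse_msg(struct strparser *strp, struct sk_buff *skb)
    {
            struct strp_rx_msg *rxm = strp_rx_msg(skb);
            __be16 len;

            if (skb_copy_bits(skb, rxm->offset, &len, sizeof(len)))
                    return 0;                       /* wait for more data */

            return sizeof(len) + ntohs(len);        /* full message length */
    }

    static void example_rcv_msg(struct strparser *strp, struct sk_buff *skb)
    {
            /* skb now holds exactly one message; consume or queue it. */
            kfree_skb(skb);
    }

    /* read_sock_done and abort_parser are left NULL here on the assumption
     * that the core supplies defaults.
     */
    static struct strp_callbacks example_cb = {
            .parse_msg = example_parse_msg,
            .rcv_msg   = example_rcv_msg,
    };

    static int example_attach(struct strparser *strp, struct sock *csk)
    {
            return strp_init(strp, csk, &example_cb);
    }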
diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 0fbf6fd..734fe83 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -23,6 +23,42 @@
 #define BATADV_NL_MCAST_GROUP_TPMETER	"tpmeter"
 
 /**
+ * enum batadv_tt_client_flags - TT client specific flags
+ * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table
+ * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new
+ *  update telling its new real location has not been received/sent yet
+ * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface.
+ *  This information is used by the "AP Isolation" feature
+ * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This
+ *  information is used by the Extended Isolation feature
+ * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table
+ * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has
+ *  not been announced yet
+ * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept
+ *  in the table for one more originator interval for consistency purposes
+ * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of
+ *  the network but has not been announced by any node yet
+ *
+ * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire.
+ * Bits from 8 to 15 are called _local flags_ because they are used for local
+ * computations only.
+ *
+ * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with
+ * the other nodes in the network. To achieve this goal these flags are included
+ * in the TT CRC computation.
+ */
+enum batadv_tt_client_flags {
+	BATADV_TT_CLIENT_DEL     = (1 << 0),
+	BATADV_TT_CLIENT_ROAM    = (1 << 1),
+	BATADV_TT_CLIENT_WIFI    = (1 << 4),
+	BATADV_TT_CLIENT_ISOLA	 = (1 << 5),
+	BATADV_TT_CLIENT_NOPURGE = (1 << 8),
+	BATADV_TT_CLIENT_NEW     = (1 << 9),
+	BATADV_TT_CLIENT_PENDING = (1 << 10),
+	BATADV_TT_CLIENT_TEMP	 = (1 << 11),
+};
+
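As a small illustration of the split described above (the mask values follow directly from the bit ranges; the names are made up for this sketch, batman-adv keeps its own masks internally):

    #include <linux/types.h>

    /* Bits 0-7 go on the wire, bits 8-15 stay node-local; bits 4-7 of the
     * remote set additionally feed the TT CRC.  Illustrative masks only.
     */
    #define EXAMPLE_TT_REMOTE_MASK  0x00FF
    #define EXAMPLE_TT_SYNC_MASK    0x00F0

    static inline u16 example_tt_wire_flags(u16 flags)
    {
            return flags & EXAMPLE_TT_REMOTE_MASK;  /* what gets transmitted */
    }

    static inline u16 example_tt_crc_flags(u16 flags)
    {
            return flags & EXAMPLE_TT_SYNC_MASK;    /* what the CRC covers */
    }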
+/**
  * enum batadv_nl_attrs - batman-adv netlink attributes
  *
  * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors
@@ -40,6 +76,26 @@
  * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run
  * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session
  * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment
+ * @BATADV_ATTR_ACTIVE: Flag indicating if the hard interface is active
+ * @BATADV_ATTR_TT_ADDRESS: Client MAC address
+ * @BATADV_ATTR_TT_TTVN: Translation table version
+ * @BATADV_ATTR_TT_LAST_TTVN: Previous translation table version
+ * @BATADV_ATTR_TT_CRC32: CRC32 over translation table
+ * @BATADV_ATTR_TT_VID: VLAN ID
+ * @BATADV_ATTR_TT_FLAGS: Translation table client flags
+ * @BATADV_ATTR_FLAG_BEST: Flags indicating entry is the best
+ * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen
+ * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address
+ * @BATADV_ATTR_TQ: TQ to neighbour
+ * @BATADV_ATTR_THROUGHPUT: Estimated throughput to neighbour
+ * @BATADV_ATTR_BANDWIDTH_UP: Reported uplink bandwidth
+ * @BATADV_ATTR_BANDWIDTH_DOWN: Reported downlink bandwidth
+ * @BATADV_ATTR_ROUTER: Gateway router MAC address
+ * @BATADV_ATTR_BLA_OWN: Flag indicating own originator
+ * @BATADV_ATTR_BLA_ADDRESS: Bridge loop avoidance claim MAC address
+ * @BATADV_ATTR_BLA_VID: BLA VLAN ID
+ * @BATADV_ATTR_BLA_BACKBONE: BLA gateway originator MAC address
+ * @BATADV_ATTR_BLA_CRC: BLA CRC
  * @__BATADV_ATTR_AFTER_LAST: internal use
  * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available
  * @BATADV_ATTR_MAX: highest attribute number currently defined
@@ -60,6 +116,26 @@
 	BATADV_ATTR_TPMETER_BYTES,
 	BATADV_ATTR_TPMETER_COOKIE,
 	BATADV_ATTR_PAD,
+	BATADV_ATTR_ACTIVE,
+	BATADV_ATTR_TT_ADDRESS,
+	BATADV_ATTR_TT_TTVN,
+	BATADV_ATTR_TT_LAST_TTVN,
+	BATADV_ATTR_TT_CRC32,
+	BATADV_ATTR_TT_VID,
+	BATADV_ATTR_TT_FLAGS,
+	BATADV_ATTR_FLAG_BEST,
+	BATADV_ATTR_LAST_SEEN_MSECS,
+	BATADV_ATTR_NEIGH_ADDRESS,
+	BATADV_ATTR_TQ,
+	BATADV_ATTR_THROUGHPUT,
+	BATADV_ATTR_BANDWIDTH_UP,
+	BATADV_ATTR_BANDWIDTH_DOWN,
+	BATADV_ATTR_ROUTER,
+	BATADV_ATTR_BLA_OWN,
+	BATADV_ATTR_BLA_ADDRESS,
+	BATADV_ATTR_BLA_VID,
+	BATADV_ATTR_BLA_BACKBONE,
+	BATADV_ATTR_BLA_CRC,
 	/* add attributes above here, update the policy in netlink.c */
 	__BATADV_ATTR_AFTER_LAST,
 	NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST,
@@ -73,6 +149,15 @@
  * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv device
  * @BATADV_CMD_TP_METER: Start a tp meter session
  * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session
+ * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms.
+ * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces
+ * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations
+ * @BATADV_CMD_GET_TRANSTABLE_GLOBAL: Query list of global translations
+ * @BATADV_CMD_GET_ORIGINATORS: Query list of originators
+ * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours
+ * @BATADV_CMD_GET_GATEWAYS: Query list of gateways
+ * @BATADV_CMD_GET_BLA_CLAIM: Query list of bridge loop avoidance claims
+ * @BATADV_CMD_GET_BLA_BACKBONE: Query list of bridge loop avoidance backbones
  * @__BATADV_CMD_AFTER_LAST: internal use
  * @BATADV_CMD_MAX: highest used command number
  */
@@ -81,6 +166,15 @@
 	BATADV_CMD_GET_MESH_INFO,
 	BATADV_CMD_TP_METER,
 	BATADV_CMD_TP_METER_CANCEL,
+	BATADV_CMD_GET_ROUTING_ALGOS,
+	BATADV_CMD_GET_HARDIFS,
+	BATADV_CMD_GET_TRANSTABLE_LOCAL,
+	BATADV_CMD_GET_TRANSTABLE_GLOBAL,
+	BATADV_CMD_GET_ORIGINATORS,
+	BATADV_CMD_GET_NEIGHBORS,
+	BATADV_CMD_GET_GATEWAYS,
+	BATADV_CMD_GET_BLA_CLAIM,
+	BATADV_CMD_GET_BLA_BACKBONE,
 	/* add new commands above here */
 	__BATADV_CMD_AFTER_LAST,
 	BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9e5fc16..866d53c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -375,6 +375,17 @@
 	 */
 	BPF_FUNC_probe_write_user,
 
+	/**
+	 * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 current failed the cgroup2 descendant test
+	 *   == 1 current succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_current_task_under_cgroup,
+
 	__BPF_FUNC_MAX_ID,
 };
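A hedged samples/bpf-style sketch of a tracing program using the new helper against a BPF_MAP_TYPE_CGROUP_ARRAY map; the SEC() macro, map definition style and the bpf_current_task_under_cgroup() wrapper come from the samples helpers and are assumptions here, not part of this uapi header:

    #include <uapi/linux/bpf.h>
    #include <uapi/linux/ptrace.h>
    #include "bpf_helpers.h"        /* assumed samples/bpf helper wrappers */

    struct bpf_map_def SEC("maps") cgroup_map = {
            .type        = BPF_MAP_TYPE_CGROUP_ARRAY,
            .key_size    = sizeof(__u32),
            .value_size  = sizeof(__u32),
            .max_entries = 1,
    };

    SEC("kprobe/sys_write")
    int filter_by_cgroup(struct pt_regs *ctx)
    {
            /* 1: current is in (or below) the cgroup stored at index 0,
             * 0: it is not, < 0: error (e.g. empty slot or interrupt context).
             */
            if (bpf_current_task_under_cgroup(&cgroup_map, 0) != 1)
                    return 0;

            /* filtered tracing work would go here */
            return 0;
    }

    char _license[] SEC("license") = "GPL";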
 
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 1046f55..361b9f0 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -24,9 +24,22 @@
 #define GRE_SEQ		__cpu_to_be16(0x1000)
 #define GRE_STRICT	__cpu_to_be16(0x0800)
 #define GRE_REC		__cpu_to_be16(0x0700)
-#define GRE_FLAGS	__cpu_to_be16(0x00F8)
+#define GRE_ACK		__cpu_to_be16(0x0080)
+#define GRE_FLAGS	__cpu_to_be16(0x0078)
 #define GRE_VERSION	__cpu_to_be16(0x0007)
 
+#define GRE_IS_CSUM(f)		((f) & GRE_CSUM)
+#define GRE_IS_ROUTING(f)	((f) & GRE_ROUTING)
+#define GRE_IS_KEY(f)		((f) & GRE_KEY)
+#define GRE_IS_SEQ(f)		((f) & GRE_SEQ)
+#define GRE_IS_STRICT(f)	((f) & GRE_STRICT)
+#define GRE_IS_REC(f)		((f) & GRE_REC)
+#define GRE_IS_ACK(f)		((f) & GRE_ACK)
+
+#define GRE_VERSION_1		__cpu_to_be16(0x0001)
+#define GRE_PROTO_PPP		__cpu_to_be16(0x880b)
+#define GRE_PPTP_KEY_MASK	__cpu_to_be32(0xffff)
+
 struct ip_tunnel_parm {
 	char			name[IFNAMSIZ];
 	int			link;
diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h
index 237fac4..15d8510 100644
--- a/include/uapi/linux/mii.h
+++ b/include/uapi/linux/mii.h
@@ -48,6 +48,7 @@
 #define BMCR_SPEED100		0x2000	/* Select 100Mbps              */
 #define BMCR_LOOPBACK		0x4000	/* TXD loopback bits           */
 #define BMCR_RESET		0x8000	/* Reset to default state      */
+#define BMCR_SPEED10		0x0000	/* Select 10Mbps               */
 
 /* Basic mode status register. */
 #define BMSR_ERCAP		0x0001	/* Ext-reg capability          */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633a650..a2ac051 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -538,7 +538,7 @@
 }
 late_initcall(register_perf_event_array_map);
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
 static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
 				     struct file *map_file /* not used */,
 				     int fd)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index daea765..abb61f3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -930,14 +930,14 @@
 			  enum bpf_arg_type arg_type,
 			  struct bpf_call_arg_meta *meta)
 {
-	struct reg_state *reg = env->cur_state.regs + regno;
-	enum bpf_reg_type expected_type;
+	struct reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+	enum bpf_reg_type expected_type, type = reg->type;
 	int err = 0;
 
 	if (arg_type == ARG_DONTCARE)
 		return 0;
 
-	if (reg->type == NOT_INIT) {
+	if (type == NOT_INIT) {
 		verbose("R%d !read_ok\n", regno);
 		return -EACCES;
 	}
@@ -950,16 +950,29 @@
 		return 0;
 	}
 
+	if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) {
+		verbose("helper access to the packet is not allowed for clsact\n");
+		return -EACCES;
+	}
+
 	if (arg_type == ARG_PTR_TO_MAP_KEY ||
 	    arg_type == ARG_PTR_TO_MAP_VALUE) {
 		expected_type = PTR_TO_STACK;
+		if (type != PTR_TO_PACKET && type != expected_type)
+			goto err_type;
 	} else if (arg_type == ARG_CONST_STACK_SIZE ||
 		   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
 		expected_type = CONST_IMM;
+		if (type != expected_type)
+			goto err_type;
 	} else if (arg_type == ARG_CONST_MAP_PTR) {
 		expected_type = CONST_PTR_TO_MAP;
+		if (type != expected_type)
+			goto err_type;
 	} else if (arg_type == ARG_PTR_TO_CTX) {
 		expected_type = PTR_TO_CTX;
+		if (type != expected_type)
+			goto err_type;
 	} else if (arg_type == ARG_PTR_TO_STACK ||
 		   arg_type == ARG_PTR_TO_RAW_STACK) {
 		expected_type = PTR_TO_STACK;
@@ -967,20 +980,16 @@
 		 * passed in as argument, it's a CONST_IMM type. Final test
 		 * happens during stack boundary checking.
 		 */
-		if (reg->type == CONST_IMM && reg->imm == 0)
-			expected_type = CONST_IMM;
+		if (type == CONST_IMM && reg->imm == 0)
+			/* final test in check_stack_boundary() */;
+		else if (type != PTR_TO_PACKET && type != expected_type)
+			goto err_type;
 		meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK;
 	} else {
 		verbose("unsupported arg_type %d\n", arg_type);
 		return -EFAULT;
 	}
 
-	if (reg->type != expected_type) {
-		verbose("R%d type=%s expected=%s\n", regno,
-			reg_type_str[reg->type], reg_type_str[expected_type]);
-		return -EACCES;
-	}
-
 	if (arg_type == ARG_CONST_MAP_PTR) {
 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
 		meta->map_ptr = reg->map_ptr;
@@ -998,8 +1007,13 @@
 			verbose("invalid map_ptr to access map->key\n");
 			return -EACCES;
 		}
-		err = check_stack_boundary(env, regno, meta->map_ptr->key_size,
-					   false, NULL);
+		if (type == PTR_TO_PACKET)
+			err = check_packet_access(env, regno, 0,
+						  meta->map_ptr->key_size);
+		else
+			err = check_stack_boundary(env, regno,
+						   meta->map_ptr->key_size,
+						   false, NULL);
 	} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
 		 * check [value, value + map->value_size) validity
@@ -1009,9 +1023,13 @@
 			verbose("invalid map_ptr to access map->value\n");
 			return -EACCES;
 		}
-		err = check_stack_boundary(env, regno,
-					   meta->map_ptr->value_size,
-					   false, NULL);
+		if (type == PTR_TO_PACKET)
+			err = check_packet_access(env, regno, 0,
+						  meta->map_ptr->value_size);
+		else
+			err = check_stack_boundary(env, regno,
+						   meta->map_ptr->value_size,
+						   false, NULL);
 	} else if (arg_type == ARG_CONST_STACK_SIZE ||
 		   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
 		bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
@@ -1025,11 +1043,18 @@
 			verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
 			return -EACCES;
 		}
-		err = check_stack_boundary(env, regno - 1, reg->imm,
-					   zero_size_allowed, meta);
+		if (regs[regno - 1].type == PTR_TO_PACKET)
+			err = check_packet_access(env, regno - 1, 0, reg->imm);
+		else
+			err = check_stack_boundary(env, regno - 1, reg->imm,
+						   zero_size_allowed, meta);
 	}
 
 	return err;
+err_type:
+	verbose("R%d type=%s expected=%s\n", regno,
+		reg_type_str[type], reg_type_str[expected_type]);
+	return -EACCES;
 }
 
 static int check_map_func_compatibility(struct bpf_map *map, int func_id)
@@ -1053,7 +1078,8 @@
 			goto error;
 		break;
 	case BPF_MAP_TYPE_CGROUP_ARRAY:
-		if (func_id != BPF_FUNC_skb_under_cgroup)
+		if (func_id != BPF_FUNC_skb_under_cgroup &&
+		    func_id != BPF_FUNC_current_task_under_cgroup)
 			goto error;
 		break;
 	default:
@@ -1075,6 +1101,7 @@
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
 		break;
+	case BPF_FUNC_current_task_under_cgroup:
 	case BPF_FUNC_skb_under_cgroup:
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 			goto error;
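The check_func_arg() rework above is what allows a verified packet pointer, not just a stack buffer, to be passed as a helper's map key/value or size-checked buffer argument (except for clsact programs, as the new error message notes). A hedged restricted-C sketch of the kind of pattern this enables in an XDP program; the map, helpers header and section conventions are assumptions:

    #include <uapi/linux/bpf.h>
    #include "bpf_helpers.h"        /* assumed samples/bpf helper wrappers */

    struct bpf_map_def SEC("maps") example_map = {
            .type        = BPF_MAP_TYPE_HASH,
            .key_size    = 4,
            .value_size  = sizeof(long),
            .max_entries = 1024,
    };

    SEC("xdp")
    int lookup_by_packet_key(struct xdp_md *ctx)
    {
            void *data     = (void *)(long)ctx->data;
            void *data_end = (void *)(long)ctx->data_end;

            /* The verifier still requires an explicit bounds check before a
             * packet pointer may be handed to a helper.
             */
            if (data + 4 > data_end)
                    return XDP_PASS;

            /* After this change the 4-byte key can live in the packet itself
             * instead of being copied to the stack first.
             */
            if (bpf_map_lookup_elem(&example_map, data))
                    return XDP_DROP;

            return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";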
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b20438f..ad35213 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -376,6 +376,34 @@
 	.ret_type	= RET_INTEGER,
 };
 
+static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *)(long)r1;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct cgroup *cgrp;
+	u32 idx = (u32)r2;
+
+	if (unlikely(in_interrupt()))
+		return -EINVAL;
+
+	if (unlikely(idx >= array->map.max_entries))
+		return -E2BIG;
+
+	cgrp = READ_ONCE(array->ptrs[idx]);
+	if (unlikely(!cgrp))
+		return -EAGAIN;
+
+	return task_under_cgroup_hierarchy(current, cgrp);
+}
+
+static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+	.func           = bpf_current_task_under_cgroup,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_CONST_MAP_PTR,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -407,6 +435,10 @@
 		return &bpf_perf_event_read_proto;
 	case BPF_FUNC_probe_write_user:
 		return bpf_get_probe_write_proto();
+	case BPF_FUNC_current_task_under_cgroup:
+		return &bpf_current_task_under_cgroup_proto;
+	case BPF_FUNC_get_prandom_u32:
+		return &bpf_get_prandom_u32_proto;
 	default:
 		return NULL;
 	}
diff --git a/net/Kconfig b/net/Kconfig
index c2cdbce..7b6cd34 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -369,6 +369,7 @@
 source "net/bluetooth/Kconfig"
 source "net/rxrpc/Kconfig"
 source "net/kcm/Kconfig"
+source "net/strparser/Kconfig"
 
 config FIB_RULES
 	bool
diff --git a/net/Makefile b/net/Makefile
index 9bd20bb..4cafaa2 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -35,6 +35,7 @@
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
 obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_AF_KCM)		+= kcm/
+obj-$(CONFIG_STREAM_PARSER)	+= strparser/
 obj-$(CONFIG_ATM)		+= atm/
 obj-$(CONFIG_L2TP)		+= l2tp/
 obj-$(CONFIG_DECNET)		+= decnet/
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 81dbbf5..f2cc50d3 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -20,12 +20,18 @@
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <linux/moduleparam.h>
+#include <linux/netlink.h>
 #include <linux/printk.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
+#include "netlink.h"
 
 char batadv_routing_algo[20] = "BATMAN_IV";
 static struct hlist_head batadv_algo_list;
@@ -138,3 +144,65 @@
 
 module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
 		0644);
+
+/**
+ * batadv_algo_dump_entry - fill in information about one supported routing
+ *  algorithm
+ * @msg: netlink message to be sent back
+ * @portid: Port to reply to
+ * @seq: Sequence number of message
+ * @bat_algo_ops: Algorithm to be dumped
+ *
+ * Return: Error number, or 0 on success
+ */
+static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+				  struct batadv_algo_ops *bat_algo_ops)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_ROUTING_ALGOS);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_algo_ops->name))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_algo_dump - fill in information about supported routing
+ *  algorithms
+ * @msg: netlink message to be sent back
+ * @cb: Parameters to the netlink request
+ *
+ * Return: Length of reply message.
+ */
+int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct batadv_algo_ops *bat_algo_ops;
+	int skip = cb->args[0];
+	int i = 0;
+
+	hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
+		if (i++ < skip)
+			continue;
+
+		if (batadv_algo_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+					   bat_algo_ops)) {
+			i--;
+			break;
+		}
+	}
+
+	cb->args[0] = i;
+
+	return msg->len;
+}
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 860d773..3b5b69c 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -22,7 +22,9 @@
 
 #include <linux/types.h>
 
+struct netlink_callback;
 struct seq_file;
+struct sk_buff;
 
 extern char batadv_routing_algo[];
 extern struct list_head batadv_hardif_list;
@@ -31,5 +33,6 @@
 int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
 int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
 int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb);
 
 #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 19b0abd..9ed4f1f 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -35,6 +35,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/pkt_sched.h>
 #include <linux/printk.h>
 #include <linux/random.h>
@@ -48,12 +49,17 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bitarray.h"
+#include "gateway_client.h"
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "originator.h"
 #include "packet.h"
@@ -528,36 +534,25 @@
 static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
 {
 	struct net_device *soft_iface;
-	struct batadv_priv *bat_priv;
-	struct batadv_hard_iface *primary_if = NULL;
 
 	if (!forw_packet->if_incoming) {
 		pr_err("Error - can't forward packet: incoming iface not specified\n");
-		goto out;
+		return;
 	}
 
 	soft_iface = forw_packet->if_incoming->soft_iface;
-	bat_priv = netdev_priv(soft_iface);
 
 	if (WARN_ON(!forw_packet->if_outgoing))
-		goto out;
+		return;
 
 	if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
-		goto out;
+		return;
 
 	if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
-		goto out;
-
-	primary_if = batadv_primary_if_get_selected(bat_priv);
-	if (!primary_if)
-		goto out;
+		return;
 
 	/* only for one specific outgoing interface */
 	batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing);
-
-out:
-	if (primary_if)
-		batadv_hardif_put(primary_if);
 }
 
 /**
@@ -685,19 +680,12 @@
 	struct batadv_forw_packet *forw_packet_aggr;
 	unsigned char *skb_buff;
 	unsigned int skb_size;
+	atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left;
 
-	/* own packet should always be scheduled */
-	if (!own_packet) {
-		if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
-			batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-				   "batman packet queue full\n");
-			return;
-		}
-	}
-
-	forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
+	forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
+						    queue_left, bat_priv);
 	if (!forw_packet_aggr)
-		goto out_nomem;
+		return;
 
 	if (atomic_read(&bat_priv->aggregated_ogms) &&
 	    packet_len < BATADV_MAX_AGGREGATION_BYTES)
@@ -708,8 +696,11 @@
 	skb_size += ETH_HLEN;
 
 	forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
-	if (!forw_packet_aggr->skb)
-		goto out_free_forw_packet;
+	if (!forw_packet_aggr->skb) {
+		batadv_forw_packet_free(forw_packet_aggr);
+		return;
+	}
+
 	forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
 	skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
 
@@ -717,12 +708,7 @@
 	forw_packet_aggr->packet_len = packet_len;
 	memcpy(skb_buff, packet_buff, packet_len);
 
-	kref_get(&if_incoming->refcount);
-	kref_get(&if_outgoing->refcount);
 	forw_packet_aggr->own = own_packet;
-	forw_packet_aggr->if_incoming = if_incoming;
-	forw_packet_aggr->if_outgoing = if_outgoing;
-	forw_packet_aggr->num_packets = 0;
 	forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS;
 	forw_packet_aggr->send_time = send_time;
 
@@ -741,13 +727,6 @@
 	queue_delayed_work(batadv_event_workqueue,
 			   &forw_packet_aggr->delayed_work,
 			   send_time - jiffies);
-
-	return;
-out_free_forw_packet:
-	kfree(forw_packet_aggr);
-out_nomem:
-	if (!own_packet)
-		atomic_inc(&bat_priv->batman_queue_left);
 }
 
 /* aggregate a new packet into the existing ogm packet */
@@ -1830,10 +1809,6 @@
 		batadv_iv_ogm_schedule(forw_packet->if_incoming);
 
 out:
-	/* don't count own packet */
-	if (!forw_packet->own)
-		atomic_inc(&bat_priv->batman_queue_left);
-
 	batadv_forw_packet_free(forw_packet);
 }
 
@@ -1978,6 +1953,235 @@
 }
 
 /**
+ * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a
+ *  given outgoing interface.
+ * @neigh_node: Neighbour of interest
+ * @if_outgoing: Outgoing interface of interest
+ * @tq_avg: Pointer of where to store the TQ average
+ *
+ * Return: False if no average TQ available, otherwise true.
+ */
+static bool
+batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node,
+			       struct batadv_hard_iface *if_outgoing,
+			       u8 *tq_avg)
+{
+	struct batadv_neigh_ifinfo *n_ifinfo;
+
+	n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
+	if (!n_ifinfo)
+		return false;
+
+	*tq_avg = n_ifinfo->bat_iv.tq_avg;
+	batadv_neigh_ifinfo_put(n_ifinfo);
+
+	return true;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @neigh_node: Single hops neighbour
+ * @best: Is the best originator
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+				 struct batadv_priv *bat_priv,
+				 struct batadv_hard_iface *if_outgoing,
+				 struct batadv_orig_node *orig_node,
+				 struct batadv_neigh_node *neigh_node,
+				 bool best)
+{
+	void *hdr;
+	u8 tq_avg;
+	unsigned int last_seen_msecs;
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen);
+
+	if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node, if_outgoing, &tq_avg))
+		return 0;
+
+	if (if_outgoing != BATADV_IF_DEFAULT &&
+	    if_outgoing != neigh_node->if_incoming)
+		return 0;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+		    orig_node->orig) ||
+	    nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    neigh_node->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			neigh_node->if_incoming->net_dev->ifindex) ||
+	    nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs))
+		goto nla_put_failure;
+
+	if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @sub_s: Number of sub entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			      struct batadv_priv *bat_priv,
+			      struct batadv_hard_iface *if_outgoing,
+			      struct batadv_orig_node *orig_node, int *sub_s)
+{
+	struct batadv_neigh_node *neigh_node_best;
+	struct batadv_neigh_node *neigh_node;
+	int sub = 0;
+	bool best;
+	u8 tq_avg_best;
+
+	neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing);
+	if (!neigh_node_best)
+		goto out;
+
+	if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node_best, if_outgoing,
+					    &tq_avg_best))
+		goto out;
+
+	if (tq_avg_best == 0)
+		goto out;
+
+	hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
+		if (sub++ < *sub_s)
+			continue;
+
+		best = (neigh_node == neigh_node_best);
+
+		if (batadv_iv_ogm_orig_dump_subentry(msg, portid, seq,
+						     bat_priv, if_outgoing,
+						     orig_node, neigh_node,
+						     best)) {
+			batadv_neigh_node_put(neigh_node_best);
+
+			*sub_s = sub - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+ out:
+	if (neigh_node_best)
+		batadv_neigh_node_put(neigh_node_best);
+
+	*sub_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @head: Bucket to be dumped
+ * @idx_s: Number of entries to be skipped
+ * @sub: Number of sub entries to be skipped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_priv *bat_priv,
+			       struct batadv_hard_iface *if_outgoing,
+			       struct hlist_head *head, int *idx_s, int *sub)
+{
+	struct batadv_orig_node *orig_node;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_iv_ogm_orig_dump_entry(msg, portid, seq, bat_priv,
+						  if_outgoing, orig_node,
+						  sub)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	*sub = 0;
+	return 0;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump - Dump the originators into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ */
+static void
+batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
+			struct batadv_priv *bat_priv,
+			struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct hlist_head *head;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int sub = cb->args[2];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_iv_ogm_orig_dump_bucket(msg, portid,
+						   cb->nlh->nlmsg_seq,
+						   bat_priv, if_outgoing, head,
+						   &idx, &sub))
+			break;
+
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+	cb->args[2] = sub;
+}
+
+/**
  * batadv_iv_hardif_neigh_print - print a single hop neighbour node
  * @seq: neighbour table seq_file struct
  * @hardif_neigh: hardif neighbour information
@@ -2029,6 +2233,181 @@
 }
 
 /**
+ * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors
+ * @neigh1: the first neighbor object of the comparison
+ * @if_outgoing1: outgoing interface for the first neighbor
+ * @neigh2: the second neighbor object of the comparison
+ * @if_outgoing2: outgoing interface for the second neighbor
+ * @diff: pointer to integer receiving the calculated difference
+ *
+ * The content of *@diff is only valid when this function returns true.
+ * It is less than, equal to or greater than 0 if the metric via neigh1 is
+ * lower than, the same as or higher than the metric via neigh2.
+ *
+ * Return: true when the difference could be calculated, false otherwise
+ */
+static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1,
+				     struct batadv_hard_iface *if_outgoing1,
+				     struct batadv_neigh_node *neigh2,
+				     struct batadv_hard_iface *if_outgoing2,
+				     int *diff)
+{
+	struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
+	u8 tq1, tq2;
+	bool ret = true;
+
+	neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
+	neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
+
+	if (!neigh1_ifinfo || !neigh2_ifinfo) {
+		ret = false;
+		goto out;
+	}
+
+	tq1 = neigh1_ifinfo->bat_iv.tq_avg;
+	tq2 = neigh2_ifinfo->bat_iv.tq_avg;
+	*diff = (int)tq1 - (int)tq2;
+
+out:
+	if (neigh1_ifinfo)
+		batadv_neigh_ifinfo_put(neigh1_ifinfo);
+	if (neigh2_ifinfo)
+		batadv_neigh_ifinfo_put(neigh2_ifinfo);
+
+	return ret;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hardif_neigh: Neighbour to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_hardif_neigh_node *hardif_neigh)
+{
+	void *hdr;
+	unsigned int last_seen_msecs;
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    hardif_neigh->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			hardif_neigh->if_incoming->net_dev->ifindex) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface
+ *  into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @hard_iface: Hard interface to dump the neighbours for
+ * @idx_s: Number of entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
+				struct batadv_priv *bat_priv,
+				struct batadv_hard_iface *hard_iface,
+				int *idx_s)
+{
+	struct batadv_hardif_neigh_node *hardif_neigh;
+	int idx = 0;
+
+	hlist_for_each_entry_rcu(hardif_neigh,
+				 &hard_iface->neigh_list, list) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_iv_ogm_neigh_dump_neigh(msg, portid, seq,
+						   hardif_neigh)) {
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+	*idx_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @single_hardif: Limit dump to this hard interface
+ */
+static void
+batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
+			 struct batadv_priv *bat_priv,
+			 struct batadv_hard_iface *single_hardif)
+{
+	struct batadv_hard_iface *hard_iface;
+	int i_hardif = 0;
+	int i_hardif_s = cb->args[0];
+	int idx = cb->args[1];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	rcu_read_lock();
+	if (single_hardif) {
+		if (i_hardif_s == 0) {
+			if (batadv_iv_ogm_neigh_dump_hardif(msg, portid,
+							    cb->nlh->nlmsg_seq,
+							    bat_priv,
+							    single_hardif,
+							    &idx) == 0)
+				i_hardif++;
+		}
+	} else {
+		list_for_each_entry_rcu(hard_iface, &batadv_hardif_list,
+					list) {
+			if (hard_iface->soft_iface != bat_priv->soft_iface)
+				continue;
+
+			if (i_hardif++ < i_hardif_s)
+				continue;
+
+			if (batadv_iv_ogm_neigh_dump_hardif(msg, portid,
+							    cb->nlh->nlmsg_seq,
+							    bat_priv,
+							    hard_iface, &idx)) {
+				i_hardif--;
+				break;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	cb->args[0] = i_hardif;
+	cb->args[1] = idx;
+}
+
+/**
  * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
  * @neigh1: the first neighbor object of the comparison
  * @if_outgoing1: outgoing interface for the first neighbor
@@ -2043,27 +2422,13 @@
 				   struct batadv_neigh_node *neigh2,
 				   struct batadv_hard_iface *if_outgoing2)
 {
-	struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
-	u8 tq1, tq2;
+	bool ret;
 	int diff;
 
-	neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
-	neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
-
-	if (!neigh1_ifinfo || !neigh2_ifinfo) {
-		diff = 0;
-		goto out;
-	}
-
-	tq1 = neigh1_ifinfo->bat_iv.tq_avg;
-	tq2 = neigh2_ifinfo->bat_iv.tq_avg;
-	diff = tq1 - tq2;
-
-out:
-	if (neigh1_ifinfo)
-		batadv_neigh_ifinfo_put(neigh1_ifinfo);
-	if (neigh2_ifinfo)
-		batadv_neigh_ifinfo_put(neigh2_ifinfo);
+	ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2,
+				       if_outgoing2, &diff);
+	if (!ret)
+		return 0;
 
 	return diff;
 }
@@ -2085,29 +2450,15 @@
 			   struct batadv_neigh_node *neigh2,
 			   struct batadv_hard_iface *if_outgoing2)
 {
-	struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
-	u8 tq1, tq2;
 	bool ret;
+	int diff;
 
-	neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
-	neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
+	ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2,
+				       if_outgoing2, &diff);
+	if (!ret)
+		return false;
 
-	/* we can't say that the metric is better */
-	if (!neigh1_ifinfo || !neigh2_ifinfo) {
-		ret = false;
-		goto out;
-	}
-
-	tq1 = neigh1_ifinfo->bat_iv.tq_avg;
-	tq2 = neigh2_ifinfo->bat_iv.tq_avg;
-	ret = (tq1 - tq2) > -BATADV_TQ_SIMILARITY_THRESHOLD;
-
-out:
-	if (neigh1_ifinfo)
-		batadv_neigh_ifinfo_put(neigh1_ifinfo);
-	if (neigh2_ifinfo)
-		batadv_neigh_ifinfo_put(neigh2_ifinfo);
-
+	ret = diff > -BATADV_TQ_SIMILARITY_THRESHOLD;
 	return ret;
 }
 
@@ -2117,6 +2468,323 @@
 	batadv_iv_ogm_schedule(hard_iface);
 }
 
+static struct batadv_gw_node *
+batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
+{
+	struct batadv_neigh_node *router;
+	struct batadv_neigh_ifinfo *router_ifinfo;
+	struct batadv_gw_node *gw_node, *curr_gw = NULL;
+	u64 max_gw_factor = 0;
+	u64 tmp_gw_factor = 0;
+	u8 max_tq = 0;
+	u8 tq_avg;
+	struct batadv_orig_node *orig_node;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		orig_node = gw_node->orig_node;
+		router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+		if (!router)
+			continue;
+
+		router_ifinfo = batadv_neigh_ifinfo_get(router,
+							BATADV_IF_DEFAULT);
+		if (!router_ifinfo)
+			goto next;
+
+		if (!kref_get_unless_zero(&gw_node->refcount))
+			goto next;
+
+		tq_avg = router_ifinfo->bat_iv.tq_avg;
+
+		switch (atomic_read(&bat_priv->gw.sel_class)) {
+		case 1: /* fast connection */
+			tmp_gw_factor = tq_avg * tq_avg;
+			tmp_gw_factor *= gw_node->bandwidth_down;
+			tmp_gw_factor *= 100 * 100;
+			tmp_gw_factor >>= 18;
+
+			if ((tmp_gw_factor > max_gw_factor) ||
+			    ((tmp_gw_factor == max_gw_factor) &&
+			     (tq_avg > max_tq))) {
+				if (curr_gw)
+					batadv_gw_node_put(curr_gw);
+				curr_gw = gw_node;
+				kref_get(&curr_gw->refcount);
+			}
+			break;
+
+		default: /* 2:  stable connection (use best statistic)
+			  * 3:  fast-switch (use best statistic but change as
+			  *     soon as a better gateway appears)
+			  * XX: late-switch (use best statistic but change as
+			  *     soon as a better gateway appears which has
+			  *     $routing_class more tq points)
+			  */
+			if (tq_avg > max_tq) {
+				if (curr_gw)
+					batadv_gw_node_put(curr_gw);
+				curr_gw = gw_node;
+				kref_get(&curr_gw->refcount);
+			}
+			break;
+		}
+
+		if (tq_avg > max_tq)
+			max_tq = tq_avg;
+
+		if (tmp_gw_factor > max_gw_factor)
+			max_gw_factor = tmp_gw_factor;
+
+		batadv_gw_node_put(gw_node);
+
+next:
+		batadv_neigh_node_put(router);
+		if (router_ifinfo)
+			batadv_neigh_ifinfo_put(router_ifinfo);
+	}
+	rcu_read_unlock();
+
+	return curr_gw;
+}
+
+static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv,
+				     struct batadv_orig_node *curr_gw_orig,
+				     struct batadv_orig_node *orig_node)
+{
+	struct batadv_neigh_ifinfo *router_orig_ifinfo = NULL;
+	struct batadv_neigh_ifinfo *router_gw_ifinfo = NULL;
+	struct batadv_neigh_node *router_gw = NULL;
+	struct batadv_neigh_node *router_orig = NULL;
+	u8 gw_tq_avg, orig_tq_avg;
+	bool ret = false;
+
+	/* dynamic re-election is performed only on fast or late switch */
+	if (atomic_read(&bat_priv->gw.sel_class) <= 2)
+		return false;
+
+	router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT);
+	if (!router_gw) {
+		ret = true;
+		goto out;
+	}
+
+	router_gw_ifinfo = batadv_neigh_ifinfo_get(router_gw,
+						   BATADV_IF_DEFAULT);
+	if (!router_gw_ifinfo) {
+		ret = true;
+		goto out;
+	}
+
+	router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+	if (!router_orig)
+		goto out;
+
+	router_orig_ifinfo = batadv_neigh_ifinfo_get(router_orig,
+						     BATADV_IF_DEFAULT);
+	if (!router_orig_ifinfo)
+		goto out;
+
+	gw_tq_avg = router_gw_ifinfo->bat_iv.tq_avg;
+	orig_tq_avg = router_orig_ifinfo->bat_iv.tq_avg;
+
+	/* the TQ value has to be better */
+	if (orig_tq_avg < gw_tq_avg)
+		goto out;
+
+	/* if the routing class is greater than 3 the value tells us how much
+	 * greater the TQ value of the new gateway must be
+	 */
+	if ((atomic_read(&bat_priv->gw.sel_class) > 3) &&
+	    (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class)))
+		goto out;
+
+	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+		   "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n",
+		   gw_tq_avg, orig_tq_avg);
+
+	ret = true;
+out:
+	if (router_gw_ifinfo)
+		batadv_neigh_ifinfo_put(router_gw_ifinfo);
+	if (router_orig_ifinfo)
+		batadv_neigh_ifinfo_put(router_orig_ifinfo);
+	if (router_gw)
+		batadv_neigh_node_put(router_gw);
+	if (router_orig)
+		batadv_neigh_node_put(router_orig);
+
+	return ret;
+}
+
+/* fails if orig_node has no router */
+static int batadv_iv_gw_write_buffer_text(struct batadv_priv *bat_priv,
+					  struct seq_file *seq,
+					  const struct batadv_gw_node *gw_node)
+{
+	struct batadv_gw_node *curr_gw;
+	struct batadv_neigh_node *router;
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	int ret = -1;
+
+	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+	if (!router)
+		goto out;
+
+	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+	if (!router_ifinfo)
+		goto out;
+
+	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+	seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
+		   (curr_gw == gw_node ? "=>" : "  "),
+		   gw_node->orig_node->orig,
+		   router_ifinfo->bat_iv.tq_avg, router->addr,
+		   router->if_incoming->net_dev->name,
+		   gw_node->bandwidth_down / 10,
+		   gw_node->bandwidth_down % 10,
+		   gw_node->bandwidth_up / 10,
+		   gw_node->bandwidth_up % 10);
+	ret = seq_has_overflowed(seq) ? -1 : 0;
+
+	if (curr_gw)
+		batadv_gw_node_put(curr_gw);
+out:
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_put(router_ifinfo);
+	if (router)
+		batadv_neigh_node_put(router);
+	return ret;
+}
+
+static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
+			       struct seq_file *seq)
+{
+	struct batadv_gw_node *gw_node;
+	int gw_count = 0;
+
+	seq_puts(seq,
+		 "      Gateway      (#/255)           Nexthop [outgoingIF]: advertised uplink bandwidth\n");
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		/* fails if orig_node has no router */
+		if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
+			continue;
+
+		gw_count++;
+	}
+	rcu_read_unlock();
+
+	if (gw_count == 0)
+		seq_puts(seq, "No gateways in range ...\n");
+}
+
+/**
+ * batadv_iv_gw_dump_entry - Dump a gateway into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @gw_node: Gateway to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+				   struct batadv_priv *bat_priv,
+				   struct batadv_gw_node *gw_node)
+{
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	struct batadv_neigh_node *router;
+	struct batadv_gw_node *curr_gw = NULL;
+	int ret = -EINVAL;
+	void *hdr;
+
+	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+	if (!router)
+		goto out;
+
+	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+	if (!router_ifinfo)
+		goto out;
+
+	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	ret = -EMSGSIZE;
+
+	if (curr_gw == gw_node)
+		if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) {
+			genlmsg_cancel(msg, hdr);
+			goto out;
+		}
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+		    gw_node->orig_node->orig) ||
+	    nla_put_u8(msg, BATADV_ATTR_TQ, router_ifinfo->bat_iv.tq_avg) ||
+	    nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN,
+		    router->addr) ||
+	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+			   router->if_incoming->net_dev->name) ||
+	    nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
+			gw_node->bandwidth_down) ||
+	    nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP,
+			gw_node->bandwidth_up)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	genlmsg_end(msg, hdr);
+	ret = 0;
+
+out:
+	if (curr_gw)
+		batadv_gw_node_put(curr_gw);
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_put(router_ifinfo);
+	if (router)
+		batadv_neigh_node_put(router);
+	return ret;
+}
+
+/**
+ * batadv_iv_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ */
+static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
+			      struct batadv_priv *bat_priv)
+{
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct batadv_gw_node *gw_node;
+	int idx_skip = cb->args[0];
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		if (idx++ < idx_skip)
+			continue;
+
+		if (batadv_iv_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+					    bat_priv, gw_node)) {
+			idx_skip = idx - 1;
+			goto unlock;
+		}
+	}
+
+	idx_skip = idx;
+unlock:
+	rcu_read_unlock();
+
+	cb->args[0] = idx_skip;
+}
+
 static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	.name = "BATMAN_IV",
 	.iface = {
@@ -2130,13 +2798,21 @@
 		.cmp = batadv_iv_ogm_neigh_cmp,
 		.is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
 		.print = batadv_iv_neigh_print,
+		.dump = batadv_iv_ogm_neigh_dump,
 	},
 	.orig = {
 		.print = batadv_iv_ogm_orig_print,
+		.dump = batadv_iv_ogm_orig_dump,
 		.free = batadv_iv_ogm_orig_free,
 		.add_if = batadv_iv_ogm_orig_add_if,
 		.del_if = batadv_iv_ogm_orig_del_if,
 	},
+	.gw = {
+		.get_best_gw_node = batadv_iv_gw_get_best_gw_node,
+		.is_eligible = batadv_iv_gw_is_eligible,
+		.print = batadv_iv_gw_print,
+		.dump = batadv_iv_gw_dump,
+	},
 };
 
 int __init batadv_iv_init(void)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 0366cbf..9e872dc 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -21,24 +21,38 @@
 #include <linux/atomic.h>
 #include <linux/bug.h>
 #include <linux/cache.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bat_v_elp.h"
 #include "bat_v_ogm.h"
+#include "gateway_client.h"
+#include "gateway_common.h"
 #include "hard-interface.h"
 #include "hash.h"
+#include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 
+struct sk_buff;
+
 static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -200,6 +214,138 @@
 }
 
 /**
+ * batadv_v_neigh_dump_neigh - Dump a neighbour into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hardif_neigh: Neighbour to dump
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
+			  struct batadv_hardif_neigh_node *hardif_neigh)
+{
+	void *hdr;
+	unsigned int last_seen_msecs;
+	u32 throughput;
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen);
+	throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput);
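+	/* bat_v keeps throughput in 0.1 Mbit/s steps; multiplying by 100
+	 * converts the value to kbit/s before it is put on the netlink
+	 * message
+	 */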
+	throughput = throughput * 100;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_NEIGHBORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    hardif_neigh->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			hardif_neigh->if_incoming->net_dev->ifindex) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs) ||
+	    nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_v_neigh_dump_hardif - Dump the neighbours of a hard interface into
+ *  a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @hard_iface: The hard interface to be dumped
+ * @idx_s: Entries to be skipped
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
+			   struct batadv_priv *bat_priv,
+			   struct batadv_hard_iface *hard_iface,
+			   int *idx_s)
+{
+	struct batadv_hardif_neigh_node *hardif_neigh;
+	int idx = 0;
+
+	hlist_for_each_entry_rcu(hardif_neigh,
+				 &hard_iface->neigh_list, list) {
+		if (idx++ < *idx_s)
+			continue;
+
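+		/* if the entry does not fit into the message anymore,
+		 * remember its index so the next dump round can resume
+		 * right here
+		 */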
+		if (batadv_v_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) {
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+	*idx_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_v_neigh_dump - Dump the neighbours of a hard interface into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @single_hardif: Limit dumping to this hard interface
+ */
+static void
+batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
+		    struct batadv_priv *bat_priv,
+		    struct batadv_hard_iface *single_hardif)
+{
+	struct batadv_hard_iface *hard_iface;
+	int i_hardif = 0;
+	int i_hardif_s = cb->args[0];
+	int idx = cb->args[1];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	rcu_read_lock();
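+	/* dump either the single requested hard interface or all hard
+	 * interfaces attached to this soft interface
+	 */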
+	if (single_hardif) {
+		if (i_hardif_s == 0) {
+			if (batadv_v_neigh_dump_hardif(msg, portid,
+						       cb->nlh->nlmsg_seq,
+						       bat_priv, single_hardif,
+						       &idx) == 0)
+				i_hardif++;
+		}
+	} else {
+		list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+			if (hard_iface->soft_iface != bat_priv->soft_iface)
+				continue;
+
+			if (i_hardif++ < i_hardif_s)
+				continue;
+
+			if (batadv_v_neigh_dump_hardif(msg, portid,
+						       cb->nlh->nlmsg_seq,
+						       bat_priv, hard_iface,
+						       &idx)) {
+				i_hardif--;
+				break;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	cb->args[0] = i_hardif;
+	cb->args[1] = idx;
+}
+
+/**
  * batadv_v_orig_print - print the originator table
  * @bat_priv: the bat priv with all the soft interface information
  * @seq: debugfs table seq_file struct
@@ -266,6 +412,204 @@
 		seq_puts(seq, "No batman nodes in range ...\n");
 }
 
+/**
+ * batadv_v_orig_dump_subentry - Dump an originator subentry into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @neigh_node: Single hop neighbour
+ * @best: Is the best originator
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_priv *bat_priv,
+			    struct batadv_hard_iface *if_outgoing,
+			    struct batadv_orig_node *orig_node,
+			    struct batadv_neigh_node *neigh_node,
+			    bool best)
+{
+	struct batadv_neigh_ifinfo *n_ifinfo;
+	unsigned int last_seen_msecs;
+	u32 throughput;
+	void *hdr;
+
+	n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
+	if (!n_ifinfo)
+		return 0;
+
+	throughput = n_ifinfo->bat_v.throughput * 100;
+
+	batadv_neigh_ifinfo_put(n_ifinfo);
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen);
+
+	if (if_outgoing != BATADV_IF_DEFAULT &&
+	    if_outgoing != neigh_node->if_incoming)
+		return 0;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_ORIGINATORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) ||
+	    nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    neigh_node->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			neigh_node->if_incoming->net_dev->ifindex) ||
+	    nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs))
+		goto nla_put_failure;
+
+	if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_v_orig_dump_entry - Dump an originator entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @sub_s: Number of sub entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			 struct batadv_priv *bat_priv,
+			 struct batadv_hard_iface *if_outgoing,
+			 struct batadv_orig_node *orig_node, int *sub_s)
+{
+	struct batadv_neigh_node *neigh_node_best;
+	struct batadv_neigh_node *neigh_node;
+	int sub = 0;
+	bool best;
+
+	neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing);
+	if (!neigh_node_best)
+		goto out;
+
+	hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
+		if (sub++ < *sub_s)
+			continue;
+
+		best = (neigh_node == neigh_node_best);
+
+		if (batadv_v_orig_dump_subentry(msg, portid, seq, bat_priv,
+						if_outgoing, orig_node,
+						neigh_node, best)) {
+			batadv_neigh_node_put(neigh_node_best);
+
+			*sub_s = sub - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+ out:
+	if (neigh_node_best)
+		batadv_neigh_node_put(neigh_node_best);
+
+	*sub_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_v_orig_dump_bucket - Dump an originator bucket into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @head: Bucket to be dumped
+ * @idx_s: Number of entries to be skipped
+ * @sub: Number of sub entries to be skipped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			  struct batadv_priv *bat_priv,
+			  struct batadv_hard_iface *if_outgoing,
+			  struct hlist_head *head, int *idx_s, int *sub)
+{
+	struct batadv_orig_node *orig_node;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_v_orig_dump_entry(msg, portid, seq, bat_priv,
+					     if_outgoing, orig_node, sub)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	*sub = 0;
+	return 0;
+}
+
+/**
+ * batadv_v_orig_dump - Dump the originators into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ */
+static void
+batadv_v_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
+		   struct batadv_priv *bat_priv,
+		   struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct hlist_head *head;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int sub = cb->args[2];
+	int portid = NETLINK_CB(cb->skb).portid;
+
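+	/* cb->args[] survive between netlink dump calls; bucket, entry and
+	 * sub-entry index record where the previous partial dump stopped
+	 */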
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_v_orig_dump_bucket(msg, portid,
+					      cb->nlh->nlmsg_seq,
+					      bat_priv, if_outgoing, head, &idx,
+					      &sub))
+			break;
+
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+	cb->args[2] = sub;
+}
+
 static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
 			      struct batadv_hard_iface *if_outgoing1,
 			      struct batadv_neigh_node *neigh2,
@@ -320,6 +664,363 @@
 	return ret;
 }
 
+static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv,
+					char *buff, size_t count)
+{
+	u32 old_class, class;
+
+	if (!batadv_parse_throughput(bat_priv->soft_iface, buff,
+				     "B.A.T.M.A.N. V GW selection class",
+				     &class))
+		return -EINVAL;
+
+	old_class = atomic_read(&bat_priv->gw.sel_class);
+	atomic_set(&bat_priv->gw.sel_class, class);
+
+	if (old_class != class)
+		batadv_gw_reselect(bat_priv);
+
+	return count;
+}
+
+static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff)
+{
+	u32 class = atomic_read(&bat_priv->gw.sel_class);
+
+	return sprintf(buff, "%u.%u MBit\n", class / 10, class % 10);
+}
+
+/**
+ * batadv_v_gw_throughput_get - retrieve the GW-bandwidth for a given GW
+ * @gw_node: the GW to retrieve the metric for
+ * @bw: the pointer where the metric will be stored. The metric is computed as
+ *  the minimum between the GW advertised throughput and the path throughput to
+ *  it in the mesh
+ *
+ * Return: 0 on success, -1 on failure
+ */
+static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw)
+{
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	struct batadv_orig_node *orig_node;
+	struct batadv_neigh_node *router;
+	int ret = -1;
+
+	orig_node = gw_node->orig_node;
+	router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+	if (!router)
+		goto out;
+
+	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+	if (!router_ifinfo)
+		goto out;
+
+	/* the GW metric is computed as the minimum between the path throughput
+	 * to reach the GW itself and the advertised bandwidth.
+	 * This gives us an approximation of the effective throughput that the
+	 * client can expect via this particular GW node
+	 */
+	*bw = router_ifinfo->bat_v.throughput;
+	*bw = min_t(u32, *bw, gw_node->bandwidth_down);
+
+	ret = 0;
+out:
+	if (router)
+		batadv_neigh_node_put(router);
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_put(router_ifinfo);
+
+	return ret;
+}
+
+/**
+ * batadv_v_gw_get_best_gw_node - retrieve the best GW node
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: the GW node having the best GW-metric, NULL if no GW is known
+ */
+static struct batadv_gw_node *
+batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
+{
+	struct batadv_gw_node *gw_node, *curr_gw = NULL;
+	u32 max_bw = 0, bw;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		if (!kref_get_unless_zero(&gw_node->refcount))
+			continue;
+
+		if (batadv_v_gw_throughput_get(gw_node, &bw) < 0)
+			goto next;
+
+		if (curr_gw && (bw <= max_bw))
+			goto next;
+
+		if (curr_gw)
+			batadv_gw_node_put(curr_gw);
+
+		curr_gw = gw_node;
+		kref_get(&curr_gw->refcount);
+		max_bw = bw;
+
+next:
+		batadv_gw_node_put(gw_node);
+	}
+	rcu_read_unlock();
+
+	return curr_gw;
+}
+
+/**
+ * batadv_v_gw_is_eligible - check if an originator would be selected as GW
+ * @bat_priv: the bat priv with all the soft interface information
+ * @curr_gw_orig: originator representing the currently selected GW
+ * @orig_node: the originator representing the new candidate
+ *
+ * Return: true if orig_node can be selected as current GW, false otherwise
+ */
+static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
+				    struct batadv_orig_node *curr_gw_orig,
+				    struct batadv_orig_node *orig_node)
+{
+	struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL;
+	u32 gw_throughput, orig_throughput, threshold;
+	bool ret = false;
+
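+	/* for B.A.T.M.A.N. V the selection class is the throughput margin (in
+	 * 0.1 Mbit/s steps) by which a candidate has to beat the current GW
+	 */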
+	threshold = atomic_read(&bat_priv->gw.sel_class);
+
+	curr_gw = batadv_gw_node_get(bat_priv, curr_gw_orig);
+	if (!curr_gw) {
+		ret = true;
+		goto out;
+	}
+
+	if (batadv_v_gw_throughput_get(curr_gw, &gw_throughput) < 0) {
+		ret = true;
+		goto out;
+	}
+
+	orig_gw = batadv_gw_node_get(bat_priv, orig_node);
+	if (!orig_gw)
+		goto out;
+
+	if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
+		goto out;
+
+	if (orig_throughput < gw_throughput)
+		goto out;
+
+	if ((orig_throughput - gw_throughput) < threshold)
+		goto out;
+
+	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+		   "Restarting gateway selection: better gateway found (throughput curr: %u, throughput new: %u)\n",
+		   gw_throughput, orig_throughput);
+
+	ret = true;
+out:
+	if (curr_gw)
+		batadv_gw_node_put(curr_gw);
+	if (orig_gw)
+		batadv_gw_node_put(orig_gw);
+
+	return ret;
+}
+
+/* fails if orig_node has no router */
+static int batadv_v_gw_write_buffer_text(struct batadv_priv *bat_priv,
+					 struct seq_file *seq,
+					 const struct batadv_gw_node *gw_node)
+{
+	struct batadv_gw_node *curr_gw;
+	struct batadv_neigh_node *router;
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	int ret = -1;
+
+	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+	if (!router)
+		goto out;
+
+	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+	if (!router_ifinfo)
+		goto out;
+
+	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+	seq_printf(seq, "%s %pM (%9u.%1u) %pM [%10s]: %u.%u/%u.%u MBit\n",
+		   (curr_gw == gw_node ? "=>" : "  "),
+		   gw_node->orig_node->orig,
+		   router_ifinfo->bat_v.throughput / 10,
+		   router_ifinfo->bat_v.throughput % 10, router->addr,
+		   router->if_incoming->net_dev->name,
+		   gw_node->bandwidth_down / 10,
+		   gw_node->bandwidth_down % 10,
+		   gw_node->bandwidth_up / 10,
+		   gw_node->bandwidth_up % 10);
+	ret = seq_has_overflowed(seq) ? -1 : 0;
+
+	if (curr_gw)
+		batadv_gw_node_put(curr_gw);
+out:
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_put(router_ifinfo);
+	if (router)
+		batadv_neigh_node_put(router);
+	return ret;
+}
+
+/**
+ * batadv_v_gw_print - print the gateway list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @seq: gateway table seq_file struct
+ */
+static void batadv_v_gw_print(struct batadv_priv *bat_priv,
+			      struct seq_file *seq)
+{
+	struct batadv_gw_node *gw_node;
+	int gw_count = 0;
+
+	seq_puts(seq,
+		 "      Gateway        ( throughput)           Nexthop [outgoingIF]: advertised uplink bandwidth\n");
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		/* fails if orig_node has no router */
+		if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
+			continue;
+
+		gw_count++;
+	}
+	rcu_read_unlock();
+
+	if (gw_count == 0)
+		seq_puts(seq, "No gateways in range ...\n");
+}
+
+/**
+ * batadv_v_gw_dump_entry - Dump a gateway into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @gw_node: Gateway to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+				  struct batadv_priv *bat_priv,
+				  struct batadv_gw_node *gw_node)
+{
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	struct batadv_neigh_node *router;
+	struct batadv_gw_node *curr_gw = NULL;
+	int ret = -EINVAL;
+	void *hdr;
+
+	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+	if (!router)
+		goto out;
+
+	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+	if (!router_ifinfo)
+		goto out;
+
+	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	ret = -EMSGSIZE;
+
+	if (curr_gw == gw_node) {
+		if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) {
+			genlmsg_cancel(msg, hdr);
+			goto out;
+		}
+	}
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+		    gw_node->orig_node->orig)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT,
+			router_ifinfo->bat_v.throughput)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	if (nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, router->addr)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	if (nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+			   router->if_incoming->net_dev->name)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
+			gw_node->bandwidth_down)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, gw_node->bandwidth_up)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	genlmsg_end(msg, hdr);
+	ret = 0;
+
+out:
+	if (curr_gw)
+		batadv_gw_node_put(curr_gw);
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_put(router_ifinfo);
+	if (router)
+		batadv_neigh_node_put(router);
+	return ret;
+}
+
+/**
+ * batadv_v_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ */
+static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
+			     struct batadv_priv *bat_priv)
+{
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct batadv_gw_node *gw_node;
+	int idx_skip = cb->args[0];
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		if (idx++ < idx_skip)
+			continue;
+
+		if (batadv_v_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+					   bat_priv, gw_node)) {
+			idx_skip = idx - 1;
+			goto unlock;
+		}
+	}
+
+	idx_skip = idx;
+unlock:
+	rcu_read_unlock();
+
+	cb->args[0] = idx_skip;
+}
+
 static struct batadv_algo_ops batadv_batman_v __read_mostly = {
 	.name = "BATMAN_V",
 	.iface = {
@@ -334,9 +1035,19 @@
 		.cmp = batadv_v_neigh_cmp,
 		.is_similar_or_better = batadv_v_neigh_is_sob,
 		.print = batadv_v_neigh_print,
+		.dump = batadv_v_neigh_dump,
 	},
 	.orig = {
 		.print = batadv_v_orig_print,
+		.dump = batadv_v_orig_dump,
+	},
+	.gw = {
+		.store_sel_class = batadv_v_store_sel_class,
+		.show_sel_class = batadv_v_show_sel_class,
+		.get_best_gw_node = batadv_v_gw_get_best_gw_node,
+		.is_eligible = batadv_v_gw_is_eligible,
+		.print = batadv_v_gw_print,
+		.dump = batadv_v_gw_dump,
 	},
 };
 
@@ -363,7 +1074,16 @@
  */
 int batadv_v_mesh_init(struct batadv_priv *bat_priv)
 {
-	return batadv_v_ogm_init(bat_priv);
+	int ret = 0;
+
+	ret = batadv_v_ogm_init(bat_priv);
+	if (ret < 0)
+		return ret;
+
+	/* set default throughput difference threshold to 5Mbps */
+	atomic_set(&bat_priv->gw.sel_class, 50);
+
+	return 0;
 }
 
 /**
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ad2ffe1..35ed1d3 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -35,6 +35,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
@@ -45,12 +46,18 @@
 #include <linux/string.h>
 #include <linux/workqueue.h>
 #include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
+#include "soft-interface.h"
 #include "sysfs.h"
 #include "translation-table.h"
 
@@ -1148,7 +1155,7 @@
 
 	/* Let the loopdetect frames on the mesh in any case. */
 	if (bla_dst->type == BATADV_CLAIM_TYPE_LOOPDETECT)
-		return 0;
+		return false;
 
 	/* check if it is a claim frame. */
 	ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst,
@@ -2052,6 +2059,168 @@
 }
 
 /**
+ * batadv_bla_claim_dump_entry - dump one entry of the claim table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @claim: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_hard_iface *primary_if,
+			    struct batadv_bla_claim *claim)
+{
+	u8 *primary_addr = primary_if->net_dev->dev_addr;
+	u16 backbone_crc;
+	bool is_own;
+	void *hdr;
+	int ret = -EINVAL;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	is_own = batadv_compare_eth(claim->backbone_gw->orig,
+				    primary_addr);
+
+	spin_lock_bh(&claim->backbone_gw->crc_lock);
+	backbone_crc = claim->backbone_gw->crc;
+	spin_unlock_bh(&claim->backbone_gw->crc_lock);
+
+	if (is_own)
+		if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) {
+			genlmsg_cancel(msg, hdr);
+			goto out;
+		}
+
+	if (nla_put(msg, BATADV_ATTR_BLA_ADDRESS, ETH_ALEN, claim->addr) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_VID, claim->vid) ||
+	    nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN,
+		    claim->backbone_gw->orig) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+			backbone_crc)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	genlmsg_end(msg, hdr);
+	ret = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * batadv_bla_claim_dump_bucket - dump one bucket of the claim table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: always 0.
+ */
+static int
+batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			     struct batadv_hard_iface *primary_if,
+			     struct hlist_head *head, int *idx_skip)
+{
+	struct batadv_bla_claim *claim;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(claim, head, hash_entry) {
+		if (idx++ < *idx_skip)
+			continue;
+		if (batadv_bla_claim_dump_entry(msg, portid, seq,
+						primary_if, claim)) {
+			*idx_skip = idx - 1;
+			goto unlock;
+		}
+	}
+
+	*idx_skip = idx;
+unlock:
+	rcu_read_unlock();
+	return 0;
+}
+
+/**
+ * batadv_bla_claim_dump - dump claim table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_hashtable *hash;
+	struct batadv_priv *bat_priv;
+	int bucket = cb->args[0];
+	struct hlist_head *head;
+	int idx = cb->args[1];
+	int ifindex;
+	int ret = 0;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+	hash = bat_priv->bla.claim_hash;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_bla_claim_dump_bucket(msg, portid,
+						 cb->nlh->nlmsg_seq,
+						 primary_if, head, &idx))
+			break;
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+
+	ret = msg->len;
+
+out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
+
+/**
  * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq
  *  file
  * @seq: seq file to print on
@@ -2114,3 +2283,167 @@
 		batadv_hardif_put(primary_if);
 	return 0;
 }
+
+/**
+ * batadv_bla_backbone_dump_entry - dump one entry of the backbone table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @backbone_gw: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_hard_iface *primary_if,
+			       struct batadv_bla_backbone_gw *backbone_gw)
+{
+	u8 *primary_addr = primary_if->net_dev->dev_addr;
+	u16 backbone_crc;
+	bool is_own;
+	int msecs;
+	void *hdr;
+	int ret = -EINVAL;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	is_own = batadv_compare_eth(backbone_gw->orig, primary_addr);
+
+	spin_lock_bh(&backbone_gw->crc_lock);
+	backbone_crc = backbone_gw->crc;
+	spin_unlock_bh(&backbone_gw->crc_lock);
+
+	msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime);
+
+	if (is_own)
+		if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) {
+			genlmsg_cancel(msg, hdr);
+			goto out;
+		}
+
+	if (nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN,
+		    backbone_gw->orig) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_VID, backbone_gw->vid) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+			backbone_crc) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	genlmsg_end(msg, hdr);
+	ret = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: always 0.
+ */
+static int
+batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+				struct batadv_hard_iface *primary_if,
+				struct hlist_head *head, int *idx_skip)
+{
+	struct batadv_bla_backbone_gw *backbone_gw;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
+		if (idx++ < *idx_skip)
+			continue;
+		if (batadv_bla_backbone_dump_entry(msg, portid, seq,
+						   primary_if, backbone_gw)) {
+			*idx_skip = idx - 1;
+			goto unlock;
+		}
+	}
+
+	*idx_skip = idx;
+unlock:
+	rcu_read_unlock();
+	return 0;
+}
+
+/**
+ * batadv_bla_backbone_dump - dump backbone table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_hashtable *hash;
+	struct batadv_priv *bat_priv;
+	int bucket = cb->args[0];
+	struct hlist_head *head;
+	int idx = cb->args[1];
+	int ifindex;
+	int ret = 0;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+	hash = bat_priv->bla.backbone_hash;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_bla_backbone_dump_bucket(msg, portid,
+						    cb->nlh->nlmsg_seq,
+						    primary_if, head, &idx))
+			break;
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+
+	ret = msg->len;
+
+out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 0f01dae..1ae93e4 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 
 struct net_device;
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -35,8 +36,10 @@
 			       struct batadv_orig_node *orig_node,
 			       int hdr_size);
 int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
 					     void *offset);
+int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb);
 bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
 				    unsigned short vid);
 bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
@@ -47,7 +50,7 @@
 void batadv_bla_status_update(struct net_device *net_dev);
 int batadv_bla_init(struct batadv_priv *bat_priv);
 void batadv_bla_free(struct batadv_priv *bat_priv);
-
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
 #define BATADV_BLA_CRC_INIT	0
 #else /* ifdef CONFIG_BATMAN_ADV_BLA */
 
@@ -112,6 +115,18 @@
 {
 }
 
+static inline int batadv_bla_claim_dump(struct sk_buff *msg,
+					struct netlink_callback *cb)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int batadv_bla_backbone_dump(struct sk_buff *msg,
+					   struct netlink_callback *cb)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* ifdef CONFIG_BATMAN_ADV_BLA */
 
 #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 1d68b6e..b4ffba7dd 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -31,6 +31,7 @@
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <linux/sysfs.h>
+#include <net/net_namespace.h>
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
@@ -305,12 +306,16 @@
  */
 int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
 {
+	struct net *net = dev_net(hard_iface->net_dev);
 	struct batadv_debuginfo **bat_debug;
 	struct dentry *file;
 
 	if (!batadv_debugfs)
 		goto out;
 
+	if (net != &init_net)
+		return 0;
+
 	hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name,
 						   batadv_debugfs);
 	if (!hard_iface->debug_dir)
@@ -341,6 +346,11 @@
  */
 void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
 {
+	struct net *net = dev_net(hard_iface->net_dev);
+
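+	/* debugfs is not namespace aware, entries only exist for hard
+	 * interfaces in init_net and there is nothing to clean up elsewhere
+	 */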
+	if (net != &init_net)
+		return;
+
 	if (batadv_debugfs) {
 		debugfs_remove_recursive(hard_iface->debug_dir);
 		hard_iface->debug_dir = NULL;
@@ -351,11 +361,15 @@
 {
 	struct batadv_priv *bat_priv = netdev_priv(dev);
 	struct batadv_debuginfo **bat_debug;
+	struct net *net = dev_net(dev);
 	struct dentry *file;
 
 	if (!batadv_debugfs)
 		goto out;
 
+	if (net != &init_net)
+		return 0;
+
 	bat_priv->debug_dir = debugfs_create_dir(dev->name, batadv_debugfs);
 	if (!bat_priv->debug_dir)
 		goto out;
@@ -392,6 +406,10 @@
 void batadv_debugfs_del_meshif(struct net_device *dev)
 {
 	struct batadv_priv *bat_priv = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+
+	if (net != &init_net)
+		return;
 
 	batadv_debug_log_cleanup(bat_priv);
 
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 63a805d..c2928c2 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -20,6 +20,7 @@
 
 #include <linux/atomic.h>
 #include <linux/byteorder/generic.h>
+#include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/fs.h>
 #include <linux/if_ether.h>
@@ -31,6 +32,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
@@ -39,13 +41,17 @@
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/udp.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "gateway_common.h"
 #include "hard-interface.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 #include "routing.h"
+#include "soft-interface.h"
 #include "sysfs.h"
 #include "translation-table.h"
 
@@ -80,12 +86,12 @@
  * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it
  * @gw_node: gateway node to free
  */
-static void batadv_gw_node_put(struct batadv_gw_node *gw_node)
+void batadv_gw_node_put(struct batadv_gw_node *gw_node)
 {
 	kref_put(&gw_node->refcount, batadv_gw_node_release);
 }
 
-static struct batadv_gw_node *
+struct batadv_gw_node *
 batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv)
 {
 	struct batadv_gw_node *gw_node;
@@ -164,86 +170,6 @@
 	atomic_set(&bat_priv->gw.reselect, 1);
 }
 
-static struct batadv_gw_node *
-batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
-{
-	struct batadv_neigh_node *router;
-	struct batadv_neigh_ifinfo *router_ifinfo;
-	struct batadv_gw_node *gw_node, *curr_gw = NULL;
-	u64 max_gw_factor = 0;
-	u64 tmp_gw_factor = 0;
-	u8 max_tq = 0;
-	u8 tq_avg;
-	struct batadv_orig_node *orig_node;
-
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
-		orig_node = gw_node->orig_node;
-		router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
-		if (!router)
-			continue;
-
-		router_ifinfo = batadv_neigh_ifinfo_get(router,
-							BATADV_IF_DEFAULT);
-		if (!router_ifinfo)
-			goto next;
-
-		if (!kref_get_unless_zero(&gw_node->refcount))
-			goto next;
-
-		tq_avg = router_ifinfo->bat_iv.tq_avg;
-
-		switch (atomic_read(&bat_priv->gw.sel_class)) {
-		case 1: /* fast connection */
-			tmp_gw_factor = tq_avg * tq_avg;
-			tmp_gw_factor *= gw_node->bandwidth_down;
-			tmp_gw_factor *= 100 * 100;
-			tmp_gw_factor >>= 18;
-
-			if ((tmp_gw_factor > max_gw_factor) ||
-			    ((tmp_gw_factor == max_gw_factor) &&
-			     (tq_avg > max_tq))) {
-				if (curr_gw)
-					batadv_gw_node_put(curr_gw);
-				curr_gw = gw_node;
-				kref_get(&curr_gw->refcount);
-			}
-			break;
-
-		default: /* 2:  stable connection (use best statistic)
-			  * 3:  fast-switch (use best statistic but change as
-			  *     soon as a better gateway appears)
-			  * XX: late-switch (use best statistic but change as
-			  *     soon as a better gateway appears which has
-			  *     $routing_class more tq points)
-			  */
-			if (tq_avg > max_tq) {
-				if (curr_gw)
-					batadv_gw_node_put(curr_gw);
-				curr_gw = gw_node;
-				kref_get(&curr_gw->refcount);
-			}
-			break;
-		}
-
-		if (tq_avg > max_tq)
-			max_tq = tq_avg;
-
-		if (tmp_gw_factor > max_gw_factor)
-			max_gw_factor = tmp_gw_factor;
-
-		batadv_gw_node_put(gw_node);
-
-next:
-		batadv_neigh_node_put(router);
-		if (router_ifinfo)
-			batadv_neigh_ifinfo_put(router_ifinfo);
-	}
-	rcu_read_unlock();
-
-	return curr_gw;
-}
-
 /**
  * batadv_gw_check_client_stop - check if client mode has been switched off
  * @bat_priv: the bat priv with all the soft interface information
@@ -287,12 +213,19 @@
 	if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT)
 		goto out;
 
+	if (!bat_priv->algo_ops->gw.get_best_gw_node)
+		goto out;
+
 	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
 
 	if (!batadv_atomic_dec_not_zero(&bat_priv->gw.reselect) && curr_gw)
 		goto out;
 
-	next_gw = batadv_gw_get_best_gw_node(bat_priv);
+	/* if gw.reselect is set to 1 it means that a previous call to
+	 * gw.is_eligible() said that we have a new best GW, therefore it can
+	 * now be picked from the list and selected
+	 */
+	next_gw = bat_priv->algo_ops->gw.get_best_gw_node(bat_priv);
 
 	if (curr_gw == next_gw)
 		goto out;
@@ -360,70 +293,31 @@
 void batadv_gw_check_election(struct batadv_priv *bat_priv,
 			      struct batadv_orig_node *orig_node)
 {
-	struct batadv_neigh_ifinfo *router_orig_tq = NULL;
-	struct batadv_neigh_ifinfo *router_gw_tq = NULL;
 	struct batadv_orig_node *curr_gw_orig;
-	struct batadv_neigh_node *router_gw = NULL;
-	struct batadv_neigh_node *router_orig = NULL;
-	u8 gw_tq_avg, orig_tq_avg;
+
+	/* abort immediately if the routing algorithm does not support gateway
+	 * election
+	 */
+	if (!bat_priv->algo_ops->gw.is_eligible)
+		return;
 
 	curr_gw_orig = batadv_gw_get_selected_orig(bat_priv);
 	if (!curr_gw_orig)
 		goto reselect;
 
-	router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT);
-	if (!router_gw)
-		goto reselect;
-
-	router_gw_tq = batadv_neigh_ifinfo_get(router_gw,
-					       BATADV_IF_DEFAULT);
-	if (!router_gw_tq)
-		goto reselect;
-
 	/* this node already is the gateway */
 	if (curr_gw_orig == orig_node)
 		goto out;
 
-	router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
-	if (!router_orig)
+	if (!bat_priv->algo_ops->gw.is_eligible(bat_priv, curr_gw_orig,
+						orig_node))
 		goto out;
 
-	router_orig_tq = batadv_neigh_ifinfo_get(router_orig,
-						 BATADV_IF_DEFAULT);
-	if (!router_orig_tq)
-		goto out;
-
-	gw_tq_avg = router_gw_tq->bat_iv.tq_avg;
-	orig_tq_avg = router_orig_tq->bat_iv.tq_avg;
-
-	/* the TQ value has to be better */
-	if (orig_tq_avg < gw_tq_avg)
-		goto out;
-
-	/* if the routing class is greater than 3 the value tells us how much
-	 * greater the TQ value of the new gateway must be
-	 */
-	if ((atomic_read(&bat_priv->gw.sel_class) > 3) &&
-	    (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class)))
-		goto out;
-
-	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-		   "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n",
-		   gw_tq_avg, orig_tq_avg);
-
 reselect:
 	batadv_gw_reselect(bat_priv);
 out:
 	if (curr_gw_orig)
 		batadv_orig_node_put(curr_gw_orig);
-	if (router_gw)
-		batadv_neigh_node_put(router_gw);
-	if (router_orig)
-		batadv_neigh_node_put(router_orig);
-	if (router_gw_tq)
-		batadv_neigh_ifinfo_put(router_gw_tq);
-	if (router_orig_tq)
-		batadv_neigh_ifinfo_put(router_orig_tq);
 }
 
 /**
@@ -472,9 +366,8 @@
  *
  * Return: gateway node if found or NULL otherwise.
  */
-static struct batadv_gw_node *
-batadv_gw_node_get(struct batadv_priv *bat_priv,
-		   struct batadv_orig_node *orig_node)
+struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
+					  struct batadv_orig_node *orig_node)
 {
 	struct batadv_gw_node *gw_node_tmp, *gw_node = NULL;
 
@@ -585,81 +478,85 @@
 	spin_unlock_bh(&bat_priv->gw.list_lock);
 }
 
-/* fails if orig_node has no router */
-static int batadv_write_buffer_text(struct batadv_priv *bat_priv,
-				    struct seq_file *seq,
-				    const struct batadv_gw_node *gw_node)
-{
-	struct batadv_gw_node *curr_gw;
-	struct batadv_neigh_node *router;
-	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
-	int ret = -1;
-
-	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
-	if (!router)
-		goto out;
-
-	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
-	if (!router_ifinfo)
-		goto out;
-
-	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
-
-	seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
-		   (curr_gw == gw_node ? "=>" : "  "),
-		   gw_node->orig_node->orig,
-		   router_ifinfo->bat_iv.tq_avg, router->addr,
-		   router->if_incoming->net_dev->name,
-		   gw_node->bandwidth_down / 10,
-		   gw_node->bandwidth_down % 10,
-		   gw_node->bandwidth_up / 10,
-		   gw_node->bandwidth_up % 10);
-	ret = seq_has_overflowed(seq) ? -1 : 0;
-
-	if (curr_gw)
-		batadv_gw_node_put(curr_gw);
-out:
-	if (router_ifinfo)
-		batadv_neigh_ifinfo_put(router_ifinfo);
-	if (router)
-		batadv_neigh_node_put(router);
-	return ret;
-}
-
 int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct net_device *net_dev = (struct net_device *)seq->private;
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
 	struct batadv_hard_iface *primary_if;
-	struct batadv_gw_node *gw_node;
-	int gw_count = 0;
 
 	primary_if = batadv_seq_print_text_primary_if_get(seq);
 	if (!primary_if)
-		goto out;
+		return 0;
 
-	seq_printf(seq,
-		   "      Gateway      (#/255)           Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
+	seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
 		   BATADV_SOURCE_VERSION, primary_if->net_dev->name,
-		   primary_if->net_dev->dev_addr, net_dev->name);
+		   primary_if->net_dev->dev_addr, net_dev->name,
+		   bat_priv->algo_ops->name);
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
-		/* fails if orig_node has no router */
-		if (batadv_write_buffer_text(bat_priv, seq, gw_node) < 0)
-			continue;
+	batadv_hardif_put(primary_if);
 
-		gw_count++;
+	if (!bat_priv->algo_ops->gw.print) {
+		seq_puts(seq,
+			 "No printing function for this routing protocol\n");
+		return 0;
 	}
-	rcu_read_unlock();
 
-	if (gw_count == 0)
-		seq_puts(seq, "No gateways in range ...\n");
+	bat_priv->algo_ops->gw.print(bat_priv, seq);
+
+	return 0;
+}
+
+/**
+ * batadv_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ *
+ * Return: Error code, or length of message
+ */
+int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	int ifindex;
+	int ret;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	if (!bat_priv->algo_ops->gw.dump) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	bat_priv->algo_ops->gw.dump(msg, cb, bat_priv);
+
+	ret = msg->len;
 
 out:
 	if (primary_if)
 		batadv_hardif_put(primary_if);
-	return 0;
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
 }
 
 /**
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 582dd8c..859166d 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 
 struct batadv_tvlv_gateway_data;
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -39,10 +40,16 @@
 void batadv_gw_node_delete(struct batadv_priv *bat_priv,
 			   struct batadv_orig_node *orig_node);
 void batadv_gw_node_free(struct batadv_priv *bat_priv);
+void batadv_gw_node_put(struct batadv_gw_node *gw_node);
+struct batadv_gw_node *
+batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv);
 int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb);
 bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb);
 enum batadv_dhcp_recipient
 batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
 			     u8 *chaddr);
+struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
+					  struct batadv_orig_node *orig_node);
 
 #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index d7bc6a8..2118481 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -241,10 +241,9 @@
 
 	batadv_gw_node_update(bat_priv, orig, &gateway);
 
-	/* restart gateway selection if fast or late switching was enabled */
+	/* restart gateway selection */
 	if ((gateway.bandwidth_down != 0) &&
-	    (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) &&
-	    (atomic_read(&bat_priv->gw.sel_class) > 2))
+	    (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT))
 		batadv_gw_check_election(bat_priv, orig);
 }
 
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 1f90808..43c9a3e 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -35,7 +35,8 @@
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/workqueue.h>
+#include <net/net_namespace.h>
+#include <net/rtnetlink.h>
 
 #include "bat_v.h"
 #include "bridge_loop_avoidance.h"
@@ -85,25 +86,55 @@
 }
 
 /**
+ * batadv_getlink_net - return link net namespace (or use fallback)
+ * @netdev: net_device to check
+ * @fallback_net: return in case get_link_net is not available for @netdev
+ *
+ * Return: result of rtnl_link_ops->get_link_net or @fallback_net
+ */
+static const struct net *batadv_getlink_net(const struct net_device *netdev,
+					    const struct net *fallback_net)
+{
+	if (!netdev->rtnl_link_ops)
+		return fallback_net;
+
+	if (!netdev->rtnl_link_ops->get_link_net)
+		return fallback_net;
+
+	return netdev->rtnl_link_ops->get_link_net(netdev);
+}
+
+/**
  * batadv_mutual_parents - check if two devices are each others parent
- * @dev1: 1st net_device
- * @dev2: 2nd net_device
+ * @dev1: 1st net dev
+ * @net1: 1st device's netns
+ * @dev2: 2nd net dev
+ * @net2: 2nd device's netns
  *
  * veth devices come in pairs and each is the parent of the other!
  *
  * Return: true if the devices are each others parent, otherwise false
  */
 static bool batadv_mutual_parents(const struct net_device *dev1,
-				  const struct net_device *dev2)
+				  const struct net *net1,
+				  const struct net_device *dev2,
+				  const struct net *net2)
 {
 	int dev1_parent_iflink = dev_get_iflink(dev1);
 	int dev2_parent_iflink = dev_get_iflink(dev2);
+	const struct net *dev1_parent_net;
+	const struct net *dev2_parent_net;
+
+	dev1_parent_net = batadv_getlink_net(dev1, net1);
+	dev2_parent_net = batadv_getlink_net(dev2, net2);
 
 	if (!dev1_parent_iflink || !dev2_parent_iflink)
 		return false;
 
 	return (dev1_parent_iflink == dev2->ifindex) &&
-	       (dev2_parent_iflink == dev1->ifindex);
+	       (dev2_parent_iflink == dev1->ifindex) &&
+	       net_eq(dev1_parent_net, net2) &&
+	       net_eq(dev2_parent_net, net1);
 }
 
 /**
@@ -121,8 +152,9 @@
  */
 static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
 {
-	struct net_device *parent_dev;
 	struct net *net = dev_net(net_dev);
+	struct net_device *parent_dev;
+	const struct net *parent_net;
 	bool ret;
 
 	/* check if this is a batman-adv mesh interface */
@@ -134,13 +166,16 @@
 	    dev_get_iflink(net_dev) == net_dev->ifindex)
 		return false;
 
+	parent_net = batadv_getlink_net(net_dev, net);
+
 	/* recurse over the parent device */
-	parent_dev = __dev_get_by_index(net, dev_get_iflink(net_dev));
+	parent_dev = __dev_get_by_index((struct net *)parent_net,
+					dev_get_iflink(net_dev));
 	/* if we got a NULL parent_dev there is something broken.. */
 	if (WARN(!parent_dev, "Cannot find parent device"))
 		return false;
 
-	if (batadv_mutual_parents(net_dev, parent_dev))
+	if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net))
 		return false;
 
 	ret = batadv_is_on_batman_iface(parent_dev);
@@ -625,25 +660,6 @@
 		batadv_hardif_put(primary_if);
 }
 
-/**
- * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif
- * @work: work queue item
- *
- * Free the parts of the hard interface which can not be removed under
- * rtnl lock (to prevent deadlock situations).
- */
-static void batadv_hardif_remove_interface_finish(struct work_struct *work)
-{
-	struct batadv_hard_iface *hard_iface;
-
-	hard_iface = container_of(work, struct batadv_hard_iface,
-				  cleanup_work);
-
-	batadv_debugfs_del_hardif(hard_iface);
-	batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
-	batadv_hardif_put(hard_iface);
-}
-
 static struct batadv_hard_iface *
 batadv_hardif_add_interface(struct net_device *net_dev)
 {
@@ -676,8 +692,6 @@
 
 	INIT_LIST_HEAD(&hard_iface->list);
 	INIT_HLIST_HEAD(&hard_iface->neigh_list);
-	INIT_WORK(&hard_iface->cleanup_work,
-		  batadv_hardif_remove_interface_finish);
 
 	spin_lock_init(&hard_iface->neigh_list_lock);
 
@@ -719,7 +733,9 @@
 		return;
 
 	hard_iface->if_status = BATADV_IF_TO_BE_REMOVED;
-	queue_work(batadv_event_workqueue, &hard_iface->cleanup_work);
+	batadv_debugfs_del_hardif(hard_iface);
+	batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
+	batadv_hardif_put(hard_iface);
 }
 
 void batadv_hardif_remove_interfaces(void)
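
The batadv_getlink_net() helper added above encapsulates the usual way of
resolving the namespace a device's link partner lives in. As a minimal,
illustrative sketch of the same idiom outside batman-adv (the helper name
below is hypothetical and the fallback is simply the device's own netns):

	static struct net *link_net_or_self(const struct net_device *dev)
	{
		const struct rtnl_link_ops *ops = dev->rtnl_link_ops;

		/* drivers for virtual devices (e.g. veth) report the netns of
		 * their link partner; everything else stays in its own netns
		 */
		if (ops && ops->get_link_net)
			return ops->get_link_net(dev);

		return dev_net(dev);
	}
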
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fe4c5e2..ef07e5b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -82,6 +82,12 @@
 
 static int __init batadv_init(void)
 {
+	int ret;
+
+	ret = batadv_tt_cache_init();
+	if (ret < 0)
+		return ret;
+
 	INIT_LIST_HEAD(&batadv_hardif_list);
 	batadv_algo_init();
 
@@ -93,9 +99,8 @@
 	batadv_tp_meter_init();
 
 	batadv_event_workqueue = create_singlethread_workqueue("bat_events");
-
 	if (!batadv_event_workqueue)
-		return -ENOMEM;
+		goto err_create_wq;
 
 	batadv_socket_init();
 	batadv_debugfs_init();
@@ -108,6 +113,11 @@
 		BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
 
 	return 0;
+
+err_create_wq:
+	batadv_tt_cache_destroy();
+
+	return -ENOMEM;
 }
 
 static void __exit batadv_exit(void)
@@ -123,6 +133,8 @@
 	batadv_event_workqueue = NULL;
 
 	rcu_barrier();
+
+	batadv_tt_cache_destroy();
 }
 
 int batadv_mesh_init(struct net_device *soft_iface)
@@ -638,3 +650,4 @@
 MODULE_DESCRIPTION(BATADV_DRIVER_DESC);
 MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE);
 MODULE_VERSION(BATADV_SOURCE_VERSION);
+MODULE_ALIAS_RTNL_LINK("batadv");
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 06a8608..09af21e 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2016.3"
+#define BATADV_SOURCE_VERSION "2016.4"
 #endif
 
 /* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index cc91507..894df60 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -528,7 +528,7 @@
 	}
 
 	return !(mcast_data.flags &
-		 (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6));
+		 (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6));
 }
 
 /**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 231f8ea..18831e7 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -18,6 +18,8 @@
 #include "netlink.h"
 #include "main.h"
 
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/genetlink.h>
@@ -26,24 +28,33 @@
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <net/genetlink.h>
 #include <net/netlink.h>
+#include <net/sock.h>
 #include <uapi/linux/batman_adv.h>
 
+#include "bat_algo.h"
+#include "bridge_loop_avoidance.h"
+#include "gateway_client.h"
 #include "hard-interface.h"
+#include "originator.h"
+#include "packet.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
+#include "translation-table.h"
 
-struct sk_buff;
-
-static struct genl_family batadv_netlink_family = {
+struct genl_family batadv_netlink_family = {
 	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = BATADV_NL_NAME,
 	.version = 1,
 	.maxattr = BATADV_ATTR_MAX,
+	.netnsok = true,
 };
 
 /* multicast groups */
@@ -69,9 +80,44 @@
 	[BATADV_ATTR_TPMETER_TEST_TIME]	= { .type = NLA_U32 },
 	[BATADV_ATTR_TPMETER_BYTES]	= { .type = NLA_U64 },
 	[BATADV_ATTR_TPMETER_COOKIE]	= { .type = NLA_U32 },
+	[BATADV_ATTR_ACTIVE]		= { .type = NLA_FLAG },
+	[BATADV_ATTR_TT_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_TT_TTVN]		= { .type = NLA_U8 },
+	[BATADV_ATTR_TT_LAST_TTVN]	= { .type = NLA_U8 },
+	[BATADV_ATTR_TT_CRC32]		= { .type = NLA_U32 },
+	[BATADV_ATTR_TT_VID]		= { .type = NLA_U16 },
+	[BATADV_ATTR_TT_FLAGS]		= { .type = NLA_U32 },
+	[BATADV_ATTR_FLAG_BEST]		= { .type = NLA_FLAG },
+	[BATADV_ATTR_LAST_SEEN_MSECS]	= { .type = NLA_U32 },
+	[BATADV_ATTR_NEIGH_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_TQ]		= { .type = NLA_U8 },
+	[BATADV_ATTR_THROUGHPUT]	= { .type = NLA_U32 },
+	[BATADV_ATTR_BANDWIDTH_UP]	= { .type = NLA_U32 },
+	[BATADV_ATTR_BANDWIDTH_DOWN]	= { .type = NLA_U32 },
+	[BATADV_ATTR_ROUTER]		= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_OWN]		= { .type = NLA_FLAG },
+	[BATADV_ATTR_BLA_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_VID]		= { .type = NLA_U16 },
+	[BATADV_ATTR_BLA_BACKBONE]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_CRC]		= { .type = NLA_U16 },
 };
 
 /**
+ * batadv_netlink_get_ifindex - Extract an interface index from a message
+ * @nlh: Message header
+ * @attrtype: Attribute which holds an interface index
+ *
+ * Return: interface index, or 0.
+ */
+int
+batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
+{
+	struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype);
+
+	return attr ? nla_get_u32(attr) : 0;
+}
+
+/**
  * batadv_netlink_mesh_info_put - fill in generic information about mesh
  *  interface
  * @msg: netlink message to be sent back
@@ -93,9 +139,17 @@
 	    nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) ||
 	    nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) ||
 	    nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN,
-		    soft_iface->dev_addr))
+		    soft_iface->dev_addr) ||
+	    nla_put_u8(msg, BATADV_ATTR_TT_TTVN,
+		       (u8)atomic_read(&bat_priv->tt.vn)))
 		goto out;
 
+#ifdef CONFIG_BATMAN_ADV_BLA
+	if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+			ntohs(bat_priv->bla.claim_dest.group)))
+		goto out;
+#endif
+
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 	if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
 		hard_iface = primary_if->net_dev;
@@ -380,6 +434,106 @@
 	return ret;
 }
 
+/**
+ * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hard_iface: Hard interface to dump
+ *
+ * Return: error code, or 0 on success
+ */
+static int
+batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq,
+				 struct batadv_hard_iface *hard_iface)
+{
+	struct net_device *net_dev = hard_iface->net_dev;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_HARDIFS);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			net_dev->ifindex) ||
+	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+			   net_dev->name) ||
+	    nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
+		    net_dev->dev_addr))
+		goto nla_put_failure;
+
+	if (hard_iface->if_status == BATADV_IF_ACTIVE) {
+		if (nla_put_flag(msg, BATADV_ATTR_ACTIVE))
+			goto nla_put_failure;
+	}
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_netlink_dump_hardifs - Dump all hard interfaces into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: error code, or length of reply message on success
+ */
+static int
+batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_hard_iface *hard_iface;
+	int ifindex;
+	int portid = NETLINK_CB(cb->skb).portid;
+	int seq = cb->nlh->nlmsg_seq;
+	int skip = cb->args[0];
+	int i = 0;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface)
+		return -ENODEV;
+
+	if (!batadv_softif_is_valid(soft_iface)) {
+		dev_put(soft_iface);
+		return -ENODEV;
+	}
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+		if (hard_iface->soft_iface != soft_iface)
+			continue;
+
+		if (i++ < skip)
+			continue;
+
+		if (batadv_netlink_dump_hardif_entry(msg, portid, seq,
+						     hard_iface)) {
+			i--;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+
+	dev_put(soft_iface);
+
+	cb->args[0] = i;
+
+	return msg->len;
+}
+
 static struct genl_ops batadv_netlink_ops[] = {
 	{
 		.cmd = BATADV_CMD_GET_MESH_INFO,
@@ -399,6 +553,61 @@
 		.policy = batadv_netlink_policy,
 		.doit = batadv_netlink_tp_meter_cancel,
 	},
+	{
+		.cmd = BATADV_CMD_GET_ROUTING_ALGOS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_algo_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_HARDIFS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_netlink_dump_hardifs,
+	},
+	{
+		.cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_tt_local_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_tt_global_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_ORIGINATORS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_orig_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_NEIGHBORS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_hardif_neigh_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_GATEWAYS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_gw_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_BLA_CLAIM,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_bla_claim_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_BLA_BACKBONE,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_bla_backbone_dump,
+	},
+
 };
 
 /**
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 945653a..52eb162 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -21,12 +21,18 @@
 #include "main.h"
 
 #include <linux/types.h>
+#include <net/genetlink.h>
+
+struct nlmsghdr;
 
 void batadv_netlink_register(void);
 void batadv_netlink_unregister(void);
+int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype);
 
 int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
 				  u8 result, u32 test_time, u64 total_bytes,
 				  u32 cookie);
 
+extern struct genl_family batadv_netlink_family;
+
 #endif /* _NET_BATMAN_ADV_NETLINK_H_ */
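
The dump commands registered in batadv_netlink_ops are meant to be driven
from userspace over generic netlink; most of them take
BATADV_ATTR_MESH_IFINDEX to select the soft-interface and, being
GENL_ADMIN_PERM operations, require CAP_NET_ADMIN. A hedged userspace sketch
using libnl-3 (the function name and "bat0" are illustrative; error handling
and reply parsing are omitted):

	#include <net/if.h>
	#include <netlink/netlink.h>
	#include <netlink/genl/genl.h>
	#include <netlink/genl/ctrl.h>
	#include <linux/batman_adv.h>

	static int dump_hardifs(void)
	{
		struct nl_sock *sk = nl_socket_alloc();
		struct nl_msg *msg;
		int family;

		genl_connect(sk);
		family = genl_ctrl_resolve(sk, BATADV_NL_NAME);

		msg = nlmsg_alloc();
		genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0,
			    NLM_F_DUMP, BATADV_CMD_GET_HARDIFS, 1);
		nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
			    if_nametoindex("bat0"));

		nl_send_auto(sk, msg);
		/* replies arrive as NLM_F_MULTI messages; a real client would
		 * install a parsing callback via nl_socket_modify_cb()
		 */
		nl_recvmsgs_default(sk);

		nlmsg_free(msg);
		nl_socket_free(sk);
		return 0;
	}
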
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 3940b5d..95c8555 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -28,11 +28,15 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "distributed-arp-table.h"
@@ -42,8 +46,10 @@
 #include "hash.h"
 #include "log.h"
 #include "multicast.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "routing.h"
+#include "soft-interface.h"
 #include "translation-table.h"
 
 /* hash class keys */
@@ -721,6 +727,83 @@
 }
 
 /**
+ * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific
+ *  outgoing interface
+ * @msg: message to dump into
+ * @cb: parameters for the dump
+ *
+ * Return: error code, or length of message on success
+ */
+int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct net_device *hard_iface = NULL;
+	struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	int ret;
+	int ifindex, hard_ifindex;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hard_ifindex = batadv_netlink_get_ifindex(cb->nlh,
+						  BATADV_ATTR_HARD_IFINDEX);
+	if (hard_ifindex) {
+		hard_iface = dev_get_by_index(net, hard_ifindex);
+		if (hard_iface)
+			hardif = batadv_hardif_get_by_netdev(hard_iface);
+
+		if (!hardif) {
+			ret = -ENODEV;
+			goto out;
+		}
+
+		if (hardif->soft_iface != soft_iface) {
+			ret = -ENOENT;
+			goto out;
+		}
+	}
+
+	if (!bat_priv->algo_ops->neigh.dump) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	bat_priv->algo_ops->neigh.dump(msg, cb, bat_priv, hardif);
+
+	ret = msg->len;
+
+ out:
+	if (hardif)
+		batadv_hardif_put(hardif);
+	if (hard_iface)
+		dev_put(hard_iface);
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
+
+/**
  * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
  *  free after rcu grace period
  * @ref: kref pointer of the orig_ifinfo
@@ -1330,6 +1413,83 @@
 	return 0;
 }
 
+/**
+ * batadv_orig_dump - Dump to netlink the originator infos for a specific
+ *  outgoing interface
+ * @msg: message to dump into
+ * @cb: parameters for the dump
+ *
+ * Return: error code, or length of message on success
+ */
+int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct net_device *hard_iface = NULL;
+	struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	int ret;
+	int ifindex, hard_ifindex;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hard_ifindex = batadv_netlink_get_ifindex(cb->nlh,
+						  BATADV_ATTR_HARD_IFINDEX);
+	if (hard_ifindex) {
+		hard_iface = dev_get_by_index(net, hard_ifindex);
+		if (hard_iface)
+			hardif = batadv_hardif_get_by_netdev(hard_iface);
+
+		if (!hardif) {
+			ret = -ENODEV;
+			goto out;
+		}
+
+		if (hardif->soft_iface != soft_iface) {
+			ret = -ENOENT;
+			goto out;
+		}
+	}
+
+	if (!bat_priv->algo_ops->orig.dump) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	bat_priv->algo_ops->orig.dump(msg, cb, bat_priv, hardif);
+
+	ret = msg->len;
+
+ out:
+	if (hardif)
+		batadv_hardif_put(hardif);
+	if (hard_iface)
+		dev_put(hard_iface);
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
+
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 			    int max_if_num)
 {
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 566306b..ebc5618 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -31,7 +31,9 @@
 
 #include "hash.h"
 
+struct netlink_callback;
 struct seq_file;
+struct sk_buff;
 
 bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
 int batadv_originator_init(struct batadv_priv *bat_priv);
@@ -61,6 +63,7 @@
 			struct batadv_hard_iface *if_outgoing);
 void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
 
+int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset);
 
 struct batadv_orig_ifinfo *
@@ -72,6 +75,7 @@
 void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
 
 int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 			    int max_if_num);
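
All of the new dumpit handlers (hard interfaces, originators, neighbors, TT,
gateways, BLA) follow the same netlink pagination idiom: a dump is re-invoked
whenever the reply skb fills up, so each handler skips the entries it already
emitted and stores its position in cb->args[]. A minimal sketch under assumed
names (example_entry and example_dump are hypothetical, the attribute type is
arbitrary):

	struct example_entry {
		struct list_head list;
		u32 value;
	};

	static int example_dump(struct sk_buff *msg, struct netlink_callback *cb,
				struct list_head *entries)
	{
		struct example_entry *e;
		int skip = cb->args[0];
		int i = 0;

		rcu_read_lock();
		list_for_each_entry_rcu(e, entries, list) {
			if (i++ < skip)
				continue;

			/* emit one entry; if the skb is full, remember where
			 * we stopped so the next dump pass resumes there
			 */
			if (nla_put_u32(msg, 1 /* attr type */, e->value)) {
				i--;
				break;
			}
		}
		rcu_read_unlock();

		cb->args[0] = i;
		return msg->len;
	}
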
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 6b011ff..6afc0b8 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -129,42 +129,6 @@
 };
 
 /**
- * enum batadv_tt_client_flags - TT client specific flags
- * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table
- * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new
- *  update telling its new real location has not been received/sent yet
- * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface.
- *  This information is used by the "AP Isolation" feature
- * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This
- *  information is used by the Extended Isolation feature
- * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table
- * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has
- *  not been announced yet
- * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept
- *  in the table for one more originator interval for consistency purposes
- * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of
- *  the network but no nnode has already announced it
- *
- * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire.
- * Bits from 8 to 15 are called _local flags_ because they are used for local
- * computations only.
- *
- * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with
- * the other nodes in the network. To achieve this goal these flags are included
- * in the TT CRC computation.
- */
-enum batadv_tt_client_flags {
-	BATADV_TT_CLIENT_DEL     = BIT(0),
-	BATADV_TT_CLIENT_ROAM    = BIT(1),
-	BATADV_TT_CLIENT_WIFI    = BIT(4),
-	BATADV_TT_CLIENT_ISOLA	 = BIT(5),
-	BATADV_TT_CLIENT_NOPURGE = BIT(8),
-	BATADV_TT_CLIENT_NEW     = BIT(9),
-	BATADV_TT_CLIENT_PENDING = BIT(10),
-	BATADV_TT_CLIENT_TEMP	 = BIT(11),
-};
-
-/**
  * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field
  * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
  */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7602c00..610f2c4 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -74,11 +74,23 @@
 	if (!orig_ifinfo)
 		return;
 
-	rcu_read_lock();
-	curr_router = rcu_dereference(orig_ifinfo->router);
-	if (curr_router && !kref_get_unless_zero(&curr_router->refcount))
-		curr_router = NULL;
-	rcu_read_unlock();
+	spin_lock_bh(&orig_node->neigh_list_lock);
+	/* curr_router used earlier may not be the current orig_ifinfo->router
+	 * anymore because it was dereferenced outside of the neigh_list_lock
+	 * protected region. After the new best neighbor has replaced the current
+	 * best neighbor the reference counter needs to decrease. Consequently,
+	 * the code needs to ensure the curr_router variable contains a pointer
+	 * to the replaced best neighbor.
+	 */
+	curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
+	/* increase refcount of new best neighbor */
+	if (neigh_node)
+		kref_get(&neigh_node->refcount);
+
+	rcu_assign_pointer(orig_ifinfo->router, neigh_node);
+	spin_unlock_bh(&orig_node->neigh_list_lock);
+	batadv_orig_ifinfo_put(orig_ifinfo);
 
 	/* route deleted */
 	if ((curr_router) && (!neigh_node)) {
@@ -100,27 +112,6 @@
 			   curr_router->addr);
 	}
 
-	if (curr_router)
-		batadv_neigh_node_put(curr_router);
-
-	spin_lock_bh(&orig_node->neigh_list_lock);
-	/* curr_router used earlier may not be the current orig_ifinfo->router
-	 * anymore because it was dereferenced outside of the neigh_list_lock
-	 * protected region. After the new best neighbor has replace the current
-	 * best neighbor the reference counter needs to decrease. Consequently,
-	 * the code needs to ensure the curr_router variable contains a pointer
-	 * to the replaced best neighbor.
-	 */
-	curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
-
-	/* increase refcount of new best neighbor */
-	if (neigh_node)
-		kref_get(&neigh_node->refcount);
-
-	rcu_assign_pointer(orig_ifinfo->router, neigh_node);
-	spin_unlock_bh(&orig_node->neigh_list_lock);
-	batadv_orig_ifinfo_put(orig_ifinfo);
-
 	/* decrease refcount of previous best neighbor */
 	if (curr_router)
 		batadv_neigh_node_put(curr_router);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 6191159..8d4e1f5 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -315,8 +315,7 @@
  *
  * Wrap the given skb into a batman-adv unicast or unicast-4addr header
  * depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied
- * as packet_type. Then send this frame to the given orig_node and release a
- * reference to this orig_node.
+ * as packet_type. Then send this frame to the given orig_node.
  *
  * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
  */
@@ -370,8 +369,6 @@
 		ret = NET_XMIT_SUCCESS;
 
 out:
-	if (orig_node)
-		batadv_orig_node_put(orig_node);
 	if (ret == NET_XMIT_DROP)
 		kfree_skb(skb);
 	return ret;
@@ -403,6 +400,7 @@
 	struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
 	struct batadv_orig_node *orig_node;
 	u8 *src, *dst;
+	int ret;
 
 	src = ethhdr->h_source;
 	dst = ethhdr->h_dest;
@@ -414,8 +412,13 @@
 	}
 	orig_node = batadv_transtable_search(bat_priv, src, dst, vid);
 
-	return batadv_send_skb_unicast(bat_priv, skb, packet_type,
-				       packet_subtype, orig_node, vid);
+	ret = batadv_send_skb_unicast(bat_priv, skb, packet_type,
+				      packet_subtype, orig_node, vid);
+
+	if (orig_node)
+		batadv_orig_node_put(orig_node);
+
+	return ret;
 }
 
 /**
@@ -433,12 +436,25 @@
 			   unsigned short vid)
 {
 	struct batadv_orig_node *orig_node;
+	int ret;
 
 	orig_node = batadv_gw_get_selected_orig(bat_priv);
-	return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
-				       BATADV_P_DATA, orig_node, vid);
+	ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
+				      BATADV_P_DATA, orig_node, vid);
+
+	if (orig_node)
+		batadv_orig_node_put(orig_node);
+
+	return ret;
 }
 
+/**
+ * batadv_forw_packet_free - free a forwarding packet
+ * @forw_packet: The packet to free
+ *
+ * This frees a forwarding packet and releases any resources it might
+ * have claimed.
+ */
 void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
 {
 	kfree_skb(forw_packet->skb);
@@ -446,9 +462,73 @@
 		batadv_hardif_put(forw_packet->if_incoming);
 	if (forw_packet->if_outgoing)
 		batadv_hardif_put(forw_packet->if_outgoing);
+	if (forw_packet->queue_left)
+		atomic_inc(forw_packet->queue_left);
 	kfree(forw_packet);
 }
 
+/**
+ * batadv_forw_packet_alloc - allocate a forwarding packet
+ * @if_incoming: The (optional) if_incoming to be grabbed
+ * @if_outgoing: The (optional) if_outgoing to be grabbed
+ * @queue_left: The (optional) queue counter to decrease
+ * @bat_priv: The bat_priv for the mesh of this forw_packet
+ *
+ * Allocates a forwarding packet and tries to get a reference to the
+ * (optional) if_incoming, if_outgoing and queue_left. If queue_left
+ * is NULL then bat_priv is optional, too.
+ *
+ * Return: An allocated forwarding packet on success, NULL otherwise.
+ */
+struct batadv_forw_packet *
+batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
+			 struct batadv_hard_iface *if_outgoing,
+			 atomic_t *queue_left,
+			 struct batadv_priv *bat_priv)
+{
+	struct batadv_forw_packet *forw_packet;
+	const char *qname;
+
+	if (queue_left && !batadv_atomic_dec_not_zero(queue_left)) {
+		qname = "unknown";
+
+		if (queue_left == &bat_priv->bcast_queue_left)
+			qname = "bcast";
+
+		if (queue_left == &bat_priv->batman_queue_left)
+			qname = "batman";
+
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "%s queue is full\n", qname);
+
+		return NULL;
+	}
+
+	forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC);
+	if (!forw_packet)
+		goto err;
+
+	if (if_incoming)
+		kref_get(&if_incoming->refcount);
+
+	if (if_outgoing)
+		kref_get(&if_outgoing->refcount);
+
+	forw_packet->skb = NULL;
+	forw_packet->queue_left = queue_left;
+	forw_packet->if_incoming = if_incoming;
+	forw_packet->if_outgoing = if_outgoing;
+	forw_packet->num_packets = 0;
+
+	return forw_packet;
+
+err:
+	if (queue_left)
+		atomic_inc(queue_left);
+
+	return NULL;
+}
+
 static void
 _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
 				 struct batadv_forw_packet *forw_packet,
@@ -487,24 +567,20 @@
 	struct batadv_bcast_packet *bcast_packet;
 	struct sk_buff *newskb;
 
-	if (!batadv_atomic_dec_not_zero(&bat_priv->bcast_queue_left)) {
-		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-			   "bcast packet queue full\n");
-		goto out;
-	}
-
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 	if (!primary_if)
-		goto out_and_inc;
+		goto err;
 
-	forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC);
-
+	forw_packet = batadv_forw_packet_alloc(primary_if, NULL,
+					       &bat_priv->bcast_queue_left,
+					       bat_priv);
+	batadv_hardif_put(primary_if);
 	if (!forw_packet)
-		goto out_and_inc;
+		goto err;
 
 	newskb = skb_copy(skb, GFP_ATOMIC);
 	if (!newskb)
-		goto packet_free;
+		goto err_packet_free;
 
 	/* as we have a copy now, it is safe to decrease the TTL */
 	bcast_packet = (struct batadv_bcast_packet *)newskb->data;
@@ -513,11 +589,6 @@
 	skb_reset_mac_header(newskb);
 
 	forw_packet->skb = newskb;
-	forw_packet->if_incoming = primary_if;
-	forw_packet->if_outgoing = NULL;
-
-	/* how often did we send the bcast packet ? */
-	forw_packet->num_packets = 0;
 
 	INIT_DELAYED_WORK(&forw_packet->delayed_work,
 			  batadv_send_outstanding_bcast_packet);
@@ -525,13 +596,9 @@
 	_batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay);
 	return NETDEV_TX_OK;
 
-packet_free:
-	kfree(forw_packet);
-out_and_inc:
-	atomic_inc(&bat_priv->bcast_queue_left);
-out:
-	if (primary_if)
-		batadv_hardif_put(primary_if);
+err_packet_free:
+	batadv_forw_packet_free(forw_packet);
+err:
 	return NETDEV_TX_BUSY;
 }
 
@@ -592,7 +659,6 @@
 
 out:
 	batadv_forw_packet_free(forw_packet);
-	atomic_inc(&bat_priv->bcast_queue_left);
 }
 
 void
@@ -633,9 +699,6 @@
 
 		if (pending) {
 			hlist_del(&forw_packet->list);
-			if (!forw_packet->own)
-				atomic_inc(&bat_priv->bcast_queue_left);
-
 			batadv_forw_packet_free(forw_packet);
 		}
 	}
@@ -663,9 +726,6 @@
 
 		if (pending) {
 			hlist_del(&forw_packet->list);
-			if (!forw_packet->own)
-				atomic_inc(&bat_priv->batman_queue_left);
-
 			batadv_forw_packet_free(forw_packet);
 		}
 	}
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 7cecb75..999f786 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -28,6 +28,12 @@
 struct sk_buff;
 
 void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet);
+struct batadv_forw_packet *
+batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
+			 struct batadv_hard_iface *if_outgoing,
+			 atomic_t *queue_left,
+			 struct batadv_priv *bat_priv);
+
 int batadv_send_skb_to_orig(struct sk_buff *skb,
 			    struct batadv_orig_node *orig_node,
 			    struct batadv_hard_iface *recv_if);
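
With batadv_forw_packet_alloc() the reference handling and the queue_left
accounting live in one place: the allocator decrements the queue counter and
grabs the interface references, and batadv_forw_packet_free() undoes both,
which is why the explicit atomic_inc() calls disappear from the error and
purge paths above. A condensed usage sketch (the function name is
illustrative; the calls mirror the broadcast path in this merge):

	static netdev_tx_t example_queue_bcast(struct batadv_priv *bat_priv,
					       struct batadv_hard_iface *primary_if,
					       struct sk_buff *skb)
	{
		struct batadv_forw_packet *forw_packet;

		forw_packet = batadv_forw_packet_alloc(primary_if, NULL,
						       &bat_priv->bcast_queue_left,
						       bat_priv);
		if (!forw_packet)
			return NETDEV_TX_BUSY;

		forw_packet->skb = skb_copy(skb, GFP_ATOMIC);
		if (!forw_packet->skb) {
			/* drops the interface refs and re-increments queue_left */
			batadv_forw_packet_free(forw_packet);
			return NETDEV_TX_BUSY;
		}

		/* hand forw_packet to the delayed work as before */
		return NETDEV_TX_OK;
	}
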
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 7527c06..e508bf5 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -39,6 +39,7 @@
 #include <linux/random.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/socket.h>
@@ -46,7 +47,6 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/types.h>
-#include <linux/workqueue.h>
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
@@ -57,6 +57,7 @@
 #include "hard-interface.h"
 #include "multicast.h"
 #include "network-coding.h"
+#include "originator.h"
 #include "packet.h"
 #include "send.h"
 #include "sysfs.h"
@@ -377,6 +378,8 @@
 dropped_freed:
 	batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
 end:
+	if (mcast_single_orig)
+		batadv_orig_node_put(mcast_single_orig);
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 	return NETDEV_TX_OK;
@@ -747,34 +750,6 @@
 }
 
 /**
- * batadv_softif_destroy_finish - cleans up the remains of a softif
- * @work: work queue item
- *
- * Free the parts of the soft interface which can not be removed under
- * rtnl lock (to prevent deadlock situations).
- */
-static void batadv_softif_destroy_finish(struct work_struct *work)
-{
-	struct batadv_softif_vlan *vlan;
-	struct batadv_priv *bat_priv;
-	struct net_device *soft_iface;
-
-	bat_priv = container_of(work, struct batadv_priv,
-				cleanup_work);
-	soft_iface = bat_priv->soft_iface;
-
-	/* destroy the "untagged" VLAN */
-	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
-	if (vlan) {
-		batadv_softif_destroy_vlan(bat_priv, vlan);
-		batadv_softif_vlan_put(vlan);
-	}
-
-	batadv_sysfs_del_meshif(soft_iface);
-	unregister_netdev(soft_iface);
-}
-
-/**
  * batadv_softif_init_late - late stage initialization of soft interface
  * @dev: registered network device to modify
  *
@@ -791,7 +766,6 @@
 
 	bat_priv = netdev_priv(dev);
 	bat_priv->soft_iface = dev;
-	INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish);
 
 	/* batadv_interface_stats() needs to be available as soon as
 	 * register_netdevice() has been called
@@ -1028,8 +1002,19 @@
 void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+	struct batadv_softif_vlan *vlan;
 
-	queue_work(batadv_event_workqueue, &bat_priv->cleanup_work);
+	ASSERT_RTNL();
+
+	/* destroy the "untagged" VLAN */
+	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+	if (vlan) {
+		batadv_softif_destroy_vlan(bat_priv, vlan);
+		batadv_softif_vlan_put(vlan);
+	}
+
+	batadv_sysfs_del_meshif(soft_iface);
+	unregister_netdevice(soft_iface);
 }
 
 /**
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index fe9ca94..02d96f2 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -37,6 +37,7 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/stringify.h>
+#include <linux/workqueue.h>
 
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
@@ -428,6 +429,13 @@
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 	int bytes_written;
 
+	/* GW mode is not available if the routing algorithm in use does not
+	 * implement the GW API
+	 */
+	if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+	    !bat_priv->algo_ops->gw.is_eligible)
+		return -ENOENT;
+
 	switch (atomic_read(&bat_priv->gw.mode)) {
 	case BATADV_GW_MODE_CLIENT:
 		bytes_written = sprintf(buff, "%s\n",
@@ -455,6 +463,13 @@
 	char *curr_gw_mode_str;
 	int gw_mode_tmp = -1;
 
+	/* toggling GW mode is allowed only if the routing algorithm in use
+	 * provides the GW API
+	 */
+	if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+	    !bat_priv->algo_ops->gw.is_eligible)
+		return -EINVAL;
+
 	if (buff[count - 1] == '\n')
 		buff[count - 1] = '\0';
 
@@ -514,6 +529,50 @@
 	return count;
 }
 
+static ssize_t batadv_show_gw_sel_class(struct kobject *kobj,
+					struct attribute *attr, char *buff)
+{
+	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+
+	/* GW selection class is not available if the routing algorithm in use
+	 * does not implement the GW API
+	 */
+	if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+	    !bat_priv->algo_ops->gw.is_eligible)
+		return -ENOENT;
+
+	if (bat_priv->algo_ops->gw.show_sel_class)
+		return bat_priv->algo_ops->gw.show_sel_class(bat_priv, buff);
+
+	return sprintf(buff, "%i\n", atomic_read(&bat_priv->gw.sel_class));
+}
+
+static ssize_t batadv_store_gw_sel_class(struct kobject *kobj,
+					 struct attribute *attr, char *buff,
+					 size_t count)
+{
+	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+
+	/* setting the GW selection class is allowed only if the routing
+	 * algorithm in use implements the GW API
+	 */
+	if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+	    !bat_priv->algo_ops->gw.is_eligible)
+		return -EINVAL;
+
+	if (buff[count - 1] == '\n')
+		buff[count - 1] = '\0';
+
+	if (bat_priv->algo_ops->gw.store_sel_class)
+		return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff,
+							      count);
+
+	return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
+					batadv_post_gw_reselect, attr,
+					&bat_priv->gw.sel_class,
+					bat_priv->soft_iface);
+}
+
 static ssize_t batadv_show_gw_bwidth(struct kobject *kobj,
 				     struct attribute *attr, char *buff)
 {
@@ -625,8 +684,8 @@
 		     2 * BATADV_JITTER, INT_MAX, NULL);
 BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0,
 		     BATADV_TQ_MAX_VALUE, NULL);
-BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1,
-		     BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect);
+static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class,
+		   batadv_store_gw_sel_class);
 static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
 		   batadv_store_gw_bwidth);
 #ifdef CONFIG_BATMAN_ADV_MCAST
@@ -712,6 +771,8 @@
 	for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr)
 		sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
 
+	kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE);
+	kobject_del(bat_priv->mesh_obj);
 	kobject_put(bat_priv->mesh_obj);
 	bat_priv->mesh_obj = NULL;
 out:
@@ -726,6 +787,8 @@
 	for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr)
 		sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
 
+	kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE);
+	kobject_del(bat_priv->mesh_obj);
 	kobject_put(bat_priv->mesh_obj);
 	bat_priv->mesh_obj = NULL;
 }
@@ -781,6 +844,10 @@
 	for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr)
 		sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
 
+	if (vlan->kobj != bat_priv->mesh_obj) {
+		kobject_uevent(vlan->kobj, KOBJ_REMOVE);
+		kobject_del(vlan->kobj);
+	}
 	kobject_put(vlan->kobj);
 	vlan->kobj = NULL;
 out:
@@ -800,6 +867,10 @@
 	for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr)
 		sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
 
+	if (vlan->kobj != bat_priv->mesh_obj) {
+		kobject_uevent(vlan->kobj, KOBJ_REMOVE);
+		kobject_del(vlan->kobj);
+	}
 	kobject_put(vlan->kobj);
 	vlan->kobj = NULL;
 }
@@ -828,31 +899,31 @@
 	return length;
 }
 
-static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
-				       struct attribute *attr, char *buff,
-				       size_t count)
+/**
+ * batadv_store_mesh_iface_finish - store new hardif mesh_iface state
+ * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
+ * @ifname: name of soft-interface to modify
+ *
+ * Changes the parts of the hard+soft interface which can not be modified under
+ * sysfs lock (to prevent deadlock situations).
+ *
+ * Return: 0 on success, < 0 on failure
+ */
+static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
+					  char ifname[IFNAMSIZ])
 {
-	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
 	struct net *net = dev_net(net_dev);
 	struct batadv_hard_iface *hard_iface;
-	int status_tmp = -1;
-	int ret = count;
+	int status_tmp;
+	int ret = 0;
+
+	ASSERT_RTNL();
 
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);
 	if (!hard_iface)
-		return count;
+		return 0;
 
-	if (buff[count - 1] == '\n')
-		buff[count - 1] = '\0';
-
-	if (strlen(buff) >= IFNAMSIZ) {
-		pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
-		       buff);
-		batadv_hardif_put(hard_iface);
-		return -EINVAL;
-	}
-
-	if (strncmp(buff, "none", 4) == 0)
+	if (strncmp(ifname, "none", 4) == 0)
 		status_tmp = BATADV_IF_NOT_IN_USE;
 	else
 		status_tmp = BATADV_IF_I_WANT_YOU;
@@ -861,15 +932,13 @@
 		goto out;
 
 	if ((hard_iface->soft_iface) &&
-	    (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
+	    (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0))
 		goto out;
 
-	rtnl_lock();
-
 	if (status_tmp == BATADV_IF_NOT_IN_USE) {
 		batadv_hardif_disable_interface(hard_iface,
 						BATADV_IF_CLEANUP_AUTO);
-		goto unlock;
+		goto out;
 	}
 
 	/* if the interface already is in use */
@@ -877,15 +946,71 @@
 		batadv_hardif_disable_interface(hard_iface,
 						BATADV_IF_CLEANUP_AUTO);
 
-	ret = batadv_hardif_enable_interface(hard_iface, net, buff);
-
-unlock:
-	rtnl_unlock();
+	ret = batadv_hardif_enable_interface(hard_iface, net, ifname);
 out:
 	batadv_hardif_put(hard_iface);
 	return ret;
 }
 
+/**
+ * batadv_store_mesh_iface_work - store new hardif mesh_iface state
+ * @work: work queue item
+ *
+ * Changes the parts of the hard+soft interface which can not be modified under
+ * sysfs lock (to prevent deadlock situations).
+ */
+static void batadv_store_mesh_iface_work(struct work_struct *work)
+{
+	struct batadv_store_mesh_work *store_work;
+	int ret;
+
+	store_work = container_of(work, struct batadv_store_mesh_work, work);
+
+	rtnl_lock();
+	ret = batadv_store_mesh_iface_finish(store_work->net_dev,
+					     store_work->soft_iface_name);
+	rtnl_unlock();
+
+	if (ret < 0)
+		pr_err("Failed to store new mesh_iface state %s for %s: %d\n",
+		       store_work->soft_iface_name, store_work->net_dev->name,
+		       ret);
+
+	dev_put(store_work->net_dev);
+	kfree(store_work);
+}
+
+static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
+				       struct attribute *attr, char *buff,
+				       size_t count)
+{
+	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
+	struct batadv_store_mesh_work *store_work;
+
+	if (buff[count - 1] == '\n')
+		buff[count - 1] = '\0';
+
+	if (strlen(buff) >= IFNAMSIZ) {
+		pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
+		       buff);
+		return -EINVAL;
+	}
+
+	store_work = kmalloc(sizeof(*store_work), GFP_KERNEL);
+	if (!store_work)
+		return -ENOMEM;
+
+	dev_hold(net_dev);
+	INIT_WORK(&store_work->work, batadv_store_mesh_iface_work);
+	store_work->net_dev = net_dev;
+	strlcpy(store_work->soft_iface_name, buff,
+		sizeof(store_work->soft_iface_name));
+
+	queue_work(batadv_event_workqueue, &store_work->work);
+
+	return count;
+}
+
 static ssize_t batadv_show_iface_status(struct kobject *kobj,
 					struct attribute *attr, char *buff)
 {
@@ -1048,6 +1173,8 @@
 
 void batadv_sysfs_del_hardif(struct kobject **hardif_obj)
 {
+	kobject_uevent(*hardif_obj, KOBJ_REMOVE);
+	kobject_del(*hardif_obj);
 	kobject_put(*hardif_obj);
 	*hardif_obj = NULL;
 }
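
The sysfs removal paths above all switch to the same teardown sequence for a
kobject that was previously only dropped with kobject_put(): announce the
removal to userspace, unlink the object from sysfs, then release the
reference. As a small illustrative helper (the name is hypothetical):

	static void example_remove_kobj(struct kobject *kobj)
	{
		kobject_uevent(kobj, KOBJ_REMOVE);	/* notify userspace */
		kobject_del(kobj);			/* unlink from sysfs */
		kobject_put(kobj);			/* drop the reference */
	}
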
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7e6df7a..2080407 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -22,12 +22,14 @@
 #include <linux/bitops.h>
 #include <linux/bug.h>
 #include <linux/byteorder/generic.h>
+#include <linux/cache.h>
 #include <linux/compiler.h>
 #include <linux/crc32c.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/fs.h>
 #include <linux/if_ether.h>
+#include <linux/init.h>
 #include <linux/jhash.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
@@ -35,25 +37,39 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
 #include "multicast.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 #include "soft-interface.h"
 #include "tvlv.h"
 
+static struct kmem_cache *batadv_tl_cache __read_mostly;
+static struct kmem_cache *batadv_tg_cache __read_mostly;
+static struct kmem_cache *batadv_tt_orig_cache __read_mostly;
+static struct kmem_cache *batadv_tt_change_cache __read_mostly;
+static struct kmem_cache *batadv_tt_req_cache __read_mostly;
+static struct kmem_cache *batadv_tt_roam_cache __read_mostly;
+
 /* hash class keys */
 static struct lock_class_key batadv_tt_local_hash_lock_class_key;
 static struct lock_class_key batadv_tt_global_hash_lock_class_key;
@@ -205,6 +221,20 @@
 }
 
 /**
+ * batadv_tt_local_entry_free_rcu - free the tt_local_entry
+ * @rcu: rcu pointer of the tt_local_entry
+ */
+static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu)
+{
+	struct batadv_tt_local_entry *tt_local_entry;
+
+	tt_local_entry = container_of(rcu, struct batadv_tt_local_entry,
+				      common.rcu);
+
+	kmem_cache_free(batadv_tl_cache, tt_local_entry);
+}
+
+/**
  * batadv_tt_local_entry_release - release tt_local_entry from lists and queue
  *  for free after rcu grace period
  * @ref: kref pointer of the nc_node
@@ -218,7 +248,7 @@
 
 	batadv_softif_vlan_put(tt_local_entry->vlan);
 
-	kfree_rcu(tt_local_entry, common.rcu);
+	call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu);
 }
 
 /**
@@ -234,6 +264,20 @@
 }
 
 /**
+ * batadv_tt_global_entry_free_rcu - free the tt_global_entry
+ * @rcu: rcu pointer of the tt_global_entry
+ */
+static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
+{
+	struct batadv_tt_global_entry *tt_global_entry;
+
+	tt_global_entry = container_of(rcu, struct batadv_tt_global_entry,
+				       common.rcu);
+
+	kmem_cache_free(batadv_tg_cache, tt_global_entry);
+}
+
+/**
  * batadv_tt_global_entry_release - release tt_global_entry from lists and queue
  *  for free after rcu grace period
  * @ref: kref pointer of the nc_node
@@ -246,7 +290,8 @@
 				       common.refcount);
 
 	batadv_tt_global_del_orig_list(tt_global_entry);
-	kfree_rcu(tt_global_entry, common.rcu);
+
+	call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu);
 }
 
 /**
@@ -384,6 +429,19 @@
 }
 
 /**
+ * batadv_tt_orig_list_entry_free_rcu - free the orig_entry
+ * @rcu: rcu pointer of the orig_entry
+ */
+static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
+{
+	struct batadv_tt_orig_list_entry *orig_entry;
+
+	orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
+
+	kmem_cache_free(batadv_tt_orig_cache, orig_entry);
+}
+
+/**
  * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
  *  queue for free after rcu grace period
  * @ref: kref pointer of the tt orig entry
@@ -396,7 +454,7 @@
 				  refcount);
 
 	batadv_orig_node_put(orig_entry->orig_node);
-	kfree_rcu(orig_entry, rcu);
+	call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
 }
 
 /**
@@ -426,7 +484,7 @@
 	bool event_removed = false;
 	bool del_op_requested, del_op_entry;
 
-	tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC);
+	tt_change_node = kmem_cache_alloc(batadv_tt_change_cache, GFP_ATOMIC);
 	if (!tt_change_node)
 		return;
 
@@ -467,8 +525,8 @@
 		continue;
 del:
 		list_del(&entry->list);
-		kfree(entry);
-		kfree(tt_change_node);
+		kmem_cache_free(batadv_tt_change_cache, entry);
+		kmem_cache_free(batadv_tt_change_cache, tt_change_node);
 		event_removed = true;
 		goto unlock;
 	}
@@ -646,7 +704,7 @@
 		goto out;
 	}
 
-	tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC);
+	tt_local = kmem_cache_alloc(batadv_tl_cache, GFP_ATOMIC);
 	if (!tt_local)
 		goto out;
 
@@ -656,7 +714,7 @@
 		net_ratelimited_function(batadv_info, soft_iface,
 					 "adding TT local entry %pM to non-existent VLAN %d\n",
 					 addr, BATADV_PRINT_VID(vid));
-		kfree(tt_local);
+		kmem_cache_free(batadv_tl_cache, tt_local);
 		tt_local = NULL;
 		goto out;
 	}
@@ -959,7 +1017,7 @@
 			tt_diff_entries_count++;
 		}
 		list_del(&entry->list);
-		kfree(entry);
+		kmem_cache_free(batadv_tt_change_cache, entry);
 	}
 	spin_unlock_bh(&bat_priv->tt.changes_list_lock);
 
@@ -1057,6 +1115,164 @@
 	return 0;
 }
 
+/**
+ * batadv_tt_local_dump_entry - Dump one TT local entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @common: tt local & tt global common data
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			   struct batadv_priv *bat_priv,
+			   struct batadv_tt_common_entry *common)
+{
+	void *hdr;
+	struct batadv_softif_vlan *vlan;
+	struct batadv_tt_local_entry *local;
+	unsigned int last_seen_msecs;
+	u32 crc;
+
+	local = container_of(common, struct batadv_tt_local_entry, common);
+	last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen);
+
+	vlan = batadv_softif_vlan_get(bat_priv, common->vid);
+	if (!vlan)
+		return 0;
+
+	crc = vlan->tt.crc;
+
+	batadv_softif_vlan_put(vlan);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI,
+			  BATADV_CMD_GET_TRANSTABLE_LOCAL);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
+	    nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+		goto nla_put_failure;
+
+	if (!(common->flags & BATADV_TT_CLIENT_NOPURGE) &&
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @head: Pointer to the list containing the local tt entries
+ * @idx_s: Number of entries to skip
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_priv *bat_priv,
+			    struct hlist_head *head, int *idx_s)
+{
+	struct batadv_tt_common_entry *common;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(common, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv,
+					       common)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_tt_local_dump - Dump TT local entries into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: Error code, or length of message on success
+ */
+int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	struct batadv_hashtable *hash;
+	struct hlist_head *head;
+	int ret;
+	int ifindex;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hash = bat_priv->tt.local_hash;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq,
+						bat_priv, head, &idx))
+			break;
+
+		bucket++;
+	}
+
+	ret = msg->len;
+
+ out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+
+	return ret;
+}
+
 static void
 batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
 			    struct batadv_tt_local_entry *tt_local_entry,
@@ -1259,7 +1475,7 @@
 	list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list,
 				 list) {
 		list_del(&entry->list);
-		kfree(entry);
+		kmem_cache_free(batadv_tt_change_cache, entry);
 	}
 
 	atomic_set(&bat_priv->tt.local_changes, 0);
@@ -1341,7 +1557,7 @@
 		goto out;
 	}
 
-	orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC);
+	orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC);
 	if (!orig_entry)
 		goto out;
 
@@ -1411,7 +1627,8 @@
 		goto out;
 
 	if (!tt_global_entry) {
-		tt_global_entry = kzalloc(sizeof(*tt_global_entry), GFP_ATOMIC);
+		tt_global_entry = kmem_cache_zalloc(batadv_tg_cache,
+						    GFP_ATOMIC);
 		if (!tt_global_entry)
 			goto out;
 
@@ -1704,6 +1921,218 @@
 }
 
 /**
+ * batadv_tt_global_dump_subentry - Dump one TT global orig-entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @common: tt local & tt global common data
+ * @orig: Originator node announcing a non-mesh client
+ * @best: Is the best originator for the TT entry
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_tt_common_entry *common,
+			       struct batadv_tt_orig_list_entry *orig,
+			       bool best)
+{
+	void *hdr;
+	struct batadv_orig_node_vlan *vlan;
+	u8 last_ttvn;
+	u32 crc;
+
+	vlan = batadv_orig_node_vlan_get(orig->orig_node,
+					 common->vid);
+	if (!vlan)
+		return 0;
+
+	crc = vlan->tt.crc;
+
+	batadv_orig_node_vlan_put(vlan);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI,
+			  BATADV_CMD_GET_TRANSTABLE_GLOBAL);
+	if (!hdr)
+		return -ENOBUFS;
+
+	last_ttvn = atomic_read(&orig->orig_node->last_ttvn);
+
+	if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
+	    nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+		    orig->orig_node->orig) ||
+	    nla_put_u8(msg, BATADV_ATTR_TT_TTVN, orig->ttvn) ||
+	    nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
+	    nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+		goto nla_put_failure;
+
+	if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_tt_global_dump_entry - Dump one TT global entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @common: tt local & tt global common data
+ * @sub_s: Number of entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_priv *bat_priv,
+			    struct batadv_tt_common_entry *common, int *sub_s)
+{
+	struct batadv_tt_orig_list_entry *orig_entry, *best_entry;
+	struct batadv_tt_global_entry *global;
+	struct hlist_head *head;
+	int sub = 0;
+	bool best;
+
+	global = container_of(common, struct batadv_tt_global_entry, common);
+	best_entry = batadv_transtable_best_orig(bat_priv, global);
+	head = &global->orig_list;
+
+	hlist_for_each_entry_rcu(orig_entry, head, list) {
+		if (sub++ < *sub_s)
+			continue;
+
+		best = (orig_entry == best_entry);
+
+		if (batadv_tt_global_dump_subentry(msg, portid, seq, common,
+						   orig_entry, best)) {
+			*sub_s = sub - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+	*sub_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_tt_global_dump_bucket - Dump one TT global bucket into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @head: Pointer to the list containing the global tt entries
+ * @idx_s: Number of entries to skip
+ * @sub: Number of orig entries to skip within the current TT entry
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			     struct batadv_priv *bat_priv,
+			     struct hlist_head *head, int *idx_s, int *sub)
+{
+	struct batadv_tt_common_entry *common;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(common, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_tt_global_dump_entry(msg, portid, seq, bat_priv,
+						common, sub)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	*sub = 0;
+	return 0;
+}
+
+/**
+ * batadv_tt_global_dump - Dump TT global entries into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: Error code, or length of message on success
+ */
+int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	struct batadv_hashtable *hash;
+	struct hlist_head *head;
+	int ret;
+	int ifindex;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int sub = cb->args[2];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hash = bat_priv->tt.global_hash;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_tt_global_dump_bucket(msg, portid,
+						 cb->nlh->nlmsg_seq, bat_priv,
+						 head, &idx, &sub))
+			break;
+
+		bucket++;
+	}
+
+	ret = msg->len;
+
+ out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+	cb->args[2] = sub;
+
+	return ret;
+}
+
+/**
  * _batadv_tt_global_del_orig_entry - remove and free an orig_entry
  * @tt_global_entry: the global entry to remove the orig_entry from
  * @orig_entry: the orig entry to remove and free
@@ -2280,7 +2709,7 @@
 
 	tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount);
 
-	kfree(tt_req_node);
+	kmem_cache_free(batadv_tt_req_cache, tt_req_node);
 }
 
 /**
@@ -2367,7 +2796,7 @@
 			goto unlock;
 	}
 
-	tt_req_node = kmalloc(sizeof(*tt_req_node), GFP_ATOMIC);
+	tt_req_node = kmem_cache_alloc(batadv_tt_req_cache, GFP_ATOMIC);
 	if (!tt_req_node)
 		goto unlock;
 
@@ -3104,7 +3533,7 @@
 
 	list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) {
 		list_del(&node->list);
-		kfree(node);
+		kmem_cache_free(batadv_tt_roam_cache, node);
 	}
 
 	spin_unlock_bh(&bat_priv->tt.roam_list_lock);
@@ -3121,7 +3550,7 @@
 			continue;
 
 		list_del(&node->list);
-		kfree(node);
+		kmem_cache_free(batadv_tt_roam_cache, node);
 	}
 	spin_unlock_bh(&bat_priv->tt.roam_list_lock);
 }
@@ -3162,7 +3591,8 @@
 	}
 
 	if (!ret) {
-		tt_roam_node = kmalloc(sizeof(*tt_roam_node), GFP_ATOMIC);
+		tt_roam_node = kmem_cache_alloc(batadv_tt_roam_cache,
+						GFP_ATOMIC);
 		if (!tt_roam_node)
 			goto unlock;
 
@@ -3865,3 +4295,85 @@
 
 	return ret;
 }
+
+/**
+ * batadv_tt_cache_init - Initialize tt memory object cache
+ *
+ * Return: 0 on success or negative error number in case of failure.
+ */
+int __init batadv_tt_cache_init(void)
+{
+	size_t tl_size = sizeof(struct batadv_tt_local_entry);
+	size_t tg_size = sizeof(struct batadv_tt_global_entry);
+	size_t tt_orig_size = sizeof(struct batadv_tt_orig_list_entry);
+	size_t tt_change_size = sizeof(struct batadv_tt_change_node);
+	size_t tt_req_size = sizeof(struct batadv_tt_req_node);
+	size_t tt_roam_size = sizeof(struct batadv_tt_roam_node);
+
+	batadv_tl_cache = kmem_cache_create("batadv_tl_cache", tl_size, 0,
+					    SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tl_cache)
+		return -ENOMEM;
+
+	batadv_tg_cache = kmem_cache_create("batadv_tg_cache", tg_size, 0,
+					    SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tg_cache)
+		goto err_tt_tl_destroy;
+
+	batadv_tt_orig_cache = kmem_cache_create("batadv_tt_orig_cache",
+						 tt_orig_size, 0,
+						 SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tt_orig_cache)
+		goto err_tt_tg_destroy;
+
+	batadv_tt_change_cache = kmem_cache_create("batadv_tt_change_cache",
+						   tt_change_size, 0,
+						   SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tt_change_cache)
+		goto err_tt_orig_destroy;
+
+	batadv_tt_req_cache = kmem_cache_create("batadv_tt_req_cache",
+						tt_req_size, 0,
+						SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tt_req_cache)
+		goto err_tt_change_destroy;
+
+	batadv_tt_roam_cache = kmem_cache_create("batadv_tt_roam_cache",
+						 tt_roam_size, 0,
+						 SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tt_roam_cache)
+		goto err_tt_req_destroy;
+
+	return 0;
+
+err_tt_req_destroy:
+	kmem_cache_destroy(batadv_tt_req_cache);
+	batadv_tt_req_cache = NULL;
+err_tt_change_destroy:
+	kmem_cache_destroy(batadv_tt_change_cache);
+	batadv_tt_change_cache = NULL;
+err_tt_orig_destroy:
+	kmem_cache_destroy(batadv_tt_orig_cache);
+	batadv_tt_orig_cache = NULL;
+err_tt_tg_destroy:
+	kmem_cache_destroy(batadv_tg_cache);
+	batadv_tg_cache = NULL;
+err_tt_tl_destroy:
+	kmem_cache_destroy(batadv_tl_cache);
+	batadv_tl_cache = NULL;
+
+	return -ENOMEM;
+}
+
+/**
+ * batadv_tt_cache_destroy - Destroy tt memory object cache
+ */
+void batadv_tt_cache_destroy(void)
+{
+	kmem_cache_destroy(batadv_tl_cache);
+	kmem_cache_destroy(batadv_tg_cache);
+	kmem_cache_destroy(batadv_tt_orig_cache);
+	kmem_cache_destroy(batadv_tt_change_cache);
+	kmem_cache_destroy(batadv_tt_req_cache);
+	kmem_cache_destroy(batadv_tt_roam_cache);
+}
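
The hunk above moves the translation-table allocations from kmalloc()/kfree() onto dedicated slab caches. A minimal sketch of that create/alloc/free/destroy lifecycle, with illustrative names rather than the batman-adv ones:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>

struct foo_entry {
	u32 key;
	/* ... fixed-size payload ... */
};

static struct kmem_cache *foo_cache;

static int __init foo_cache_init(void)
{
	/* one cache per fixed-size object type, cacheline aligned */
	foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo_entry),
				      0, SLAB_HWCACHE_ALIGN, NULL);
	return foo_cache ? 0 : -ENOMEM;
}

static struct foo_entry *foo_entry_alloc(void)
{
	/* GFP_ATOMIC mirrors the call sites above that run under spinlocks */
	return kmem_cache_alloc(foo_cache, GFP_ATOMIC);
}

static void foo_entry_free(struct foo_entry *entry)
{
	kmem_cache_free(foo_cache, entry);
}

static void foo_cache_destroy(void)
{
	kmem_cache_destroy(foo_cache);
}
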
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 7c7e2c0..783fdba 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -22,8 +22,10 @@
 
 #include <linux/types.h>
 
+struct netlink_callback;
 struct net_device;
 struct seq_file;
+struct sk_buff;
 
 int batadv_tt_init(struct batadv_priv *bat_priv);
 bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
@@ -33,6 +35,8 @@
 			   const char *message, bool roaming);
 int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb);
+int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb);
 void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
 			       struct batadv_orig_node *orig_node,
 			       s32 match_vid, const char *message);
@@ -59,4 +63,7 @@
 bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
 				  const u8 *addr, unsigned short vid);
 
+int batadv_tt_cache_init(void);
+void batadv_tt_cache_destroy(void);
+
 #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
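
Both new dump entry points follow the standard netlink dump convention: the walk position is parked in cb->args[] so that a dump which overflows one sk_buff resumes exactly where it stopped on the next callback. A stripped-down sketch of that resumable walk, assuming a hypothetical foo_dump_bucket() helper and FOO_HASH_SIZE constant:

static int foo_dump(struct sk_buff *msg, struct netlink_callback *cb)
{
	int bucket = cb->args[0];	/* where the previous call stopped */
	int idx = cb->args[1];		/* first entry in that bucket not yet sent */

	while (bucket < FOO_HASH_SIZE) {
		/* foo_dump_bucket() is assumed to return non-zero (e.g.
		 * -EMSGSIZE) once msg is full, leaving idx pointing at the
		 * first entry that did not fit.
		 */
		if (foo_dump_bucket(msg, cb->nlh->nlmsg_seq, bucket, &idx))
			break;
		bucket++;
	}

	cb->args[0] = bucket;	/* picked up again on the next callback */
	cb->args[1] = idx;

	/* a non-zero return (the message length) makes the netlink core
	 * call the dump again; 0 ends the dump
	 */
	return msg->len;
}
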
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index a64522c..b5f01a3 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -28,6 +28,7 @@
 #include <linux/if_ether.h>
 #include <linux/kref.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/sched.h> /* for linux/wait.h */
 #include <linux/spinlock.h>
 #include <linux/types.h>
@@ -132,7 +133,6 @@
  * @rcu: struct used for freeing in an RCU-safe manner
  * @bat_iv: per hard-interface B.A.T.M.A.N. IV data
  * @bat_v: per hard-interface B.A.T.M.A.N. V data
- * @cleanup_work: work queue callback item for hard-interface deinit
  * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
  * @neigh_list: list of unique single hop neighbors via this interface
  * @neigh_list_lock: lock protecting neigh_list
@@ -152,7 +152,6 @@
 #ifdef CONFIG_BATMAN_ADV_BATMAN_V
 	struct batadv_hard_iface_bat_v bat_v;
 #endif
-	struct work_struct cleanup_work;
 	struct dentry *debug_dir;
 	struct hlist_head neigh_list;
 	/* neigh_list_lock protects: neigh_list */
@@ -1015,7 +1014,6 @@
  * @forw_bcast_list_lock: lock protecting forw_bcast_list
  * @tp_list_lock: spinlock protecting @tp_list
  * @orig_work: work queue callback item for orig node purging
- * @cleanup_work: work queue callback item for soft-interface deinit
  * @primary_if: one of the hard-interfaces assigned to this mesh interface
  *  becomes the primary interface
  * @algo_ops: routing algorithm used by this mesh interface
@@ -1074,7 +1072,6 @@
 	spinlock_t tp_list_lock; /* protects tp_list */
 	atomic_t tp_num;
 	struct delayed_work orig_work;
-	struct work_struct cleanup_work;
 	struct batadv_hard_iface __rcu *primary_if;  /* rcu protected pointer */
 	struct batadv_algo_ops *algo_ops;
 	struct hlist_head softif_vlan_list;
@@ -1379,6 +1376,7 @@
  *  locally generated packet
  * @if_outgoing: packet where the packet should be sent to, or NULL if
  *  unspecified
+ * @queue_left: The queue (counter) this packet was applied to
  */
 struct batadv_forw_packet {
 	struct hlist_node list;
@@ -1391,11 +1389,13 @@
 	struct delayed_work delayed_work;
 	struct batadv_hard_iface *if_incoming;
 	struct batadv_hard_iface *if_outgoing;
+	atomic_t *queue_left;
 };
 
 /**
  * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific)
  * @activate: start routing mechanisms when hard-interface is brought up
+ *  (optional)
  * @enable: init routing info when hard-interface is enabled
  * @disable: de-init routing info when hard-interface is disabled
  * @update_mac: (re-)init mac addresses of the protocol information
@@ -1413,11 +1413,13 @@
 /**
  * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific)
  * @hardif_init: called on creation of single hop entry
+ *  (optional)
  * @cmp: compare the metrics of two neighbors for their respective outgoing
  *  interfaces
  * @is_similar_or_better: check if neigh1 is equally similar or better than
 *  neigh2 for their respective outgoing interface from the metric perspective
  * @print: print the single hop neighbor list (optional)
+ * @dump: dump neighbors to a netlink socket (optional)
  */
 struct batadv_algo_neigh_ops {
 	void (*hardif_init)(struct batadv_hardif_neigh_node *neigh);
@@ -1430,17 +1432,21 @@
 				     struct batadv_neigh_node *neigh2,
 				     struct batadv_hard_iface *if_outgoing2);
 	void (*print)(struct batadv_priv *priv, struct seq_file *seq);
+	void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+		     struct batadv_priv *priv,
+		     struct batadv_hard_iface *hard_iface);
 };
 
 /**
  * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
  * @free: free the resources allocated by the routing algorithm for an orig_node
- *  object
+ *  object (optional)
  * @add_if: ask the routing algorithm to apply the needed changes to the
- *  orig_node due to a new hard-interface being added into the mesh
+ *  orig_node due to a new hard-interface being added into the mesh (optional)
  * @del_if: ask the routing algorithm to apply the needed changes to the
- *  orig_node due to an hard-interface being removed from the mesh
+ *  orig_node due to an hard-interface being removed from the mesh (optional)
  * @print: print the originator table (optional)
+ * @dump: dump originators to a netlink socket (optional)
  */
 struct batadv_algo_orig_ops {
 	void (*free)(struct batadv_orig_node *orig_node);
@@ -1449,6 +1455,34 @@
 		      int del_if_num);
 	void (*print)(struct batadv_priv *priv, struct seq_file *seq,
 		      struct batadv_hard_iface *hard_iface);
+	void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+		     struct batadv_priv *priv,
+		     struct batadv_hard_iface *hard_iface);
+};
+
+/**
+ * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific)
+ * @store_sel_class: parses and stores a new GW selection class (optional)
+ * @show_sel_class: prints the current GW selection class (optional)
+ * @get_best_gw_node: select the best GW from the list of available nodes
+ *  (optional)
+ * @is_eligible: check if a newly discovered GW is a potential candidate for
+ *  the election as best GW (optional)
+ * @print: print the gateway table (optional)
+ * @dump: dump gateways to a netlink socket (optional)
+ */
+struct batadv_algo_gw_ops {
+	ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
+				   size_t count);
+	ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff);
+	struct batadv_gw_node *(*get_best_gw_node)
+		(struct batadv_priv *bat_priv);
+	bool (*is_eligible)(struct batadv_priv *bat_priv,
+			    struct batadv_orig_node *curr_gw_orig,
+			    struct batadv_orig_node *orig_node);
+	void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq);
+	void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+		     struct batadv_priv *priv);
 };
 
 /**
@@ -1458,6 +1492,7 @@
  * @iface: callbacks related to interface handling
  * @neigh: callbacks related to neighbors handling
  * @orig: callbacks related to originators handling
+ * @gw: callbacks related to GW mode
  */
 struct batadv_algo_ops {
 	struct hlist_node list;
@@ -1465,6 +1500,7 @@
 	struct batadv_algo_iface_ops iface;
 	struct batadv_algo_neigh_ops neigh;
 	struct batadv_algo_orig_ops orig;
+	struct batadv_algo_gw_ops gw;
 };
 
 /**
@@ -1564,4 +1600,17 @@
 	BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2),
 };
 
+/**
+ * struct batadv_store_mesh_work - Work queue item to detach add/del interface
+ *  from sysfs locks
+ * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
+ * @soft_iface_name: name of soft-interface to modify
+ * @work: work queue item
+ */
+struct batadv_store_mesh_work {
+	struct net_device *net_dev;
+	char soft_iface_name[IFNAMSIZ];
+	struct work_struct work;
+};
+
 #endif /* _NET_BATMAN_ADV_TYPES_H_ */
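
With batadv_algo_gw_ops added to batadv_algo_ops, gateway handling becomes a fourth per-algorithm callback group next to iface/neigh/orig, and every member of it is marked optional. Roughly, an algorithm that only wants to expose a netlink gateway dump could register something like the sketch below (handler name and body are illustrative, and the mandatory members of batadv_algo_ops are omitted):

static void my_algo_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
			    struct batadv_priv *bat_priv)
{
	/* walk this algorithm's gateway list and emit one netlink message
	 * per gateway, resuming from cb->args[] as in the TT dumps above
	 */
}

static struct batadv_algo_ops my_algo_ops = {
	/* .name, .iface.enable, ... left out for brevity */
	.gw = {
		.dump = my_algo_gw_dump,
	},
};
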
diff --git a/net/core/dev.c b/net/core/dev.c
index dd6ce59..a75df86 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7625,6 +7625,9 @@
 	INIT_LIST_HEAD(&dev->all_adj_list.lower);
 	INIT_LIST_HEAD(&dev->ptype_all);
 	INIT_LIST_HEAD(&dev->ptype_specific);
+#ifdef CONFIG_NET_SCHED
+	hash_init(dev->qdisc_hash);
+#endif
 	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
 	setup(dev);
 
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 61ad43f..91028ae 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -6,6 +6,8 @@
 #include <linux/if_vlan.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/gre.h>
+#include <net/pptp.h>
 #include <linux/igmp.h>
 #include <linux/icmp.h>
 #include <linux/sctp.h>
@@ -338,32 +340,42 @@
 ip_proto_again:
 	switch (ip_proto) {
 	case IPPROTO_GRE: {
-		struct gre_hdr {
-			__be16 flags;
-			__be16 proto;
-		} *hdr, _hdr;
+		struct gre_base_hdr *hdr, _hdr;
+		u16 gre_ver;
+		int offset = 0;
 
 		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
 		if (!hdr)
 			goto out_bad;
-		/*
-		 * Only look inside GRE if version zero and no
-		 * routing
-		 */
-		if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
+
+		/* Only look inside GRE without routing */
+		if (hdr->flags & GRE_ROUTING)
 			break;
 
-		proto = hdr->proto;
-		nhoff += 4;
+		/* Only look inside GRE for version 0 and 1 */
+		gre_ver = ntohs(hdr->flags & GRE_VERSION);
+		if (gre_ver > 1)
+			break;
+
+		proto = hdr->protocol;
+		if (gre_ver) {
+			/* Version 1 must be PPTP and must carry the key flag */
+			if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+				break;
+		}
+
+		offset += sizeof(struct gre_base_hdr);
+
 		if (hdr->flags & GRE_CSUM)
-			nhoff += 4;
+			offset += sizeof(((struct gre_full_hdr *)0)->csum) +
+				  sizeof(((struct gre_full_hdr *)0)->reserved1);
+
 		if (hdr->flags & GRE_KEY) {
 			const __be32 *keyid;
 			__be32 _keyid;
 
-			keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
+			keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
 						     data, hlen, &_keyid);
-
 			if (!keyid)
 				goto out_bad;
 
@@ -372,32 +384,65 @@
 				key_keyid = skb_flow_dissector_target(flow_dissector,
 								      FLOW_DISSECTOR_KEY_GRE_KEYID,
 								      target_container);
-				key_keyid->keyid = *keyid;
+				if (gre_ver == 0)
+					key_keyid->keyid = *keyid;
+				else
+					key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
 			}
-			nhoff += 4;
+			offset += sizeof(((struct gre_full_hdr *)0)->key);
 		}
+
 		if (hdr->flags & GRE_SEQ)
-			nhoff += 4;
-		if (proto == htons(ETH_P_TEB)) {
-			const struct ethhdr *eth;
-			struct ethhdr _eth;
+			offset += sizeof(((struct pptp_gre_header *)0)->seq);
 
-			eth = __skb_header_pointer(skb, nhoff,
-						   sizeof(_eth),
-						   data, hlen, &_eth);
-			if (!eth)
+		if (gre_ver == 0) {
+			if (proto == htons(ETH_P_TEB)) {
+				const struct ethhdr *eth;
+				struct ethhdr _eth;
+
+				eth = __skb_header_pointer(skb, nhoff + offset,
+							   sizeof(_eth),
+							   data, hlen, &_eth);
+				if (!eth)
+					goto out_bad;
+				proto = eth->h_proto;
+				offset += sizeof(*eth);
+
+				/* Cap headers that we access via pointers at the
+				 * end of the Ethernet header as our maximum alignment
+				 * at that point is only 2 bytes.
+				 */
+				if (NET_IP_ALIGN)
+					hlen = (nhoff + offset);
+			}
+		} else { /* version 1, must be PPTP */
+			u8 _ppp_hdr[PPP_HDRLEN];
+			u8 *ppp_hdr;
+
+			if (hdr->flags & GRE_ACK)
+				offset += sizeof(((struct pptp_gre_header *)0)->ack);
+
+			ppp_hdr = skb_header_pointer(skb, nhoff + offset,
+						     sizeof(_ppp_hdr), _ppp_hdr);
+			if (!ppp_hdr)
 				goto out_bad;
-			proto = eth->h_proto;
-			nhoff += sizeof(*eth);
 
-			/* Cap headers that we access via pointers at the
-			 * end of the Ethernet header as our maximum alignment
-			 * at that point is only 2 bytes.
-			 */
-			if (NET_IP_ALIGN)
-				hlen = nhoff;
+			switch (PPP_PROTOCOL(ppp_hdr)) {
+			case PPP_IP:
+				proto = htons(ETH_P_IP);
+				break;
+			case PPP_IPV6:
+				proto = htons(ETH_P_IPV6);
+				break;
+			default:
+				/* Could probably catch some more like MPLS */
+				break;
+			}
+
+			offset += PPP_HDRLEN;
 		}
 
+		nhoff += offset;
 		key_control->flags |= FLOW_DIS_ENCAPSULATION;
 		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
 			goto out_good;
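
Instead of bumping nhoff by hard-coded 4s, the rewritten branch computes the payload offset from the GRE flag bits: the fixed base header plus whichever of checksum, key and sequence (and, for PPTP/version 1, ack) are announced as present. A standalone sketch of that computation for a version-0 header:

/* sketch: byte offset of the payload behind a version-0 GRE header,
 * derived from the flag bits alone (cf. struct gre_base_hdr above)
 */
static unsigned int gre_v0_payload_offset(__be16 flags)
{
	unsigned int offset = 4;	/* flags (2 bytes) + protocol (2 bytes) */

	if (flags & GRE_CSUM)
		offset += 4;		/* checksum (2) + reserved1 (2) */
	if (flags & GRE_KEY)
		offset += 4;		/* 32-bit key */
	if (flags & GRE_SEQ)
		offset += 4;		/* 32-bit sequence number */

	return offset;
}
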
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index cf26e04c4..2ae929f 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1148,7 +1148,8 @@
 			} else
 				goto out;
 		} else {
-			if (lladdr == neigh->ha && new == NUD_STALE)
+			if (lladdr == neigh->ha && new == NUD_STALE &&
+			    !(flags & NEIGH_UPDATE_F_ADMIN))
 				new = old;
 		}
 	}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2c2eb1b..1fe5816 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -37,6 +37,8 @@
 };
 EXPORT_SYMBOL(init_net);
 
+static bool init_net_initialized;
+
 #define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
 
 static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
@@ -750,6 +752,8 @@
 	if (setup_net(&init_net, &init_user_ns))
 		panic("Could not setup the initial network namespace");
 
+	init_net_initialized = true;
+
 	rtnl_lock();
 	list_add_tail_rcu(&init_net.list, &net_namespace_list);
 	rtnl_unlock();
@@ -811,15 +815,24 @@
 static int __register_pernet_operations(struct list_head *list,
 					struct pernet_operations *ops)
 {
+	if (!init_net_initialized) {
+		list_add_tail(&ops->list, list);
+		return 0;
+	}
+
 	return ops_init(ops, &init_net);
 }
 
 static void __unregister_pernet_operations(struct pernet_operations *ops)
 {
-	LIST_HEAD(net_exit_list);
-	list_add(&init_net.exit_list, &net_exit_list);
-	ops_exit_list(ops, &net_exit_list);
-	ops_free_list(ops, &net_exit_list);
+	if (!init_net_initialized) {
+		list_del(&ops->list);
+	} else {
+		LIST_HEAD(net_exit_list);
+		list_add(&init_net.exit_list, &net_exit_list);
+		ops_exit_list(ops, &net_exit_list);
+		ops_free_list(ops, &net_exit_list);
+	}
 }
 
 #endif /* CONFIG_NET_NS */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ef2ebeb..317c319 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -93,9 +93,6 @@
 		return NULL;
 
 	switch (id) {
-	case RT_TABLE_LOCAL:
-		rcu_assign_pointer(net->ipv4.fib_local, tb);
-		break;
 	case RT_TABLE_MAIN:
 		rcu_assign_pointer(net->ipv4.fib_main, tb);
 		break;
@@ -137,9 +134,6 @@
 {
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	switch (new->tb_id) {
-	case RT_TABLE_LOCAL:
-		rcu_assign_pointer(net->ipv4.fib_local, new);
-		break;
 	case RT_TABLE_MAIN:
 		rcu_assign_pointer(net->ipv4.fib_main, new);
 		break;
@@ -1249,7 +1243,6 @@
 
 	rtnl_lock();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-	RCU_INIT_POINTER(net->ipv4.fib_local, NULL);
 	RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
 	RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
 #endif
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 9b4ca87..606cc3e 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -472,6 +472,15 @@
 			continue;
 		}
 
+		/* Based on RFC3376 5.1. Should not send source-list change
+		 * records when there is a filter mode change.
+		 */
+		if (((gdeleted && pmc->sfmode == MCAST_EXCLUDE) ||
+		     (!gdeleted && pmc->crcount)) &&
+		    (type == IGMPV3_ALLOW_NEW_SOURCES ||
+		     type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount)
+			goto decrease_sf_crcount;
+
 		/* clear marks on query responses */
 		if (isquery)
 			psf->sf_gsresp = 0;
@@ -499,6 +508,7 @@
 		scount++; stotal++;
 		if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
 		     type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+decrease_sf_crcount:
 			psf->sf_crcount--;
 			if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
 				if (psf_prev)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 1d71c40..ba9cbea 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -85,7 +85,6 @@
 /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */
 #define CONF_OPEN_RETRIES 	2	/* (Re)open devices twice */
 #define CONF_SEND_RETRIES 	6	/* Send six requests per open */
-#define CONF_INTER_TIMEOUT	(HZ)	/* Inter-device timeout: 1 second */
 #define CONF_BASE_TIMEOUT	(HZ*2)	/* Initial timeout: 2 seconds */
 #define CONF_TIMEOUT_RANDOM	(HZ)	/* Maximum amount of randomization */
 #define CONF_TIMEOUT_MULT	*7/4	/* Rate of timeout growth */
@@ -188,7 +187,7 @@
 };
 
 static struct ic_device *ic_first_dev __initdata;	/* List of open device */
-static struct net_device *ic_dev __initdata;		/* Selected device */
+static struct ic_device *ic_dev __initdata;		/* Selected device */
 
 static bool __init ic_is_init_dev(struct net_device *dev)
 {
@@ -307,7 +306,7 @@
 	while ((d = next)) {
 		next = d->next;
 		dev = d->dev;
-		if (dev != ic_dev && !netdev_uses_dsa(dev)) {
+		if (dev != ic_dev->dev && !netdev_uses_dsa(dev)) {
 			pr_debug("IP-Config: Downing %s\n", dev->name);
 			dev_change_flags(dev, d->flags);
 		}
@@ -372,7 +371,7 @@
 	int err;
 
 	memset(&ir, 0, sizeof(ir));
-	strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name);
+	strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name);
 	set_sockaddr(sin, ic_myaddr, 0);
 	if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) {
 		pr_err("IP-Config: Unable to set interface address (%d)\n",
@@ -396,7 +395,7 @@
 	 * out, we'll try to muddle along.
 	 */
 	if (ic_dev_mtu != 0) {
-		strcpy(ir.ifr_name, ic_dev->name);
+		strcpy(ir.ifr_name, ic_dev->dev->name);
 		ir.ifr_mtu = ic_dev_mtu;
 		if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0)
 			pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n",
@@ -568,7 +567,7 @@
 		goto drop_unlock;
 
 	/* We have a winner! */
-	ic_dev = dev;
+	ic_dev = d;
 	if (ic_myaddr == NONE)
 		ic_myaddr = tip;
 	ic_servaddr = sip;
@@ -655,8 +654,6 @@
 	.func =	ic_bootp_recv,
 };
 
-static __be32 ic_dev_xid;		/* Device under configuration */
-
 /*
  *  Initialize DHCP/BOOTP extension fields in the request.
  */
@@ -666,14 +663,14 @@
 #ifdef IPCONFIG_DHCP
 
 static void __init
-ic_dhcp_init_options(u8 *options)
+ic_dhcp_init_options(u8 *options, struct ic_device *d)
 {
 	u8 mt = ((ic_servaddr == NONE)
 		 ? DHCPDISCOVER : DHCPREQUEST);
 	u8 *e = options;
 	int len;
 
-	pr_debug("DHCP: Sending message type %d\n", mt);
+	pr_debug("DHCP: Sending message type %d (%s)\n", mt, d->dev->name);
 
 	memcpy(e, ic_bootp_cookie, 4);	/* RFC1048 Magic Cookie */
 	e += 4;
@@ -857,7 +854,7 @@
 	/* add DHCP options or BOOTP extensions */
 #ifdef IPCONFIG_DHCP
 	if (ic_proto_enabled & IC_USE_DHCP)
-		ic_dhcp_init_options(b->exten);
+		ic_dhcp_init_options(b->exten, d);
 	else
 #endif
 		ic_bootp_init_ext(b->exten);
@@ -1033,14 +1030,8 @@
 	/* Is it a reply to our BOOTP request? */
 	if (b->op != BOOTP_REPLY ||
 	    b->xid != d->xid) {
-		net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n",
-				    b->op, b->xid);
-		goto drop_unlock;
-	}
-
-	/* Is it a reply for the device we are configuring? */
-	if (b->xid != ic_dev_xid) {
-		net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n");
+		net_err_ratelimited("DHCP/BOOTP: Reply not for us on %s, op[%x] xid[%x]\n",
+				    d->dev->name, b->op, b->xid);
 		goto drop_unlock;
 	}
 
@@ -1075,7 +1066,7 @@
 				}
 			}
 
-			pr_debug("DHCP: Got message type %d\n", mt);
+			pr_debug("DHCP: Got message type %d (%s)\n", mt, d->dev->name);
 
 			switch (mt) {
 			case DHCPOFFER:
@@ -1130,7 +1121,7 @@
 	}
 
 	/* We have a winner! */
-	ic_dev = dev;
+	ic_dev = d;
 	ic_myaddr = b->your_ip;
 	ic_servaddr = b->server_ip;
 	ic_addrservaddr = b->iph.saddr;
@@ -1225,9 +1216,6 @@
 	timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM);
 	for (;;) {
 #ifdef IPCONFIG_BOOTP
-		/* Track the device we are configuring */
-		ic_dev_xid = d->xid;
-
 		if (do_bootp && (d->able & IC_BOOTP))
 			ic_bootp_send_if(d, jiffies - start_jiffies);
 #endif
@@ -1236,15 +1224,19 @@
 			ic_rarp_send_if(d);
 #endif
 
-		jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout);
-		while (time_before(jiffies, jiff) && !ic_got_reply)
-			schedule_timeout_uninterruptible(1);
+		if (!d->next) {
+			jiff = jiffies + timeout;
+			while (time_before(jiffies, jiff) && !ic_got_reply)
+				schedule_timeout_uninterruptible(1);
+		}
 #ifdef IPCONFIG_DHCP
 		/* DHCP isn't done until we get a DHCPACK. */
 		if ((ic_got_reply & IC_BOOTP) &&
 		    (ic_proto_enabled & IC_USE_DHCP) &&
 		    ic_dhcp_msgtype != DHCPACK) {
 			ic_got_reply = 0;
+			/* continue on device that got the reply */
+			d = ic_dev;
 			pr_cont(",");
 			continue;
 		}
@@ -1487,7 +1479,7 @@
 #endif /* IPCONFIG_DYNAMIC */
 	} else {
 		/* Device selected manually or only one device -> use it */
-		ic_dev = ic_first_dev->dev;
+		ic_dev = ic_first_dev;
 	}
 
 	addr = root_nfs_parse_addr(root_server_path);
@@ -1501,14 +1493,6 @@
 		return -1;
 
 	/*
-	 * Close all network devices except the device we've
-	 * autoconfigured and set up routes.
-	 */
-	ic_close_devs();
-	if (ic_setup_if() < 0 || ic_setup_routes() < 0)
-		return -1;
-
-	/*
 	 * Record which protocol was actually used.
 	 */
 #ifdef IPCONFIG_DYNAMIC
@@ -1522,7 +1506,7 @@
 	pr_info("IP-Config: Complete:\n");
 
 	pr_info("     device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n",
-		ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr,
+		ic_dev->dev->name, ic_dev->dev->addr_len, ic_dev->dev->dev_addr,
 		&ic_myaddr, &ic_netmask, &ic_gateway);
 	pr_info("     host=%s, domain=%s, nis-domain=%s\n",
 		utsname()->nodename, ic_domain, utsname()->domainname);
@@ -1542,7 +1526,18 @@
 	pr_cont("\n");
 #endif /* !SILENT */
 
-	return 0;
+	/*
+	 * Close all network devices except the device we've
+	 * autoconfigured and set up routes.
+	 */
+	if (ic_setup_if() < 0 || ic_setup_routes() < 0)
+		err = -1;
+	else
+		err = 0;
+
+	ic_close_devs();
+
+	return err;
 }
 
 late_initcall(ip_auto_config);
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index ec9efbc..aba0998 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -172,6 +172,5 @@
 
 module_init(ila_init);
 module_exit(ila_fini);
-MODULE_ALIAS_RTNL_LWT(ILA);
 MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
 MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 704274c..397e1ed 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -61,12 +61,12 @@
 module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
-#define HASH_SIZE_SHIFT  5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_GRE_HASH_SIZE_SHIFT  5
+#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
 
 static int ip6gre_net_id __read_mostly;
 struct ip6gre_net {
-	struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+	struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
 
 	struct net_device *fb_tunnel_dev;
 };
@@ -96,12 +96,12 @@
    will match fallback tunnel.
  */
 
-#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1))
 static u32 HASH_ADDR(const struct in6_addr *addr)
 {
 	u32 hash = ipv6_addr_hash(addr);
 
-	return hash_32(hash, HASH_SIZE_SHIFT);
+	return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT);
 }
 
 #define tunnels_r_l	tunnels[3]
@@ -1087,7 +1087,7 @@
 
 	for (prio = 0; prio < 4; prio++) {
 		int h;
-		for (h = 0; h < HASH_SIZE; h++) {
+		for (h = 0; h < IP6_GRE_HASH_SIZE; h++) {
 			struct ip6_tnl *t;
 
 			t = rtnl_dereference(ign->tunnels[prio][h]);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 7b0481e..2050217 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -64,8 +64,8 @@
 MODULE_ALIAS_RTNL_LINK("ip6tnl");
 MODULE_ALIAS_NETDEV("ip6tnl0");
 
-#define HASH_SIZE_SHIFT  5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_TUNNEL_HASH_SIZE_SHIFT  5
+#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)
 
 static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
@@ -75,7 +75,7 @@
 {
 	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
 
-	return hash_32(hash, HASH_SIZE_SHIFT);
+	return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
 }
 
 static int ip6_tnl_dev_init(struct net_device *dev);
@@ -87,7 +87,7 @@
 	/* the IPv6 tunnel fallback device */
 	struct net_device *fb_tnl_dev;
 	/* lists for storing tunnels in use */
-	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+	struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
 	struct ip6_tnl __rcu *tnls_wc[1];
 	struct ip6_tnl __rcu **tnls[2];
 };
@@ -2031,7 +2031,7 @@
 		if (dev->rtnl_link_ops == &ip6_link_ops)
 			unregister_netdevice_queue(dev, &list);
 
-	for (h = 0; h < HASH_SIZE; h++) {
+	for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
 		t = rtnl_dereference(ip6n->tnls_r_l[h]);
 		while (t) {
 			/* If dev is in the same netns, it has already
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d90a11f..cc7e058 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -50,14 +50,14 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
-#define HASH_SIZE_SHIFT  5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_VTI_HASH_SIZE_SHIFT  5
+#define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT)
 
 static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
 {
 	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
 
-	return hash_32(hash, HASH_SIZE_SHIFT);
+	return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT);
 }
 
 static int vti6_dev_init(struct net_device *dev);
@@ -69,7 +69,7 @@
 	/* the vti6 tunnel fallback device */
 	struct net_device *fb_tnl_dev;
 	/* lists for storing tunnels in use */
-	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+	struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE];
 	struct ip6_tnl __rcu *tnls_wc[1];
 	struct ip6_tnl __rcu **tnls[2];
 };
@@ -1040,7 +1040,7 @@
 	struct ip6_tnl *t;
 	LIST_HEAD(list);
 
-	for (h = 0; h < HASH_SIZE; h++) {
+	for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
 		t = rtnl_dereference(ip6n->tnls_r_l[h]);
 		while (t) {
 			unregister_netdevice_queue(t->dev, &list);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d64ee7e..75c1fc5 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1739,6 +1739,15 @@
 			continue;
 		}
 
+		/* Based on RFC3810 6.1. Should not send source-list change
+		 * records when there is a filter mode change.
+		 */
+		if (((gdeleted && pmc->mca_sfmode == MCAST_EXCLUDE) ||
+		     (!gdeleted && pmc->mca_crcount)) &&
+		    (type == MLD2_ALLOW_NEW_SOURCES ||
+		     type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount)
+			goto decrease_sf_crcount;
+
 		/* clear marks on query responses */
 		if (isquery)
 			psf->sf_gsresp = 0;
@@ -1766,6 +1775,7 @@
 		scount++; stotal++;
 		if ((type == MLD2_ALLOW_NEW_SOURCES ||
 		     type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+decrease_sf_crcount:
 			psf->sf_crcount--;
 			if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
 				if (psf_prev)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 182b6a9..b1cdf80 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -62,7 +62,7 @@
    For comments look at net/ipv4/ip_gre.c --ANK
  */
 
-#define HASH_SIZE  16
+#define IP6_SIT_HASH_SIZE  16
 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 
 static bool log_ecn_error = true;
@@ -78,9 +78,9 @@
 
 static int sit_net_id __read_mostly;
 struct sit_net {
-	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
-	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
-	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE];
 	struct ip_tunnel __rcu *tunnels_wc[1];
 	struct ip_tunnel __rcu **tunnels[4];
 
@@ -1126,7 +1126,7 @@
 }
 #endif
 
-bool ipip6_valid_ip_proto(u8 ipproto)
+static bool ipip6_valid_ip_proto(u8 ipproto)
 {
 	return ipproto == IPPROTO_IPV6 ||
 		ipproto == IPPROTO_IPIP ||
@@ -1783,7 +1783,7 @@
 
 	for (prio = 1; prio < 4; prio++) {
 		int h;
-		for (h = 0; h < HASH_SIZE; h++) {
+		for (h = 0; h < IP6_SIT_HASH_SIZE; h++) {
 			struct ip_tunnel *t;
 
 			t = rtnl_dereference(sitn->tunnels[prio][h]);
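
The HASH_SIZE to IP6_SIT_HASH_SIZE rename here, like the matching renames in ip6_gre.c, ip6_tunnel.c and ip6_vti.c above, simply gives each tunnel driver a private macro name, presumably so it cannot clash with the generic HASH_SIZE() macro in <linux/hashtable.h>. The bucket selection itself is unchanged; for the ip6 tunnels it is the usual hash_32() folding, roughly:

#include <linux/hash.h>
#include <linux/types.h>

#define MY_HASH_SIZE_SHIFT	5
#define MY_HASH_SIZE		(1 << MY_HASH_SIZE_SHIFT)	/* 32 buckets */

static unsigned int my_bucket(u32 key)
{
	/* hash_32() folds key down to MY_HASH_SIZE_SHIFT bits,
	 * i.e. an index in [0, MY_HASH_SIZE)
	 */
	return hash_32(key, MY_HASH_SIZE_SHIFT);
}
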
diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig
index 5db94d9..87fca36 100644
--- a/net/kcm/Kconfig
+++ b/net/kcm/Kconfig
@@ -3,6 +3,7 @@
 	tristate "KCM sockets"
 	depends on INET
 	select BPF_SYSCALL
+	select STREAM_PARSER
 	---help---
 	  KCM (Kernel Connection Multiplexor) sockets provide a method
 	  for multiplexing messages of a message based application
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 16c2e03..47e4453 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -155,8 +155,8 @@
 	seq_printf(seq,
 		   "   psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
 		   psock->index,
-		   psock->stats.rx_msgs,
-		   psock->stats.rx_bytes,
+		   psock->strp.stats.rx_msgs,
+		   psock->strp.stats.rx_bytes,
 		   psock->stats.tx_msgs,
 		   psock->stats.tx_bytes,
 		   psock->sk->sk_receive_queue.qlen,
@@ -170,9 +170,12 @@
 	if (psock->tx_stopped)
 		seq_puts(seq, "TxStop ");
 
-	if (psock->rx_stopped)
+	if (psock->strp.rx_stopped)
 		seq_puts(seq, "RxStop ");
 
+	if (psock->strp.rx_paused)
+		seq_puts(seq, "RxPause ");
+
 	if (psock->tx_kcm)
 		seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
 
@@ -275,6 +278,7 @@
 {
 	struct kcm_psock_stats psock_stats;
 	struct kcm_mux_stats mux_stats;
+	struct strp_aggr_stats strp_stats;
 	struct kcm_mux *mux;
 	struct kcm_psock *psock;
 	struct net *net = seq->private;
@@ -282,20 +286,28 @@
 
 	memset(&mux_stats, 0, sizeof(mux_stats));
 	memset(&psock_stats, 0, sizeof(psock_stats));
+	memset(&strp_stats, 0, sizeof(strp_stats));
 
 	mutex_lock(&knet->mutex);
 
 	aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats);
 	aggregate_psock_stats(&knet->aggregate_psock_stats,
 			      &psock_stats);
+	aggregate_strp_stats(&knet->aggregate_strp_stats,
+			     &strp_stats);
 
 	list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) {
 		spin_lock_bh(&mux->lock);
 		aggregate_mux_stats(&mux->stats, &mux_stats);
 		aggregate_psock_stats(&mux->aggregate_psock_stats,
 				      &psock_stats);
-		list_for_each_entry(psock, &mux->psocks, psock_list)
+		aggregate_strp_stats(&mux->aggregate_strp_stats,
+				     &strp_stats);
+		list_for_each_entry(psock, &mux->psocks, psock_list) {
 			aggregate_psock_stats(&psock->stats, &psock_stats);
+			save_strp_stats(&psock->strp, &strp_stats);
+		}
+
 		spin_unlock_bh(&mux->lock);
 	}
 
@@ -328,7 +340,7 @@
 		   mux_stats.rx_ready_drops);
 
 	seq_printf(seq,
-		   "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
+		   "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
 		   "Psock",
 		   "RX-Msgs",
 		   "RX-Bytes",
@@ -337,6 +349,8 @@
 		   "Reserved",
 		   "Unreserved",
 		   "RX-Aborts",
+		   "RX-Intr",
+		   "RX-Unrecov",
 		   "RX-MemFail",
 		   "RX-NeedMor",
 		   "RX-BadLen",
@@ -345,20 +359,22 @@
 		   "TX-Aborts");
 
 	seq_printf(seq,
-		   "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
+		   "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
 		   "",
-		   psock_stats.rx_msgs,
-		   psock_stats.rx_bytes,
+		   strp_stats.rx_msgs,
+		   strp_stats.rx_bytes,
 		   psock_stats.tx_msgs,
 		   psock_stats.tx_bytes,
 		   psock_stats.reserved,
 		   psock_stats.unreserved,
-		   psock_stats.rx_aborts,
-		   psock_stats.rx_mem_fail,
-		   psock_stats.rx_need_more_hdr,
-		   psock_stats.rx_bad_hdr_len,
-		   psock_stats.rx_msg_too_big,
-		   psock_stats.rx_msg_timeouts,
+		   strp_stats.rx_aborts,
+		   strp_stats.rx_interrupted,
+		   strp_stats.rx_unrecov_intr,
+		   strp_stats.rx_mem_fail,
+		   strp_stats.rx_need_more_hdr,
+		   strp_stats.rx_bad_hdr_len,
+		   strp_stats.rx_msg_too_big,
+		   strp_stats.rx_msg_timeouts,
 		   psock_stats.tx_aborts);
 
 	return 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index cb39e05..eedbe40 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1,3 +1,13 @@
+/*
+ * Kernel Connection Multiplexor
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
 #include <linux/bpf.h>
 #include <linux/errno.h>
 #include <linux/errqueue.h>
@@ -35,38 +45,12 @@
 	return (struct kcm_tx_msg *)skb->cb;
 }
 
-static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb)
-{
-	return (struct kcm_rx_msg *)((void *)skb->cb +
-				     offsetof(struct qdisc_skb_cb, data));
-}
-
 static void report_csk_error(struct sock *csk, int err)
 {
 	csk->sk_err = EPIPE;
 	csk->sk_error_report(csk);
 }
 
-/* Callback lock held */
-static void kcm_abort_rx_psock(struct kcm_psock *psock, int err,
-			       struct sk_buff *skb)
-{
-	struct sock *csk = psock->sk;
-
-	/* Unrecoverable error in receive */
-
-	del_timer(&psock->rx_msg_timer);
-
-	if (psock->rx_stopped)
-		return;
-
-	psock->rx_stopped = 1;
-	KCM_STATS_INCR(psock->stats.rx_aborts);
-
-	/* Report an error on the lower socket */
-	report_csk_error(csk, err);
-}
-
 static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
 			       bool wakeup_kcm)
 {
@@ -109,12 +93,13 @@
 static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
 				    struct kcm_psock *psock)
 {
-	KCM_STATS_ADD(mux->stats.rx_bytes,
-		      psock->stats.rx_bytes - psock->saved_rx_bytes);
+	STRP_STATS_ADD(mux->stats.rx_bytes,
+		       psock->strp.stats.rx_bytes -
+		       psock->saved_rx_bytes);
 	mux->stats.rx_msgs +=
-		psock->stats.rx_msgs - psock->saved_rx_msgs;
-	psock->saved_rx_msgs = psock->stats.rx_msgs;
-	psock->saved_rx_bytes = psock->stats.rx_bytes;
+		psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
+	psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
+	psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
 }
 
 static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
@@ -167,11 +152,11 @@
 		 */
 		list_del(&psock->psock_ready_list);
 		psock->ready_rx_msg = NULL;
-
 		/* Commit clearing of ready_rx_msg for queuing work */
 		smp_mb();
 
-		queue_work(kcm_wq, &psock->rx_work);
+		strp_unpause(&psock->strp);
+		strp_check_rcv(&psock->strp);
 	}
 
 	/* Buffer limit is okay now, add to ready list */
@@ -285,6 +270,7 @@
 
 	if (list_empty(&mux->kcm_rx_waiters)) {
 		psock->ready_rx_msg = head;
+		strp_pause(&psock->strp);
 		list_add_tail(&psock->psock_ready_list,
 			      &mux->psocks_ready);
 		spin_unlock_bh(&mux->rx_lock);
@@ -353,276 +339,6 @@
 	spin_unlock_bh(&mux->rx_lock);
 }
 
-static void kcm_start_rx_timer(struct kcm_psock *psock)
-{
-	if (psock->sk->sk_rcvtimeo)
-		mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo);
-}
-
-/* Macro to invoke filter function. */
-#define KCM_RUN_FILTER(prog, ctx) \
-	(*prog->bpf_func)(ctx, prog->insnsi)
-
-/* Lower socket lock held */
-static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
-			unsigned int orig_offset, size_t orig_len)
-{
-	struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data;
-	struct kcm_rx_msg *rxm;
-	struct kcm_sock *kcm;
-	struct sk_buff *head, *skb;
-	size_t eaten = 0, cand_len;
-	ssize_t extra;
-	int err;
-	bool cloned_orig = false;
-
-	if (psock->ready_rx_msg)
-		return 0;
-
-	head = psock->rx_skb_head;
-	if (head) {
-		/* Message already in progress */
-
-		rxm = kcm_rx_msg(head);
-		if (unlikely(rxm->early_eaten)) {
-			/* Already some number of bytes on the receive sock
-			 * data saved in rx_skb_head, just indicate they
-			 * are consumed.
-			 */
-			eaten = orig_len <= rxm->early_eaten ?
-				orig_len : rxm->early_eaten;
-			rxm->early_eaten -= eaten;
-
-			return eaten;
-		}
-
-		if (unlikely(orig_offset)) {
-			/* Getting data with a non-zero offset when a message is
-			 * in progress is not expected. If it does happen, we
-			 * need to clone and pull since we can't deal with
-			 * offsets in the skbs for a message expect in the head.
-			 */
-			orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
-			if (!orig_skb) {
-				KCM_STATS_INCR(psock->stats.rx_mem_fail);
-				desc->error = -ENOMEM;
-				return 0;
-			}
-			if (!pskb_pull(orig_skb, orig_offset)) {
-				KCM_STATS_INCR(psock->stats.rx_mem_fail);
-				kfree_skb(orig_skb);
-				desc->error = -ENOMEM;
-				return 0;
-			}
-			cloned_orig = true;
-			orig_offset = 0;
-		}
-
-		if (!psock->rx_skb_nextp) {
-			/* We are going to append to the frags_list of head.
-			 * Need to unshare the frag_list.
-			 */
-			err = skb_unclone(head, GFP_ATOMIC);
-			if (err) {
-				KCM_STATS_INCR(psock->stats.rx_mem_fail);
-				desc->error = err;
-				return 0;
-			}
-
-			if (unlikely(skb_shinfo(head)->frag_list)) {
-				/* We can't append to an sk_buff that already
-				 * has a frag_list. We create a new head, point
-				 * the frag_list of that to the old head, and
-				 * then are able to use the old head->next for
-				 * appending to the message.
-				 */
-				if (WARN_ON(head->next)) {
-					desc->error = -EINVAL;
-					return 0;
-				}
-
-				skb = alloc_skb(0, GFP_ATOMIC);
-				if (!skb) {
-					KCM_STATS_INCR(psock->stats.rx_mem_fail);
-					desc->error = -ENOMEM;
-					return 0;
-				}
-				skb->len = head->len;
-				skb->data_len = head->len;
-				skb->truesize = head->truesize;
-				*kcm_rx_msg(skb) = *kcm_rx_msg(head);
-				psock->rx_skb_nextp = &head->next;
-				skb_shinfo(skb)->frag_list = head;
-				psock->rx_skb_head = skb;
-				head = skb;
-			} else {
-				psock->rx_skb_nextp =
-				    &skb_shinfo(head)->frag_list;
-			}
-		}
-	}
-
-	while (eaten < orig_len) {
-		/* Always clone since we will consume something */
-		skb = skb_clone(orig_skb, GFP_ATOMIC);
-		if (!skb) {
-			KCM_STATS_INCR(psock->stats.rx_mem_fail);
-			desc->error = -ENOMEM;
-			break;
-		}
-
-		cand_len = orig_len - eaten;
-
-		head = psock->rx_skb_head;
-		if (!head) {
-			head = skb;
-			psock->rx_skb_head = head;
-			/* Will set rx_skb_nextp on next packet if needed */
-			psock->rx_skb_nextp = NULL;
-			rxm = kcm_rx_msg(head);
-			memset(rxm, 0, sizeof(*rxm));
-			rxm->offset = orig_offset + eaten;
-		} else {
-			/* Unclone since we may be appending to an skb that we
-			 * already share a frag_list with.
-			 */
-			err = skb_unclone(skb, GFP_ATOMIC);
-			if (err) {
-				KCM_STATS_INCR(psock->stats.rx_mem_fail);
-				desc->error = err;
-				break;
-			}
-
-			rxm = kcm_rx_msg(head);
-			*psock->rx_skb_nextp = skb;
-			psock->rx_skb_nextp = &skb->next;
-			head->data_len += skb->len;
-			head->len += skb->len;
-			head->truesize += skb->truesize;
-		}
-
-		if (!rxm->full_len) {
-			ssize_t len;
-
-			len = KCM_RUN_FILTER(psock->bpf_prog, head);
-
-			if (!len) {
-				/* Need more header to determine length */
-				if (!rxm->accum_len) {
-					/* Start RX timer for new message */
-					kcm_start_rx_timer(psock);
-				}
-				rxm->accum_len += cand_len;
-				eaten += cand_len;
-				KCM_STATS_INCR(psock->stats.rx_need_more_hdr);
-				WARN_ON(eaten != orig_len);
-				break;
-			} else if (len > psock->sk->sk_rcvbuf) {
-				/* Message length exceeds maximum allowed */
-				KCM_STATS_INCR(psock->stats.rx_msg_too_big);
-				desc->error = -EMSGSIZE;
-				psock->rx_skb_head = NULL;
-				kcm_abort_rx_psock(psock, EMSGSIZE, head);
-				break;
-			} else if (len <= (ssize_t)head->len -
-					  skb->len - rxm->offset) {
-				/* Length must be into new skb (and also
-				 * greater than zero)
-				 */
-				KCM_STATS_INCR(psock->stats.rx_bad_hdr_len);
-				desc->error = -EPROTO;
-				psock->rx_skb_head = NULL;
-				kcm_abort_rx_psock(psock, EPROTO, head);
-				break;
-			}
-
-			rxm->full_len = len;
-		}
-
-		extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len;
-
-		if (extra < 0) {
-			/* Message not complete yet. */
-			if (rxm->full_len - rxm->accum_len >
-			    tcp_inq(psock->sk)) {
-				/* Don't have the whole messages in the socket
-				 * buffer. Set psock->rx_need_bytes to wait for
-				 * the rest of the message. Also, set "early
-				 * eaten" since we've already buffered the skb
-				 * but don't consume yet per tcp_read_sock.
-				 */
-
-				if (!rxm->accum_len) {
-					/* Start RX timer for new message */
-					kcm_start_rx_timer(psock);
-				}
-
-				psock->rx_need_bytes = rxm->full_len -
-						       rxm->accum_len;
-				rxm->accum_len += cand_len;
-				rxm->early_eaten = cand_len;
-				KCM_STATS_ADD(psock->stats.rx_bytes, cand_len);
-				desc->count = 0; /* Stop reading socket */
-				break;
-			}
-			rxm->accum_len += cand_len;
-			eaten += cand_len;
-			WARN_ON(eaten != orig_len);
-			break;
-		}
-
-		/* Positive extra indicates ore bytes than needed for the
-		 * message
-		 */
-
-		WARN_ON(extra > cand_len);
-
-		eaten += (cand_len - extra);
-
-		/* Hurray, we have a new message! */
-		del_timer(&psock->rx_msg_timer);
-		psock->rx_skb_head = NULL;
-		KCM_STATS_INCR(psock->stats.rx_msgs);
-
-try_queue:
-		kcm = reserve_rx_kcm(psock, head);
-		if (!kcm) {
-			/* Unable to reserve a KCM, message is held in psock. */
-			break;
-		}
-
-		if (kcm_queue_rcv_skb(&kcm->sk, head)) {
-			/* Should mean socket buffer full */
-			unreserve_rx_kcm(psock, false);
-			goto try_queue;
-		}
-	}
-
-	if (cloned_orig)
-		kfree_skb(orig_skb);
-
-	KCM_STATS_ADD(psock->stats.rx_bytes, eaten);
-
-	return eaten;
-}
-
-/* Called with lock held on lower socket */
-static int psock_tcp_read_sock(struct kcm_psock *psock)
-{
-	read_descriptor_t desc;
-
-	desc.arg.data = psock;
-	desc.error = 0;
-	desc.count = 1; /* give more than one skb per call */
-
-	/* sk should be locked here, so okay to do tcp_read_sock */
-	tcp_read_sock(psock->sk, &desc, kcm_tcp_recv);
-
-	unreserve_rx_kcm(psock, true);
-
-	return desc.error;
-}
-
 /* Lower sock lock held */
 static void psock_tcp_data_ready(struct sock *sk)
 {
@@ -631,65 +347,49 @@
 	read_lock_bh(&sk->sk_callback_lock);
 
 	psock = (struct kcm_psock *)sk->sk_user_data;
-	if (unlikely(!psock || psock->rx_stopped))
-		goto out;
+	if (likely(psock))
+		strp_tcp_data_ready(&psock->strp);
 
-	if (psock->ready_rx_msg)
-		goto out;
-
-	if (psock->rx_need_bytes) {
-		if (tcp_inq(sk) >= psock->rx_need_bytes)
-			psock->rx_need_bytes = 0;
-		else
-			goto out;
-	}
-
-	if (psock_tcp_read_sock(psock) == -ENOMEM)
-		queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
-
-out:
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
-static void do_psock_rx_work(struct kcm_psock *psock)
+/* Called with lower sock held */
+static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb)
 {
-	read_descriptor_t rd_desc;
-	struct sock *csk = psock->sk;
+	struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+	struct kcm_sock *kcm;
 
-	/* We need the read lock to synchronize with psock_tcp_data_ready. We
-	 * need the socket lock for calling tcp_read_sock.
-	 */
-	lock_sock(csk);
-	read_lock_bh(&csk->sk_callback_lock);
+try_queue:
+	kcm = reserve_rx_kcm(psock, skb);
+	if (!kcm) {
+		 /* Unable to reserve a KCM, message is held in psock and strp
+		  * is paused.
+		  */
+		return;
+	}
 
-	if (unlikely(csk->sk_user_data != psock))
-		goto out;
-
-	if (unlikely(psock->rx_stopped))
-		goto out;
-
-	if (psock->ready_rx_msg)
-		goto out;
-
-	rd_desc.arg.data = psock;
-
-	if (psock_tcp_read_sock(psock) == -ENOMEM)
-		queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
-
-out:
-	read_unlock_bh(&csk->sk_callback_lock);
-	release_sock(csk);
+	if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
+		/* Should mean socket buffer full */
+		unreserve_rx_kcm(psock, false);
+		goto try_queue;
+	}
 }
 
-static void psock_rx_work(struct work_struct *w)
+static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
 {
-	do_psock_rx_work(container_of(w, struct kcm_psock, rx_work));
+	struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+	struct bpf_prog *prog = psock->bpf_prog;
+
+	return (*prog->bpf_func)(skb, prog->insnsi);
 }
 
-static void psock_rx_delayed_work(struct work_struct *w)
+static int kcm_read_sock_done(struct strparser *strp, int err)
 {
-	do_psock_rx_work(container_of(w, struct kcm_psock,
-				      rx_delayed_work.work));
+	struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+
+	unreserve_rx_kcm(psock, true);
+
+	return err;
 }
 
 static void psock_tcp_state_change(struct sock *sk)
@@ -713,14 +413,13 @@
 	psock = (struct kcm_psock *)sk->sk_user_data;
 	if (unlikely(!psock))
 		goto out;
-
 	mux = psock->mux;
 
 	spin_lock_bh(&mux->lock);
 
 	/* Check if the socket is reserved so someone is waiting for sending. */
 	kcm = psock->tx_kcm;
-	if (kcm)
+	if (kcm && !unlikely(kcm->tx_stopped))
 		queue_work(kcm_wq, &kcm->tx_work);
 
 	spin_unlock_bh(&mux->lock);
@@ -1411,7 +1110,7 @@
 	struct kcm_sock *kcm = kcm_sk(sk);
 	int err = 0;
 	long timeo;
-	struct kcm_rx_msg *rxm;
+	struct strp_rx_msg *rxm;
 	int copied = 0;
 	struct sk_buff *skb;
 
@@ -1425,7 +1124,7 @@
 
 	/* Okay, have a message on the receive queue */
 
-	rxm = kcm_rx_msg(skb);
+	rxm = strp_rx_msg(skb);
 
 	if (len > rxm->full_len)
 		len = rxm->full_len;
@@ -1481,7 +1180,7 @@
 	struct sock *sk = sock->sk;
 	struct kcm_sock *kcm = kcm_sk(sk);
 	long timeo;
-	struct kcm_rx_msg *rxm;
+	struct strp_rx_msg *rxm;
 	int err = 0;
 	ssize_t copied;
 	struct sk_buff *skb;
@@ -1498,7 +1197,7 @@
 
 	/* Okay, have a message on the receive queue */
 
-	rxm = kcm_rx_msg(skb);
+	rxm = strp_rx_msg(skb);
 
 	if (len > rxm->full_len)
 		len = rxm->full_len;
@@ -1674,15 +1373,6 @@
 	spin_unlock_bh(&mux->rx_lock);
 }
 
-static void kcm_rx_msg_timeout(unsigned long arg)
-{
-	struct kcm_psock *psock = (struct kcm_psock *)arg;
-
-	/* Message assembly timed out */
-	KCM_STATS_INCR(psock->stats.rx_msg_timeouts);
-	kcm_abort_rx_psock(psock, ETIMEDOUT, NULL);
-}
-
 static int kcm_attach(struct socket *sock, struct socket *csock,
 		      struct bpf_prog *prog)
 {
@@ -1692,6 +1382,7 @@
 	struct kcm_psock *psock = NULL, *tpsock;
 	struct list_head *head;
 	int index = 0;
+	struct strp_callbacks cb;
 
 	if (csock->ops->family != PF_INET &&
 	    csock->ops->family != PF_INET6)
@@ -1713,11 +1404,12 @@
 	psock->sk = csk;
 	psock->bpf_prog = prog;
 
-	setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout,
-		    (unsigned long)psock);
+	cb.rcv_msg = kcm_rcv_strparser;
+	cb.abort_parser = NULL;
+	cb.parse_msg = kcm_parse_func_strparser;
+	cb.read_sock_done = kcm_read_sock_done;
 
-	INIT_WORK(&psock->rx_work, psock_rx_work);
-	INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work);
+	strp_init(&psock->strp, csk, &cb);
 
 	sock_hold(csk);
 
@@ -1750,7 +1442,7 @@
 	spin_unlock_bh(&mux->lock);
 
 	/* Schedule RX work in case there are already bytes queued */
-	queue_work(kcm_wq, &psock->rx_work);
+	strp_check_rcv(&psock->strp);
 
 	return 0;
 }
@@ -1785,6 +1477,7 @@
 	return err;
 }
 
+/* Lower socket lock held */
 static void kcm_unattach(struct kcm_psock *psock)
 {
 	struct sock *csk = psock->sk;
@@ -1798,7 +1491,7 @@
 	csk->sk_data_ready = psock->save_data_ready;
 	csk->sk_write_space = psock->save_write_space;
 	csk->sk_state_change = psock->save_state_change;
-	psock->rx_stopped = 1;
+	strp_stop(&psock->strp);
 
 	if (WARN_ON(psock->rx_kcm)) {
 		write_unlock_bh(&csk->sk_callback_lock);
@@ -1821,18 +1514,14 @@
 
 	write_unlock_bh(&csk->sk_callback_lock);
 
-	del_timer_sync(&psock->rx_msg_timer);
-	cancel_work_sync(&psock->rx_work);
-	cancel_delayed_work_sync(&psock->rx_delayed_work);
+	strp_done(&psock->strp);
 
 	bpf_prog_put(psock->bpf_prog);
 
-	kfree_skb(psock->rx_skb_head);
-	psock->rx_skb_head = NULL;
-
 	spin_lock_bh(&mux->lock);
 
 	aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
+	save_strp_stats(&psock->strp, &mux->aggregate_strp_stats);
 
 	KCM_STATS_INCR(mux->stats.psock_unattach);
 
@@ -1915,6 +1604,7 @@
 
 		spin_unlock_bh(&mux->lock);
 
+		/* Lower socket lock should already be held */
 		kcm_unattach(psock);
 
 		err = 0;
@@ -2059,8 +1749,11 @@
 	/* Release psocks */
 	list_for_each_entry_safe(psock, tmp_psock,
 				 &mux->psocks, psock_list) {
-		if (!WARN_ON(psock->unattaching))
+		if (!WARN_ON(psock->unattaching)) {
+			lock_sock(psock->strp.sk);
 			kcm_unattach(psock);
+			release_sock(psock->strp.sk);
+		}
 	}
 
 	if (WARN_ON(mux->psocks_cnt))
@@ -2072,6 +1765,8 @@
 	aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
 	aggregate_psock_stats(&mux->aggregate_psock_stats,
 			      &knet->aggregate_psock_stats);
+	aggregate_strp_stats(&mux->aggregate_strp_stats,
+			     &knet->aggregate_strp_stats);
 	list_del_rcu(&mux->kcm_mux_list);
 	knet->count--;
 	mutex_unlock(&knet->mutex);
@@ -2151,6 +1846,13 @@
 	 * it will just return.
 	 */
 	__skb_queue_purge(&sk->sk_write_queue);
+
+	/* Set tx_stopped. This is checked when psock is bound to a kcm and we
+	 * get a writespace callback. This prevents further work being queued
+	 * from the callback (unbinding the psock occurs after canceling work).
+	 */
+	kcm->tx_stopped = 1;
+
 	release_sock(sk);
 
 	spin_lock_bh(&mux->lock);
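
The KCM rework hands message framing to the stream parser: each psock embeds a struct strparser, fills in a struct strp_callbacks, and lets strp_init()/strp_check_rcv()/strp_stop()/strp_done() drive the receive path (hence the new "select STREAM_PARSER" in the Kconfig hunk above). A reduced sketch of that wiring for some other framed-over-TCP consumer; the callback names and bodies are placeholders, not the kcm ones:

#include <net/strparser.h>

struct my_psock {
	struct strparser strp;
	/* ... per-connection state ... */
};

/* return the length of the full frame once it can be determined,
 * 0 for "need more data", or a negative error to abort
 */
static int my_parse_msg(struct strparser *strp, struct sk_buff *skb)
{
	/* e.g. read a length field via skb_header_pointer() */
	return 0;
}

/* called with one complete, delimited message */
static void my_rcv_msg(struct strparser *strp, struct sk_buff *skb)
{
	struct my_psock *psock = container_of(strp, struct my_psock, strp);

	/* hand skb to whatever consumes messages for psock */
}

static void my_attach(struct my_psock *psock, struct sock *csk)
{
	struct strp_callbacks cb = {
		.parse_msg = my_parse_msg,
		.rcv_msg   = my_rcv_msg,
		/* abort_parser / read_sock_done left NULL in this sketch;
		 * the kcm hunk above sets read_sock_done explicitly
		 */
	};

	strp_init(&psock->strp, csk, &cb);
	/* kick the parser in case data is already queued on csk */
	strp_check_rcv(&psock->strp);
}
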
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index ba5fc1f..42a41ae 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1088,13 +1088,13 @@
 }
 
 static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
-					      struct ieee80211_sta *sta)
+					      struct sta_info *sta)
 {
 	u32 ret = 0;
 
-	trace_drv_get_expected_throughput(sta);
-	if (local->ops->get_expected_throughput)
-		ret = local->ops->get_expected_throughput(&local->hw, sta);
+	trace_drv_get_expected_throughput(&sta->sta);
+	if (local->ops->get_expected_throughput && sta->uploaded)
+		ret = local->ops->get_expected_throughput(&local->hw, &sta->sta);
 	trace_drv_return_u32(local, ret);
 
 	return ret;
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 8f9c3bd..fa7d37c 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -326,22 +326,33 @@
 	u32 tx_time, estimated_retx;
 	u64 result;
 
-	if (sta->mesh->fail_avg >= 100)
-		return MAX_METRIC;
+	/* Try to get rate based on HW/SW RC algorithm.
+	 * Rate is returned in units of Kbps; correct this
+	 * to comply with the airtime calculation units.
+	 * Round up in case we get a rate < 100 Kbps.
+	 */
+	rate = DIV_ROUND_UP(sta_get_expected_throughput(sta), 100);
 
-	sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
-	rate = cfg80211_calculate_bitrate(&rinfo);
-	if (WARN_ON(!rate))
-		return MAX_METRIC;
+	if (rate) {
+		err = 0;
+	} else {
+		if (sta->mesh->fail_avg >= 100)
+			return MAX_METRIC;
 
-	err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+		sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
+		rate = cfg80211_calculate_bitrate(&rinfo);
+		if (WARN_ON(!rate))
+			return MAX_METRIC;
+
+		err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+	}
 
 	/* bitrate is in units of 100 Kbps, while we need rate in units of
 	 * 1Mbps. This will be corrected on tx_time computation.
 	 */
 	tx_time = (device_constant + 10 * test_frame_len / rate);
 	estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err));
-	result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ;
+	result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT);
 	return (u32)result;
 }
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 76b737d..19f14c9 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2279,11 +2279,7 @@
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
 		sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
 
-	/* check if the driver has a SW RC implementation */
-	if (ref && ref->ops->get_expected_throughput)
-		thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
-	else
-		thr = drv_get_expected_throughput(local, &sta->sta);
+	thr = sta_get_expected_throughput(sta);
 
 	if (thr != 0) {
 		sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
@@ -2291,6 +2287,25 @@
 	}
 }
 
+u32 sta_get_expected_throughput(struct sta_info *sta)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_local *local = sdata->local;
+	struct rate_control_ref *ref = NULL;
+	u32 thr = 0;
+
+	if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
+		ref = local->rate_ctrl;
+
+	/* check if the driver has a SW RC implementation */
+	if (ref && ref->ops->get_expected_throughput)
+		thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
+	else
+		thr = drv_get_expected_throughput(local, sta);
+
+	return thr;
+}
+
 unsigned long ieee80211_sta_last_active(struct sta_info *sta)
 {
 	struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 78b0ef3..0556be3 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -712,6 +712,8 @@
 			  struct rate_info *rinfo);
 void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo);
 
+u32 sta_get_expected_throughput(struct sta_info *sta);
+
 void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
 			  unsigned long exp_time);
 u8 sta_info_tx_streams(struct sta_info *sta);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5023966..1d0746d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2334,7 +2334,6 @@
 	struct mesh_path __maybe_unused *mppath = NULL, *mpath = NULL;
 	const u8 *encaps_data;
 	int encaps_len, skip_header_bytes;
-	int nh_pos, h_pos;
 	bool wme_sta = false, authorized = false;
 	bool tdls_peer;
 	bool multicast;
@@ -2640,13 +2639,7 @@
 		encaps_len = 0;
 	}
 
-	nh_pos = skb_network_header(skb) - skb->data;
-	h_pos = skb_transport_header(skb) - skb->data;
-
 	skb_pull(skb, skip_header_bytes);
-	nh_pos -= skip_header_bytes;
-	h_pos -= skip_header_bytes;
-
 	head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb);
 
 	/*
@@ -2672,18 +2665,12 @@
 		}
 	}
 
-	if (encaps_data) {
+	if (encaps_data)
 		memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
-		nh_pos += encaps_len;
-		h_pos += encaps_len;
-	}
 
 #ifdef CONFIG_MAC80211_MESH
-	if (meshhdrlen > 0) {
+	if (meshhdrlen > 0)
 		memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen);
-		nh_pos += meshhdrlen;
-		h_pos += meshhdrlen;
-	}
 #endif
 
 	if (ieee80211_is_data_qos(fc)) {
@@ -2699,15 +2686,7 @@
 	} else
 		memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
 
-	nh_pos += hdrlen;
-	h_pos += hdrlen;
-
-	/* Update skb pointers to various headers since this modified frame
-	 * is going to go through Linux networking code that may potentially
-	 * need things like pointer to IP header. */
 	skb_reset_mac_header(skb);
-	skb_set_network_header(skb, nh_pos);
-	skb_set_transport_header(skb, h_pos);
 
 	info = IEEE80211_SKB_CB(skb);
 	memset(info, 0, sizeof(*info));
@@ -4390,9 +4369,6 @@
 	int ac = ieee802_1d_to_ac[tid & 7];
 
 	skb_reset_mac_header(skb);
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-
 	skb_set_queue_mapping(skb, ac);
 	skb->priority = tid;
 
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 046f750..45ac8e8 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -333,6 +333,7 @@
 void rds_ib_state_change(struct sock *sk);
 int rds_ib_listen_init(void);
 void rds_ib_listen_stop(void);
+__printf(2, 3)
 void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
 int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 			     struct rdma_cm_event *event);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index b2d17f0..fd0bccb 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -688,6 +688,7 @@
 #define rds_conn_error(conn, fmt...) \
 	__rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
 
+__printf(2, 3)
 void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...);
 #define rds_conn_path_error(cp, fmt...) \
 	__rds_conn_path_error(cp, KERN_WARNING "RDS: " fmt)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 12ebde8..25aada7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -29,6 +29,7 @@
 #include <linux/hrtimer.h>
 #include <linux/lockdep.h>
 #include <linux/slab.h>
+#include <linux/hashtable.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -263,33 +264,33 @@
 	    root->handle == handle)
 		return root;
 
-	list_for_each_entry_rcu(q, &root->list, list) {
+	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
 		if (q->handle == handle)
 			return q;
 	}
 	return NULL;
 }
 
-void qdisc_list_add(struct Qdisc *q)
+void qdisc_hash_add(struct Qdisc *q)
 {
 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
 		struct Qdisc *root = qdisc_dev(q)->qdisc;
 
 		WARN_ON_ONCE(root == &noop_qdisc);
 		ASSERT_RTNL();
-		list_add_tail_rcu(&q->list, &root->list);
+		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
 	}
 }
-EXPORT_SYMBOL(qdisc_list_add);
+EXPORT_SYMBOL(qdisc_hash_add);
 
-void qdisc_list_del(struct Qdisc *q)
+void qdisc_hash_del(struct Qdisc *q)
 {
 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
 		ASSERT_RTNL();
-		list_del_rcu(&q->list);
+		hash_del_rcu(&q->hash);
 	}
 }
-EXPORT_SYMBOL(qdisc_list_del);
+EXPORT_SYMBOL(qdisc_hash_del);
 
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
@@ -998,7 +999,7 @@
 				goto err_out4;
 		}
 
-		qdisc_list_add(sch);
+		qdisc_hash_add(sch);
 
 		return sch;
 	}
@@ -1435,6 +1436,7 @@
 {
 	int ret = 0, q_idx = *q_idx_p;
 	struct Qdisc *q;
+	int b;
 
 	if (!root)
 		return 0;
@@ -1449,7 +1451,7 @@
 			goto done;
 		q_idx++;
 	}
-	list_for_each_entry(q, &root->list, list) {
+	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
 		if (q_idx < s_q_idx) {
 			q_idx++;
 			continue;
@@ -1765,6 +1767,7 @@
 			       int *t_p, int s_t)
 {
 	struct Qdisc *q;
+	int b;
 
 	if (!root)
 		return 0;
@@ -1772,7 +1775,7 @@
 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
 		return -1;
 
-	list_for_each_entry(q, &root->list, list) {
+	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
 			return -1;
 	}
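The qdisc_list_add/del to qdisc_hash_add/del conversion above is a straight application of the generic <linux/hashtable.h> API, keyed by the qdisc handle. A minimal sketch of the pattern follows; "struct item" and "my_table" are made-up names used only for illustration, the real code hangs the table off qdisc_dev(q)->qdisc_hash.

	#include <linux/types.h>
	#include <linux/hashtable.h>

	struct item {
		u32 handle;
		struct hlist_node hash;
	};

	static DEFINE_HASHTABLE(my_table, 4);	/* 16 buckets */

	static void item_add(struct item *it)
	{
		hash_add_rcu(my_table, &it->hash, it->handle);
	}

	static struct item *item_lookup(u32 handle)
	{
		struct item *it;

		/* Walk only the bucket that can contain this handle */
		hash_for_each_possible_rcu(my_table, it, hash, handle) {
			if (it->handle == handle)
				return it;
		}
		return NULL;
	}

	static void item_del(struct item *it)
	{
		hash_del_rcu(&it->hash);
	}
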
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e95b67c..18faecc 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -423,7 +423,6 @@
 	.dequeue	=	noop_dequeue,
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noop_qdisc_ops,
-	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.dev_queue	=	&noop_netdev_queue,
 	.running	=	SEQCNT_ZERO(noop_qdisc.running),
@@ -613,7 +612,6 @@
 		sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
 		sch->padded = (char *) sch - (char *) p;
 	}
-	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);
 
 	spin_lock_init(&sch->busylock);
@@ -700,7 +698,7 @@
 		return;
 
 #ifdef CONFIG_NET_SCHED
-	qdisc_list_del(qdisc);
+	qdisc_hash_del(qdisc);
 
 	qdisc_put_stab(rtnl_dereference(qdisc->stab));
 #endif
@@ -788,6 +786,10 @@
 			qdisc->ops->attach(qdisc);
 		}
 	}
+#ifdef CONFIG_NET_SCHED
+	if (dev->qdisc)
+		qdisc_hash_add(dev->qdisc);
+#endif
 }
 
 static void transition_one_qdisc(struct net_device *dev,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ddc7bd..000f1d3 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -142,8 +142,6 @@
 					   link-sharing, max(myf, cfmin) */
 	u64	cl_myf;			/* my fit-time (calculated from this
 					   class's own upperlimit curve) */
-	u64	cl_myfadj;		/* my fit-time adjustment (to cancel
-					   history dependence) */
 	u64	cl_cfmin;		/* earliest children's fit-time (used
 					   with cl_myf to obtain cl_f) */
 	u64	cl_cvtmin;		/* minimal virtual time among the
@@ -151,11 +149,8 @@
 					   (monotonic within a period) */
 	u64	cl_vtadj;		/* intra-period cumulative vt
 					   adjustment */
-	u64	cl_vtoff;		/* inter-period cumulative vt offset */
-	u64	cl_cvtmax;		/* max child's vt in the last period */
-	u64	cl_cvtoff;		/* cumulative cvtmax of all periods */
-	u64	cl_pcvtoff;		/* parent's cvtoff at initialization
-					   time */
+	u64	cl_cvtoff;		/* largest virtual time seen among
+					   the children */
 
 	struct internal_sc cl_rsc;	/* internal real-time service curve */
 	struct internal_sc cl_fsc;	/* internal fair service curve */
@@ -701,28 +696,16 @@
 			} else {
 				/*
 				 * first child for a new parent backlog period.
-				 * add parent's cvtmax to cvtoff to make a new
-				 * vt (vtoff + vt) larger than the vt in the
-				 * last period for all children.
+				 * initialize cl_vt to the highest value seen
+				 * among the siblings. This is analogous to what
+				 * cur_time would provide in the realtime case.
 				 */
-				vt = cl->cl_parent->cl_cvtmax;
-				cl->cl_parent->cl_cvtoff += vt;
-				cl->cl_parent->cl_cvtmax = 0;
+				cl->cl_vt = cl->cl_parent->cl_cvtoff;
 				cl->cl_parent->cl_cvtmin = 0;
-				cl->cl_vt = 0;
 			}
 
-			cl->cl_vtoff = cl->cl_parent->cl_cvtoff -
-							cl->cl_pcvtoff;
-
 			/* update the virtual curve */
-			vt = cl->cl_vt + cl->cl_vtoff;
-			rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt,
-						      cl->cl_total);
-			if (cl->cl_virtual.x == vt) {
-				cl->cl_virtual.x -= cl->cl_vtoff;
-				cl->cl_vtoff = 0;
-			}
+			rtsc_min(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
 			cl->cl_vtadj = 0;
 
 			cl->cl_vtperiod++;  /* increment vt period */
@@ -745,7 +728,6 @@
 				/* compute myf */
 				cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
 						      cl->cl_total);
-				cl->cl_myfadj = 0;
 			}
 		}
 
@@ -779,8 +761,7 @@
 			go_passive = 0;
 
 		/* update vt */
-		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
-			    - cl->cl_vtoff + cl->cl_vtadj;
+		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + cl->cl_vtadj;
 
 		/*
 		 * if vt of the class is smaller than cvtmin,
@@ -795,9 +776,9 @@
 		if (go_passive) {
 			/* no more active child, going passive */
 
-			/* update cvtmax of the parent class */
-			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
-				cl->cl_parent->cl_cvtmax = cl->cl_vt;
+			/* update cvtoff of the parent class */
+			if (cl->cl_vt > cl->cl_parent->cl_cvtoff)
+				cl->cl_parent->cl_cvtoff = cl->cl_vt;
 
 			/* remove this class from the vt tree */
 			vttree_remove(cl);
@@ -813,9 +794,10 @@
 
 		/* update f */
 		if (cl->cl_flags & HFSC_USC) {
+			cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
+#if 0
 			cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
 							      cl->cl_total);
-#if 0
 			/*
 			 * This code causes classes to stay way under their
 			 * limit when multiple classes are used at gigabit
@@ -940,7 +922,7 @@
 hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc)
 {
 	sc2isc(fsc, &cl->cl_fsc);
-	rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total);
+	rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
 	cl->cl_flags |= HFSC_FSC;
 }
 
@@ -1094,7 +1076,6 @@
 	if (parent->level == 0)
 		hfsc_purge_queue(sch, parent);
 	hfsc_adjust_levels(parent);
-	cl->cl_pcvtoff = parent->cl_cvtoff;
 	sch_tree_unlock(sch);
 
 	qdisc_class_hash_grow(sch, &q->clhash);
@@ -1482,16 +1463,12 @@
 	cl->cl_e            = 0;
 	cl->cl_vt           = 0;
 	cl->cl_vtadj        = 0;
-	cl->cl_vtoff        = 0;
 	cl->cl_cvtmin       = 0;
-	cl->cl_cvtmax       = 0;
 	cl->cl_cvtoff       = 0;
-	cl->cl_pcvtoff      = 0;
 	cl->cl_vtperiod     = 0;
 	cl->cl_parentperiod = 0;
 	cl->cl_f            = 0;
 	cl->cl_myf          = 0;
-	cl->cl_myfadj       = 0;
 	cl->cl_cfmin        = 0;
 	cl->cl_nactive      = 0;
 
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index b943982..2bc8d7f 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -88,7 +88,7 @@
 			qdisc_destroy(old);
 #ifdef CONFIG_NET_SCHED
 		if (ntx < dev->real_num_tx_queues)
-			qdisc_list_add(qdisc);
+			qdisc_hash_add(qdisc);
 #endif
 
 	}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 549c663..b5c502c 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -182,7 +182,7 @@
 		if (old)
 			qdisc_destroy(old);
 		if (ntx < dev->real_num_tx_queues)
-			qdisc_list_add(qdisc);
+			qdisc_hash_add(qdisc);
 	}
 	kfree(priv->qdiscs);
 	priv->qdiscs = NULL;
diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig
new file mode 100644
index 0000000..6cff3f6
--- /dev/null
+++ b/net/strparser/Kconfig
@@ -0,0 +1,4 @@
+
+config STREAM_PARSER
+	tristate
+	default n
diff --git a/net/strparser/Makefile b/net/strparser/Makefile
new file mode 100644
index 0000000..858a126
--- /dev/null
+++ b/net/strparser/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_STREAM_PARSER) += strparser.o
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
new file mode 100644
index 0000000..fd688c0
--- /dev/null
+++ b/net/strparser/strparser.c
@@ -0,0 +1,492 @@
+/*
+ * Stream Parser
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/errno.h>
+#include <linux/errqueue.h>
+#include <linux/file.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/poll.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/uaccess.h>
+#include <linux/workqueue.h>
+#include <net/strparser.h>
+#include <net/netns/generic.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+
+static struct workqueue_struct *strp_wq;
+
+struct _strp_rx_msg {
+	/* Internal cb structure. struct strp_rx_msg must be first for passing
+	 * to upper layer.
+	 */
+	struct strp_rx_msg strp;
+	int accum_len;
+	int early_eaten;
+};
+
+static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb)
+{
+	return (struct _strp_rx_msg *)((void *)skb->cb +
+		offsetof(struct qdisc_skb_cb, data));
+}
+
+/* Lower lock held */
+static void strp_abort_rx_strp(struct strparser *strp, int err)
+{
+	struct sock *csk = strp->sk;
+
+	/* Unrecoverable error in receive */
+
+	del_timer(&strp->rx_msg_timer);
+
+	if (strp->rx_stopped)
+		return;
+
+	strp->rx_stopped = 1;
+
+	/* Report an error on the lower socket */
+	csk->sk_err = err;
+	csk->sk_error_report(csk);
+}
+
+static void strp_start_rx_timer(struct strparser *strp)
+{
+	if (strp->sk->sk_rcvtimeo)
+		mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo);
+}
+
+/* Lower lock held */
+static void strp_parser_err(struct strparser *strp, int err,
+			    read_descriptor_t *desc)
+{
+	desc->error = err;
+	kfree_skb(strp->rx_skb_head);
+	strp->rx_skb_head = NULL;
+	strp->cb.abort_parser(strp, err);
+}
+
+/* Lower socket lock held */
+static int strp_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+			 unsigned int orig_offset, size_t orig_len)
+{
+	struct strparser *strp = (struct strparser *)desc->arg.data;
+	struct _strp_rx_msg *rxm;
+	struct sk_buff *head, *skb;
+	size_t eaten = 0, cand_len;
+	ssize_t extra;
+	int err;
+	bool cloned_orig = false;
+
+	if (strp->rx_paused)
+		return 0;
+
+	head = strp->rx_skb_head;
+	if (head) {
+		/* Message already in progress */
+
+		rxm = _strp_rx_msg(head);
+		if (unlikely(rxm->early_eaten)) {
+			/* Already some number of bytes on the receive sock
+			 * data saved in rx_skb_head, just indicate they
+			 * are consumed.
+			 */
+			eaten = orig_len <= rxm->early_eaten ?
+				orig_len : rxm->early_eaten;
+			rxm->early_eaten -= eaten;
+
+			return eaten;
+		}
+
+		if (unlikely(orig_offset)) {
+			/* Getting data with a non-zero offset when a message is
+			 * in progress is not expected. If it does happen, we
+			 * need to clone and pull since we can't deal with
+			 * offsets in the skbs for a message except in the head.
+			 */
+			orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
+			if (!orig_skb) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				desc->error = -ENOMEM;
+				return 0;
+			}
+			if (!pskb_pull(orig_skb, orig_offset)) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				kfree_skb(orig_skb);
+				desc->error = -ENOMEM;
+				return 0;
+			}
+			cloned_orig = true;
+			orig_offset = 0;
+		}
+
+		if (!strp->rx_skb_nextp) {
+			/* We are going to append to the frags_list of head.
+			 * Need to unshare the frag_list.
+			 */
+			err = skb_unclone(head, GFP_ATOMIC);
+			if (err) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				desc->error = err;
+				return 0;
+			}
+
+			if (unlikely(skb_shinfo(head)->frag_list)) {
+				/* We can't append to an sk_buff that already
+				 * has a frag_list. We create a new head, point
+				 * the frag_list of that to the old head, and
+				 * then are able to use the old head->next for
+				 * appending to the message.
+				 */
+				if (WARN_ON(head->next)) {
+					desc->error = -EINVAL;
+					return 0;
+				}
+
+				skb = alloc_skb(0, GFP_ATOMIC);
+				if (!skb) {
+					STRP_STATS_INCR(strp->stats.rx_mem_fail);
+					desc->error = -ENOMEM;
+					return 0;
+				}
+				skb->len = head->len;
+				skb->data_len = head->len;
+				skb->truesize = head->truesize;
+				*_strp_rx_msg(skb) = *_strp_rx_msg(head);
+				strp->rx_skb_nextp = &head->next;
+				skb_shinfo(skb)->frag_list = head;
+				strp->rx_skb_head = skb;
+				head = skb;
+			} else {
+				strp->rx_skb_nextp =
+				    &skb_shinfo(head)->frag_list;
+			}
+		}
+	}
+
+	while (eaten < orig_len) {
+		/* Always clone since we will consume something */
+		skb = skb_clone(orig_skb, GFP_ATOMIC);
+		if (!skb) {
+			STRP_STATS_INCR(strp->stats.rx_mem_fail);
+			desc->error = -ENOMEM;
+			break;
+		}
+
+		cand_len = orig_len - eaten;
+
+		head = strp->rx_skb_head;
+		if (!head) {
+			head = skb;
+			strp->rx_skb_head = head;
+			/* Will set rx_skb_nextp on next packet if needed */
+			strp->rx_skb_nextp = NULL;
+			rxm = _strp_rx_msg(head);
+			memset(rxm, 0, sizeof(*rxm));
+			rxm->strp.offset = orig_offset + eaten;
+		} else {
+			/* Unclone since we may be appending to an skb that we
+			 * already share a frag_list with.
+			 */
+			err = skb_unclone(skb, GFP_ATOMIC);
+			if (err) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				desc->error = err;
+				break;
+			}
+
+			rxm = _strp_rx_msg(head);
+			*strp->rx_skb_nextp = skb;
+			strp->rx_skb_nextp = &skb->next;
+			head->data_len += skb->len;
+			head->len += skb->len;
+			head->truesize += skb->truesize;
+		}
+
+		if (!rxm->strp.full_len) {
+			ssize_t len;
+
+			len = (*strp->cb.parse_msg)(strp, head);
+
+			if (!len) {
+				/* Need more header to determine length */
+				if (!rxm->accum_len) {
+					/* Start RX timer for new message */
+					strp_start_rx_timer(strp);
+				}
+				rxm->accum_len += cand_len;
+				eaten += cand_len;
+				STRP_STATS_INCR(strp->stats.rx_need_more_hdr);
+				WARN_ON(eaten != orig_len);
+				break;
+			} else if (len < 0) {
+				if (len == -ESTRPIPE && rxm->accum_len) {
+					len = -ENODATA;
+					strp->rx_unrecov_intr = 1;
+				} else {
+					strp->rx_interrupted = 1;
+				}
+				strp_parser_err(strp, len, desc);
+				break;
+			} else if (len > strp->sk->sk_rcvbuf) {
+				/* Message length exceeds maximum allowed */
+				STRP_STATS_INCR(strp->stats.rx_msg_too_big);
+				strp_parser_err(strp, -EMSGSIZE, desc);
+				break;
+			} else if (len <= (ssize_t)head->len -
+					  skb->len - rxm->strp.offset) {
+				/* Length must be into new skb (and also
+				 * greater than zero)
+				 */
+				STRP_STATS_INCR(strp->stats.rx_bad_hdr_len);
+				strp_parser_err(strp, -EPROTO, desc);
+				break;
+			}
+
+			rxm->strp.full_len = len;
+		}
+
+		extra = (ssize_t)(rxm->accum_len + cand_len) -
+			rxm->strp.full_len;
+
+		if (extra < 0) {
+			/* Message not complete yet. */
+			if (rxm->strp.full_len - rxm->accum_len >
+			    tcp_inq(strp->sk)) {
+				/* Don't have the whole message in the socket
+				 * buffer. Set strp->rx_need_bytes to wait for
+				 * the rest of the message. Also, set "early
+				 * eaten" since we've already buffered the skb
+				 * but don't consume yet per tcp_read_sock.
+				 */
+
+				if (!rxm->accum_len) {
+					/* Start RX timer for new message */
+					strp_start_rx_timer(strp);
+				}
+
+				strp->rx_need_bytes = rxm->strp.full_len -
+						       rxm->accum_len;
+				rxm->accum_len += cand_len;
+				rxm->early_eaten = cand_len;
+				STRP_STATS_ADD(strp->stats.rx_bytes, cand_len);
+				desc->count = 0; /* Stop reading socket */
+				break;
+			}
+			rxm->accum_len += cand_len;
+			eaten += cand_len;
+			WARN_ON(eaten != orig_len);
+			break;
+		}
+
+		/* Positive extra indicates more bytes than needed for the
+		 * message
+		 */
+
+		WARN_ON(extra > cand_len);
+
+		eaten += (cand_len - extra);
+
+		/* Hurray, we have a new message! */
+		del_timer(&strp->rx_msg_timer);
+		strp->rx_skb_head = NULL;
+		STRP_STATS_INCR(strp->stats.rx_msgs);
+
+		/* Give skb to upper layer */
+		strp->cb.rcv_msg(strp, head);
+
+		if (unlikely(strp->rx_paused)) {
+			/* Upper layer paused strp */
+			break;
+		}
+	}
+
+	if (cloned_orig)
+		kfree_skb(orig_skb);
+
+	STRP_STATS_ADD(strp->stats.rx_bytes, eaten);
+
+	return eaten;
+}
+
+static int default_read_sock_done(struct strparser *strp, int err)
+{
+	return err;
+}
+
+/* Called with lock held on lower socket */
+static int strp_tcp_read_sock(struct strparser *strp)
+{
+	read_descriptor_t desc;
+
+	desc.arg.data = strp;
+	desc.error = 0;
+	desc.count = 1; /* give more than one skb per call */
+
+	/* sk should be locked here, so okay to do tcp_read_sock */
+	tcp_read_sock(strp->sk, &desc, strp_tcp_recv);
+
+	desc.error = strp->cb.read_sock_done(strp, desc.error);
+
+	return desc.error;
+}
+
+/* Lower sock lock held */
+void strp_tcp_data_ready(struct strparser *strp)
+{
+	struct sock *csk = strp->sk;
+
+	if (unlikely(strp->rx_stopped))
+		return;
+
+	/* This check is needed to synchronize with do_strp_rx_work.
+	 * do_strp_rx_work acquires a process lock (lock_sock) whereas
+	 * the lock held here is bh_lock_sock. The two locks can be
+	 * held by different threads at the same time, but bh_lock_sock
+	 * allows a thread in BH context to safely check if the process
+	 * lock is held. In this case, if the lock is held, queue work.
+	 */
+	if (sock_owned_by_user(csk)) {
+		queue_work(strp_wq, &strp->rx_work);
+		return;
+	}
+
+	if (strp->rx_paused)
+		return;
+
+	if (strp->rx_need_bytes) {
+		if (tcp_inq(csk) >= strp->rx_need_bytes)
+			strp->rx_need_bytes = 0;
+		else
+			return;
+	}
+
+	if (strp_tcp_read_sock(strp) == -ENOMEM)
+		queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_tcp_data_ready);
+
+static void do_strp_rx_work(struct strparser *strp)
+{
+	read_descriptor_t rd_desc;
+	struct sock *csk = strp->sk;
+
+	/* We need the read lock to synchronize with strp_tcp_data_ready. We
+	 * need the socket lock for calling tcp_read_sock.
+	 */
+	lock_sock(csk);
+
+	if (unlikely(csk->sk_user_data != strp))
+		goto out;
+
+	if (unlikely(strp->rx_stopped))
+		goto out;
+
+	if (strp->rx_paused)
+		goto out;
+
+	rd_desc.arg.data = strp;
+
+	if (strp_tcp_read_sock(strp) == -ENOMEM)
+		queue_work(strp_wq, &strp->rx_work);
+
+out:
+	release_sock(csk);
+}
+
+static void strp_rx_work(struct work_struct *w)
+{
+	do_strp_rx_work(container_of(w, struct strparser, rx_work));
+}
+
+static void strp_rx_msg_timeout(unsigned long arg)
+{
+	struct strparser *strp = (struct strparser *)arg;
+
+	/* Message assembly timed out */
+	STRP_STATS_INCR(strp->stats.rx_msg_timeouts);
+	lock_sock(strp->sk);
+	strp->cb.abort_parser(strp, ETIMEDOUT);
+	release_sock(strp->sk);
+}
+
+int strp_init(struct strparser *strp, struct sock *csk,
+	      struct strp_callbacks *cb)
+{
+	if (!cb || !cb->rcv_msg || !cb->parse_msg)
+		return -EINVAL;
+
+	memset(strp, 0, sizeof(*strp));
+
+	strp->sk = csk;
+
+	setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout,
+		    (unsigned long)strp);
+
+	INIT_WORK(&strp->rx_work, strp_rx_work);
+
+	strp->cb.rcv_msg = cb->rcv_msg;
+	strp->cb.parse_msg = cb->parse_msg;
+	strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
+	strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(strp_init);
+
+/* strp must already be stopped so that strp_tcp_recv will no longer be called.
+ * Note that strp_done is not called with the lower socket held.
+ */
+void strp_done(struct strparser *strp)
+{
+	WARN_ON(!strp->rx_stopped);
+
+	del_timer_sync(&strp->rx_msg_timer);
+	cancel_work_sync(&strp->rx_work);
+
+	if (strp->rx_skb_head) {
+		kfree_skb(strp->rx_skb_head);
+		strp->rx_skb_head = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(strp_done);
+
+void strp_stop(struct strparser *strp)
+{
+	strp->rx_stopped = 1;
+}
+EXPORT_SYMBOL_GPL(strp_stop);
+
+void strp_check_rcv(struct strparser *strp)
+{
+	queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_check_rcv);
+
+static int __init strp_mod_init(void)
+{
+	strp_wq = create_singlethread_workqueue("kstrp");
+
+	return 0;
+}
+
+static void __exit strp_mod_exit(void)
+{
+}
+module_init(strp_mod_init);
+module_exit(strp_mod_exit);
+MODULE_LICENSE("GPL");
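The strparser core above is driven entirely through the callbacks handed to strp_init(): parse_msg() reports the length of the message starting at the head of the skb (0 means "need more data", negative aborts), rcv_msg() takes ownership of each assembled message, and the lower TCP socket's data-ready path is expected to call strp_tcp_data_ready(). The consumer sketch below is hypothetical; the 2-byte big-endian length header and the callback bodies are assumptions, not part of this patch.

	#include <net/strparser.h>
	#include <linux/skbuff.h>

	/* Hypothetical framing: each message starts with a 2-byte
	 * big-endian length of the payload that follows.
	 */
	static int my_parse_msg(struct strparser *strp, struct sk_buff *skb)
	{
		__be16 hdr;

		/* Header not complete yet: ask strparser for more data */
		if (skb_copy_bits(skb, 0, &hdr, sizeof(hdr)))
			return 0;

		return sizeof(hdr) + ntohs(hdr);
	}

	static void my_rcv_msg(struct strparser *strp, struct sk_buff *skb)
	{
		/* Ownership of the assembled message passes to us */
		kfree_skb(skb);
	}

	static int my_attach(struct strparser *strp, struct sock *csk)
	{
		struct strp_callbacks cb = {
			.rcv_msg   = my_rcv_msg,
			.parse_msg = my_parse_msg,
			/* read_sock_done and abort_parser use the defaults */
		};

		return strp_init(strp, csk, &cb);
	}
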
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index a5fc9dd..9e90129 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1306,6 +1306,7 @@
 
 	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
 }
+EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
 
 static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
 				       struct net_device *group_dev)
@@ -1323,7 +1324,6 @@
 
 	return dev->ifindex;
 }
-EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
 
 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
 					  u32 old_mark, u32 *reset_mark)
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 46a71c7..5bc1a3d 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -42,26 +42,37 @@
 			       struct ctl_table *table)
 {
 	struct net *net = container_of(head->set, struct net, sysctls);
-	kuid_t root_uid = make_kuid(net->user_ns, 0);
-	kgid_t root_gid = make_kgid(net->user_ns, 0);
 
 	/* Allow network administrator to have same access as root. */
-	if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN) ||
-	    uid_eq(root_uid, current_euid())) {
+	if (ns_capable(net->user_ns, CAP_NET_ADMIN)) {
 		int mode = (table->mode >> 6) & 7;
 		return (mode << 6) | (mode << 3) | mode;
 	}
-	/* Allow netns root group to have the same access as the root group */
-	if (in_egroup_p(root_gid)) {
-		int mode = (table->mode >> 3) & 7;
-		return (mode << 3) | mode;
-	}
+
 	return table->mode;
 }
 
+static void net_ctl_set_ownership(struct ctl_table_header *head,
+				  struct ctl_table *table,
+				  kuid_t *uid, kgid_t *gid)
+{
+	struct net *net = container_of(head->set, struct net, sysctls);
+	kuid_t ns_root_uid;
+	kgid_t ns_root_gid;
+
+	ns_root_uid = make_kuid(net->user_ns, 0);
+	if (uid_valid(ns_root_uid))
+		*uid = ns_root_uid;
+
+	ns_root_gid = make_kgid(net->user_ns, 0);
+	if (gid_valid(ns_root_gid))
+		*gid = ns_root_gid;
+}
+
 static struct ctl_table_root net_sysctl_root = {
 	.lookup = net_ctl_header_lookup,
 	.permissions = net_ctl_permissions,
+	.set_ownership = net_ctl_set_ownership,
 };
 
 static int __net_init sysctl_net_init(struct net *net)
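With the set_ownership hook above, net-namespace sysctl tables are owned by the uid/gid that maps to root inside that namespace (when such a mapping exists), so the owner permission bits apply to the namespace owner rather than only to global root. A quick hypothetical userspace check, assuming a network namespace created inside an unprivileged user namespace:

	#include <stdio.h>
	#include <sys/stat.h>

	int main(void)
	{
		struct stat st;

		/* Run inside the user+net namespace: the file should now be
		 * reported as owned by the namespace's mapped root uid/gid.
		 */
		if (stat("/proc/sys/net/ipv4/ip_forward", &st) == 0)
			printf("uid=%u gid=%u\n", st.st_uid, st.st_gid);
		return 0;
	}
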
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7645e97..2029b49 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -906,6 +906,8 @@
 	if (WARN_ON(wdev->netdev))
 		return;
 
+	nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
+
 	list_del_rcu(&wdev->list);
 	rdev->devlist_generation++;
 
@@ -1079,6 +1081,8 @@
 		     wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
 		     wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
 			dev->priv_flags |= IFF_DONT_BRIDGE;
+
+		nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
 		break;
 	case NETDEV_GOING_DOWN:
 		cfg80211_leave(rdev, wdev);
@@ -1157,6 +1161,8 @@
 		 * remove and clean it up.
 		 */
 		if (!list_empty(&wdev->list)) {
+			nl80211_notify_iface(rdev, wdev,
+					     NL80211_CMD_DEL_INTERFACE);
 			sysfs_remove_link(&dev->dev.kobj, "phy80211");
 			list_del_rcu(&wdev->list);
 			rdev->devlist_generation++;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f02653a..4997857 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2751,7 +2751,7 @@
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct vif_params params;
 	struct wireless_dev *wdev;
-	struct sk_buff *msg, *event;
+	struct sk_buff *msg;
 	int err;
 	enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
 	u32 flags;
@@ -2855,20 +2855,15 @@
 		return -ENOBUFS;
 	}
 
-	event = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (event) {
-		if (nl80211_send_iface(event, 0, 0, 0,
-				       rdev, wdev, false) < 0) {
-			nlmsg_free(event);
-			goto out;
-		}
+	/*
+	 * For wdevs which have no associated netdev object (e.g. of type
+	 * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here.
+	 * For all other types, the event will be generated from the
+	 * netdev notifier
+	 */
+	if (!wdev->netdev)
+		nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
 
-		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
-					event, 0, NL80211_MCGRP_CONFIG,
-					GFP_KERNEL);
-	}
-
-out:
 	return genlmsg_reply(msg, info);
 }
 
@@ -2876,18 +2871,10 @@
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct wireless_dev *wdev = info->user_ptr[1];
-	struct sk_buff *msg;
-	int status;
 
 	if (!rdev->ops->del_virtual_intf)
 		return -EOPNOTSUPP;
 
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (msg && nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, true) < 0) {
-		nlmsg_free(msg);
-		msg = NULL;
-	}
-
 	/*
 	 * If we remove a wireless device without a netdev then clear
 	 * user_ptr[1] so that nl80211_post_doit won't dereference it
@@ -2898,15 +2885,7 @@
 	if (!wdev->netdev)
 		info->user_ptr[1] = NULL;
 
-	status = rdev_del_virtual_intf(rdev, wdev);
-	if (status >= 0 && msg)
-		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
-					msg, 0, NL80211_MCGRP_CONFIG,
-					GFP_KERNEL);
-	else
-		nlmsg_free(msg);
-
-	return status;
+	return rdev_del_virtual_intf(rdev, wdev);
 }
 
 static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info)
@@ -5374,6 +5353,18 @@
 	return 0;
 }
 
+static int nl80211_check_power_mode(const struct nlattr *nla,
+				    enum nl80211_mesh_power_mode min,
+				    enum nl80211_mesh_power_mode max,
+				    enum nl80211_mesh_power_mode *out)
+{
+	u32 val = nla_get_u32(nla);
+	if (val < min || val > max)
+		return -EINVAL;
+	*out = val;
+	return 0;
+}
+
 static int nl80211_parse_mesh_config(struct genl_info *info,
 				     struct mesh_config *cfg,
 				     u32 *mask_out)
@@ -5518,7 +5509,7 @@
 				  NL80211_MESH_POWER_ACTIVE,
 				  NL80211_MESH_POWER_MAX,
 				  mask, NL80211_MESHCONF_POWER_MODE,
-				  nl80211_check_u32);
+				  nl80211_check_power_mode);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
 				  0, 65535, mask,
 				  NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16);
@@ -7773,12 +7764,13 @@
 
 	ibss.beacon_interval = 100;
 
-	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
+	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL])
 		ibss.beacon_interval =
 			nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
-		if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000)
-			return -EINVAL;
-	}
+
+	err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval);
+	if (err)
+		return err;
 
 	if (!rdev->ops->join_ibss)
 		return -EOPNOTSUPP;
@@ -9252,9 +9244,10 @@
 	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
 		setup.beacon_interval =
 			nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
-		if (setup.beacon_interval < 10 ||
-		    setup.beacon_interval > 10000)
-			return -EINVAL;
+
+		err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval);
+		if (err)
+			return err;
 	}
 
 	if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) {
@@ -11847,6 +11840,29 @@
 				NL80211_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
+				struct wireless_dev *wdev,
+				enum nl80211_commands cmd)
+{
+	struct sk_buff *msg;
+
+	WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE &&
+		cmd != NL80211_CMD_DEL_INTERFACE);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev,
+			       cmd == NL80211_CMD_DEL_INTERFACE) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+				NL80211_MCGRP_CONFIG, GFP_KERNEL);
+}
+
 static int nl80211_add_scan_req(struct sk_buff *msg,
 				struct cfg80211_registered_device *rdev)
 {
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index a63f402..7e3821d 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -7,6 +7,9 @@
 void nl80211_exit(void);
 void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
 			  enum nl80211_commands cmd);
+void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
+			  struct wireless_dev *wdev,
+			  enum nl80211_commands cmd);
 void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
 			     struct wireless_dev *wdev);
 struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index b7d1592..0675f51 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1559,7 +1559,7 @@
 	struct wireless_dev *wdev;
 	int res = 0;
 
-	if (!beacon_int)
+	if (beacon_int < 10 || beacon_int > 10000)
 		return -EINVAL;
 
 	list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 90ebf7d..eb582c6 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -24,6 +24,7 @@
 hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += xdp1
 hostprogs-y += xdp2
+hostprogs-y += test_current_task_under_cgroup
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -49,6 +50,8 @@
 xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
+test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
+				       test_current_task_under_cgroup_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -74,6 +77,7 @@
 always += test_cgrp2_tc_kern.o
 always += xdp1_kern.o
 always += xdp2_kern.o
+always += test_current_task_under_cgroup_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
@@ -97,6 +101,7 @@
 HOSTLOADLIBES_test_overhead += -lelf -lrt
 HOSTLOADLIBES_xdp1 += -lelf
 HOSTLOADLIBES_xdp2 += -lelf
+HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 7927a09..6f1672a 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -37,12 +37,16 @@
 	(void *) BPF_FUNC_clone_redirect;
 static int (*bpf_redirect)(int ifindex, int flags) =
 	(void *) BPF_FUNC_redirect;
-static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
+static int (*bpf_perf_event_output)(void *ctx, void *map,
+				    unsigned long long flags, void *data,
+				    int size) =
 	(void *) BPF_FUNC_perf_event_output;
 static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
 	(void *) BPF_FUNC_get_stackid;
 static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
 	(void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+	(void *) BPF_FUNC_current_task_under_cgroup;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
new file mode 100644
index 0000000..86b28d7
--- /dev/null
+++ b/samples/bpf/test_current_task_under_cgroup_kern.c
@@ -0,0 +1,43 @@
+/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/utsname.h>
+
+struct bpf_map_def SEC("maps") cgroup_map = {
+	.type			= BPF_MAP_TYPE_CGROUP_ARRAY,
+	.key_size		= sizeof(u32),
+	.value_size		= sizeof(u32),
+	.max_entries	= 1,
+};
+
+struct bpf_map_def SEC("maps") perf_map = {
+	.type			= BPF_MAP_TYPE_ARRAY,
+	.key_size		= sizeof(u32),
+	.value_size		= sizeof(u64),
+	.max_entries	= 1,
+};
+
+/* Writes the last PID that called sync to a map at index 0 */
+SEC("kprobe/sys_sync")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	u64 pid = bpf_get_current_pid_tgid();
+	int idx = 0;
+
+	if (!bpf_current_task_under_cgroup(&cgroup_map, 0))
+		return 0;
+
+	bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
new file mode 100644
index 0000000..30b0bce
--- /dev/null
+++ b/samples/bpf/test_current_task_under_cgroup_user.c
@@ -0,0 +1,145 @@
+/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <linux/bpf.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/limits.h>
+
+#define CGROUP_MOUNT_PATH	"/mnt"
+#define CGROUP_PATH		"/mnt/my-cgroup"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+static int join_cgroup(char *path)
+{
+	int fd, rc = 0;
+	pid_t pid = getpid();
+	char cgroup_path[PATH_MAX + 1];
+
+	snprintf(cgroup_path, sizeof(cgroup_path), "%s/cgroup.procs", path);
+
+	fd = open(cgroup_path, O_WRONLY);
+	if (fd < 0) {
+		log_err("Opening Cgroup");
+		return 1;
+	}
+
+	if (dprintf(fd, "%d\n", pid) < 0) {
+		log_err("Joining Cgroup");
+		rc = 1;
+	}
+	close(fd);
+	return rc;
+}
+
+int main(int argc, char **argv)
+{
+	char filename[256];
+	int cg2, idx = 0;
+	pid_t remote_pid, local_pid = getpid();
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	/*
+	 * This is to avoid interfering with existing cgroups. Unfortunately,
+	 * most people don't have cgroupv2 enabled at this point in time.
+	 * It's easier to create our own mount namespace and manage it
+	 * ourselves.
+	 */
+	if (unshare(CLONE_NEWNS)) {
+		log_err("unshare");
+		return 1;
+	}
+
+	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
+		log_err("mount fakeroot");
+		return 1;
+	}
+
+	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
+		log_err("mount cgroup2");
+		return 1;
+	}
+
+	if (mkdir(CGROUP_PATH, 0777) && errno != EEXIST) {
+		log_err("mkdir cgroup");
+		return 1;
+	}
+
+	cg2 = open(CGROUP_PATH, O_RDONLY);
+	if (cg2 < 0) {
+		log_err("opening target cgroup");
+		goto cleanup_cgroup_err;
+	}
+
+	if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {
+		log_err("Adding target cgroup to map");
+		goto cleanup_cgroup_err;
+	}
+	if (join_cgroup(CGROUP_PATH)) {
+		log_err("Joining target cgroup");
+		goto cleanup_cgroup_err;
+	}
+
+	/*
+	 * The installed helper program caught the sync call, and should
+	 * have written the calling PID to the map.
+	 */
+
+	sync();
+	bpf_lookup_elem(map_fd[1], &idx, &remote_pid);
+
+	if (local_pid != remote_pid) {
+		fprintf(stderr,
+			"BPF helper did not write the correct PID to the map: got %d\n",
+			remote_pid);
+		goto leave_cgroup_err;
+	}
+
+	/* Verify the negative scenario; leave the cgroup */
+	if (join_cgroup(CGROUP_MOUNT_PATH))
+		goto leave_cgroup_err;
+
+	remote_pid = 0;
+	bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY);
+
+	sync();
+	bpf_lookup_elem(map_fd[1], &idx, &remote_pid);
+
+	if (local_pid == remote_pid) {
+		fprintf(stderr, "BPF cgroup negative test did not work\n");
+		goto cleanup_cgroup_err;
+	}
+
+	rmdir(CGROUP_PATH);
+	return 0;
+
+	/* Error condition, cleanup */
+leave_cgroup_err:
+	join_cgroup(CGROUP_MOUNT_PATH);
+cleanup_cgroup_err:
+	rmdir(CGROUP_PATH);
+	return 1;
+}
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index fe2fcec..78c6f13 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -1449,7 +1449,7 @@
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"pkt: test1",
+		"direct packet access: test1",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -1466,7 +1466,7 @@
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"pkt: test2",
+		"direct packet access: test2",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 1),
 			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
@@ -1499,7 +1499,7 @@
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"pkt: test3",
+		"direct packet access: test3",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -1511,7 +1511,7 @@
 		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 	},
 	{
-		"pkt: test4",
+		"direct packet access: test4",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -1528,6 +1528,112 @@
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
+	{
+		"helper access to packet: test1, valid packet_ptr range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {5},
+		.result_unpriv = ACCEPT,
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test2, unchecked packet_ptr",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {1},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test3, variable add",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+					offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+					offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
+			BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {11},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test4, packet_ptr with bad range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {7},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test5, packet_ptr with too short range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {6},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
 };
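In restricted C, the new "helper access to packet" tests correspond to the pattern sketched below: a packet pointer may only be passed to a map helper after the verifier has seen a bounds check against data_end. The map definition and section name are hypothetical and only illustrate the shape of the check exercised by "test1" above.

	#include <uapi/linux/bpf.h>
	#include "bpf_helpers.h"

	struct bpf_map_def SEC("maps") my_map = {
		.type = BPF_MAP_TYPE_HASH,
		.key_size = 8,
		.value_size = sizeof(long),
		.max_entries = 16,
	};

	SEC("xdp_sketch")
	int xdp_prog(struct xdp_md *ctx)
	{
		void *data = (void *)(long)ctx->data;
		void *data_end = (void *)(long)ctx->data_end;

		/* Prove the first 8 bytes are inside the packet before the
		 * helper call; without this the verifier rejects the program
		 * with "invalid access to packet".
		 */
		if (data + 8 > data_end)
			return XDP_DROP;

		bpf_map_lookup_elem(&my_map, data);
		return XDP_PASS;
	}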
 
 static int probe_filter_length(struct bpf_insn *fp)