Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/bwh/sfc-next
Ben Hutchings says:
====================
1. A little more refactoring.
2. Remove the unnecessary use of atomic_t that you pointed out.
3. Add support for starting or queueing firmware requests from atomic
context.
4. Add hwmon support for additional sensors found on some new boards.
5. Add support for the EF10 controller architecture, the SFC9100 family
and specifically the SFC9120 controller.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/Documentation/devicetree/bindings/net/micrel-ksz9021.txt b/Documentation/devicetree/bindings/net/micrel-ksz9021.txt
new file mode 100644
index 0000000..997a63f
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/micrel-ksz9021.txt
@@ -0,0 +1,49 @@
+Micrel KSZ9021 Gigabit Ethernet PHY
+
+Some boards require special tuning values, particularly when it comes to
+clock delays. You can specify clock delay values by adding
+micrel-specific properties to an Ethernet OF device node.
+
+All skew control options are specified in picoseconds. The minimum
+value is 0, and the maximum value is 3000.
+
+Optional properties:
+ - rxc-skew-ps : Skew control of RXC pad
+ - rxdv-skew-ps : Skew control of RX CTL pad
+ - txc-skew-ps : Skew control of TXC pad
+ - txen-skew-ps : Skew control of TX_CTL pad
+ - rxd0-skew-ps : Skew control of RX data 0 pad
+ - rxd1-skew-ps : Skew control of RX data 1 pad
+ - rxd2-skew-ps : Skew control of RX data 2 pad
+ - rxd3-skew-ps : Skew control of RX data 3 pad
+ - txd0-skew-ps : Skew control of TX data 0 pad
+ - txd1-skew-ps : Skew control of TX data 1 pad
+ - txd2-skew-ps : Skew control of TX data 2 pad
+ - txd3-skew-ps : Skew control of TX data 3 pad
+
+Examples:
+
+ /* Attach to an Ethernet device with autodetected PHY */
+ &enet {
+ rxc-skew-ps = <3000>;
+ rxdv-skew-ps = <0>;
+ txc-skew-ps = <3000>;
+ txen-skew-ps = <0>;
+ status = "okay";
+ };
+
+ /* Attach to an explicitly-specified PHY */
+ mdio {
+ phy0: ethernet-phy@0 {
+ rxc-skew-ps = <3000>;
+ rxdv-skew-ps = <0>;
+ txc-skew-ps = <3000>;
+ txen-skew-ps = <0>;
+ reg = <0>;
+ };
+ };
+ ethernet@70000 {
+ status = "okay";
+ phy = <&phy0>;
+ phy-mode = "rgmii-id";
+ };
diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt
index 261c563..eba0e5e 100644
--- a/Documentation/devicetree/bindings/net/stmmac.txt
+++ b/Documentation/devicetree/bindings/net/stmmac.txt
@@ -22,6 +22,11 @@
- snps,pbl Programmable Burst Length
- snps,fixed-burst Program the DMA to use the fixed burst mode
- snps,mixed-burst Program the DMA to use the mixed burst mode
+- snps,force_thresh_dma_mode Force DMA to use the threshold mode for
+ both tx and rx
+- snps,force_sf_dma_mode Force DMA to use the Store and Forward
+ mode for both tx and rx. This flag is
+ ignored if force_thresh_dma_mode is set.
Optional properties:
- mac-address: 6 bytes, mac address
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 15356ac..7f9d4f5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2953,7 +2953,7 @@
improve throughput, but will also increase the
amount of memory reserved for use by the client.
- swapaccount[=0|1]
+ swapaccount=[0|1]
[KNL] Enable accounting of swap in memory resource
controller if no parameter or 1 is given or disable
it if 0 is given (See Documentation/cgroups/memory.txt)
diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt
index fcb6c71c..13a3212 100644
--- a/Documentation/networking/e100.txt
+++ b/Documentation/networking/e100.txt
@@ -1,7 +1,7 @@
Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters
==============================================================
-November 15, 2005
+March 15, 2011
Contents
========
@@ -122,7 +122,7 @@
NOTE: This setting is not saved across reboots.
- Ethtool
+ ethtool
-------
The driver utilizes the ethtool interface for driver configuration and
diff --git a/Documentation/networking/e1000.txt b/Documentation/networking/e1000.txt
index 71ca958..437b209 100644
--- a/Documentation/networking/e1000.txt
+++ b/Documentation/networking/e1000.txt
@@ -1,8 +1,8 @@
-Linux* Base Driver for the Intel(R) PRO/1000 Family of Adapters
-===============================================================
+Linux* Base Driver for Intel(R) Ethernet Network Connection
+===========================================================
Intel Gigabit Linux driver.
-Copyright(c) 1999 - 2010 Intel Corporation.
+Copyright(c) 1999 - 2013 Intel Corporation.
Contents
========
@@ -420,15 +420,15 @@
- The maximum MTU setting for Jumbo Frames is 16110. This value coincides
with the maximum Jumbo Frames size of 16128.
- - Using Jumbo Frames at 10 or 100 Mbps may result in poor performance or
- loss of link.
+ - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
+ poor performance or loss of link.
- Adapters based on the Intel(R) 82542 and 82573V/E controller do not
support Jumbo Frames. These correspond to the following product names:
Intel(R) PRO/1000 Gigabit Server Adapter
Intel(R) PRO/1000 PM Network Connection
- Ethtool
+ ethtool
-------
The driver utilizes the ethtool interface for driver configuration and
diagnostics, as well as displaying statistical information. The ethtool
diff --git a/Documentation/networking/e1000e.txt b/Documentation/networking/e1000e.txt
index 97b5ba9..ad2d9f3 100644
--- a/Documentation/networking/e1000e.txt
+++ b/Documentation/networking/e1000e.txt
@@ -1,8 +1,8 @@
-Linux* Driver for Intel(R) Network Connection
-=============================================
+Linux* Driver for Intel(R) Ethernet Network Connection
+======================================================
Intel Gigabit Linux driver.
-Copyright(c) 1999 - 2010 Intel Corporation.
+Copyright(c) 1999 - 2013 Intel Corporation.
Contents
========
@@ -259,13 +259,16 @@
- The maximum MTU setting for Jumbo Frames is 9216. This value coincides
with the maximum Jumbo Frames size of 9234 bytes.
- - Using Jumbo Frames at 10 or 100 Mbps is not supported and may result in
+ - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
poor performance or loss of link.
- Some adapters limit Jumbo Frames sized packets to a maximum of
4096 bytes and some adapters do not support Jumbo Frames.
- Ethtool
+ - Jumbo Frames cannot be configured on an 82579-based Network device, if
+ MACSec is enabled on the system.
+
+ ethtool
-------
The driver utilizes the ethtool interface for driver configuration and
diagnostics, as well as displaying statistical information. We
@@ -273,6 +276,9 @@
http://ftp.kernel.org/pub/software/network/ethtool/
+ NOTE: When validating enable/disable tests on some parts (82578, for example)
+ you need to add a few seconds between tests when working with ethtool.
+
Speed and Duplex
----------------
Speed and Duplex are configured through the ethtool* utility. For
diff --git a/Documentation/networking/igb.txt b/Documentation/networking/igb.txt
index 9a2a0371..4ebbd65 100644
--- a/Documentation/networking/igb.txt
+++ b/Documentation/networking/igb.txt
@@ -1,8 +1,8 @@
-Linux* Base Driver for Intel(R) Network Connection
-==================================================
+Linux* Base Driver for Intel(R) Ethernet Network Connection
+===========================================================
Intel Gigabit Linux driver.
-Copyright(c) 1999 - 2010 Intel Corporation.
+Copyright(c) 1999 - 2013 Intel Corporation.
Contents
========
@@ -36,6 +36,53 @@
This parameter adds support for SR-IOV. It causes the driver to spawn up to
max_vfs worth of virtual function.
+QueuePairs
+----------
+Valid Range: 0-1
+Default Value: 1 (TX and RX will be paired onto one interrupt vector)
+
+If set to 0, when MSI-X is enabled, the TX and RX will attempt to occupy
+separate vectors.
+
+This option can be overridden to 1 if there are not sufficient interrupts
+available. This can occur if any combination of RSS, VMDQ, and max_vfs
+results in more than 4 queues being used.
+
+Node
+----
+Valid Range: 0-n
+Default Value: -1 (off)
+
+ 0 - n: where n is the number of the NUMA node that should be used to
+ allocate memory for this adapter port.
+ -1: uses the driver default of allocating memory on whichever processor is
+ running insmod/modprobe.
+
+ The Node parameter will allow you to pick which NUMA node you want to have
+ the adapter allocate memory from. All driver structures, in-memory queues,
+ and receive buffers will be allocated on the node specified. This parameter
+ is only useful when interrupt affinity is specified, otherwise some portion
+ of the time the interrupt could run on a different core than the memory is
+ allocated on, causing slower memory access and impacting throughput, CPU, or
+ both.
+
+EEE
+---
+Valid Range: 0-1
+Default Value: 1 (enabled)
+
+ A link between two EEE-compliant devices will result in periodic bursts of
+ data followed by long periods during which the link is in an idle state. This Low
+ Power Idle (LPI) state is supported in both 1Gbps and 100Mbps link speeds.
+ NOTE: EEE support requires autonegotiation.
+
+DMAC
+----
+Valid Range: 0-1
+Default Value: 1 (enabled)
+ Enables or disables DMA Coalescing feature.
+
+
Additional Configurations
=========================
@@ -55,10 +102,10 @@
- The maximum MTU setting for Jumbo Frames is 9216. This value coincides
with the maximum Jumbo Frames size of 9234 bytes.
- - Using Jumbo Frames at 10 or 100 Mbps may result in poor performance or
- loss of link.
+ - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
+ poor performance or loss of link.
- Ethtool
+ ethtool
-------
The driver utilizes the ethtool interface for driver configuration and
diagnostics, as well as displaying statistical information. The latest
@@ -106,6 +153,14 @@
Where n=the VF that attempted to do the spoofing.
+ Setting MAC Address, VLAN and Rate Limit Using IProute2 Tool
+ ------------------------------------------------------------
+ You can set a MAC address of a Virtual Function (VF), a default VLAN and the
+ rate limit using the IProute2 tool. Download the latest version of the
+ iproute2 tool from Sourceforge if your version does not have all the
+ features you require.
+
+
Support
=======
diff --git a/Documentation/networking/igbvf.txt b/Documentation/networking/igbvf.txt
index cbfe4ee..40db17a 100644
--- a/Documentation/networking/igbvf.txt
+++ b/Documentation/networking/igbvf.txt
@@ -1,8 +1,8 @@
-Linux* Base Driver for Intel(R) Network Connection
-==================================================
+Linux* Base Driver for Intel(R) Ethernet Network Connection
+===========================================================
Intel Gigabit Linux driver.
-Copyright(c) 1999 - 2010 Intel Corporation.
+Copyright(c) 1999 - 2013 Intel Corporation.
Contents
========
@@ -55,7 +55,7 @@
Additional Configurations
=========================
- Ethtool
+ ethtool
-------
The driver utilizes the ethtool interface for driver configuration and
diagnostics, as well as displaying statistical information. The ethtool
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index debfe85..1cb3aeb 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -482,6 +482,15 @@
tcp_timestamps - BOOLEAN
Enable timestamps as defined in RFC1323.
+tcp_min_tso_segs - INTEGER
+ Minimal number of segments per TSO frame.
+ Since linux-3.12, TCP does an automatic sizing of TSO frames,
+ depending on flow rate, instead of filling 64Kbytes packets.
+ For specific usages, it's possible to force TCP to build big
+ TSO frames. Note that TCP stack might split too big TSO packets
+ if available window is too small.
+ Default: 2
+
tcp_tso_win_divisor - INTEGER
This allows control over what percentage of the congestion window
can be consumed by a single TSO frame.
@@ -1349,6 +1358,12 @@
MLDv2 report retransmit will take place.
Default: 1000 (1 second)
+suppress_frag_ndisc - INTEGER
+ Control RFC 6980 (Security Implications of IPv6 Fragmentation
+ with IPv6 Neighbor Discovery) behavior:
+ 1 - (default) discard fragmented neighbor discovery packets
+ 0 - allow fragmented neighbor discovery packets
+
icmp/*:
ratelimit - INTEGER
Limit the maximal rates for sending ICMPv6 packets.
diff --git a/Documentation/networking/ixgb.txt b/Documentation/networking/ixgb.txt
index d75a1f9..1e0c045 100644
--- a/Documentation/networking/ixgb.txt
+++ b/Documentation/networking/ixgb.txt
@@ -1,7 +1,7 @@
-Linux Base Driver for 10 Gigabit Intel(R) Network Connection
-=============================================================
+Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection
+=====================================================================
-October 9, 2007
+March 14, 2011
Contents
@@ -274,9 +274,9 @@
-------------------------------------------------
Configuring a network driver to load properly when the system is started is
distribution dependent. Typically, the configuration process involves adding
- an alias line to files in /etc/modprobe.d/ as well as editing other system
- startup scripts and/or configuration files. Many popular Linux distributions
- ship with tools to make these changes for you. To learn the proper way to
+ an alias line to /etc/modprobe.conf as well as editing other system startup
+ scripts and/or configuration files. Many popular Linux distributions ship
+ with tools to make these changes for you. To learn the proper way to
configure a network device for your system, refer to your distribution
documentation. If during this process you are asked for the driver or module
name, the name for the Linux Base Driver for the Intel 10GbE Family of
@@ -306,7 +306,7 @@
with the maximum Jumbo Frames size of 16128.
- Ethtool
+ ethtool
-------
The driver utilizes the ethtool interface for driver configuration and
diagnostics, as well as displaying statistical information. The ethtool
diff --git a/Documentation/networking/ixgbe.txt b/Documentation/networking/ixgbe.txt
index af77ed3..96ccceb 100644
--- a/Documentation/networking/ixgbe.txt
+++ b/Documentation/networking/ixgbe.txt
@@ -1,8 +1,9 @@
-Linux Base Driver for 10 Gigabit PCI Express Intel(R) Network Connection
-========================================================================
+Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Family of
+Adapters
+=============================================================================
-Intel Gigabit Linux driver.
-Copyright(c) 1999 - 2010 Intel Corporation.
+Intel 10 Gigabit Linux driver.
+Copyright(c) 1999 - 2013 Intel Corporation.
Contents
========
@@ -16,8 +17,8 @@
Identifying Your Adapter
========================
-The driver in this release is compatible with 82598 and 82599-based Intel
-Network Connections.
+The driver in this release is compatible with 82598, 82599 and X540-based
+Intel Network Connections.
For more information on how to identify your adapter, go to the Adapter &
Driver ID Guide at:
@@ -72,7 +73,7 @@
Laser turns off for SFP+ when ifconfig down
-------------------------------------------
"ifconfig down" turns off the laser for 82599-based SFP+ fiber adapters.
-"ifconfig up" turns on the later.
+"ifconfig up" turns on the laser.
82598-BASED ADAPTERS
@@ -118,6 +119,93 @@
behavior is changed to off. Flow control in 1 gig mode on these devices can
lead to Tx hangs.
+Intel(R) Ethernet Flow Director
+-------------------------------
+Supports advanced filters that direct receive packets by their flows to
+different queues. Enables tight control on routing a flow in the platform.
+Matches flows and CPU cores for flow affinity. Supports multiple parameters
+for flexible flow classification and load balancing.
+
+Flow director is enabled only if the kernel is multiple TX queue capable.
+
+An included script (set_irq_affinity.sh) automates setting the IRQ to CPU
+affinity.
+
+You can verify that the driver is using Flow Director by looking at the counter
+in ethtool: fdir_miss and fdir_match.
+
+Other ethtool Commands:
+To enable Flow Director
+ ethtool -K ethX ntuple on
+To add a filter
+ Use -U switch. e.g., ethtool -U ethX flow-type tcp4 src-ip 0x178000a
+ action 1
+To see the list of filters currently present:
+ ethtool -u ethX
+
+Perfect Filter: Perfect filter is an interface to load the filter table that
+funnels all flow into queue_0 unless an alternative queue is specified using
+"action". In that case, any flow that matches the filter criteria will be
+directed to the appropriate queue.
+
+If the queue is defined as -1, filter will drop matching packets.
+
+To account for filter matches and misses, there are two stats in ethtool:
+fdir_match and fdir_miss. In addition, rx_queue_N_packets shows the number of
+packets processed by the Nth queue.
+
+NOTE: Receive Packet Steering (RPS) and Receive Flow Steering (RFS) are not
+compatible with Flow Director. If Flow Director is enabled, these will be
+disabled.
+
+The following three parameters impact Flow Director.
+
+FdirMode
+--------
+Valid Range: 0-2 (0=off, 1=ATR, 2=Perfect filter mode)
+Default Value: 1
+
+ Flow Director filtering modes.
+
+FdirPballoc
+-----------
+Valid Range: 0-2 (0=64k, 1=128k, 2=256k)
+Default Value: 0
+
+ Flow Director allocated packet buffer size.
+
+AtrSampleRate
+--------------
+Valid Range: 1-100
+Default Value: 20
+
+ Software ATR Tx packet sample rate. For example, when set to 20, every 20th
+ packet is examined to see whether it will create a new flow.
+
+Node
+----
+Valid Range: 0-n
+Default Value: -1 (off)
+
+ 0 - n: where n is the number of NUMA nodes (i.e. 0 - 3) currently online in
+ your system
+ -1: turns this option off
+
+ The Node parameter will allow you to pick which NUMA node you want to have
+ the adapter allocate memory on.
+
+max_vfs
+-------
+Valid Range: 1-63
+Default Value: 0
+
+ If the value is greater than 0 it will also force the VMDq parameter to be 1
+ or more.
+
+ This parameter adds support for SR-IOV. It causes the driver to spawn up to
+ max_vfs worth of virtual function.
+
+
Additional Configurations
=========================
@@ -221,9 +309,10 @@
Known Issues
============
- Enabling SR-IOV in a 32-bit Microsoft* Windows* Server 2008 Guest OS using
- Intel (R) 82576-based GbE or Intel (R) 82599-based 10GbE controller under KVM
- -----------------------------------------------------------------------------
+ Enabling SR-IOV in a 32-bit or 64-bit Microsoft* Windows* Server 2008/R2
+ Guest OS using Intel (R) 82576-based GbE or Intel (R) 82599-based 10GbE
+ controller under KVM
+ ------------------------------------------------------------------------
KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. This
includes traditional PCIe devices, as well as SR-IOV-capable devices using
Intel 82576-based and 82599-based controllers.
diff --git a/Documentation/networking/ixgbevf.txt b/Documentation/networking/ixgbevf.txt
index 5a91a41..53d8d2a 100644
--- a/Documentation/networking/ixgbevf.txt
+++ b/Documentation/networking/ixgbevf.txt
@@ -1,8 +1,8 @@
-Linux* Base Driver for Intel(R) Network Connection
-==================================================
+Linux* Base Driver for Intel(R) Ethernet Network Connection
+===========================================================
Intel Gigabit Linux driver.
-Copyright(c) 1999 - 2010 Intel Corporation.
+Copyright(c) 1999 - 2013 Intel Corporation.
Contents
========
diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt
index 8fa2dd1..37c20ee 100644
--- a/Documentation/networking/openvswitch.txt
+++ b/Documentation/networking/openvswitch.txt
@@ -91,6 +91,46 @@
in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...)
+Wildcarded flow key format
+--------------------------
+
+A wildcarded flow is described with two sequences of Netlink attributes
+passed over the Netlink socket: a flow key, exactly as described above, and an
+optional corresponding flow mask.
+
+A wildcarded flow can represent a group of exact match flows. Each '1' bit
+in the mask specifies an exact match with the corresponding bit in the flow key.
+A '0' bit specifies a don't care bit, which will match either a '1' or '0' bit
+of an incoming packet. Using wildcarded flows can improve the flow setup rate
+by reducing the number of new flows that need to be processed by the user space program.
+
+Support for the mask Netlink attribute is optional for both the kernel and user
+space program. The kernel can ignore the mask attribute, installing an exact
+match flow, or reduce the number of don't care bits in the kernel to less than
+what was specified by the user space program. In this case, variations in bits
+that the kernel does not implement will simply result in additional flow setups.
+The kernel module will also work with user space programs that neither support
+nor supply flow mask attributes.
+
+Since the kernel may ignore or modify wildcard bits, it can be difficult for
+the userspace program to know exactly what matches are installed. There are
+two possible approaches: reactively install flows as they miss the kernel
+flow table (and therefore not attempt to determine wildcard changes at all)
+or use the kernel's response messages to determine the installed wildcards.
+
+When interacting with userspace, the kernel should maintain the match portion
+of the key exactly as originally installed. This provides a handle to
+identify the flow for all future operations. However, when reporting the
+mask of an installed flow, the mask should include any restrictions imposed
+by the kernel.
+
+The behavior when using overlapping wildcarded flows is undefined. It is the
+responsibility of the user space program to ensure that any incoming packet
+can match at most one flow, wildcarded or not. The current implementation
+performs best-effort detection of overlapping wildcarded flows and may reject
+some but not all of them. However, this behavior may change in future versions.
+
+
Basic rule for evolving flow keys
---------------------------------
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 8572796..c012236 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -543,6 +543,14 @@
In the AF_PACKET fanout mode, packet reception can be load balanced among
processes. This also works in combination with mmap(2) on packet sockets.
+Currently implemented fanout policies are:
+
+ - PACKET_FANOUT_HASH: schedule to socket by skb's rxhash
+ - PACKET_FANOUT_LB: schedule to socket by round-robin
+ - PACKET_FANOUT_CPU: schedule to socket by CPU packet arrives on
+ - PACKET_FANOUT_RND: schedule to socket by random selection
+ - PACKET_FANOUT_ROLLOVER: if one socket is full, rollover to another
+
Minimal example code by David S. Miller (try things like "./test eth0 hash",
"./test eth0 lb", etc.):
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index 654d2e5..457b8bb 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -123,6 +123,7 @@
int bugged_jumbo;
int pmt;
int force_sf_dma_mode;
+ int force_thresh_dma_mode;
int riwt_off;
void (*fix_mac_speed)(void *priv, unsigned int speed);
void (*bus_setup)(void __iomem *ioaddr);
@@ -159,6 +160,8 @@
o pmt: core has the embedded power module (optional).
o force_sf_dma_mode: force DMA to use the Store and Forward mode
instead of the Threshold.
+ o force_thresh_dma_mode: force DMA to use the Threshold mode rather than
+ the Store and Forward mode.
o riwt_off: force to disable the RX watchdog feature and switch to NAPI mode.
o fix_mac_speed: this callback is used for modifying some syscfg registers
(on ST SoCs) according to the link speed negotiated by the
diff --git a/Documentation/networking/tproxy.txt b/Documentation/networking/tproxy.txt
index 7b5996d..ec11429 100644
--- a/Documentation/networking/tproxy.txt
+++ b/Documentation/networking/tproxy.txt
@@ -2,9 +2,8 @@
=========================
This feature adds Linux 2.2-like transparent proxy support to current kernels.
-To use it, enable NETFILTER_TPROXY, the socket match and the TPROXY target in
-your kernel config. You will need policy routing too, so be sure to enable that
-as well.
+To use it, enable the socket match and the TPROXY target in your kernel config.
+You will need policy routing too, so be sure to enable that as well.
1. Making non-local sockets work
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index d569f2a..9a0319a 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -50,6 +50,19 @@
it's a Per-CPU variable.
Default: 64
+default_qdisc
+--------------
+
+The default queuing discipline to use for network devices. This allows
+overriding the default queue discipline of pfifo_fast with an
+alternative. Since the default queuing discipline is created with no
+additional parameters, it is best suited to queuing disciplines that
+work well without configuration like stochastic fair queue (sfq),
+CoDel (codel) or fair queue CoDel (fq_codel). Don't use queuing disciplines
+like Hierarchical Token Bucket or Deficit Round Robin which require setting
+up classes and bandwidths.
+Default: pfifo_fast
+
busy_read
----------------
Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL)
diff --git a/MAINTAINERS b/MAINTAINERS
index 1c6f9db..b2887c5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5884,7 +5884,7 @@
F: include/linux/i2c-omap.h
OMAP DEVICE TREE SUPPORT
-M: Benoît Cousson <b-cousson@ti.com>
+M: Benoît Cousson <bcousson@baylibre.com>
M: Tony Lindgren <tony@atomide.com>
L: linux-omap@vger.kernel.org
L: devicetree@vger.kernel.org
@@ -5964,14 +5964,14 @@
F: drivers/char/hw_random/omap-rng.c
OMAP HWMOD SUPPORT
-M: Benoît Cousson <b-cousson@ti.com>
+M: Benoît Cousson <bcousson@baylibre.com>
M: Paul Walmsley <paul@pwsan.com>
L: linux-omap@vger.kernel.org
S: Maintained
F: arch/arm/mach-omap2/omap_hwmod.*
OMAP HWMOD DATA FOR OMAP4-BASED DEVICES
-M: Benoît Cousson <b-cousson@ti.com>
+M: Benoît Cousson <bcousson@baylibre.com>
L: linux-omap@vger.kernel.org
S: Maintained
F: arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -7367,7 +7367,6 @@
SGI GRU DRIVER
M: Dimitri Sivanich <sivanich@sgi.com>
-M: Robin Holt <holt@sgi.com>
S: Maintained
F: drivers/misc/sgi-gru/
@@ -7387,7 +7386,8 @@
F: Documentation/sgi-visws.txt
SGI XP/XPC/XPNET DRIVER
-M: Robin Holt <holt@sgi.com>
+M: Cliff Whickman <cpw@sgi.com>
+M: Robin Holt <robinmholt@gmail.com>
S: Maintained
F: drivers/misc/sgi-xp/
@@ -7973,6 +7973,12 @@
F: arch/m68k/include/asm/sun3*
F: drivers/net/ethernet/i825xx/sun3*
+SUNDANCE NETWORK DRIVER
+M: Denis Kirjanov <kda@linux-powerpc.org>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/ethernet/dlink/sundance.c
+
SUPERH
M: Paul Mundt <lethal@linux-sh.org>
L: linux-sh@vger.kernel.org
diff --git a/Makefile b/Makefile
index 6e48848..a5a55f4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 3
PATCHLEVEL = 11
SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
NAME = Linux for Workgroups
# *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts
index d59b70c..3d77dbe 100644
--- a/arch/arm/boot/dts/at91sam9n12ek.dts
+++ b/arch/arm/boot/dts/at91sam9n12ek.dts
@@ -14,11 +14,11 @@
compatible = "atmel,at91sam9n12ek", "atmel,at91sam9n12", "atmel,at91sam9";
chosen {
- bootargs = "mem=128M console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=jffs2";
+ bootargs = "console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=jffs2";
};
memory {
- reg = <0x20000000 0x10000000>;
+ reg = <0x20000000 0x8000000>;
};
clocks {
diff --git a/arch/arm/boot/dts/at91sam9x5ek.dtsi b/arch/arm/boot/dts/at91sam9x5ek.dtsi
index b753855..49e3c45 100644
--- a/arch/arm/boot/dts/at91sam9x5ek.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5ek.dtsi
@@ -94,8 +94,9 @@
usb0: ohci@00600000 {
status = "okay";
- num-ports = <2>;
- atmel,vbus-gpio = <&pioD 19 GPIO_ACTIVE_LOW
+ num-ports = <3>;
+ atmel,vbus-gpio = <0 /* &pioD 18 GPIO_ACTIVE_LOW *//* Activate to have access to port A */
+ &pioD 19 GPIO_ACTIVE_LOW
&pioD 20 GPIO_ACTIVE_LOW
>;
};
diff --git a/arch/arm/boot/dts/sama5d3xmb.dtsi b/arch/arm/boot/dts/sama5d3xmb.dtsi
index 8a9e05d..dba739b 100644
--- a/arch/arm/boot/dts/sama5d3xmb.dtsi
+++ b/arch/arm/boot/dts/sama5d3xmb.dtsi
@@ -81,6 +81,14 @@
macb1: ethernet@f802c000 {
phy-mode = "rmii";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@1 {
+ interrupt-parent = <&pioE>;
+ interrupts = <30 IRQ_TYPE_EDGE_FALLING>;
+ reg = <1>;
+ };
};
pinctrl@fffff200 {
diff --git a/arch/arm/boot/dts/tegra20-seaboard.dts b/arch/arm/boot/dts/tegra20-seaboard.dts
index 365760b..40e6fb2 100644
--- a/arch/arm/boot/dts/tegra20-seaboard.dts
+++ b/arch/arm/boot/dts/tegra20-seaboard.dts
@@ -830,6 +830,8 @@
regulator-max-microvolt = <5000000>;
enable-active-high;
gpio = <&gpio 24 0>; /* PD0 */
+ regulator-always-on;
+ regulator-boot-on;
};
};
diff --git a/arch/arm/boot/dts/tegra20-trimslice.dts b/arch/arm/boot/dts/tegra20-trimslice.dts
index ed4b901..37c93d3 100644
--- a/arch/arm/boot/dts/tegra20-trimslice.dts
+++ b/arch/arm/boot/dts/tegra20-trimslice.dts
@@ -412,6 +412,8 @@
regulator-max-microvolt = <5000000>;
enable-active-high;
gpio = <&gpio 170 0>; /* PV2 */
+ regulator-always-on;
+ regulator-boot-on;
};
};
diff --git a/arch/arm/boot/dts/tegra20-whistler.dts b/arch/arm/boot/dts/tegra20-whistler.dts
index ab67c94..a3d0eba 100644
--- a/arch/arm/boot/dts/tegra20-whistler.dts
+++ b/arch/arm/boot/dts/tegra20-whistler.dts
@@ -588,6 +588,8 @@
regulator-max-microvolt = <5000000>;
enable-active-high;
gpio = <&tca6416 0 0>; /* GPIO_PMU0 */
+ regulator-always-on;
+ regulator-boot-on;
};
vbus3_reg: regulator@3 {
@@ -598,6 +600,8 @@
regulator-max-microvolt = <5000000>;
enable-active-high;
gpio = <&tca6416 1 0>; /* GPIO_PMU1 */
+ regulator-always-on;
+ regulator-boot-on;
};
};
diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h
index 6462a72..a252c0b 100644
--- a/arch/arm/include/asm/smp_plat.h
+++ b/arch/arm/include/asm/smp_plat.h
@@ -88,4 +88,7 @@
{
return 1 << mpidr_hash.bits;
}
+
+extern int platform_can_cpu_hotplug(void);
+
#endif
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index f8b8965..b07c09e 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -107,7 +107,7 @@
" subs %1, %0, %0, ror #16\n"
" addeq %0, %0, %4\n"
" strexeq %2, %0, [%3]"
- : "=&r" (slock), "=&r" (contended), "=r" (res)
+ : "=&r" (slock), "=&r" (contended), "=&r" (res)
: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
: "cc");
} while (res);
@@ -168,17 +168,20 @@
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
- unsigned long tmp;
+ unsigned long contended, res;
- __asm__ __volatile__(
-" ldrex %0, [%1]\n"
-" teq %0, #0\n"
-" strexeq %0, %2, [%1]"
- : "=&r" (tmp)
- : "r" (&rw->lock), "r" (0x80000000)
- : "cc");
+ do {
+ __asm__ __volatile__(
+ " ldrex %0, [%2]\n"
+ " mov %1, #0\n"
+ " teq %0, #0\n"
+ " strexeq %1, %3, [%2]"
+ : "=&r" (contended), "=&r" (res)
+ : "r" (&rw->lock), "r" (0x80000000)
+ : "cc");
+ } while (res);
- if (tmp == 0) {
+ if (!contended) {
smp_mb();
return 1;
} else {
@@ -254,18 +257,26 @@
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
- unsigned long tmp, tmp2 = 1;
+ unsigned long contended, res;
- __asm__ __volatile__(
-" ldrex %0, [%2]\n"
-" adds %0, %0, #1\n"
-" strexpl %1, %0, [%2]\n"
- : "=&r" (tmp), "+r" (tmp2)
- : "r" (&rw->lock)
- : "cc");
+ do {
+ __asm__ __volatile__(
+ " ldrex %0, [%2]\n"
+ " mov %1, #0\n"
+ " adds %0, %0, #1\n"
+ " strexpl %1, %0, [%2]"
+ : "=&r" (contended), "=&r" (res)
+ : "r" (&rw->lock)
+ : "cc");
+ } while (res);
- smp_mb();
- return tmp2 == 0;
+ /* If the lock is negative, then it is already held for write. */
+ if (contended < 0x80000000) {
+ smp_mb();
+ return 1;
+ } else {
+ return 0;
+ }
}
/* read_can_lock - would read_trylock() succeed? */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index d40d0ef..9cbe70c 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -357,7 +357,8 @@
.endm
.macro kuser_cmpxchg_check
-#if !defined(CONFIG_CPU_32v6K) && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+#if !defined(CONFIG_CPU_32v6K) && defined(CONFIG_KUSER_HELPERS) && \
+ !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
#ifndef CONFIG_MMU
#warning "NPTL on non MMU needs fixing"
#else
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index 25442f4..fc79202 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -84,17 +84,13 @@
void set_fiq_handler(void *start, unsigned int length) /* copies length bytes of handler code from start to FIQ_OFFSET in the vectors page and flushes the icache for it */
{
-#if defined(CONFIG_CPU_USE_DOMAINS)
- void *base = (void *)0xffff0000;
-#else
void *base = vectors_page;
-#endif
unsigned offset = FIQ_OFFSET;
memcpy(base + offset, start, length);
+ if (!cache_is_vipt_nonaliasing()) /* also flush the range just written through the vectors_page mapping */
+ flush_icache_range((unsigned long)base + offset, (unsigned long)base + offset + length); /* start and end must both be addresses inside the same mapping */
flush_icache_range(0xffff0000 + offset, 0xffff0000 + offset + length);
- if (!vectors_high())
- flush_icache_range(offset, offset + length);
}
int claim_fiq(struct fiq_handler *f)
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 4fb074c..d7c82df 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -15,6 +15,7 @@
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/mach-types.h>
+#include <asm/smp_plat.h>
#include <asm/system_misc.h>
extern const unsigned char relocate_new_kernel[];
@@ -39,6 +40,14 @@
int i, err;
/*
+ * Validate that if the current HW supports SMP, then the SW supports
+ * and implements CPU hotplug for the current HW. If not, we won't be
+ * able to kexec reliably, so fail the prepare operation.
+ */
+ if (num_possible_cpus() > 1 && !platform_can_cpu_hotplug())
+ return -EINVAL;
+
+ /*
* No segment at default ATAGs address. try to locate
* a dtb using magic.
*/
@@ -134,10 +143,13 @@
unsigned long reboot_code_buffer_phys;
void *reboot_code_buffer;
- if (num_online_cpus() > 1) {
- pr_err("kexec: error: multiple CPUs still online\n");
- return;
- }
+ /*
+ * This can only happen if machine_shutdown() failed to disable some
+ * CPU, and that can only happen if the checks in
+ * machine_kexec_prepare() were not correct. If this fails, we can't
+ * reliably kexec anyway, so BUG_ON is appropriate.
+ */
+ BUG_ON(num_online_cpus() > 1);
page_list = image->head & PAGE_MASK;
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 21f7790..e186ee1 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -56,7 +56,7 @@
int mapping;
if (config >= PERF_COUNT_HW_MAX)
- return -ENOENT;
+ return -EINVAL;
mapping = (*event_map)[config];
return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
@@ -258,6 +258,9 @@
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct pmu *leader_pmu = event->group_leader->pmu;
+ if (is_software_event(event))
+ return 1;
+
if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
return 1;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 536c85f..94f6b05 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -462,7 +462,7 @@
{
return in_gate_area(NULL, addr);
}
-#define is_gate_vma(vma) ((vma) = &gate_vma)
+#define is_gate_vma(vma) ((vma) == &gate_vma)
#else
#define is_gate_vma(vma) 0
#endif
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index c2b4f8f..2dc19349e 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -145,6 +145,16 @@
return -ENOSYS;
}
+int platform_can_cpu_hotplug(void) /* returns 1 when CONFIG_HOTPLUG_CPU is set and smp_ops provides a cpu_kill hook, otherwise 0 */
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ if (smp_ops.cpu_kill)
+ return 1;
+#endif
+
+ return 0; /* hotplug support not compiled in, or no cpu_kill callback registered */
+}
+
#ifdef CONFIG_HOTPLUG_CPU
static void percpu_timer_stop(void);
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 4a51990..db9cf69 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -146,7 +146,11 @@
#define access_pmintenclr pm_fake
/* Architected CP15 registers.
- * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ * registers preceding 32-bit ones.
*/
static const struct coproc_reg cp15_regs[] = {
/* CSSELR: swapped by interrupt.S. */
@@ -154,8 +158,8 @@
NULL, reset_unknown, c0_CSSELR },
/* TTBR0/TTBR1: swapped by interrupt.S. */
- { CRm( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
- { CRm( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
+ { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
+ { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
/* TTBCR: swapped by interrupt.S. */
{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
@@ -182,7 +186,7 @@
NULL, reset_unknown, c6_IFAR },
/* PAR swapped by interrupt.S */
- { CRn( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
+ { CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
/*
* DC{C,I,CI}SW operations:
@@ -399,12 +403,13 @@
| KVM_REG_ARM_OPC1_MASK))
return false;
params->is_64bit = true;
- params->CRm = ((id & KVM_REG_ARM_CRM_MASK)
+ /* CRm to CRn: see cp15_to_index for details */
+ params->CRn = ((id & KVM_REG_ARM_CRM_MASK)
>> KVM_REG_ARM_CRM_SHIFT);
params->Op1 = ((id & KVM_REG_ARM_OPC1_MASK)
>> KVM_REG_ARM_OPC1_SHIFT);
params->Op2 = 0;
- params->CRn = 0;
+ params->CRm = 0;
return true;
default:
return false;
@@ -898,7 +903,14 @@
if (reg->is_64) {
val |= KVM_REG_SIZE_U64;
val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
- val |= (reg->CRm << KVM_REG_ARM_CRM_SHIFT);
+ /*
+ * CRn always denotes the primary coproc. reg. nr. for the
+ * in-kernel representation, but the user space API uses the
+ * CRm for the encoding, because it is modelled after the
+ * MRRC/MCRR instructions: see the ARM ARM rev. c page
+ * B3-1445
+ */
+ val |= (reg->CRn << KVM_REG_ARM_CRM_SHIFT);
} else {
val |= KVM_REG_SIZE_U32;
val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index b7301d3..0461d5c 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -135,6 +135,8 @@
return -1;
if (i1->CRn != i2->CRn)
return i1->CRn - i2->CRn;
+ if (i1->is_64 != i2->is_64)
+ return i2->is_64 - i1->is_64;
if (i1->CRm != i2->CRm)
return i1->CRm - i2->CRm;
if (i1->Op1 != i2->Op1)
@@ -145,6 +147,7 @@
#define CRn(_x) .CRn = _x
#define CRm(_x) .CRm = _x
+#define CRm64(_x) .CRn = _x, .CRm = 0
#define Op1(_x) .Op1 = _x
#define Op2(_x) .Op2 = _x
#define is64 .is_64 = true
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index 685063a..cf93472 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -114,7 +114,11 @@
/*
* A15-specific CP15 registers.
- * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ * registers preceding 32-bit ones.
*/
static const struct coproc_reg a15_regs[] = {
/* MPIDR: we use VMPIDR for guest access. */
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index b8e06b7..0c25d94 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -63,7 +63,8 @@
static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_exit_mmio *mmio)
{
- unsigned long rt, len;
+ unsigned long rt;
+ int len;
bool is_write, sign_extend;
if (kvm_vcpu_dabt_isextabt(vcpu)) {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index ca6bea4..0988d9e 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -85,6 +85,12 @@
return p;
}
+static bool page_empty(void *ptr) /* true when the page backing ptr has a reference count of exactly 1 */
+{
+ struct page *ptr_page = virt_to_page(ptr);
+ return page_count(ptr_page) == 1; /* NOTE(review): unmap_range treats a count of 1 as "table holds no entries" - presumably each entry takes a page reference; confirm */
+}
+
static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
pmd_t *pmd_table = pmd_offset(pud, 0);
@@ -103,12 +109,6 @@
put_page(virt_to_page(pmd));
}
-static bool pmd_empty(pmd_t *pmd)
-{
- struct page *pmd_page = virt_to_page(pmd);
- return page_count(pmd_page) == 1;
-}
-
static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
{
if (pte_present(*pte)) {
@@ -118,12 +118,6 @@
}
}
-static bool pte_empty(pte_t *pte)
-{
- struct page *pte_page = virt_to_page(pte);
- return page_count(pte_page) == 1;
-}
-
static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
unsigned long long start, u64 size)
{
@@ -132,37 +126,37 @@
pmd_t *pmd;
pte_t *pte;
unsigned long long addr = start, end = start + size;
- u64 range;
+ u64 next;
while (addr < end) {
pgd = pgdp + pgd_index(addr);
pud = pud_offset(pgd, addr);
if (pud_none(*pud)) {
- addr += PUD_SIZE;
+ addr = pud_addr_end(addr, end);
continue;
}
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd)) {
- addr += PMD_SIZE;
+ addr = pmd_addr_end(addr, end);
continue;
}
pte = pte_offset_kernel(pmd, addr);
clear_pte_entry(kvm, pte, addr);
- range = PAGE_SIZE;
+ next = addr + PAGE_SIZE;
/* If we emptied the pte, walk back up the ladder */
- if (pte_empty(pte)) {
+ if (page_empty(pte)) {
clear_pmd_entry(kvm, pmd, addr);
- range = PMD_SIZE;
- if (pmd_empty(pmd)) {
+ next = pmd_addr_end(addr, end);
+ if (page_empty(pmd) && !page_empty(pud)) {
clear_pud_entry(kvm, pud, addr);
- range = PUD_SIZE;
+ next = pud_addr_end(addr, end);
}
}
- addr += range;
+ addr = next;
}
}
diff --git a/arch/arm/mach-at91/at91sam9x5.c b/arch/arm/mach-at91/at91sam9x5.c
index 2abee66..916e5a1 100644
--- a/arch/arm/mach-at91/at91sam9x5.c
+++ b/arch/arm/mach-at91/at91sam9x5.c
@@ -227,6 +227,8 @@
CLKDEV_CON_DEV_ID("usart", "f8020000.serial", &usart1_clk),
CLKDEV_CON_DEV_ID("usart", "f8024000.serial", &usart2_clk),
CLKDEV_CON_DEV_ID("usart", "f8028000.serial", &usart3_clk),
+ CLKDEV_CON_DEV_ID("usart", "f8040000.serial", &uart0_clk),
+ CLKDEV_CON_DEV_ID("usart", "f8044000.serial", &uart1_clk),
CLKDEV_CON_DEV_ID("t0_clk", "f8008000.timer", &tcb0_clk),
CLKDEV_CON_DEV_ID("t0_clk", "f800c000.timer", &tcb0_clk),
CLKDEV_CON_DEV_ID("mci_clk", "f0008000.mmc", &mmc0_clk),
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index dff4ddc..139e42d 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -75,6 +75,7 @@
.parts = davinci_nand_partitions,
.nr_parts = ARRAY_SIZE(davinci_nand_partitions),
.ecc_mode = NAND_ECC_HW_SYNDROME,
+ .ecc_bits = 4,
.bbt_options = NAND_BBT_USE_FLASH,
};
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index a33686a..fa4bfaf 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -153,6 +153,7 @@
.parts = davinci_evm_nandflash_partition,
.nr_parts = ARRAY_SIZE(davinci_evm_nandflash_partition),
.ecc_mode = NAND_ECC_HW,
+ .ecc_bits = 1,
.bbt_options = NAND_BBT_USE_FLASH,
.timing = &davinci_evm_nandflash_timing,
};
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index fbb8e5a..0c005e8 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -90,6 +90,7 @@
.parts = davinci_nand_partitions,
.nr_parts = ARRAY_SIZE(davinci_nand_partitions),
.ecc_mode = NAND_ECC_HW,
+ .ecc_bits = 1,
.options = 0,
};
diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c
index 2bc112a..808233b 100644
--- a/arch/arm/mach-davinci/board-neuros-osd2.c
+++ b/arch/arm/mach-davinci/board-neuros-osd2.c
@@ -88,6 +88,7 @@
.parts = davinci_ntosd2_nandflash_partition,
.nr_parts = ARRAY_SIZE(davinci_ntosd2_nandflash_partition),
.ecc_mode = NAND_ECC_HW,
+ .ecc_bits = 1,
.bbt_options = NAND_BBT_USE_FLASH,
};
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index f6eeb87..827d150 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -122,11 +122,7 @@
};
static struct musb_hdrc_platform_data tusb_data = {
-#ifdef CONFIG_USB_GADGET_MUSB_HDRC
.mode = MUSB_OTG,
-#else
- .mode = MUSB_HOST,
-#endif
.set_power = tusb_set_power,
.min_power = 25, /* x2 = 50 mA drawn from VBUS as peripheral */
.power = 100, /* Max 100 mA VBUS for host mode */
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index d2ea68e..773510556 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -85,7 +85,7 @@
static struct omap_musb_board_data musb_board_data = {
.interface_type = MUSB_INTERFACE_ULPI,
- .mode = MUSB_PERIPHERAL,
+ .mode = MUSB_OTG,
.power = 0,
};
diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c
index 8c4de27..bc89723 100644
--- a/arch/arm/mach-omap2/usb-musb.c
+++ b/arch/arm/mach-omap2/usb-musb.c
@@ -38,11 +38,8 @@
};
static struct musb_hdrc_platform_data musb_plat = {
-#ifdef CONFIG_USB_GADGET_MUSB_HDRC
.mode = MUSB_OTG,
-#else
- .mode = MUSB_HOST,
-#endif
+
/* .clock is set dynamically */
.config = &musb_config,
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index fe31bfc..c98511c 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -73,9 +73,6 @@
static struct mcp251x_platform_data mcp251x_info = {
.oscillator_frequency = 16E6,
- .board_specific_setup = NULL,
- .power_enable = NULL,
- .transceiver_enable = NULL
};
static struct spi_board_info mcp251x_board_info[] = {
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index f5d4364..04a0aea 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -29,6 +29,8 @@
#include <linux/i2c/pca953x.h>
#include <linux/apm-emulation.h>
#include <linux/can/platform/mcp251x.h>
+#include <linux/regulator/fixed.h>
+#include <linux/regulator/machine.h>
#include <asm/mach-types.h>
#include <asm/suspend.h>
@@ -391,33 +393,34 @@
};
/* CAN bus on SPI */
-static int zeus_mcp2515_setup(struct spi_device *sdev)
-{
- int err;
+static struct regulator_consumer_supply can_regulator_consumer =
+ REGULATOR_SUPPLY("vdd", "spi3.0");
- err = gpio_request(ZEUS_CAN_SHDN_GPIO, "CAN shutdown");
- if (err)
- return err;
+static struct regulator_init_data can_regulator_init_data = {
+ .constraints = {
+ .valid_ops_mask = REGULATOR_CHANGE_STATUS,
+ },
+ .consumer_supplies = &can_regulator_consumer,
+ .num_consumer_supplies = 1,
+};
- err = gpio_direction_output(ZEUS_CAN_SHDN_GPIO, 1);
- if (err) {
- gpio_free(ZEUS_CAN_SHDN_GPIO);
- return err;
- }
+static struct fixed_voltage_config can_regulator_pdata = {
+ .supply_name = "CAN_SHDN",
+ .microvolts = 3300000,
+ .gpio = ZEUS_CAN_SHDN_GPIO,
+ .init_data = &can_regulator_init_data,
+};
- return 0;
-}
-
-static int zeus_mcp2515_transceiver_enable(int enable)
-{
- gpio_set_value(ZEUS_CAN_SHDN_GPIO, !enable);
- return 0;
-}
+static struct platform_device can_regulator_device = { /* fixed regulator device described by can_regulator_pdata */
+ .name = "reg-fixed-voltage", /* must match the fixed-voltage regulator platform driver name, otherwise the device never binds */
+ .id = -1,
+ .dev = {
+ .platform_data = &can_regulator_pdata,
+ },
+};
static struct mcp251x_platform_data zeus_mcp2515_pdata = {
.oscillator_frequency = 16*1000*1000,
- .board_specific_setup = zeus_mcp2515_setup,
- .power_enable = zeus_mcp2515_transceiver_enable,
};
static struct spi_board_info zeus_spi_board_info[] = {
@@ -516,6 +519,7 @@
&zeus_leds_device,
&zeus_pcmcia_device,
&zeus_max6369_device,
+ &can_regulator_device,
};
/* AC'97 */
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index c5be60d..3a6ffa2 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -358,7 +358,6 @@
static struct sh_eth_plat_data sh_eth_platdata = {
.phy = 0x00, /* LAN8710A */
.edmac_endian = EDMAC_LITTLE_ENDIAN,
- .register_type = SH_ETH_REG_GIGABIT,
.phy_interface = PHY_INTERFACE_MODE_MII,
};
diff --git a/arch/arm/mach-shmobile/board-bockw.c b/arch/arm/mach-shmobile/board-bockw.c
index 3354a85..fa8885b 100644
--- a/arch/arm/mach-shmobile/board-bockw.c
+++ b/arch/arm/mach-shmobile/board-bockw.c
@@ -89,7 +89,6 @@
static struct sh_eth_plat_data ether_platform_data __initdata = {
.phy = 0x01,
.edmac_endian = EDMAC_LITTLE_ENDIAN,
- .register_type = SH_ETH_REG_FAST_RCAR,
.phy_interface = PHY_INTERFACE_MODE_RMII,
/*
* Although the LINK signal is available on the board, it's connected to
diff --git a/arch/arm/plat-samsung/init.c b/arch/arm/plat-samsung/init.c
index 3e5c461..50a3ea0 100644
--- a/arch/arm/plat-samsung/init.c
+++ b/arch/arm/plat-samsung/init.c
@@ -55,12 +55,13 @@
printk("CPU %s (id 0x%08lx)\n", cpu->name, idcode);
- if (cpu->map_io == NULL || cpu->init == NULL) {
+ if (cpu->init == NULL) {
printk(KERN_ERR "CPU %s support not enabled\n", cpu->name);
panic("Unsupported Samsung CPU");
}
- cpu->map_io();
+ if (cpu->map_io)
+ cpu->map_io();
}
/* s3c24xx_init_clocks
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index c9770ba..8a6295c 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -170,6 +170,7 @@
per_cpu(xen_vcpu, cpu) = vcpup;
enable_percpu_irq(xen_events_irq, 0);
+ put_cpu();
}
static void xen_restart(enum reboot_mode reboot_mode, const char *cmd)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index c92de41..b25763b 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -42,14 +42,15 @@
#define TPIDR_EL1 18 /* Thread ID, Privileged */
#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */
#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
+#define PAR_EL1 21 /* Physical Address Register */
/* 32bit specific registers. Keep them at the end of the range */
-#define DACR32_EL2 21 /* Domain Access Control Register */
-#define IFSR32_EL2 22 /* Instruction Fault Status Register */
-#define FPEXC32_EL2 23 /* Floating-Point Exception Control Register */
-#define DBGVCR32_EL2 24 /* Debug Vector Catch Register */
-#define TEECR32_EL1 25 /* ThumbEE Configuration Register */
-#define TEEHBR32_EL1 26 /* ThumbEE Handler Base Register */
-#define NR_SYS_REGS 27
+#define DACR32_EL2 22 /* Domain Access Control Register */
+#define IFSR32_EL2 23 /* Instruction Fault Status Register */
+#define FPEXC32_EL2 24 /* Floating-Point Exception Control Register */
+#define DBGVCR32_EL2 25 /* Debug Vector Catch Register */
+#define TEECR32_EL1 26 /* ThumbEE Configuration Register */
+#define TEEHBR32_EL1 27 /* ThumbEE Handler Base Register */
+#define NR_SYS_REGS 28
/* 32bit mapping */
#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
@@ -69,6 +70,8 @@
#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */
#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */
#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */
+#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */
+#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */
#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */
#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */
#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 644d739..0859a4d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -129,7 +129,7 @@
struct kvm_mmu_memory_cache mmu_page_cache;
/* Target CPU and feature flags */
- u32 target;
+ int target;
DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
/* Detect first run of a vcpu */
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 9ba33c4..12e6ccb 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -107,7 +107,12 @@
static int
armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
{
- int mapping = (*event_map)[config];
+ int mapping;
+
+ if (config >= PERF_COUNT_HW_MAX)
+ return -EINVAL;
+
+ mapping = (*event_map)[config];
return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}
@@ -317,6 +322,9 @@
struct hw_perf_event fake_event = event->hw;
struct pmu *leader_pmu = event->group_leader->pmu;
+ if (is_software_event(event))
+ return 1;
+
if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF)
return 1;
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index ff985e3..1ac0bbb 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -214,6 +214,7 @@
mrs x21, tpidr_el1
mrs x22, amair_el1
mrs x23, cntkctl_el1
+ mrs x24, par_el1
stp x4, x5, [x3]
stp x6, x7, [x3, #16]
@@ -225,6 +226,7 @@
stp x18, x19, [x3, #112]
stp x20, x21, [x3, #128]
stp x22, x23, [x3, #144]
+ str x24, [x3, #160]
.endm
.macro restore_sysregs
@@ -243,6 +245,7 @@
ldp x18, x19, [x3, #112]
ldp x20, x21, [x3, #128]
ldp x22, x23, [x3, #144]
+ ldr x24, [x3, #160]
msr vmpidr_el2, x4
msr csselr_el1, x5
@@ -264,6 +267,7 @@
msr tpidr_el1, x21
msr amair_el1, x22
msr cntkctl_el1, x23
+ msr par_el1, x24
.endm
.macro skip_32bit_state tmp, target
@@ -600,6 +604,8 @@
// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
ENTRY(__kvm_tlb_flush_vmid_ipa)
+ dsb ishst
+
kern_hyp_va x0
ldr x2, [x0, #KVM_VTTBR]
msr vttbr_el2, x2
@@ -621,6 +627,7 @@
ENDPROC(__kvm_tlb_flush_vmid_ipa)
ENTRY(__kvm_flush_vm_context)
+ dsb ishst
tlbi alle1is
ic ialluis
dsb sy
@@ -753,6 +760,10 @@
*/
tbnz x1, #7, 1f // S1PTW is set
+ /* Preserve PAR_EL1 */
+ mrs x3, par_el1
+ push x3, xzr
+
/*
* Permission fault, HPFAR_EL2 is invalid.
* Resolve the IPA the hard way using the guest VA.
@@ -766,6 +777,8 @@
/* Read result */
mrs x3, par_el1
+ pop x0, xzr // Restore PAR_EL1 from the stack
+ msr par_el1, x0
tbnz x3, #0, 3f // Bail out if we failed the translation
ubfx x3, x3, #12, #36 // Extract IPA
lsl x3, x3, #4 // and present it like HPFAR
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 9492360..02e9d09 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -211,6 +211,9 @@
/* FAR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
NULL, reset_unknown, FAR_EL1 },
+ /* PAR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
+ NULL, reset_unknown, PAR_EL1 },
/* PMINTENSET_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
diff --git a/arch/m68k/emu/natfeat.c b/arch/m68k/emu/natfeat.c
index 2291a7d..fa277ae 100644
--- a/arch/m68k/emu/natfeat.c
+++ b/arch/m68k/emu/natfeat.c
@@ -18,9 +18,11 @@
#include <asm/machdep.h>
#include <asm/natfeat.h>
+extern long nf_get_id2(const char *feature_name);
+
asm("\n"
-" .global nf_get_id,nf_call\n"
-"nf_get_id:\n"
+" .global nf_get_id2,nf_call\n"
+"nf_get_id2:\n"
" .short 0x7300\n"
" rts\n"
"nf_call:\n"
@@ -29,12 +31,25 @@
"1: moveq.l #0,%d0\n"
" rts\n"
" .section __ex_table,\"a\"\n"
-" .long nf_get_id,1b\n"
+" .long nf_get_id2,1b\n"
" .long nf_call,1b\n"
" .previous");
-EXPORT_SYMBOL_GPL(nf_get_id);
EXPORT_SYMBOL_GPL(nf_call);
+long nf_get_id(const char *feature_name) /* copies feature_name into a 32-byte stack buffer and passes the copy to nf_get_id2(); returns 0 if the name does not fit */
+{
+ /* feature_name may be in vmalloc()ed memory, so make a copy */
+ char name_copy[32];
+ size_t n;
+
+ n = strlcpy(name_copy, feature_name, sizeof(name_copy));
+ if (n >= sizeof(name_copy)) /* source length does not leave room for the terminator, i.e. the copy was truncated */
+ return 0;
+
+ return nf_get_id2(name_copy);
+}
+EXPORT_SYMBOL_GPL(nf_get_id);
+
void nfprint(const char *fmt, ...)
{
static char buf[256];
diff --git a/arch/m68k/include/asm/div64.h b/arch/m68k/include/asm/div64.h
index 444ea8a..ef881cf 100644
--- a/arch/m68k/include/asm/div64.h
+++ b/arch/m68k/include/asm/div64.h
@@ -15,16 +15,17 @@
unsigned long long n64; \
} __n; \
unsigned long __rem, __upper; \
+ unsigned long __base = (base); \
\
__n.n64 = (n); \
if ((__upper = __n.n32[0])) { \
asm ("divul.l %2,%1:%0" \
- : "=d" (__n.n32[0]), "=d" (__upper) \
- : "d" (base), "0" (__n.n32[0])); \
+ : "=d" (__n.n32[0]), "=d" (__upper) \
+ : "d" (__base), "0" (__n.n32[0])); \
} \
asm ("divu.l %2,%1:%0" \
- : "=d" (__n.n32[1]), "=d" (__rem) \
- : "d" (base), "1" (__upper), "0" (__n.n32[1])); \
+ : "=d" (__n.n32[1]), "=d" (__rem) \
+ : "d" (__base), "1" (__upper), "0" (__n.n32[1])); \
(n) = __n.n64; \
__rem; \
})
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index e773659..46048d2 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -803,6 +803,32 @@
dec_insn.next_pc_inc;
return 1;
break;
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+ case lwc2_op: /* This is bbit0 on Octeon */
+ if ((regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt)) == 0)
+ *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2);
+ else
+ *contpc = regs->cp0_epc + 8;
+ return 1;
+ case ldc2_op: /* This is bbit032 on Octeon */
+ if ((regs->regs[insn.i_format.rs] & (1ull<<(insn.i_format.rt + 32))) == 0)
+ *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2);
+ else
+ *contpc = regs->cp0_epc + 8;
+ return 1;
+ case swc2_op: /* This is bbit1 on Octeon */
+ if (regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt))
+ *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2);
+ else
+ *contpc = regs->cp0_epc + 8;
+ return 1;
+ case sdc2_op: /* This is bbit132 on Octeon */
+ if (regs->regs[insn.i_format.rs] & (1ull<<(insn.i_format.rt + 32)))
+ *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2);
+ else
+ *contpc = regs->cp0_epc + 8;
+ return 1;
+#endif
case cop0_op:
case cop1_op:
case cop2_op:
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 23a64d25..6d6d92b 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -32,7 +32,7 @@
struct mm_struct *mm;
struct mmu_table_batch *batch;
unsigned int fullmm;
- unsigned long start, unsigned long end;
+ unsigned long start, end;
};
struct mmu_table_batch {
diff --git a/arch/sh/boards/board-espt.c b/arch/sh/boards/board-espt.c
index 4d94dff..7291e2f 100644
--- a/arch/sh/boards/board-espt.c
+++ b/arch/sh/boards/board-espt.c
@@ -80,7 +80,6 @@
static struct sh_eth_plat_data sh7763_eth_pdata = {
.phy = 0,
.edmac_endian = EDMAC_LITTLE_ENDIAN,
- .register_type = SH_ETH_REG_GIGABIT,
.phy_interface = PHY_INTERFACE_MODE_MII,
};
diff --git a/arch/sh/boards/board-sh7757lcr.c b/arch/sh/boards/board-sh7757lcr.c
index 4f114d1..25c5a93 100644
--- a/arch/sh/boards/board-sh7757lcr.c
+++ b/arch/sh/boards/board-sh7757lcr.c
@@ -77,7 +77,6 @@
static struct sh_eth_plat_data sh7757_eth0_pdata = {
.phy = 1,
.edmac_endian = EDMAC_LITTLE_ENDIAN,
- .register_type = SH_ETH_REG_FAST_SH4,
.set_mdio_gate = sh7757_eth_set_mdio_gate,
};
@@ -106,7 +105,6 @@
static struct sh_eth_plat_data sh7757_eth1_pdata = {
.phy = 1,
.edmac_endian = EDMAC_LITTLE_ENDIAN,
- .register_type = SH_ETH_REG_FAST_SH4,
.set_mdio_gate = sh7757_eth_set_mdio_gate,
};
@@ -151,7 +149,6 @@
static struct sh_eth_plat_data sh7757_eth_giga0_pdata = {
.phy = 18,
.edmac_endian = EDMAC_LITTLE_ENDIAN,
- .register_type = SH_ETH_REG_GIGABIT,
.set_mdio_gate = sh7757_eth_giga_set_mdio_gate,
.phy_interface = PHY_INTERFACE_MODE_RGMII_ID,
};
@@ -186,7 +183,6 @@
static struct sh_eth_plat_data sh7757_eth_giga1_pdata = {
.phy = 19,
.edmac_endian = EDMAC_LITTLE_ENDIAN,
- .register_type = SH_ETH_REG_GIGABIT,
.set_mdio_gate = sh7757_eth_giga_set_mdio_gate,
.phy_interface = PHY_INTERFACE_MODE_RGMII_ID,
};
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 61fade0..a4f630f 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -159,7 +159,6 @@
static struct sh_eth_plat_data sh_eth_plat = {
.phy = 0x1f, /* SMSC LAN8700 */
.edmac_endian = EDMAC_LITTLE_ENDIAN,
- .register_type = SH_ETH_REG_FAST_SH4,
.phy_interface = PHY_INTERFACE_MODE_MII,
.ether_link_active_low = 1
};
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index b70180e..21e4230 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -365,7 +365,7 @@
static struct resource sh_eth_resources[] = {
[0] = {
.start = SH_ETH_ADDR,
- .end = SH_ETH_ADDR + 0x1FC,
+ .end = SH_ETH_ADDR + 0x1FC - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
@@ -377,6 +377,7 @@
static struct sh_eth_plat_data sh_eth_plat = {
.phy = 0x1f, /* SMSC LAN8187 */
.edmac_endian = EDMAC_LITTLE_ENDIAN,
+ .phy_interface = PHY_INTERFACE_MODE_MII,
};
static struct platform_device sh_eth_device = {
diff --git a/arch/sh/boards/mach-sh7763rdp/setup.c b/arch/sh/boards/mach-sh7763rdp/setup.c
index 50ba481..2c8fb04 100644
--- a/arch/sh/boards/mach-sh7763rdp/setup.c
+++ b/arch/sh/boards/mach-sh7763rdp/setup.c
@@ -88,7 +88,6 @@
static struct sh_eth_plat_data sh7763_eth_pdata = {
.phy = 1,
.edmac_endian = EDMAC_LITTLE_ENDIAN,
- .register_type = SH_ETH_REG_GIGABIT,
.phy_interface = PHY_INTERFACE_MODE_MII,
};
diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index bb11e19..4df4d4f 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/serial.h>
#include <linux/serial_sci.h>
+#include <linux/sh_eth.h>
#include <linux/sh_timer.h>
#include <linux/io.h>
@@ -110,10 +111,16 @@
},
};
+static struct sh_eth_plat_data eth_platform_data = {
+ .phy = 1,
+ .edmac_endian = EDMAC_LITTLE_ENDIAN,
+ .phy_interface = PHY_INTERFACE_MODE_MII,
+};
+
static struct resource eth_resources[] = {
[0] = {
.start = 0xfb000000,
- .end = 0xfb0001c8,
+ .end = 0xfb0001c7,
.flags = IORESOURCE_MEM,
},
[1] = {
@@ -127,7 +134,7 @@
.name = "sh7619-ether",
.id = -1,
.dev = {
- .platform_data = (void *)1,
+ .platform_data = ð_platform_data,
},
.num_resources = ARRAY_SIZE(eth_resources),
.resource = eth_resources,
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 653668d..4a8cb8d 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -35,9 +35,9 @@
*/
if (boot_params->sentinel) {
/* fields in boot_params are left uninitialized, clear them */
- memset(&boot_params->olpc_ofw_header, 0,
+ memset(&boot_params->ext_ramdisk_image, 0,
(char *)&boot_params->efi_info -
- (char *)&boot_params->olpc_ofw_header);
+ (char *)&boot_params->ext_ramdisk_image);
memset(&boot_params->kbd_status, 0,
(char *)&boot_params->hdr -
(char *)&boot_params->kbd_status);
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 50e5c58..4c01917 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -59,7 +59,7 @@
extern int __apply_microcode_amd(struct microcode_amd *mc_amd);
extern int apply_microcode_amd(int cpu);
-extern enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size);
+extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
#ifdef CONFIG_MICROCODE_AMD_EARLY
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f654ece..08a0890 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -512,7 +512,7 @@
static const int amd_erratum_383[];
static const int amd_erratum_400[];
-static bool cpu_has_amd_erratum(const int *erratum);
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
static void init_amd(struct cpuinfo_x86 *c)
{
@@ -729,11 +729,11 @@
value &= ~(1ULL << 24);
wrmsrl_safe(MSR_AMD64_BU_CFG2, value);
- if (cpu_has_amd_erratum(amd_erratum_383))
+ if (cpu_has_amd_erratum(c, amd_erratum_383))
set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
}
- if (cpu_has_amd_erratum(amd_erratum_400))
+ if (cpu_has_amd_erratum(c, amd_erratum_400))
set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
@@ -878,23 +878,13 @@
static const int amd_erratum_383[] =
AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
-static bool cpu_has_amd_erratum(const int *erratum)
+
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
- struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info);
int osvw_id = *erratum++;
u32 range;
u32 ms;
- /*
- * If called early enough that current_cpu_data hasn't been initialized
- * yet, fall back to boot_cpu_data.
- */
- if (cpu->x86 == 0)
- cpu = &boot_cpu_data;
-
- if (cpu->x86_vendor != X86_VENDOR_AMD)
- return false;
-
if (osvw_id >= 0 && osvw_id < 65536 &&
cpu_has(cpu, X86_FEATURE_OSVW)) {
u64 osvw_len;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 7a0adb7..7123b5d 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -145,10 +145,9 @@
return 0;
}
-static unsigned int verify_patch_size(int cpu, u32 patch_size,
+static unsigned int verify_patch_size(u8 family, u32 patch_size,
unsigned int size)
{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
u32 max_size;
#define F1XH_MPB_MAX_SIZE 2048
@@ -156,7 +155,7 @@
#define F15H_MPB_MAX_SIZE 4096
#define F16H_MPB_MAX_SIZE 3458
- switch (c->x86) {
+ switch (family) {
case 0x14:
max_size = F14H_MPB_MAX_SIZE;
break;
@@ -277,9 +276,8 @@
* driver cannot continue functioning normally. In such cases, we tear
* down everything we've used up so far and exit.
*/
-static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
+static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_header_amd *mc_hdr;
struct ucode_patch *patch;
unsigned int patch_size, crnt_size, ret;
@@ -299,7 +297,7 @@
/* check if patch is for the current family */
proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
- if (proc_fam != c->x86)
+ if (proc_fam != family)
return crnt_size;
if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
@@ -308,7 +306,7 @@
return crnt_size;
}
- ret = verify_patch_size(cpu, patch_size, leftover);
+ ret = verify_patch_size(family, patch_size, leftover);
if (!ret) {
pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
return crnt_size;
@@ -339,7 +337,8 @@
return crnt_size;
}
-static enum ucode_state __load_microcode_amd(int cpu, const u8 *data, size_t size)
+static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
+ size_t size)
{
enum ucode_state ret = UCODE_ERROR;
unsigned int leftover;
@@ -362,7 +361,7 @@
}
while (leftover) {
- crnt_size = verify_and_add_patch(cpu, fw, leftover);
+ crnt_size = verify_and_add_patch(family, fw, leftover);
if (crnt_size < 0)
return ret;
@@ -373,22 +372,22 @@
return UCODE_OK;
}
-enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
+enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
{
enum ucode_state ret;
/* free old equiv table */
free_equiv_cpu_table();
- ret = __load_microcode_amd(cpu, data, size);
+ ret = __load_microcode_amd(family, data, size);
if (ret != UCODE_OK)
cleanup();
#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32)
/* save BSP's matching patch for early load */
- if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
- struct ucode_patch *p = find_patch(cpu);
+ if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) {
+ struct ucode_patch *p = find_patch(smp_processor_id());
if (p) {
memset(amd_bsp_mpb, 0, MPB_MAX_SIZE);
memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data),
@@ -441,7 +440,7 @@
goto fw_release;
}
- ret = load_microcode_amd(cpu, fw->data, fw->size);
+ ret = load_microcode_amd(c->x86, fw->data, fw->size);
fw_release:
release_firmware(fw);
diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/microcode_amd_early.c
index 1d14ffe..6073104 100644
--- a/arch/x86/kernel/microcode_amd_early.c
+++ b/arch/x86/kernel/microcode_amd_early.c
@@ -238,25 +238,17 @@
uci->cpu_sig.sig = cpuid_eax(0x00000001);
}
#else
-static void collect_cpu_info_amd_early(struct cpuinfo_x86 *c,
- struct ucode_cpu_info *uci)
+void load_ucode_amd_ap(void)
{
+ unsigned int cpu = smp_processor_id();
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
u32 rev, eax;
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
eax = cpuid_eax(0x00000001);
- uci->cpu_sig.sig = eax;
uci->cpu_sig.rev = rev;
- c->microcode = rev;
- c->x86 = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
-}
-
-void load_ucode_amd_ap(void)
-{
- unsigned int cpu = smp_processor_id();
-
- collect_cpu_info_amd_early(&cpu_data(cpu), ucode_cpu_info + cpu);
+ uci->cpu_sig.sig = eax;
if (cpu && !ucode_loaded) {
void *ucode;
@@ -265,8 +257,10 @@
return;
ucode = (void *)(initrd_start + ucode_offset);
- if (load_microcode_amd(0, ucode, ucode_size) != UCODE_OK)
+ eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+ if (load_microcode_amd(eax, ucode, ucode_size) != UCODE_OK)
return;
+
ucode_loaded = true;
}
@@ -278,6 +272,8 @@
{
enum ucode_state ret;
void *ucode;
+ u32 eax;
+
#ifdef CONFIG_X86_32
unsigned int bsp = boot_cpu_data.cpu_index;
struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
@@ -293,7 +289,10 @@
return 0;
ucode = (void *)(initrd_start + ucode_offset);
- ret = load_microcode_amd(0, ucode, ucode_size);
+ eax = cpuid_eax(0x00000001);
+ eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+
+ ret = load_microcode_amd(eax, ucode, ucode_size);
if (ret != UCODE_OK)
return -EINVAL;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 48f8375..30277e2 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -101,7 +101,7 @@
*begin = new_begin;
}
} else {
- *begin = mmap_legacy_base();
+ *begin = current->mm->mmap_legacy_base;
*end = TASK_SIZE;
}
}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index f63778c..25e7e13 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -98,7 +98,7 @@
* Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
* does, but not when emulating X86_32
*/
-unsigned long mmap_legacy_base(void)
+static unsigned long mmap_legacy_base(void)
{
if (mmap_is_ia32())
return TASK_UNMAPPED_BASE;
@@ -112,11 +112,13 @@
*/
void arch_pick_mmap_layout(struct mm_struct *mm)
{
+ mm->mmap_legacy_base = mmap_legacy_base();
+ mm->mmap_base = mmap_base();
+
if (mmap_is_legacy()) {
- mm->mmap_base = mmap_legacy_base();
+ mm->mmap_base = mm->mmap_legacy_base;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
- mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 056d11f..8f3eea6 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -313,6 +313,17 @@
e820_add_region(start, end - start, type);
}
+void xen_ignore_unusable(struct e820entry *list, size_t map_size)
+{
+ struct e820entry *entry;
+ unsigned int i;
+
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+ if (entry->type == E820_UNUSABLE)
+ entry->type = E820_RAM;
+ }
+}
+
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
@@ -353,6 +364,17 @@
}
BUG_ON(rc);
+ /*
+ * Xen won't allow a 1:1 mapping to be created to UNUSABLE
+ * regions, so if we're using the machine memory map leave the
+ * region as RAM as it is in the pseudo-physical map.
+ *
+ * UNUSABLE regions in domUs are not handled and will need
+ * a patch in the future.
+ */
+ if (xen_initial_domain())
+ xen_ignore_unusable(map, memmap.nr_entries);
+
/* Make sure the Xen-supplied memory map is well-ordered. */
sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index ca92754..b81c88e 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -694,8 +694,15 @@
static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int rc;
- rc = native_cpu_up(cpu, tidle);
- WARN_ON (xen_smp_intr_init(cpu));
+ /*
+ * xen_smp_intr_init() needs to run before native_cpu_up()
+ * so that IPI vectors are set up on the booting CPU before
+ * it is marked online in native_cpu_up().
+ */
+ rc = xen_smp_intr_init(cpu);
+ WARN_ON(rc);
+ if (!rc)
+ rc = native_cpu_up(cpu, tidle);
return rc;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index dc53a52..9e65783 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -85,9 +85,17 @@
struct sg_table *sg,
enum dma_data_direction dir)
{
+ struct drm_i915_gem_object *obj = attachment->dmabuf->priv;
+
+ mutex_lock(&obj->base.dev->struct_mutex);
+
dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir);
sg_free_table(sg);
kfree(sg);
+
+ i915_gem_object_unpin_pages(obj);
+
+ mutex_unlock(&obj->base.dev->struct_mutex);
}
static void i915_gem_dmabuf_release(struct dma_buf *dma_buf)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e38b457..be79f47 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -10042,6 +10042,8 @@
u32 power_well_driver;
+ int num_transcoders;
+
struct intel_cursor_error_state {
u32 control;
u32 position;
@@ -10050,16 +10052,7 @@
} cursor[I915_MAX_PIPES];
struct intel_pipe_error_state {
- enum transcoder cpu_transcoder;
- u32 conf;
u32 source;
-
- u32 htotal;
- u32 hblank;
- u32 hsync;
- u32 vtotal;
- u32 vblank;
- u32 vsync;
} pipe[I915_MAX_PIPES];
struct intel_plane_error_state {
@@ -10071,6 +10064,19 @@
u32 surface;
u32 tile_offset;
} plane[I915_MAX_PIPES];
+
+ struct intel_transcoder_error_state {
+ enum transcoder cpu_transcoder;
+
+ u32 conf;
+
+ u32 htotal;
+ u32 hblank;
+ u32 hsync;
+ u32 vtotal;
+ u32 vblank;
+ u32 vsync;
+ } transcoder[4];
};
struct intel_display_error_state *
@@ -10078,9 +10084,17 @@
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct intel_display_error_state *error;
- enum transcoder cpu_transcoder;
+ int transcoders[] = {
+ TRANSCODER_A,
+ TRANSCODER_B,
+ TRANSCODER_C,
+ TRANSCODER_EDP,
+ };
int i;
+ if (INTEL_INFO(dev)->num_pipes == 0)
+ return NULL;
+
error = kmalloc(sizeof(*error), GFP_ATOMIC);
if (error == NULL)
return NULL;
@@ -10089,9 +10103,6 @@
error->power_well_driver = I915_READ(HSW_PWR_WELL_DRIVER);
for_each_pipe(i) {
- cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, i);
- error->pipe[i].cpu_transcoder = cpu_transcoder;
-
if (INTEL_INFO(dev)->gen <= 6 || IS_VALLEYVIEW(dev)) {
error->cursor[i].control = I915_READ(CURCNTR(i));
error->cursor[i].position = I915_READ(CURPOS(i));
@@ -10115,14 +10126,25 @@
error->plane[i].tile_offset = I915_READ(DSPTILEOFF(i));
}
- error->pipe[i].conf = I915_READ(PIPECONF(cpu_transcoder));
error->pipe[i].source = I915_READ(PIPESRC(i));
- error->pipe[i].htotal = I915_READ(HTOTAL(cpu_transcoder));
- error->pipe[i].hblank = I915_READ(HBLANK(cpu_transcoder));
- error->pipe[i].hsync = I915_READ(HSYNC(cpu_transcoder));
- error->pipe[i].vtotal = I915_READ(VTOTAL(cpu_transcoder));
- error->pipe[i].vblank = I915_READ(VBLANK(cpu_transcoder));
- error->pipe[i].vsync = I915_READ(VSYNC(cpu_transcoder));
+ }
+
+ error->num_transcoders = INTEL_INFO(dev)->num_pipes;
+ if (HAS_DDI(dev_priv->dev))
+ error->num_transcoders++; /* Account for eDP. */
+
+ for (i = 0; i < error->num_transcoders; i++) {
+ enum transcoder cpu_transcoder = transcoders[i];
+
+ error->transcoder[i].cpu_transcoder = cpu_transcoder;
+
+ error->transcoder[i].conf = I915_READ(PIPECONF(cpu_transcoder));
+ error->transcoder[i].htotal = I915_READ(HTOTAL(cpu_transcoder));
+ error->transcoder[i].hblank = I915_READ(HBLANK(cpu_transcoder));
+ error->transcoder[i].hsync = I915_READ(HSYNC(cpu_transcoder));
+ error->transcoder[i].vtotal = I915_READ(VTOTAL(cpu_transcoder));
+ error->transcoder[i].vblank = I915_READ(VBLANK(cpu_transcoder));
+ error->transcoder[i].vsync = I915_READ(VSYNC(cpu_transcoder));
}
/* In the code above we read the registers without checking if the power
@@ -10144,22 +10166,16 @@
{
int i;
+ if (!error)
+ return;
+
err_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev)->num_pipes);
if (HAS_POWER_WELL(dev))
err_printf(m, "PWR_WELL_CTL2: %08x\n",
error->power_well_driver);
for_each_pipe(i) {
err_printf(m, "Pipe [%d]:\n", i);
- err_printf(m, " CPU transcoder: %c\n",
- transcoder_name(error->pipe[i].cpu_transcoder));
- err_printf(m, " CONF: %08x\n", error->pipe[i].conf);
err_printf(m, " SRC: %08x\n", error->pipe[i].source);
- err_printf(m, " HTOTAL: %08x\n", error->pipe[i].htotal);
- err_printf(m, " HBLANK: %08x\n", error->pipe[i].hblank);
- err_printf(m, " HSYNC: %08x\n", error->pipe[i].hsync);
- err_printf(m, " VTOTAL: %08x\n", error->pipe[i].vtotal);
- err_printf(m, " VBLANK: %08x\n", error->pipe[i].vblank);
- err_printf(m, " VSYNC: %08x\n", error->pipe[i].vsync);
err_printf(m, "Plane [%d]:\n", i);
err_printf(m, " CNTR: %08x\n", error->plane[i].control);
@@ -10180,5 +10196,17 @@
err_printf(m, " POS: %08x\n", error->cursor[i].position);
err_printf(m, " BASE: %08x\n", error->cursor[i].base);
}
+
+ for (i = 0; i < error->num_transcoders; i++) {
+ err_printf(m, " CPU transcoder: %c\n",
+ transcoder_name(error->transcoder[i].cpu_transcoder));
+ err_printf(m, " CONF: %08x\n", error->transcoder[i].conf);
+ err_printf(m, " HTOTAL: %08x\n", error->transcoder[i].htotal);
+ err_printf(m, " HBLANK: %08x\n", error->transcoder[i].hblank);
+ err_printf(m, " HSYNC: %08x\n", error->transcoder[i].hsync);
+ err_printf(m, " VTOTAL: %08x\n", error->transcoder[i].vtotal);
+ err_printf(m, " VBLANK: %08x\n", error->transcoder[i].vblank);
+ err_printf(m, " VSYNC: %08x\n", error->transcoder[i].vsync);
+ }
}
#endif
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 274b8e1..9f19259 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2163,7 +2163,7 @@
WREG32(reg, tmp_); \
} while (0)
#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
-#define WREG32_OR(reg, or) WREG32_P(reg, or, ~or)
+#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
#define WREG32_PLL_P(reg, val, mask) \
do { \
uint32_t tmp_ = RREG32_PLL(reg); \
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index f1c1575..b79f4f5 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -356,6 +356,14 @@
return -EINVAL;
}
+ if (bo->tbo.sync_obj) {
+ r = radeon_fence_wait(bo->tbo.sync_obj, false);
+ if (r) {
+ DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
+ return r;
+ }
+ }
+
r = radeon_bo_kmap(bo, &ptr);
if (r) {
DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index bcc68ec..f5e92cf 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -744,10 +744,10 @@
(const u32)ARRAY_SIZE(r7xx_golden_dyn_gpr_registers));
radeon_program_register_sequence(rdev,
rv730_golden_registers,
- (const u32)ARRAY_SIZE(rv770_golden_registers));
+ (const u32)ARRAY_SIZE(rv730_golden_registers));
radeon_program_register_sequence(rdev,
rv730_mgcg_init,
- (const u32)ARRAY_SIZE(rv770_mgcg_init));
+ (const u32)ARRAY_SIZE(rv730_mgcg_init));
break;
case CHIP_RV710:
radeon_program_register_sequence(rdev,
@@ -758,18 +758,18 @@
(const u32)ARRAY_SIZE(r7xx_golden_dyn_gpr_registers));
radeon_program_register_sequence(rdev,
rv710_golden_registers,
- (const u32)ARRAY_SIZE(rv770_golden_registers));
+ (const u32)ARRAY_SIZE(rv710_golden_registers));
radeon_program_register_sequence(rdev,
rv710_mgcg_init,
- (const u32)ARRAY_SIZE(rv770_mgcg_init));
+ (const u32)ARRAY_SIZE(rv710_mgcg_init));
break;
case CHIP_RV740:
radeon_program_register_sequence(rdev,
rv740_golden_registers,
- (const u32)ARRAY_SIZE(rv770_golden_registers));
+ (const u32)ARRAY_SIZE(rv740_golden_registers));
radeon_program_register_sequence(rdev,
rv740_mgcg_init,
- (const u32)ARRAY_SIZE(rv770_mgcg_init));
+ (const u32)ARRAY_SIZE(rv740_mgcg_init));
break;
default:
break;
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index dc112a7..4296155 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -959,23 +959,21 @@
return r;
}
-static void remove_mapping(struct mq_policy *mq, dm_oblock_t oblock)
+static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
{
- struct entry *e = hash_lookup(mq, oblock);
+ struct mq_policy *mq = to_mq_policy(p);
+ struct entry *e;
+
+ mutex_lock(&mq->lock);
+
+ e = hash_lookup(mq, oblock);
BUG_ON(!e || !e->in_cache);
del(mq, e);
e->in_cache = false;
push(mq, e);
-}
-static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
-{
- struct mq_policy *mq = to_mq_policy(p);
-
- mutex_lock(&mq->lock);
- remove_mapping(mq, oblock);
mutex_unlock(&mq->lock);
}
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 3a5db7b..0182352 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -971,58 +971,62 @@
/*********************** tlb/rlb shared functions *********************/
-static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
+static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],
+ u16 vid)
{
- struct bonding *bond = bond_get_bond_by_slave(slave);
struct learning_pkt pkt;
+ struct sk_buff *skb;
int size = sizeof(struct learning_pkt);
- int i;
+ char *data;
memset(&pkt, 0, size);
memcpy(pkt.mac_dst, mac_addr, ETH_ALEN);
memcpy(pkt.mac_src, mac_addr, ETH_ALEN);
pkt.type = cpu_to_be16(ETH_P_LOOP);
- for (i = 0; i < MAX_LP_BURST; i++) {
- struct sk_buff *skb;
- char *data;
+ skb = dev_alloc_skb(size);
+ if (!skb)
+ return;
- skb = dev_alloc_skb(size);
+ data = skb_put(skb, size);
+ memcpy(data, &pkt, size);
+
+ skb_reset_mac_header(skb);
+ skb->network_header = skb->mac_header + ETH_HLEN;
+ skb->protocol = pkt.type;
+ skb->priority = TC_PRIO_CONTROL;
+ skb->dev = slave->dev;
+
+ if (vid) {
+ skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vid);
if (!skb) {
+ pr_err("%s: Error: failed to insert VLAN tag\n",
+ slave->bond->dev->name);
return;
}
-
- data = skb_put(skb, size);
- memcpy(data, &pkt, size);
-
- skb_reset_mac_header(skb);
- skb->network_header = skb->mac_header + ETH_HLEN;
- skb->protocol = pkt.type;
- skb->priority = TC_PRIO_CONTROL;
- skb->dev = slave->dev;
-
- if (bond_vlan_used(bond)) {
- struct vlan_entry *vlan;
-
- vlan = bond_next_vlan(bond,
- bond->alb_info.current_alb_vlan);
-
- bond->alb_info.current_alb_vlan = vlan;
- if (!vlan) {
- kfree_skb(skb);
- continue;
- }
-
- skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan->vlan_id);
- if (!skb) {
- pr_err("%s: Error: failed to insert VLAN tag\n",
- bond->dev->name);
- continue;
- }
- }
-
- dev_queue_xmit(skb);
}
+
+ dev_queue_xmit(skb);
+}
+
+
+static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
+{
+ struct bonding *bond = bond_get_bond_by_slave(slave);
+ struct net_device *upper;
+ struct list_head *iter;
+
+ /* send untagged */
+ alb_send_lp_vid(slave, mac_addr, 0);
+
+ /* loop through vlans and send one packet for each */
+ rcu_read_lock();
+ netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+ if (upper->priv_flags & IFF_802_1Q_VLAN)
+ alb_send_lp_vid(slave, mac_addr,
+ vlan_dev_vlan_id(upper));
+ }
+ rcu_read_unlock();
}
static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[])
@@ -1759,11 +1763,6 @@
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
{
- if (bond->alb_info.current_alb_vlan &&
- (bond->alb_info.current_alb_vlan->vlan_id == vlan_id)) {
- bond->alb_info.current_alb_vlan = NULL;
- }
-
if (bond->alb_info.rlb_enabled) {
rlb_clear_vlan(bond, vlan_id);
}
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index e7a5b8b..e02c9c5 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -53,7 +53,6 @@
#define TLB_NULL_INDEX 0xffffffff
-#define MAX_LP_BURST 3
/* rlb defs */
#define RLB_HASH_TABLE_SIZE 256
@@ -170,7 +169,6 @@
* rx traffic should be
* rebalanced
*/
- struct vlan_entry *current_alb_vlan;
};
int bond_alb_initialize(struct bonding *bond, int rlb_enabled);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 4264a76..c50679f 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -283,116 +283,6 @@
/*---------------------------------- VLAN -----------------------------------*/
/**
- * bond_add_vlan - add a new vlan id on bond
- * @bond: bond that got the notification
- * @vlan_id: the vlan id to add
- *
- * Returns -ENOMEM if allocation failed.
- */
-static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
-{
- struct vlan_entry *vlan;
-
- pr_debug("bond: %s, vlan id %d\n",
- (bond ? bond->dev->name : "None"), vlan_id);
-
- vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL);
- if (!vlan)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&vlan->vlan_list);
- vlan->vlan_id = vlan_id;
-
- write_lock_bh(&bond->lock);
-
- list_add_tail(&vlan->vlan_list, &bond->vlan_list);
-
- write_unlock_bh(&bond->lock);
-
- pr_debug("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name);
-
- return 0;
-}
-
-/**
- * bond_del_vlan - delete a vlan id from bond
- * @bond: bond that got the notification
- * @vlan_id: the vlan id to delete
- *
- * returns -ENODEV if @vlan_id was not found in @bond.
- */
-static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
-{
- struct vlan_entry *vlan;
- int res = -ENODEV;
-
- pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
-
- block_netpoll_tx();
- write_lock_bh(&bond->lock);
-
- list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
- if (vlan->vlan_id == vlan_id) {
- list_del(&vlan->vlan_list);
-
- if (bond_is_lb(bond))
- bond_alb_clear_vlan(bond, vlan_id);
-
- pr_debug("removed VLAN ID %d from bond %s\n",
- vlan_id, bond->dev->name);
-
- kfree(vlan);
-
- res = 0;
- goto out;
- }
- }
-
- pr_debug("couldn't find VLAN ID %d in bond %s\n",
- vlan_id, bond->dev->name);
-
-out:
- write_unlock_bh(&bond->lock);
- unblock_netpoll_tx();
- return res;
-}
-
-/**
- * bond_next_vlan - safely skip to the next item in the vlans list.
- * @bond: the bond we're working on
- * @curr: item we're advancing from
- *
- * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL,
- * or @curr->next otherwise (even if it is @curr itself again).
- *
- * Caller must hold bond->lock
- */
-struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
-{
- struct vlan_entry *next, *last;
-
- if (list_empty(&bond->vlan_list))
- return NULL;
-
- if (!curr) {
- next = list_entry(bond->vlan_list.next,
- struct vlan_entry, vlan_list);
- } else {
- last = list_entry(bond->vlan_list.prev,
- struct vlan_entry, vlan_list);
- if (last == curr) {
- next = list_entry(bond->vlan_list.next,
- struct vlan_entry, vlan_list);
- } else {
- next = list_entry(curr->vlan_list.next,
- struct vlan_entry, vlan_list);
- }
- }
-
- return next;
-}
-
-/**
* bond_dev_queue_xmit - Prepare skb for xmit.
*
* @bond: bond device that got this skb for tx.
@@ -451,13 +341,6 @@
goto unwind;
}
- res = bond_add_vlan(bond, vid);
- if (res) {
- pr_err("%s: Error: Failed to add vlan id %d\n",
- bond_dev->name, vid);
- goto unwind;
- }
-
return 0;
unwind:
@@ -478,17 +361,12 @@
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
- int res;
bond_for_each_slave(bond, slave)
vlan_vid_del(slave->dev, proto, vid);
- res = bond_del_vlan(bond, vid);
- if (res) {
- pr_err("%s: Error: Failed to remove vlan id %d\n",
- bond_dev->name, vid);
- return res;
- }
+ if (bond_is_lb(bond))
+ bond_alb_clear_vlan(bond, vid);
return 0;
}
@@ -1603,7 +1481,8 @@
dev_mc_add(slave_dev, lacpdu_multicast);
}
- if (vlan_vids_add_by_dev(slave_dev, bond_dev)) {
+ res = vlan_vids_add_by_dev(slave_dev, bond_dev);
+ if (res) {
pr_err("%s: Error: Couldn't add bond vlan ids to %s\n",
bond_dev->name, slave_dev->name);
goto err_close;
@@ -1953,7 +1832,7 @@
bond_set_carrier(bond);
eth_hw_addr_random(bond_dev);
- if (bond_vlan_used(bond)) {
+ if (vlan_uses_dev(bond_dev)) {
pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n",
bond_dev->name, bond_dev->name);
pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n",
@@ -2391,24 +2270,25 @@
}
}
-static int bond_has_this_ip(struct bonding *bond, __be32 ip)
+static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
{
- struct vlan_entry *vlan;
- struct net_device *vlan_dev;
+ struct net_device *upper;
+ struct list_head *iter;
+ bool ret = false;
if (ip == bond_confirm_addr(bond->dev, 0, ip))
- return 1;
+ return true;
- list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
- rcu_read_lock();
- vlan_dev = __vlan_find_dev_deep(bond->dev, htons(ETH_P_8021Q),
- vlan->vlan_id);
- rcu_read_unlock();
- if (vlan_dev && ip == bond_confirm_addr(vlan_dev, 0, ip))
- return 1;
+ rcu_read_lock();
+ netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+ if (ip == bond_confirm_addr(upper, 0, ip)) {
+ ret = true;
+ break;
+ }
}
+ rcu_read_unlock();
- return 0;
+ return ret;
}
/*
@@ -2443,81 +2323,79 @@
static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
{
- int i, vlan_id;
- __be32 *targets = bond->params.arp_targets;
- struct vlan_entry *vlan;
- struct net_device *vlan_dev = NULL;
+ struct net_device *upper, *vlan_upper;
+ struct list_head *iter, *vlan_iter;
struct rtable *rt;
+ __be32 *targets = bond->params.arp_targets, addr;
+ int i, vlan_id;
- for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) {
- __be32 addr;
- if (!targets[i])
- break;
+ for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) {
pr_debug("basa: target %pI4\n", &targets[i]);
- if (!bond_vlan_used(bond)) {
- pr_debug("basa: empty vlan: arp_send\n");
- addr = bond_confirm_addr(bond->dev, targets[i], 0);
- bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
- addr, 0);
- continue;
- }
- /*
- * If VLANs are configured, we do a route lookup to
- * determine which VLAN interface would be used, so we
- * can tag the ARP with the proper VLAN tag.
- */
+ /* Find out through which dev should the packet go */
rt = ip_route_output(dev_net(bond->dev), targets[i], 0,
RTO_ONLINK, 0);
if (IS_ERR(rt)) {
- if (net_ratelimit()) {
- pr_warning("%s: no route to arp_ip_target %pI4\n",
- bond->dev->name, &targets[i]);
- }
- continue;
- }
-
- /*
- * This target is not on a VLAN
- */
- if (rt->dst.dev == bond->dev) {
- ip_rt_put(rt);
- pr_debug("basa: rtdev == bond->dev: arp_send\n");
- addr = bond_confirm_addr(bond->dev, targets[i], 0);
- bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
- addr, 0);
+ pr_debug("%s: no route to arp_ip_target %pI4\n",
+ bond->dev->name, &targets[i]);
continue;
}
vlan_id = 0;
- list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
- rcu_read_lock();
- vlan_dev = __vlan_find_dev_deep(bond->dev,
- htons(ETH_P_8021Q),
- vlan->vlan_id);
- rcu_read_unlock();
- if (vlan_dev == rt->dst.dev) {
- vlan_id = vlan->vlan_id;
- pr_debug("basa: vlan match on %s %d\n",
- vlan_dev->name, vlan_id);
- break;
+
+ /* bond device itself */
+ if (rt->dst.dev == bond->dev)
+ goto found;
+
+ rcu_read_lock();
+ /* first we search only for vlan devices. for every vlan
+ * found we verify its upper dev list, searching for the
+ * rt->dst.dev. If found we save the tag of the vlan and
+ * proceed to send the packet.
+ *
+ * TODO: QinQ?
+ */
+ netdev_for_each_upper_dev_rcu(bond->dev, vlan_upper, vlan_iter) {
+ if (!is_vlan_dev(vlan_upper))
+ continue;
+ netdev_for_each_upper_dev_rcu(vlan_upper, upper, iter) {
+ if (upper == rt->dst.dev) {
+ vlan_id = vlan_dev_vlan_id(vlan_upper);
+ rcu_read_unlock();
+ goto found;
+ }
}
}
- if (vlan_id && vlan_dev) {
- ip_rt_put(rt);
- addr = bond_confirm_addr(vlan_dev, targets[i], 0);
- bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
- addr, vlan_id);
- continue;
- }
+ /* if the device we're looking for is not on top of any of
+ * our upper vlans, then just search for any dev that
+ * matches, and in case it's a vlan - save the id
+ */
+ netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+ if (upper == rt->dst.dev) {
+ /* if it's a vlan - get its VID */
+ if (is_vlan_dev(upper))
+ vlan_id = vlan_dev_vlan_id(upper);
- if (net_ratelimit()) {
- pr_warning("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",
- bond->dev->name, &targets[i],
- rt->dst.dev ? rt->dst.dev->name : "NULL");
+ rcu_read_unlock();
+ goto found;
+ }
}
+ rcu_read_unlock();
+
+ /* Not our device - skip */
+ pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",
+ bond->dev->name, &targets[i],
+ rt->dst.dev ? rt->dst.dev->name : "NULL");
+
ip_rt_put(rt);
+ continue;
+
+found:
+ addr = bond_confirm_addr(rt->dst.dev, targets[i], 0);
+ ip_rt_put(rt);
+ bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
+ addr, vlan_id);
}
}
@@ -4141,7 +4019,6 @@
/* Initialize pointers */
bond->dev = bond_dev;
- INIT_LIST_HEAD(&bond->vlan_list);
/* Initialize the device entry points */
ether_setup(bond_dev);
@@ -4194,7 +4071,6 @@
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave, *tmp_slave;
- struct vlan_entry *vlan, *tmp;
bond_netpoll_cleanup(bond_dev);
@@ -4206,11 +4082,6 @@
list_del(&bond->bond_list);
bond_debug_unregister(bond);
-
- list_for_each_entry_safe(vlan, tmp, &bond->vlan_list, vlan_list) {
- list_del(&vlan->vlan_list);
- kfree(vlan);
- }
}
/*------------------------- Module initialization ---------------------------*/
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 4bf52d5..4abc925 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -185,11 +185,6 @@
#define BOND_MAX_MODENAME_LEN 20
-struct vlan_entry {
- struct list_head vlan_list;
- unsigned short vlan_id;
-};
-
struct slave {
struct net_device *dev; /* first - useful for panic debug */
struct list_head list;
@@ -254,7 +249,6 @@
struct ad_bond_info ad_info;
struct alb_bond_info alb_info;
struct bond_params params;
- struct list_head vlan_list;
struct workqueue_struct *wq;
struct delayed_work mii_work;
struct delayed_work arp_work;
@@ -267,9 +261,22 @@
#endif /* CONFIG_DEBUG_FS */
};
+/* if we hold rtnl_lock() - call vlan_uses_dev() */
static inline bool bond_vlan_used(struct bonding *bond)
{
- return !list_empty(&bond->vlan_list);
+ struct net_device *upper;
+ struct list_head *iter;
+
+ rcu_read_lock();
+ netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+ if (upper->priv_flags & IFF_802_1Q_VLAN) {
+ rcu_read_unlock();
+ return true;
+ }
+ }
+ rcu_read_unlock();
+
+ return false;
}
#define bond_slave_get_rcu(dev) \
diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index dbbe97a..3b1ff61 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -1355,7 +1355,7 @@
if (at91_is_sam9263(priv))
dev->sysfs_groups[0] = &at91_sysfs_attr_group;
- dev_set_drvdata(&pdev->dev, dev);
+ platform_set_drvdata(pdev, dev);
SET_NETDEV_DEV(dev, &pdev->dev);
err = register_candev(dev);
diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index c6f838d..294ced3 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c
@@ -195,7 +195,7 @@
res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
priv->raminit_ctrlreg = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(priv->raminit_ctrlreg) || priv->instance < 0)
+ if (IS_ERR(priv->raminit_ctrlreg) || (int)priv->instance < 0)
dev_info(&pdev->dev, "control memory is not used for raminit\n");
else
priv->raminit = c_can_hw_raminit;
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index c48174e..71c677e 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -1083,7 +1083,7 @@
netif_napi_add(dev, &priv->napi, flexcan_poll, FLEXCAN_NAPI_WEIGHT);
- dev_set_drvdata(&pdev->dev, dev);
+ platform_set_drvdata(pdev, dev);
SET_NETDEV_DEV(dev, &pdev->dev);
err = register_flexcandev(dev);
diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c
index 8cda23b..fe7dd69 100644
--- a/drivers/net/can/mcp251x.c
+++ b/drivers/net/can/mcp251x.c
@@ -37,9 +37,6 @@
*
* static struct mcp251x_platform_data mcp251x_info = {
* .oscillator_frequency = 8000000,
- * .board_specific_setup = &mcp251x_setup,
- * .power_enable = mcp251x_power_enable,
- * .transceiver_enable = NULL,
* };
*
* static struct spi_board_info spi_board_info[] = {
@@ -76,6 +73,7 @@
#include <linux/slab.h>
#include <linux/spi/spi.h>
#include <linux/uaccess.h>
+#include <linux/regulator/consumer.h>
/* SPI interface instruction set */
#define INSTRUCTION_WRITE 0x02
@@ -264,6 +262,8 @@
#define AFTER_SUSPEND_POWER 4
#define AFTER_SUSPEND_RESTART 8
int restart_tx;
+ struct regulator *power;
+ struct regulator *transceiver;
};
#define MCP251X_IS(_model) \
@@ -667,16 +667,25 @@
return (st1 == 0x80 && st2 == 0x07) ? 1 : 0;
}
+static int mcp251x_power_enable(struct regulator *reg, int enable)
+{
+ if (IS_ERR(reg))
+ return 0;
+
+ if (enable)
+ return regulator_enable(reg);
+ else
+ return regulator_disable(reg);
+}
+
static void mcp251x_open_clean(struct net_device *net)
{
struct mcp251x_priv *priv = netdev_priv(net);
struct spi_device *spi = priv->spi;
- struct mcp251x_platform_data *pdata = spi->dev.platform_data;
free_irq(spi->irq, priv);
mcp251x_hw_sleep(spi);
- if (pdata->transceiver_enable)
- pdata->transceiver_enable(0);
+ mcp251x_power_enable(priv->transceiver, 0);
close_candev(net);
}
@@ -684,7 +693,6 @@
{
struct mcp251x_priv *priv = netdev_priv(net);
struct spi_device *spi = priv->spi;
- struct mcp251x_platform_data *pdata = spi->dev.platform_data;
close_candev(net);
@@ -704,8 +712,7 @@
mcp251x_hw_sleep(spi);
- if (pdata->transceiver_enable)
- pdata->transceiver_enable(0);
+ mcp251x_power_enable(priv->transceiver, 0);
priv->can.state = CAN_STATE_STOPPED;
@@ -928,8 +935,7 @@
{
struct mcp251x_priv *priv = netdev_priv(net);
struct spi_device *spi = priv->spi;
- struct mcp251x_platform_data *pdata = spi->dev.platform_data;
- unsigned long flags;
+ unsigned long flags = IRQF_ONESHOT | IRQF_TRIGGER_FALLING;
int ret;
ret = open_candev(net);
@@ -939,25 +945,17 @@
}
mutex_lock(&priv->mcp_lock);
- if (pdata->transceiver_enable)
- pdata->transceiver_enable(1);
+ mcp251x_power_enable(priv->transceiver, 1);
priv->force_quit = 0;
priv->tx_skb = NULL;
priv->tx_len = 0;
- flags = IRQF_ONESHOT;
- if (pdata->irq_flags)
- flags |= pdata->irq_flags;
- else
- flags |= IRQF_TRIGGER_FALLING;
-
ret = request_threaded_irq(spi->irq, NULL, mcp251x_can_ist,
flags, DEVICE_NAME, priv);
if (ret) {
dev_err(&spi->dev, "failed to acquire irq %d\n", spi->irq);
- if (pdata->transceiver_enable)
- pdata->transceiver_enable(0);
+ mcp251x_power_enable(priv->transceiver, 0);
close_candev(net);
goto open_unlock;
}
@@ -1026,6 +1024,19 @@
CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY;
priv->model = spi_get_device_id(spi)->driver_data;
priv->net = net;
+
+ priv->power = devm_regulator_get(&spi->dev, "vdd");
+ priv->transceiver = devm_regulator_get(&spi->dev, "xceiver");
+ if ((PTR_ERR(priv->power) == -EPROBE_DEFER) ||
+ (PTR_ERR(priv->transceiver) == -EPROBE_DEFER)) {
+ ret = -EPROBE_DEFER;
+ goto error_power;
+ }
+
+ ret = mcp251x_power_enable(priv->power, 1);
+ if (ret)
+ goto error_power;
+
spi_set_drvdata(spi, priv);
priv->spi = spi;
@@ -1068,30 +1079,24 @@
}
}
- if (pdata->power_enable)
- pdata->power_enable(1);
-
- /* Call out to platform specific setup */
- if (pdata->board_specific_setup)
- pdata->board_specific_setup(spi);
-
SET_NETDEV_DEV(net, &spi->dev);
/* Configure the SPI bus */
- spi->mode = SPI_MODE_0;
+ spi->mode = spi->mode ? : SPI_MODE_0;
+ if (mcp251x_is_2510(spi))
+ spi->max_speed_hz = spi->max_speed_hz ? : 5 * 1000 * 1000;
+ else
+ spi->max_speed_hz = spi->max_speed_hz ? : 10 * 1000 * 1000;
spi->bits_per_word = 8;
spi_setup(spi);
/* Here is OK to not lock the MCP, no one knows about it yet */
if (!mcp251x_hw_probe(spi)) {
- dev_info(&spi->dev, "Probe failed\n");
+ ret = -ENODEV;
goto error_probe;
}
mcp251x_hw_sleep(spi);
- if (pdata->transceiver_enable)
- pdata->transceiver_enable(0);
-
ret = register_candev(net);
if (ret)
goto error_probe;
@@ -1109,13 +1114,13 @@
if (!mcp251x_enable_dma)
kfree(priv->spi_tx_buf);
error_tx_buf:
- free_candev(net);
if (mcp251x_enable_dma)
dma_free_coherent(&spi->dev, PAGE_SIZE,
priv->spi_tx_buf, priv->spi_tx_dma);
+ mcp251x_power_enable(priv->power, 0);
+error_power:
+ free_candev(net);
error_alloc:
- if (pdata->power_enable)
- pdata->power_enable(0);
dev_err(&spi->dev, "probe failed\n");
error_out:
return ret;
@@ -1123,12 +1128,10 @@
static int mcp251x_can_remove(struct spi_device *spi)
{
- struct mcp251x_platform_data *pdata = spi->dev.platform_data;
struct mcp251x_priv *priv = spi_get_drvdata(spi);
struct net_device *net = priv->net;
unregister_candev(net);
- free_candev(net);
if (mcp251x_enable_dma) {
dma_free_coherent(&spi->dev, PAGE_SIZE,
@@ -1138,8 +1141,9 @@
kfree(priv->spi_rx_buf);
}
- if (pdata->power_enable)
- pdata->power_enable(0);
+ mcp251x_power_enable(priv->power, 0);
+
+ free_candev(net);
return 0;
}
@@ -1149,7 +1153,6 @@
static int mcp251x_can_suspend(struct device *dev)
{
struct spi_device *spi = to_spi_device(dev);
- struct mcp251x_platform_data *pdata = spi->dev.platform_data;
struct mcp251x_priv *priv = spi_get_drvdata(spi);
struct net_device *net = priv->net;
@@ -1163,15 +1166,14 @@
netif_device_detach(net);
mcp251x_hw_sleep(spi);
- if (pdata->transceiver_enable)
- pdata->transceiver_enable(0);
+ mcp251x_power_enable(priv->transceiver, 0);
priv->after_suspend = AFTER_SUSPEND_UP;
} else {
priv->after_suspend = AFTER_SUSPEND_DOWN;
}
- if (pdata->power_enable) {
- pdata->power_enable(0);
+ if (!IS_ERR(priv->power)) {
+ regulator_disable(priv->power);
priv->after_suspend |= AFTER_SUSPEND_POWER;
}
@@ -1181,16 +1183,14 @@
static int mcp251x_can_resume(struct device *dev)
{
struct spi_device *spi = to_spi_device(dev);
- struct mcp251x_platform_data *pdata = spi->dev.platform_data;
struct mcp251x_priv *priv = spi_get_drvdata(spi);
if (priv->after_suspend & AFTER_SUSPEND_POWER) {
- pdata->power_enable(1);
+ mcp251x_power_enable(priv->power, 1);
queue_work(priv->wq, &priv->restart_work);
} else {
if (priv->after_suspend & AFTER_SUSPEND_UP) {
- if (pdata->transceiver_enable)
- pdata->transceiver_enable(1);
+ mcp251x_power_enable(priv->transceiver, 1);
queue_work(priv->wq, &priv->restart_work);
} else {
priv->after_suspend = 0;
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index b7232a9..f92f001 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -840,7 +840,7 @@
ei_local = netdev_priv(dev);
ax = to_ax_dev(dev);
- ax->plat = pdev->dev.platform_data;
+ ax->plat = dev_get_platdata(&pdev->dev);
platform_set_drvdata(pdev, dev);
ei_local->rxcr_base = ax->plat->rcr_val;
diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c
index e904b38..e66684a 100644
--- a/drivers/net/ethernet/adi/bfin_mac.c
+++ b/drivers/net/ethernet/adi/bfin_mac.c
@@ -1647,12 +1647,12 @@
setup_mac_addr(ndev->dev_addr);
- if (!pdev->dev.platform_data) {
+ if (!dev_get_platdata(&pdev->dev)) {
dev_err(&pdev->dev, "Cannot get platform device bfin_mii_bus!\n");
rc = -ENODEV;
goto out_err_probe_mac;
}
- pd = pdev->dev.platform_data;
+ pd = dev_get_platdata(&pdev->dev);
lp->mii_bus = platform_get_drvdata(pd);
if (!lp->mii_bus) {
dev_err(&pdev->dev, "Cannot get mii_bus!\n");
@@ -1660,7 +1660,7 @@
goto out_err_probe_mac;
}
lp->mii_bus->priv = ndev;
- mii_bus_data = pd->dev.platform_data;
+ mii_bus_data = dev_get_platdata(&pd->dev);
rc = mii_probe(ndev, mii_bus_data->phy_mode);
if (rc) {
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index 7ff4b30..e066945 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1464,18 +1464,18 @@
}
/* Allocate TX descriptor ring in coherent memory */
- greth->tx_bd_base = dma_alloc_coherent(greth->dev, 1024,
- &greth->tx_bd_base_phys,
- GFP_KERNEL | __GFP_ZERO);
+ greth->tx_bd_base = dma_zalloc_coherent(greth->dev, 1024,
+ &greth->tx_bd_base_phys,
+ GFP_KERNEL);
if (!greth->tx_bd_base) {
err = -ENOMEM;
goto error3;
}
/* Allocate RX descriptor ring in coherent memory */
- greth->rx_bd_base = dma_alloc_coherent(greth->dev, 1024,
- &greth->rx_bd_base_phys,
- GFP_KERNEL | __GFP_ZERO);
+ greth->rx_bd_base = dma_zalloc_coherent(greth->dev, 1024,
+ &greth->rx_bd_base_phys,
+ GFP_KERNEL);
if (!greth->rx_bd_base) {
err = -ENOMEM;
goto error4;
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index ceb45bc..91d52b4 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -1131,7 +1131,7 @@
writel(0, aup->enable);
aup->mac_enabled = 0;
- pd = pdev->dev.platform_data;
+ pd = dev_get_platdata(&pdev->dev);
if (!pd) {
dev_info(&pdev->dev, "no platform_data passed,"
" PHY search on MAC0\n");
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index b1bcd4b..8ac48fb 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -948,8 +948,7 @@
/* allocate rx dma ring */
size = priv->rx_ring_size * sizeof(struct bcm_enet_desc);
- p = dma_alloc_coherent(kdev, size, &priv->rx_desc_dma,
- GFP_KERNEL | __GFP_ZERO);
+ p = dma_zalloc_coherent(kdev, size, &priv->rx_desc_dma, GFP_KERNEL);
if (!p) {
ret = -ENOMEM;
goto out_freeirq_tx;
@@ -960,8 +959,7 @@
/* allocate tx dma ring */
size = priv->tx_ring_size * sizeof(struct bcm_enet_desc);
- p = dma_alloc_coherent(kdev, size, &priv->tx_desc_dma,
- GFP_KERNEL | __GFP_ZERO);
+ p = dma_zalloc_coherent(kdev, size, &priv->tx_desc_dma, GFP_KERNEL);
if (!p) {
ret = -ENOMEM;
goto out_free_rx_ring;
@@ -1747,11 +1745,10 @@
if (!bcm_enet_shared_base[0])
return -ENODEV;
- res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
res_irq_rx = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
res_irq_tx = platform_get_resource(pdev, IORESOURCE_IRQ, 2);
- if (!res_mem || !res_irq || !res_irq_rx || !res_irq_tx)
+ if (!res_irq || !res_irq_rx || !res_irq_tx)
return -ENODEV;
ret = 0;
@@ -1767,9 +1764,10 @@
if (ret)
goto out;
- priv->base = devm_request_and_ioremap(&pdev->dev, res_mem);
- if (priv->base == NULL) {
- ret = -ENOMEM;
+ res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ priv->base = devm_ioremap_resource(&pdev->dev, res_mem);
+ if (IS_ERR(priv->base)) {
+ ret = PTR_ERR(priv->base);
goto out;
}
@@ -1800,7 +1798,7 @@
priv->rx_ring_size = BCMENET_DEF_RX_DESC;
priv->tx_ring_size = BCMENET_DEF_TX_DESC;
- pd = pdev->dev.platform_data;
+ pd = dev_get_platdata(&pdev->dev);
if (pd) {
memcpy(dev->dev_addr, pd->mac_addr, ETH_ALEN);
priv->has_phy = pd->has_phy;
@@ -1964,7 +1962,7 @@
} else {
struct bcm63xx_enet_platform_data *pd;
- pd = pdev->dev.platform_data;
+ pd = dev_get_platdata(&pdev->dev);
if (pd && pd->mii_config)
pd->mii_config(dev, 0, bcm_enet_mdio_read_mii,
bcm_enet_mdio_write_mii);
@@ -2742,7 +2740,7 @@
priv->tx_ring_size = BCMENET_DEF_TX_DESC;
priv->dma_maxburst = BCMENETSW_DMA_MAXBURST;
- pd = pdev->dev.platform_data;
+ pd = dev_get_platdata(&pdev->dev);
if (pd) {
memcpy(dev->dev_addr, pd->mac_addr, ETH_ALEN);
memcpy(priv->used_ports, pd->used_ports,
@@ -2836,7 +2834,6 @@
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
release_mem_region(res->start, resource_size(res));
- platform_set_drvdata(pdev, NULL);
free_netdev(dev);
return 0;
}
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 4148058..e838a3f 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -853,9 +853,8 @@
bp->status_stats_size = status_blk_size +
sizeof(struct statistics_block);
- status_blk = dma_alloc_coherent(&bp->pdev->dev, bp->status_stats_size,
- &bp->status_blk_mapping,
- GFP_KERNEL | __GFP_ZERO);
+ status_blk = dma_zalloc_coherent(&bp->pdev->dev, bp->status_stats_size,
+ &bp->status_blk_mapping, GFP_KERNEL);
if (status_blk == NULL)
goto alloc_mem_err;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 126dec4..3e77a1b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1333,6 +1333,8 @@
BNX2X_SP_RTNL_VFPF_CHANNEL_DOWN,
BNX2X_SP_RTNL_RX_MODE,
BNX2X_SP_RTNL_HYPERVISOR_VLAN,
+ BNX2X_SP_RTNL_TX_STOP,
+ BNX2X_SP_RTNL_TX_RESUME,
};
struct bnx2x_prev_path_list {
@@ -2067,9 +2069,8 @@
void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
bool is_pf);
-#define BNX2X_ILT_ZALLOC(x, y, size) \
- x = dma_alloc_coherent(&bp->pdev->dev, size, y, \
- GFP_KERNEL | __GFP_ZERO)
+#define BNX2X_ILT_ZALLOC(x, y, size) \
+ x = dma_zalloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL)
#define BNX2X_ILT_FREE(x, y, size) \
do { \
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 38be494..affb764 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -51,8 +51,7 @@
#define BNX2X_PCI_ALLOC(x, y, size) \
do { \
- x = dma_alloc_coherent(&bp->pdev->dev, size, y, \
- GFP_KERNEL | __GFP_ZERO); \
+ x = dma_zalloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL); \
if (x == NULL) \
goto alloc_mem_err; \
DP(NETIF_MSG_HW, "BNX2X_PCI_ALLOC: Physical %Lx Virtual %p\n", \
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
index f9122f2..fcf2761 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
@@ -30,10 +30,8 @@
#include "bnx2x_dcb.h"
/* forward declarations of dcbx related functions */
-static int bnx2x_dcbx_stop_hw_tx(struct bnx2x *bp);
static void bnx2x_pfc_set_pfc(struct bnx2x *bp);
static void bnx2x_dcbx_update_ets_params(struct bnx2x *bp);
-static int bnx2x_dcbx_resume_hw_tx(struct bnx2x *bp);
static void bnx2x_dcbx_get_ets_pri_pg_tbl(struct bnx2x *bp,
u32 *set_configuration_ets_pg,
u32 *pri_pg_tbl);
@@ -425,30 +423,52 @@
bnx2x_pfc_clear(bp);
}
-static int bnx2x_dcbx_stop_hw_tx(struct bnx2x *bp)
+int bnx2x_dcbx_stop_hw_tx(struct bnx2x *bp)
{
struct bnx2x_func_state_params func_params = {NULL};
+ int rc;
func_params.f_obj = &bp->func_obj;
func_params.cmd = BNX2X_F_CMD_TX_STOP;
+ __set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+ __set_bit(RAMROD_RETRY, &func_params.ramrod_flags);
+
DP(BNX2X_MSG_DCB, "STOP TRAFFIC\n");
- return bnx2x_func_state_change(bp, &func_params);
+
+ rc = bnx2x_func_state_change(bp, &func_params);
+ if (rc) {
+ BNX2X_ERR("Unable to hold traffic for HW configuration\n");
+ bnx2x_panic();
+ }
+
+ return rc;
}
-static int bnx2x_dcbx_resume_hw_tx(struct bnx2x *bp)
+int bnx2x_dcbx_resume_hw_tx(struct bnx2x *bp)
{
struct bnx2x_func_state_params func_params = {NULL};
struct bnx2x_func_tx_start_params *tx_params =
&func_params.params.tx_start;
+ int rc;
func_params.f_obj = &bp->func_obj;
func_params.cmd = BNX2X_F_CMD_TX_START;
+ __set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+ __set_bit(RAMROD_RETRY, &func_params.ramrod_flags);
+
bnx2x_dcbx_fw_struct(bp, tx_params);
DP(BNX2X_MSG_DCB, "START TRAFFIC\n");
- return bnx2x_func_state_change(bp, &func_params);
+
+ rc = bnx2x_func_state_change(bp, &func_params);
+ if (rc) {
+ BNX2X_ERR("Unable to resume traffic after HW configuration\n");
+ bnx2x_panic();
+ }
+
+ return rc;
}
static void bnx2x_dcbx_2cos_limit_update_ets_config(struct bnx2x *bp)
@@ -744,7 +764,9 @@
if (IS_MF(bp))
bnx2x_link_sync_notify(bp);
- bnx2x_dcbx_stop_hw_tx(bp);
+ set_bit(BNX2X_SP_RTNL_TX_STOP, &bp->sp_rtnl_state);
+
+ schedule_delayed_work(&bp->sp_rtnl_task, 0);
return;
}
@@ -757,7 +779,9 @@
/* ets may affect cmng configuration: reinit it in hw */
bnx2x_set_local_cmng(bp);
- bnx2x_dcbx_resume_hw_tx(bp);
+ set_bit(BNX2X_SP_RTNL_TX_RESUME, &bp->sp_rtnl_state);
+
+ schedule_delayed_work(&bp->sp_rtnl_task, 0);
return;
case BNX2X_DCBX_STATE_TX_RELEASED:
@@ -2367,21 +2391,24 @@
case DCB_FEATCFG_ATTR_PG:
if (bp->dcbx_local_feat.ets.enabled)
*flags |= DCB_FEATCFG_ENABLE;
- if (bp->dcbx_error & DCBX_LOCAL_ETS_ERROR)
+ if (bp->dcbx_error & (DCBX_LOCAL_ETS_ERROR |
+ DCBX_REMOTE_MIB_ERROR))
*flags |= DCB_FEATCFG_ERROR;
break;
case DCB_FEATCFG_ATTR_PFC:
if (bp->dcbx_local_feat.pfc.enabled)
*flags |= DCB_FEATCFG_ENABLE;
if (bp->dcbx_error & (DCBX_LOCAL_PFC_ERROR |
- DCBX_LOCAL_PFC_MISMATCH))
+ DCBX_LOCAL_PFC_MISMATCH |
+ DCBX_REMOTE_MIB_ERROR))
*flags |= DCB_FEATCFG_ERROR;
break;
case DCB_FEATCFG_ATTR_APP:
if (bp->dcbx_local_feat.app.enabled)
*flags |= DCB_FEATCFG_ENABLE;
if (bp->dcbx_error & (DCBX_LOCAL_APP_ERROR |
- DCBX_LOCAL_APP_MISMATCH))
+ DCBX_LOCAL_APP_MISMATCH |
+ DCBX_REMOTE_MIB_ERROR))
*flags |= DCB_FEATCFG_ERROR;
break;
default:
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h
index 125bd1b..804b8f6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h
@@ -199,4 +199,7 @@
int bnx2x_dcbnl_update_applist(struct bnx2x *bp, bool delall);
#endif /* BCM_DCBNL */
+int bnx2x_dcbx_stop_hw_tx(struct bnx2x *bp);
+int bnx2x_dcbx_resume_hw_tx(struct bnx2x *bp);
+
#endif /* BNX2X_DCB_H */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 7f4ec80..17f117c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -2261,6 +2261,23 @@
bp->link_params.req_fc_auto_adv = BNX2X_FLOW_CTRL_BOTH;
}
+static void bnx2x_init_dropless_fc(struct bnx2x *bp)
+{
+ u32 pause_enabled = 0;
+
+ if (!CHIP_IS_E1(bp) && bp->dropless_fc && bp->link_vars.link_up) {
+ if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
+ pause_enabled = 1;
+
+ REG_WR(bp, BAR_USTRORM_INTMEM +
+ USTORM_ETH_PAUSE_ENABLED_OFFSET(BP_PORT(bp)),
+ pause_enabled);
+ }
+
+ DP(NETIF_MSG_IFUP | NETIF_MSG_LINK, "dropless_fc is %s\n",
+ pause_enabled ? "enabled" : "disabled");
+}
+
int bnx2x_initial_phy_init(struct bnx2x *bp, int load_mode)
{
int rc, cfx_idx = bnx2x_get_link_cfg_idx(bp);
@@ -2294,6 +2311,8 @@
bnx2x_release_phy_lock(bp);
+ bnx2x_init_dropless_fc(bp);
+
bnx2x_calc_fc_adv(bp);
if (bp->link_vars.link_up) {
@@ -2315,6 +2334,8 @@
bnx2x_phy_init(&bp->link_params, &bp->link_vars);
bnx2x_release_phy_lock(bp);
+ bnx2x_init_dropless_fc(bp);
+
bnx2x_calc_fc_adv(bp);
} else
BNX2X_ERR("Bootcode is missing - can not set link\n");
@@ -2556,21 +2577,10 @@
bnx2x_link_update(&bp->link_params, &bp->link_vars);
+ bnx2x_init_dropless_fc(bp);
+
if (bp->link_vars.link_up) {
- /* dropless flow control */
- if (!CHIP_IS_E1(bp) && bp->dropless_fc) {
- int port = BP_PORT(bp);
- u32 pause_enabled = 0;
-
- if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
- pause_enabled = 1;
-
- REG_WR(bp, BAR_USTRORM_INTMEM +
- USTORM_ETH_PAUSE_ENABLED_OFFSET(port),
- pause_enabled);
- }
-
if (bp->link_vars.mac_type != MAC_TYPE_EMAC) {
struct host_port_stats *pstats;
@@ -9643,6 +9653,12 @@
&bp->sp_rtnl_state))
bnx2x_pf_set_vfs_vlan(bp);
+ if (test_and_clear_bit(BNX2X_SP_RTNL_TX_STOP, &bp->sp_rtnl_state))
+ bnx2x_dcbx_stop_hw_tx(bp);
+
+ if (test_and_clear_bit(BNX2X_SP_RTNL_TX_RESUME, &bp->sp_rtnl_state))
+ bnx2x_dcbx_resume_hw_tx(bp);
+
/* work which needs rtnl lock not-taken (as it takes the lock itself and
* can be called from other contexts as well)
*/
@@ -11145,6 +11161,9 @@
int tmp;
u32 cfg;
+ if (IS_VF(bp))
+ return 0;
+
if (IS_MF(bp) && !CHIP_IS_E1x(bp)) {
/* Take function: tmp = func */
tmp = BP_ABS_FUNC(bp);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 1d925fd..fbc026c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1755,11 +1755,8 @@
void bnx2x_iov_init_dmae(struct bnx2x *bp)
{
- DP(BNX2X_MSG_IOV, "SRIOV is %s\n", IS_SRIOV(bp) ? "ON" : "OFF");
- if (!IS_SRIOV(bp))
- return;
-
- REG_WR(bp, DMAE_REG_BACKWARD_COMP_EN, 0);
+ if (pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV))
+ REG_WR(bp, DMAE_REG_BACKWARD_COMP_EN, 0);
}
static int bnx2x_vf_bus(struct bnx2x *bp, int vfid)
@@ -3092,8 +3089,9 @@
pci_disable_sriov(bp->pdev);
}
-static int bnx2x_vf_ndo_sanity(struct bnx2x *bp, int vfidx,
- struct bnx2x_virtf *vf)
+static int bnx2x_vf_ndo_prep(struct bnx2x *bp, int vfidx,
+ struct bnx2x_virtf **vf,
+ struct pf_vf_bulletin_content **bulletin)
{
if (bp->state != BNX2X_STATE_OPEN) {
BNX2X_ERR("vf ndo called though PF is down\n");
@@ -3111,12 +3109,22 @@
return -EINVAL;
}
- if (!vf) {
+ /* init members */
+ *vf = BP_VF(bp, vfidx);
+ *bulletin = BP_VF_BULLETIN(bp, vfidx);
+
+ if (!*vf) {
BNX2X_ERR("vf ndo called but vf was null. vfidx was %d\n",
vfidx);
return -EINVAL;
}
+ if (!*bulletin) {
+ BNX2X_ERR("vf ndo called but Bulletin Board struct is null. vfidx was %d\n",
+ vfidx);
+ return -EINVAL;
+ }
+
return 0;
}
@@ -3124,17 +3132,19 @@
struct ifla_vf_info *ivi)
{
struct bnx2x *bp = netdev_priv(dev);
- struct bnx2x_virtf *vf = BP_VF(bp, vfidx);
- struct bnx2x_vlan_mac_obj *mac_obj = &bnx2x_vfq(vf, 0, mac_obj);
- struct bnx2x_vlan_mac_obj *vlan_obj = &bnx2x_vfq(vf, 0, vlan_obj);
- struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vfidx);
+ struct bnx2x_virtf *vf = NULL;
+ struct pf_vf_bulletin_content *bulletin = NULL;
+ struct bnx2x_vlan_mac_obj *mac_obj;
+ struct bnx2x_vlan_mac_obj *vlan_obj;
int rc;
- /* sanity */
- rc = bnx2x_vf_ndo_sanity(bp, vfidx, vf);
+ /* sanity and init */
+ rc = bnx2x_vf_ndo_prep(bp, vfidx, &vf, &bulletin);
if (rc)
return rc;
- if (!mac_obj || !vlan_obj || !bulletin) {
+ mac_obj = &bnx2x_vfq(vf, 0, mac_obj);
+ vlan_obj = &bnx2x_vfq(vf, 0, vlan_obj);
+ if (!mac_obj || !vlan_obj) {
BNX2X_ERR("VF partially initialized\n");
return -EINVAL;
}
@@ -3191,11 +3201,11 @@
{
struct bnx2x *bp = netdev_priv(dev);
int rc, q_logical_state;
- struct bnx2x_virtf *vf = BP_VF(bp, vfidx);
- struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vfidx);
+ struct bnx2x_virtf *vf = NULL;
+ struct pf_vf_bulletin_content *bulletin = NULL;
- /* sanity */
- rc = bnx2x_vf_ndo_sanity(bp, vfidx, vf);
+ /* sanity and init */
+ rc = bnx2x_vf_ndo_prep(bp, vfidx, &vf, &bulletin);
if (rc)
return rc;
if (!is_valid_ether_addr(mac)) {
@@ -3257,11 +3267,11 @@
{
struct bnx2x *bp = netdev_priv(dev);
int rc, q_logical_state;
- struct bnx2x_virtf *vf = BP_VF(bp, vfidx);
- struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vfidx);
+ struct bnx2x_virtf *vf = NULL;
+ struct pf_vf_bulletin_content *bulletin = NULL;
- /* sanity */
- rc = bnx2x_vf_ndo_sanity(bp, vfidx, vf);
+ /* sanity and init */
+ rc = bnx2x_vf_ndo_prep(bp, vfidx, &vf, &bulletin);
if (rc)
return rc;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 95b8995..2e55ee2 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -8591,10 +8591,10 @@
if (!i && tg3_flag(tp, ENABLE_RSS))
continue;
- tnapi->rx_rcb = dma_alloc_coherent(&tp->pdev->dev,
- TG3_RX_RCB_RING_BYTES(tp),
- &tnapi->rx_rcb_mapping,
- GFP_KERNEL | __GFP_ZERO);
+ tnapi->rx_rcb = dma_zalloc_coherent(&tp->pdev->dev,
+ TG3_RX_RCB_RING_BYTES(tp),
+ &tnapi->rx_rcb_mapping,
+ GFP_KERNEL);
if (!tnapi->rx_rcb)
goto err_out;
}
@@ -8643,10 +8643,9 @@
{
int i;
- tp->hw_stats = dma_alloc_coherent(&tp->pdev->dev,
- sizeof(struct tg3_hw_stats),
- &tp->stats_mapping,
- GFP_KERNEL | __GFP_ZERO);
+ tp->hw_stats = dma_zalloc_coherent(&tp->pdev->dev,
+ sizeof(struct tg3_hw_stats),
+ &tp->stats_mapping, GFP_KERNEL);
if (!tp->hw_stats)
goto err_out;
@@ -8654,10 +8653,10 @@
struct tg3_napi *tnapi = &tp->napi[i];
struct tg3_hw_status *sblk;
- tnapi->hw_status = dma_alloc_coherent(&tp->pdev->dev,
- TG3_HW_STATUS_SIZE,
- &tnapi->status_mapping,
- GFP_KERNEL | __GFP_ZERO);
+ tnapi->hw_status = dma_zalloc_coherent(&tp->pdev->dev,
+ TG3_HW_STATUS_SIZE,
+ &tnapi->status_mapping,
+ GFP_KERNEL);
if (!tnapi->hw_status)
goto err_out;
diff --git a/drivers/net/ethernet/brocade/bna/cna.h b/drivers/net/ethernet/brocade/bna/cna.h
index c37f706..43405f6 100644
--- a/drivers/net/ethernet/brocade/bna/cna.h
+++ b/drivers/net/ethernet/brocade/bna/cna.h
@@ -37,8 +37,8 @@
extern char bfa_version[];
-#define CNA_FW_FILE_CT "ctfw-3.2.1.0.bin"
-#define CNA_FW_FILE_CT2 "ct2fw-3.2.1.0.bin"
+#define CNA_FW_FILE_CT "ctfw-3.2.1.1.bin"
+#define CNA_FW_FILE_CT2 "ct2fw-3.2.1.1.bin"
#define FC_SYMNAME_MAX 256 /*!< max name server symbolic name size */
#pragma pack(1)
diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c
index bb5d63f..ce75de9 100644
--- a/drivers/net/ethernet/cadence/at91_ether.c
+++ b/drivers/net/ethernet/cadence/at91_ether.c
@@ -304,7 +304,7 @@
/* Detect MAC & PHY and perform ethernet interface initialization */
static int __init at91ether_probe(struct platform_device *pdev)
{
- struct macb_platform_data *board_data = pdev->dev.platform_data;
+ struct macb_platform_data *board_data = dev_get_platdata(&pdev->dev);
struct resource *regs;
struct net_device *dev;
struct phy_device *phydev;
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index e866608..9257869 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -27,6 +27,7 @@
#include <linux/phy.h>
#include <linux/of.h>
#include <linux/of_device.h>
+#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <linux/pinctrl/consumer.h>
@@ -124,7 +125,7 @@
u8 addr[6];
int i;
- pdata = bp->pdev->dev.platform_data;
+ pdata = dev_get_platdata(&bp->pdev->dev);
/* Check all 4 address register for vaild address */
for (i = 0; i < 4; i++) {
@@ -275,7 +276,7 @@
phydev = phy_find_first(bp->mii_bus);
if (!phydev) {
netdev_err(dev, "no PHY found\n");
- return -1;
+ return -ENXIO;
}
pdata = dev_get_platdata(&bp->pdev->dev);
@@ -314,6 +315,7 @@
int macb_mii_init(struct macb *bp)
{
struct macb_platform_data *pdata;
+ struct device_node *np;
int err = -ENXIO, i;
/* Enable management port */
@@ -333,10 +335,7 @@
bp->pdev->name, bp->pdev->id);
bp->mii_bus->priv = bp;
bp->mii_bus->parent = &bp->dev->dev;
- pdata = bp->pdev->dev.platform_data;
-
- if (pdata)
- bp->mii_bus->phy_mask = pdata->phy_mask;
+ pdata = dev_get_platdata(&bp->pdev->dev);
bp->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL);
if (!bp->mii_bus->irq) {
@@ -344,17 +343,45 @@
goto err_out_free_mdiobus;
}
- for (i = 0; i < PHY_MAX_ADDR; i++)
- bp->mii_bus->irq[i] = PHY_POLL;
-
dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
- if (mdiobus_register(bp->mii_bus))
+ np = bp->pdev->dev.of_node;
+ if (np) {
+ /* try dt phy registration */
+ err = of_mdiobus_register(bp->mii_bus, np);
+
+ /* fall back to standard phy registration if no phy was
+ found during dt phy registration */
+ if (!err && !phy_find_first(bp->mii_bus)) {
+ for (i = 0; i < PHY_MAX_ADDR; i++) {
+ struct phy_device *phydev;
+
+ phydev = mdiobus_scan(bp->mii_bus, i);
+ if (IS_ERR(phydev)) {
+ err = PTR_ERR(phydev);
+ break;
+ }
+ }
+
+ if (err)
+ goto err_out_unregister_bus;
+ }
+ } else {
+ for (i = 0; i < PHY_MAX_ADDR; i++)
+ bp->mii_bus->irq[i] = PHY_POLL;
+
+ if (pdata)
+ bp->mii_bus->phy_mask = pdata->phy_mask;
+
+ err = mdiobus_register(bp->mii_bus);
+ }
+
+ if (err)
goto err_out_free_mdio_irq;
- if (macb_mii_probe(bp->dev) != 0) {
+ err = macb_mii_probe(bp->dev);
+ if (err)
goto err_out_unregister_bus;
- }
return 0;
@@ -1824,7 +1851,7 @@
err = of_get_phy_mode(pdev->dev.of_node);
if (err < 0) {
- pdata = pdev->dev.platform_data;
+ pdata = dev_get_platdata(&pdev->dev);
if (pdata && pdata->is_rmii)
bp->phy_interface = PHY_INTERFACE_MODE_RMII;
else
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index e3d4ec8..ec88de4 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -814,7 +814,7 @@
if (pdev == NULL)
return -ENODEV;
- data = pdev->dev.platform_data;
+ data = dev_get_platdata(&pdev->dev);
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
irq = platform_get_irq(pdev, 0);
diff --git a/drivers/net/ethernet/cisco/enic/Makefile b/drivers/net/ethernet/cisco/enic/Makefile
index e52296d..239e1e4 100644
--- a/drivers/net/ethernet/cisco/enic/Makefile
+++ b/drivers/net/ethernet/cisco/enic/Makefile
@@ -2,5 +2,5 @@
enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \
enic_res.o enic_dev.o enic_pp.o vnic_dev.o vnic_rq.o vnic_vic.o \
- enic_ethtool.o
+ enic_ethtool.o enic_api.o
diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index 2e37c63..be16731 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -32,8 +32,8 @@
#define DRV_NAME "enic"
#define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION "2.1.1.39"
-#define DRV_COPYRIGHT "Copyright 2008-2011 Cisco Systems, Inc"
+#define DRV_VERSION "2.1.1.43"
+#define DRV_COPYRIGHT "Copyright 2008-2013 Cisco Systems, Inc"
#define ENIC_BARS_MAX 6
@@ -96,6 +96,7 @@
#ifdef CONFIG_PCI_IOV
u16 num_vfs;
#endif
+ spinlock_t enic_api_lock;
struct enic_port_profile *pp;
/* work queue cache line section */
diff --git a/drivers/net/ethernet/cisco/enic/enic_api.c b/drivers/net/ethernet/cisco/enic/enic_api.c
new file mode 100644
index 0000000..e13efbd
--- /dev/null
+++ b/drivers/net/ethernet/cisco/enic/enic_api.c
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2013 Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+
+#include "vnic_dev.h"
+#include "vnic_devcmd.h"
+
+#include "enic_res.h"
+#include "enic.h"
+#include "enic_api.h"
+
+int enic_api_devcmd_proxy_by_index(struct net_device *netdev, int vf,
+ enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1, int wait)
+{
+ int err;
+ struct enic *enic = netdev_priv(netdev);
+ struct vnic_dev *vdev = enic->vdev;
+
+ spin_lock(&enic->enic_api_lock);
+ spin_lock(&enic->devcmd_lock);
+
+ vnic_dev_cmd_proxy_by_index_start(vdev, vf);
+ err = vnic_dev_cmd(vdev, cmd, a0, a1, wait);
+ vnic_dev_cmd_proxy_end(vdev);
+
+ spin_unlock(&enic->devcmd_lock);
+ spin_unlock(&enic->enic_api_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(enic_api_devcmd_proxy_by_index);
diff --git a/drivers/net/ethernet/cisco/enic/enic_api.h b/drivers/net/ethernet/cisco/enic/enic_api.h
new file mode 100644
index 0000000..6b9f925
--- /dev/null
+++ b/drivers/net/ethernet/cisco/enic/enic_api.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright 2013 Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __ENIC_API_H__
+#define __ENIC_API_H__
+
+#include <linux/netdevice.h>
+
+#include "vnic_dev.h"
+#include "vnic_devcmd.h"
+
+int enic_api_devcmd_proxy_by_index(struct net_device *netdev, int vf,
+ enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1, int wait);
+
+#endif
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index b12b32b..bcf15b1 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1733,6 +1733,7 @@
rtnl_lock();
+ spin_lock(&enic->enic_api_lock);
enic_dev_hang_notify(enic);
enic_stop(enic->netdev);
enic_dev_hang_reset(enic);
@@ -1741,6 +1742,8 @@
enic_set_rss_nic_cfg(enic);
enic_dev_set_ig_vlan_rewrite_mode(enic);
enic_open(enic->netdev);
+ spin_unlock(&enic->enic_api_lock);
+ call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev);
rtnl_unlock();
}
@@ -2153,6 +2156,7 @@
*/
spin_lock_init(&enic->devcmd_lock);
+ spin_lock_init(&enic->enic_api_lock);
/*
* Set ingress vlan rewrite mode before vnic initialization
diff --git a/drivers/net/ethernet/cisco/enic/enic_res.h b/drivers/net/ethernet/cisco/enic/enic_res.h
index 25be273..69f60af 100644
--- a/drivers/net/ethernet/cisco/enic/enic_res.h
+++ b/drivers/net/ethernet/cisco/enic/enic_res.h
@@ -47,6 +47,9 @@
int offload_mode, int cq_entry, int sop, int eop, int loopback)
{
struct wq_enet_desc *desc = vnic_wq_next_desc(wq);
+ u8 desc_skip_cnt = 1;
+ u8 compressed_send = 0;
+ u64 wrid = 0;
wq_enet_desc_enc(desc,
(u64)dma_addr | VNIC_PADDR_TARGET,
@@ -59,7 +62,8 @@
(u16)vlan_tag,
(u8)loopback);
- vnic_wq_post(wq, os_buf, dma_addr, len, sop, eop);
+ vnic_wq_post(wq, os_buf, dma_addr, len, sop, eop, desc_skip_cnt,
+ (u8)cq_entry, compressed_send, wrid);
}
static inline void enic_queue_wq_desc_cont(struct vnic_wq *wq,
@@ -120,6 +124,7 @@
dma_addr_t dma_addr, unsigned int len)
{
struct rq_enet_desc *desc = vnic_rq_next_desc(rq);
+ u64 wrid = 0;
u8 type = os_buf_index ?
RQ_ENET_TYPE_NOT_SOP : RQ_ENET_TYPE_ONLY_SOP;
@@ -127,7 +132,7 @@
(u64)dma_addr | VNIC_PADDR_TARGET,
type, (u16)len);
- vnic_rq_post(rq, os_buf, os_buf_index, dma_addr, len);
+ vnic_rq_post(rq, os_buf, os_buf_index, dma_addr, len, wrid);
}
struct enic;
diff --git a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
index 23d5552..b9a0d78 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
@@ -281,11 +281,25 @@
* 0 if no VIF-CONFIG-INFO TLV was ever received. */
CMD_CONFIG_INFO_GET = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 44),
+ /* INT13 API: (u64)a0=paddr to vnic_int13_params struct
+ * (u32)a1=INT13_CMD_xxx
+ */
+ CMD_INT13_ALL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 45),
+
+ /* Set default vlan:
+ * in: (u16)a0=new default vlan
+ * (u16)a1=zero for overriding vlan with param a0,
+ * non-zero for resetting vlan to the default
+ * out: (u16)a0=old default vlan
+ */
+ CMD_SET_DEFAULT_VLAN = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 46),
+
/* init_prov_info2:
* Variant of CMD_INIT_PROV_INFO, where it will not try to enable
* the vnic until CMD_ENABLE2 is issued.
* (u64)a0=paddr of vnic_devcmd_provinfo
- * (u32)a1=sizeof provision info */
+ * (u32)a1=sizeof provision info
+ */
CMD_INIT_PROV_INFO2 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 47),
/* enable2:
@@ -339,16 +353,57 @@
CMD_INTR_COAL_CONVERT = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 50),
/*
- * cmd_set_mac_addr
- * set mac address
+ * Set the predefined mac address as default
* in:
* (u48)a0 = mac addr
- *
*/
CMD_SET_MAC_ADDR = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 55),
+
+ /* Update the provisioning info of the given VIF
+ * (u64)a0=paddr of vnic_devcmd_provinfo
+ * (u32)a1=sizeof provision info
+ */
+ CMD_PROV_INFO_UPDATE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 56),
+
+ /* Add a filter.
+ * in: (u64) a0= filter address
+ * (u32) a1= size of filter
+ * out: (u32) a0=filter identifier
+ */
+ CMD_ADD_FILTER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 58),
+
+ /* Delete a filter.
+ * in: (u32) a0=filter identifier
+ */
+ CMD_DEL_FILTER = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 59),
+
+ /* Enable a Queue Pair in User space NIC
+ * in: (u32) a0=Queue Pair number
+ * (u32) a1= command
+ */
+ CMD_QP_ENABLE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 60),
+
+ /* Disable a Queue Pair in User space NIC
+ * in: (u32) a0=Queue Pair number
+ * (u32) a1= command
+ */
+ CMD_QP_DISABLE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 61),
+
+ /* Stats dump Queue Pair in User space NIC
+ * in: (u32) a0=Queue Pair number
+ * (u64) a1=host buffer addr for status dump
+ * (u32) a2=length of the buffer
+ */
+ CMD_QP_STATS_DUMP = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 62),
+
+ /* Clear stats for Queue Pair in User space NIC
+ * in: (u32) a0=Queue Pair number
+ */
+ CMD_QP_STATS_CLEAR = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 63),
};
/* CMD_ENABLE2 flags */
+#define CMD_ENABLE2_STANDBY 0x0
#define CMD_ENABLE2_ACTIVE 0x1
/* flags for CMD_OPEN */
@@ -364,6 +419,9 @@
#define CMD_PFILTER_PROMISCUOUS 0x08
#define CMD_PFILTER_ALL_MULTICAST 0x10
+/* Commands for CMD_QP_ENABLE/CMD_QP_DISABLE */
+#define CMD_QP_RQWQ 0x0
+
/* rewrite modes for CMD_IG_VLAN_REWRITE_MODE */
#define IG_VLAN_REWRITE_MODE_DEFAULT_TRUNK 0
#define IG_VLAN_REWRITE_MODE_UNTAG_DEFAULT_VLAN 1
@@ -390,6 +448,7 @@
ERR_EMAXRES = 10,
ERR_ENOTSUPPORTED = 11,
ERR_EINPROGRESS = 12,
+ ERR_MAX
};
/*
@@ -435,6 +494,115 @@
u8 data[0];
};
+/* Build the bit used in the flags field of the different filters to mark
+ * field number fld (1-based) as valid: field 1 is bit 0, field 2 is bit 1,
+ * and so on. The argument is parenthesized so that an expression passed
+ * as fld expands with the intended precedence.
+ */
+#define FILTER_FIELD_VALID(fld) (1 << ((fld) - 1))
+
+#define FILTER_FIELDS_USNIC ( \
+ FILTER_FIELD_VALID(1) | \
+ FILTER_FIELD_VALID(2) | \
+ FILTER_FIELD_VALID(3) | \
+ FILTER_FIELD_VALID(4))
+
+#define FILTER_FIELDS_IPV4_5TUPLE ( \
+ FILTER_FIELD_VALID(1) | \
+ FILTER_FIELD_VALID(2) | \
+ FILTER_FIELD_VALID(3) | \
+ FILTER_FIELD_VALID(4) | \
+ FILTER_FIELD_VALID(5))
+
+#define FILTER_FIELDS_MAC_VLAN ( \
+ FILTER_FIELD_VALID(1) | \
+ FILTER_FIELD_VALID(2))
+
+#define FILTER_FIELD_USNIC_VLAN FILTER_FIELD_VALID(1)
+#define FILTER_FIELD_USNIC_ETHTYPE FILTER_FIELD_VALID(2)
+#define FILTER_FIELD_USNIC_PROTO FILTER_FIELD_VALID(3)
+#define FILTER_FIELD_USNIC_ID FILTER_FIELD_VALID(4)
+
+/* usNIC filter match fields. The struct is packed, so usnic_id follows
+ * the single proto_version byte with no padding in between.
+ */
+struct filter_usnic_id {
+ u32 flags; /* FILTER_FIELD_USNIC_* bits marking the valid fields */
+ u16 vlan;
+ u16 ethtype;
+ u8 proto_version;
+ u32 usnic_id;
+} __packed;
+
+#define FILTER_FIELD_5TUP_PROTO FILTER_FIELD_VALID(1)
+#define FILTER_FIELD_5TUP_SRC_AD FILTER_FIELD_VALID(2)
+#define FILTER_FIELD_5TUP_DST_AD FILTER_FIELD_VALID(3)
+#define FILTER_FIELD_5TUP_SRC_PT FILTER_FIELD_VALID(4)
+#define FILTER_FIELD_5TUP_DST_PT FILTER_FIELD_VALID(5)
+
+/* Enums for the protocol field. */
+enum protocol_e {
+ PROTO_UDP = 0,
+ PROTO_TCP = 1,
+};
+
+struct filter_ipv4_5tuple {
+ u32 flags;
+ u32 protocol;
+ u32 src_addr;
+ u32 dst_addr;
+ u16 src_port;
+ u16 dst_port;
+} __packed;
+
+#define FILTER_FIELD_VMQ_VLAN FILTER_FIELD_VALID(1)
+#define FILTER_FIELD_VMQ_MAC FILTER_FIELD_VALID(2)
+
+struct filter_mac_vlan {
+ u32 flags;
+ u16 vlan;
+ u8 mac_addr[6];
+} __packed;
+
+/* Specifies the filter_action type. */
+enum {
+ FILTER_ACTION_RQ_STEERING = 0,
+ FILTER_ACTION_MAX
+};
+
+struct filter_action {
+ u32 type;
+ union {
+ u32 rq_idx;
+ } u;
+} __packed;
+
+/* Specifies the filter type. FILTER_MAX is one past the last valid type. */
+enum filter_type {
+ FILTER_USNIC_ID = 0,
+ FILTER_IPV4_5TUPLE = 1,
+ FILTER_MAC_VLAN = 2,
+ FILTER_MAX
+};
+
+/* A filter of one of the kinds above.
+ * NOTE(review): type presumably carries an enum filter_type value that
+ * selects the active member of u - confirm against the users of
+ * CMD_ADD_FILTER.
+ */
+struct filter {
+ u32 type;
+ union {
+ struct filter_usnic_id usnic;
+ struct filter_ipv4_5tuple ipv4;
+ struct filter_mac_vlan mac_vlan;
+ } u;
+} __packed;
+
+enum {
+ CLSF_TLV_FILTER = 0,
+ CLSF_TLV_ACTION = 1,
+};
+
+/* Maximum size of buffer to CMD_ADD_FILTER */
+#define FILTER_MAX_BUF_SIZE 100
+
+/* Type/length/value header followed by a variable-length payload.
+ * Uses u32 like the other filter structures in this header.
+ * NOTE(review): type presumably holds a CLSF_TLV_* value - confirm
+ * against the code that builds the CMD_ADD_FILTER buffer.
+ */
+struct filter_tlv {
+	u32 type;
+	u32 length;
+	u32 val[0];
+};
+
/*
* Writing cmd register causes STAT_BUSY to get set in status register.
* When cmd completes, STAT_BUSY will be cleared.
diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c
index 7e1488f..36a2ed6 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_rq.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c
@@ -30,12 +30,9 @@
static int vnic_rq_alloc_bufs(struct vnic_rq *rq)
{
struct vnic_rq_buf *buf;
- struct vnic_dev *vdev;
unsigned int i, j, count = rq->ring.desc_count;
unsigned int blks = VNIC_RQ_BUF_BLKS_NEEDED(count);
- vdev = rq->vdev;
-
for (i = 0; i < blks; i++) {
rq->bufs[i] = kzalloc(VNIC_RQ_BUF_BLK_SZ(count), GFP_ATOMIC);
if (!rq->bufs[i])
@@ -141,7 +138,7 @@
unsigned int error_interrupt_enable,
unsigned int error_interrupt_offset)
{
- u32 fetch_index;
+ u32 fetch_index = 0;
/* Use current fetch_index as the ring starting point */
fetch_index = ioread32(&rq->ctrl->fetch_index);
diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.h b/drivers/net/ethernet/cisco/enic/vnic_rq.h
index 2056586..ee7bc95 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_rq.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_rq.h
@@ -72,6 +72,7 @@
unsigned int len;
unsigned int index;
void *desc;
+ uint64_t wr_id;
};
struct vnic_rq {
@@ -110,7 +111,8 @@
static inline void vnic_rq_post(struct vnic_rq *rq,
void *os_buf, unsigned int os_buf_index,
- dma_addr_t dma_addr, unsigned int len)
+ dma_addr_t dma_addr, unsigned int len,
+ uint64_t wrid)
{
struct vnic_rq_buf *buf = rq->to_use;
@@ -118,6 +120,7 @@
buf->os_buf_index = os_buf_index;
buf->dma_addr = dma_addr;
buf->len = len;
+ buf->wr_id = wrid;
buf = buf->next;
rq->to_use = buf;
diff --git a/drivers/net/ethernet/cisco/enic/vnic_wq.c b/drivers/net/ethernet/cisco/enic/vnic_wq.c
index 5e0d7a2..3e6b8d5 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_wq.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_wq.c
@@ -30,12 +30,9 @@
static int vnic_wq_alloc_bufs(struct vnic_wq *wq)
{
struct vnic_wq_buf *buf;
- struct vnic_dev *vdev;
unsigned int i, j, count = wq->ring.desc_count;
unsigned int blks = VNIC_WQ_BUF_BLKS_NEEDED(count);
- vdev = wq->vdev;
-
for (i = 0; i < blks; i++) {
wq->bufs[i] = kzalloc(VNIC_WQ_BUF_BLK_SZ(count), GFP_ATOMIC);
if (!wq->bufs[i])
diff --git a/drivers/net/ethernet/cisco/enic/vnic_wq.h b/drivers/net/ethernet/cisco/enic/vnic_wq.h
index 7dd937a..2c6c708 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_wq.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_wq.h
@@ -58,6 +58,10 @@
unsigned int index;
int sop;
void *desc;
+ uint64_t wr_id; /* Cookie */
+ uint8_t cq_entry; /* Gets completion event from hw */
+ uint8_t desc_skip_cnt; /* Num descs to occupy */
+ uint8_t compressed_send; /* Both hdr and payload in one desc */
};
/* Break the vnic_wq_buf allocations into blocks of 32/64 entries */
@@ -102,14 +106,20 @@
static inline void vnic_wq_post(struct vnic_wq *wq,
void *os_buf, dma_addr_t dma_addr,
- unsigned int len, int sop, int eop)
+ unsigned int len, int sop, int eop,
+ uint8_t desc_skip_cnt, uint8_t cq_entry,
+ uint8_t compressed_send, uint64_t wrid)
{
struct vnic_wq_buf *buf = wq->to_use;
buf->sop = sop;
+ buf->cq_entry = cq_entry;
+ buf->compressed_send = compressed_send;
+ buf->desc_skip_cnt = desc_skip_cnt;
buf->os_buf = eop ? os_buf : NULL;
buf->dma_addr = dma_addr;
buf->len = len;
+ buf->wr_id = wrid;
buf = buf->next;
if (eop) {
@@ -123,7 +133,7 @@
}
wq->to_use = buf;
- wq->ring.desc_avail--;
+ wq->ring.desc_avail -= desc_skip_cnt;
}
static inline void vnic_wq_service(struct vnic_wq *wq,
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index a13b312..5f5896e 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1384,7 +1384,7 @@
static int
dm9000_probe(struct platform_device *pdev)
{
- struct dm9000_plat_data *pdata = pdev->dev.platform_data;
+ struct dm9000_plat_data *pdata = dev_get_platdata(&pdev->dev);
struct board_info *db; /* Point a board information structure */
struct net_device *ndev;
const unsigned char *mac_src;
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index 50d9c63..bf3bf6f 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -469,6 +469,17 @@
}
}
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* Poll-controller hook: invoke the interrupt handler directly, with the
+ * device's IRQ line disabled around the call.
+ */
+static void sundance_poll_controller(struct net_device *dev)
+{
+	struct netdev_private *priv = netdev_priv(dev);
+	const unsigned int irq = priv->pci_dev->irq;
+
+	disable_irq(irq);
+	intr_handler(irq, dev);
+	enable_irq(irq);
+}
+#endif
+
static const struct net_device_ops netdev_ops = {
.ndo_open = netdev_open,
.ndo_stop = netdev_close,
@@ -480,6 +491,9 @@
.ndo_change_mtu = change_mtu,
.ndo_set_mac_address = sundance_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = sundance_poll_controller,
+#endif
};
static int sundance_probe1(struct pci_dev *pdev,
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 11c815d..ace5050 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -99,14 +99,18 @@
#define MCC_Q_LEN 128 /* total size not to exceed 8 pages */
#define MCC_CQ_LEN 256
-#define BE3_MAX_RSS_QS 8
#define BE2_MAX_RSS_QS 4
-#define MAX_RSS_QS BE3_MAX_RSS_QS
-#define MAX_RX_QS (MAX_RSS_QS + 1) /* RSS qs + 1 def Rx */
+#define BE3_MAX_RSS_QS 16
+#define BE3_MAX_TX_QS 16
+#define BE3_MAX_EVT_QS 16
-#define MAX_TX_QS 8
+#define MAX_RX_QS 32
+#define MAX_EVT_QS 32
+#define MAX_TX_QS 32
+
#define MAX_ROCE_EQS 5
-#define MAX_MSIX_VECTORS (MAX_RSS_QS + MAX_ROCE_EQS) /* RSS qs + RoCE */
+#define MAX_MSIX_VECTORS 32
+#define MIN_MSIX_VECTORS 1
#define BE_TX_BUDGET 256
#define BE_NAPI_WEIGHT 64
#define MAX_RX_POST BE_NAPI_WEIGHT /* Frags posted at a time */
@@ -189,6 +193,7 @@
u32 cur_eqd; /* in usecs */
u8 idx; /* array index */
+ u8 msix_idx;
u16 tx_budget;
u16 spurious_intr;
struct napi_struct napi;
@@ -352,6 +357,18 @@
u32 supported;
};
+/* Resource limits of a function. be_copy_nic_desc() fills every member
+ * except max_vfs from a NIC resource descriptor; max_vfs is taken from
+ * the PCIe resource descriptor in be_cmd_get_profile_config().
+ */
+struct be_resources {
+ u16 max_vfs; /* Total VFs "really" supported by FW/HW */
+ u16 max_mcast_mac;
+ u16 max_tx_qs;
+ u16 max_rss_qs; /* reduced by one when FW reports it equal to max_rx_qs */
+ u16 max_rx_qs;
+ u16 max_uc_mac; /* Max UC MACs programmable */
+ u16 max_vlans; /* Number of vlans supported */
+ u16 max_evt_qs;
+ u32 if_cap_flags; /* FW capability flags masked with BE_IF_CAP_FLAGS_WANT */
+};
+
struct be_adapter {
struct pci_dev *pdev;
struct net_device *netdev;
@@ -369,18 +386,19 @@
spinlock_t mcc_lock; /* For serializing mcc cmds to BE card */
spinlock_t mcc_cq_lock;
- u32 num_msix_vec;
- u32 num_evt_qs;
- struct be_eq_obj eq_obj[MAX_MSIX_VECTORS];
+ u16 cfg_num_qs; /* configured via set-channels */
+ u16 num_evt_qs;
+ u16 num_msix_vec;
+ struct be_eq_obj eq_obj[MAX_EVT_QS];
struct msix_entry msix_entries[MAX_MSIX_VECTORS];
bool isr_registered;
/* TX Rings */
- u32 num_tx_qs;
+ u16 num_tx_qs;
struct be_tx_obj tx_obj[MAX_TX_QS];
/* Rx rings */
- u32 num_rx_qs;
+ u16 num_rx_qs;
struct be_rx_obj rx_obj[MAX_RX_QS];
u32 big_page_size; /* Compounded page size shared by rx wrbs */
@@ -430,8 +448,8 @@
u32 flash_status;
struct completion flash_compl;
- u32 num_vfs; /* Number of VFs provisioned by PF driver */
- u32 dev_num_vfs; /* Number of VFs supported by HW */
+ struct be_resources res; /* resources available for the func */
+ u16 num_vfs; /* Number of VFs provisioned by PF */
u8 virtfn;
struct be_vf_cfg *vf_cfg;
bool be3_native;
@@ -446,21 +464,13 @@
u16 qnq_vid;
u32 msg_enable;
int be_get_temp_freq;
- u16 max_mcast_mac;
- u16 max_tx_queues;
- u16 max_rss_queues;
- u16 max_rx_queues;
- u16 max_pmac_cnt;
- u16 max_vlans;
- u16 max_event_queues;
- u32 if_cap_flags;
u8 pf_number;
u64 rss_flags;
};
#define be_physfn(adapter) (!adapter->virtfn)
#define sriov_enabled(adapter) (adapter->num_vfs > 0)
-#define sriov_want(adapter) (adapter->dev_num_vfs && num_vfs && \
+#define sriov_want(adapter) (be_max_vfs(adapter) && num_vfs && \
be_physfn(adapter))
#define for_all_vfs(adapter, vf_cfg, i) \
for (i = 0, vf_cfg = &adapter->vf_cfg[i]; i < adapter->num_vfs; \
@@ -469,6 +479,26 @@
#define ON 1
#define OFF 0
+/* Accessors for the per-function limits kept in adapter->res. The macro
+ * argument is parenthesized so any pointer expression can be passed.
+ */
+#define be_max_vlans(adapter)		((adapter)->res.max_vlans)
+#define be_max_uc(adapter)		((adapter)->res.max_uc_mac)
+#define be_max_mc(adapter)		((adapter)->res.max_mcast_mac)
+#define be_max_vfs(adapter)		((adapter)->res.max_vfs)
+#define be_max_rss(adapter)		((adapter)->res.max_rss_qs)
+#define be_max_txqs(adapter)		((adapter)->res.max_tx_qs)
+/* NOTE(review): struct be_resources has no max_prio_tx_qs member, so any
+ * use of this macro fails to compile - add the member or drop the macro.
+ */
+#define be_max_prio_txqs(adapter)	((adapter)->res.max_prio_tx_qs)
+#define be_max_rxqs(adapter)		((adapter)->res.max_rx_qs)
+#define be_max_eqs(adapter)		((adapter)->res.max_evt_qs)
+#define be_if_cap_flags(adapter)	((adapter)->res.if_cap_flags)
+
+/* Queue count for this function: the RSS queue limit (never less than
+ * one), capped by the event queue limit and by the number of online CPUs.
+ */
+static inline u16 be_max_qs(struct be_adapter *adapter)
+{
+	u16 rss_qs = be_max_rss(adapter);
+	u16 evt_qs = be_max_eqs(adapter);
+	/* If no RSS, need at least the one def RXQ */
+	u16 qs = rss_qs ? rss_qs : 1;
+
+	if (evt_qs < qs)
+		qs = evt_qs;
+
+	return min_t(u16, qs, num_online_cpus());
+}
+
#define lancer_chip(adapter) (adapter->pdev->device == OC_DEVICE_ID3 || \
adapter->pdev->device == OC_DEVICE_ID4)
@@ -672,6 +702,8 @@
extern bool be_is_wol_supported(struct be_adapter *adapter);
extern bool be_pause_supported(struct be_adapter *adapter);
extern u32 be_get_fw_log_level(struct be_adapter *adapter);
+int be_update_queues(struct be_adapter *adapter);
+int be_poll(struct napi_struct *napi, int budget);
/*
* internal function to initialize-cleanup roce device.
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 85923e2..52c9085 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -633,6 +633,12 @@
return &wrb->payload.sgl[0];
}
+/* Store @addr in the two 32-bit tag words of @wrb: the low half goes in
+ * tag0 and the high half in tag1.
+ */
+static inline void fill_wrb_tags(struct be_mcc_wrb *wrb,
+				 unsigned long addr)
+{
+	wrb->tag1 = upper_32_bits(addr);
+	wrb->tag0 = lower_32_bits(addr);
+}
/* Don't touch the hdr after it's prepared */
/* mem will be NULL for embedded commands */
@@ -641,17 +647,12 @@
struct be_mcc_wrb *wrb, struct be_dma_mem *mem)
{
struct be_sge *sge;
- unsigned long addr = (unsigned long)req_hdr;
- u64 req_addr = addr;
req_hdr->opcode = opcode;
req_hdr->subsystem = subsystem;
req_hdr->request_length = cpu_to_le32(cmd_len - sizeof(*req_hdr));
req_hdr->version = 0;
-
- wrb->tag0 = req_addr & 0xFFFFFFFF;
- wrb->tag1 = upper_32_bits(req_addr);
-
+ fill_wrb_tags(wrb, (ulong) req_hdr);
wrb->payload_length = cmd_len;
if (mem) {
wrb->embedded |= (1 & MCC_WRB_SGE_CNT_MASK) <<
@@ -678,31 +679,6 @@
}
}
-/* Converts interrupt delay in microseconds to multiplier value */
-static u32 eq_delay_to_mult(u32 usec_delay)
-{
-#define MAX_INTR_RATE 651042
- const u32 round = 10;
- u32 multiplier;
-
- if (usec_delay == 0)
- multiplier = 0;
- else {
- u32 interrupt_rate = 1000000 / usec_delay;
- /* Max delay, corresponding to the lowest interrupt rate */
- if (interrupt_rate == 0)
- multiplier = 1023;
- else {
- multiplier = (MAX_INTR_RATE - interrupt_rate) * round;
- multiplier /= interrupt_rate;
- /* Round the multiplier to the closest value.*/
- multiplier = (multiplier + round/2) / round;
- multiplier = min(multiplier, (u32)1023);
- }
- }
- return multiplier;
-}
-
static inline struct be_mcc_wrb *wrb_from_mbox(struct be_adapter *adapter)
{
struct be_dma_mem *mbox_mem = &adapter->mbox_mem;
@@ -730,6 +706,78 @@
return wrb;
}
+/* True once the MCC queue has been created. The helpers below use this to
+ * choose between the MCC queue and the mailbox.
+ */
+static bool use_mcc(struct be_adapter *adapter)
+{
+	return adapter->mcc_obj.q.created;
+}
+
+/* Take the lock guarding the active command channel: mcc_lock (with
+ * bottom halves disabled) when the MCC queue is in use, otherwise
+ * mbox_lock.
+ * Returns 0 on success, or the non-zero result of
+ * mutex_lock_interruptible() if the mailbox lock could not be taken.
+ * Must be used only in process context
+ */
+static int be_cmd_lock(struct be_adapter *adapter)
+{
+	if (use_mcc(adapter)) {
+		spin_lock_bh(&adapter->mcc_lock);
+		return 0;
+	}
+
+	return mutex_lock_interruptible(&adapter->mbox_lock);
+}
+
+/* Release the lock taken by be_cmd_lock().
+ * Must be used only in process context
+ */
+static void be_cmd_unlock(struct be_adapter *adapter)
+{
+	if (use_mcc(adapter))
+		spin_unlock_bh(&adapter->mcc_lock);
+	else
+		/* no "return <expr>" here: this function returns void */
+		mutex_unlock(&adapter->mbox_lock);
+}
+
+/* Copy the caller-prepared @wrb into the next MCC queue entry when the
+ * MCC queue is in use, or into the mailbox wrb otherwise.
+ * Returns the destination wrb, or NULL when no MCC queue entry is free.
+ */
+static struct be_mcc_wrb *be_cmd_copy(struct be_adapter *adapter,
+				      struct be_mcc_wrb *wrb)
+{
+	struct be_mcc_wrb *dest_wrb;
+
+	if (!use_mcc(adapter)) {
+		dest_wrb = wrb_from_mbox(adapter);
+	} else {
+		dest_wrb = wrb_from_mccq(adapter);
+		if (!dest_wrb)
+			return NULL;
+	}
+
+	memcpy(dest_wrb, wrb, sizeof(*wrb));
+
+	/* For an embedded command, make the tags of the copy carry the
+	 * address of the payload inside the caller's wrb.
+	 */
+	if (wrb->embedded & cpu_to_le32(MCC_WRB_EMBEDDED_MASK))
+		fill_wrb_tags(dest_wrb, (ulong) embedded_payload(wrb));
+
+	return dest_wrb;
+}
+
+/* Issue the command prepared in the caller's @wrb and wait for it to
+ * complete, using the MCC queue if it has been created and the mailbox
+ * otherwise. On success the completed wrb is copied back into @wrb so the
+ * caller can read the response from it.
+ * Returns 0 on success, -EBUSY when no queue entry is available, or the
+ * error from taking the lock or from the notify-wait call.
+ * Must be used only in process context
+ */
+static int be_cmd_notify_wait(struct be_adapter *adapter,
+			      struct be_mcc_wrb *wrb)
+{
+	struct be_mcc_wrb *dest_wrb;
+	int status;
+
+	status = be_cmd_lock(adapter);
+	if (status)
+		return status;
+
+	dest_wrb = be_cmd_copy(adapter, wrb);
+	if (!dest_wrb) {
+		/* The lock is held here; it must be released on this path */
+		status = -EBUSY;
+		goto unlock;
+	}
+
+	if (use_mcc(adapter))
+		status = be_mcc_notify_wait(adapter);
+	else
+		status = be_mbox_notify_wait(adapter);
+
+	if (!status)
+		memcpy(wrb, dest_wrb, sizeof(*wrb));
+
+unlock:
+	be_cmd_unlock(adapter);
+	return status;
+}
+
/* Tell fw we're about to start firing cmds by writing a
* special pattern across the wrb hdr; uses mbox
*/
@@ -790,13 +838,12 @@
return status;
}
-int be_cmd_eq_create(struct be_adapter *adapter,
- struct be_queue_info *eq, int eq_delay)
+int be_cmd_eq_create(struct be_adapter *adapter, struct be_eq_obj *eqo)
{
struct be_mcc_wrb *wrb;
struct be_cmd_req_eq_create *req;
- struct be_dma_mem *q_mem = &eq->dma_mem;
- int status;
+ struct be_dma_mem *q_mem = &eqo->q.dma_mem;
+ int status, ver = 0;
if (mutex_lock_interruptible(&adapter->mbox_lock))
return -1;
@@ -807,15 +854,18 @@
be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
OPCODE_COMMON_EQ_CREATE, sizeof(*req), wrb, NULL);
+ /* Support for EQ_CREATEv2 available only SH-R onwards */
+ if (!(BEx_chip(adapter) || lancer_chip(adapter)))
+ ver = 2;
+
+ req->hdr.version = ver;
req->num_pages = cpu_to_le16(PAGES_4K_SPANNED(q_mem->va, q_mem->size));
AMAP_SET_BITS(struct amap_eq_context, valid, req->context, 1);
/* 4byte eqe*/
AMAP_SET_BITS(struct amap_eq_context, size, req->context, 0);
AMAP_SET_BITS(struct amap_eq_context, count, req->context,
- __ilog2_u32(eq->len/256));
- AMAP_SET_BITS(struct amap_eq_context, delaymult, req->context,
- eq_delay_to_mult(eq_delay));
+ __ilog2_u32(eqo->q.len / 256));
be_dws_cpu_to_le(req->context, sizeof(req->context));
be_cmd_page_addrs_prepare(req->pages, ARRAY_SIZE(req->pages), q_mem);
@@ -823,8 +873,10 @@
status = be_mbox_notify_wait(adapter);
if (!status) {
struct be_cmd_resp_eq_create *resp = embedded_payload(wrb);
- eq->id = le16_to_cpu(resp->eq_id);
- eq->created = true;
+ eqo->q.id = le16_to_cpu(resp->eq_id);
+ eqo->msix_idx =
+ (ver == 2) ? le16_to_cpu(resp->msix_idx) : eqo->idx;
+ eqo->q.created = true;
}
mutex_unlock(&adapter->mbox_lock);
@@ -1130,25 +1182,16 @@
int be_cmd_txq_create(struct be_adapter *adapter, struct be_tx_obj *txo)
{
- struct be_mcc_wrb *wrb;
+ struct be_mcc_wrb wrb = {0};
struct be_cmd_req_eth_tx_create *req;
struct be_queue_info *txq = &txo->q;
struct be_queue_info *cq = &txo->cq;
struct be_dma_mem *q_mem = &txq->dma_mem;
int status, ver = 0;
- spin_lock_bh(&adapter->mcc_lock);
-
- wrb = wrb_from_mccq(adapter);
- if (!wrb) {
- status = -EBUSY;
- goto err;
- }
-
- req = embedded_payload(wrb);
-
+ req = embedded_payload(&wrb);
be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_ETH,
- OPCODE_ETH_TX_CREATE, sizeof(*req), wrb, NULL);
+ OPCODE_ETH_TX_CREATE, sizeof(*req), &wrb, NULL);
if (lancer_chip(adapter)) {
req->hdr.version = 1;
@@ -1166,12 +1209,11 @@
req->cq_id = cpu_to_le16(cq->id);
req->queue_size = be_encoded_q_len(txq->len);
be_cmd_page_addrs_prepare(req->pages, ARRAY_SIZE(req->pages), q_mem);
-
ver = req->hdr.version;
- status = be_mcc_notify_wait(adapter);
+ status = be_cmd_notify_wait(adapter, &wrb);
if (!status) {
- struct be_cmd_resp_eth_tx_create *resp = embedded_payload(wrb);
+ struct be_cmd_resp_eth_tx_create *resp = embedded_payload(&wrb);
txq->id = le16_to_cpu(resp->cid);
if (ver == 2)
txo->db_offset = le32_to_cpu(resp->db_offset);
@@ -1180,9 +1222,6 @@
txq->created = true;
}
-err:
- spin_unlock_bh(&adapter->mcc_lock);
-
return status;
}
@@ -1311,44 +1350,32 @@
}
/* Create an rx filtering policy configuration on an i/f
- * Uses MCCQ
+ * Will use MBOX only if MCCQ has not been created.
*/
int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags,
u32 *if_handle, u32 domain)
{
- struct be_mcc_wrb *wrb;
+ struct be_mcc_wrb wrb = {0};
struct be_cmd_req_if_create *req;
int status;
- spin_lock_bh(&adapter->mcc_lock);
-
- wrb = wrb_from_mccq(adapter);
- if (!wrb) {
- status = -EBUSY;
- goto err;
- }
- req = embedded_payload(wrb);
-
+ req = embedded_payload(&wrb);
be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
- OPCODE_COMMON_NTWK_INTERFACE_CREATE, sizeof(*req), wrb, NULL);
+ OPCODE_COMMON_NTWK_INTERFACE_CREATE, sizeof(*req), &wrb, NULL);
req->hdr.domain = domain;
req->capability_flags = cpu_to_le32(cap_flags);
req->enable_flags = cpu_to_le32(en_flags);
-
req->pmac_invalid = true;
- status = be_mcc_notify_wait(adapter);
+ status = be_cmd_notify_wait(adapter, &wrb);
if (!status) {
- struct be_cmd_resp_if_create *resp = embedded_payload(wrb);
+ struct be_cmd_resp_if_create *resp = embedded_payload(&wrb);
*if_handle = le32_to_cpu(resp->interface_id);
/* Hack to retrieve VF's pmac-id on BE3 */
if (BE3_chip(adapter) && !be_physfn(adapter))
adapter->pmac_id[0] = le32_to_cpu(resp->pmac_id);
}
-
-err:
- spin_unlock_bh(&adapter->mcc_lock);
return status;
}
@@ -1797,8 +1824,7 @@
*/
req->if_flags_mask |=
cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS &
- adapter->if_cap_flags);
-
+ be_if_cap_flags(adapter));
req->mcast_num = cpu_to_le32(netdev_mc_count(adapter->netdev));
netdev_for_each_mc_addr(ha, adapter->netdev)
memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN);
@@ -3087,30 +3113,63 @@
return status;
}
-static struct be_nic_resource_desc *be_get_nic_desc(u8 *buf, u32 desc_count,
- u32 max_buf_size)
+static struct be_nic_res_desc *be_get_nic_desc(u8 *buf, u32 desc_count)
{
- struct be_nic_resource_desc *desc = (struct be_nic_resource_desc *)buf;
+ struct be_res_desc_hdr *hdr = (struct be_res_desc_hdr *)buf;
int i;
for (i = 0; i < desc_count; i++) {
- desc->desc_len = desc->desc_len ? : RESOURCE_DESC_SIZE;
- if (((void *)desc + desc->desc_len) >
- (void *)(buf + max_buf_size))
- return NULL;
+ if (hdr->desc_type == NIC_RESOURCE_DESC_TYPE_V0 ||
+ hdr->desc_type == NIC_RESOURCE_DESC_TYPE_V1)
+ return (struct be_nic_res_desc *)hdr;
- if (desc->desc_type == NIC_RESOURCE_DESC_TYPE_V0 ||
- desc->desc_type == NIC_RESOURCE_DESC_TYPE_V1)
- return desc;
-
- desc = (void *)desc + desc->desc_len;
+ hdr->desc_len = hdr->desc_len ? : RESOURCE_DESC_SIZE_V0;
+ hdr = (void *)hdr + hdr->desc_len;
}
-
return NULL;
}
+/* Walk @desc_count resource descriptors starting at @buf and return the
+ * first PCIe descriptor (V0 or V1) whose pf_num equals @devfn, or NULL if
+ * there is none. A descriptor with a zero desc_len is given the V0 size
+ * before stepping over it.
+ */
+static struct be_pcie_res_desc *be_get_pcie_desc(u8 devfn, u8 *buf,
+						 u32 desc_count)
+{
+	struct be_res_desc_hdr *hdr = (struct be_res_desc_hdr *)buf;
+	struct be_pcie_res_desc *pcie;
+	int idx;
+
+	for (idx = 0; idx < desc_count; idx++) {
+		switch (hdr->desc_type) {
+		case PCIE_RESOURCE_DESC_TYPE_V0:
+		case PCIE_RESOURCE_DESC_TYPE_V1:
+			pcie = (struct be_pcie_res_desc *)hdr;
+			if (pcie->pf_num == devfn)
+				return pcie;
+			break;
+		default:
+			break;
+		}
+
+		if (!hdr->desc_len)
+			hdr->desc_len = RESOURCE_DESC_SIZE_V0;
+		hdr = (void *)hdr + hdr->desc_len;
+	}
+
+	return NULL;
+}
+
+/* Translate the little-endian limits of NIC resource descriptor @desc
+ * into the CPU-order fields of @res.
+ */
+static void be_copy_nic_desc(struct be_resources *res,
+			     struct be_nic_res_desc *desc)
+{
+	u32 fw_caps = le32_to_cpu(desc->cap_flags);
+
+	/* Clear flags that driver is not interested in */
+	res->if_cap_flags = fw_caps & BE_IF_CAP_FLAGS_WANT;
+
+	res->max_evt_qs = le16_to_cpu(desc->eq_count);
+	res->max_rx_qs = le16_to_cpu(desc->rq_count);
+	res->max_rss_qs = le16_to_cpu(desc->rssq_count);
+	res->max_tx_qs = le16_to_cpu(desc->txq_count);
+	res->max_mcast_mac = le16_to_cpu(desc->mcast_mac_count);
+	res->max_vlans = le16_to_cpu(desc->vlan_count);
+	res->max_uc_mac = le16_to_cpu(desc->unicast_mac_count);
+
+	/* Need 1 RXQ as the default RXQ */
+	if (res->max_rss_qs != 0 && res->max_rx_qs == res->max_rss_qs)
+		res->max_rss_qs--;
+}
+
/* Uses Mbox */
-int be_cmd_get_func_config(struct be_adapter *adapter)
+int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res)
{
struct be_mcc_wrb *wrb;
struct be_cmd_req_get_func_config *req;
@@ -3149,28 +3208,16 @@
if (!status) {
struct be_cmd_resp_get_func_config *resp = cmd.va;
u32 desc_count = le32_to_cpu(resp->desc_count);
- struct be_nic_resource_desc *desc;
+ struct be_nic_res_desc *desc;
- desc = be_get_nic_desc(resp->func_param, desc_count,
- sizeof(resp->func_param));
+ desc = be_get_nic_desc(resp->func_param, desc_count);
if (!desc) {
status = -EINVAL;
goto err;
}
adapter->pf_number = desc->pf_num;
- adapter->max_pmac_cnt = le16_to_cpu(desc->unicast_mac_count);
- adapter->max_vlans = le16_to_cpu(desc->vlan_count);
- adapter->max_mcast_mac = le16_to_cpu(desc->mcast_mac_count);
- adapter->max_tx_queues = le16_to_cpu(desc->txq_count);
- adapter->max_rss_queues = le16_to_cpu(desc->rssq_count);
- adapter->max_rx_queues = le16_to_cpu(desc->rq_count);
-
- adapter->max_event_queues = le16_to_cpu(desc->eq_count);
- adapter->if_cap_flags = le32_to_cpu(desc->cap_flags);
-
- /* Clear flags that driver is not interested in */
- adapter->if_cap_flags &= BE_IF_CAP_FLAGS_WANT;
+ be_copy_nic_desc(res, desc);
}
err:
mutex_unlock(&adapter->mbox_lock);
@@ -3241,54 +3288,51 @@
}
/* Uses sync mcc, if MCCQ is already created otherwise mbox */
-int be_cmd_get_profile_config(struct be_adapter *adapter, u32 *cap_flags,
- u16 *txq_count, u8 domain)
+int be_cmd_get_profile_config(struct be_adapter *adapter,
+ struct be_resources *res, u8 domain)
{
+ struct be_cmd_resp_get_profile_config *resp;
+ struct be_pcie_res_desc *pcie;
+ struct be_nic_res_desc *nic;
struct be_queue_info *mccq = &adapter->mcc_obj.q;
struct be_dma_mem cmd;
+ u32 desc_count;
int status;
memset(&cmd, 0, sizeof(struct be_dma_mem));
- if (!lancer_chip(adapter))
- cmd.size = sizeof(struct be_cmd_resp_get_profile_config_v1);
- else
- cmd.size = sizeof(struct be_cmd_resp_get_profile_config);
- cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size,
- &cmd.dma);
- if (!cmd.va) {
- dev_err(&adapter->pdev->dev, "Memory alloc failure\n");
+ cmd.size = sizeof(struct be_cmd_resp_get_profile_config);
+ cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+ if (!cmd.va)
return -ENOMEM;
- }
if (!mccq->created)
status = be_cmd_get_profile_config_mbox(adapter, domain, &cmd);
else
status = be_cmd_get_profile_config_mccq(adapter, domain, &cmd);
- if (!status) {
- struct be_cmd_resp_get_profile_config *resp = cmd.va;
- u32 desc_count = le32_to_cpu(resp->desc_count);
- struct be_nic_resource_desc *desc;
+ if (status)
+ goto err;
- desc = be_get_nic_desc(resp->func_param, desc_count,
- sizeof(resp->func_param));
+ resp = cmd.va;
+ desc_count = le32_to_cpu(resp->desc_count);
- if (!desc) {
- status = -EINVAL;
- goto err;
- }
- if (cap_flags)
- *cap_flags = le32_to_cpu(desc->cap_flags);
- if (txq_count)
- *txq_count = le32_to_cpu(desc->txq_count);
- }
+ pcie = be_get_pcie_desc(adapter->pdev->devfn, resp->func_param,
+ desc_count);
+ if (pcie)
+ res->max_vfs = le16_to_cpu(pcie->num_vfs);
+
+ nic = be_get_nic_desc(resp->func_param, desc_count);
+ if (nic)
+ be_copy_nic_desc(res, nic);
+
err:
if (cmd.va)
- pci_free_consistent(adapter->pdev, cmd.size,
- cmd.va, cmd.dma);
+ pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
return status;
}
-/* Uses sync mcc */
+/* Currently only Lancer uses this command and it supports version 0 only
+ * Uses sync mcc
+ */
int be_cmd_set_profile_config(struct be_adapter *adapter, u32 bps,
u8 domain)
{
@@ -3309,12 +3353,10 @@
be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
OPCODE_COMMON_SET_PROFILE_CONFIG, sizeof(*req),
wrb, NULL);
-
req->hdr.domain = domain;
req->desc_count = cpu_to_le32(1);
-
- req->nic_desc.desc_type = NIC_RESOURCE_DESC_TYPE_V0;
- req->nic_desc.desc_len = RESOURCE_DESC_SIZE;
+ req->nic_desc.hdr.desc_type = NIC_RESOURCE_DESC_TYPE_V0;
+ req->nic_desc.hdr.desc_len = RESOURCE_DESC_SIZE_V0;
req->nic_desc.flags = (1 << QUN) | (1 << IMM) | (1 << NOSV);
req->nic_desc.pf_num = adapter->pf_number;
req->nic_desc.vf_num = domain;
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 6237192..52f3d4c 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -307,7 +307,7 @@
struct be_cmd_resp_eq_create {
struct be_cmd_resp_hdr resp_hdr;
u16 eq_id; /* sword */
- u16 rsvd0; /* sword */
+ u16 msix_idx; /* available only in v2 */
} __packed;
/******************** Mac query ***************************/
@@ -1718,11 +1718,13 @@
struct be_fat_conf_params set_params;
};
-#define RESOURCE_DESC_SIZE 88
+#define RESOURCE_DESC_SIZE_V0 72
+#define RESOURCE_DESC_SIZE_V1 88
+#define PCIE_RESOURCE_DESC_TYPE_V0 0x40
#define NIC_RESOURCE_DESC_TYPE_V0 0x41
+#define PCIE_RESOURCE_DESC_TYPE_V1 0x50
#define NIC_RESOURCE_DESC_TYPE_V1 0x51
-#define MAX_RESOURCE_DESC 4
-#define MAX_RESOURCE_DESC_V1 32
+#define MAX_RESOURCE_DESC 264
/* QOS unit number */
#define QUN 4
@@ -1731,9 +1733,30 @@
/* No save */
#define NOSV 7
-struct be_nic_resource_desc {
+struct be_res_desc_hdr {
u8 desc_type;
u8 desc_len;
+} __packed;
+
+struct be_pcie_res_desc {
+ struct be_res_desc_hdr hdr;
+ u8 rsvd0;
+ u8 flags;
+ u16 rsvd1;
+ u8 pf_num;
+ u8 rsvd2;
+ u32 rsvd3;
+ u8 sriov_state;
+ u8 pf_state;
+ u8 pf_type;
+ u8 rsvd4;
+ u16 num_vfs;
+ u16 rsvd5;
+ u32 rsvd6[17];
+} __packed;
+
+struct be_nic_res_desc {
+ struct be_res_desc_hdr hdr;
u8 rsvd1;
u8 flags;
u8 vf_num;
@@ -1762,7 +1785,7 @@
u8 wol_param;
u16 rsvd7;
u32 rsvd8[3];
-};
+} __packed;
struct be_cmd_req_get_func_config {
struct be_cmd_req_hdr hdr;
@@ -1771,7 +1794,7 @@
struct be_cmd_resp_get_func_config {
struct be_cmd_resp_hdr hdr;
u32 desc_count;
- u8 func_param[MAX_RESOURCE_DESC * RESOURCE_DESC_SIZE];
+ u8 func_param[MAX_RESOURCE_DESC * RESOURCE_DESC_SIZE_V1];
};
#define ACTIVE_PROFILE_TYPE 0x2
@@ -1783,26 +1806,20 @@
};
struct be_cmd_resp_get_profile_config {
- struct be_cmd_req_hdr hdr;
+ struct be_cmd_resp_hdr hdr;
u32 desc_count;
- u8 func_param[MAX_RESOURCE_DESC * RESOURCE_DESC_SIZE];
-};
-
-struct be_cmd_resp_get_profile_config_v1 {
- struct be_cmd_req_hdr hdr;
- u32 desc_count;
- u8 func_param[MAX_RESOURCE_DESC_V1 * RESOURCE_DESC_SIZE];
+ u8 func_param[MAX_RESOURCE_DESC * RESOURCE_DESC_SIZE_V1];
};
struct be_cmd_req_set_profile_config {
struct be_cmd_req_hdr hdr;
u32 rsvd;
u32 desc_count;
- struct be_nic_resource_desc nic_desc;
+ struct be_nic_res_desc nic_desc;
};
struct be_cmd_resp_set_profile_config {
- struct be_cmd_req_hdr hdr;
+ struct be_cmd_resp_hdr hdr;
};
struct be_cmd_enable_disable_vf {
@@ -1851,8 +1868,7 @@
u32 en_flags, u32 *if_handle, u32 domain);
extern int be_cmd_if_destroy(struct be_adapter *adapter, int if_handle,
u32 domain);
-extern int be_cmd_eq_create(struct be_adapter *adapter,
- struct be_queue_info *eq, int eq_delay);
+extern int be_cmd_eq_create(struct be_adapter *adapter, struct be_eq_obj *eqo);
extern int be_cmd_cq_create(struct be_adapter *adapter,
struct be_queue_info *cq, struct be_queue_info *eq,
bool no_delay, int num_cqe_dma_coalesce);
@@ -1964,10 +1980,10 @@
extern bool dump_present(struct be_adapter *adapter);
extern int lancer_test_and_set_rdy_state(struct be_adapter *adapter);
extern int be_cmd_query_port_name(struct be_adapter *adapter, u8 *port_name);
-extern int be_cmd_get_func_config(struct be_adapter *adapter);
-extern int be_cmd_get_profile_config(struct be_adapter *adapter, u32 *cap_flags,
- u16 *txq_count, u8 domain);
-
+int be_cmd_get_func_config(struct be_adapter *adapter,
+ struct be_resources *res);
+int be_cmd_get_profile_config(struct be_adapter *adapter,
+ struct be_resources *res, u8 domain);
extern int be_cmd_set_profile_config(struct be_adapter *adapter, u32 bps,
u8 domain);
extern int be_cmd_get_if_id(struct be_adapter *adapter,
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 4f8c941..b440a1f 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -1119,6 +1119,29 @@
return status;
}
+static void be_get_channels(struct net_device *netdev,
+ struct ethtool_channels *ch)
+{
+ struct be_adapter *adapter = netdev_priv(netdev);
+
+ ch->combined_count = adapter->num_evt_qs;
+ ch->max_combined = be_max_qs(adapter);
+}
+
+static int be_set_channels(struct net_device *netdev,
+ struct ethtool_channels *ch)
+{
+ struct be_adapter *adapter = netdev_priv(netdev);
+
+ if (ch->rx_count || ch->tx_count || ch->other_count ||
+ !ch->combined_count || ch->combined_count > be_max_qs(adapter))
+ return -EINVAL;
+
+ adapter->cfg_num_qs = ch->combined_count;
+
+ return be_update_queues(adapter);
+}
+
const struct ethtool_ops be_ethtool_ops = {
.get_settings = be_get_settings,
.get_drvinfo = be_get_drvinfo,
@@ -1145,4 +1168,6 @@
.self_test = be_self_test,
.get_rxnfc = be_get_rxnfc,
.set_rxnfc = be_set_rxnfc,
+ .get_channels = be_get_channels,
+ .set_channels = be_set_channels
};
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index ff2b40d..39e0a76 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -145,8 +145,8 @@
q->len = len;
q->entry_size = entry_size;
mem->size = len * entry_size;
- mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
- GFP_KERNEL | __GFP_ZERO);
+ mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
+ GFP_KERNEL);
if (!mem->va)
return -ENOMEM;
return 0;
@@ -1001,7 +1001,7 @@
if (adapter->promiscuous)
return 0;
- if (adapter->vlans_added > adapter->max_vlans)
+ if (adapter->vlans_added > be_max_vlans(adapter))
goto set_vlan_promisc;
/* Construct VLAN Table to give to HW */
@@ -1042,7 +1042,7 @@
goto ret;
adapter->vlan_tag[vid] = 1;
- if (adapter->vlans_added <= (adapter->max_vlans + 1))
+ if (adapter->vlans_added <= (be_max_vlans(adapter) + 1))
status = be_vid_config(adapter);
if (!status)
@@ -1068,7 +1068,7 @@
goto ret;
adapter->vlan_tag[vid] = 0;
- if (adapter->vlans_added <= adapter->max_vlans)
+ if (adapter->vlans_added <= be_max_vlans(adapter))
status = be_vid_config(adapter);
if (!status)
@@ -1101,7 +1101,7 @@
/* Enable multicast promisc if num configured exceeds what we support */
if (netdev->flags & IFF_ALLMULTI ||
- netdev_mc_count(netdev) > adapter->max_mcast_mac) {
+ netdev_mc_count(netdev) > be_max_mc(adapter)) {
be_cmd_rx_filter(adapter, IFF_ALLMULTI, ON);
goto done;
}
@@ -1115,7 +1115,7 @@
adapter->pmac_id[i], 0);
}
- if (netdev_uc_count(netdev) > adapter->max_pmac_cnt) {
+ if (netdev_uc_count(netdev) > be_max_uc(adapter)) {
be_cmd_rx_filter(adapter, IFF_PROMISC, ON);
adapter->promiscuous = true;
goto done;
@@ -1913,6 +1913,7 @@
if (eqo->q.created) {
be_eq_clean(eqo);
be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
+ netif_napi_del(&eqo->napi);
}
be_queue_free(adapter, &eqo->q);
}
@@ -1924,9 +1925,12 @@
struct be_eq_obj *eqo;
int i, rc;
- adapter->num_evt_qs = num_irqs(adapter);
+ adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
+ adapter->cfg_num_qs);
for_all_evt_queues(adapter, eqo, i) {
+ netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
+ BE_NAPI_WEIGHT);
eqo->adapter = adapter;
eqo->tx_budget = BE_TX_BUDGET;
eqo->idx = i;
@@ -1939,7 +1943,7 @@
if (rc)
return rc;
- rc = be_cmd_eq_create(adapter, eq, eqo->cur_eqd);
+ rc = be_cmd_eq_create(adapter, eqo);
if (rc)
return rc;
}
@@ -2013,31 +2017,13 @@
}
}
-static int be_num_txqs_want(struct be_adapter *adapter)
-{
- if ((!lancer_chip(adapter) && sriov_want(adapter)) ||
- be_is_mc(adapter) ||
- (!lancer_chip(adapter) && !be_physfn(adapter)) ||
- BE2_chip(adapter))
- return 1;
- else
- return adapter->max_tx_queues;
-}
-
-static int be_tx_cqs_create(struct be_adapter *adapter)
+static int be_tx_qs_create(struct be_adapter *adapter)
{
struct be_queue_info *cq, *eq;
- int status;
struct be_tx_obj *txo;
- u8 i;
+ int status, i;
- adapter->num_tx_qs = be_num_txqs_want(adapter);
- if (adapter->num_tx_qs != MAX_TX_QS) {
- rtnl_lock();
- netif_set_real_num_tx_queues(adapter->netdev,
- adapter->num_tx_qs);
- rtnl_unlock();
- }
+ adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter));
for_all_tx_queues(adapter, txo, i) {
cq = &txo->cq;
@@ -2053,16 +2039,7 @@
status = be_cmd_cq_create(adapter, cq, eq, false, 3);
if (status)
return status;
- }
- return 0;
-}
-static int be_tx_qs_create(struct be_adapter *adapter)
-{
- struct be_tx_obj *txo;
- int i, status;
-
- for_all_tx_queues(adapter, txo, i) {
status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
sizeof(struct be_eth_wrb));
if (status)
@@ -2098,17 +2075,14 @@
struct be_rx_obj *rxo;
int rc, i;
- /* We'll create as many RSS rings as there are irqs.
- * But when there's only one irq there's no use creating RSS rings
+ /* We can create as many RSS rings as there are EQs. */
+ adapter->num_rx_qs = adapter->num_evt_qs;
+
+ /* We'll use RSS only if at least 2 RSS rings are supported.
+ * When RSS is used, we'll need a default RXQ for non-IP traffic.
*/
- adapter->num_rx_qs = (num_irqs(adapter) > 1) ?
- num_irqs(adapter) + 1 : 1;
- if (adapter->num_rx_qs != MAX_RX_QS) {
- rtnl_lock();
- netif_set_real_num_rx_queues(adapter->netdev,
- adapter->num_rx_qs);
- rtnl_unlock();
- }
+ if (adapter->num_rx_qs > 1)
+ adapter->num_rx_qs++;
adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
for_all_rx_queues(adapter, rxo, i) {
@@ -2260,7 +2234,7 @@
return (work_done < budget); /* Done */
}
-static int be_poll(struct napi_struct *napi, int budget)
+int be_poll(struct napi_struct *napi, int budget)
{
struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
struct be_adapter *adapter = eqo->adapter;
@@ -2372,38 +2346,24 @@
if (msix_enabled(adapter)) {
pci_disable_msix(adapter->pdev);
adapter->num_msix_vec = 0;
+ adapter->num_msix_roce_vec = 0;
}
}
-static uint be_num_rss_want(struct be_adapter *adapter)
-{
- u32 num = 0;
-
- if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
- (lancer_chip(adapter) ||
- (!sriov_want(adapter) && be_physfn(adapter)))) {
- num = adapter->max_rss_queues;
- num = min_t(u32, num, (u32)netif_get_num_default_rss_queues());
- }
- return num;
-}
-
static int be_msix_enable(struct be_adapter *adapter)
{
-#define BE_MIN_MSIX_VECTORS 1
- int i, status, num_vec, num_roce_vec = 0;
+ int i, status, num_vec;
struct device *dev = &adapter->pdev->dev;
- /* If RSS queues are not used, need a vec for default RX Q */
- num_vec = min(be_num_rss_want(adapter), num_online_cpus());
- if (be_roce_supported(adapter)) {
- num_roce_vec = min_t(u32, MAX_ROCE_MSIX_VECTORS,
- (num_online_cpus() + 1));
- num_roce_vec = min(num_roce_vec, MAX_ROCE_EQS);
- num_vec += num_roce_vec;
- num_vec = min(num_vec, MAX_MSIX_VECTORS);
- }
- num_vec = max(num_vec, BE_MIN_MSIX_VECTORS);
+ /* If RoCE is supported, program the max number of NIC vectors that
+ * may be configured via set-channels, along with vectors needed for
+ * RoCE. Else, just program the number we'll use initially.
+ */
+ if (be_roce_supported(adapter))
+ num_vec = min_t(int, 2 * be_max_eqs(adapter),
+ 2 * num_online_cpus());
+ else
+ num_vec = adapter->cfg_num_qs;
for (i = 0; i < num_vec; i++)
adapter->msix_entries[i].entry = i;
@@ -2411,7 +2371,7 @@
status = pci_enable_msix(adapter->pdev, adapter->msix_entries, num_vec);
if (status == 0) {
goto done;
- } else if (status >= BE_MIN_MSIX_VECTORS) {
+ } else if (status >= MIN_MSIX_VECTORS) {
num_vec = status;
status = pci_enable_msix(adapter->pdev, adapter->msix_entries,
num_vec);
@@ -2420,30 +2380,29 @@
}
dev_warn(dev, "MSIx enable failed\n");
+
/* INTx is not supported in VFs, so fail probe if enable_msix fails */
if (!be_physfn(adapter))
return status;
return 0;
done:
- if (be_roce_supported(adapter)) {
- if (num_vec > num_roce_vec) {
- adapter->num_msix_vec = num_vec - num_roce_vec;
- adapter->num_msix_roce_vec =
- num_vec - adapter->num_msix_vec;
- } else {
- adapter->num_msix_vec = num_vec;
- adapter->num_msix_roce_vec = 0;
- }
- } else
- adapter->num_msix_vec = num_vec;
- dev_info(dev, "enabled %d MSI-x vector(s)\n", adapter->num_msix_vec);
+ if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
+ adapter->num_msix_roce_vec = num_vec / 2;
+ dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
+ adapter->num_msix_roce_vec);
+ }
+
+ adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
+
+ dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
+ adapter->num_msix_vec);
return 0;
}
static inline int be_msix_vec_get(struct be_adapter *adapter,
struct be_eq_obj *eqo)
{
- return adapter->msix_entries[eqo->idx].vector;
+ return adapter->msix_entries[eqo->msix_idx].vector;
}
static int be_msix_register(struct be_adapter *adapter)
@@ -2556,8 +2515,8 @@
/* Wait for all pending tx completions to arrive so that
* all tx skbs are freed.
*/
- be_tx_compl_clean(adapter);
netif_tx_disable(netdev);
+ be_tx_compl_clean(adapter);
be_rx_qs_destroy(adapter);
@@ -2683,8 +2642,8 @@
memset(mac, 0, ETH_ALEN);
cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config);
- cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
- GFP_KERNEL | __GFP_ZERO);
+ cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+ GFP_KERNEL);
if (cmd.va == NULL)
return -1;
@@ -2795,14 +2754,27 @@
adapter->num_vfs = 0;
}
-static int be_clear(struct be_adapter *adapter)
+static void be_clear_queues(struct be_adapter *adapter)
{
- int i;
+ be_mcc_queues_destroy(adapter);
+ be_rx_cqs_destroy(adapter);
+ be_tx_queues_destroy(adapter);
+ be_evt_queues_destroy(adapter);
+}
+static void be_cancel_worker(struct be_adapter *adapter)
+{
if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
cancel_delayed_work_sync(&adapter->work);
adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
}
+}
+
+static int be_clear(struct be_adapter *adapter)
+{
+ int i;
+
+ be_cancel_worker(adapter);
if (sriov_enabled(adapter))
be_vf_clear(adapter);
@@ -2815,10 +2787,7 @@
be_cmd_if_destroy(adapter, adapter->if_handle, 0);
- be_mcc_queues_destroy(adapter);
- be_rx_cqs_destroy(adapter);
- be_tx_queues_destroy(adapter);
- be_evt_queues_destroy(adapter);
+ be_clear_queues(adapter);
kfree(adapter->pmac_id);
adapter->pmac_id = NULL;
@@ -2829,6 +2798,7 @@
static int be_vfs_if_create(struct be_adapter *adapter)
{
+ struct be_resources res = {0};
struct be_vf_cfg *vf_cfg;
u32 cap_flags, en_flags, vf;
int status;
@@ -2837,9 +2807,12 @@
BE_IF_FLAGS_MULTICAST;
for_all_vfs(adapter, vf_cfg, vf) {
- if (!BE3_chip(adapter))
- be_cmd_get_profile_config(adapter, &cap_flags,
- NULL, vf + 1);
+ if (!BE3_chip(adapter)) {
+ status = be_cmd_get_profile_config(adapter, &res,
+ vf + 1);
+ if (!status)
+ cap_flags = res.if_cap_flags;
+ }
/* If a FW profile exists, then cap_flags are updated */
en_flags = cap_flags & (BE_IF_FLAGS_UNTAGGED |
@@ -2885,10 +2858,10 @@
dev_warn(dev, "Ignoring num_vfs=%d setting\n", num_vfs);
adapter->num_vfs = old_vfs;
} else {
- if (num_vfs > adapter->dev_num_vfs)
+ if (num_vfs > be_max_vfs(adapter))
dev_info(dev, "Device supports %d VFs and not %d\n",
- adapter->dev_num_vfs, num_vfs);
- adapter->num_vfs = min_t(u16, num_vfs, adapter->dev_num_vfs);
+ be_max_vfs(adapter), num_vfs);
+ adapter->num_vfs = min_t(u16, num_vfs, be_max_vfs(adapter));
if (!adapter->num_vfs)
return 0;
}
@@ -2967,6 +2940,51 @@
return status;
}
+/* On BE2/BE3 FW does not suggest the supported limits */
+static void BEx_get_resources(struct be_adapter *adapter,
+ struct be_resources *res)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ bool use_sriov = false;
+
+ if (BE3_chip(adapter) && be_physfn(adapter)) {
+ int max_vfs;
+
+ max_vfs = pci_sriov_get_totalvfs(pdev);
+ res->max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
+ use_sriov = res->max_vfs && num_vfs;
+ }
+
+ if (be_physfn(adapter))
+ res->max_uc_mac = BE_UC_PMAC_COUNT;
+ else
+ res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
+
+ if (adapter->function_mode & FLEX10_MODE)
+ res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
+ else
+ res->max_vlans = BE_NUM_VLANS_SUPPORTED;
+ res->max_mcast_mac = BE_MAX_MC;
+
+ if (BE2_chip(adapter) || use_sriov || be_is_mc(adapter) ||
+ !be_physfn(adapter))
+ res->max_tx_qs = 1;
+ else
+ res->max_tx_qs = BE3_MAX_TX_QS;
+
+ if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
+ !use_sriov && be_physfn(adapter))
+ res->max_rss_qs = (adapter->be3_native) ?
+ BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
+ res->max_rx_qs = res->max_rss_qs + 1;
+
+ res->max_evt_qs = be_physfn(adapter) ? BE3_MAX_EVT_QS : 1;
+
+ res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
+ if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
+ res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
+}
+
static void be_setup_init(struct be_adapter *adapter)
{
adapter->vlan_prio_bmap = 0xff;
@@ -2980,76 +2998,56 @@
adapter->cmd_privileges = MIN_PRIVILEGES;
}
-static void be_get_resources(struct be_adapter *adapter)
+static int be_get_resources(struct be_adapter *adapter)
{
- u16 dev_num_vfs;
- int pos, status;
- bool profile_present = false;
- u16 txq_count = 0;
+ struct device *dev = &adapter->pdev->dev;
+ struct be_resources res = {0};
+ int status;
+ if (BEx_chip(adapter)) {
+ BEx_get_resources(adapter, &res);
+ adapter->res = res;
+ }
+
+ /* For BE3 only check if FW suggests a different max-txqs value */
+ if (BE3_chip(adapter)) {
+ status = be_cmd_get_profile_config(adapter, &res, 0);
+ if (!status && res.max_tx_qs)
+ adapter->res.max_tx_qs =
+ min(adapter->res.max_tx_qs, res.max_tx_qs);
+ }
+
+ /* For Lancer, SH etc read per-function resource limits from FW.
+ * GET_FUNC_CONFIG returns per function guaranteed limits.
+ * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits
+ */
if (!BEx_chip(adapter)) {
- status = be_cmd_get_func_config(adapter);
- if (!status)
- profile_present = true;
- } else if (BE3_chip(adapter) && be_physfn(adapter)) {
- be_cmd_get_profile_config(adapter, NULL, &txq_count, 0);
+ status = be_cmd_get_func_config(adapter, &res);
+ if (status)
+ return status;
+
+ /* If RoCE may be enabled stash away half the EQs for RoCE */
+ if (be_roce_supported(adapter))
+ res.max_evt_qs /= 2;
+ adapter->res = res;
+
+ if (be_physfn(adapter)) {
+ status = be_cmd_get_profile_config(adapter, &res, 0);
+ if (status)
+ return status;
+ adapter->res.max_vfs = res.max_vfs;
+ }
+
+ dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
+ be_max_txqs(adapter), be_max_rxqs(adapter),
+ be_max_rss(adapter), be_max_eqs(adapter),
+ be_max_vfs(adapter));
+ dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
+ be_max_uc(adapter), be_max_mc(adapter),
+ be_max_vlans(adapter));
}
- if (profile_present) {
- adapter->max_tx_queues = min_t(u16, adapter->max_tx_queues,
- MAX_TX_QS);
- adapter->max_rss_queues = min_t(u16, adapter->max_rss_queues,
- BE3_MAX_RSS_QS);
- adapter->max_event_queues = min_t(u16,
- adapter->max_event_queues,
- BE3_MAX_RSS_QS);
-
- if (adapter->max_rss_queues &&
- adapter->max_rss_queues == adapter->max_rx_queues)
- adapter->max_rss_queues -= 1;
-
- if (adapter->max_event_queues < adapter->max_rss_queues)
- adapter->max_rss_queues = adapter->max_event_queues;
-
- } else {
- if (be_physfn(adapter))
- adapter->max_pmac_cnt = BE_UC_PMAC_COUNT;
- else
- adapter->max_pmac_cnt = BE_VF_UC_PMAC_COUNT;
-
- if (adapter->function_mode & FLEX10_MODE)
- adapter->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
- else
- adapter->max_vlans = BE_NUM_VLANS_SUPPORTED;
-
- adapter->max_mcast_mac = BE_MAX_MC;
- adapter->max_tx_queues = txq_count ? txq_count : MAX_TX_QS;
- adapter->max_tx_queues = min_t(u16, adapter->max_tx_queues,
- MAX_TX_QS);
- adapter->max_rss_queues = (adapter->be3_native) ?
- BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
- adapter->max_event_queues = BE3_MAX_RSS_QS;
-
- adapter->if_cap_flags = BE_IF_FLAGS_UNTAGGED |
- BE_IF_FLAGS_BROADCAST |
- BE_IF_FLAGS_MULTICAST |
- BE_IF_FLAGS_PASS_L3L4_ERRORS |
- BE_IF_FLAGS_MCAST_PROMISCUOUS |
- BE_IF_FLAGS_VLAN_PROMISCUOUS |
- BE_IF_FLAGS_PROMISCUOUS;
-
- if (adapter->function_caps & BE_FUNCTION_CAPS_RSS)
- adapter->if_cap_flags |= BE_IF_FLAGS_RSS;
- }
-
- pos = pci_find_ext_capability(adapter->pdev, PCI_EXT_CAP_ID_SRIOV);
- if (pos) {
- pci_read_config_word(adapter->pdev, pos + PCI_SRIOV_TOTAL_VF,
- &dev_num_vfs);
- if (BE3_chip(adapter))
- dev_num_vfs = min_t(u16, dev_num_vfs, MAX_VFS);
- adapter->dev_num_vfs = dev_num_vfs;
- }
+ return 0;
}
/* Routine to query per function resource limits */
@@ -3062,20 +3060,22 @@
&adapter->function_caps,
&adapter->asic_rev);
if (status)
- goto err;
+ return status;
- be_get_resources(adapter);
+ status = be_get_resources(adapter);
+ if (status)
+ return status;
/* primary mac needs 1 pmac entry */
- adapter->pmac_id = kcalloc(adapter->max_pmac_cnt + 1,
- sizeof(u32), GFP_KERNEL);
- if (!adapter->pmac_id) {
- status = -ENOMEM;
- goto err;
- }
+ adapter->pmac_id = kcalloc(be_max_uc(adapter) + 1, sizeof(u32),
+ GFP_KERNEL);
+ if (!adapter->pmac_id)
+ return -ENOMEM;
-err:
- return status;
+ /* Sanitize cfg_num_qs based on HW and platform limits */
+ adapter->cfg_num_qs = min(adapter->cfg_num_qs, be_max_qs(adapter));
+
+ return 0;
}
static int be_mac_setup(struct be_adapter *adapter)
@@ -3104,11 +3104,87 @@
return 0;
}
+static void be_schedule_worker(struct be_adapter *adapter)
+{
+ schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
+ adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
+}
+
+static int be_setup_queues(struct be_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ int status;
+
+ status = be_evt_queues_create(adapter);
+ if (status)
+ goto err;
+
+ status = be_tx_qs_create(adapter);
+ if (status)
+ goto err;
+
+ status = be_rx_cqs_create(adapter);
+ if (status)
+ goto err;
+
+ status = be_mcc_queues_create(adapter);
+ if (status)
+ goto err;
+
+ status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
+ if (status)
+ goto err;
+
+ status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
+ if (status)
+ goto err;
+
+ return 0;
+err:
+ dev_err(&adapter->pdev->dev, "queue_setup failed\n");
+ return status;
+}
+
+int be_update_queues(struct be_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ int status;
+
+ if (netif_running(netdev))
+ be_close(netdev);
+
+ be_cancel_worker(adapter);
+
+ /* If any vectors have been shared with RoCE we cannot re-program
+ * the MSIx table.
+ */
+ if (!adapter->num_msix_roce_vec)
+ be_msix_disable(adapter);
+
+ be_clear_queues(adapter);
+
+ if (!msix_enabled(adapter)) {
+ status = be_msix_enable(adapter);
+ if (status)
+ return status;
+ }
+
+ status = be_setup_queues(adapter);
+ if (status)
+ return status;
+
+ be_schedule_worker(adapter);
+
+ if (netif_running(netdev))
+ status = be_open(netdev);
+
+ return status;
+}
+
static int be_setup(struct be_adapter *adapter)
{
struct device *dev = &adapter->pdev->dev;
- u32 en_flags;
- u32 tx_fc, rx_fc;
+ u32 tx_fc, rx_fc, en_flags;
int status;
be_setup_init(adapter);
@@ -3124,19 +3200,20 @@
if (status)
goto err;
- status = be_evt_queues_create(adapter);
+ en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
+ BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS;
+ if (adapter->function_caps & BE_FUNCTION_CAPS_RSS)
+ en_flags |= BE_IF_FLAGS_RSS;
+ en_flags = en_flags & be_if_cap_flags(adapter);
+ status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
+ &adapter->if_handle, 0);
if (status)
goto err;
- status = be_tx_cqs_create(adapter);
- if (status)
- goto err;
-
- status = be_rx_cqs_create(adapter);
- if (status)
- goto err;
-
- status = be_mcc_queues_create(adapter);
+ /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
+ rtnl_lock();
+ status = be_setup_queues(adapter);
+ rtnl_unlock();
if (status)
goto err;
@@ -3147,24 +3224,10 @@
if (be_is_mc(adapter))
adapter->cmd_privileges = MAX_PRIVILEGES;
- en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
- BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS;
- if (adapter->function_caps & BE_FUNCTION_CAPS_RSS)
- en_flags |= BE_IF_FLAGS_RSS;
- en_flags = en_flags & adapter->if_cap_flags;
- status = be_cmd_if_create(adapter, adapter->if_cap_flags, en_flags,
- &adapter->if_handle, 0);
- if (status != 0)
- goto err;
-
status = be_mac_setup(adapter);
if (status)
goto err;
- status = be_tx_qs_create(adapter);
- if (status)
- goto err;
-
be_cmd_get_fw_ver(adapter, adapter->fw_ver, adapter->fw_on_flash);
if (adapter->vlans_added)
@@ -3178,8 +3241,8 @@
be_cmd_set_flow_control(adapter, adapter->tx_fc,
adapter->rx_fc);
- if (be_physfn(adapter)) {
- if (adapter->dev_num_vfs)
+ if (be_physfn(adapter) && num_vfs) {
+ if (be_max_vfs(adapter))
be_vf_setup(adapter);
else
dev_warn(dev, "device doesn't support SRIOV\n");
@@ -3189,8 +3252,7 @@
if (!status && be_pause_supported(adapter))
adapter->phy.fc_autoneg = 1;
- schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
- adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
+ be_schedule_worker(adapter);
return 0;
err:
be_clear(adapter);
@@ -3756,8 +3818,6 @@
static void be_netdev_init(struct net_device *netdev)
{
struct be_adapter *adapter = netdev_priv(netdev);
- struct be_eq_obj *eqo;
- int i;
netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
@@ -3780,9 +3840,6 @@
netdev->netdev_ops = &be_netdev_ops;
SET_ETHTOOL_OPS(netdev, &be_ethtool_ops);
-
- for_all_evt_queues(adapter, eqo, i)
- netif_napi_add(netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT);
}
static void be_unmap_pci_bars(struct be_adapter *adapter)
@@ -3889,9 +3946,9 @@
memset(mbox_mem_align->va, 0, sizeof(struct be_mcc_mailbox));
rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
- rx_filter->va = dma_alloc_coherent(&adapter->pdev->dev, rx_filter->size,
- &rx_filter->dma,
- GFP_KERNEL | __GFP_ZERO);
+ rx_filter->va = dma_zalloc_coherent(&adapter->pdev->dev,
+ rx_filter->size, &rx_filter->dma,
+ GFP_KERNEL);
if (rx_filter->va == NULL) {
status = -ENOMEM;
goto free_mbox;
@@ -3937,8 +3994,8 @@
/* BE3 and Skyhawk */
cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
- cmd->va = dma_alloc_coherent(&adapter->pdev->dev, cmd->size, &cmd->dma,
- GFP_KERNEL | __GFP_ZERO);
+ cmd->va = dma_zalloc_coherent(&adapter->pdev->dev, cmd->size, &cmd->dma,
+ GFP_KERNEL);
if (cmd->va == NULL)
return -1;
return 0;
@@ -4045,6 +4102,7 @@
level = be_get_fw_log_level(adapter);
adapter->msg_enable = level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
+ adapter->cfg_num_qs = netif_get_num_default_rss_queues();
return 0;
}
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c
index 645e846..9cd5415 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.c
+++ b/drivers/net/ethernet/emulex/benet/be_roce.c
@@ -60,7 +60,7 @@
*/
num_vec = adapter->num_msix_vec + adapter->num_msix_roce_vec;
dev_info.intr_mode = BE_INTERRUPT_MODE_MSIX;
- dev_info.msix.num_vectors = min(num_vec, MAX_ROCE_MSIX_VECTORS);
+ dev_info.msix.num_vectors = min(num_vec, MAX_MSIX_VECTORS);
/* provide start index of the vector,
* so in case of linear usage,
* it can use the base as starting point.
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h
index 2765729..2cd1129 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.h
+++ b/drivers/net/ethernet/emulex/benet/be_roce.h
@@ -29,7 +29,7 @@
BE_INTERRUPT_MODE_MSI = 2,
};
-#define MAX_ROCE_MSIX_VECTORS 16
+#define MAX_MSIX_VECTORS 32
struct be_dev_info {
u8 __iomem *db;
u64 unmapped_db;
@@ -45,7 +45,7 @@
struct {
int num_vectors;
int start_vector;
- u32 vector_list[MAX_ROCE_MSIX_VECTORS];
+ u32 vector_list[MAX_MSIX_VECTORS];
} msix;
};
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index cf579fb..4de8cfd 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1030,8 +1030,8 @@
}
/* Allow the platform setup code to pass in a MAC address. */
- if (pdev->dev.platform_data) {
- struct ethoc_platform_data *pdata = pdev->dev.platform_data;
+ if (dev_get_platdata(&pdev->dev)) {
+ struct ethoc_platform_data *pdata = dev_get_platdata(&pdev->dev);
memcpy(netdev->dev_addr, pdata->hwaddr, IFHWADDRLEN);
priv->phy_id = pdata->phy_id;
} else {
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 934e1ae..212f44b 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -778,10 +778,9 @@
{
int i;
- priv->descs = dma_alloc_coherent(priv->dev,
- sizeof(struct ftgmac100_descs),
- &priv->descs_dma_addr,
- GFP_KERNEL | __GFP_ZERO);
+ priv->descs = dma_zalloc_coherent(priv->dev,
+ sizeof(struct ftgmac100_descs),
+ &priv->descs_dma_addr, GFP_KERNEL);
if (!priv->descs)
return -ENOMEM;
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 4658f4c..8be5b40 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -732,10 +732,10 @@
{
int i;
- priv->descs = dma_alloc_coherent(priv->dev,
- sizeof(struct ftmac100_descs),
- &priv->descs_dma_addr,
- GFP_KERNEL | __GFP_ZERO);
+ priv->descs = dma_zalloc_coherent(priv->dev,
+ sizeof(struct ftmac100_descs),
+ &priv->descs_dma_addr,
+ GFP_KERNEL);
if (!priv->descs)
return -ENOMEM;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index fdf9307..0cd5e4b 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -69,7 +69,6 @@
#endif
#define DRIVER_NAME "fec"
-#define FEC_NAPI_WEIGHT 64
/* Pause frame feild and FIFO threshold */
#define FEC_ENET_FCE (1 << 5)
@@ -1060,7 +1059,7 @@
static void fec_get_mac(struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
- struct fec_platform_data *pdata = fep->pdev->dev.platform_data;
+ struct fec_platform_data *pdata = dev_get_platdata(&fep->pdev->dev);
unsigned char *iap, tmpaddr[ETH_ALEN];
/*
@@ -1100,10 +1099,10 @@
* 4) FEC mac registers set by bootloader
*/
if (!is_valid_ether_addr(iap)) {
- *((unsigned long *) &tmpaddr[0]) =
- be32_to_cpu(readl(fep->hwp + FEC_ADDR_LOW));
- *((unsigned short *) &tmpaddr[4]) =
- be16_to_cpu(readl(fep->hwp + FEC_ADDR_HIGH) >> 16);
+ *((__be32 *) &tmpaddr[0]) =
+ cpu_to_be32(readl(fep->hwp + FEC_ADDR_LOW));
+ *((__be16 *) &tmpaddr[4]) =
+ cpu_to_be16(readl(fep->hwp + FEC_ADDR_HIGH) >> 16);
iap = &tmpaddr[0];
}
@@ -1981,7 +1980,7 @@
ndev->ethtool_ops = &fec_enet_ethtool_ops;
writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK);
- netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, FEC_NAPI_WEIGHT);
+ netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, NAPI_POLL_WEIGHT);
if (id_entry->driver_data & FEC_QUIRK_HAS_VLAN) {
/* enable hw VLAN support */
@@ -2089,7 +2088,7 @@
ret = of_get_phy_mode(pdev->dev.of_node);
if (ret < 0) {
- pdata = pdev->dev.platform_data;
+ pdata = dev_get_platdata(&pdev->dev);
if (pdata)
fep->phy_interface = pdata->phy;
else
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c
index 360a578..e052890 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c
@@ -123,12 +123,10 @@
static int mpc52xx_fec_mdio_remove(struct platform_device *of)
{
- struct device *dev = &of->dev;
- struct mii_bus *bus = dev_get_drvdata(dev);
+ struct mii_bus *bus = platform_get_drvdata(of);
struct mpc52xx_fec_mdio_priv *priv = bus->priv;
mdiobus_unregister(bus);
- dev_set_drvdata(dev, NULL);
iounmap(priv->regs);
kfree(priv);
mdiobus_free(bus);
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 8de53a1..6b60582 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -583,7 +583,6 @@
struct sk_buff *skb)
{
struct sk_buff *new_skb;
- struct fs_enet_private *fep = netdev_priv(dev);
/* Alloc new skb */
new_skb = netdev_alloc_skb(dev, skb->len + 4);
@@ -1000,6 +999,8 @@
struct fs_enet_private *fep;
struct fs_platform_info *fpi;
const u32 *data;
+ struct clk *clk;
+ int err;
const u8 *mac_addr;
const char *phy_connection_type;
int privsize, len, ret = -ENODEV;
@@ -1037,6 +1038,20 @@
fpi->use_rmii = 1;
}
+ /* make clock lookup non-fatal (the driver is shared among platforms),
+ * but require enable to succeed when a clock was specified/found,
+ * keep a reference to the clock upon successful acquisition
+ */
+ clk = devm_clk_get(&ofdev->dev, "per");
+ if (!IS_ERR(clk)) {
+ err = clk_prepare_enable(clk);
+ if (err) {
+ ret = err;
+ goto out_free_fpi;
+ }
+ fpi->clk_per = clk;
+ }
+
privsize = sizeof(*fep) +
sizeof(struct sk_buff **) *
(fpi->rx_ring + fpi->tx_ring);
@@ -1108,6 +1123,8 @@
free_netdev(ndev);
out_put:
of_node_put(fpi->phy_node);
+ if (fpi->clk_per)
+ clk_disable_unprepare(fpi->clk_per);
out_free_fpi:
kfree(fpi);
return ret;
@@ -1124,6 +1141,8 @@
fep->ops->cleanup_data(ndev);
dev_set_drvdata(fep->dev, NULL);
of_node_put(fep->fpi->phy_node);
+ if (fep->fpi->clk_per)
+ clk_disable_unprepare(fep->fpi->clk_per);
free_netdev(ndev);
return 0;
}
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index c93a056..c4f6506 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -409,7 +409,7 @@
priv->regs = priv->map + data->mii_offset;
new_bus->parent = &pdev->dev;
- dev_set_drvdata(&pdev->dev, new_bus);
+ platform_set_drvdata(pdev, new_bus);
if (data->get_tbipa) {
for_each_child_of_node(np, tbi) {
@@ -468,8 +468,6 @@
mdiobus_unregister(bus);
- dev_set_drvdata(device, NULL);
-
iounmap(priv->map);
mdiobus_free(bus);
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 3c43dac..5930c39 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3911,14 +3911,12 @@
static int ucc_geth_remove(struct platform_device* ofdev)
{
- struct device *device = &ofdev->dev;
- struct net_device *dev = dev_get_drvdata(device);
+ struct net_device *dev = platform_get_drvdata(ofdev);
struct ucc_geth_private *ugeth = netdev_priv(dev);
unregister_netdev(dev);
free_netdev(dev);
ucc_geth_memclean(ugeth);
- dev_set_drvdata(device, NULL);
return 0;
}
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index d300a0c..2d3b064 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -2955,8 +2955,6 @@
DBG(dev, "remove" NL);
- dev_set_drvdata(&ofdev->dev, NULL);
-
unregister_netdev(dev->ndev);
cancel_work_sync(&dev->reset_work);
diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index 856ea66..dac564c 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -637,8 +637,8 @@
bd_size = sizeof(struct mal_descriptor) *
(NUM_TX_BUFF * mal->num_tx_chans +
NUM_RX_BUFF * mal->num_rx_chans);
- mal->bd_virt = dma_alloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma,
- GFP_KERNEL | __GFP_ZERO);
+ mal->bd_virt = dma_zalloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma,
+ GFP_KERNEL);
if (mal->bd_virt == NULL) {
err = -ENOMEM;
goto fail_unmap;
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index 82a967c..73a8aee 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -1019,8 +1019,8 @@
txdr->size = txdr->count * sizeof(struct e1000_tx_desc);
txdr->size = ALIGN(txdr->size, 4096);
- txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
- GFP_KERNEL | __GFP_ZERO);
+ txdr->desc = dma_zalloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
+ GFP_KERNEL);
if (!txdr->desc) {
ret_val = 2;
goto err_nomem;
@@ -1077,8 +1077,8 @@
}
rxdr->size = rxdr->count * sizeof(struct e1000_rx_desc);
- rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
- GFP_KERNEL | __GFP_ZERO);
+ rxdr->desc = dma_zalloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
+ GFP_KERNEL);
if (!rxdr->desc) {
ret_val = 6;
goto err_nomem;
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 104fcec..8fed74e 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -1011,6 +1011,11 @@
/* Must release MDIO ownership and mutex after MAC reset. */
switch (hw->mac.type) {
+ case e1000_82573:
+ /* Release mutex only if the hw semaphore is acquired */
+ if (!ret_val)
+ e1000_put_hw_semaphore_82573(hw);
+ break;
case e1000_82574:
case e1000_82583:
/* Release mutex only if the hw semaphore is acquired */
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index e4ebd7d..a8633b8 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -1665,7 +1665,7 @@
ret_val = 13; /* ret_val is the same as mis-compare */
break;
}
- if (jiffies >= (time + 20)) {
+ if (time_after(jiffies, time + 20)) {
ret_val = 14; /* error code for time out error */
break;
}
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index b799fd9..b7f3843 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -233,7 +233,8 @@
#define MAX_PS_BUFFERS 4
/* Number of packet split data buffers (not including the header buffer) */
-#define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1)
+#define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1)
+
/* Receive Descriptor - Packet Split */
union e1000_rx_desc_packet_split {
struct {
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index e6d2c0f..e87e9b0 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -64,8 +64,6 @@
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
-static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state);
-
static const struct e1000_info *e1000_info_tbl[] = {
[board_82571] = &e1000_82571_info,
[board_82572] = &e1000_82572_info,
@@ -6001,11 +5999,18 @@
* correctable error when the MAC transitions from D0 to D3. To
* prevent this we need to mask off the correctable errors on the
* downstream port of the pci-e switch.
+ *
+ * There is no associated upstream bridge when the PCI device is
+ * assigned to a guest (KVM on Power is one such case), so us_dev
+ * may be NULL here.
*/
if (adapter->flags & FLAG_IS_QUAD_PORT) {
struct pci_dev *us_dev = pdev->bus->self;
u16 devctl;
+ if (!us_dev)
+ return 0;
+
pcie_capability_read_word(us_dev, PCI_EXP_DEVCTL, &devctl);
pcie_capability_write_word(us_dev, PCI_EXP_DEVCTL,
(devctl & ~PCI_EXP_DEVCTL_CERE));
@@ -6019,38 +6024,73 @@
return 0;
}
-#ifdef CONFIG_PCIEASPM
-static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
+/**
+ * e1000e_disable_aspm - Disable ASPM states
+ * @pdev: pointer to PCI device struct
+ * @state: bit-mask of ASPM states to disable
+ *
+ * Some devices *must* have certain ASPM states disabled per hardware errata.
+ **/
+static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
{
- pci_disable_link_state_locked(pdev, state);
-}
-#else
-static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
-{
- u16 aspm_ctl = 0;
+ struct pci_dev *parent = pdev->bus->self;
+ u16 aspm_dis_mask = 0;
+ u16 pdev_aspmc, parent_aspmc;
- if (state & PCIE_LINK_STATE_L0S)
- aspm_ctl |= PCI_EXP_LNKCTL_ASPM_L0S;
- if (state & PCIE_LINK_STATE_L1)
- aspm_ctl |= PCI_EXP_LNKCTL_ASPM_L1;
+ switch (state) {
+ case PCIE_LINK_STATE_L0S:
+ case PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1:
+ aspm_dis_mask |= PCI_EXP_LNKCTL_ASPM_L0S;
+ /* fall-through - can't have L1 without L0s */
+ case PCIE_LINK_STATE_L1:
+ aspm_dis_mask |= PCI_EXP_LNKCTL_ASPM_L1;
+ break;
+ default:
+ return;
+ }
+
+ pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &pdev_aspmc);
+ pdev_aspmc &= PCI_EXP_LNKCTL_ASPMC;
+
+ if (parent) {
+ pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
+ &parent_aspmc);
+ parent_aspmc &= PCI_EXP_LNKCTL_ASPMC;
+ }
+
+ /* Nothing to do if the ASPM states to be disabled are already off */
+ if (!(pdev_aspmc & aspm_dis_mask) &&
+ (!parent || !(parent_aspmc & aspm_dis_mask)))
+ return;
+
+ dev_info(&pdev->dev, "Disabling ASPM %s %s\n",
+ (aspm_dis_mask & pdev_aspmc & PCI_EXP_LNKCTL_ASPM_L0S) ?
+ "L0s" : "",
+ (aspm_dis_mask & pdev_aspmc & PCI_EXP_LNKCTL_ASPM_L1) ?
+ "L1" : "");
+
+#ifdef CONFIG_PCIEASPM
+ pci_disable_link_state_locked(pdev, state);
+
+ /* Double-check ASPM control. If not disabled by the above, the
+ * BIOS is preventing that from happening; override by writing PCI
+ * config space directly (always done without CONFIG_PCIEASPM).
+ */
+ pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &pdev_aspmc);
+ pdev_aspmc &= PCI_EXP_LNKCTL_ASPMC;
+
+ if (!(aspm_dis_mask & pdev_aspmc))
+ return;
+#endif
/* Both device and parent should have the same ASPM setting.
* Disable ASPM in downstream component first and then upstream.
*/
- pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_ctl);
+ pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_dis_mask);
- if (pdev->bus->self)
- pcie_capability_clear_word(pdev->bus->self, PCI_EXP_LNKCTL,
- aspm_ctl);
-}
-#endif
-static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
-{
- dev_info(&pdev->dev, "Disabling ASPM %s %s\n",
- (state & PCIE_LINK_STATE_L0S) ? "L0s" : "",
- (state & PCIE_LINK_STATE_L1) ? "L1" : "");
-
- __e1000e_disable_aspm(pdev, state);
+ if (parent)
+ pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
+ aspm_dis_mask);
}
#ifdef CONFIG_PM
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index f21a91a..d398fad 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -238,6 +238,7 @@
size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
E1000_EECD_SIZE_EX_SHIFT);
+
/* Added to a constant, "size" becomes the left-shift value
* for setting word_size.
*/
@@ -250,86 +251,52 @@
size = 15;
nvm->word_size = 1 << size;
- if (hw->mac.type < e1000_i210) {
- nvm->opcode_bits = 8;
- nvm->delay_usec = 1;
+ nvm->opcode_bits = 8;
+ nvm->delay_usec = 1;
- switch (nvm->override) {
- case e1000_nvm_override_spi_large:
- nvm->page_size = 32;
- nvm->address_bits = 16;
- break;
- case e1000_nvm_override_spi_small:
- nvm->page_size = 8;
- nvm->address_bits = 8;
- break;
- default:
- nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8;
- nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ?
- 16 : 8;
- break;
- }
- if (nvm->word_size == (1 << 15))
- nvm->page_size = 128;
-
- nvm->type = e1000_nvm_eeprom_spi;
- } else {
- nvm->type = e1000_nvm_flash_hw;
+ switch (nvm->override) {
+ case e1000_nvm_override_spi_large:
+ nvm->page_size = 32;
+ nvm->address_bits = 16;
+ break;
+ case e1000_nvm_override_spi_small:
+ nvm->page_size = 8;
+ nvm->address_bits = 8;
+ break;
+ default:
+ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8;
+ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ?
+ 16 : 8;
+ break;
}
+ if (nvm->word_size == (1 << 15))
+ nvm->page_size = 128;
+
+ nvm->type = e1000_nvm_eeprom_spi;
/* NVM Function Pointers */
+ nvm->ops.acquire = igb_acquire_nvm_82575;
+ nvm->ops.release = igb_release_nvm_82575;
+ nvm->ops.write = igb_write_nvm_spi;
+ nvm->ops.validate = igb_validate_nvm_checksum;
+ nvm->ops.update = igb_update_nvm_checksum;
+ if (nvm->word_size < (1 << 15))
+ nvm->ops.read = igb_read_nvm_eerd;
+ else
+ nvm->ops.read = igb_read_nvm_spi;
+
+ /* override generic family function pointers for specific descendants */
switch (hw->mac.type) {
case e1000_82580:
nvm->ops.validate = igb_validate_nvm_checksum_82580;
nvm->ops.update = igb_update_nvm_checksum_82580;
- nvm->ops.acquire = igb_acquire_nvm_82575;
- nvm->ops.release = igb_release_nvm_82575;
- if (nvm->word_size < (1 << 15))
- nvm->ops.read = igb_read_nvm_eerd;
- else
- nvm->ops.read = igb_read_nvm_spi;
- nvm->ops.write = igb_write_nvm_spi;
break;
case e1000_i354:
case e1000_i350:
nvm->ops.validate = igb_validate_nvm_checksum_i350;
nvm->ops.update = igb_update_nvm_checksum_i350;
- nvm->ops.acquire = igb_acquire_nvm_82575;
- nvm->ops.release = igb_release_nvm_82575;
- if (nvm->word_size < (1 << 15))
- nvm->ops.read = igb_read_nvm_eerd;
- else
- nvm->ops.read = igb_read_nvm_spi;
- nvm->ops.write = igb_write_nvm_spi;
- break;
- case e1000_i210:
- nvm->ops.validate = igb_validate_nvm_checksum_i210;
- nvm->ops.update = igb_update_nvm_checksum_i210;
- nvm->ops.acquire = igb_acquire_nvm_i210;
- nvm->ops.release = igb_release_nvm_i210;
- nvm->ops.read = igb_read_nvm_srrd_i210;
- nvm->ops.write = igb_write_nvm_srwr_i210;
- nvm->ops.valid_led_default = igb_valid_led_default_i210;
- break;
- case e1000_i211:
- nvm->ops.acquire = igb_acquire_nvm_i210;
- nvm->ops.release = igb_release_nvm_i210;
- nvm->ops.read = igb_read_nvm_i211;
- nvm->ops.valid_led_default = igb_valid_led_default_i210;
- nvm->ops.validate = NULL;
- nvm->ops.update = NULL;
- nvm->ops.write = NULL;
break;
default:
- nvm->ops.validate = igb_validate_nvm_checksum;
- nvm->ops.update = igb_update_nvm_checksum;
- nvm->ops.acquire = igb_acquire_nvm_82575;
- nvm->ops.release = igb_release_nvm_82575;
- if (nvm->word_size < (1 << 15))
- nvm->ops.read = igb_read_nvm_eerd;
- else
- nvm->ops.read = igb_read_nvm_spi;
- nvm->ops.write = igb_write_nvm_spi;
break;
}
@@ -516,6 +483,8 @@
case E1000_DEV_ID_I210_FIBER:
case E1000_DEV_ID_I210_SERDES:
case E1000_DEV_ID_I210_SGMII:
+ case E1000_DEV_ID_I210_COPPER_FLASHLESS:
+ case E1000_DEV_ID_I210_SERDES_FLASHLESS:
mac->type = e1000_i210;
break;
case E1000_DEV_ID_I211_COPPER:
@@ -601,6 +570,15 @@
/* NVM initialization */
ret_val = igb_init_nvm_params_82575(hw);
+ switch (hw->mac.type) {
+ case e1000_i210:
+ case e1000_i211:
+ ret_val = igb_init_nvm_params_i210(hw);
+ break;
+ default:
+ break;
+ }
+
if (ret_val)
goto out;
@@ -1320,7 +1298,7 @@
**/
static s32 igb_reset_hw_82575(struct e1000_hw *hw)
{
- u32 ctrl, icr;
+ u32 ctrl;
s32 ret_val;
/* Prevent the PCI-E bus from sticking if there is no TLP connection
@@ -1365,7 +1343,7 @@
/* Clear any pending interrupt events. */
wr32(E1000_IMC, 0xffffffff);
- icr = rd32(E1000_ICR);
+ rd32(E1000_ICR);
/* Install any alternate MAC address into RAR0 */
ret_val = igb_check_alt_mac_addr(hw);
@@ -2103,10 +2081,9 @@
s32 ret_val = 0;
/* BH SW mailbox bit in SW_FW_SYNC */
u16 swmbsw_mask = E1000_SW_SYNCH_MB;
- u32 ctrl, icr;
+ u32 ctrl;
bool global_device_reset = hw->dev_spec._82575.global_device_reset;
-
hw->dev_spec._82575.global_device_reset = false;
/* due to hw errata, global device reset doesn't always
@@ -2165,7 +2142,7 @@
/* Clear any pending interrupt events. */
wr32(E1000_IMC, 0xffffffff);
- icr = rd32(E1000_ICR);
+ rd32(E1000_ICR);
ret_val = igb_reset_mdicnfg_82580(hw);
if (ret_val)
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index aa201ab..60559af 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -620,6 +620,7 @@
#define E1000_EECD_SIZE_EX_SHIFT 11
#define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */
#define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done*/
+#define E1000_EECD_FLASH_DETECTED_I210 0x00080000 /* FLASH detected */
#define E1000_FLUDONE_ATTEMPTS 20000
#define E1000_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */
#define E1000_I210_FIFO_SEL_RX 0x00
@@ -627,6 +628,11 @@
#define E1000_I210_FIFO_SEL_TX_LEGACY E1000_I210_FIFO_SEL_TX_QAV(0)
#define E1000_I210_FIFO_SEL_BMC2OS_TX 0x06
#define E1000_I210_FIFO_SEL_BMC2OS_RX 0x01
+#define E1000_I210_FLASH_SECTOR_SIZE 0x1000 /* 4KB FLASH sector unit size */
+/* Secure FLASH mode requires removing MSb */
+#define E1000_I210_FW_PTR_MASK 0x7FFF
+/* Firmware code revision field word offset */
+#define E1000_I210_FW_VER_OFFSET 328
#define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */
#define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done*/
#define E1000_FLUDONE_ATTEMPTS 20000
@@ -665,20 +671,26 @@
#define NVM_INIT_CTRL_4 0x0013
#define NVM_LED_1_CFG 0x001C
#define NVM_LED_0_2_CFG 0x001F
-
-/* NVM version defines */
#define NVM_ETRACK_WORD 0x0042
+#define NVM_ETRACK_HIWORD 0x0043
#define NVM_COMB_VER_OFF 0x0083
#define NVM_COMB_VER_PTR 0x003d
-#define NVM_MAJOR_MASK 0xF000
-#define NVM_MINOR_MASK 0x0FF0
-#define NVM_BUILD_MASK 0x000F
-#define NVM_COMB_VER_MASK 0x00FF
-#define NVM_MAJOR_SHIFT 12
-#define NVM_MINOR_SHIFT 4
-#define NVM_COMB_VER_SHFT 8
-#define NVM_VER_INVALID 0xFFFF
-#define NVM_ETRACK_SHIFT 16
+
+/* NVM version defines */
+#define NVM_MAJOR_MASK 0xF000
+#define NVM_MINOR_MASK 0x0FF0
+#define NVM_IMAGE_ID_MASK 0x000F
+#define NVM_COMB_VER_MASK 0x00FF
+#define NVM_MAJOR_SHIFT 12
+#define NVM_MINOR_SHIFT 4
+#define NVM_COMB_VER_SHFT 8
+#define NVM_VER_INVALID 0xFFFF
+#define NVM_ETRACK_SHIFT 16
+#define NVM_ETRACK_VALID 0x8000
+#define NVM_NEW_DEC_MASK 0x0F00
+#define NVM_HEX_CONV 16
+#define NVM_HEX_TENS 10
+
#define NVM_ETS_CFG 0x003E
#define NVM_ETS_LTHRES_DELTA_MASK 0x07C0
#define NVM_ETS_LTHRES_DELTA_SHIFT 6
diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h
index 94d7866..37a9c06 100644
--- a/drivers/net/ethernet/intel/igb/e1000_hw.h
+++ b/drivers/net/ethernet/intel/igb/e1000_hw.h
@@ -67,6 +67,8 @@
#define E1000_DEV_ID_I210_FIBER 0x1536
#define E1000_DEV_ID_I210_SERDES 0x1537
#define E1000_DEV_ID_I210_SGMII 0x1538
+#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B
+#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C
#define E1000_DEV_ID_I211_COPPER 0x1539
#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40
#define E1000_DEV_ID_I354_SGMII 0x1F41
@@ -110,6 +112,7 @@
e1000_nvm_none,
e1000_nvm_eeprom_spi,
e1000_nvm_flash_hw,
+ e1000_nvm_invm,
e1000_nvm_flash_sw
};
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c
index ddb3cf5..0c03933 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.c
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.c
@@ -335,57 +335,101 @@
}
/**
- * igb_read_nvm_i211 - Read NVM wrapper function for I211
+ * igb_read_invm_word_i210 - Reads OTP
+ * @hw: pointer to the HW structure
+ * @address: the word address (aka eeprom offset) to read
+ * @data: pointer to the data read
+ *
+ * Reads 16-bit words from the OTP. Return error when the word is not
+ * stored in OTP.
+ **/
+static s32 igb_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data)
+{
+ s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND;
+ u32 invm_dword;
+ u16 i;
+ u8 record_type, word_address;
+
+ for (i = 0; i < E1000_INVM_SIZE; i++) {
+ invm_dword = rd32(E1000_INVM_DATA_REG(i));
+ /* Get record type */
+ record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword);
+ if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE)
+ break;
+ if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE)
+ i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS;
+ if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE)
+ i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS;
+ if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) {
+ word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword);
+ if (word_address == address) {
+ *data = INVM_DWORD_TO_WORD_DATA(invm_dword);
+ hw_dbg("Read INVM Word 0x%02x = %x",
+ address, *data);
+ status = E1000_SUCCESS;
+ break;
+ }
+ }
+ }
+ if (status != E1000_SUCCESS)
+ hw_dbg("Requested word 0x%02x not found in OTP\n", address);
+ return status;
+}
+
+/**
+ * igb_read_invm_i210 - Read invm wrapper function for I210/I211
* @hw: pointer to the HW structure
* @words: number of words to read
* @data: pointer to the data read
*
* Wrapper function to return data formerly found in the NVM.
**/
-s32 igb_read_nvm_i211(struct e1000_hw *hw, u16 offset, u16 words,
- u16 *data)
+static s32 igb_read_invm_i210(struct e1000_hw *hw, u16 offset,
+ u16 words __always_unused, u16 *data)
{
s32 ret_val = E1000_SUCCESS;
/* Only the MAC addr is required to be present in the iNVM */
switch (offset) {
case NVM_MAC_ADDR:
- ret_val = igb_read_invm_i211(hw, offset, &data[0]);
- ret_val |= igb_read_invm_i211(hw, offset+1, &data[1]);
- ret_val |= igb_read_invm_i211(hw, offset+2, &data[2]);
+ ret_val = igb_read_invm_word_i210(hw, (u8)offset, &data[0]);
+ ret_val |= igb_read_invm_word_i210(hw, (u8)offset+1,
+ &data[1]);
+ ret_val |= igb_read_invm_word_i210(hw, (u8)offset+2,
+ &data[2]);
if (ret_val != E1000_SUCCESS)
hw_dbg("MAC Addr not found in iNVM\n");
break;
case NVM_INIT_CTRL_2:
- ret_val = igb_read_invm_i211(hw, (u8)offset, data);
+ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
if (ret_val != E1000_SUCCESS) {
*data = NVM_INIT_CTRL_2_DEFAULT_I211;
ret_val = E1000_SUCCESS;
}
break;
case NVM_INIT_CTRL_4:
- ret_val = igb_read_invm_i211(hw, (u8)offset, data);
+ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
if (ret_val != E1000_SUCCESS) {
*data = NVM_INIT_CTRL_4_DEFAULT_I211;
ret_val = E1000_SUCCESS;
}
break;
case NVM_LED_1_CFG:
- ret_val = igb_read_invm_i211(hw, (u8)offset, data);
+ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
if (ret_val != E1000_SUCCESS) {
*data = NVM_LED_1_CFG_DEFAULT_I211;
ret_val = E1000_SUCCESS;
}
break;
case NVM_LED_0_2_CFG:
- igb_read_invm_i211(hw, offset, data);
+ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
if (ret_val != E1000_SUCCESS) {
*data = NVM_LED_0_2_CFG_DEFAULT_I211;
ret_val = E1000_SUCCESS;
}
break;
case NVM_ID_LED_SETTINGS:
- ret_val = igb_read_invm_i211(hw, (u8)offset, data);
+ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
if (ret_val != E1000_SUCCESS) {
*data = ID_LED_RESERVED_FFFF;
ret_val = E1000_SUCCESS;
@@ -411,48 +455,6 @@
}
/**
- * igb_read_invm_i211 - Reads OTP
- * @hw: pointer to the HW structure
- * @address: the word address (aka eeprom offset) to read
- * @data: pointer to the data read
- *
- * Reads 16-bit words from the OTP. Return error when the word is not
- * stored in OTP.
- **/
-s32 igb_read_invm_i211(struct e1000_hw *hw, u16 address, u16 *data)
-{
- s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND;
- u32 invm_dword;
- u16 i;
- u8 record_type, word_address;
-
- for (i = 0; i < E1000_INVM_SIZE; i++) {
- invm_dword = rd32(E1000_INVM_DATA_REG(i));
- /* Get record type */
- record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword);
- if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE)
- break;
- if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE)
- i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS;
- if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE)
- i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS;
- if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) {
- word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword);
- if (word_address == (u8)address) {
- *data = INVM_DWORD_TO_WORD_DATA(invm_dword);
- hw_dbg("Read INVM Word 0x%02x = %x",
- address, *data);
- status = E1000_SUCCESS;
- break;
- }
- }
- }
- if (status != E1000_SUCCESS)
- hw_dbg("Requested word 0x%02x not found in OTP\n", address);
- return status;
-}
-
-/**
* igb_read_invm_version - Reads iNVM version and image type
* @hw: pointer to the HW structure
* @invm_ver: version structure for the version read
@@ -661,6 +663,23 @@
}
/**
+ * igb_get_flash_presence_i210 - Check if flash device is detected.
+ * @hw: pointer to the HW structure
+ *
+ **/
+bool igb_get_flash_presence_i210(struct e1000_hw *hw)
+{
+ u32 eec = 0;
+ bool ret_val = false;
+
+ eec = rd32(E1000_EECD);
+ if (eec & E1000_EECD_FLASH_DETECTED_I210)
+ ret_val = true;
+
+ return ret_val;
+}
+
+/**
* igb_update_flash_i210 - Commit EEPROM to the flash
* @hw: pointer to the HW structure
*
@@ -786,3 +805,33 @@
{
return __igb_access_xmdio_reg(hw, addr, dev_addr, &data, false);
}
+
+/**
+ * igb_init_nvm_params_i210 - Init NVM func ptrs.
+ * @hw: pointer to the HW structure
+ **/
+s32 igb_init_nvm_params_i210(struct e1000_hw *hw)
+{
+ s32 ret_val = 0;
+ struct e1000_nvm_info *nvm = &hw->nvm;
+
+ nvm->ops.acquire = igb_acquire_nvm_i210;
+ nvm->ops.release = igb_release_nvm_i210;
+ nvm->ops.valid_led_default = igb_valid_led_default_i210;
+
+ /* NVM Function Pointers */
+ if (igb_get_flash_presence_i210(hw)) {
+ hw->nvm.type = e1000_nvm_flash_hw;
+ nvm->ops.read = igb_read_nvm_srrd_i210;
+ nvm->ops.write = igb_write_nvm_srwr_i210;
+ nvm->ops.validate = igb_validate_nvm_checksum_i210;
+ nvm->ops.update = igb_update_nvm_checksum_i210;
+ } else {
+ hw->nvm.type = e1000_nvm_invm;
+ nvm->ops.read = igb_read_invm_i210;
+ nvm->ops.write = NULL;
+ nvm->ops.validate = NULL;
+ nvm->ops.update = NULL;
+ }
+ return ret_val;
+}
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h
index 5caa332..dde3c4b 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.h
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.h
@@ -35,20 +35,19 @@
u16 words, u16 *data);
extern s32 igb_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset,
u16 words, u16 *data);
-extern s32 igb_read_invm_i211(struct e1000_hw *hw, u16 address, u16 *data);
extern s32 igb_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
extern void igb_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
extern s32 igb_acquire_nvm_i210(struct e1000_hw *hw);
extern void igb_release_nvm_i210(struct e1000_hw *hw);
extern s32 igb_valid_led_default_i210(struct e1000_hw *hw, u16 *data);
-extern s32 igb_read_nvm_i211(struct e1000_hw *hw, u16 offset, u16 words,
- u16 *data);
extern s32 igb_read_invm_version(struct e1000_hw *hw,
struct e1000_fw_version *invm_ver);
extern s32 igb_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr,
u16 *data);
extern s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr,
u16 data);
+extern s32 igb_init_nvm_params_i210(struct e1000_hw *hw);
+extern bool igb_get_flash_presence_i210(struct e1000_hw *hw);
#define E1000_STM_OPCODE 0xDB00
#define E1000_EEPROM_FLASH_SIZE_WORD 0x11
diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c
index 7f9cd7c..a7db7f3 100644
--- a/drivers/net/ethernet/intel/igb/e1000_nvm.c
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c
@@ -709,11 +709,16 @@
**/
void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers)
{
- u16 eeprom_verh, eeprom_verl, comb_verh, comb_verl, comb_offset;
- u16 fw_version;
+ u16 eeprom_verh, eeprom_verl, etrack_test, fw_version;
+ u8 q, hval, rem, result;
+ u16 comb_verh, comb_verl, comb_offset;
memset(fw_vers, 0, sizeof(struct e1000_fw_version));
+ /* basic eeprom version numbers and bits used vary by part and by tool
+ * used to create the nvm images. Check which data format we have.
+ */
+ hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test);
switch (hw->mac.type) {
case e1000_i211:
igb_read_invm_version(hw, fw_vers);
@@ -721,30 +726,30 @@
case e1000_82575:
case e1000_82576:
case e1000_82580:
- case e1000_i354:
- case e1000_i350:
- case e1000_i210:
+ /* Use this format, unless EETRACK ID exists,
+ * then use alternate format
+ */
+ if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) {
+ hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
+ fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
+ >> NVM_MAJOR_SHIFT;
+ fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK)
+ >> NVM_MINOR_SHIFT;
+ fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK);
+ goto etrack_id;
+ }
break;
- default:
- return;
- }
- /* basic eeprom version numbers */
- hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
- fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) >> NVM_MAJOR_SHIFT;
- fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK);
-
- /* etrack id */
- hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl);
- hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh);
- fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT) | eeprom_verl;
-
- switch (hw->mac.type) {
case e1000_i210:
- case e1000_i354:
+ if (!(igb_get_flash_presence_i210(hw))) {
+ igb_read_invm_version(hw, fw_vers);
+ return;
+ }
+ /* fall through */
case e1000_i350:
/* find combo image version */
hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset);
- if ((comb_offset != 0x0) && (comb_offset != NVM_VER_INVALID)) {
+ if ((comb_offset != 0x0) &&
+ (comb_offset != NVM_VER_INVALID)) {
hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset
+ 1), 1, &comb_verh);
@@ -760,15 +765,42 @@
fw_vers->or_major =
comb_verl >> NVM_COMB_VER_SHFT;
fw_vers->or_build =
- ((comb_verl << NVM_COMB_VER_SHFT)
- | (comb_verh >> NVM_COMB_VER_SHFT));
+ (comb_verl << NVM_COMB_VER_SHFT)
+ | (comb_verh >> NVM_COMB_VER_SHFT);
fw_vers->or_patch =
comb_verh & NVM_COMB_VER_MASK;
}
}
break;
default:
- break;
+ return;
+ }
+ hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
+ fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
+ >> NVM_MAJOR_SHIFT;
+
+ /* check for old style version format in newer images */
+ if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) {
+ eeprom_verl = (fw_version & NVM_COMB_VER_MASK);
+ } else {
+ eeprom_verl = (fw_version & NVM_MINOR_MASK)
+ >> NVM_MINOR_SHIFT;
+ }
+ /* Convert hex-coded minor value to decimal (e.g. 0x25 -> 25) before
+ * assigning to output struct; it will not exceed 99, per tool output
+ */
+ q = eeprom_verl / NVM_HEX_CONV;
+ hval = q * NVM_HEX_TENS;
+ rem = eeprom_verl % NVM_HEX_CONV;
+ result = hval + rem;
+ fw_vers->eep_minor = result;
+
+etrack_id:
+ if ((etrack_test & NVM_MAJOR_MASK) == NVM_ETRACK_VALID) {
+ hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl);
+ hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh);
+ fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT)
+ | eeprom_verl;
}
return;
}
diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.h b/drivers/net/ethernet/intel/igb/e1000_nvm.h
index 6bfc0c4..433b741 100644
--- a/drivers/net/ethernet/intel/igb/e1000_nvm.h
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.h
@@ -44,6 +44,7 @@
u32 etrack_id;
u16 eep_major;
u16 eep_minor;
+ u16 eep_build;
u8 invm_major;
u8 invm_minor;
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 15ea8dc..c1fae7a 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -343,6 +343,8 @@
};
#endif
+#define IGB_RETA_SIZE 128
+
/* board specific private data structure */
struct igb_adapter {
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
@@ -444,6 +446,8 @@
struct i2c_algo_bit_data i2c_algo;
struct i2c_adapter i2c_adap;
struct i2c_client *i2c_client;
+ u32 rss_indir_tbl_init;
+ u8 rss_indir_tbl[IGB_RETA_SIZE];
};
#define IGB_FLAG_HAS_MSI (1 << 0)
@@ -480,6 +484,7 @@
extern void igb_down(struct igb_adapter *);
extern void igb_reinit_locked(struct igb_adapter *);
extern void igb_reset(struct igb_adapter *);
+extern void igb_write_rss_indir_tbl(struct igb_adapter *);
extern int igb_set_spd_dplx(struct igb_adapter *, u32, u8);
extern int igb_setup_tx_resources(struct igb_ring *);
extern int igb_setup_rx_resources(struct igb_ring *);
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 85fe7b5..ce9b5a9 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1335,12 +1335,23 @@
static int igb_eeprom_test(struct igb_adapter *adapter, u64 *data)
{
+ struct e1000_hw *hw = &adapter->hw;
+
*data = 0;
- /* Validate eeprom on all parts but i211 */
- if (adapter->hw.mac.type != e1000_i211) {
+ /* Validate eeprom on all parts but flashless */
+ switch (hw->mac.type) {
+ case e1000_i210:
+ case e1000_i211:
+ if (igb_get_flash_presence_i210(hw)) {
+ if (adapter->hw.nvm.ops.validate(&adapter->hw) < 0)
+ *data = 2;
+ }
+ break;
+ default:
if (adapter->hw.nvm.ops.validate(&adapter->hw) < 0)
*data = 2;
+ break;
}
return *data;
@@ -2672,7 +2683,9 @@
igb_set_eee_i350(hw);
/* reset link */
- if (!netif_running(netdev))
+ if (netif_running(netdev))
+ igb_reinit_locked(adapter);
+ else
igb_reset(adapter);
}
@@ -2771,6 +2784,90 @@
pm_runtime_put(&adapter->pdev->dev);
}
+static u32 igb_get_rxfh_indir_size(struct net_device *netdev)
+{
+ return IGB_RETA_SIZE;
+}
+
+static int igb_get_rxfh_indir(struct net_device *netdev, u32 *indir)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ int i;
+
+ for (i = 0; i < IGB_RETA_SIZE; i++)
+ indir[i] = adapter->rss_indir_tbl[i];
+
+ return 0;
+}
+
+void igb_write_rss_indir_tbl(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 reg = E1000_RETA(0);
+ u32 shift = 0;
+ int i = 0;
+
+ switch (hw->mac.type) {
+ case e1000_82575:
+ shift = 6;
+ break;
+ case e1000_82576:
+ /* 82576 supports 2 RSS queues for SR-IOV */
+ if (adapter->vfs_allocated_count)
+ shift = 3;
+ break;
+ default:
+ break;
+ }
+
+ while (i < IGB_RETA_SIZE) {
+ u32 val = 0;
+ int j;
+
+ for (j = 3; j >= 0; j--) {
+ val <<= 8;
+ val |= adapter->rss_indir_tbl[i + j];
+ }
+
+ wr32(reg, val << shift);
+ reg += 4;
+ i += 4;
+ }
+}
+
+static int igb_set_rxfh_indir(struct net_device *netdev, const u32 *indir)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ int i;
+ u32 num_queues;
+
+ num_queues = adapter->rss_queues;
+
+ switch (hw->mac.type) {
+ case e1000_82576:
+ /* 82576 supports 2 RSS queues for SR-IOV */
+ if (adapter->vfs_allocated_count)
+ num_queues = 2;
+ break;
+ default:
+ break;
+ }
+
+ /* Verify user input. */
+ for (i = 0; i < IGB_RETA_SIZE; i++)
+ if (indir[i] >= num_queues)
+ return -EINVAL;
+
+
+ for (i = 0; i < IGB_RETA_SIZE; i++)
+ adapter->rss_indir_tbl[i] = indir[i];
+
+ igb_write_rss_indir_tbl(adapter);
+
+ return 0;
+}
+
static const struct ethtool_ops igb_ethtool_ops = {
.get_settings = igb_get_settings,
.set_settings = igb_set_settings,
@@ -2804,6 +2901,9 @@
.set_eee = igb_set_eee,
.get_module_info = igb_get_module_info,
.get_module_eeprom = igb_get_module_eeprom,
+ .get_rxfh_indir_size = igb_get_rxfh_indir_size,
+ .get_rxfh_indir = igb_get_rxfh_indir,
+ .set_rxfh_indir = igb_set_rxfh_indir,
.begin = igb_ethtool_begin,
.complete = igb_ethtool_complete,
};
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index c1d72c0..df33c4b 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -85,6 +85,8 @@
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS), board_82575 },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS), board_82575 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
@@ -1013,7 +1015,7 @@
adapter->q_vector[v_idx] = NULL;
netif_napi_del(&q_vector->napi);
- /* ixgbe_get_stats64() might access the rings on this vector,
+ /* igb_get_stats64() might access the rings on this vector,
* we must wait a grace period before freeing it.
*/
kfree_rcu(q_vector, rcu);
@@ -1929,12 +1931,17 @@
igb_get_fw_version(hw, &fw);
switch (hw->mac.type) {
+ case e1000_i210:
case e1000_i211:
- snprintf(adapter->fw_version, sizeof(adapter->fw_version),
- "%2d.%2d-%d",
- fw.invm_major, fw.invm_minor, fw.invm_img_type);
- break;
-
+ if (!(igb_get_flash_presence_i210(hw))) {
+ snprintf(adapter->fw_version,
+ sizeof(adapter->fw_version),
+ "%2d.%2d-%d",
+ fw.invm_major, fw.invm_minor,
+ fw.invm_img_type);
+ break;
+ }
+ /* fall through */
default:
/* if option is rom valid, display its version too */
if (fw.or_valid) {
@@ -1944,11 +1951,16 @@
fw.eep_major, fw.eep_minor, fw.etrack_id,
fw.or_major, fw.or_build, fw.or_patch);
/* no option rom */
- } else {
+ } else if (fw.etrack_id != 0X0000) {
snprintf(adapter->fw_version,
- sizeof(adapter->fw_version),
- "%d.%d, 0x%08x",
- fw.eep_major, fw.eep_minor, fw.etrack_id);
+ sizeof(adapter->fw_version),
+ "%d.%d, 0x%08x",
+ fw.eep_major, fw.eep_minor, fw.etrack_id);
+ } else {
+ snprintf(adapter->fw_version,
+ sizeof(adapter->fw_version),
+ "%d.%d.%d",
+ fw.eep_major, fw.eep_minor, fw.eep_build);
}
break;
}
@@ -2166,15 +2178,28 @@
*/
hw->mac.ops.reset_hw(hw);
- /* make sure the NVM is good , i211 parts have special NVM that
- * doesn't contain a checksum
+ /* make sure the NVM is good; i210/i211 parts can have a special NVM
+ * that doesn't contain a checksum
*/
- if (hw->mac.type != e1000_i211) {
+ switch (hw->mac.type) {
+ case e1000_i210:
+ case e1000_i211:
+ if (igb_get_flash_presence_i210(hw)) {
+ if (hw->nvm.ops.validate(hw) < 0) {
+ dev_err(&pdev->dev,
+ "The NVM Checksum Is Not Valid\n");
+ err = -EIO;
+ goto err_eeprom;
+ }
+ }
+ break;
+ default:
if (hw->nvm.ops.validate(hw) < 0) {
dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
err = -EIO;
goto err_eeprom;
}
+ break;
}
/* copy the MAC address out of the NVM */
@@ -2436,6 +2461,11 @@
int err = 0;
int i;
+ if (!adapter->msix_entries) {
+ err = -EPERM;
+ goto out;
+ }
+
if (!num_vfs)
goto out;
else if (old_vfs && old_vfs == num_vfs)
@@ -3096,7 +3126,7 @@
{
struct e1000_hw *hw = &adapter->hw;
u32 mrqc, rxcsum;
- u32 j, num_rx_queues, shift = 0;
+ u32 j, num_rx_queues;
static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741,
0xB08FA343, 0xCB2BCAD0, 0xB4307BAE,
0xA32DCB77, 0x0CF23080, 0x3BB7426A,
@@ -3109,35 +3139,21 @@
num_rx_queues = adapter->rss_queues;
switch (hw->mac.type) {
- case e1000_82575:
- shift = 6;
- break;
case e1000_82576:
/* 82576 supports 2 RSS queues for SR-IOV */
- if (adapter->vfs_allocated_count) {
- shift = 3;
+ if (adapter->vfs_allocated_count)
num_rx_queues = 2;
- }
break;
default:
break;
}
- /* Populate the indirection table 4 entries at a time. To do this
- * we are generating the results for n and n+2 and then interleaving
- * those with the results with n+1 and n+3.
- */
- for (j = 0; j < 32; j++) {
- /* first pass generates n and n+2 */
- u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues;
- u32 reta = (base & 0x07800780) >> (7 - shift);
-
- /* second pass generates n+1 and n+3 */
- base += 0x00010001 * num_rx_queues;
- reta |= (base & 0x07800780) << (1 + shift);
-
- wr32(E1000_RETA(j), reta);
+ if (adapter->rss_indir_tbl_init != num_rx_queues) {
+ for (j = 0; j < IGB_RETA_SIZE; j++)
+ adapter->rss_indir_tbl[j] = (j * num_rx_queues) / IGB_RETA_SIZE;
+ adapter->rss_indir_tbl_init = num_rx_queues;
}
+ igb_write_rss_indir_tbl(adapter);
/* Disable raw packet checksumming so that RSS hash is placed in
* descriptor on writeback. No need to enable TCP/UDP/IP checksum
@@ -3844,7 +3860,6 @@
{
struct e1000_hw *hw = &adapter->hw;
bool link_active = false;
- s32 ret_val = 0;
/* get_link_status is set on LSC (link status) interrupt or
* rx sequence error interrupt. get_link_status will stay
@@ -3853,16 +3868,11 @@
*/
switch (hw->phy.media_type) {
case e1000_media_type_copper:
- if (hw->mac.get_link_status) {
- ret_val = hw->mac.ops.check_for_link(hw);
- link_active = !hw->mac.get_link_status;
- } else {
- link_active = true;
- }
- break;
+ if (!hw->mac.get_link_status)
+ return true;
case e1000_media_type_internal_serdes:
- ret_val = hw->mac.ops.check_for_link(hw);
- link_active = hw->mac.serdes_has_link;
+ hw->mac.ops.check_for_link(hw);
+ link_active = !hw->mac.get_link_status;
break;
default:
case e1000_media_type_unknown:
@@ -4814,6 +4824,10 @@
return -EINVAL;
}
+ /* adjust max frame to be at least the size of a standard frame */
+ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
+ max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
+
while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
msleep(1);
@@ -4865,6 +4879,8 @@
bytes = 0;
packets = 0;
+
+ rcu_read_lock();
for (i = 0; i < adapter->num_rx_queues; i++) {
u32 rqdpc = rd32(E1000_RQDPC(i));
struct igb_ring *ring = adapter->rx_ring[i];
@@ -4900,6 +4916,7 @@
}
net_stats->tx_bytes = bytes;
net_stats->tx_packets = packets;
+ rcu_read_unlock();
/* read stats registers */
adapter->stats.crcerrs += rd32(E1000_CRCERRS);
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 7e8c477..5a54e3d 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -97,14 +97,14 @@
{
struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
struct e1000_hw *hw = &igb->hw;
+ u32 lo, hi;
u64 val;
- u32 lo, hi, jk;
/* The timestamp latches on lowest register read. For the 82580
* the lowest register is SYSTIMR instead of SYSTIML. However we only
* need to provide nanosecond resolution, so we just ignore it.
*/
- jk = rd32(E1000_SYSTIMR);
+ rd32(E1000_SYSTIMR);
lo = rd32(E1000_SYSTIML);
hi = rd32(E1000_SYSTIMH);
@@ -118,13 +118,13 @@
static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts)
{
struct e1000_hw *hw = &adapter->hw;
- u32 sec, nsec, jk;
+ u32 sec, nsec;
/* The timestamp latches on lowest register read. For I210/I211, the
* lowest register is SYSTIMR. Since we only need to provide nanosecond
* resolution, we can ignore it.
*/
- jk = rd32(E1000_SYSTIMR);
+ rd32(E1000_SYSTIMR);
nsec = rd32(E1000_SYSTIML);
sec = rd32(E1000_SYSTIMH);
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index fce3e92..9f6b236 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -718,8 +718,8 @@
txdr->size = txdr->count * sizeof(struct ixgb_tx_desc);
txdr->size = ALIGN(txdr->size, 4096);
- txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
- GFP_KERNEL | __GFP_ZERO);
+ txdr->desc = dma_zalloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
+ GFP_KERNEL);
if (!txdr->desc) {
vfree(txdr->buffer_info);
return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 207f68f..007a008 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -49,6 +49,7 @@
static s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait_to_complete);
+static void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
bool autoneg_wait_to_complete);
static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
@@ -141,11 +142,13 @@
goto setup_sfp_out;
}
- hw->eeprom.ops.read(hw, ++data_offset, &data_value);
+ if (hw->eeprom.ops.read(hw, ++data_offset, &data_value))
+ goto setup_sfp_err;
while (data_value != 0xffff) {
IXGBE_WRITE_REG(hw, IXGBE_CORECTL, data_value);
IXGBE_WRITE_FLUSH(hw);
- hw->eeprom.ops.read(hw, ++data_offset, &data_value);
+ if (hw->eeprom.ops.read(hw, ++data_offset, &data_value))
+ goto setup_sfp_err;
}
/* Release the semaphore */
@@ -191,6 +194,17 @@
setup_sfp_out:
return ret_val;
+
+setup_sfp_err:
+ /* Release the semaphore */
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
+ /* Delay obtaining semaphore again to allow FW access;
+ * semaphore_delay is in ms, but usleep_range needs us.
+ */
+ usleep_range(hw->eeprom.semaphore_delay * 1000,
+ hw->eeprom.semaphore_delay * 2000);
+ hw_err(hw, "eeprom read at offset %d failed\n", data_offset);
+ return IXGBE_ERR_SFP_SETUP_NOT_COMPLETE;
}
static s32 ixgbe_get_invariants_82599(struct ixgbe_hw *hw)
@@ -365,8 +379,13 @@
if (hw->phy.multispeed_fiber) {
*speed |= IXGBE_LINK_SPEED_10GB_FULL |
- IXGBE_LINK_SPEED_1GB_FULL;
- *autoneg = true;
+ IXGBE_LINK_SPEED_1GB_FULL;
+
+ /* QSFP must not enable auto-negotiation */
+ if (hw->phy.media_type == ixgbe_media_type_fiber_qsfp)
+ *autoneg = false;
+ else
+ *autoneg = true;
}
out:
@@ -432,6 +451,24 @@
}
/**
+ * ixgbe_stop_mac_link_on_d3_82599 - Disables link on D3
+ * @hw: pointer to hardware structure
+ *
+ * Disables link on D3 by setting IXGBE_AUTOC2_LINK_DISABLE_ON_D3_MASK, unless
+ * mng_fw_enabled or wol_enabled is set. Should be called during D3 power down.
+ */
+static void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw)
+{
+ u32 autoc2_reg;
+
+ if (!hw->mng_fw_enabled && !hw->wol_enabled) {
+ autoc2_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+ autoc2_reg |= IXGBE_AUTOC2_LINK_DISABLE_ON_D3_MASK;
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2_reg);
+ }
+}
+
+/**
* ixgbe_start_mac_link_82599 - Setup MAC link settings
* @hw: pointer to hardware structure
* @autoneg_wait_to_complete: true when waiting for completion is needed
@@ -668,13 +705,18 @@
goto out;
/* Set the module link speed */
- if (hw->phy.media_type == ixgbe_media_type_fiber_fixed) {
- ixgbe_set_fiber_fixed_speed(hw,
- IXGBE_LINK_SPEED_10GB_FULL);
- } else {
+ switch (hw->phy.media_type) {
+ case ixgbe_media_type_fiber:
esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5);
IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
IXGBE_WRITE_FLUSH(hw);
+ break;
+ case ixgbe_media_type_fiber_qsfp:
+ /* QSFP module automatically detects MAC link speed */
+ break;
+ default:
+ hw_dbg(hw, "Unexpected media type.\n");
+ break;
}
/* Allow module to change analog characteristics (1G->10G) */
@@ -725,14 +767,23 @@
goto out;
/* Set the module link speed */
- if (hw->phy.media_type == ixgbe_media_type_fiber_fixed) {
+ switch (hw->phy.media_type) {
+ case ixgbe_media_type_fiber_fixed:
ixgbe_set_fiber_fixed_speed(hw,
IXGBE_LINK_SPEED_1GB_FULL);
- } else {
+ break;
+ case ixgbe_media_type_fiber:
esdp_reg &= ~IXGBE_ESDP_SDP5;
esdp_reg |= IXGBE_ESDP_SDP5_DIR;
IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
IXGBE_WRITE_FLUSH(hw);
+ break;
+ case ixgbe_media_type_fiber_qsfp:
+ /* QSFP module automatically detects MAC link speed */
+ break;
+ default:
+ hw_dbg(hw, "Unexpected media type.\n");
+ break;
}
/* Allow module to change analog characteristics (10G->1G) */
@@ -2161,6 +2212,7 @@
{
s32 status = IXGBE_ERR_EEPROM_VERSION;
u16 fw_offset, fw_ptp_cfg_offset;
+ u16 offset;
u16 fw_version = 0;
/* firmware check is only necessary for SFI devices */
@@ -2170,29 +2222,35 @@
}
/* get the offset to the Firmware Module block */
- hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset);
+ offset = IXGBE_FW_PTR;
+ if (hw->eeprom.ops.read(hw, offset, &fw_offset))
+ goto fw_version_err;
if ((fw_offset == 0) || (fw_offset == 0xFFFF))
goto fw_version_out;
/* get the offset to the Pass Through Patch Configuration block */
- hw->eeprom.ops.read(hw, (fw_offset +
- IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR),
- &fw_ptp_cfg_offset);
+ offset = fw_offset + IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR;
+ if (hw->eeprom.ops.read(hw, offset, &fw_ptp_cfg_offset))
+ goto fw_version_err;
if ((fw_ptp_cfg_offset == 0) || (fw_ptp_cfg_offset == 0xFFFF))
goto fw_version_out;
/* get the firmware version */
- hw->eeprom.ops.read(hw, (fw_ptp_cfg_offset +
- IXGBE_FW_PATCH_VERSION_4),
- &fw_version);
+ offset = fw_ptp_cfg_offset + IXGBE_FW_PATCH_VERSION_4;
+ if (hw->eeprom.ops.read(hw, offset, &fw_version))
+ goto fw_version_err;
if (fw_version > 0x5)
status = 0;
fw_version_out:
return status;
+
+fw_version_err:
+ hw_err(hw, "eeprom read at offset %d failed\n", offset);
+ return IXGBE_ERR_EEPROM_VERSION;
}
/**
@@ -2477,6 +2535,7 @@
.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie,
.read_analog_reg8 = &ixgbe_read_analog_reg8_82599,
.write_analog_reg8 = &ixgbe_write_analog_reg8_82599,
+ .stop_link_on_d3 = &ixgbe_stop_mac_link_on_d3_82599,
.setup_link = &ixgbe_setup_mac_link_82599,
.set_rxpba = &ixgbe_set_rxpba_generic,
.check_link = &ixgbe_check_mac_link_generic,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 50e62a2..b5c434b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -2740,13 +2740,19 @@
static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
u16 *san_mac_offset)
{
+ s32 ret_val;
+
/*
* First read the EEPROM pointer to see if the MAC addresses are
* available.
*/
- hw->eeprom.ops.read(hw, IXGBE_SAN_MAC_ADDR_PTR, san_mac_offset);
+ ret_val = hw->eeprom.ops.read(hw, IXGBE_SAN_MAC_ADDR_PTR,
+ san_mac_offset);
+ if (ret_val)
+ hw_err(hw, "eeprom read at offset %d failed\n",
+ IXGBE_SAN_MAC_ADDR_PTR);
- return 0;
+ return ret_val;
}
/**
@@ -2763,23 +2769,16 @@
{
u16 san_mac_data, san_mac_offset;
u8 i;
+ s32 ret_val;
/*
* First read the EEPROM pointer to see if the MAC addresses are
* available. If they're not, no point in calling set_lan_id() here.
*/
- ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset);
+ ret_val = ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset);
+ if (ret_val || san_mac_offset == 0 || san_mac_offset == 0xFFFF)
- if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) {
- /*
- * No addresses available in this EEPROM. It's not an
- * error though, so just wipe the local address and return.
- */
- for (i = 0; i < 6; i++)
- san_mac_addr[i] = 0xFF;
-
- goto san_mac_addr_out;
- }
+ goto san_mac_addr_clr;
/* make sure we know which port we need to program */
hw->mac.ops.set_lan_id(hw);
@@ -2787,14 +2786,26 @@
(hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) :
(san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET);
for (i = 0; i < 3; i++) {
- hw->eeprom.ops.read(hw, san_mac_offset, &san_mac_data);
+ ret_val = hw->eeprom.ops.read(hw, san_mac_offset,
+ &san_mac_data);
+ if (ret_val) {
+ hw_err(hw, "eeprom read at offset %d failed\n",
+ san_mac_offset);
+ goto san_mac_addr_clr;
+ }
san_mac_addr[i * 2] = (u8)(san_mac_data);
san_mac_addr[i * 2 + 1] = (u8)(san_mac_data >> 8);
san_mac_offset++;
}
-
-san_mac_addr_out:
return 0;
+
+san_mac_addr_clr:
+ /* No addresses available in this EEPROM. It's not necessarily an
+ * error though, so just wipe the local address and return.
+ */
+ for (i = 0; i < 6; i++)
+ san_mac_addr[i] = 0xFF;
+ return ret_val;
}
/**
@@ -3243,8 +3254,9 @@
*wwpn_prefix = 0xFFFF;
/* check if alternative SAN MAC is supported */
- hw->eeprom.ops.read(hw, IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR,
- &alt_san_mac_blk_offset);
+ offset = IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR;
+ if (hw->eeprom.ops.read(hw, offset, &alt_san_mac_blk_offset))
+ goto wwn_prefix_err;
if ((alt_san_mac_blk_offset == 0) ||
(alt_san_mac_blk_offset == 0xFFFF))
@@ -3252,19 +3264,26 @@
/* check capability in alternative san mac address block */
offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET;
- hw->eeprom.ops.read(hw, offset, &caps);
+ if (hw->eeprom.ops.read(hw, offset, &caps))
+ goto wwn_prefix_err;
if (!(caps & IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN))
goto wwn_prefix_out;
/* get the corresponding prefix for WWNN/WWPN */
offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET;
- hw->eeprom.ops.read(hw, offset, wwnn_prefix);
+ if (hw->eeprom.ops.read(hw, offset, wwnn_prefix))
+ hw_err(hw, "eeprom read at offset %d failed\n", offset);
offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET;
- hw->eeprom.ops.read(hw, offset, wwpn_prefix);
+ if (hw->eeprom.ops.read(hw, offset, wwpn_prefix))
+ goto wwn_prefix_err;
wwn_prefix_out:
return 0;
+
+wwn_prefix_err:
+ hw_err(hw, "eeprom read at offset %d failed\n", offset);
+ return 0;
}
/**
@@ -3778,7 +3797,11 @@
u8 sensor_index;
u8 sensor_location;
- hw->eeprom.ops.read(hw, (ets_offset + 1 + i), &ets_sensor);
+ if (hw->eeprom.ops.read(hw, ets_offset + 1 + i, &ets_sensor)) {
+ hw_err(hw, "eeprom read at offset %d failed\n",
+ ets_offset + 1 + i);
+ continue;
+ }
sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
IXGBE_ETS_DATA_INDEX_SHIFT);
sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index 1315b8a..d259dc7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -143,8 +143,12 @@
#define IXGBE_WRITE_FLUSH(a) IXGBE_READ_REG(a, IXGBE_STATUS)
+#define ixgbe_hw_to_netdev(hw) (((struct ixgbe_adapter *)(hw)->back)->netdev)
+
#define hw_dbg(hw, format, arg...) \
- netdev_dbg(((struct ixgbe_adapter *)(hw->back))->netdev, format, ##arg)
+ netdev_dbg(ixgbe_hw_to_netdev(hw), format, ## arg)
+#define hw_err(hw, format, arg...) \
+ netdev_err(ixgbe_hw_to_netdev(hw), format, ## arg)
#define e_dev_info(format, arg...) \
dev_info(&adapter->pdev->dev, format, ## arg)
#define e_dev_warn(format, arg...) \
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 50c1e9b..0e1b973 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -311,9 +311,6 @@
* this function does not support duplex forcing, but can
* limit the advertising of the adapter to the specified speed
*/
- if (ecmd->autoneg == AUTONEG_DISABLE)
- return -EINVAL;
-
if (ecmd->advertising & ~ecmd->supported)
return -EINVAL;
@@ -1049,7 +1046,7 @@
data[i] = (ixgbe_gstrings_stats[i].sizeof_stat ==
sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
}
- for (j = 0; j < IXGBE_NUM_RX_QUEUES; j++) {
+ for (j = 0; j < netdev->num_tx_queues; j++) {
ring = adapter->tx_ring[j];
if (!ring) {
data[i] = 0;
@@ -1885,11 +1882,12 @@
struct ethtool_test *eth_test, u64 *data)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
bool if_running = netif_running(netdev);
set_bit(__IXGBE_TESTING, &adapter->state);
if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+ struct ixgbe_hw *hw = &adapter->hw;
+
if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
int i;
for (i = 0; i < adapter->num_vfs; i++) {
@@ -1913,21 +1911,18 @@
/* Offline tests */
e_info(hw, "offline testing starting\n");
- if (if_running)
- /* indicate we're in test mode */
- dev_close(netdev);
-
- /* bringing adapter down disables SFP+ optics */
- if (hw->mac.ops.enable_tx_laser)
- hw->mac.ops.enable_tx_laser(hw);
-
/* Link test performed before hardware reset so autoneg doesn't
* interfere with test result
*/
if (ixgbe_link_test(adapter, &data[4]))
eth_test->flags |= ETH_TEST_FL_FAILED;
- ixgbe_reset(adapter);
+ if (if_running)
+ /* indicate we're in test mode */
+ dev_close(netdev);
+ else
+ ixgbe_reset(adapter);
+
e_info(hw, "register testing starting\n");
if (ixgbe_reg_test(adapter, &data[0]))
eth_test->flags |= ETH_TEST_FL_FAILED;
@@ -1964,13 +1959,11 @@
clear_bit(__IXGBE_TESTING, &adapter->state);
if (if_running)
dev_open(netdev);
+ else if (hw->mac.ops.disable_tx_laser)
+ hw->mac.ops.disable_tx_laser(hw);
} else {
e_info(hw, "online testing starting\n");
- /* if adapter is down, SFP+ optics will be disabled */
- if (!if_running && hw->mac.ops.enable_tx_laser)
- hw->mac.ops.enable_tx_laser(hw);
-
/* Online tests */
if (ixgbe_link_test(adapter, &data[4]))
eth_test->flags |= ETH_TEST_FL_FAILED;
@@ -1984,9 +1977,6 @@
clear_bit(__IXGBE_TESTING, &adapter->state);
}
- /* if adapter was down, ensure SFP+ optics are disabled again */
- if (!if_running && hw->mac.ops.disable_tx_laser)
- hw->mac.ops.disable_tx_laser(hw);
skip_ol_tests:
msleep_interruptible(4 * 1000);
}
@@ -2953,28 +2943,27 @@
u32 status = IXGBE_ERR_PHY_ADDR_INVALID;
u8 databyte = 0xFF;
int i = 0;
- int ret_val = 0;
if (ee->len == 0)
return -EINVAL;
- for (i = ee->offset; i < ee->len; i++) {
+ for (i = ee->offset; i < ee->offset + ee->len; i++) {
/* I2C reads can take long time */
if (test_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
return -EBUSY;
if (i < ETH_MODULE_SFF_8079_LEN)
- status = hw->phy.ops.read_i2c_eeprom(hw, i, &databyte);
+ status = hw->phy.ops.read_i2c_eeprom(hw, i, &databyte);
else
status = hw->phy.ops.read_i2c_sff8472(hw, i, &databyte);
if (status != 0)
- ret_val = -EIO;
+ return -EIO;
data[i - ee->offset] = databyte;
}
- return ret_val;
+ return 0;
}
static const struct ethtool_ops ixgbe_ethtool_ops = {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 128d6b8..7aba452 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -4175,6 +4175,10 @@
case ixgbe_phy_sfp_passive_unknown:
case ixgbe_phy_sfp_active_unknown:
case ixgbe_phy_sfp_ftl_active:
+ case ixgbe_phy_qsfp_passive_unknown:
+ case ixgbe_phy_qsfp_active_unknown:
+ case ixgbe_phy_qsfp_intel:
+ case ixgbe_phy_qsfp_unknown:
return true;
case ixgbe_phy_nl:
if (hw->mac.type == ixgbe_mac_82598EB)
@@ -5292,6 +5296,9 @@
return retval;
#endif
+ if (hw->mac.ops.stop_link_on_d3)
+ hw->mac.ops.stop_link_on_d3(hw);
+
if (wufc) {
ixgbe_set_rx_mode(netdev);
@@ -7602,10 +7609,8 @@
hw->mac.type == ixgbe_mac_82598EB) {
err = 0;
} else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
- e_dev_err("failed to load because an unsupported SFP+ "
- "module type was detected.\n");
- e_dev_err("Reload the driver after installing a supported "
- "module.\n");
+ e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n");
+ e_dev_err("Reload the driver after installing a supported module.\n");
goto err_sw_init;
} else if (err) {
e_dev_err("HW Init failed: %d\n", err);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index 369eef5..e4c6760 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -791,6 +791,8 @@
* Read control word from PHY init contents offset
*/
ret_val = hw->eeprom.ops.read(hw, data_offset, &eword);
+ if (ret_val)
+ goto err_eeprom;
control = (eword & IXGBE_CONTROL_MASK_NL) >>
IXGBE_CONTROL_SHIFT_NL;
edata = eword & IXGBE_DATA_MASK_NL;
@@ -803,10 +805,15 @@
case IXGBE_DATA_NL:
hw_dbg(hw, "DATA:\n");
data_offset++;
- hw->eeprom.ops.read(hw, data_offset++,
- &phy_offset);
+ ret_val = hw->eeprom.ops.read(hw, data_offset++,
+ &phy_offset);
+ if (ret_val)
+ goto err_eeprom;
for (i = 0; i < edata; i++) {
- hw->eeprom.ops.read(hw, data_offset, &eword);
+ ret_val = hw->eeprom.ops.read(hw, data_offset,
+ &eword);
+ if (ret_val)
+ goto err_eeprom;
hw->phy.ops.write_reg(hw, phy_offset,
MDIO_MMD_PMAPMD, eword);
hw_dbg(hw, "Wrote %4.4x to %4.4x\n", eword,
@@ -838,6 +845,10 @@
out:
return ret_val;
+
+err_eeprom:
+ hw_err(hw, "eeprom read at offset %d failed\n", data_offset);
+ return IXGBE_ERR_PHY;
}
/**
@@ -1164,6 +1175,10 @@
u8 comp_codes_10g = 0;
u8 oui_bytes[3] = {0, 0, 0};
u16 enforce_sfp = 0;
+ u8 connector = 0;
+ u8 cable_length = 0;
+ u8 device_tech = 0;
+ bool active_cable = false;
if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) {
hw->phy.sfp_type = ixgbe_sfp_type_not_present;
@@ -1194,18 +1209,18 @@
if (status != 0)
goto err_read_i2c_eeprom;
+ status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_QSFP_1GBE_COMP,
+ &comp_codes_1g);
+
+ if (status != 0)
+ goto err_read_i2c_eeprom;
+
if (comp_codes_10g & IXGBE_SFF_QSFP_DA_PASSIVE_CABLE) {
hw->phy.type = ixgbe_phy_qsfp_passive_unknown;
if (hw->bus.lan_id == 0)
hw->phy.sfp_type = ixgbe_sfp_type_da_cu_core0;
else
hw->phy.sfp_type = ixgbe_sfp_type_da_cu_core1;
- } else if (comp_codes_10g & IXGBE_SFF_QSFP_DA_ACTIVE_CABLE) {
- hw->phy.type = ixgbe_phy_qsfp_active_unknown;
- if (hw->bus.lan_id == 0)
- hw->phy.sfp_type = ixgbe_sfp_type_da_act_lmt_core0;
- else
- hw->phy.sfp_type = ixgbe_sfp_type_da_act_lmt_core1;
} else if (comp_codes_10g & (IXGBE_SFF_10GBASESR_CAPABLE |
IXGBE_SFF_10GBASELR_CAPABLE)) {
if (hw->bus.lan_id == 0)
@@ -1213,10 +1228,47 @@
else
hw->phy.sfp_type = ixgbe_sfp_type_srlr_core1;
} else {
- /* unsupported module type */
- hw->phy.type = ixgbe_phy_sfp_unsupported;
- status = IXGBE_ERR_SFP_NOT_SUPPORTED;
- goto out;
+ if (comp_codes_10g & IXGBE_SFF_QSFP_DA_ACTIVE_CABLE)
+ active_cable = true;
+
+ if (!active_cable) {
+ /* check for active DA cables that pre-date
+ * SFF-8436 v3.6
+ */
+ hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_QSFP_CONNECTOR,
+ &connector);
+
+ hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_QSFP_CABLE_LENGTH,
+ &cable_length);
+
+ hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_QSFP_DEVICE_TECH,
+ &device_tech);
+
+ if ((connector ==
+ IXGBE_SFF_QSFP_CONNECTOR_NOT_SEPARABLE) &&
+ (cable_length > 0) &&
+ ((device_tech >> 4) ==
+ IXGBE_SFF_QSFP_TRANSMITER_850NM_VCSEL))
+ active_cable = true;
+ }
+
+ if (active_cable) {
+ hw->phy.type = ixgbe_phy_qsfp_active_unknown;
+ if (hw->bus.lan_id == 0)
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_da_act_lmt_core0;
+ else
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_da_act_lmt_core1;
+ } else {
+ /* unsupported module type */
+ hw->phy.type = ixgbe_phy_sfp_unsupported;
+ status = IXGBE_ERR_SFP_NOT_SUPPORTED;
+ goto out;
+ }
}
if (hw->phy.sfp_type != stored_sfp_type)
@@ -1271,7 +1323,7 @@
status = 0;
} else {
if (hw->allow_unsupported_sfp == true) {
- e_warn(hw, "WARNING: Intel (R) Network Connections are quality tested using Intel (R) Ethernet Optics. Using untested modules is not supported and may cause unstable operation or damage to the module or the adapter. Intel Corporation is not responsible for any harm caused by using untested modules.\n");
+ e_warn(drv, "WARNING: Intel (R) Network Connections are quality tested using Intel (R) Ethernet Optics. Using untested modules is not supported and may cause unstable operation or damage to the module or the adapter. Intel Corporation is not responsible for any harm caused by using untested modules.\n");
status = 0;
} else {
hw_dbg(hw,
@@ -1339,7 +1391,11 @@
sfp_type = ixgbe_sfp_type_srlr_core1;
/* Read offset to PHY init contents */
- hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset);
+ if (hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset)) {
+ hw_err(hw, "eeprom read at %d failed\n",
+ IXGBE_PHY_INIT_OFFSET_NL);
+ return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT;
+ }
if ((!*list_offset) || (*list_offset == 0xFFFF))
return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT;
@@ -1351,12 +1407,14 @@
* Find the matching SFP ID in the EEPROM
* and program the init sequence
*/
- hw->eeprom.ops.read(hw, *list_offset, &sfp_id);
+ if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id))
+ goto err_phy;
while (sfp_id != IXGBE_PHY_INIT_END_NL) {
if (sfp_id == sfp_type) {
(*list_offset)++;
- hw->eeprom.ops.read(hw, *list_offset, data_offset);
+ if (hw->eeprom.ops.read(hw, *list_offset, data_offset))
+ goto err_phy;
if ((!*data_offset) || (*data_offset == 0xFFFF)) {
hw_dbg(hw, "SFP+ module not supported\n");
return IXGBE_ERR_SFP_NOT_SUPPORTED;
@@ -1366,7 +1424,7 @@
} else {
(*list_offset) += 2;
if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id))
- return IXGBE_ERR_PHY;
+ goto err_phy;
}
}
@@ -1376,6 +1434,10 @@
}
return 0;
+
+err_phy:
+ hw_err(hw, "eeprom read at offset %d failed\n", *list_offset);
+ return IXGBE_ERR_PHY;
}
/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
index 138dadd..24af12e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
@@ -50,8 +50,11 @@
#define IXGBE_SFF_QSFP_VENDOR_OUI_BYTE0 0xA5
#define IXGBE_SFF_QSFP_VENDOR_OUI_BYTE1 0xA6
#define IXGBE_SFF_QSFP_VENDOR_OUI_BYTE2 0xA7
+#define IXGBE_SFF_QSFP_CONNECTOR 0x82
#define IXGBE_SFF_QSFP_10GBE_COMP 0x83
#define IXGBE_SFF_QSFP_1GBE_COMP 0x86
+#define IXGBE_SFF_QSFP_CABLE_LENGTH 0x92
+#define IXGBE_SFF_QSFP_DEVICE_TECH 0x93
/* Bitmasks */
#define IXGBE_SFF_DA_PASSIVE_CABLE 0x4
@@ -68,6 +71,8 @@
#define IXGBE_SFF_ADDRESSING_MODE 0x4
#define IXGBE_SFF_QSFP_DA_ACTIVE_CABLE 0x1
#define IXGBE_SFF_QSFP_DA_PASSIVE_CABLE 0x8
+#define IXGBE_SFF_QSFP_CONNECTOR_NOT_SEPARABLE 0x23
+#define IXGBE_SFF_QSFP_TRANSMITER_850NM_VCSEL 0x0
#define IXGBE_I2C_EEPROM_READ_MASK 0x100
#define IXGBE_I2C_EEPROM_STATUS_MASK 0x3
#define IXGBE_I2C_EEPROM_STATUS_NO_OPERATION 0x0
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 73c8e73..276d7b1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -639,8 +639,8 @@
{
struct ixgbe_hw *hw = &adapter->hw;
unsigned char *vf_mac = adapter->vfinfo[vf].vf_mac_addresses;
- u32 reg, msgbuf[4];
- u32 reg_offset, vf_shift;
+ u32 reg, reg_offset, vf_shift;
+ u32 msgbuf[4] = {0, 0, 0, 0};
u8 *addr = (u8 *)(&msgbuf[1]);
e_info(probe, "VF Reset msg received from vf %d\n", vf);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 161ff18..6442cf8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1596,6 +1596,7 @@
#define IXGBE_AUTOC2_10G_KR (0x0 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
#define IXGBE_AUTOC2_10G_XFI (0x1 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
#define IXGBE_AUTOC2_10G_SFI (0x2 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC2_LINK_DISABLE_ON_D3_MASK 0x50000000
#define IXGBE_AUTOC2_LINK_DISABLE_MASK 0x70000000
#define IXGBE_MACC_FLU 0x00000001
@@ -2847,6 +2848,7 @@
void (*disable_tx_laser)(struct ixgbe_hw *);
void (*enable_tx_laser)(struct ixgbe_hw *);
void (*flap_tx_laser)(struct ixgbe_hw *);
+ void (*stop_link_on_d3)(struct ixgbe_hw *);
s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool);
s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool);
s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *,
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index c35db73..7fb5677 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2641,7 +2641,7 @@
ret = mv643xx_eth_shared_of_probe(pdev);
if (ret)
return ret;
- pd = pdev->dev.platform_data;
+ pd = dev_get_platdata(&pdev->dev);
msp->tx_csum_limit = (pd != NULL && pd->tx_csum_limit) ?
pd->tx_csum_limit : 9 * 1024;
@@ -2833,7 +2833,7 @@
struct resource *res;
int err;
- pd = pdev->dev.platform_data;
+ pd = dev_get_platdata(&pdev->dev);
if (pd == NULL) {
dev_err(&pdev->dev, "no mv643xx_eth_platform_data\n");
return -ENODEV;
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index db48147..4ae0c74 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -583,10 +583,9 @@
* table is full.
*/
if (pep->htpr == NULL) {
- pep->htpr = dma_alloc_coherent(pep->dev->dev.parent,
- HASH_ADDR_TABLE_SIZE,
- &pep->htpr_dma,
- GFP_KERNEL | __GFP_ZERO);
+ pep->htpr = dma_zalloc_coherent(pep->dev->dev.parent,
+ HASH_ADDR_TABLE_SIZE,
+ &pep->htpr_dma, GFP_KERNEL);
if (pep->htpr == NULL)
return -ENOMEM;
} else {
@@ -1024,9 +1023,9 @@
pep->rx_desc_count = 0;
size = pep->rx_ring_size * sizeof(struct rx_desc);
pep->rx_desc_area_size = size;
- pep->p_rx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size,
- &pep->rx_desc_dma,
- GFP_KERNEL | __GFP_ZERO);
+ pep->p_rx_desc_area = dma_zalloc_coherent(pep->dev->dev.parent, size,
+ &pep->rx_desc_dma,
+ GFP_KERNEL);
if (!pep->p_rx_desc_area)
goto out;
@@ -1085,9 +1084,9 @@
pep->tx_desc_count = 0;
size = pep->tx_ring_size * sizeof(struct tx_desc);
pep->tx_desc_area_size = size;
- pep->p_tx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size,
- &pep->tx_desc_dma,
- GFP_KERNEL | __GFP_ZERO);
+ pep->p_tx_desc_area = dma_zalloc_coherent(pep->dev->dev.parent, size,
+ &pep->tx_desc_dma,
+ GFP_KERNEL);
if (!pep->p_tx_desc_area)
goto out;
/* Initialize the next_desc_ptr links in the Tx descriptors ring */
@@ -1517,7 +1516,7 @@
printk(KERN_INFO "%s:Using random mac address\n", DRIVER_NAME);
eth_hw_addr_random(dev);
- pep->pd = pdev->dev.platform_data;
+ pep->pd = dev_get_platdata(&pdev->dev);
pep->rx_ring_size = NUM_RX_DESCS;
if (pep->pd->rx_queue_size)
pep->rx_ring_size = pep->pd->rx_queue_size;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
index 9d4a1ea..b4881b6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
@@ -160,6 +160,7 @@
struct ieee_pfc *pfc)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_port_profile *prof = priv->prof;
struct mlx4_en_dev *mdev = priv->mdev;
int err;
@@ -169,15 +170,17 @@
pfc->mbc,
pfc->delay);
- priv->prof->rx_pause = priv->prof->tx_pause = !!pfc->pfc_en;
- priv->prof->rx_ppp = priv->prof->tx_ppp = pfc->pfc_en;
+ prof->rx_pause = !pfc->pfc_en;
+ prof->tx_pause = !pfc->pfc_en;
+ prof->rx_ppp = pfc->pfc_en;
+ prof->tx_ppp = pfc->pfc_en;
err = mlx4_SET_PORT_general(mdev->dev, priv->port,
priv->rx_skb_size + ETH_FCS_LEN,
- priv->prof->tx_pause,
- priv->prof->tx_ppp,
- priv->prof->rx_pause,
- priv->prof->rx_ppp);
+ prof->tx_pause,
+ prof->tx_ppp,
+ prof->rx_pause,
+ prof->rx_ppp);
if (err)
en_err(priv, "Failed setting pause params\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 6dcca98..0698c82 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -362,6 +362,15 @@
*/
rmb();
+ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
+ MLX4_CQE_OPCODE_ERROR)) {
+ struct mlx4_err_cqe *cqe_err = (struct mlx4_err_cqe *)cqe;
+
+ en_err(priv, "CQE error - vendor syndrome: 0x%x syndrome: 0x%x\n",
+ cqe_err->vendor_err_syndrome,
+ cqe_err->syndrome);
+ }
+
/* Skip over last polled CQE */
new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
@@ -579,17 +588,15 @@
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_dev *mdev = priv->mdev;
+ struct device *ddev = priv->ddev;
struct mlx4_en_tx_ring *ring;
struct mlx4_en_tx_desc *tx_desc;
struct mlx4_wqe_data_seg *data;
- struct skb_frag_struct *frag;
struct mlx4_en_tx_info *tx_info;
- struct ethhdr *ethh;
int tx_ind = 0;
int nr_txbb;
int desc_size;
int real_size;
- dma_addr_t dma;
u32 index, bf_index;
__be32 op_own;
u16 vlan_tag = 0;
@@ -665,6 +672,61 @@
tx_info->skb = skb;
tx_info->nr_txbb = nr_txbb;
+ if (lso_header_size)
+ data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4,
+ DS_SIZE));
+ else
+ data = &tx_desc->data;
+
+ /* valid only for none inline segments */
+ tx_info->data_offset = (void *)data - (void *)tx_desc;
+
+ tx_info->linear = (lso_header_size < skb_headlen(skb) &&
+ !is_inline(skb, NULL)) ? 1 : 0;
+
+ data += skb_shinfo(skb)->nr_frags + tx_info->linear - 1;
+
+ if (is_inline(skb, &fragptr)) {
+ tx_info->inl = 1;
+ } else {
+ /* Map fragments */
+ for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
+ struct skb_frag_struct *frag;
+ dma_addr_t dma;
+
+ frag = &skb_shinfo(skb)->frags[i];
+ dma = skb_frag_dma_map(ddev, frag,
+ 0, skb_frag_size(frag),
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, dma))
+ goto tx_drop_unmap;
+
+ data->addr = cpu_to_be64(dma);
+ data->lkey = cpu_to_be32(mdev->mr.key);
+ wmb();
+ data->byte_count = cpu_to_be32(skb_frag_size(frag));
+ --data;
+ }
+
+ /* Map linear part */
+ if (tx_info->linear) {
+ u32 byte_count = skb_headlen(skb) - lso_header_size;
+ dma_addr_t dma;
+
+ dma = dma_map_single(ddev, skb->data +
+ lso_header_size, byte_count,
+ PCI_DMA_TODEVICE);
+ if (dma_mapping_error(ddev, dma))
+ goto tx_drop_unmap;
+
+ data->addr = cpu_to_be64(dma);
+ data->lkey = cpu_to_be32(mdev->mr.key);
+ wmb();
+ data->byte_count = cpu_to_be32(byte_count);
+ }
+ tx_info->inl = 0;
+ }
+
/*
* For timestamping add flag to skb_shinfo and
* set flag for further reference
@@ -689,6 +751,8 @@
}
if (priv->flags & MLX4_EN_FLAG_ENABLE_HW_LOOPBACK) {
+ struct ethhdr *ethh;
+
/* Copy dst mac address to wqe. This allows loopback in eSwitch,
* so that VFs and PF can communicate with each other
*/
@@ -711,8 +775,6 @@
/* Copy headers;
* note that we already verified that it is linear */
memcpy(tx_desc->lso.header, skb->data, lso_header_size);
- data = ((void *) &tx_desc->lso +
- ALIGN(lso_header_size + 4, DS_SIZE));
priv->port_stats.tso_packets++;
i = ((skb->len - lso_header_size) / skb_shinfo(skb)->gso_size) +
@@ -724,7 +786,6 @@
op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
((ring->prod & ring->size) ?
cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
- data = &tx_desc->data;
tx_info->nr_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
ring->packets++;
@@ -733,38 +794,7 @@
netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes);
AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);
-
- /* valid only for none inline segments */
- tx_info->data_offset = (void *) data - (void *) tx_desc;
-
- tx_info->linear = (lso_header_size < skb_headlen(skb) && !is_inline(skb, NULL)) ? 1 : 0;
- data += skb_shinfo(skb)->nr_frags + tx_info->linear - 1;
-
- if (!is_inline(skb, &fragptr)) {
- /* Map fragments */
- for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
- frag = &skb_shinfo(skb)->frags[i];
- dma = skb_frag_dma_map(priv->ddev, frag,
- 0, skb_frag_size(frag),
- DMA_TO_DEVICE);
- data->addr = cpu_to_be64(dma);
- data->lkey = cpu_to_be32(mdev->mr.key);
- wmb();
- data->byte_count = cpu_to_be32(skb_frag_size(frag));
- --data;
- }
-
- /* Map linear part */
- if (tx_info->linear) {
- dma = dma_map_single(priv->ddev, skb->data + lso_header_size,
- skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE);
- data->addr = cpu_to_be64(dma);
- data->lkey = cpu_to_be32(mdev->mr.key);
- wmb();
- data->byte_count = cpu_to_be32(skb_headlen(skb) - lso_header_size);
- }
- tx_info->inl = 0;
- } else {
+ if (tx_info->inl) {
build_inline_wqe(tx_desc, skb, real_size, &vlan_tag, tx_ind, fragptr);
tx_info->inl = 1;
}
@@ -804,6 +834,16 @@
return NETDEV_TX_OK;
+tx_drop_unmap:
+ en_err(priv, "DMA mapping error\n");
+
+ for (i++; i < skb_shinfo(skb)->nr_frags; i++) {
+ data++;
+ dma_unmap_page(ddev, (dma_addr_t) be64_to_cpu(data->addr),
+ be32_to_cpu(data->byte_count),
+ PCI_DMA_TODEVICE);
+ }
+
tx_drop:
dev_kfree_skb_any(skb);
priv->stats.tx_dropped++;
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index 94b3bd6..0951f7a 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -1148,7 +1148,7 @@
struct resource *iomem;
struct net_device *netdev;
struct ks8842_adapter *adapter;
- struct ks8842_platform_data *pdata = pdev->dev.platform_data;
+ struct ks8842_platform_data *pdata = dev_get_platdata(&pdev->dev);
u16 id;
unsigned i;
diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c
index 9f3f5db..0fba153 100644
--- a/drivers/net/ethernet/micrel/ks8851_mll.c
+++ b/drivers/net/ethernet/micrel/ks8851_mll.c
@@ -1636,7 +1636,7 @@
} else {
struct ks8851_mll_platform_data *pdata;
- pdata = pdev->dev.platform_data;
+ pdata = dev_get_platdata(&pdev->dev);
if (!pdata) {
netdev_err(netdev, "No platform data\n");
err = -ENODEV;
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index abd2c54..83c2091 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -314,6 +314,7 @@
unsigned int len;
unsigned int tx_head = priv->tx_head;
u32 txdes1;
+ int ret = NETDEV_TX_BUSY;
desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head);
@@ -321,7 +322,7 @@
if (readl(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
net_dbg_ratelimited("no TX space for packet\n");
priv->stats.tx_dropped++;
- return NETDEV_TX_BUSY;
+ goto out_unlock;
}
len = skb->len > TX_BUF_SIZE ? TX_BUF_SIZE : skb->len;
@@ -330,7 +331,7 @@
len, DMA_TO_DEVICE);
if (dma_mapping_error(&ndev->dev, priv->tx_mapping[tx_head])) {
netdev_err(ndev, "DMA mapping error\n");
- return NETDEV_TX_BUSY;
+ goto out_unlock;
}
priv->tx_len[tx_head] = len;
@@ -360,10 +361,11 @@
priv->tx_head = TX_NEXT(tx_head);
ndev->trans_start = jiffies;
-
+ ret = NETDEV_TX_OK;
+out_unlock:
spin_unlock_irq(&priv->txlock);
- return NETDEV_TX_OK;
+ return ret;
}
static struct net_device_stats *moxart_mac_get_stats(struct net_device *ndev)
@@ -531,7 +533,6 @@
unregister_netdev(ndev);
free_irq(ndev->irq, ndev);
moxart_mac_free_memory(ndev);
- platform_set_drvdata(pdev, NULL);
free_netdev(ndev);
return 0;
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 66c0400..149355b 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -74,6 +74,7 @@
#ifdef CONFIG_MTRR
#include <asm/mtrr.h>
#endif
+#include <net/busy_poll.h>
#include "myri10ge_mcp.h"
#include "myri10ge_mcp_gen_header.h"
@@ -194,6 +195,21 @@
int cpu;
__be32 __iomem *dca_tag;
#endif
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ unsigned int state;
+#define SLICE_STATE_IDLE 0
+#define SLICE_STATE_NAPI 1 /* NAPI owns this slice */
+#define SLICE_STATE_POLL 2 /* poll owns this slice */
+#define SLICE_LOCKED (SLICE_STATE_NAPI | SLICE_STATE_POLL)
+#define SLICE_STATE_NAPI_YIELD 4 /* NAPI yielded this slice */
+#define SLICE_STATE_POLL_YIELD 8 /* poll yielded this slice */
+#define SLICE_USER_PEND (SLICE_STATE_POLL | SLICE_STATE_POLL_YIELD)
+ spinlock_t lock;
+ unsigned long lock_napi_yield;
+ unsigned long lock_poll_yield;
+ unsigned long busy_poll_miss;
+ unsigned long busy_poll_cnt;
+#endif /* CONFIG_NET_RX_BUSY_POLL */
char irq_desc[32];
};
@@ -909,6 +925,92 @@
return status;
}
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static inline void myri10ge_ss_init_lock(struct myri10ge_slice_state *ss)
+{
+ spin_lock_init(&ss->lock);
+ ss->state = SLICE_STATE_IDLE;
+}
+
+static inline bool myri10ge_ss_lock_napi(struct myri10ge_slice_state *ss)
+{
+ int rc = true;
+ spin_lock(&ss->lock);
+ if ((ss->state & SLICE_LOCKED)) {
+ WARN_ON((ss->state & SLICE_STATE_NAPI));
+ ss->state |= SLICE_STATE_NAPI_YIELD;
+ rc = false;
+ ss->lock_napi_yield++;
+ } else
+ ss->state = SLICE_STATE_NAPI;
+ spin_unlock(&ss->lock);
+ return rc;
+}
+
+static inline void myri10ge_ss_unlock_napi(struct myri10ge_slice_state *ss)
+{
+ spin_lock(&ss->lock);
+ WARN_ON((ss->state & (SLICE_STATE_POLL | SLICE_STATE_NAPI_YIELD)));
+ ss->state = SLICE_STATE_IDLE;
+ spin_unlock(&ss->lock);
+}
+
+static inline bool myri10ge_ss_lock_poll(struct myri10ge_slice_state *ss)
+{
+ int rc = true;
+ spin_lock_bh(&ss->lock);
+ if ((ss->state & SLICE_LOCKED)) {
+ ss->state |= SLICE_STATE_POLL_YIELD;
+ rc = false;
+ ss->lock_poll_yield++;
+ } else
+ ss->state |= SLICE_STATE_POLL;
+ spin_unlock_bh(&ss->lock);
+ return rc;
+}
+
+static inline void myri10ge_ss_unlock_poll(struct myri10ge_slice_state *ss)
+{
+ spin_lock_bh(&ss->lock);
+ WARN_ON((ss->state & SLICE_STATE_NAPI));
+ ss->state = SLICE_STATE_IDLE;
+ spin_unlock_bh(&ss->lock);
+}
+
+static inline bool myri10ge_ss_busy_polling(struct myri10ge_slice_state *ss)
+{
+ WARN_ON(!(ss->state & SLICE_LOCKED));
+ return (ss->state & SLICE_USER_PEND);
+}
+#else /* CONFIG_NET_RX_BUSY_POLL */
+static inline void myri10ge_ss_init_lock(struct myri10ge_slice_state *ss)
+{
+}
+
+static inline bool myri10ge_ss_lock_napi(struct myri10ge_slice_state *ss)
+{
+ return true; /* busy poll compiled out: NAPI always gets the slice */
+}
+
+static inline void myri10ge_ss_unlock_napi(struct myri10ge_slice_state *ss)
+{
+}
+
+static inline bool myri10ge_ss_lock_poll(struct myri10ge_slice_state *ss)
+{
+ return false;
+}
+
+static inline void myri10ge_ss_unlock_poll(struct myri10ge_slice_state *ss)
+{
+}
+
+static inline bool myri10ge_ss_busy_polling(struct myri10ge_slice_state *ss)
+{
+ return false;
+}
+#endif
+
static int myri10ge_reset(struct myri10ge_priv *mgp)
{
struct myri10ge_cmd cmd;
@@ -1300,6 +1402,8 @@
}
}
+#define MYRI10GE_HLEN 64 /* Bytes to copy from page to skb linear memory */
+
static inline int
myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum)
{
@@ -1311,6 +1415,7 @@
struct pci_dev *pdev = mgp->pdev;
struct net_device *dev = mgp->dev;
u8 *va;
+ bool polling;
if (len <= mgp->small_bytes) {
rx = &ss->rx_small;
@@ -1325,7 +1430,15 @@
va = page_address(rx->info[idx].page) + rx->info[idx].page_offset;
prefetch(va);
- skb = napi_get_frags(&ss->napi);
+ /* When busy polling in user context, allocate skb and copy headers to
+ * skb's linear memory ourselves. When not busy polling, use the napi
+ * gro api.
+ */
+ polling = myri10ge_ss_busy_polling(ss);
+ if (polling)
+ skb = netdev_alloc_skb(dev, MYRI10GE_HLEN + 16);
+ else
+ skb = napi_get_frags(&ss->napi);
if (unlikely(skb == NULL)) {
ss->stats.rx_dropped++;
for (i = 0, remainder = len; remainder > 0; i++) {
@@ -1364,8 +1477,29 @@
}
myri10ge_vlan_rx(mgp->dev, va, skb);
skb_record_rx_queue(skb, ss - &mgp->ss[0]);
+ skb_mark_napi_id(skb, &ss->napi);
- napi_gro_frags(&ss->napi);
+ if (polling) {
+ int hlen;
+
+ /* myri10ge_vlan_rx might have moved the header, so compute
+ * length and address again.
+ */
+ hlen = MYRI10GE_HLEN > skb->len ? skb->len : MYRI10GE_HLEN;
+ va = page_address(skb_frag_page(&rx_frags[0])) +
+ rx_frags[0].page_offset;
+ /* Copy header into the skb linear memory */
+ skb_copy_to_linear_data(skb, va, hlen);
+ rx_frags[0].page_offset += hlen;
+ rx_frags[0].size -= hlen;
+ skb->data_len -= hlen;
+ skb->tail += hlen;
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_receive_skb(skb);
+ }
+ else
+ napi_gro_frags(&ss->napi);
+
return 1;
}
@@ -1524,10 +1658,14 @@
if (ss->mgp->dca_enabled)
myri10ge_update_dca(ss);
#endif
+ /* Try later if the busy_poll handler is running. */
+ if (!myri10ge_ss_lock_napi(ss))
+ return budget;
/* process as many rx events as NAPI will allow */
work_done = myri10ge_clean_rx_done(ss, budget);
+ myri10ge_ss_unlock_napi(ss);
if (work_done < budget) {
napi_complete(napi);
put_be32(htonl(3), ss->irq_claim);
@@ -1535,6 +1673,34 @@
return work_done;
}
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static int myri10ge_busy_poll(struct napi_struct *napi)
+{
+ struct myri10ge_slice_state *ss =
+ container_of(napi, struct myri10ge_slice_state, napi);
+ struct myri10ge_priv *mgp = ss->mgp;
+ int work_done;
+
+ /* Poll only when the link is up */
+ if (mgp->link_state != MXGEFW_LINK_UP)
+ return LL_FLUSH_FAILED;
+
+ if (!myri10ge_ss_lock_poll(ss))
+ return LL_FLUSH_BUSY;
+
+ /* Process a small number of packets */
+ work_done = myri10ge_clean_rx_done(ss, 4);
+ if (work_done)
+ ss->busy_poll_cnt += work_done;
+ else
+ ss->busy_poll_miss++;
+
+ myri10ge_ss_unlock_poll(ss);
+
+ return work_done;
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
static irqreturn_t myri10ge_intr(int irq, void *arg)
{
struct myri10ge_slice_state *ss = arg;
@@ -1742,6 +1908,10 @@
"tx_pkt_start", "tx_pkt_done", "tx_req", "tx_done",
"rx_small_cnt", "rx_big_cnt",
"wake_queue", "stop_queue", "tx_linearized",
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ "rx_lock_napi_yield", "rx_lock_poll_yield", "rx_busy_poll_miss",
+ "rx_busy_poll_cnt",
+#endif
};
#define MYRI10GE_NET_STATS_LEN 21
@@ -1842,6 +2012,12 @@
data[i++] = (unsigned int)ss->tx.wake_queue;
data[i++] = (unsigned int)ss->tx.stop_queue;
data[i++] = (unsigned int)ss->tx.linearized;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ data[i++] = ss->lock_napi_yield;
+ data[i++] = ss->lock_poll_yield;
+ data[i++] = ss->busy_poll_miss;
+ data[i++] = ss->busy_poll_cnt;
+#endif
}
}
@@ -2405,6 +2581,9 @@
goto abort_with_rings;
}
+ /* Initialize the slice spinlock and state used for polling */
+ myri10ge_ss_init_lock(ss);
+
/* must happen prior to any irq */
napi_enable(&(ss)->napi);
}
@@ -2481,9 +2660,19 @@
del_timer_sync(&mgp->watchdog_timer);
mgp->running = MYRI10GE_ETH_STOPPING;
+ local_bh_disable(); /* myri10ge_ss_lock_napi needs bh disabled */
for (i = 0; i < mgp->num_slices; i++) {
napi_disable(&mgp->ss[i].napi);
+ /* Lock the slice to prevent the busy_poll handler from
+ * accessing it. Later when we bring the NIC up, myri10ge_open
+ * resets the slice including this lock.
+ */
+ while (!myri10ge_ss_lock_napi(&mgp->ss[i])) {
+ pr_info("Slice %d locked\n", i);
+ mdelay(1);
+ }
}
+ local_bh_enable();
netif_carrier_off(dev);
netif_tx_stop_all_queues(dev);
@@ -3569,8 +3758,11 @@
ss->fw_stats, ss->fw_stats_bus);
ss->fw_stats = NULL;
}
+ napi_hash_del(&ss->napi);
netif_napi_del(&ss->napi);
}
+ /* Wait till napi structs are no longer used, and then free ss. */
+ synchronize_rcu();
kfree(mgp->ss);
mgp->ss = NULL;
}
@@ -3591,9 +3783,9 @@
for (i = 0; i < mgp->num_slices; i++) {
ss = &mgp->ss[i];
bytes = mgp->max_intr_slots * sizeof(*ss->rx_done.entry);
- ss->rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes,
- &ss->rx_done.bus,
- GFP_KERNEL | __GFP_ZERO);
+ ss->rx_done.entry = dma_zalloc_coherent(&pdev->dev, bytes,
+ &ss->rx_done.bus,
+ GFP_KERNEL);
if (ss->rx_done.entry == NULL)
goto abort;
bytes = sizeof(*ss->fw_stats);
@@ -3606,6 +3798,7 @@
ss->dev = mgp->dev;
netif_napi_add(ss->dev, &ss->napi, myri10ge_poll,
myri10ge_napi_weight);
+ napi_hash_add(&ss->napi);
}
return 0;
abort:
@@ -3748,6 +3941,9 @@
.ndo_change_mtu = myri10ge_change_mtu,
.ndo_set_rx_mode = myri10ge_set_multicast_list,
.ndo_set_mac_address = myri10ge_set_mac_address,
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ .ndo_busy_poll = myri10ge_busy_poll,
+#endif
};
static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
diff --git a/drivers/net/ethernet/netx-eth.c b/drivers/net/ethernet/netx-eth.c
index dc2c6f56..235fd51 100644
--- a/drivers/net/ethernet/netx-eth.c
+++ b/drivers/net/ethernet/netx-eth.c
@@ -390,7 +390,7 @@
priv = netdev_priv(ndev);
- pdata = (struct netxeth_platform_data *)pdev->dev.platform_data;
+ pdata = (struct netxeth_platform_data *)dev_get_platdata(&pdev->dev);
priv->xc = request_xc(pdata->xcno, &pdev->dev);
if (!priv->xc) {
dev_err(&pdev->dev, "unable to request xc engine\n");
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
index 1129db0..f0ceb89 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
@@ -118,6 +118,7 @@
* filled by get_settings() on a down link, speed is -1: */
if (speed == UINT_MAX) {
speed = SPEED_1000;
+ ethtool_cmd_speed_set(ecmd, speed);
ecmd->duplex = DUPLEX_FULL;
}
ret = mii_ethtool_sset(&adapter->mii, ecmd);
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index e19f1be..5a0f04c 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -1491,9 +1491,9 @@
bufsz = adapter->rx_buffer_len;
size = rx_ring->count * bufsz + PCH_GBE_RESERVE_MEMORY;
- rx_ring->rx_buff_pool = dma_alloc_coherent(&pdev->dev, size,
- &rx_ring->rx_buff_pool_logic,
- GFP_KERNEL | __GFP_ZERO);
+ rx_ring->rx_buff_pool =
+ dma_zalloc_coherent(&pdev->dev, size,
+ &rx_ring->rx_buff_pool_logic, GFP_KERNEL);
if (!rx_ring->rx_buff_pool)
return -ENOMEM;
@@ -1807,9 +1807,8 @@
tx_ring->size = tx_ring->count * (int)sizeof(struct pch_gbe_tx_desc);
- tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size,
- &tx_ring->dma,
- GFP_KERNEL | __GFP_ZERO);
+ tx_ring->desc = dma_zalloc_coherent(&pdev->dev, tx_ring->size,
+ &tx_ring->dma, GFP_KERNEL);
if (!tx_ring->desc) {
vfree(tx_ring->buffer_info);
return -ENOMEM;
@@ -1852,9 +1851,8 @@
return -ENOMEM;
rx_ring->size = rx_ring->count * (int)sizeof(struct pch_gbe_rx_desc);
- rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size,
- &rx_ring->dma,
- GFP_KERNEL | __GFP_ZERO);
+ rx_ring->desc = dma_zalloc_coherent(&pdev->dev, rx_ring->size,
+ &rx_ring->dma, GFP_KERNEL);
if (!rx_ring->desc) {
vfree(rx_ring->buffer_info);
return -ENOMEM;
diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c
index f21ae7b..c498181 100644
--- a/drivers/net/ethernet/pasemi/pasemi_mac.c
+++ b/drivers/net/ethernet/pasemi/pasemi_mac.c
@@ -440,10 +440,9 @@
if (pasemi_dma_alloc_ring(&ring->chan, RX_RING_SIZE))
goto out_ring_desc;
- ring->buffers = dma_alloc_coherent(&mac->dma_pdev->dev,
- RX_RING_SIZE * sizeof(u64),
- &ring->buf_dma,
- GFP_KERNEL | __GFP_ZERO);
+ ring->buffers = dma_zalloc_coherent(&mac->dma_pdev->dev,
+ RX_RING_SIZE * sizeof(u64),
+ &ring->buf_dma, GFP_KERNEL);
if (!ring->buffers)
goto out_ring_desc;
diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index 0e17972..f59e6be 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -45,6 +45,17 @@
This allows for virtual function acceleration in virtualized
environments.
+config QLCNIC_DCB
+ bool "QLOGIC QLCNIC 82XX and 83XX family DCB Support"
+ depends on QLCNIC && DCB
+ default y
+ ---help---
+ This configuration parameter enables DCB support in QLE83XX
+ and QLE82XX Converged Ethernet devices. This allows for DCB
+ get operations support through rtNetlink interface. Only CEE
+ mode of DCB is supported. PG and PFC values are related only
+ to Tx.
+
config QLGE
tristate "QLogic QLGE 10Gb Ethernet Driver Support"
depends on PCI
diff --git a/drivers/net/ethernet/qlogic/qlcnic/Makefile b/drivers/net/ethernet/qlogic/qlcnic/Makefile
index 4b1fb3f..a848d29 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/Makefile
+++ b/drivers/net/ethernet/qlogic/qlcnic/Makefile
@@ -11,3 +11,5 @@
qlcnic_minidump.o qlcnic_sriov_common.o
qlcnic-$(CONFIG_QLCNIC_SRIOV) += qlcnic_sriov_pf.o
+
+qlcnic-$(CONFIG_QLCNIC_DCB) += qlcnic_dcb.o
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 7387354..22bd425 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -34,11 +34,12 @@
#include "qlcnic_hdr.h"
#include "qlcnic_hw.h"
#include "qlcnic_83xx_hw.h"
+#include "qlcnic_dcb.h"
#define _QLCNIC_LINUX_MAJOR 5
-#define _QLCNIC_LINUX_MINOR 2
-#define _QLCNIC_LINUX_SUBVERSION 46
-#define QLCNIC_LINUX_VERSIONID "5.2.46"
+#define _QLCNIC_LINUX_MINOR 3
+#define _QLCNIC_LINUX_SUBVERSION 49
+#define QLCNIC_LINUX_VERSIONID "5.3.49"
#define QLCNIC_DRV_IDC_VER 0x01
#define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\
(_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
@@ -97,6 +98,9 @@
#define TX_STOP_THRESH ((MAX_SKB_FRAGS >> 2) + MAX_TSO_HEADER_DESC \
+ MGMT_CMD_DESC_RESV)
#define QLCNIC_MAX_TX_TIMEOUTS 2
+#define QLCNIC_MAX_TX_RINGS 8
+#define QLCNIC_MAX_SDS_RINGS 8
+
/*
* Following are the states of the Phantom. Phantom will set them and
* Host will read to check if the fields are correct.
@@ -467,6 +471,8 @@
u32 mbox_aen[QLC_83XX_MBX_AEN_CNT];
u32 mbox_reg[4];
struct qlcnic_mailbox *mailbox;
+ u8 extend_lb_time;
+ u8 phys_port_id[ETH_ALEN];
};
struct qlcnic_adapter_stats {
@@ -514,6 +520,7 @@
u32 num_desc;
void __iomem *crb_sts_consumer;
+ struct qlcnic_host_tx_ring *tx_ring;
struct status_desc *desc_head;
struct qlcnic_adapter *adapter;
struct napi_struct napi;
@@ -531,9 +538,17 @@
void __iomem *crb_intr_mask;
char name[IFNAMSIZ + 12];
u16 ctx_id;
+
+ u32 state;
u32 producer;
u32 sw_consumer;
u32 num_desc;
+
+ u64 xmit_on;
+ u64 xmit_off;
+ u64 xmit_called;
+ u64 xmit_finished;
+
void __iomem *crb_cmd_producer;
struct cmd_desc_type0 *desc_head;
struct qlcnic_adapter *adapter;
@@ -558,7 +573,6 @@
u32 state;
u16 context_id;
u16 virt_port;
-
};
/* HW context creation */
@@ -603,6 +617,7 @@
#define QLCNIC_CAP0_LRO_CONTIGUOUS (1 << 8)
#define QLCNIC_CAP0_VALIDOFF (1 << 11)
#define QLCNIC_CAP0_LRO_MSS (1 << 21)
+#define QLCNIC_CAP0_TX_MULTI (1 << 22)
/*
* Context state
@@ -630,7 +645,7 @@
struct qlcnic_hostrq_rx_ctx {
__le64 host_rsp_dma_addr; /* Response dma'd here */
- __le32 capabilities[4]; /* Flag bit vector */
+ __le32 capabilities[4]; /* Flag bit vector */
__le32 host_int_crb_mode; /* Interrupt crb usage */
__le32 host_rds_crb_mode; /* RDS crb usage */
/* These ring offsets are relative to data[0] below */
@@ -801,6 +816,7 @@
#define QLCNIC_C2H_OPCODE_CONFIG_LOOPBACK 0x8f
#define QLCNIC_C2H_OPCODE_GET_LINKEVENT_RESPONSE 0x8D
+#define QLCNIC_C2H_OPCODE_GET_DCB_AEN 0x90
#define VPORT_MISS_MODE_DROP 0 /* drop all unmatched */
#define VPORT_MISS_MODE_ACCEPT_ALL 1 /* accept all packets */
@@ -813,6 +829,7 @@
#define QLCNIC_FW_CAPABILITY_BDG BIT_8
#define QLCNIC_FW_CAPABILITY_FVLANTX BIT_9
#define QLCNIC_FW_CAPABILITY_HW_LRO BIT_10
+#define QLCNIC_FW_CAPABILITY_2_MULTI_TX BIT_4
#define QLCNIC_FW_CAPABILITY_MULTI_LOOPBACK BIT_27
#define QLCNIC_FW_CAPABILITY_MORE_CAPS BIT_31
@@ -912,6 +929,8 @@
#define QLCNIC_FW_LRO_MSS_CAP 0x8000
#define QLCNIC_TX_INTR_SHARED 0x10000
#define QLCNIC_APP_CHANGED_FLAGS 0x20000
+#define QLCNIC_HAS_PHYS_PORT_ID 0x40000
+
#define QLCNIC_IS_MSI_FAMILY(adapter) \
((adapter)->flags & (QLCNIC_MSI_ENABLED | QLCNIC_MSIX_ENABLED))
#define QLCNIC_IS_TSO_CAPABLE(adapter) \
@@ -921,6 +940,7 @@
#define QLCNIC_BEACON_DISABLE 0xD
#define QLCNIC_DEF_NUM_STS_DESC_RINGS 4
+#define QLCNIC_DEF_NUM_TX_RINGS 4
#define QLCNIC_MSIX_TBL_SPACE 8192
#define QLCNIC_PCI_REG_MSIX_TBL 0x44
#define QLCNIC_MSIX_TBL_PGSIZE 4096
@@ -936,10 +956,13 @@
#define __QLCNIC_DIAG_RES_ALLOC 6
#define __QLCNIC_LED_ENABLE 7
#define __QLCNIC_ELB_INPROGRESS 8
+#define __QLCNIC_MULTI_TX_UNIQUE 9
#define __QLCNIC_SRIOV_ENABLE 10
#define __QLCNIC_SRIOV_CAPABLE 11
#define __QLCNIC_MBX_POLL_ENABLE 12
#define __QLCNIC_DIAG_MODE 13
+#define __QLCNIC_DCB_STATE 14
+#define __QLCNIC_DCB_IN_AEN 15
#define QLCNIC_INTERRUPT_TEST 1
#define QLCNIC_LOOPBACK_TEST 2
@@ -1043,6 +1066,7 @@
struct delayed_work fw_work;
struct delayed_work idc_aen_work;
struct delayed_work mbx_poll_work;
+ struct qlcnic_dcb *dcb;
struct qlcnic_filter_hash fhash;
struct qlcnic_filter_hash rx_fhash;
@@ -1481,7 +1505,8 @@
void qlcnic_reset_rx_buffers_list(struct qlcnic_adapter *adapter);
void qlcnic_release_rx_buffers(struct qlcnic_adapter *adapter);
-void qlcnic_release_tx_buffers(struct qlcnic_adapter *adapter);
+void qlcnic_release_tx_buffers(struct qlcnic_adapter *,
+ struct qlcnic_host_tx_ring *);
int qlcnic_check_fw_status(struct qlcnic_adapter *adapter);
void qlcnic_watchdog_task(struct work_struct *work);
@@ -1493,6 +1518,7 @@
int qlcnic_nic_add_mac(struct qlcnic_adapter *, const u8 *, u16);
int qlcnic_nic_del_mac(struct qlcnic_adapter *, const u8 *);
void qlcnic_82xx_free_mac_list(struct qlcnic_adapter *adapter);
+int qlcnic_82xx_read_phys_port_id(struct qlcnic_adapter *);
int qlcnic_fw_cmd_set_mtu(struct qlcnic_adapter *adapter, int mtu);
int qlcnic_fw_cmd_set_drv_version(struct qlcnic_adapter *, u32);
@@ -1514,8 +1540,9 @@
void qlcnic_diag_free_res(struct net_device *netdev, int max_sds_rings);
int qlcnic_diag_alloc_res(struct net_device *netdev, int test);
netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
-int qlcnic_set_max_rss(struct qlcnic_adapter *, u8, size_t);
+int qlcnic_set_max_rss(struct qlcnic_adapter *, u8, int);
int qlcnic_validate_max_rss(struct qlcnic_adapter *, __u32);
+int qlcnic_validate_max_tx_rings(struct qlcnic_adapter *, u32 txq);
void qlcnic_alloc_lb_filters_mem(struct qlcnic_adapter *adapter);
void qlcnic_82xx_set_mac_filter_count(struct qlcnic_adapter *);
int qlcnic_enable_msix(struct qlcnic_adapter *, u32);
@@ -1542,6 +1569,7 @@
void qlcnic_advert_link_change(struct qlcnic_adapter *, int);
void qlcnic_free_tx_rings(struct qlcnic_adapter *);
int qlcnic_alloc_tx_rings(struct qlcnic_adapter *, struct net_device *);
+void qlcnic_dump_mbx(struct qlcnic_adapter *, struct qlcnic_cmd_args *);
void qlcnic_create_sysfs_entries(struct qlcnic_adapter *adapter);
void qlcnic_remove_sysfs_entries(struct qlcnic_adapter *adapter);
@@ -1604,6 +1632,26 @@
tx_ring->producer;
}
+static inline int qlcnic_set_real_num_queues(struct qlcnic_adapter *adapter,
+ struct net_device *netdev)
+{
+ int err, tx_q;
+
+ tx_q = adapter->max_drv_tx_rings;
+
+ netdev->num_tx_queues = tx_q;
+ netdev->real_num_tx_queues = tx_q;
+
+ err = netif_set_real_num_tx_queues(netdev, tx_q);
+ if (err)
+ dev_err(&adapter->pdev->dev, "failed to set %d Tx queues\n",
+ tx_q);
+ else
+ dev_info(&adapter->pdev->dev, "set %d Tx queues\n", tx_q);
+
+ return err;
+}
+
struct qlcnic_nic_template {
int (*config_bridged_mode) (struct qlcnic_adapter *, u32);
int (*config_led) (struct qlcnic_adapter *, u32, u32);
@@ -1640,8 +1688,8 @@
int (*read_reg) (struct qlcnic_adapter *, ulong, int *);
int (*write_reg) (struct qlcnic_adapter *, ulong, u32);
void (*get_ocm_win) (struct qlcnic_hardware_context *);
- int (*get_mac_address) (struct qlcnic_adapter *, u8 *);
- int (*setup_intr) (struct qlcnic_adapter *, u8);
+ int (*get_mac_address) (struct qlcnic_adapter *, u8 *, u8);
+ int (*setup_intr) (struct qlcnic_adapter *, u8, int);
int (*alloc_mbx_args)(struct qlcnic_cmd_args *,
struct qlcnic_adapter *, u32);
int (*mbx_cmd) (struct qlcnic_adapter *, struct qlcnic_cmd_args *);
@@ -1674,6 +1722,7 @@
int (*get_board_info) (struct qlcnic_adapter *);
void (*set_mac_filter_count) (struct qlcnic_adapter *);
void (*free_mac_list) (struct qlcnic_adapter *);
+ int (*read_phys_port_id) (struct qlcnic_adapter *);
};
extern struct qlcnic_nic_template qlcnic_vf_ops;
@@ -1702,14 +1751,15 @@
}
static inline int qlcnic_get_mac_address(struct qlcnic_adapter *adapter,
- u8 *mac)
+ u8 *mac, u8 function)
{
- return adapter->ahw->hw_ops->get_mac_address(adapter, mac);
+ return adapter->ahw->hw_ops->get_mac_address(adapter, mac, function);
}
-static inline int qlcnic_setup_intr(struct qlcnic_adapter *adapter, u8 num_intr)
+static inline int qlcnic_setup_intr(struct qlcnic_adapter *adapter,
+ u8 num_intr, int txq)
{
- return adapter->ahw->hw_ops->setup_intr(adapter, num_intr);
+ return adapter->ahw->hw_ops->setup_intr(adapter, num_intr, txq);
}
static inline int qlcnic_alloc_mbx_args(struct qlcnic_cmd_args *mbx,
@@ -1900,6 +1950,12 @@
adapter->ahw->hw_ops->set_mac_filter_count(adapter);
}
+static inline void qlcnic_read_phys_port_id(struct qlcnic_adapter *adapter)
+{
+ if (adapter->ahw->hw_ops->read_phys_port_id)
+ adapter->ahw->hw_ops->read_phys_port_id(adapter);
+}
+
static inline void qlcnic_dev_request_reset(struct qlcnic_adapter *adapter,
u32 key)
{
@@ -1931,16 +1987,45 @@
adapter->nic_ops->config_ipaddr(adapter, ip, cmd);
}
-static inline void qlcnic_disable_int(struct qlcnic_host_sds_ring *sds_ring)
+static inline bool qlcnic_check_multi_tx(struct qlcnic_adapter *adapter)
{
- writel(0, sds_ring->crb_intr_mask);
+ return test_bit(__QLCNIC_MULTI_TX_UNIQUE, &adapter->state);
}
+static inline void qlcnic_disable_multi_tx(struct qlcnic_adapter *adapter)
+{
+ test_and_clear_bit(__QLCNIC_MULTI_TX_UNIQUE, &adapter->state);
+ adapter->max_drv_tx_rings = 1;
+}
+
+/* When operating in a multi tx mode, driver needs to write 0x1
+ * to src register, instead of 0x0 to disable receiving interrupt.
+ */
+static inline void qlcnic_disable_int(struct qlcnic_host_sds_ring *sds_ring)
+{
+ struct qlcnic_adapter *adapter = sds_ring->adapter;
+
+ if (qlcnic_check_multi_tx(adapter) &&
+ !adapter->ahw->diag_test &&
+ (adapter->flags & QLCNIC_MSIX_ENABLED))
+ writel(0x1, sds_ring->crb_intr_mask);
+ else
+ writel(0, sds_ring->crb_intr_mask);
+}
+
+/* When operating in a multi tx mode, driver needs to write 0x0
+ * to src register, instead of 0x1 to enable receiving interrupts.
+ */
static inline void qlcnic_enable_int(struct qlcnic_host_sds_ring *sds_ring)
{
struct qlcnic_adapter *adapter = sds_ring->adapter;
- writel(0x1, sds_ring->crb_intr_mask);
+ if (qlcnic_check_multi_tx(adapter) &&
+ !adapter->ahw->diag_test &&
+ (adapter->flags & QLCNIC_MSIX_ENABLED))
+ writel(0, sds_ring->crb_intr_mask);
+ else
+ writel(0x1, sds_ring->crb_intr_mask);
if (!QLCNIC_IS_MSI_FAMILY(adapter))
writel(0xfbff, adapter->tgt_mask_reg);
@@ -1972,9 +2057,11 @@
__func__, ##_args); \
} while (0)
-#define PCI_DEVICE_ID_QLOGIC_QLE834X 0x8030
+#define PCI_DEVICE_ID_QLOGIC_QLE824X 0x8020
+#define PCI_DEVICE_ID_QLOGIC_QLE834X 0x8030
#define PCI_DEVICE_ID_QLOGIC_VF_QLE834X 0x8430
-#define PCI_DEVICE_ID_QLOGIC_QLE824X 0x8020
+#define PCI_DEVICE_ID_QLOGIC_QLE844X 0x8040
+#define PCI_DEVICE_ID_QLOGIC_VF_QLE844X 0x8440
static inline bool qlcnic_82xx_check(struct qlcnic_adapter *adapter)
{
@@ -1988,6 +2075,8 @@
bool status;
status = ((device == PCI_DEVICE_ID_QLOGIC_QLE834X) ||
+ (device == PCI_DEVICE_ID_QLOGIC_QLE844X) ||
+ (device == PCI_DEVICE_ID_QLOGIC_VF_QLE844X) ||
(device == PCI_DEVICE_ID_QLOGIC_VF_QLE834X)) ? true : false;
return status;
@@ -2001,7 +2090,105 @@
static inline bool qlcnic_sriov_vf_check(struct qlcnic_adapter *adapter)
{
unsigned short device = adapter->pdev->device;
+ bool status;
- return (device == PCI_DEVICE_ID_QLOGIC_VF_QLE834X) ? true : false;
+ status = ((device == PCI_DEVICE_ID_QLOGIC_VF_QLE834X) ||
+ (device == PCI_DEVICE_ID_QLOGIC_VF_QLE844X)) ? true : false;
+
+ return status;
+}
+
+static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_adapter *adapter)
+{
+ struct qlcnic_dcb *dcb = adapter->dcb;
+
+ if (dcb && dcb->ops->get_hw_capability)
+ return dcb->ops->get_hw_capability(adapter);
+
+ return 0;
+}
+
+static inline void qlcnic_dcb_free(struct qlcnic_adapter *adapter)
+{
+ struct qlcnic_dcb *dcb = adapter->dcb;
+
+ if (dcb && dcb->ops->free)
+ dcb->ops->free(adapter);
+}
+
+static inline int qlcnic_dcb_attach(struct qlcnic_adapter *adapter)
+{
+ struct qlcnic_dcb *dcb = adapter->dcb;
+
+ if (dcb && dcb->ops->attach)
+ return dcb->ops->attach(adapter);
+
+ return 0;
+}
+
+static inline int
+qlcnic_dcb_query_hw_capability(struct qlcnic_adapter *adapter, char *buf)
+{
+ struct qlcnic_dcb *dcb = adapter->dcb;
+
+ if (dcb && dcb->ops->query_hw_capability)
+ return dcb->ops->query_hw_capability(adapter, buf);
+
+ return 0;
+}
+
+static inline void qlcnic_dcb_get_info(struct qlcnic_adapter *adapter)
+{
+ struct qlcnic_dcb *dcb = adapter->dcb;
+
+ if (dcb && dcb->ops->get_info)
+ dcb->ops->get_info(adapter);
+}
+
+static inline int
+qlcnic_dcb_query_cee_param(struct qlcnic_adapter *adapter, char *buf, u8 type)
+{
+ struct qlcnic_dcb *dcb = adapter->dcb;
+
+ if (dcb && dcb->ops->query_cee_param)
+ return dcb->ops->query_cee_param(adapter, buf, type);
+
+ return 0;
+}
+
+static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_adapter *adapter)
+{
+ struct qlcnic_dcb *dcb = adapter->dcb;
+
+ if (dcb && dcb->ops->get_cee_cfg)
+ return dcb->ops->get_cee_cfg(adapter);
+
+ return 0;
+}
+
+static inline void
+qlcnic_dcb_register_aen(struct qlcnic_adapter *adapter, u8 flag)
+{
+ struct qlcnic_dcb *dcb = adapter->dcb;
+
+ if (dcb && dcb->ops->register_aen)
+ dcb->ops->register_aen(adapter, flag);
+}
+
+static inline void qlcnic_dcb_handle_aen(struct qlcnic_adapter *adapter,
+ void *msg)
+{
+ struct qlcnic_dcb *dcb = adapter->dcb;
+
+ if (dcb && dcb->ops->handle_aen)
+ dcb->ops->handle_aen(adapter, msg);
+}
+
+static inline void qlcnic_dcb_init_dcbnl_ops(struct qlcnic_adapter *adapter)
+{
+ struct qlcnic_dcb *dcb = adapter->dcb;
+
+ if (dcb && dcb->ops->init_dcbnl_ops)
+ dcb->ops->init_dcbnl_ops(adapter);
}
#endif /* __QLCNIC_H_ */
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index e53caaa..8fce1d3 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -67,6 +67,8 @@
{QLCNIC_CMD_ADD_RCV_RINGS, 130, 26},
{QLCNIC_CMD_CONFIG_VPORT, 4, 4},
{QLCNIC_CMD_BC_EVENT_SETUP, 2, 1},
+ {QLCNIC_CMD_DCB_QUERY_CAP, 1, 2},
+ {QLCNIC_CMD_DCB_QUERY_PARAM, 2, 50},
};
const u32 qlcnic_83xx_ext_reg_tbl[] = {
@@ -261,7 +263,7 @@
}
}
-int qlcnic_83xx_setup_intr(struct qlcnic_adapter *adapter, u8 num_intr)
+int qlcnic_83xx_setup_intr(struct qlcnic_adapter *adapter, u8 num_intr, int txq)
{
int err, i, num_msix;
struct qlcnic_hardware_context *ahw = adapter->ahw;
@@ -695,8 +697,8 @@
static void qlcnic_83xx_handle_idc_comp_aen(struct qlcnic_adapter *adapter,
u32 data[]);
-static void qlcnic_dump_mbx(struct qlcnic_adapter *adapter,
- struct qlcnic_cmd_args *cmd)
+void qlcnic_dump_mbx(struct qlcnic_adapter *adapter,
+ struct qlcnic_cmd_args *cmd)
{
int i;
@@ -860,9 +862,9 @@
void __qlcnic_83xx_process_aen(struct qlcnic_adapter *adapter)
{
+ struct qlcnic_hardware_context *ahw = adapter->ahw;
u32 event[QLC_83XX_MBX_AEN_CNT];
int i;
- struct qlcnic_hardware_context *ahw = adapter->ahw;
for (i = 0; i < QLC_83XX_MBX_AEN_CNT; i++)
event[i] = readl(QLCNIC_MBX_FW(ahw, i));
@@ -882,6 +884,7 @@
&adapter->idc_aen_work, 0);
break;
case QLCNIC_MBX_TIME_EXTEND_EVENT:
+ ahw->extend_lb_time = event[1] >> 8 & 0xf;
break;
case QLCNIC_MBX_BC_EVENT:
qlcnic_sriov_handle_bc_event(adapter, event[1]);
@@ -894,6 +897,9 @@
dev_info(&adapter->pdev->dev, "SFP Removed AEN:0x%x.\n",
QLCNIC_MBX_RSP(event[0]));
break;
+ case QLCNIC_MBX_DCBX_CONFIG_CHANGE_EVENT:
+ qlcnic_dcb_handle_aen(adapter, (void *)&event[1]);
+ break;
default:
dev_dbg(&adapter->pdev->dev, "Unsupported AEN:0x%x.\n",
QLCNIC_MBX_RSP(event[0]));
@@ -1690,7 +1696,7 @@
/* Make sure carrier is off and queue is stopped during loopback */
if (netif_running(netdev)) {
netif_carrier_off(netdev);
- netif_stop_queue(netdev);
+ netif_tx_stop_all_queues(netdev);
}
ret = qlcnic_do_lb_test(adapter, mode);
@@ -1706,13 +1712,28 @@
return ret;
}
+static void qlcnic_extend_lb_idc_cmpltn_wait(struct qlcnic_adapter *adapter,
+ u32 *max_wait_count)
+{
+ struct qlcnic_hardware_context *ahw = adapter->ahw;
+ int temp;
+
+ netdev_info(adapter->netdev, "Recieved loopback IDC time extend event for 0x%x seconds\n",
+ ahw->extend_lb_time);
+ temp = ahw->extend_lb_time * 1000;
+ *max_wait_count += temp / QLC_83XX_LB_MSLEEP_COUNT;
+ ahw->extend_lb_time = 0;
+}
+
int qlcnic_83xx_set_lb_mode(struct qlcnic_adapter *adapter, u8 mode)
{
struct qlcnic_hardware_context *ahw = adapter->ahw;
struct net_device *netdev = adapter->netdev;
+ u32 config, max_wait_count;
int status = 0, loop = 0;
- u32 config;
+ ahw->extend_lb_time = 0;
+ max_wait_count = QLC_83XX_LB_WAIT_COUNT;
status = qlcnic_83xx_get_port_config(adapter);
if (status)
return status;
@@ -1754,9 +1775,14 @@
clear_bit(QLC_83XX_IDC_COMP_AEN, &ahw->idc.status);
return -EBUSY;
}
- if (loop++ > QLC_83XX_LB_WAIT_COUNT) {
- netdev_err(netdev,
- "Did not receive IDC completion AEN\n");
+
+ if (ahw->extend_lb_time)
+ qlcnic_extend_lb_idc_cmpltn_wait(adapter,
+ &max_wait_count);
+
+ if (loop++ > max_wait_count) {
+ netdev_err(netdev, "%s: Did not receive loopback IDC completion AEN\n",
+ __func__);
clear_bit(QLC_83XX_IDC_COMP_AEN, &ahw->idc.status);
qlcnic_83xx_clear_lb_mode(adapter, mode);
return -ETIMEDOUT;
@@ -1771,10 +1797,12 @@
int qlcnic_83xx_clear_lb_mode(struct qlcnic_adapter *adapter, u8 mode)
{
struct qlcnic_hardware_context *ahw = adapter->ahw;
+ u32 config = ahw->port_config, max_wait_count;
struct net_device *netdev = adapter->netdev;
int status = 0, loop = 0;
- u32 config = ahw->port_config;
+ ahw->extend_lb_time = 0;
+ max_wait_count = QLC_83XX_LB_WAIT_COUNT;
set_bit(QLC_83XX_IDC_COMP_AEN, &ahw->idc.status);
if (mode == QLCNIC_ILB_MODE)
ahw->port_config &= ~QLC_83XX_CFG_LOOPBACK_HSS;
@@ -1802,9 +1830,13 @@
return -EBUSY;
}
- if (loop++ > QLC_83XX_LB_WAIT_COUNT) {
- netdev_err(netdev,
- "Did not receive IDC completion AEN\n");
+ if (ahw->extend_lb_time)
+ qlcnic_extend_lb_idc_cmpltn_wait(adapter,
+ &max_wait_count);
+
+ if (loop++ > max_wait_count) {
+ netdev_err(netdev, "%s: Did not receive loopback IDC completion AEN\n",
+ __func__);
clear_bit(QLC_83XX_IDC_COMP_AEN, &ahw->idc.status);
return -ETIMEDOUT;
}
@@ -2010,12 +2042,14 @@
cmd->req.arg[1] = type;
}
-int qlcnic_83xx_get_mac_address(struct qlcnic_adapter *adapter, u8 *mac)
+int qlcnic_83xx_get_mac_address(struct qlcnic_adapter *adapter, u8 *mac,
+ u8 function)
{
int err, i;
struct qlcnic_cmd_args cmd;
u32 mac_low, mac_high;
+ function = 0;
err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_MAC_ADDRESS);
if (err)
return err;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
index dd22ef3..0fc5616 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
@@ -84,9 +84,11 @@
/* Firmware image definitions */
#define QLC_83XX_BOOTLOADER_FLASH_ADDR 0x10000
#define QLC_83XX_FW_FILE_NAME "83xx_fw.bin"
+#define QLC_84XX_FW_FILE_NAME "84xx_fw.bin"
#define QLC_83XX_BOOT_FROM_FLASH 0
#define QLC_83XX_BOOT_FROM_FILE 0x12345678
+#define QLC_FW_FILE_NAME_LEN 20
#define QLC_83XX_MAX_RESET_SEQ_ENTRIES 16
#define QLC_83XX_MBX_POST_BC_OP 0x1
@@ -404,6 +406,7 @@
#define QLC_83XX_MAX_MC_COUNT 38
#define QLC_83XX_MAX_UC_COUNT 4096
+#define QLC_83XX_PVID_STRIP_CAPABILITY BIT_22
#define QLC_83XX_GET_FUNC_MODE_FROM_NPAR_INFO(val) (val & 0x80000000)
#define QLC_83XX_GET_LRO_CAPABILITY(val) (val & 0x20)
#define QLC_83XX_GET_LSO_CAPABILITY(val) (val & 0x40)
@@ -520,7 +523,7 @@
/* 83xx funcitons */
int qlcnic_83xx_get_fw_version(struct qlcnic_adapter *);
int qlcnic_83xx_issue_cmd(struct qlcnic_adapter *, struct qlcnic_cmd_args *);
-int qlcnic_83xx_setup_intr(struct qlcnic_adapter *, u8);
+int qlcnic_83xx_setup_intr(struct qlcnic_adapter *, u8, int);
void qlcnic_83xx_get_func_no(struct qlcnic_adapter *);
int qlcnic_83xx_cam_lock(struct qlcnic_adapter *);
void qlcnic_83xx_cam_unlock(struct qlcnic_adapter *);
@@ -561,7 +564,7 @@
void qlcnic_83xx_process_rcv_ring_diag(struct qlcnic_host_sds_ring *);
int qlcnic_83xx_config_intrpt(struct qlcnic_adapter *, bool);
int qlcnic_83xx_sre_macaddr_change(struct qlcnic_adapter *, u8 *, u16, u8);
-int qlcnic_83xx_get_mac_address(struct qlcnic_adapter *, u8 *);
+int qlcnic_83xx_get_mac_address(struct qlcnic_adapter *, u8 *, u8);
void qlcnic_83xx_configure_mac(struct qlcnic_adapter *, u8 *, u8,
struct qlcnic_cmd_args *);
int qlcnic_83xx_alloc_mbx_args(struct qlcnic_cmd_args *,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
index c97e2e0..a969ac2 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
@@ -635,6 +635,8 @@
if (adapter->portnum == 0)
qlcnic_set_drv_version(adapter);
+
+ qlcnic_dcb_get_info(adapter);
qlcnic_83xx_idc_attach_driver(adapter);
return 0;
@@ -1948,12 +1950,36 @@
dev_err(&p_dev->pdev->dev, "%s: failed\n", __func__);
}
+static inline void qlcnic_83xx_get_fw_file_name(struct qlcnic_adapter *adapter,
+ char *file_name)
+{
+ struct pci_dev *pdev = adapter->pdev;
+
+ memset(file_name, 0, QLC_FW_FILE_NAME_LEN);
+
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_QLOGIC_QLE834X:
+ strncpy(file_name, QLC_83XX_FW_FILE_NAME,
+ QLC_FW_FILE_NAME_LEN);
+ break;
+ case PCI_DEVICE_ID_QLOGIC_QLE844X:
+ strncpy(file_name, QLC_84XX_FW_FILE_NAME,
+ QLC_FW_FILE_NAME_LEN);
+ break;
+ default:
+ dev_err(&pdev->dev, "%s: Invalid device id\n",
+ __func__);
+ }
+}
+
static int qlcnic_83xx_load_fw_image_from_host(struct qlcnic_adapter *adapter)
{
+ char fw_file_name[QLC_FW_FILE_NAME_LEN];
int err = -EIO;
- if (request_firmware(&adapter->ahw->fw_info.fw,
- QLC_83XX_FW_FILE_NAME, &(adapter->pdev->dev))) {
+ qlcnic_83xx_get_fw_file_name(adapter, fw_file_name);
+ if (request_firmware(&adapter->ahw->fw_info.fw, fw_file_name,
+ &(adapter->pdev->dev))) {
dev_err(&adapter->pdev->dev,
"No file FW image, loading flash FW image.\n");
QLC_SHARED_REG_WR32(adapter, QLCNIC_FW_IMG_VALID,
@@ -2177,7 +2203,7 @@
if (err)
goto detach_mbx;
- err = qlcnic_setup_intr(adapter, 0);
+ err = qlcnic_setup_intr(adapter, 0, 0);
if (err) {
dev_err(&adapter->pdev->dev, "Failed to setup interrupt\n");
goto disable_intr;
@@ -2204,6 +2230,9 @@
if (err)
goto disable_mbx_intr;
+ if (adapter->dcb && qlcnic_dcb_attach(adapter))
+ qlcnic_clear_dcb_ops(adapter);
+
/* Periodically monitor device status */
qlcnic_83xx_idc_poll_dev_state(&adapter->fw_work.work);
return 0;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index d09389b..86850dd 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -38,6 +38,9 @@
{QLCNIC_CMD_GET_TEMP_HDR, 4, 1},
{QLCNIC_CMD_82XX_SET_DRV_VER, 4, 1},
{QLCNIC_CMD_GET_LED_STATUS, 4, 2},
+ {QLCNIC_CMD_MQ_TX_CONFIG_INTR, 2, 3},
+ {QLCNIC_CMD_DCB_QUERY_CAP, 1, 2},
+ {QLCNIC_CMD_DCB_QUERY_PARAM, 4, 1},
};
static inline u32 qlcnic_get_cmd_signature(struct qlcnic_hardware_context *ahw)
@@ -171,6 +174,7 @@
break;
}
dev_err(&pdev->dev, fmt, cmd->rsp.arg[0]);
+ qlcnic_dump_mbx(adapter, cmd);
} else if (rsp == QLCNIC_CDRP_RSP_OK)
cmd->rsp.arg[0] = QLCNIC_RCODE_SUCCESS;
@@ -243,40 +247,38 @@
int qlcnic_82xx_fw_cmd_create_rx_ctx(struct qlcnic_adapter *adapter)
{
- void *addr;
- struct qlcnic_hostrq_rx_ctx *prq;
- struct qlcnic_cardrsp_rx_ctx *prsp;
- struct qlcnic_hostrq_rds_ring *prq_rds;
- struct qlcnic_hostrq_sds_ring *prq_sds;
+ struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx;
+ struct qlcnic_hardware_context *ahw = adapter->ahw;
+ dma_addr_t hostrq_phys_addr, cardrsp_phys_addr;
+ struct net_device *netdev = adapter->netdev;
+ u32 temp_intr_crb_mode, temp_rds_crb_mode;
struct qlcnic_cardrsp_rds_ring *prsp_rds;
struct qlcnic_cardrsp_sds_ring *prsp_sds;
+ struct qlcnic_hostrq_rds_ring *prq_rds;
+ struct qlcnic_hostrq_sds_ring *prq_sds;
struct qlcnic_host_rds_ring *rds_ring;
struct qlcnic_host_sds_ring *sds_ring;
- struct qlcnic_cmd_args cmd;
-
- dma_addr_t hostrq_phys_addr, cardrsp_phys_addr;
- u64 phys_addr;
-
+ struct qlcnic_cardrsp_rx_ctx *prsp;
+ struct qlcnic_hostrq_rx_ctx *prq;
u8 i, nrds_rings, nsds_rings;
- u16 temp_u16;
+ struct qlcnic_cmd_args cmd;
size_t rq_size, rsp_size;
u32 cap, reg, val, reg2;
+ u64 phys_addr;
+ u16 temp_u16;
+ void *addr;
int err;
- struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx;
-
nrds_rings = adapter->max_rds_rings;
nsds_rings = adapter->max_sds_rings;
- rq_size =
- SIZEOF_HOSTRQ_RX(struct qlcnic_hostrq_rx_ctx, nrds_rings,
- nsds_rings);
- rsp_size =
- SIZEOF_CARDRSP_RX(struct qlcnic_cardrsp_rx_ctx, nrds_rings,
- nsds_rings);
+ rq_size = SIZEOF_HOSTRQ_RX(struct qlcnic_hostrq_rx_ctx, nrds_rings,
+ nsds_rings);
+ rsp_size = SIZEOF_CARDRSP_RX(struct qlcnic_cardrsp_rx_ctx, nrds_rings,
+ nsds_rings);
addr = dma_alloc_coherent(&adapter->pdev->dev, rq_size,
- &hostrq_phys_addr, GFP_KERNEL);
+ &hostrq_phys_addr, GFP_KERNEL);
if (addr == NULL)
return -ENOMEM;
prq = addr;
@@ -295,15 +297,20 @@
| QLCNIC_CAP0_VALIDOFF);
cap |= (QLCNIC_CAP0_JUMBO_CONTIGUOUS | QLCNIC_CAP0_LRO_CONTIGUOUS);
- temp_u16 = offsetof(struct qlcnic_hostrq_rx_ctx, msix_handler);
- prq->valid_field_offset = cpu_to_le16(temp_u16);
- prq->txrx_sds_binding = nsds_rings - 1;
+ if (qlcnic_check_multi_tx(adapter) &&
+ !adapter->ahw->diag_test) {
+ cap |= QLCNIC_CAP0_TX_MULTI;
+ } else {
+ temp_u16 = offsetof(struct qlcnic_hostrq_rx_ctx, msix_handler);
+ prq->valid_field_offset = cpu_to_le16(temp_u16);
+ prq->txrx_sds_binding = nsds_rings - 1;
+ temp_intr_crb_mode = QLCNIC_HOST_INT_CRB_MODE_SHARED;
+ prq->host_int_crb_mode = cpu_to_le32(temp_intr_crb_mode);
+ temp_rds_crb_mode = QLCNIC_HOST_RDS_CRB_MODE_UNIQUE;
+ prq->host_rds_crb_mode = cpu_to_le32(temp_rds_crb_mode);
+ }
prq->capabilities[0] = cpu_to_le32(cap);
- prq->host_int_crb_mode =
- cpu_to_le32(QLCNIC_HOST_INT_CRB_MODE_SHARED);
- prq->host_rds_crb_mode =
- cpu_to_le32(QLCNIC_HOST_RDS_CRB_MODE_UNIQUE);
prq->num_rds_rings = cpu_to_le16(nrds_rings);
prq->num_sds_rings = cpu_to_le16(nsds_rings);
@@ -317,10 +324,8 @@
le32_to_cpu(prq->rds_ring_offset));
for (i = 0; i < nrds_rings; i++) {
-
rds_ring = &recv_ctx->rds_rings[i];
rds_ring->producer = 0;
-
prq_rds[i].host_phys_addr = cpu_to_le64(rds_ring->phys_addr);
prq_rds[i].ring_size = cpu_to_le32(rds_ring->num_desc);
prq_rds[i].ring_kind = cpu_to_le32(i);
@@ -331,14 +336,16 @@
le32_to_cpu(prq->sds_ring_offset));
for (i = 0; i < nsds_rings; i++) {
-
sds_ring = &recv_ctx->sds_rings[i];
sds_ring->consumer = 0;
memset(sds_ring->desc_head, 0, STATUS_DESC_RINGSIZE(sds_ring));
-
prq_sds[i].host_phys_addr = cpu_to_le64(sds_ring->phys_addr);
prq_sds[i].ring_size = cpu_to_le32(sds_ring->num_desc);
- prq_sds[i].msi_index = cpu_to_le16(i);
+ if (qlcnic_check_multi_tx(adapter) &&
+ !adapter->ahw->diag_test)
+ prq_sds[i].msi_index = cpu_to_le16(ahw->intr_tbl[i].id);
+ else
+ prq_sds[i].msi_index = cpu_to_le16(i);
}
phys_addr = hostrq_phys_addr;
@@ -361,9 +368,8 @@
for (i = 0; i < le16_to_cpu(prsp->num_rds_rings); i++) {
rds_ring = &recv_ctx->rds_rings[i];
-
reg = le32_to_cpu(prsp_rds[i].host_producer_crb);
- rds_ring->crb_rcv_producer = adapter->ahw->pci_base0 + reg;
+ rds_ring->crb_rcv_producer = ahw->pci_base0 + reg;
}
prsp_sds = ((struct qlcnic_cardrsp_sds_ring *)
@@ -371,24 +377,30 @@
for (i = 0; i < le16_to_cpu(prsp->num_sds_rings); i++) {
sds_ring = &recv_ctx->sds_rings[i];
-
reg = le32_to_cpu(prsp_sds[i].host_consumer_crb);
- reg2 = le32_to_cpu(prsp_sds[i].interrupt_crb);
+ if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test)
+ reg2 = ahw->intr_tbl[i].src;
+ else
+ reg2 = le32_to_cpu(prsp_sds[i].interrupt_crb);
- sds_ring->crb_sts_consumer = adapter->ahw->pci_base0 + reg;
- sds_ring->crb_intr_mask = adapter->ahw->pci_base0 + reg2;
+ sds_ring->crb_intr_mask = ahw->pci_base0 + reg2;
+ sds_ring->crb_sts_consumer = ahw->pci_base0 + reg;
}
recv_ctx->state = le32_to_cpu(prsp->host_ctx_state);
recv_ctx->context_id = le16_to_cpu(prsp->context_id);
recv_ctx->virt_port = prsp->virt_port;
+ netdev_info(netdev, "Rx Context[%d] Created, state 0x%x\n",
+ recv_ctx->context_id, recv_ctx->state);
qlcnic_free_mbx_args(&cmd);
+
out_free_rsp:
dma_free_coherent(&adapter->pdev->dev, rsp_size, prsp,
cardrsp_phys_addr);
out_free_rq:
dma_free_coherent(&adapter->pdev->dev, rq_size, prq, hostrq_phys_addr);
+
return err;
}
@@ -416,16 +428,19 @@
struct qlcnic_host_tx_ring *tx_ring,
int ring)
{
+ struct qlcnic_hardware_context *ahw = adapter->ahw;
+ struct net_device *netdev = adapter->netdev;
struct qlcnic_hostrq_tx_ctx *prq;
struct qlcnic_hostrq_cds_ring *prq_cds;
struct qlcnic_cardrsp_tx_ctx *prsp;
- void *rq_addr, *rsp_addr;
- size_t rq_size, rsp_size;
- u32 temp;
struct qlcnic_cmd_args cmd;
- int err;
- u64 phys_addr;
- dma_addr_t rq_phys_addr, rsp_phys_addr;
+ u32 temp, intr_mask, temp_int_crb_mode;
+ dma_addr_t rq_phys_addr, rsp_phys_addr;
+ int temp_nsds_rings, index, err;
+ void *rq_addr, *rsp_addr;
+ size_t rq_size, rsp_size;
+ u64 phys_addr;
+ u16 msix_id;
/* reset host resources */
tx_ring->producer = 0;
@@ -433,32 +448,42 @@
*(tx_ring->hw_consumer) = 0;
rq_size = SIZEOF_HOSTRQ_TX(struct qlcnic_hostrq_tx_ctx);
- rq_addr = dma_alloc_coherent(&adapter->pdev->dev, rq_size,
- &rq_phys_addr, GFP_KERNEL | __GFP_ZERO);
+ rq_addr = dma_zalloc_coherent(&adapter->pdev->dev, rq_size,
+ &rq_phys_addr, GFP_KERNEL);
if (!rq_addr)
return -ENOMEM;
rsp_size = SIZEOF_CARDRSP_TX(struct qlcnic_cardrsp_tx_ctx);
- rsp_addr = dma_alloc_coherent(&adapter->pdev->dev, rsp_size,
- &rsp_phys_addr, GFP_KERNEL | __GFP_ZERO);
+ rsp_addr = dma_zalloc_coherent(&adapter->pdev->dev, rsp_size,
+ &rsp_phys_addr, GFP_KERNEL);
if (!rsp_addr) {
err = -ENOMEM;
goto out_free_rq;
}
prq = rq_addr;
-
prsp = rsp_addr;
prq->host_rsp_dma_addr = cpu_to_le64(rsp_phys_addr);
temp = (QLCNIC_CAP0_LEGACY_CONTEXT | QLCNIC_CAP0_LEGACY_MN |
- QLCNIC_CAP0_LSO);
+ QLCNIC_CAP0_LSO);
+ if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test)
+ temp |= QLCNIC_CAP0_TX_MULTI;
+
prq->capabilities[0] = cpu_to_le32(temp);
- prq->host_int_crb_mode =
- cpu_to_le32(QLCNIC_HOST_INT_CRB_MODE_SHARED);
- prq->msi_index = 0;
+ if (qlcnic_check_multi_tx(adapter) &&
+ !adapter->ahw->diag_test) {
+ temp_nsds_rings = adapter->max_sds_rings;
+ index = temp_nsds_rings + ring;
+ msix_id = ahw->intr_tbl[index].id;
+ prq->msi_index = cpu_to_le16(msix_id);
+ } else {
+ temp_int_crb_mode = QLCNIC_HOST_INT_CRB_MODE_SHARED;
+ prq->host_int_crb_mode = cpu_to_le32(temp_int_crb_mode);
+ prq->msi_index = 0;
+ }
prq->interrupt_ctl = 0;
prq->cmd_cons_dma_addr = cpu_to_le64(tx_ring->hw_cons_phys_addr);
@@ -480,15 +505,25 @@
err = qlcnic_issue_cmd(adapter, &cmd);
if (err == QLCNIC_RCODE_SUCCESS) {
+ tx_ring->state = le32_to_cpu(prsp->host_ctx_state);
temp = le32_to_cpu(prsp->cds_ring.host_producer_crb);
tx_ring->crb_cmd_producer = adapter->ahw->pci_base0 + temp;
tx_ring->ctx_id = le16_to_cpu(prsp->context_id);
+ if (qlcnic_check_multi_tx(adapter) &&
+ !adapter->ahw->diag_test &&
+ (adapter->flags & QLCNIC_MSIX_ENABLED)) {
+ index = adapter->max_sds_rings + ring;
+ intr_mask = ahw->intr_tbl[index].src;
+ tx_ring->crb_intr_mask = ahw->pci_base0 + intr_mask;
+ }
+
+ netdev_info(netdev, "Tx Context[0x%x] Created, state 0x%x\n",
+ tx_ring->ctx_id, tx_ring->state);
} else {
- dev_err(&adapter->pdev->dev,
- "Failed to create tx ctx in firmware%d\n", err);
+ netdev_err(netdev, "Failed to create tx ctx in firmware%d\n",
+ err);
err = -EIO;
}
-
qlcnic_free_mbx_args(&cmd);
out_free_rsp:
@@ -618,6 +653,13 @@
}
}
+ if (qlcnic_82xx_check(dev) && (dev->flags & QLCNIC_MSIX_ENABLED) &&
+ qlcnic_check_multi_tx(dev) && !dev->ahw->diag_test) {
+ err = qlcnic_82xx_mq_intrpt(dev, 1);
+ if (err)
+ return err;
+ }
+
err = qlcnic_fw_cmd_create_rx_ctx(dev);
if (err)
goto err_out;
@@ -639,13 +681,19 @@
}
set_bit(__QLCNIC_FW_ATTACHED, &dev->state);
+
return 0;
err_out:
+ if (qlcnic_82xx_check(dev) && (dev->flags & QLCNIC_MSIX_ENABLED) &&
+ qlcnic_check_multi_tx(dev) && !dev->ahw->diag_test)
+ qlcnic_82xx_config_intrpt(dev, 0);
+
if (qlcnic_83xx_check(dev) && (dev->flags & QLCNIC_MSIX_ENABLED)) {
if (dev->ahw->diag_test != QLCNIC_LOOPBACK_TEST)
qlcnic_83xx_config_intrpt(dev, 0);
}
+
return err;
}
@@ -659,6 +707,12 @@
qlcnic_fw_cmd_del_tx_ctx(adapter,
&adapter->tx_ring[ring]);
+ if (qlcnic_82xx_check(adapter) &&
+ (adapter->flags & QLCNIC_MSIX_ENABLED) &&
+ qlcnic_check_multi_tx(adapter) &&
+ !adapter->ahw->diag_test)
+ qlcnic_82xx_config_intrpt(adapter, 0);
+
if (qlcnic_83xx_check(adapter) &&
(adapter->flags & QLCNIC_MSIX_ENABLED)) {
if (adapter->ahw->diag_test != QLCNIC_LOOPBACK_TEST)
@@ -723,8 +777,54 @@
}
}
+int qlcnic_82xx_config_intrpt(struct qlcnic_adapter *adapter, u8 op_type)
+{
+ struct qlcnic_hardware_context *ahw = adapter->ahw;
+ struct net_device *netdev = adapter->netdev;
+ struct qlcnic_cmd_args cmd;
+ u32 type, val;
+ int i, err = 0;
-int qlcnic_82xx_get_mac_address(struct qlcnic_adapter *adapter, u8 *mac)
+ for (i = 0; i < ahw->num_msix; i++) {
+ qlcnic_alloc_mbx_args(&cmd, adapter,
+ QLCNIC_CMD_MQ_TX_CONFIG_INTR);
+ type = op_type ? QLCNIC_INTRPT_ADD : QLCNIC_INTRPT_DEL;
+ val = type | (ahw->intr_tbl[i].type << 4);
+ if (ahw->intr_tbl[i].type == QLCNIC_INTRPT_MSIX)
+ val |= (ahw->intr_tbl[i].id << 16);
+ cmd.req.arg[1] = val;
+ err = qlcnic_issue_cmd(adapter, &cmd);
+ if (err) {
+ netdev_err(netdev, "Failed to %s interrupts %d\n",
+ op_type == QLCNIC_INTRPT_ADD ? "Add" :
+ "Delete", err);
+ qlcnic_free_mbx_args(&cmd);
+ return err;
+ }
+ val = cmd.rsp.arg[1];
+ if (LSB(val)) {
+ netdev_info(netdev,
+ "failed to configure interrupt for %d\n",
+ ahw->intr_tbl[i].id);
+ continue;
+ }
+ if (op_type) {
+ ahw->intr_tbl[i].id = MSW(val);
+ ahw->intr_tbl[i].enabled = 1;
+ ahw->intr_tbl[i].src = cmd.rsp.arg[2];
+ } else {
+ ahw->intr_tbl[i].id = i;
+ ahw->intr_tbl[i].enabled = 0;
+ ahw->intr_tbl[i].src = 0;
+ }
+ qlcnic_free_mbx_args(&cmd);
+ }
+
+ return err;
+}
+
+int qlcnic_82xx_get_mac_address(struct qlcnic_adapter *adapter, u8 *mac,
+ u8 function)
{
int err, i;
struct qlcnic_cmd_args cmd;
@@ -734,7 +834,7 @@
if (err)
return err;
- cmd.req.arg[1] = adapter->ahw->pci_func | BIT_8;
+ cmd.req.arg[1] = function | BIT_8;
err = qlcnic_issue_cmd(adapter, &cmd);
if (err == QLCNIC_RCODE_SUCCESS) {
@@ -765,8 +865,8 @@
struct qlcnic_cmd_args cmd;
size_t nic_size = sizeof(struct qlcnic_info_le);
- nic_info_addr = dma_alloc_coherent(&adapter->pdev->dev, nic_size,
- &nic_dma_t, GFP_KERNEL | __GFP_ZERO);
+ nic_info_addr = dma_zalloc_coherent(&adapter->pdev->dev, nic_size,
+ &nic_dma_t, GFP_KERNEL);
if (!nic_info_addr)
return -ENOMEM;
@@ -819,8 +919,8 @@
if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
return err;
- nic_info_addr = dma_alloc_coherent(&adapter->pdev->dev, nic_size,
- &nic_dma_t, GFP_KERNEL | __GFP_ZERO);
+ nic_info_addr = dma_zalloc_coherent(&adapter->pdev->dev, nic_size,
+ &nic_dma_t, GFP_KERNEL);
if (!nic_info_addr)
return -ENOMEM;
@@ -872,9 +972,8 @@
size_t npar_size = sizeof(struct qlcnic_pci_info_le);
size_t pci_size = npar_size * QLCNIC_MAX_PCI_FUNC;
- pci_info_addr = dma_alloc_coherent(&adapter->pdev->dev, pci_size,
- &pci_info_dma_t,
- GFP_KERNEL | __GFP_ZERO);
+ pci_info_addr = dma_zalloc_coherent(&adapter->pdev->dev, pci_size,
+ &pci_info_dma_t, GFP_KERNEL);
if (!pci_info_addr)
return -ENOMEM;
@@ -974,8 +1073,8 @@
return -EIO;
}
- stats_addr = dma_alloc_coherent(&adapter->pdev->dev, stats_size,
- &stats_dma_t, GFP_KERNEL | __GFP_ZERO);
+ stats_addr = dma_zalloc_coherent(&adapter->pdev->dev, stats_size,
+ &stats_dma_t, GFP_KERNEL);
if (!stats_addr)
return -ENOMEM;
@@ -1030,8 +1129,8 @@
if (mac_stats == NULL)
return -ENOMEM;
- stats_addr = dma_alloc_coherent(&adapter->pdev->dev, stats_size,
- &stats_dma_t, GFP_KERNEL | __GFP_ZERO);
+ stats_addr = dma_zalloc_coherent(&adapter->pdev->dev, stats_size,
+ &stats_dma_t, GFP_KERNEL);
if (!stats_addr)
return -ENOMEM;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c
new file mode 100644
index 0000000..2e10e79
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c
@@ -0,0 +1,1179 @@
+/*
+ * QLogic qlcnic NIC Driver
+ * Copyright (c) 2009-2013 QLogic Corporation
+ *
+ * See LICENSE.qlcnic for copyright and licensing details.
+ */
+
+#include <linux/types.h>
+#include "qlcnic.h"
+
+/* Number of DCB parameter sets kept per adapter and the index of each */
+#define QLC_DCB_NUM_PARAM		3
+#define QLC_DCB_LOCAL_IDX		0
+#define QLC_DCB_OPER_IDX		1
+#define QLC_DCB_PEER_IDX		2
+
+/* Mask with only bit V set */
+#define QLC_DCB_GET_MAP(V)		(1 << (V))
+
+#define QLC_DCB_AEN_BIT			0x2
+#define QLC_DCB_FW_VER			0x2
+#define QLC_DCB_MAX_TC			0x8
+#define QLC_DCB_MAX_APP			0x8
+#define QLC_DCB_MAX_PRIO		QLC_DCB_MAX_TC
+#define QLC_DCB_MAX_PG			QLC_DCB_MAX_TC
+
+/*
+ * Field extractors.  Every macro argument is parenthesized so that a
+ * compound expression passed as an argument keeps its intended precedence.
+ */
+#define QLC_DCB_TSA_SUPPORT(V)		((V) & 0x1)
+#define QLC_DCB_ETS_SUPPORT(V)		(((V) >> 1) & 0x1)
+#define QLC_DCB_VERSION_SUPPORT(V)	(((V) >> 2) & 0xf)
+#define QLC_DCB_MAX_NUM_TC(V)		(((V) >> 20) & 0xf)
+#define QLC_DCB_MAX_NUM_ETS_TC(V)	(((V) >> 24) & 0xf)
+#define QLC_DCB_MAX_NUM_PFC_TC(V)	(((V) >> 28) & 0xf)
+#define QLC_DCB_GET_TC_PRIO(X, P)	(((X) >> ((P) * 3)) & 0x7)
+#define QLC_DCB_GET_PGID_PRIO(X, P)	(((X) >> ((P) * 8)) & 0xff)
+#define QLC_DCB_GET_BWPER_PG(X, P)	(((X) >> ((P) * 8)) & 0xff)
+#define QLC_DCB_GET_TSA_PG(X, P)	(((X) >> ((P) * 8)) & 0xff)
+#define QLC_DCB_GET_PFC_PRIO(X, P)	((((X) >> 24) >> (P)) & 0x1)
+#define QLC_DCB_GET_PROTO_ID_APP(X)	(((X) >> 8) & 0xffff)
+#define QLC_DCB_GET_SELECTOR_APP(X)	((X) & 0xff)
+
+/* Parameter-set identifiers passed to the 82xx query command */
+#define QLC_DCB_LOCAL_PARAM_FWID	0x3
+#define QLC_DCB_OPER_PARAM_FWID		0x1
+#define QLC_DCB_PEER_PARAM_FWID		0x2
+
+/* 83xx layout of the header and application words */
+#define QLC_83XX_DCB_GET_NUMAPP(X)	(((X) >> 2) & 0xf)
+#define QLC_83XX_DCB_TSA_VALID(X)	((X) & 0x1)
+#define QLC_83XX_DCB_PFC_VALID(X)	(((X) >> 1) & 0x1)
+#define QLC_83XX_DCB_GET_PRIOMAP_APP(X)	((X) >> 24)
+
+/* 82xx layout of the header and application words */
+#define QLC_82XX_DCB_GET_NUMAPP(X)	(((X) >> 12) & 0xf)
+#define QLC_82XX_DCB_TSA_VALID(X)	(((X) >> 4) & 0x1)
+#define QLC_82XX_DCB_PFC_VALID(X)	(((X) >> 5) & 0x1)
+#define QLC_82XX_DCB_GET_PRIOVAL_APP(X)	(((X) >> 24) & 0x7)
+#define QLC_82XX_DCB_GET_PRIOMAP_APP(X)	(1 << (X))
+#define QLC_82XX_DCB_PRIO_TC_MAP	(0x76543210)
+
+/* dcbnl callback table; the initializer is at the end of this file */
+static const struct dcbnl_rtnl_ops qlcnic_dcbnl_ops;
+
+/* Helpers shared by the 82xx and 83xx paths */
+static void qlcnic_dcb_aen_work(struct work_struct *);
+static void qlcnic_dcb_data_cee_param_map(struct qlcnic_adapter *);
+
+/* Callbacks common to both qlcnic_dcb_ops tables */
+static inline void __qlcnic_init_dcbnl_ops(struct qlcnic_adapter *);
+static void __qlcnic_dcb_free(struct qlcnic_adapter *);
+static int __qlcnic_dcb_attach(struct qlcnic_adapter *);
+static int __qlcnic_dcb_query_hw_capability(struct qlcnic_adapter *, char *);
+static void __qlcnic_dcb_get_info(struct qlcnic_adapter *);
+
+/* Callbacks specific to 82xx adapters */
+static int qlcnic_82xx_dcb_get_hw_capability(struct qlcnic_adapter *);
+static int qlcnic_82xx_dcb_query_cee_param(struct qlcnic_adapter *, char *, u8);
+static int qlcnic_82xx_dcb_get_cee_cfg(struct qlcnic_adapter *);
+static void qlcnic_82xx_dcb_handle_aen(struct qlcnic_adapter *, void *);
+
+/* Callbacks specific to 83xx adapters */
+static int qlcnic_83xx_dcb_get_hw_capability(struct qlcnic_adapter *);
+static int qlcnic_83xx_dcb_query_cee_param(struct qlcnic_adapter *, char *, u8);
+static int qlcnic_83xx_dcb_get_cee_cfg(struct qlcnic_adapter *);
+static int qlcnic_83xx_dcb_register_aen(struct qlcnic_adapter *, bool);
+static void qlcnic_83xx_dcb_handle_aen(struct qlcnic_adapter *, void *);
+
+/*
+ * DCB capabilities decoded from the capability word returned by the
+ * QLCNIC_CMD_DCB_QUERY_CAP command.
+ */
+struct qlcnic_dcb_capability {
+ bool tsa_capability;
+ bool ets_capability;
+ u8 max_num_tc;
+ u8 max_ets_tc;
+ u8 max_pfc_tc;
+ u8 dcb_capability; /* DCB_CAP_DCBX_* flags */
+};
+
+/*
+ * One DCB parameter set as CPU-endian words (16 words in total).
+ * hdr_prio_pfc_map[0] is the header word decoded by the *_VALID and
+ * *_GET_NUMAPP macros; hdr_prio_pfc_map[1] carries the per-priority PFC bits.
+ */
+struct qlcnic_dcb_param {
+ u32 hdr_prio_pfc_map[2];
+ u32 prio_pg_map[2];
+ u32 pg_bw_map[2];
+ u32 pg_tsa_map[2];
+ u32 app[QLC_DCB_MAX_APP];
+};
+
+/* Raw parameter sets for all three views plus the priority-to-TC map */
+struct qlcnic_dcb_mbx_params {
+ /* 1st local, 2nd operational 3rd remote */
+ struct qlcnic_dcb_param type[3];
+ u32 prio_tc_map; /* 3 bits per priority, see QLC_DCB_GET_TC_PRIO() */
+};
+
+/*
+ * Little-endian image of struct qlcnic_dcb_param as read from the DMA
+ * response buffer on 82xx; field order mirrors struct qlcnic_dcb_param.
+ */
+struct qlcnic_82xx_dcb_param_mbx_le {
+ __le32 hdr_prio_pfc_map[2];
+ __le32 prio_pg_map[2];
+ __le32 pg_bw_map[2];
+ __le32 pg_tsa_map[2];
+ __le32 app[QLC_DCB_MAX_APP];
+};
+
+/* Application selector stored in struct qlcnic_dcb_app */
+enum qlcnic_dcb_selector {
+ QLC_SELECTOR_DEF = 0x0,
+ QLC_SELECTOR_ETHER,
+ QLC_SELECTOR_TCP,
+ QLC_SELECTOR_UDP,
+};
+
+/* Priority type of a traffic class; this file only assigns QLC_PRIO_LINK */
+enum qlcnic_dcb_prio_type {
+ QLC_PRIO_NONE = 0,
+ QLC_PRIO_GROUP,
+ QLC_PRIO_LINK,
+};
+
+/* PFC setting of a priority; this file only assigns QLC_PFC_FULL */
+enum qlcnic_dcb_pfc_type {
+ QLC_PFC_DISABLED = 0,
+ QLC_PFC_FULL,
+ QLC_PFC_TX,
+ QLC_PFC_RX
+};
+
+/* Per-priority PFC state inside a traffic class */
+struct qlcnic_dcb_prio_cfg {
+ bool valid;
+ enum qlcnic_dcb_pfc_type pfc_type;
+};
+
+/* Decoded settings of one priority group */
+struct qlcnic_dcb_pg_cfg {
+ bool valid;
+ u8 total_bw_percent; /* of Link/ port BW */
+ u8 prio_count; /* number of priorities mapped to this group */
+ u8 tsa_type;
+};
+
+/* Decoded settings of one traffic class */
+struct qlcnic_dcb_tc_cfg {
+ bool valid;
+ struct qlcnic_dcb_prio_cfg prio_cfg[QLC_DCB_MAX_PRIO];
+ enum qlcnic_dcb_prio_type prio_type; /* always prio_link */
+ u8 link_percent; /* % of link bandwidth */
+ u8 bwg_percent; /* % of BWG's bandwidth */
+ u8 up_tc_map; /* bitmap of priorities mapped to this class */
+ u8 pgid;
+};
+
+/* One decoded application entry */
+struct qlcnic_dcb_app {
+ bool valid;
+ enum qlcnic_dcb_selector selector;
+ u16 protocol;
+ u8 priority;
+};
+
+/* Fully decoded form of one parameter set (local, operational or peer) */
+struct qlcnic_dcb_cee {
+ struct qlcnic_dcb_tc_cfg tc_cfg[QLC_DCB_MAX_TC];
+ struct qlcnic_dcb_pg_cfg pg_cfg[QLC_DCB_MAX_PG];
+ struct qlcnic_dcb_app app[QLC_DCB_MAX_APP];
+ bool tc_param_valid;
+ bool pfc_mode_enable;
+};
+
+/* Decoded DCB state of the adapter: three parameter views plus capabilities */
+struct qlcnic_dcb_cfg {
+ /* 0 - local, 1 - operational, 2 - remote */
+ struct qlcnic_dcb_cee type[QLC_DCB_NUM_PARAM];
+ struct qlcnic_dcb_capability capability;
+ u32 version;
+};
+
+/*
+ * DCB operations for 83xx adapters: the first group is shared with 82xx,
+ * the second group is 83xx specific.
+ */
+static struct qlcnic_dcb_ops qlcnic_83xx_dcb_ops = {
+ .init_dcbnl_ops = __qlcnic_init_dcbnl_ops,
+ .free = __qlcnic_dcb_free,
+ .attach = __qlcnic_dcb_attach,
+ .query_hw_capability = __qlcnic_dcb_query_hw_capability,
+ .get_info = __qlcnic_dcb_get_info,
+
+ .get_hw_capability = qlcnic_83xx_dcb_get_hw_capability,
+ .query_cee_param = qlcnic_83xx_dcb_query_cee_param,
+ .get_cee_cfg = qlcnic_83xx_dcb_get_cee_cfg,
+ .register_aen = qlcnic_83xx_dcb_register_aen,
+ .handle_aen = qlcnic_83xx_dcb_handle_aen,
+};
+
+/*
+ * DCB operations for 82xx adapters.  No .register_aen callback is set,
+ * so that member is left NULL.
+ */
+static struct qlcnic_dcb_ops qlcnic_82xx_dcb_ops = {
+ .init_dcbnl_ops = __qlcnic_init_dcbnl_ops,
+ .free = __qlcnic_dcb_free,
+ .attach = __qlcnic_dcb_attach,
+ .query_hw_capability = __qlcnic_dcb_query_hw_capability,
+ .get_info = __qlcnic_dcb_get_info,
+
+ .get_hw_capability = qlcnic_82xx_dcb_get_hw_capability,
+ .query_cee_param = qlcnic_82xx_dcb_query_cee_param,
+ .get_cee_cfg = qlcnic_82xx_dcb_get_cee_cfg,
+ .handle_aen = qlcnic_82xx_dcb_handle_aen,
+};
+
+/*
+ * Return the application-entry count encoded in header word @val, using
+ * the 82xx or the 83xx field layout depending on the adapter.
+ */
+static u8 qlcnic_dcb_get_num_app(struct qlcnic_adapter *adapter, u32 val)
+{
+	return qlcnic_82xx_check(adapter) ? QLC_82XX_DCB_GET_NUMAPP(val) :
+					    QLC_83XX_DCB_GET_NUMAPP(val);
+}
+
+/* Return the PFC-valid bit of header word @val for this adapter type */
+static inline u8 qlcnic_dcb_pfc_hdr_valid(struct qlcnic_adapter *adapter,
+					  u32 val)
+{
+	u8 pfc_valid;
+
+	if (qlcnic_82xx_check(adapter))
+		pfc_valid = QLC_82XX_DCB_PFC_VALID(val);
+	else
+		pfc_valid = QLC_83XX_DCB_PFC_VALID(val);
+
+	return pfc_valid;
+}
+
+/* Return the TSA-valid bit of header word @val for this adapter type */
+static inline u8 qlcnic_dcb_tsa_hdr_valid(struct qlcnic_adapter *adapter,
+					  u32 val)
+{
+	u8 tsa_valid;
+
+	if (qlcnic_82xx_check(adapter))
+		tsa_valid = QLC_82XX_DCB_TSA_VALID(val);
+	else
+		tsa_valid = QLC_83XX_DCB_TSA_VALID(val);
+
+	return tsa_valid;
+}
+
+/*
+ * Return the priority bitmap of application word @val.
+ *
+ * On 83xx the bitmap is the top byte of the word.  On 82xx the word holds
+ * a 3-bit priority value (QLC_82XX_DCB_GET_PRIOVAL_APP), which is turned
+ * into a single-bit map.  The value must be extracted first: shifting by
+ * the raw 32-bit word is an out-of-range shift.
+ */
+static inline u8 qlcnic_dcb_get_prio_map_app(struct qlcnic_adapter *adapter,
+					     u32 val)
+{
+	u32 prio;
+
+	if (qlcnic_82xx_check(adapter)) {
+		prio = QLC_82XX_DCB_GET_PRIOVAL_APP(val);
+		return QLC_82XX_DCB_GET_PRIOMAP_APP(prio);
+	}
+
+	return QLC_83XX_DCB_GET_PRIOMAP_APP(val);
+}
+
+/*
+ * Return the position of the lowest set bit of @up_tc_map, or
+ * QLC_DCB_MAX_TC when none of the low QLC_DCB_MAX_TC bits is set.
+ */
+static int qlcnic_dcb_prio_count(u8 up_tc_map)
+{
+	int bit = 0;
+
+	while (bit < QLC_DCB_MAX_TC && !(up_tc_map & QLC_DCB_GET_MAP(bit)))
+		bit++;
+
+	return bit;
+}
+
+/* Install the dcbnl callbacks on the netdev when the DCB state bit is set */
+static inline void __qlcnic_init_dcbnl_ops(struct qlcnic_adapter *adapter)
+{
+	if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state))
+		return;
+
+	adapter->netdev->dcbnl_ops = &qlcnic_dcbnl_ops;
+}
+
+/*
+ * Select the DCB operations table matching the adapter type.  The ops
+ * pointer is left untouched when the adapter is neither 82xx nor 83xx.
+ */
+void qlcnic_set_dcb_ops(struct qlcnic_adapter *adapter)
+{
+	struct qlcnic_dcb_ops *ops;
+
+	if (qlcnic_82xx_check(adapter))
+		ops = &qlcnic_82xx_dcb_ops;
+	else if (qlcnic_83xx_check(adapter))
+		ops = &qlcnic_83xx_dcb_ops;
+	else
+		return;
+
+	adapter->dcb->ops = ops;
+}
+
+/*
+ * Allocate the zeroed per-adapter DCB context, link it with the adapter
+ * in both directions and select its operations table.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+int __qlcnic_register_dcb(struct qlcnic_adapter *adapter)
+{
+	struct qlcnic_dcb *ctx = kzalloc(sizeof(struct qlcnic_dcb), GFP_ATOMIC);
+
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->adapter = adapter;
+	adapter->dcb = ctx;
+	qlcnic_set_dcb_ops(adapter);
+
+	return 0;
+}
+
+/*
+ * Tear down the DCB context of @adapter; a no-op when adapter->dcb is NULL.
+ * The order below matters: AEN handling is quiesced before the work item,
+ * the workqueue and finally the buffers are released.
+ */
+static void __qlcnic_dcb_free(struct qlcnic_adapter *adapter)
+{
+ struct qlcnic_dcb *dcb = adapter->dcb;
+
+ if (!dcb)
+ return;
+
+ /* flag 0: presumably unregisters the AEN - wrapper is defined elsewhere */
+ qlcnic_dcb_register_aen(adapter, 0);
+
+ /* poll until the AEN work item has cleared its in-progress bit */
+ while (test_bit(__QLCNIC_DCB_IN_AEN, &adapter->state))
+ usleep_range(10000, 11000);
+
+ /* NOTE(review): aen_work is initialised in __qlcnic_dcb_attach();
+  * confirm free is never reached before attach has run.
+  */
+ cancel_delayed_work_sync(&dcb->aen_work);
+
+ if (dcb->wq) {
+ destroy_workqueue(dcb->wq);
+ dcb->wq = NULL;
+ }
+
+ kfree(dcb->cfg);
+ dcb->cfg = NULL;
+ kfree(dcb->param);
+ dcb->param = NULL;
+ kfree(dcb);
+ adapter->dcb = NULL;
+}
+
+/*
+ * Query the hardware capability, fetch the CEE configuration and then
+ * call qlcnic_dcb_register_aen() with flag 1.  The return values of all
+ * three calls are ignored.
+ */
+static void __qlcnic_dcb_get_info(struct qlcnic_adapter *adapter)
+{
+ qlcnic_dcb_get_hw_capability(adapter);
+ qlcnic_dcb_get_cee_cfg(adapter);
+ qlcnic_dcb_register_aen(adapter, 1);
+}
+
+/*
+ * Set up the AEN work item, the DCB workqueue and the cfg/param buffers,
+ * then fetch the initial DCB information via qlcnic_dcb_get_info().
+ *
+ * Returns 0 on success or -ENOMEM when the workqueue or either buffer
+ * cannot be allocated; whatever was allocated before the failure is
+ * released again and the corresponding pointers are reset to NULL.
+ */
+static int __qlcnic_dcb_attach(struct qlcnic_adapter *adapter)
+{
+	struct qlcnic_dcb *dcb = adapter->dcb;
+	int err = 0;
+
+	INIT_DELAYED_WORK(&dcb->aen_work, qlcnic_dcb_aen_work);
+
+	dcb->wq = create_singlethread_workqueue("qlcnic-dcb");
+	if (!dcb->wq) {
+		dev_err(&adapter->pdev->dev,
+			"DCB workqueue allocation failed. DCB will be disabled\n");
+		/* report a real errno like the other failure paths */
+		return -ENOMEM;
+	}
+
+	dcb->cfg = kzalloc(sizeof(struct qlcnic_dcb_cfg), GFP_ATOMIC);
+	if (!dcb->cfg) {
+		err = -ENOMEM;
+		goto out_free_wq;
+	}
+
+	dcb->param = kzalloc(sizeof(struct qlcnic_dcb_mbx_params), GFP_ATOMIC);
+	if (!dcb->param) {
+		err = -ENOMEM;
+		goto out_free_cfg;
+	}
+
+	qlcnic_dcb_get_info(adapter);
+
+	return 0;
+out_free_cfg:
+	kfree(dcb->cfg);
+	dcb->cfg = NULL;
+
+out_free_wq:
+	destroy_workqueue(dcb->wq);
+	dcb->wq = NULL;
+
+	return err;
+}
+
+/*
+ * Issue QLCNIC_CMD_DCB_QUERY_CAP and, on success, copy response word 1
+ * (one u32) into @buf when @buf is not NULL.
+ *
+ * Returns 0 on success or the error from mailbox allocation/command issue.
+ */
+static int __qlcnic_dcb_query_hw_capability(struct qlcnic_adapter *adapter,
+					    char *buf)
+{
+	struct qlcnic_cmd_args cmd;
+	u32 caps;
+	int err;
+
+	err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_DCB_QUERY_CAP);
+	if (err)
+		return err;
+
+	err = qlcnic_issue_cmd(adapter, &cmd);
+	if (!err) {
+		caps = cmd.rsp.arg[1];
+		if (buf)
+			memcpy(buf, &caps, sizeof(u32));
+	} else {
+		dev_err(&adapter->pdev->dev,
+			"Failed to query DCBX capability, err %d\n", err);
+	}
+
+	qlcnic_free_mbx_args(&cmd);
+
+	return err;
+}
+
+/*
+ * Reset the cached capability structure, read the capability word into
+ * @val and decode the TSA/ETS bits and the traffic-class limits from it.
+ *
+ * Returns 0 on success, the query error, or -EINVAL when the reported
+ * limits are inconsistent (more than QLC_DCB_MAX_TC classes, or an
+ * ETS/PFC limit above the total class count).
+ */
+static int __qlcnic_dcb_get_capability(struct qlcnic_adapter *adapter, u32 *val)
+{
+	struct qlcnic_dcb_capability *cap = &adapter->dcb->cfg->capability;
+	u32 word;
+	int err;
+
+	memset(cap, 0, sizeof(struct qlcnic_dcb_capability));
+
+	err = qlcnic_dcb_query_hw_capability(adapter, (char *)val);
+	if (err)
+		return err;
+
+	word = *val;
+
+	/* both flags were cleared by the memset above */
+	if (QLC_DCB_TSA_SUPPORT(word))
+		cap->tsa_capability = true;
+	if (QLC_DCB_ETS_SUPPORT(word))
+		cap->ets_capability = true;
+
+	cap->max_num_tc = QLC_DCB_MAX_NUM_TC(word);
+	cap->max_ets_tc = QLC_DCB_MAX_NUM_ETS_TC(word);
+	cap->max_pfc_tc = QLC_DCB_MAX_NUM_PFC_TC(word);
+
+	if (cap->max_num_tc <= QLC_DCB_MAX_TC &&
+	    cap->max_ets_tc <= cap->max_num_tc &&
+	    cap->max_pfc_tc <= cap->max_num_tc)
+		return 0;
+
+	dev_err(&adapter->pdev->dev, "Invalid DCB configuration\n");
+	return -EINVAL;
+}
+
+/*
+ * Read the DCB capabilities of an 82xx adapter.  The DCBX capability is
+ * always set to CEE + LLD-managed; the DCB state bit is set when TSA and
+ * ETS are both supported.
+ *
+ * Returns 0 on success or the error from __qlcnic_dcb_get_capability().
+ */
+static int qlcnic_82xx_dcb_get_hw_capability(struct qlcnic_adapter *adapter)
+{
+	struct qlcnic_dcb_capability *cap;
+	u32 word;
+	int err;
+
+	err = __qlcnic_dcb_get_capability(adapter, &word);
+	if (err)
+		return err;
+
+	cap = &adapter->dcb->cfg->capability;
+	cap->dcb_capability = DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_LLD_MANAGED;
+
+	if (cap->dcb_capability && cap->tsa_capability && cap->ets_capability)
+		set_bit(__QLCNIC_DCB_STATE, &adapter->state);
+
+	return 0;
+}
+
+/*
+ * Query one DCB parameter set on 82xx.  The response is returned in a
+ * coherent DMA buffer in little-endian form; it is converted to CPU order
+ * and copied to @buf when @buf is not NULL.
+ *
+ * @type must be one of the QLC_DCB_*_PARAM_FWID values.
+ *
+ * Returns 0 on success, -EINVAL for an unknown @type, -ENOMEM when the DMA
+ * buffer cannot be allocated, or the mailbox error.
+ */
+static int qlcnic_82xx_dcb_query_cee_param(struct qlcnic_adapter *adapter,
+					   char *buf, u8 type)
+{
+	u16 size = sizeof(struct qlcnic_82xx_dcb_param_mbx_le);
+	struct device *dev = &adapter->pdev->dev;
+	struct qlcnic_82xx_dcb_param_mbx_le *le;
+	struct qlcnic_dcb_param cpu;
+	struct qlcnic_cmd_args cmd;
+	dma_addr_t dma;
+	u64 phys_addr;
+	int err, i;
+
+	if (type != QLC_DCB_LOCAL_PARAM_FWID &&
+	    type != QLC_DCB_OPER_PARAM_FWID &&
+	    type != QLC_DCB_PEER_PARAM_FWID) {
+		dev_err(dev, "Invalid parameter type %d\n", type);
+		return -EINVAL;
+	}
+
+	le = dma_alloc_coherent(dev, size, &dma, GFP_KERNEL);
+	if (!le)
+		return -ENOMEM;
+
+	err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_DCB_QUERY_PARAM);
+	if (err)
+		goto out_free_rsp;
+
+	/* size in the low half, set identifier above it, then the DMA address */
+	phys_addr = dma;
+	cmd.req.arg[1] = size | (type << 16);
+	cmd.req.arg[2] = MSD(phys_addr);
+	cmd.req.arg[3] = LSD(phys_addr);
+
+	err = qlcnic_issue_cmd(adapter, &cmd);
+	if (err) {
+		dev_err(dev, "Failed to query DCBX parameter, err %d\n", err);
+		goto out;
+	}
+
+	memset(&cpu, 0, sizeof(struct qlcnic_dcb_param));
+	for (i = 0; i < 2; i++) {
+		cpu.hdr_prio_pfc_map[i] = le32_to_cpu(le->hdr_prio_pfc_map[i]);
+		cpu.prio_pg_map[i] = le32_to_cpu(le->prio_pg_map[i]);
+		cpu.pg_bw_map[i] = le32_to_cpu(le->pg_bw_map[i]);
+		cpu.pg_tsa_map[i] = le32_to_cpu(le->pg_tsa_map[i]);
+	}
+
+	for (i = 0; i < QLC_DCB_MAX_APP; i++)
+		cpu.app[i] = le32_to_cpu(le->app[i]);
+
+	if (buf)
+		memcpy(buf, &cpu, size);
+out:
+	qlcnic_free_mbx_args(&cmd);
+
+out_free_rsp:
+	dma_free_coherent(dev, size, le, dma);
+
+	return err;
+}
+
+/*
+ * Fetch the local, operational and peer parameter sets of an 82xx adapter
+ * into adapter->dcb->param, set the fixed priority-to-TC map and decode
+ * the result.  Does nothing and returns 0 when the param buffer is NULL.
+ *
+ * Returns 0 on success or the first query error.
+ */
+static int qlcnic_82xx_dcb_get_cee_cfg(struct qlcnic_adapter *adapter)
+{
+	static const u8 fw_id[QLC_DCB_NUM_PARAM] = {
+		[QLC_DCB_LOCAL_IDX] = QLC_DCB_LOCAL_PARAM_FWID,
+		[QLC_DCB_OPER_IDX] = QLC_DCB_OPER_PARAM_FWID,
+		[QLC_DCB_PEER_IDX] = QLC_DCB_PEER_PARAM_FWID,
+	};
+	struct qlcnic_dcb_mbx_params *mbx = adapter->dcb->param;
+	int err, set;
+
+	if (!mbx)
+		return 0;
+
+	for (set = 0; set < QLC_DCB_NUM_PARAM; set++) {
+		err = qlcnic_dcb_query_cee_param(adapter,
+						 (char *)&mbx->type[set],
+						 fw_id[set]);
+		if (err)
+			return err;
+	}
+
+	mbx->prio_tc_map = QLC_82XX_DCB_PRIO_TC_MAP;
+
+	qlcnic_dcb_data_cee_param_map(adapter);
+
+	return 0;
+}
+
+/*
+ * Delayed-work handler for DCB AENs: refresh the CEE configuration and
+ * then clear the in-progress bit set by the handle_aen callbacks.
+ */
+static void qlcnic_dcb_aen_work(struct work_struct *work)
+{
+	struct qlcnic_dcb *dcb = container_of(work, struct qlcnic_dcb,
+					      aen_work.work);
+	struct qlcnic_adapter *adapter = dcb->adapter;
+
+	qlcnic_dcb_get_cee_cfg(adapter);
+	clear_bit(__QLCNIC_DCB_IN_AEN, &adapter->state);
+}
+
+/*
+ * 82xx AEN entry point: queue the refresh work unless one is already in
+ * progress.  @data is not used.
+ */
+static void qlcnic_82xx_dcb_handle_aen(struct qlcnic_adapter *adapter,
+				       void *data)
+{
+	struct qlcnic_dcb *dcb = adapter->dcb;
+
+	if (!test_and_set_bit(__QLCNIC_DCB_IN_AEN, &adapter->state))
+		queue_delayed_work(dcb->wq, &dcb->aen_work, 0);
+}
+
+/*
+ * Read the DCB capabilities of an 83xx adapter.  Bits 2 and 3 of the
+ * capability word select the CEE and IEEE DCBX versions; LLD-managed is
+ * added when either is present.  The DCB state bit is set when a DCBX
+ * version, TSA and ETS are all supported.
+ *
+ * Returns 0 on success or the error from __qlcnic_dcb_get_capability().
+ */
+static int qlcnic_83xx_dcb_get_hw_capability(struct qlcnic_adapter *adapter)
+{
+	struct qlcnic_dcb_capability *cap = &adapter->dcb->cfg->capability;
+	u32 word;
+	int err;
+
+	err = __qlcnic_dcb_get_capability(adapter, &word);
+	if (err)
+		return err;
+
+	if (word & BIT_2)
+		cap->dcb_capability = DCB_CAP_DCBX_VER_CEE;
+	if (word & BIT_3)
+		cap->dcb_capability |= DCB_CAP_DCBX_VER_IEEE;
+	if (cap->dcb_capability)
+		cap->dcb_capability |= DCB_CAP_DCBX_LLD_MANAGED;
+
+	if (cap->dcb_capability && cap->tsa_capability && cap->ets_capability)
+		set_bit(__QLCNIC_DCB_STATE, &adapter->state);
+
+	return 0;
+}
+
+/*
+ * Query all three DCB parameter sets of an 83xx adapter with a single
+ * mailbox command and store them in @buf, which is treated as a
+ * struct qlcnic_dcb_mbx_params.  @idx is not used.
+ *
+ * Response layout as consumed here: word 1 is the priority-to-TC map,
+ * then each parameter set occupies 16 consecutive words starting at
+ * word 2 (8 header/map words followed by the application words).
+ *
+ * The result is assembled in a local structure and copied out in one
+ * piece so that @buf has exactly the struct layout its readers expect.
+ * The application count comes from a 4-bit field and is clamped to
+ * QLC_DCB_MAX_APP so it cannot overrun app[].
+ *
+ * Returns 0 on success or the mailbox error; on failure @buf is zeroed.
+ */
+static int qlcnic_83xx_dcb_query_cee_param(struct qlcnic_adapter *adapter,
+					   char *buf, u8 idx)
+{
+	const int words_per_set = sizeof(struct qlcnic_dcb_param) / sizeof(u32);
+	struct qlcnic_dcb_mbx_params mbx_out;
+	struct qlcnic_dcb_param *each;
+	struct qlcnic_cmd_args cmd;
+	int err, i, j, k, max_app;
+
+	memset(&mbx_out, 0, sizeof(struct qlcnic_dcb_mbx_params));
+	if (buf)
+		memset(buf, 0, sizeof(struct qlcnic_dcb_mbx_params));
+
+	err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_DCB_QUERY_PARAM);
+	if (err)
+		return err;
+
+	cmd.req.arg[0] |= QLC_DCB_FW_VER << 29;
+	err = qlcnic_issue_cmd(adapter, &cmd);
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Failed to query DCBX param, err %d\n", err);
+		goto out;
+	}
+
+	mbx_out.prio_tc_map = cmd.rsp.arg[1];
+
+	for (j = 0; j < QLC_DCB_NUM_PARAM; j++) {
+		each = &mbx_out.type[j];
+		k = 2 + j * words_per_set;	/* sets start at 2, 18, 34 */
+
+		each->hdr_prio_pfc_map[0] = cmd.rsp.arg[k++];
+		each->hdr_prio_pfc_map[1] = cmd.rsp.arg[k++];
+		each->prio_pg_map[0] = cmd.rsp.arg[k++];
+		each->prio_pg_map[1] = cmd.rsp.arg[k++];
+		each->pg_bw_map[0] = cmd.rsp.arg[k++];
+		each->pg_bw_map[1] = cmd.rsp.arg[k++];
+		each->pg_tsa_map[0] = cmd.rsp.arg[k++];
+		each->pg_tsa_map[1] = cmd.rsp.arg[k++];
+
+		max_app = qlcnic_dcb_get_num_app(adapter,
+						 each->hdr_prio_pfc_map[0]);
+		if (max_app > QLC_DCB_MAX_APP)
+			max_app = QLC_DCB_MAX_APP;
+
+		for (i = 0; i < max_app; i++)
+			each->app[i] = cmd.rsp.arg[i + k];
+	}
+
+	if (buf)
+		memcpy(buf, &mbx_out, sizeof(struct qlcnic_dcb_mbx_params));
+out:
+	qlcnic_free_mbx_args(&cmd);
+
+	return err;
+}
+
+/*
+ * Fetch the parameter sets of an 83xx adapter into adapter->dcb->param
+ * and decode them.
+ *
+ * Returns 0 on success or the query error.
+ */
+static int qlcnic_83xx_dcb_get_cee_cfg(struct qlcnic_adapter *adapter)
+{
+	char *raw = (char *)adapter->dcb->param;
+	int err = qlcnic_dcb_query_cee_param(adapter, raw, 0);
+
+	if (!err)
+		qlcnic_dcb_data_cee_param_map(adapter);
+
+	return err;
+}
+
+/*
+ * Register (@flag true) or unregister (@flag false) the DCBX AEN on 83xx
+ * by issuing QLCNIC_CMD_INIT_NIC_FUNC or QLCNIC_CMD_STOP_NIC_FUNC with
+ * QLC_DCB_AEN_BIT as argument.
+ *
+ * Returns 0 on success or the mailbox error.
+ */
+static int qlcnic_83xx_dcb_register_aen(struct qlcnic_adapter *adapter,
+					bool flag)
+{
+	struct qlcnic_cmd_args cmd;
+	u8 opcode;
+	int err;
+
+	if (flag)
+		opcode = QLCNIC_CMD_INIT_NIC_FUNC;
+	else
+		opcode = QLCNIC_CMD_STOP_NIC_FUNC;
+
+	err = qlcnic_alloc_mbx_args(&cmd, adapter, opcode);
+	if (err)
+		return err;
+
+	cmd.req.arg[1] = QLC_DCB_AEN_BIT;
+
+	err = qlcnic_issue_cmd(adapter, &cmd);
+	if (err)
+		dev_err(&adapter->pdev->dev, "Failed to %s DCBX AEN, err %d\n",
+			(flag ? "register" : "unregister"), err);
+
+	qlcnic_free_mbx_args(&cmd);
+
+	return err;
+}
+
+/*
+ * 83xx AEN entry point.  Unless a refresh is already in progress, update
+ * the DCB state bit from BIT_8 of the u32 that @data points to and queue
+ * the refresh work.
+ */
+static void qlcnic_83xx_dcb_handle_aen(struct qlcnic_adapter *adapter,
+				       void *data)
+{
+	struct qlcnic_dcb *dcb = adapter->dcb;
+	u32 status;
+
+	if (test_and_set_bit(__QLCNIC_DCB_IN_AEN, &adapter->state))
+		return;
+
+	status = *(u32 *)data;
+	if (status & BIT_8)
+		set_bit(__QLCNIC_DCB_STATE, &adapter->state);
+	else
+		clear_bit(__QLCNIC_DCB_STATE, &adapter->state);
+
+	queue_delayed_work(dcb->wq, &dcb->aen_work, 0);
+}
+
+/*
+ * Decode the per-priority data of parameter set @each into @type: the
+ * traffic class of each priority (from mbx->prio_tc_map), its PFC setting
+ * and its priority-group id.
+ *
+ * prio_pg_map[] packs four 8-bit group ids per word, so priority i lives
+ * in word i / 4 at byte i % 4.  Indexing by the byte position inside the
+ * word keeps the shift count below 32 for priorities 4..7.
+ *
+ * The group id is an unchecked 8-bit value; prio_count is only updated
+ * when it indexes a valid pg_cfg[] slot.
+ */
+static void qlcnic_dcb_fill_cee_tc_params(struct qlcnic_dcb_mbx_params *mbx,
+					  struct qlcnic_dcb_param *each,
+					  struct qlcnic_dcb_cee *type)
+{
+	struct qlcnic_dcb_tc_cfg *tc_cfg;
+	u8 i, tc, pgid, word, slot;
+
+	for (i = 0; i < QLC_DCB_MAX_PRIO; i++) {
+		tc = QLC_DCB_GET_TC_PRIO(mbx->prio_tc_map, i);
+		tc_cfg = &type->tc_cfg[tc];
+		tc_cfg->valid = true;
+		tc_cfg->up_tc_map |= QLC_DCB_GET_MAP(i);
+
+		if (QLC_DCB_GET_PFC_PRIO(each->hdr_prio_pfc_map[1], i) &&
+		    type->pfc_mode_enable) {
+			tc_cfg->prio_cfg[i].valid = true;
+			tc_cfg->prio_cfg[i].pfc_type = QLC_PFC_FULL;
+		}
+
+		word = i / 4;
+		slot = i % 4;
+		pgid = QLC_DCB_GET_PGID_PRIO(each->prio_pg_map[word], slot);
+
+		tc_cfg->pgid = pgid;
+		tc_cfg->prio_type = QLC_PRIO_LINK;
+
+		if (pgid < QLC_DCB_MAX_PG)
+			type->pg_cfg[pgid].prio_count++;
+	}
+}
+
+/*
+ * Decode the bandwidth percentage and TSA type of every priority group
+ * of parameter set @each into @type and mark each group valid.
+ *
+ * pg_bw_map[] and pg_tsa_map[] pack four 8-bit entries per word, so group
+ * i lives in word i / 4 at byte i % 4.  Indexing by the byte position
+ * inside the word keeps the shift count below 32 for groups 4..7.
+ */
+static void qlcnic_dcb_fill_cee_pg_params(struct qlcnic_dcb_param *each,
+					  struct qlcnic_dcb_cee *type)
+{
+	struct qlcnic_dcb_pg_cfg *pg_cfg;
+	u8 i, tsa, bw_per, word, slot;
+
+	for (i = 0; i < QLC_DCB_MAX_PG; i++) {
+		pg_cfg = &type->pg_cfg[i];
+		pg_cfg->valid = true;
+
+		word = i / 4;
+		slot = i % 4;
+		bw_per = QLC_DCB_GET_BWPER_PG(each->pg_bw_map[word], slot);
+		tsa = QLC_DCB_GET_TSA_PG(each->pg_tsa_map[word], slot);
+
+		pg_cfg->total_bw_percent = bw_per;
+		pg_cfg->tsa_type = tsa;
+	}
+}
+
+/*
+ * Decode the application entries of parameter set @each into @type.  For
+ * the operational set (@idx == QLC_DCB_OPER_IDX) each entry is also passed
+ * to dcb_setapp() when the netdev has dcbnl ops installed.
+ *
+ * The entry count comes from a 4-bit header field and is clamped to
+ * QLC_DCB_MAX_APP so it cannot index past app[] in either structure.
+ */
+static void
+qlcnic_dcb_fill_cee_app_params(struct qlcnic_adapter *adapter, u8 idx,
+			       struct qlcnic_dcb_param *each,
+			       struct qlcnic_dcb_cee *type)
+{
+	struct qlcnic_dcb_app *app;
+	u8 i, num_app, map, cnt;
+	struct dcb_app new_app;
+
+	num_app = qlcnic_dcb_get_num_app(adapter, each->hdr_prio_pfc_map[0]);
+	if (num_app > QLC_DCB_MAX_APP)
+		num_app = QLC_DCB_MAX_APP;
+
+	for (i = 0; i < num_app; i++) {
+		app = &type->app[i];
+		app->valid = true;
+
+		/* Only for CEE (-1) */
+		app->selector = QLC_DCB_GET_SELECTOR_APP(each->app[i]) - 1;
+		new_app.selector = app->selector;
+		app->protocol = QLC_DCB_GET_PROTO_ID_APP(each->app[i]);
+		new_app.protocol = app->protocol;
+		map = qlcnic_dcb_get_prio_map_app(adapter, each->app[i]);
+		cnt = qlcnic_dcb_prio_count(map);
+
+		/* an empty map yields QLC_DCB_MAX_TC; fall back to priority 0 */
+		if (cnt >= QLC_DCB_MAX_TC)
+			cnt = 0;
+
+		app->priority = cnt;
+		new_app.priority = cnt;
+
+		if (idx == QLC_DCB_OPER_IDX && adapter->netdev->dcbnl_ops)
+			dcb_setapp(adapter->netdev, &new_app);
+	}
+}
+
+/*
+ * Decode raw parameter set @idx of adapter->dcb->param into the matching
+ * entry of adapter->dcb->cfg.  The traffic-class and priority-group tables
+ * are cleared first; the application table is not.
+ */
+static void qlcnic_dcb_map_cee_params(struct qlcnic_adapter *adapter, u8 idx)
+{
+	struct qlcnic_dcb_mbx_params *mbx = adapter->dcb->param;
+	struct qlcnic_dcb_cfg *cfg = adapter->dcb->cfg;
+	struct qlcnic_dcb_param *raw = &mbx->type[idx];
+	struct qlcnic_dcb_cee *cee = &cfg->type[idx];
+	u32 hdr = raw->hdr_prio_pfc_map[0];
+
+	memset(cee->tc_cfg, 0,
+	       sizeof(struct qlcnic_dcb_tc_cfg) * QLC_DCB_MAX_TC);
+	memset(cee->pg_cfg, 0,
+	       sizeof(struct qlcnic_dcb_pg_cfg) * QLC_DCB_MAX_TC);
+
+	/* a feature is on only if the header says so and the HW has TCs for it */
+	cee->pfc_mode_enable = false;
+	if (qlcnic_dcb_pfc_hdr_valid(adapter, hdr) &&
+	    cfg->capability.max_pfc_tc)
+		cee->pfc_mode_enable = true;
+
+	cee->tc_param_valid = false;
+	if (qlcnic_dcb_tsa_hdr_valid(adapter, hdr) &&
+	    cfg->capability.max_ets_tc)
+		cee->tc_param_valid = true;
+
+	qlcnic_dcb_fill_cee_tc_params(mbx, raw, cee);
+	qlcnic_dcb_fill_cee_pg_params(raw, cee);
+	qlcnic_dcb_fill_cee_app_params(adapter, idx, raw, cee);
+}
+
+/* Decode all parameter sets, then send a CEE notification for the netdev */
+static void qlcnic_dcb_data_cee_param_map(struct qlcnic_adapter *adapter)
+{
+	int set = 0;
+
+	while (set < QLC_DCB_NUM_PARAM) {
+		qlcnic_dcb_map_cee_params(adapter, set);
+		set++;
+	}
+
+	dcbnl_cee_notify(adapter->netdev, RTM_GETDCB, DCB_CMD_CEE_GET, 0, 0);
+}
+
+/* dcbnl getstate: report whether the adapter's DCB state bit is set */
+static u8 qlcnic_dcb_get_state(struct net_device *netdev)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	u8 enabled = test_bit(__QLCNIC_DCB_STATE, &adapter->state);
+
+	return enabled;
+}
+
+/*
+ * dcbnl getpermhwaddr: copy netdev->addr_len bytes of netdev->dev_addr
+ * into @addr.
+ * NOTE(review): the source is dev_addr, i.e. the current address - confirm
+ * this is what the "permanent address" callback should report.
+ */
+static void qlcnic_dcb_get_perm_hw_addr(struct net_device *netdev, u8 *addr)
+{
+ memcpy(addr, netdev->dev_addr, netdev->addr_len);
+}
+
+/*
+ * dcbnl getpgtccfgtx: report priority type, priority-group id, bandwidth
+ * share and priority bitmap of operational traffic class @tc.
+ *
+ * All outputs are zero when DCB is not active, the operational TC
+ * parameters are not valid, @tc is out of range or the class is unused.
+ * The bandwidth share is 100 divided by the number of valid classes that
+ * use the same priority group.
+ */
+static void
+qlcnic_dcb_get_pg_tc_cfg_tx(struct net_device *netdev, int tc, u8 *prio,
+			    u8 *pgid, u8 *bw_per, u8 *up_tc_map)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_dcb_tc_cfg *tc_cfg, *temp;
+	struct qlcnic_dcb_cee *type;
+	u8 i, cnt, pg;
+
+	type = &adapter->dcb->cfg->type[QLC_DCB_OPER_IDX];
+	*prio = *pgid = *bw_per = *up_tc_map = 0;
+
+	if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state) ||
+	    !type->tc_param_valid)
+		return;
+
+	/* tc_cfg[] has QLC_DCB_MAX_TC entries, so tc == QLC_DCB_MAX_TC is out */
+	if (tc < 0 || tc >= QLC_DCB_MAX_TC)
+		return;
+
+	tc_cfg = &type->tc_cfg[tc];
+	if (!tc_cfg->valid)
+		return;
+
+	*pgid = tc_cfg->pgid;
+	*prio = tc_cfg->prio_type;
+	*up_tc_map = tc_cfg->up_tc_map;
+	pg = *pgid;
+
+	/* cnt is at least 1 because tc_cfg itself is valid and matches pg */
+	for (i = 0, cnt = 0; i < QLC_DCB_MAX_TC; i++) {
+		temp = &type->tc_cfg[i];
+		if (temp->valid && (pg == temp->pgid))
+			cnt++;
+	}
+
+	tc_cfg->bwg_percent = (100 / cnt);
+	*bw_per = tc_cfg->bwg_percent;
+}
+
+/*
+ * dcbnl getpgbwgcfgtx: report the bandwidth percentage of operational
+ * priority group @pgid.
+ *
+ * *bw_pct is zero when DCB is not active, the operational TC parameters
+ * are not valid, @pgid is out of range or the group is not valid.
+ */
+static void qlcnic_dcb_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid,
+					 u8 *bw_pct)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_dcb_pg_cfg *pgcfg;
+	struct qlcnic_dcb_cee *type;
+
+	*bw_pct = 0;
+	type = &adapter->dcb->cfg->type[QLC_DCB_OPER_IDX];
+
+	if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state) ||
+	    !type->tc_param_valid)
+		return;
+
+	/* pg_cfg[] has QLC_DCB_MAX_PG entries, so pgid == QLC_DCB_MAX_PG is out */
+	if (pgid < 0 || pgid >= QLC_DCB_MAX_PG)
+		return;
+
+	pgcfg = &type->pg_cfg[pgid];
+	if (!pgcfg->valid)
+		return;
+
+	*bw_pct = pgcfg->total_bw_percent;
+}
+
+/*
+ * dcbnl getpfccfg: report the PFC type configured for priority @prio in
+ * the operational parameter set.
+ *
+ * *setting is zero when @prio is out of range, DCB is not active, PFC is
+ * not enabled, or no valid traffic class has PFC configured for @prio.
+ * The range check comes first because @prio is used both as a shift count
+ * and as an index into prio_cfg[QLC_DCB_MAX_PRIO].
+ */
+static void qlcnic_dcb_get_pfc_cfg(struct net_device *netdev, int prio,
+				   u8 *setting)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_dcb_tc_cfg *tc_cfg;
+	struct qlcnic_dcb_cee *type;
+	u8 i, val;
+
+	*setting = 0;
+
+	if (prio < 0 || prio >= QLC_DCB_MAX_PRIO)
+		return;
+
+	val = QLC_DCB_GET_MAP(prio);
+	type = &adapter->dcb->cfg->type[QLC_DCB_OPER_IDX];
+
+	if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state) ||
+	    !type->pfc_mode_enable)
+		return;
+
+	for (i = 0; i < QLC_DCB_MAX_TC; i++) {
+		tc_cfg = &type->tc_cfg[i];
+		if (!tc_cfg->valid)
+			continue;
+
+		if ((val & tc_cfg->up_tc_map) && (tc_cfg->prio_cfg[prio].valid))
+			*setting = tc_cfg->prio_cfg[prio].pfc_type;
+	}
+}
+
+/*
+ * dcbnl getcap: report capability @capid through @cap.  *cap is left
+ * untouched when DCB is not active.  Always returns 0.
+ */
+static u8 qlcnic_dcb_get_capability(struct net_device *netdev, int capid,
+				    u8 *cap)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	u8 value;
+
+	if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state))
+		return 0;
+
+	switch (capid) {
+	case DCB_CAP_ATTR_PG:
+	case DCB_CAP_ATTR_UP2TC:
+	case DCB_CAP_ATTR_PFC:
+	case DCB_CAP_ATTR_GSP:
+		value = true;
+		break;
+	case DCB_CAP_ATTR_PG_TCS:
+	case DCB_CAP_ATTR_PFC_TCS:
+		value = 0x80;	/* 8 priorities for PGs */
+		break;
+	case DCB_CAP_ATTR_DCBX:
+		value = adapter->dcb->cfg->capability.dcb_capability;
+		break;
+	default:
+		value = false;
+		break;
+	}
+
+	*cap = value;
+
+	return 0;
+}
+
+/*
+ * dcbnl getnumtcs: report the number of ETS (DCB_NUMTCS_ATTR_PG) or PFC
+ * (DCB_NUMTCS_ATTR_PFC) traffic classes from the cached capabilities.
+ *
+ * Returns 0 on success, -EINVAL when DCB is not active or @attr is unknown.
+ */
+static int qlcnic_dcb_get_num_tcs(struct net_device *netdev, int attr, u8 *num)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_dcb_cfg *cfg = adapter->dcb->cfg;
+
+	if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state))
+		return -EINVAL;
+
+	if (attr == DCB_NUMTCS_ATTR_PG)
+		*num = cfg->capability.max_ets_tc;
+	else if (attr == DCB_NUMTCS_ATTR_PFC)
+		*num = cfg->capability.max_pfc_tc;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * dcbnl getapp: return the result of dcb_getapp() for selector @idtype and
+ * protocol @id, or 0 when DCB is not active.
+ */
+static u8 qlcnic_dcb_get_app(struct net_device *netdev, u8 idtype, u16 id)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct dcb_app key = {
+		.selector = idtype,
+		.protocol = id,
+	};
+
+	if (test_bit(__QLCNIC_DCB_STATE, &adapter->state))
+		return dcb_getapp(netdev, &key);
+
+	return 0;
+}
+
+/*
+ * dcbnl getpfcstate: return the operational PFC enable flag, or 0 when
+ * DCB is not active.
+ */
+static u8 qlcnic_dcb_get_pfc_state(struct net_device *netdev)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_dcb *dcb = adapter->dcb;
+
+	if (test_bit(__QLCNIC_DCB_STATE, &adapter->state))
+		return dcb->cfg->type[QLC_DCB_OPER_IDX].pfc_mode_enable;
+
+	return 0;
+}
+
+/*
+ * dcbnl getdcbx: return the cached DCB_CAP_DCBX_* flags, or 0 when DCB is
+ * not active.
+ */
+static u8 qlcnic_dcb_get_dcbx(struct net_device *netdev)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_dcb_cfg *cfg = adapter->dcb->cfg;
+
+	if (test_bit(__QLCNIC_DCB_STATE, &adapter->state))
+		return cfg->capability.dcb_capability;
+
+	return 0;
+}
+
+/*
+ * dcbnl getfeatcfg: report DCB_FEATCFG_* flags for feature @fid based on
+ * the operational parameter set.
+ *
+ * Returns 0 on success, 1 when DCB is not active (@flag untouched) or when
+ * @fid is unknown (@flag cleared).
+ */
+static u8 qlcnic_dcb_get_feat_cfg(struct net_device *netdev, int fid, u8 *flag)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_dcb_cee *oper;
+
+	if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state))
+		return 1;
+
+	oper = &adapter->dcb->cfg->type[QLC_DCB_OPER_IDX];
+	*flag = 0;
+
+	switch (fid) {
+	case DCB_FEATCFG_ATTR_PG:
+		*flag |= oper->tc_param_valid ? DCB_FEATCFG_ENABLE :
+						DCB_FEATCFG_ERROR;
+		break;
+	case DCB_FEATCFG_ATTR_PFC:
+		if (!oper->pfc_mode_enable)
+			*flag |= DCB_FEATCFG_ERROR;
+		else if (oper->tc_cfg[0].prio_cfg[0].pfc_type)
+			*flag |= DCB_FEATCFG_ENABLE;
+		break;
+	case DCB_FEATCFG_ATTR_APP:
+		*flag |= DCB_FEATCFG_ENABLE;
+		break;
+	default:
+		netdev_err(netdev, "Invalid Feature ID %d\n", fid);
+		return 1;
+	}
+
+	return 0;
+}
+
+/* dcbnl getpgtccfgrx: no RX data is kept, so every output is zeroed */
+static inline void
+qlcnic_dcb_get_pg_tc_cfg_rx(struct net_device *netdev, int prio, u8 *prio_type,
+			    u8 *pgid, u8 *bw_pct, u8 *up_map)
+{
+	*up_map = 0;
+	*bw_pct = 0;
+	*pgid = 0;
+	*prio_type = 0;
+}
+
+/* dcbnl getpgbwgcfgrx: always reports a bandwidth percentage of zero */
+static inline void
+qlcnic_dcb_get_pg_bwg_cfg_rx(struct net_device *netdev, int pgid, u8 *bw_pct)
+{
+ *bw_pct = 0;
+}
+
+/*
+ * dcbnl peer_getappinfo: store the number of valid peer application
+ * entries in *app_count (0 when DCB is not active).  @info is not written.
+ * Always returns 0.
+ */
+static int qlcnic_dcb_peer_app_info(struct net_device *netdev,
+				    struct dcb_peer_app_info *info,
+				    u16 *app_count)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_dcb_cee *peer;
+	u16 valid = 0;
+	int i;
+
+	*app_count = 0;
+
+	if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state))
+		return 0;
+
+	peer = &adapter->dcb->cfg->type[QLC_DCB_PEER_IDX];
+
+	for (i = 0; i < QLC_DCB_MAX_APP; i++)
+		if (peer->app[i].valid)
+			valid++;
+
+	*app_count = valid;
+
+	return 0;
+}
+
+/*
+ * dcbnl peer_getapptable: copy every valid peer application entry into
+ * consecutive slots of @table.  Nothing is written when DCB is not active.
+ * Always returns 0.
+ */
+static int qlcnic_dcb_peer_app_table(struct net_device *netdev,
+				     struct dcb_app *table)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_dcb_cee *peer;
+	struct qlcnic_dcb_app *src;
+	struct dcb_app *dst = table;
+	int i;
+
+	if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state))
+		return 0;
+
+	peer = &adapter->dcb->cfg->type[QLC_DCB_PEER_IDX];
+
+	for (i = 0; i < QLC_DCB_MAX_APP; i++) {
+		src = &peer->app[i];
+		if (src->valid) {
+			dst->selector = src->selector;
+			dst->priority = src->priority;
+			dst->protocol = src->protocol;
+			dst++;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * dcbnl cee_peer_getpg: fill @pg from the peer parameter set.  Nothing is
+ * written when DCB is not active.  Always returns 0.
+ */
+static int qlcnic_dcb_cee_peer_get_pg(struct net_device *netdev,
+ struct cee_pg *pg)
+{
+ struct qlcnic_adapter *adapter = netdev_priv(netdev);
+ struct qlcnic_dcb_cee *peer;
+ u8 i, j, k, map;
+
+ if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state))
+ return 0;
+
+ peer = &adapter->dcb->cfg->type[QLC_DCB_PEER_IDX];
+
+ for (i = 0, j = 0; i < QLC_DCB_MAX_PG; i++) {
+ if (!peer->pg_cfg[i].valid)
+ continue;
+
+ pg->pg_bw[j] = peer->pg_cfg[i].total_bw_percent;
+
+ /*
+  * NOTE(review): the loop body never uses k - it tests tc_cfg[i]
+  * on every pass, so the loop acts as a single check of
+  * tc_cfg[i].  tc_cfg[k] may have been intended; confirm.
+  * j only advances when the check matches, so a later group can
+  * overwrite pg_bw[j] written above.
+  */
+ for (k = 0; k < QLC_DCB_MAX_TC; k++) {
+ if (peer->tc_cfg[i].valid &&
+ (peer->tc_cfg[i].pgid == i)) {
+ map = peer->tc_cfg[i].up_tc_map;
+ pg->prio_pg[j++] = map;
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * dcbnl cee_peer_getpfc: build pfc->pfc_en with one bit per traffic class.
+ * For each peer class the lowest priority of its up_tc_map is looked up
+ * through qlcnic_dcb_get_pfc_cfg(); the class bit is set when that lookup
+ * reports a non-zero setting.  tcs_supported is the cached PFC TC limit.
+ *
+ * pfc_en is zero and tcs_supported untouched when DCB is not active.
+ * Always returns 0.
+ */
+static int qlcnic_dcb_cee_peer_get_pfc(struct net_device *netdev,
+				       struct cee_pfc *pfc)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_dcb_cfg *cfg = adapter->dcb->cfg;
+	struct qlcnic_dcb_cee *peer;
+	u8 tc, prio, pfc_type;
+
+	pfc->pfc_en = 0;
+
+	if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state))
+		return 0;
+
+	peer = &cfg->type[QLC_DCB_PEER_IDX];
+
+	for (tc = 0; tc < QLC_DCB_MAX_TC; tc++) {
+		prio = qlcnic_dcb_prio_count(peer->tc_cfg[tc].up_tc_map);
+
+		pfc_type = 0;
+		qlcnic_dcb_get_pfc_cfg(netdev, prio, &pfc_type);
+		if (pfc_type)
+			pfc->pfc_en |= QLC_DCB_GET_MAP(tc);
+	}
+
+	pfc->tcs_supported = cfg->capability.max_pfc_tc;
+
+	return 0;
+}
+
+/*
+ * dcbnl callbacks installed by __qlcnic_init_dcbnl_ops().  Only query
+ * callbacks are provided; no set* member is initialised here.
+ */
+static const struct dcbnl_rtnl_ops qlcnic_dcbnl_ops = {
+ .getstate = qlcnic_dcb_get_state,
+ .getpermhwaddr = qlcnic_dcb_get_perm_hw_addr,
+ .getpgtccfgtx = qlcnic_dcb_get_pg_tc_cfg_tx,
+ .getpgbwgcfgtx = qlcnic_dcb_get_pg_bwg_cfg_tx,
+ .getpfccfg = qlcnic_dcb_get_pfc_cfg,
+ .getcap = qlcnic_dcb_get_capability,
+ .getnumtcs = qlcnic_dcb_get_num_tcs,
+ .getapp = qlcnic_dcb_get_app,
+ .getpfcstate = qlcnic_dcb_get_pfc_state,
+ .getdcbx = qlcnic_dcb_get_dcbx,
+ .getfeatcfg = qlcnic_dcb_get_feat_cfg,
+
+ /* RX callbacks report zeros */
+ .getpgtccfgrx = qlcnic_dcb_get_pg_tc_cfg_rx,
+ .getpgbwgcfgrx = qlcnic_dcb_get_pg_bwg_cfg_rx,
+
+ /* peer (remote) parameter callbacks */
+ .peer_getappinfo = qlcnic_dcb_peer_app_info,
+ .peer_getapptable = qlcnic_dcb_peer_app_table,
+ .cee_peer_getpg = qlcnic_dcb_cee_peer_get_pg,
+ .cee_peer_getpfc = qlcnic_dcb_cee_peer_get_pfc,
+};
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
new file mode 100644
index 0000000..b87ce9f
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
@@ -0,0 +1,41 @@
+/*
+ * QLogic qlcnic NIC Driver
+ * Copyright (c) 2009-2013 QLogic Corporation
+ *
+ * See LICENSE.qlcnic for copyright and licensing details.
+ */
+
+#ifndef __QLCNIC_DCBX_H
+#define __QLCNIC_DCBX_H
+
+void qlcnic_clear_dcb_ops(struct qlcnic_adapter *);
+
+/*
+ * __qlcnic_register_dcb() is the real implementation when
+ * CONFIG_QLCNIC_DCB is set; otherwise it is a stub that returns 0.
+ */
+#ifdef CONFIG_QLCNIC_DCB
+int __qlcnic_register_dcb(struct qlcnic_adapter *);
+#else
+static inline int __qlcnic_register_dcb(struct qlcnic_adapter *adapter)
+{ return 0; }
+#endif
+
+/*
+ * DCB operations of an adapter.  qlcnic_dcb.c provides one table for 82xx
+ * and one for 83xx; a member may be NULL when a family has no callback
+ * for it (the 82xx table sets no register_aen).
+ */
+struct qlcnic_dcb_ops {
+ void (*init_dcbnl_ops) (struct qlcnic_adapter *);
+ void (*free) (struct qlcnic_adapter *);
+ int (*attach) (struct qlcnic_adapter *);
+ int (*query_hw_capability) (struct qlcnic_adapter *, char *);
+ int (*get_hw_capability) (struct qlcnic_adapter *);
+ void (*get_info) (struct qlcnic_adapter *);
+ int (*query_cee_param) (struct qlcnic_adapter *, char *, u8);
+ int (*get_cee_cfg) (struct qlcnic_adapter *);
+ int (*register_aen) (struct qlcnic_adapter *, bool);
+ void (*handle_aen) (struct qlcnic_adapter *, void *);
+};
+
+/* Per-adapter DCB context, allocated by __qlcnic_register_dcb() */
+struct qlcnic_dcb {
+ struct qlcnic_dcb_mbx_params *param; /* raw parameter sets */
+ struct qlcnic_adapter *adapter; /* back pointer to the owner */
+ struct delayed_work aen_work; /* refreshes the config after an AEN */
+ struct workqueue_struct *wq; /* queue that aen_work runs on */
+ struct qlcnic_dcb_ops *ops;
+ struct qlcnic_dcb_cfg *cfg; /* decoded configuration */
+};
+#endif
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 79a5855..7b0c90e 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -125,6 +125,14 @@
};
#define QLCNIC_STATS_LEN ARRAY_SIZE(qlcnic_gstrings_stats)
+
+/*
+ * Names of the Tx ring statistics, ETH_GSTRING_LEN bytes per entry.
+ * NOTE(review): presumably the order must match the order in which the
+ * counters are reported - verify against the stats fill routine.
+ */
+static const char qlcnic_tx_ring_stats_strings[][ETH_GSTRING_LEN] = {
+ "xmit_on",
+ "xmit_off",
+ "xmit_called",
+ "xmit_finished",
+};
+
static const char qlcnic_83xx_rx_stats_strings[][ETH_GSTRING_LEN] = {
"ctx_rx_bytes",
"ctx_rx_pkts",
@@ -630,15 +638,15 @@
static void qlcnic_get_channels(struct net_device *dev,
struct ethtool_channels *channel)
{
- int min;
struct qlcnic_adapter *adapter = netdev_priv(dev);
+ int min; /* scratch: CPU-bounded limit before rounding down to a power of two */
min = min_t(int, adapter->ahw->max_rx_ques, num_online_cpus());
channel->max_rx = rounddown_pow_of_two(min);
- channel->max_tx = adapter->ahw->max_tx_ques;
+ min = min_t(int, QLCNIC_MAX_TX_RINGS, num_online_cpus());
+ channel->max_tx = rounddown_pow_of_two(min); /* advertise only what qlcnic_validate_max_tx_rings() accepts */
channel->rx_count = adapter->max_sds_rings;
- channel->tx_count = adapter->ahw->max_tx_ques;
+ channel->tx_count = adapter->max_drv_tx_rings; /* TX rings currently configured */
}
static int qlcnic_set_channels(struct net_device *dev,
@@ -646,18 +654,27 @@
{
struct qlcnic_adapter *adapter = netdev_priv(dev);
int err;
+ int txq = 0;
- if (channel->other_count || channel->combined_count ||
- channel->tx_count != channel->max_tx)
+ if (channel->other_count || channel->combined_count)
return -EINVAL;
- err = qlcnic_validate_max_rss(adapter, channel->rx_count);
- if (err)
- return err;
+ if (channel->rx_count) {
+ err = qlcnic_validate_max_rss(adapter, channel->rx_count);
+ if (err)
+ return err;
+ }
- err = qlcnic_set_max_rss(adapter, channel->rx_count, 0);
- netdev_info(dev, "allocated 0x%x sds rings\n",
- adapter->max_sds_rings);
+ if (channel->tx_count) {
+ err = qlcnic_validate_max_tx_rings(adapter, channel->tx_count);
+ if (err)
+ return err;
+ txq = channel->tx_count;
+ }
+
+ err = qlcnic_set_max_rss(adapter, channel->rx_count, txq);
+ netdev_info(dev, "allocated 0x%x sds rings and 0x%x tx rings\n",
+ adapter->max_sds_rings, adapter->max_drv_tx_rings);
return err;
}
@@ -893,6 +910,7 @@
clear_diag_irq:
adapter->max_sds_rings = max_sds_rings;
clear_bit(__QLCNIC_RESETTING, &adapter->state);
+
return ret;
}
@@ -966,6 +984,7 @@
int qlcnic_loopback_test(struct net_device *netdev, u8 mode)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
+ int max_drv_tx_rings = adapter->max_drv_tx_rings;
int max_sds_rings = adapter->max_sds_rings;
struct qlcnic_host_sds_ring *sds_ring;
struct qlcnic_hardware_context *ahw = adapter->ahw;
@@ -1025,6 +1044,7 @@
clear_it:
adapter->max_sds_rings = max_sds_rings;
+ adapter->max_drv_tx_rings = max_drv_tx_rings;
clear_bit(__QLCNIC_RESETTING, &adapter->state);
return ret;
}
@@ -1077,11 +1097,21 @@
QLCNIC_TEST_LEN * ETH_GSTRING_LEN);
break;
case ETH_SS_STATS:
+ num_stats = ARRAY_SIZE(qlcnic_tx_ring_stats_strings);
+ for (i = 0; i < adapter->max_drv_tx_rings; i++) {
+ for (index = 0; index < num_stats; index++) {
+ sprintf(data, "tx_ring_%d %s", i,
+ qlcnic_tx_ring_stats_strings[index]);
+ data += ETH_GSTRING_LEN;
+ }
+ }
+
for (index = 0; index < QLCNIC_STATS_LEN; index++) {
memcpy(data + index * ETH_GSTRING_LEN,
qlcnic_gstrings_stats[index].stat_string,
ETH_GSTRING_LEN);
}
+
if (qlcnic_83xx_check(adapter)) {
num_stats = ARRAY_SIZE(qlcnic_83xx_tx_stats_strings);
for (i = 0; i < num_stats; i++, index++)
@@ -1173,11 +1203,22 @@
struct ethtool_stats *stats, u64 *data)
{
struct qlcnic_adapter *adapter = netdev_priv(dev);
+ struct qlcnic_host_tx_ring *tx_ring;
struct qlcnic_esw_statistics port_stats;
struct qlcnic_mac_statistics mac_stats;
- int index, ret, length, size;
+ int index, ret, length, size, ring;
char *p;
+ memset(data, 0, adapter->max_drv_tx_rings * 4 * sizeof(u64));
+ for (ring = 0, index = 0; ring < adapter->max_drv_tx_rings; ring++) {
+ if (test_bit(__QLCNIC_DEV_UP, &adapter->state)) {
+ tx_ring = &adapter->tx_ring[ring];
+ *data++ = tx_ring->xmit_on;
+ *data++ = tx_ring->xmit_off;
+ *data++ = tx_ring->xmit_called;
+ *data++ = tx_ring->xmit_finished;
+ }
+ }
memset(data, 0, stats->n_stats * sizeof(u64));
length = QLCNIC_STATS_LEN;
for (index = 0; index < length; index++) {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index 4d5f59b..f8adc7b 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -387,7 +387,7 @@
if (!test_bit(__QLCNIC_FW_ATTACHED, &adapter->state))
return -EIO;
- tx_ring = adapter->tx_ring;
+ tx_ring = &adapter->tx_ring[0];
__netif_tx_lock_bh(tx_ring->txq);
producer = tx_ring->producer;
@@ -740,6 +740,22 @@
return 0;
}
+int qlcnic_82xx_read_phys_port_id(struct qlcnic_adapter *adapter)
+{
+	struct qlcnic_hardware_context *hw = adapter->ahw;
+	u8 port_mac[ETH_ALEN];
+	int err;
+
+	/* The MAC queried for the physical port is stored as the port ID. */
+	err = qlcnic_get_mac_address(adapter, port_mac, hw->physical_port);
+	if (err)
+		return err;
+
+	memcpy(hw->phys_port_id, port_mac, ETH_ALEN);
+	adapter->flags |= QLCNIC_HAS_PHYS_PORT_ID;
+	return 0;
+}
+
/*
* Send the interrupt coalescing parameter set by ethtool to the card.
*/
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
index 4a71b28..272c356 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
@@ -85,8 +85,11 @@
#define QLCNIC_CMD_GET_TEMP_HDR 0x30
#define QLCNIC_CMD_BC_EVENT_SETUP 0x31
#define QLCNIC_CMD_CONFIG_VPORT 0x32
+#define QLCNIC_CMD_DCB_QUERY_CAP 0x34
+#define QLCNIC_CMD_DCB_QUERY_PARAM 0x35
#define QLCNIC_CMD_GET_MAC_STATS 0x37
#define QLCNIC_CMD_82XX_SET_DRV_VER 0x38
+#define QLCNIC_CMD_MQ_TX_CONFIG_INTR 0x39
#define QLCNIC_CMD_GET_LED_STATUS 0x3C
#define QLCNIC_CMD_CONFIGURE_RSS 0x41
#define QLCNIC_CMD_CONFIG_INTR_COAL 0x43
@@ -122,6 +125,7 @@
#define QLCNIC_MBX_COMP_EVENT 0x8100
#define QLCNIC_MBX_REQUEST_EVENT 0x8101
#define QLCNIC_MBX_TIME_EXTEND_EVENT 0x8102
+#define QLCNIC_MBX_DCBX_CONFIG_CHANGE_EVENT 0x8110
#define QLCNIC_MBX_SFP_INSERT_EVENT 0x8130
#define QLCNIC_MBX_SFP_REMOVE_EVENT 0x8131
@@ -149,7 +153,6 @@
struct pci_device_id;
struct qlcnic_host_sds_ring;
struct qlcnic_host_tx_ring;
-struct qlcnic_host_tx_ring;
struct qlcnic_hardware_context;
struct qlcnic_adapter;
@@ -173,10 +176,12 @@
void qlcnic_82xx_write_crb(struct qlcnic_adapter *, char *, loff_t, size_t);
void qlcnic_82xx_read_crb(struct qlcnic_adapter *, char *, loff_t, size_t);
void qlcnic_82xx_dev_request_reset(struct qlcnic_adapter *, u32);
-int qlcnic_82xx_setup_intr(struct qlcnic_adapter *, u8);
+int qlcnic_82xx_setup_intr(struct qlcnic_adapter *, u8, int);
irqreturn_t qlcnic_82xx_clear_legacy_intr(struct qlcnic_adapter *);
int qlcnic_82xx_issue_cmd(struct qlcnic_adapter *adapter,
struct qlcnic_cmd_args *);
+int qlcnic_82xx_mq_intrpt(struct qlcnic_adapter *, int);
+int qlcnic_82xx_config_intrpt(struct qlcnic_adapter *, u8);
int qlcnic_82xx_fw_cmd_create_rx_ctx(struct qlcnic_adapter *);
int qlcnic_82xx_fw_cmd_create_tx_ctx(struct qlcnic_adapter *,
struct qlcnic_host_tx_ring *tx_ring, int);
@@ -184,7 +189,7 @@
void qlcnic_82xx_fw_cmd_del_tx_ctx(struct qlcnic_adapter *,
struct qlcnic_host_tx_ring *);
int qlcnic_82xx_sre_macaddr_change(struct qlcnic_adapter *, u8 *, u16, u8);
-int qlcnic_82xx_get_mac_address(struct qlcnic_adapter *, u8*);
+int qlcnic_82xx_get_mac_address(struct qlcnic_adapter *, u8*, u8);
int qlcnic_82xx_get_nic_info(struct qlcnic_adapter *, struct qlcnic_info *, u8);
int qlcnic_82xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *);
int qlcnic_82xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info*);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
index 974d626..66c26cf 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
@@ -127,12 +127,12 @@
}
}
-void qlcnic_release_tx_buffers(struct qlcnic_adapter *adapter)
+void qlcnic_release_tx_buffers(struct qlcnic_adapter *adapter,
+ struct qlcnic_host_tx_ring *tx_ring)
{
struct qlcnic_cmd_buffer *cmd_buf;
struct qlcnic_skb_frag *buffrag;
int i, j;
- struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring;
cmd_buf = tx_ring->cmd_buf_arr;
for (i = 0; i < tx_ring->num_desc; i++) {
@@ -241,7 +241,13 @@
sds_ring->irq = adapter->msix_entries[ring].vector;
sds_ring->adapter = adapter;
sds_ring->num_desc = adapter->num_rxd;
-
+ if (qlcnic_82xx_check(adapter)) {
+ if (qlcnic_check_multi_tx(adapter) &&
+ !adapter->ahw->diag_test)
+ sds_ring->tx_ring = &adapter->tx_ring[ring];
+ else
+ sds_ring->tx_ring = &adapter->tx_ring[0];
+ }
for (i = 0; i < NUM_RCV_DESC_RINGS; i++)
INIT_LIST_HEAD(&sds_ring->free_list[i]);
}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index f807f3b..8d06f88 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -127,6 +127,23 @@
struct sk_buff *qlcnic_process_rxbuf(struct qlcnic_adapter *,
struct qlcnic_host_rds_ring *, u16, u16);
+inline void qlcnic_enable_tx_intr(struct qlcnic_adapter *adapter,
+				  struct qlcnic_host_tx_ring *tx_ring)
+{
+	if (!qlcnic_check_multi_tx(adapter) || adapter->ahw->diag_test)
+		return;	/* nothing to do outside multi-TX mode or in a diag test */
+	writel(0x0, tx_ring->crb_intr_mask);	/* 0 is the "enabled" value */
+}
+
+
+static inline void qlcnic_disable_tx_int(struct qlcnic_adapter *adapter,
+					 struct qlcnic_host_tx_ring *tx_ring)
+{
+	if (!qlcnic_check_multi_tx(adapter) || adapter->ahw->diag_test)
+		return;	/* nothing to do outside multi-TX mode or in a diag test */
+	writel(1, tx_ring->crb_intr_mask);	/* 1 is the "disabled" value */
+}
+
inline void qlcnic_83xx_enable_tx_intr(struct qlcnic_adapter *adapter,
struct qlcnic_host_tx_ring *tx_ring)
{
@@ -147,10 +164,7 @@
static inline u32 qlcnic_get_ref_handle(struct qlcnic_adapter *adapter,
u16 handle, u8 ring_id)
{
- unsigned short device = adapter->pdev->device;
-
- if ((device == PCI_DEVICE_ID_QLOGIC_QLE834X) ||
- (device == PCI_DEVICE_ID_QLOGIC_VF_QLE834X))
+ if (qlcnic_83xx_check(adapter))
return handle | (ring_id << 15);
else
return handle;
@@ -357,14 +371,14 @@
}
static int qlcnic_tx_pkt(struct qlcnic_adapter *adapter,
- struct cmd_desc_type0 *first_desc, struct sk_buff *skb)
+ struct cmd_desc_type0 *first_desc, struct sk_buff *skb,
+ struct qlcnic_host_tx_ring *tx_ring)
{
u8 l4proto, opcode = 0, hdr_len = 0;
u16 flags = 0, vlan_tci = 0;
int copied, offset, copy_len, size;
struct cmd_desc_type0 *hwdesc;
struct vlan_ethhdr *vh;
- struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring;
u16 protocol = ntohs(skb->protocol);
u32 producer = tx_ring->producer;
@@ -547,7 +561,7 @@
netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
- struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring;
+ struct qlcnic_host_tx_ring *tx_ring;
struct qlcnic_cmd_buffer *pbuf;
struct qlcnic_skb_frag *buffrag;
struct cmd_desc_type0 *hwdesc, *first_desc;
@@ -556,10 +570,8 @@
int i, k, frag_count, delta = 0;
u32 producer, num_txd;
- num_txd = tx_ring->num_desc;
-
if (!test_bit(__QLCNIC_DEV_UP, &adapter->state)) {
- netif_stop_queue(netdev);
+ netif_tx_stop_all_queues(netdev);
return NETDEV_TX_BUSY;
}
@@ -569,7 +581,14 @@
goto drop_packet;
}
+ if (qlcnic_check_multi_tx(adapter))
+ tx_ring = &adapter->tx_ring[skb_get_queue_mapping(skb)];
+ else
+ tx_ring = &adapter->tx_ring[0];
+ num_txd = tx_ring->num_desc;
+
frag_count = skb_shinfo(skb)->nr_frags + 1;
+
/* 14 frags supported for normal packet and
* 32 frags supported for TSO packet
*/
@@ -584,11 +603,12 @@
}
if (unlikely(qlcnic_tx_avail(tx_ring) <= TX_STOP_THRESH)) {
- netif_stop_queue(netdev);
+ netif_tx_stop_queue(tx_ring->txq);
if (qlcnic_tx_avail(tx_ring) > TX_STOP_THRESH) {
- netif_start_queue(netdev);
+ netif_tx_start_queue(tx_ring->txq);
} else {
adapter->stats.xmit_off++;
+ tx_ring->xmit_off++;
return NETDEV_TX_BUSY;
}
}
@@ -643,7 +663,7 @@
tx_ring->producer = get_next_index(producer, num_txd);
smp_mb();
- if (unlikely(qlcnic_tx_pkt(adapter, first_desc, skb)))
+ if (unlikely(qlcnic_tx_pkt(adapter, first_desc, skb, tx_ring)))
goto unwind_buff;
if (adapter->drv_mac_learn)
@@ -651,6 +671,7 @@
adapter->stats.txbytes += skb->len;
adapter->stats.xmitcalled++;
+ tx_ring->xmit_called++;
qlcnic_update_cmd_producer(tx_ring);
@@ -673,7 +694,7 @@
adapter->ahw->linkup = 0;
if (netif_running(netdev)) {
netif_carrier_off(netdev);
- netif_stop_queue(netdev);
+ netif_tx_stop_all_queues(netdev);
}
} else if (!adapter->ahw->linkup && linkup) {
netdev_info(netdev, "NIC Link is up\n");
@@ -768,9 +789,6 @@
struct net_device *netdev = adapter->netdev;
struct qlcnic_skb_frag *frag;
- if (!spin_trylock(&adapter->tx_clean_lock))
- return 1;
-
sw_consumer = tx_ring->sw_consumer;
hw_consumer = le32_to_cpu(*(tx_ring->hw_consumer));
@@ -788,6 +806,7 @@
frag->dma = 0ULL;
}
adapter->stats.xmitfinished++;
+ tx_ring->xmit_finished++;
dev_kfree_skb_any(buffer->skb);
buffer->skb = NULL;
}
@@ -800,10 +819,12 @@
if (count && netif_running(netdev)) {
tx_ring->sw_consumer = sw_consumer;
smp_mb();
- if (netif_queue_stopped(netdev) && netif_carrier_ok(netdev)) {
+ if (netif_tx_queue_stopped(tx_ring->txq) &&
+ netif_carrier_ok(netdev)) {
if (qlcnic_tx_avail(tx_ring) > TX_STOP_THRESH) {
- netif_wake_queue(netdev);
+ netif_tx_wake_queue(tx_ring->txq);
adapter->stats.xmit_on++;
+ tx_ring->xmit_on++;
}
}
adapter->tx_timeo_cnt = 0;
@@ -823,7 +844,6 @@
*/
hw_consumer = le32_to_cpu(*(tx_ring->hw_consumer));
done = (sw_consumer == hw_consumer);
- spin_unlock(&adapter->tx_clean_lock);
return done;
}
@@ -833,16 +853,40 @@
int tx_complete, work_done;
struct qlcnic_host_sds_ring *sds_ring;
struct qlcnic_adapter *adapter;
+ struct qlcnic_host_tx_ring *tx_ring;
sds_ring = container_of(napi, struct qlcnic_host_sds_ring, napi);
adapter = sds_ring->adapter;
- tx_complete = qlcnic_process_cmd_ring(adapter, adapter->tx_ring,
+ tx_ring = sds_ring->tx_ring;
+
+ tx_complete = qlcnic_process_cmd_ring(adapter, tx_ring,
budget);
work_done = qlcnic_process_rcv_ring(sds_ring, budget);
if ((work_done < budget) && tx_complete) {
napi_complete(&sds_ring->napi);
- if (test_bit(__QLCNIC_DEV_UP, &adapter->state))
+ if (test_bit(__QLCNIC_DEV_UP, &adapter->state)) {
qlcnic_enable_int(sds_ring);
+ qlcnic_enable_tx_intr(adapter, tx_ring);
+ }
+ }
+
+ return work_done;
+}
+
+static int qlcnic_tx_poll(struct napi_struct *napi, int budget)
+{
+ struct qlcnic_host_tx_ring *tx_ring;
+ struct qlcnic_adapter *adapter;
+ int work_done;
+
+ tx_ring = container_of(napi, struct qlcnic_host_tx_ring, napi);
+ adapter = tx_ring->adapter;
+
+ work_done = qlcnic_process_cmd_ring(adapter, tx_ring, budget);
+ if (work_done) {
+ napi_complete(&tx_ring->napi);
+ if (test_bit(__QLCNIC_DEV_UP, &adapter->state))
+ qlcnic_enable_tx_intr(adapter, tx_ring);
}
return work_done;
@@ -966,6 +1010,9 @@
break;
}
break;
+ case QLCNIC_C2H_OPCODE_GET_DCB_AEN:
+ qlcnic_dcb_handle_aen(adapter, (void *)&msg);
+ break;
default:
break;
}
@@ -1414,6 +1461,7 @@
int ring, max_sds_rings;
struct qlcnic_host_sds_ring *sds_ring;
struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx;
+ struct qlcnic_host_tx_ring *tx_ring;
if (qlcnic_alloc_sds_rings(recv_ctx, adapter->max_sds_rings))
return -ENOMEM;
@@ -1422,12 +1470,22 @@
for (ring = 0; ring < adapter->max_sds_rings; ring++) {
sds_ring = &recv_ctx->sds_rings[ring];
- if (ring == adapter->max_sds_rings - 1)
- netif_napi_add(netdev, &sds_ring->napi, qlcnic_poll,
- QLCNIC_NETDEV_WEIGHT / max_sds_rings);
- else
+ if (qlcnic_check_multi_tx(adapter) &&
+ !adapter->ahw->diag_test &&
+ (adapter->max_drv_tx_rings > 1)) {
netif_napi_add(netdev, &sds_ring->napi, qlcnic_rx_poll,
- QLCNIC_NETDEV_WEIGHT*2);
+ QLCNIC_NETDEV_WEIGHT * 2);
+ } else {
+ if (ring == (adapter->max_sds_rings - 1))
+ netif_napi_add(netdev, &sds_ring->napi,
+ qlcnic_poll,
+ QLCNIC_NETDEV_WEIGHT /
+ max_sds_rings);
+ else
+ netif_napi_add(netdev, &sds_ring->napi,
+ qlcnic_rx_poll,
+ QLCNIC_NETDEV_WEIGHT * 2);
+ }
}
if (qlcnic_alloc_tx_rings(adapter, netdev)) {
@@ -1435,6 +1493,14 @@
return -ENOMEM;
}
+ if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test) {
+ for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) {
+ tx_ring = &adapter->tx_ring[ring];
+ netif_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll,
+ QLCNIC_NETDEV_WEIGHT);
+ }
+ }
+
return 0;
}
@@ -1443,6 +1509,7 @@
int ring;
struct qlcnic_host_sds_ring *sds_ring;
struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx;
+ struct qlcnic_host_tx_ring *tx_ring;
for (ring = 0; ring < adapter->max_sds_rings; ring++) {
sds_ring = &recv_ctx->sds_rings[ring];
@@ -1450,6 +1517,14 @@
}
qlcnic_free_sds_rings(adapter->recv_ctx);
+
+ if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test) {
+ for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) {
+ tx_ring = &adapter->tx_ring[ring];
+ netif_napi_del(&tx_ring->napi);
+ }
+ }
+
qlcnic_free_tx_rings(adapter);
}
@@ -1457,6 +1532,7 @@
{
int ring;
struct qlcnic_host_sds_ring *sds_ring;
+ struct qlcnic_host_tx_ring *tx_ring;
struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx;
if (adapter->is_up != QLCNIC_ADAPTER_UP_MAGIC)
@@ -1467,12 +1543,24 @@
napi_enable(&sds_ring->napi);
qlcnic_enable_int(sds_ring);
}
+
+ if (qlcnic_check_multi_tx(adapter) &&
+ (adapter->flags & QLCNIC_MSIX_ENABLED) &&
+ !adapter->ahw->diag_test &&
+ (adapter->max_drv_tx_rings > 1)) {
+ for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) {
+ tx_ring = &adapter->tx_ring[ring];
+ napi_enable(&tx_ring->napi);
+ qlcnic_enable_tx_intr(adapter, tx_ring);
+ }
+ }
}
void qlcnic_82xx_napi_disable(struct qlcnic_adapter *adapter)
{
int ring;
struct qlcnic_host_sds_ring *sds_ring;
+ struct qlcnic_host_tx_ring *tx_ring;
struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx;
if (adapter->is_up != QLCNIC_ADAPTER_UP_MAGIC)
@@ -1484,6 +1572,17 @@
napi_synchronize(&sds_ring->napi);
napi_disable(&sds_ring->napi);
}
+
+ if ((adapter->flags & QLCNIC_MSIX_ENABLED) &&
+ !adapter->ahw->diag_test &&
+ qlcnic_check_multi_tx(adapter)) {
+ for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) {
+ tx_ring = &adapter->tx_ring[ring];
+ qlcnic_disable_tx_int(adapter, tx_ring);
+ napi_synchronize(&tx_ring->napi);
+ napi_disable(&tx_ring->napi);
+ }
+ }
}
#define QLC_83XX_NORMAL_LB_PKT (1ULL << 36)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index a780b73..7dde3ba 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -100,6 +100,8 @@
ENTRY(PCI_DEVICE_ID_QLOGIC_QLE824X),
ENTRY(PCI_DEVICE_ID_QLOGIC_QLE834X),
ENTRY(PCI_DEVICE_ID_QLOGIC_VF_QLE834X),
+ ENTRY(PCI_DEVICE_ID_QLOGIC_QLE844X),
+ ENTRY(PCI_DEVICE_ID_QLOGIC_VF_QLE844X),
{0,}
};
@@ -146,6 +148,11 @@
static const struct qlcnic_board_info qlcnic_boards[] = {
{ PCI_VENDOR_ID_QLOGIC,
+ PCI_DEVICE_ID_QLOGIC_QLE844X,
+ 0x0,
+ 0x0,
+ "8400 series 10GbE Converged Network Adapter (TCP/IP Networking)" },
+ { PCI_VENDOR_ID_QLOGIC,
PCI_DEVICE_ID_QLOGIC_QLE834X,
PCI_VENDOR_ID_QLOGIC,
0x24e,
@@ -254,7 +261,6 @@
};
#define NUM_SUPPORTED_BOARDS ARRAY_SIZE(qlcnic_boards)
-#define QLC_MAX_SDS_RINGS 8
static const
struct qlcnic_legacy_intr_set legacy_intr[] = QLCNIC_LEGACY_INTR_CONFIG;
@@ -278,12 +284,15 @@
int qlcnic_read_mac_addr(struct qlcnic_adapter *adapter)
{
- u8 mac_addr[ETH_ALEN];
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
+ u8 mac_addr[ETH_ALEN];
+ int ret;
- if (qlcnic_get_mac_address(adapter, mac_addr) != 0)
- return -EIO;
+ ret = qlcnic_get_mac_address(adapter, mac_addr,
+ adapter->ahw->pci_func);
+ if (ret)
+ return ret;
memcpy(netdev->dev_addr, mac_addr, ETH_ALEN);
memcpy(adapter->mac_addr, netdev->dev_addr, netdev->addr_len);
@@ -425,6 +434,21 @@
cancel_delayed_work_sync(&adapter->fw_work);
}
+static int qlcnic_get_phys_port_id(struct net_device *netdev,
+				   struct netdev_phys_port_id *ppid)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_hardware_context *hw = adapter->ahw;
+
+	/* Set by qlcnic_82xx_read_phys_port_id() once the ID is cached. */
+	if (adapter->flags & QLCNIC_HAS_PHYS_PORT_ID) {
+		ppid->id_len = sizeof(hw->phys_port_id);
+		memcpy(ppid->id, hw->phys_port_id, ppid->id_len);
+		return 0;
+	}
+	return -EOPNOTSUPP;
+}
+
static const struct net_device_ops qlcnic_netdev_ops = {
.ndo_open = qlcnic_open,
.ndo_stop = qlcnic_close,
@@ -442,6 +466,7 @@
.ndo_fdb_add = qlcnic_fdb_add,
.ndo_fdb_del = qlcnic_fdb_del,
.ndo_fdb_dump = qlcnic_fdb_dump,
+ .ndo_get_phys_port_id = qlcnic_get_phys_port_id,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = qlcnic_poll_controller,
#endif
@@ -514,13 +539,33 @@
.get_board_info = qlcnic_82xx_get_board_info,
.set_mac_filter_count = qlcnic_82xx_set_mac_filter_count,
.free_mac_list = qlcnic_82xx_free_mac_list,
+ .read_phys_port_id = qlcnic_82xx_read_phys_port_id,
};
+static void qlcnic_get_multiq_capability(struct qlcnic_adapter *adapter)
+{
+	struct qlcnic_hardware_context *ahw = adapter->ahw;
+	int num_tx_q;
+
+	/* Start from one TX ring so every path leaves a valid count behind. */
+	adapter->max_drv_tx_rings = 1;
+	if (!ahw->msix_supported ||
+	    !(ahw->extra_capability[0] & QLCNIC_FW_CAPABILITY_2_MULTI_TX))
+		return;
+
+	/* Multi-TX is enabled only when more than one ring would be used. */
+	num_tx_q = min_t(int, QLCNIC_DEF_NUM_TX_RINGS, num_online_cpus());
+	if (num_tx_q > 1) {
+		test_and_set_bit(__QLCNIC_MULTI_TX_UNIQUE, &adapter->state);
+		adapter->max_drv_tx_rings = num_tx_q;
+	}
+}
+
int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix)
{
struct pci_dev *pdev = adapter->pdev;
+ int max_tx_rings, max_sds_rings, tx_vector;
int err = -1, i;
- int max_tx_rings, tx_vector;
if (adapter->flags & QLCNIC_TX_INTR_SHARED) {
max_tx_rings = 0;
@@ -554,7 +599,15 @@
adapter->max_sds_rings = num_msix -
max_tx_rings - 1;
} else {
- adapter->max_sds_rings = num_msix;
+ adapter->ahw->num_msix = num_msix;
+ if (qlcnic_check_multi_tx(adapter) &&
+ !adapter->ahw->diag_test &&
+ (adapter->max_drv_tx_rings > 1))
+ max_sds_rings = num_msix - max_tx_rings;
+ else
+ max_sds_rings = num_msix;
+
+ adapter->max_sds_rings = max_sds_rings;
}
dev_info(&pdev->dev, "using msi-x interrupts\n");
return err;
@@ -570,6 +623,8 @@
num_msix += (max_tx_rings + 1);
} else {
num_msix = rounddown_pow_of_two(err);
+ if (qlcnic_check_multi_tx(adapter))
+ num_msix += max_tx_rings;
}
if (num_msix) {
@@ -605,6 +660,7 @@
adapter->msix_entries[0].vector = pdev->irq;
return err;
}
+
if (qlcnic_use_msi || qlcnic_use_msi_x)
return -EOPNOTSUPP;
@@ -621,28 +677,69 @@
return err;
}
-int qlcnic_82xx_setup_intr(struct qlcnic_adapter *adapter, u8 num_intr)
+int qlcnic_82xx_setup_intr(struct qlcnic_adapter *adapter, u8 num_intr, int txq)
{
+ struct qlcnic_hardware_context *ahw = adapter->ahw;
int num_msix, err = 0;
if (!num_intr)
num_intr = QLCNIC_DEF_NUM_STS_DESC_RINGS;
- if (adapter->ahw->msix_supported)
+ if (ahw->msix_supported) { /* base vector count: pow2(min(online CPUs, num_intr)) */
num_msix = rounddown_pow_of_two(min_t(int, num_online_cpus(),
num_intr));
- else
+ if (qlcnic_check_multi_tx(adapter)) { /* multi-TX adds one vector per TX ring */
+ if (txq) /* a non-zero txq overrides the current TX ring count */
+ adapter->max_drv_tx_rings = txq;
+ num_msix += adapter->max_drv_tx_rings;
+ }
+ } else {
num_msix = 1;
+ }
err = qlcnic_enable_msix(adapter, num_msix);
- if (err == -ENOMEM || !err)
+ if (err == -ENOMEM)
return err;
- err = qlcnic_enable_msi_legacy(adapter);
- if (!err)
- return err;
+ if (!(adapter->flags & QLCNIC_MSIX_ENABLED)) { /* MSI-X did not come up: drop multi-TX and fall back */
+ qlcnic_disable_multi_tx(adapter);
- return -EIO;
+ err = qlcnic_enable_msi_legacy(adapter);
+ if (err) /* report a failed fallback instead of dropping through to "return 0" */
+ return err;
+ }
+
+ return 0;
+}
+
+int qlcnic_82xx_mq_intrpt(struct qlcnic_adapter *adapter, int op_type)
+{
+	struct qlcnic_hardware_context *ahw = adapter->ahw;
+	int vec, err;
+
+	/* Only multi-TX over MSI-X, outside a diag test, builds the table. */
+	if (!qlcnic_check_multi_tx(adapter) || ahw->diag_test ||
+	    !(adapter->flags & QLCNIC_MSIX_ENABLED))
+		return 0;
+
+	ahw->intr_tbl = vzalloc(ahw->num_msix *
+				sizeof(struct qlcnic_intrpt_config));
+	if (!ahw->intr_tbl)
+		return -ENOMEM;
+
+	for (vec = 0; vec < ahw->num_msix; vec++) {
+		ahw->intr_tbl[vec].type = QLCNIC_INTRPT_MSIX;
+		ahw->intr_tbl[vec].id = vec;
+		ahw->intr_tbl[vec].src = 0;
+	}
+
+	/* NOTE(review): op_type is never read; the op passed on is always 1. */
+	err = qlcnic_82xx_config_intrpt(adapter, 1);
+	if (err)
+		dev_err(&adapter->pdev->dev,
+			"Failed to configure Interrupt for %d vector\n",
+			ahw->num_msix);
+	return err;
}
void qlcnic_teardown_intr(struct qlcnic_adapter *adapter)
@@ -829,7 +926,9 @@
*bar = QLCNIC_82XX_BAR0_LENGTH;
break;
case PCI_DEVICE_ID_QLOGIC_QLE834X:
+ case PCI_DEVICE_ID_QLOGIC_QLE844X:
case PCI_DEVICE_ID_QLOGIC_VF_QLE834X:
+ case PCI_DEVICE_ID_QLOGIC_VF_QLE844X:
*bar = QLCNIC_83XX_BAR0_LENGTH;
break;
default:
@@ -1413,6 +1512,7 @@
for (ring = 0; ring < num_sds_rings; ring++) {
sds_ring = &recv_ctx->sds_rings[ring];
if (qlcnic_82xx_check(adapter) &&
+ !qlcnic_check_multi_tx(adapter) &&
(ring == (num_sds_rings - 1))) {
if (!(adapter->flags &
QLCNIC_MSIX_ENABLED))
@@ -1436,9 +1536,11 @@
return err;
}
}
- if (qlcnic_83xx_check(adapter) &&
- (adapter->flags & QLCNIC_MSIX_ENABLED) &&
- !(adapter->flags & QLCNIC_TX_INTR_SHARED)) {
+ if ((qlcnic_82xx_check(adapter) &&
+ qlcnic_check_multi_tx(adapter)) ||
+ (qlcnic_83xx_check(adapter) &&
+ (adapter->flags & QLCNIC_MSIX_ENABLED) &&
+ !(adapter->flags & QLCNIC_TX_INTR_SHARED))) {
handler = qlcnic_msix_tx_intr;
for (ring = 0; ring < adapter->max_drv_tx_rings;
ring++) {
@@ -1473,8 +1575,10 @@
free_irq(sds_ring->irq, sds_ring);
}
}
- if (qlcnic_83xx_check(adapter) &&
- !(adapter->flags & QLCNIC_TX_INTR_SHARED)) {
+ if ((qlcnic_83xx_check(adapter) &&
+ !(adapter->flags & QLCNIC_TX_INTR_SHARED)) ||
+ (qlcnic_82xx_check(adapter) &&
+ qlcnic_check_multi_tx(adapter))) {
for (ring = 0; ring < adapter->max_drv_tx_rings;
ring++) {
tx_ring = &adapter->tx_ring[ring];
@@ -1510,8 +1614,10 @@
if (test_bit(__QLCNIC_DEV_UP, &adapter->state))
return 0;
+
if (qlcnic_set_eswitch_port_config(adapter))
return -EIO;
+
qlcnic_get_lro_mss_capability(adapter);
if (qlcnic_fw_create_ctx(adapter))
@@ -1558,6 +1664,8 @@
void __qlcnic_down(struct qlcnic_adapter *adapter, struct net_device *netdev)
{
+ int ring;
+
if (adapter->is_up != QLCNIC_ADAPTER_UP_MAGIC)
return;
@@ -1567,7 +1675,6 @@
if (qlcnic_sriov_vf_check(adapter))
qlcnic_sriov_cleanup_async_list(&adapter->ahw->sriov->bc);
smp_mb();
- spin_lock(&adapter->tx_clean_lock);
netif_carrier_off(netdev);
adapter->ahw->linkup = 0;
netif_tx_disable(netdev);
@@ -1585,8 +1692,9 @@
adapter->flags &= ~QLCNIC_FW_LRO_MSS_CAP;
qlcnic_reset_rx_buffers_list(adapter);
- qlcnic_release_tx_buffers(adapter);
- spin_unlock(&adapter->tx_clean_lock);
+
+ for (ring = 0; ring < adapter->max_drv_tx_rings; ring++)
+ qlcnic_release_tx_buffers(adapter, &adapter->tx_ring[ring]);
}
/* Usage: During suspend and firmware recovery module */
@@ -1666,6 +1774,7 @@
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
struct qlcnic_host_sds_ring *sds_ring;
+ int max_tx_rings = adapter->max_drv_tx_rings;
int ring;
clear_bit(__QLCNIC_DEV_UP, &adapter->state);
@@ -1682,6 +1791,7 @@
adapter->ahw->diag_test = 0;
adapter->max_sds_rings = max_sds_rings;
+ adapter->max_drv_tx_rings = max_tx_rings;
if (qlcnic_attach(adapter))
goto out;
@@ -1750,6 +1860,7 @@
adapter->max_sds_rings = 1;
adapter->ahw->diag_test = test;
adapter->ahw->linkup = 0;
+ adapter->max_drv_tx_rings = 1;
ret = qlcnic_attach(adapter);
if (ret) {
@@ -1907,12 +2018,18 @@
netdev->priv_flags |= IFF_UNICAST_FLT;
netdev->irq = adapter->msix_entries[0].vector;
+ err = qlcnic_set_real_num_queues(adapter, netdev);
+ if (err)
+ return err;
+
err = register_netdev(netdev);
if (err) {
dev_err(&pdev->dev, "failed to register net device\n");
return err;
}
+ qlcnic_dcb_init_dcbnl_ops(adapter);
+
return 0;
}
@@ -1975,7 +2092,8 @@
tx_ring->cmd_buf_arr = cmd_buf_arr;
}
- if (qlcnic_83xx_check(adapter)) {
+ if (qlcnic_83xx_check(adapter) ||
+ (qlcnic_82xx_check(adapter) && qlcnic_check_multi_tx(adapter))) {
for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) {
tx_ring = &adapter->tx_ring[ring];
tx_ring->adapter = adapter;
@@ -1986,6 +2104,7 @@
}
}
}
+
return 0;
}
@@ -2004,6 +2123,17 @@
qlcnic_fw_cmd_set_drv_version(adapter, fw_cmd);
}
+static int qlcnic_register_dcb(struct qlcnic_adapter *adapter)
+{
+ return __qlcnic_register_dcb(adapter); /* thin wrapper; the callee is an inline stub returning 0 when CONFIG_QLCNIC_DCB is off */
+}
+
+void qlcnic_clear_dcb_ops(struct qlcnic_adapter *adapter)
+{
+ kfree(adapter->dcb); /* release the adapter's DCB object */
+ adapter->dcb = NULL; /* leave no dangling pointer; qlcnic_probe() tests adapter->dcb before attaching */
+}
+
static int
qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
@@ -2048,9 +2178,11 @@
ahw->reg_tbl = (u32 *) qlcnic_reg_tbl;
break;
case PCI_DEVICE_ID_QLOGIC_QLE834X:
+ case PCI_DEVICE_ID_QLOGIC_QLE844X:
qlcnic_83xx_register_map(ahw);
break;
case PCI_DEVICE_ID_QLOGIC_VF_QLE834X:
+ case PCI_DEVICE_ID_QLOGIC_VF_QLE844X:
qlcnic_sriov_vf_register_map(ahw);
break;
default:
@@ -2061,7 +2193,8 @@
if (err)
goto err_out_free_hw_res;
- netdev = alloc_etherdev(sizeof(struct qlcnic_adapter));
+ netdev = alloc_etherdev_mq(sizeof(struct qlcnic_adapter),
+ QLCNIC_MAX_TX_RINGS);
if (!netdev) {
err = -ENOMEM;
goto err_out_iounmap;
@@ -2091,14 +2224,14 @@
adapter->fdb_mac_learn = true;
else if (qlcnic_mac_learn == DRV_MAC_LEARN)
adapter->drv_mac_learn = true;
- adapter->max_drv_tx_rings = 1;
rwlock_init(&adapter->ahw->crb_lock);
mutex_init(&adapter->ahw->mem_lock);
- spin_lock_init(&adapter->tx_clean_lock);
INIT_LIST_HEAD(&adapter->mac_list);
+ qlcnic_register_dcb(adapter);
+
if (qlcnic_82xx_check(adapter)) {
qlcnic_check_vf(adapter, ent);
adapter->portnum = adapter->ahw->pci_func;
@@ -2108,12 +2241,31 @@
goto err_out_free_hw;
}
+ qlcnic_get_multiq_capability(adapter);
+
+ if ((adapter->ahw->act_pci_func > 2) &&
+ qlcnic_check_multi_tx(adapter)) {
+ adapter->max_drv_tx_rings = QLCNIC_DEF_NUM_TX_RINGS;
+ dev_info(&adapter->pdev->dev,
+ "vNIC mode enabled, Set max TX rings = %d\n",
+ adapter->max_drv_tx_rings);
+ }
+
+ if (!qlcnic_check_multi_tx(adapter)) {
+ clear_bit(__QLCNIC_MULTI_TX_UNIQUE, &adapter->state);
+ adapter->max_drv_tx_rings = 1;
+ }
err = qlcnic_setup_idc_param(adapter);
if (err)
goto err_out_free_hw;
adapter->flags |= QLCNIC_NEED_FLR;
+
+ if (adapter->dcb && qlcnic_dcb_attach(adapter))
+ qlcnic_clear_dcb_ops(adapter);
+
} else if (qlcnic_83xx_check(adapter)) {
+ adapter->max_drv_tx_rings = 1;
qlcnic_83xx_check_vf(adapter, ent);
adapter->portnum = adapter->ahw->pci_func;
err = qlcnic_83xx_init(adapter, pci_using_dac);
@@ -2132,6 +2284,8 @@
if (qlcnic_read_mac_addr(adapter))
dev_warn(&pdev->dev, "failed to read mac addr\n");
+ qlcnic_read_phys_port_id(adapter);
+
if (adapter->portnum == 0) {
qlcnic_get_board_name(adapter, board_name);
@@ -2146,7 +2300,7 @@
"Device does not support MSI interrupts\n");
if (qlcnic_82xx_check(adapter)) {
- err = qlcnic_setup_intr(adapter, 0);
+ err = qlcnic_setup_intr(adapter, 0, 0);
if (err) {
dev_err(&pdev->dev, "Failed to setup interrupt\n");
goto err_out_disable_msi;
@@ -2234,6 +2388,8 @@
qlcnic_cancel_idc_work(adapter);
ahw = adapter->ahw;
+ qlcnic_dcb_free(adapter);
+
unregister_netdev(netdev);
qlcnic_sriov_cleanup(adapter);
@@ -2276,6 +2432,7 @@
destroy_workqueue(adapter->qlcnic_wq);
adapter->qlcnic_wq = NULL;
}
+
qlcnic_free_adapter_resources(adapter);
kfree(ahw);
free_netdev(netdev);
@@ -2334,7 +2491,7 @@
if (err)
goto err_out;
- netif_start_queue(netdev);
+ netif_tx_start_all_queues(netdev);
return 0;
@@ -2466,6 +2623,8 @@
static void qlcnic_tx_timeout(struct net_device *netdev)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
+ struct qlcnic_host_tx_ring *tx_ring;
+ int ring;
if (test_bit(__QLCNIC_RESETTING, &adapter->state))
return;
@@ -2479,6 +2638,25 @@
QLCNIC_FORCE_FW_DUMP_KEY);
} else {
netdev_info(netdev, "Tx timeout, reset adapter context.\n");
+ if (qlcnic_82xx_check(adapter)) {
+ for (ring = 0; ring < adapter->max_drv_tx_rings;
+ ring++) {
+ tx_ring = &adapter->tx_ring[ring];
+ dev_info(&netdev->dev, "ring=%d\n", ring);
+ dev_info(&netdev->dev, "crb_intr_mask=%d\n",
+ readl(tx_ring->crb_intr_mask));
+ dev_info(&netdev->dev, "producer=%d\n",
+ readl(tx_ring->crb_cmd_producer));
+ dev_info(&netdev->dev, "sw_consumer = %d\n",
+ tx_ring->sw_consumer);
+ dev_info(&netdev->dev, "hw_consumer = %d\n",
+ le32_to_cpu(*(tx_ring->hw_consumer)));
+ dev_info(&netdev->dev, "xmit-on=%llu\n",
+ tx_ring->xmit_on);
+ dev_info(&netdev->dev, "xmit-off=%llu\n",
+ tx_ring->xmit_off);
+ }
+ }
adapter->ahw->reset_context = 1;
}
}
@@ -3072,6 +3250,8 @@
return;
}
attach:
+ qlcnic_dcb_get_info(adapter);
+
if (netif_running(netdev)) {
if (qlcnic_up(adapter, netdev))
goto done;
@@ -3243,7 +3423,7 @@
qlcnic_clr_drv_state(adapter);
kfree(adapter->msix_entries);
adapter->msix_entries = NULL;
- err = qlcnic_setup_intr(adapter, 0);
+ err = qlcnic_setup_intr(adapter, 0, 0);
if (err) {
kfree(adapter->msix_entries);
@@ -3368,16 +3548,65 @@
return err;
}
+int qlcnic_validate_max_tx_rings(struct qlcnic_adapter *adapter, u32 txq)
+{
+ struct net_device *netdev = adapter->netdev;
+ u8 max_hw = QLCNIC_MAX_TX_RINGS;
+ u32 max_allowed;
+ /* Returns 0 if txq is an acceptable Tx ring count, -EINVAL otherwise; every rejection is logged. */
+ if (!qlcnic_82xx_check(adapter)) {
+ netdev_err(netdev, "No Multi TX-Q support\n");
+ return -EINVAL;
+ }
+ /* With both qlcnic_use_msi_x and qlcnic_use_msi off (INT-x mode) the request is refused. */
+ if (!qlcnic_use_msi_x && !qlcnic_use_msi) {
+ netdev_err(netdev, "No Multi TX-Q support in INT-x mode\n");
+ return -EINVAL;
+ }
+ /* qlcnic_check_multi_tx() must also pass; it logs the same message as the 82xx check above. */
+ if (!qlcnic_check_multi_tx(adapter)) {
+ netdev_err(netdev, "No Multi TX-Q support\n");
+ return -EINVAL;
+ }
+ /* Absolute upper bound, checked before the CPU-count based limit below. */
+ if (txq > QLCNIC_MAX_TX_RINGS) {
+ netdev_err(netdev, "Invalid ring count\n");
+ return -EINVAL;
+ }
+ /* Limit is min(max_hw, online CPUs) rounded down to a power of two; txq itself must be a power of two. */
+ max_allowed = rounddown_pow_of_two(min_t(int, max_hw,
+ num_online_cpus()));
+ if ((txq > max_allowed) || !is_power_of_2(txq)) {
+ if (!is_power_of_2(txq))
+ netdev_err(netdev,
+ "TX queue should be a power of 2\n");
+ if (txq > num_online_cpus())
+ netdev_err(netdev,
+ "Tx queue should not be higher than [%u], number of online CPUs in the system\n",
+ num_online_cpus());
+ netdev_err(netdev, "Unable to configure %u Tx rings\n", txq);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int qlcnic_validate_max_rss(struct qlcnic_adapter *adapter,
- __u32 val)
+ __u32 val)
{
struct net_device *netdev = adapter->netdev;
u8 max_hw = adapter->ahw->max_rx_ques;
u32 max_allowed;
- if (val > QLC_MAX_SDS_RINGS) {
+ if (qlcnic_82xx_check(adapter) && !qlcnic_use_msi_x &&
+ !qlcnic_use_msi) {
+ netdev_err(netdev, "No RSS support in INT-x mode\n");
+ return -EINVAL;
+ }
+
+ if (val > QLCNIC_MAX_SDS_RINGS) {
netdev_err(netdev, "RSS value should not be higher than %u\n",
- QLC_MAX_SDS_RINGS);
+ QLCNIC_MAX_SDS_RINGS);
return -EINVAL;
}
@@ -3407,27 +3636,48 @@
return 0;
}
-int qlcnic_set_max_rss(struct qlcnic_adapter *adapter, u8 data, size_t len)
+int qlcnic_set_max_rss(struct qlcnic_adapter *adapter, u8 data, int txq)
{
int err;
struct net_device *netdev = adapter->netdev;
+ int num_msix;
if (test_bit(__QLCNIC_RESETTING, &adapter->state))
return -EBUSY;
+ if (qlcnic_82xx_check(adapter) && !qlcnic_use_msi_x &&
+ !qlcnic_use_msi) {
+ netdev_err(netdev, "No RSS support in INT-x mode\n");
+ return -EINVAL;
+ }
+
netif_device_detach(netdev);
if (netif_running(netdev))
__qlcnic_down(adapter, netdev);
qlcnic_detach(adapter);
+ if (qlcnic_82xx_check(adapter)) {
+ if (txq != 0)
+ adapter->max_drv_tx_rings = txq;
+
+ if (qlcnic_check_multi_tx(adapter) &&
+ (txq > adapter->max_drv_tx_rings))
+ num_msix = adapter->max_drv_tx_rings;
+ else
+ num_msix = data;
+ }
+
if (qlcnic_83xx_check(adapter)) {
qlcnic_83xx_free_mbx_intr(adapter);
qlcnic_83xx_enable_mbx_poll(adapter);
}
+ netif_set_real_num_tx_queues(netdev, adapter->max_drv_tx_rings);
+
qlcnic_teardown_intr(adapter);
- err = qlcnic_setup_intr(adapter, data);
+
+ err = qlcnic_setup_intr(adapter, data, txq);
if (err) {
kfree(adapter->msix_entries);
netdev_err(netdev, "failed to setup interrupt\n");
@@ -3455,8 +3705,7 @@
goto done;
qlcnic_restore_indev_addr(netdev, NETDEV_UP);
}
- err = len;
- done:
+done:
netif_device_attach(netdev);
clear_bit(__QLCNIC_RESETTING, &adapter->state);
return err;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index dc24979..26f9aa6 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -398,10 +398,14 @@
}
static int qlcnic_sriov_set_pvid_mode(struct qlcnic_adapter *adapter,
- struct qlcnic_cmd_args *cmd)
+ struct qlcnic_cmd_args *cmd, u32 cap)
{
- adapter->rx_pvid = (cmd->rsp.arg[1] >> 16) & 0xffff;
- adapter->flags &= ~QLCNIC_TAGGING_ENABLED;
+ if (cap & QLC_83XX_PVID_STRIP_CAPABILITY) { /* cap is the capabilities word handed in by the caller */
+ adapter->rx_pvid = 0; /* capability bit set: rx_pvid cleared, flags left untouched */
+ } else {
+ adapter->rx_pvid = (cmd->rsp.arg[1] >> 16) & 0xffff; /* upper 16 bits of response word 1 */
+ adapter->flags &= ~QLCNIC_TAGGING_ENABLED;
+ }
return 0;
}
@@ -432,12 +436,14 @@
return 0;
}
-static int qlcnic_sriov_get_vf_acl(struct qlcnic_adapter *adapter)
+static int qlcnic_sriov_get_vf_acl(struct qlcnic_adapter *adapter,
+ struct qlcnic_info *info)
{
struct qlcnic_sriov *sriov = adapter->ahw->sriov;
struct qlcnic_cmd_args cmd;
- int ret;
+ int ret, cap;
+ cap = info->capabilities;
ret = qlcnic_sriov_alloc_bc_mbx_args(&cmd, QLCNIC_BC_CMD_GET_ACL);
if (ret)
return ret;
@@ -453,7 +459,7 @@
ret = qlcnic_sriov_set_guest_vlan_mode(adapter, &cmd);
break;
case QLC_PVID_MODE:
- ret = qlcnic_sriov_set_pvid_mode(adapter, &cmd);
+ ret = qlcnic_sriov_set_pvid_mode(adapter, &cmd, cap);
break;
}
}
@@ -476,7 +482,7 @@
if (err)
return -EIO;
- err = qlcnic_sriov_get_vf_acl(adapter);
+ err = qlcnic_sriov_get_vf_acl(adapter, &nic_info);
if (err)
return err;
@@ -506,7 +512,7 @@
dev_warn(&adapter->pdev->dev,
"Device does not support MSI interrupts\n");
- err = qlcnic_setup_intr(adapter, 1);
+ err = qlcnic_setup_intr(adapter, 1, 0);
if (err) {
dev_err(&adapter->pdev->dev, "Failed to setup interrupt\n");
goto err_out_disable_msi;
@@ -532,6 +538,9 @@
if (err)
goto err_out_send_channel_term;
+ if (adapter->dcb && qlcnic_dcb_attach(adapter))
+ qlcnic_clear_dcb_ops(adapter);
+
err = qlcnic_setup_netdev(adapter, adapter->netdev, pci_using_dac);
if (err)
goto err_out_send_channel_term;
@@ -539,6 +548,7 @@
pci_set_drvdata(adapter->pdev, adapter);
dev_info(&adapter->pdev->dev, "%s: XGbE port initialized\n",
adapter->netdev->name);
+
qlcnic_schedule_work(adapter, qlcnic_sriov_vf_poll_dev_state,
adapter->ahw->idc.delay);
return 0;
@@ -1571,6 +1581,8 @@
if (err)
goto err_out_term_channel;
+ qlcnic_dcb_get_info(adapter);
+
return 0;
err_out_term_channel:
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index eb49cd6..2d6faf0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
@@ -1284,6 +1284,10 @@
QLCNIC_CMD_GET_STATISTICS,
QLCNIC_CMD_GET_PORT_CONFIG,
QLCNIC_CMD_GET_LINK_STATUS,
+ QLCNIC_CMD_DCB_QUERY_CAP,
+ QLCNIC_CMD_DCB_QUERY_PARAM,
+ QLCNIC_CMD_INIT_NIC_FUNC,
+ QLCNIC_CMD_STOP_NIC_FUNC,
};
static const struct qlcnic_sriov_cmd_handler qlcnic_pf_bc_cmd_hdlr[] = {
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b5eb419..6f87f2c 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -1897,12 +1897,13 @@
void *p)
{
struct rtl8169_private *tp = netdev_priv(dev);
-
- if (regs->len > R8169_REGS_SIZE)
- regs->len = R8169_REGS_SIZE;
+ u32 __iomem *data = tp->mmio_addr;
+ u32 *dw = p;
+ int i;
rtl_lock_work(tp);
- memcpy_fromio(p, tp->mmio_addr, regs->len);
+ for (i = 0; i < R8169_REGS_SIZE; i += 4)
+ memcpy_fromio(dw++, data++, 4);
rtl_unlock_work(tp);
}
@@ -7088,7 +7089,7 @@
RTL_W8(Cfg9346, Cfg9346_Unlock);
RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
- RTL_W8(Config5, RTL_R8(Config5) & PMEStatus);
+ RTL_W8(Config5, RTL_R8(Config5) & (BWF | MWF | UWF | LanWake | PMEStatus));
if ((RTL_R8(Config3) & (LinkUp | MagicPacket)) != 0)
tp->features |= RTL_FEATURE_WOL;
if ((RTL_R8(Config5) & (UWF | BWF | MWF)) != 0)
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 9e2afe8..06d8f62 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -378,6 +378,8 @@
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_r8a777x,
+ .register_type = SH_ETH_REG_FAST_RCAR,
+
.ecsr_value = ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
.ecsipr_value = ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
.eesipr_value = 0x01ff009f,
@@ -398,6 +400,8 @@
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_r8a777x,
+ .register_type = SH_ETH_REG_FAST_RCAR,
+
.ecsr_value = ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
.ecsipr_value = ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
.eesipr_value = 0x01ff009f,
@@ -435,6 +439,8 @@
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_sh7724,
+ .register_type = SH_ETH_REG_FAST_SH4,
+
.ecsr_value = ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
.ecsipr_value = ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
.eesipr_value = 0x01ff009f,
@@ -473,6 +479,8 @@
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_sh7757,
+ .register_type = SH_ETH_REG_FAST_SH4,
+
.eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
.rmcr_value = 0x00000001,
@@ -541,6 +549,8 @@
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_giga,
+ .register_type = SH_ETH_REG_GIGABIT,
+
.ecsr_value = ECSR_ICD | ECSR_MPD,
.ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
.eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
@@ -599,6 +609,8 @@
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_gether,
+ .register_type = SH_ETH_REG_GIGABIT,
+
.ecsr_value = ECSR_ICD | ECSR_MPD,
.ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
.eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
@@ -626,6 +638,8 @@
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_gether,
+ .register_type = SH_ETH_REG_GIGABIT,
+
.ecsr_value = ECSR_ICD | ECSR_MPD,
.ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
.eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
@@ -663,6 +677,8 @@
.set_duplex = sh_eth_set_duplex,
.set_rate = sh_eth_set_rate_gether,
+ .register_type = SH_ETH_REG_GIGABIT,
+
.ecsr_value = ECSR_ICD | ECSR_MPD,
.ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
.eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
@@ -685,6 +701,8 @@
};
static struct sh_eth_cpu_data sh7619_data = {
+ .register_type = SH_ETH_REG_FAST_SH3_SH2,
+
.eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
.apr = 1,
@@ -694,6 +712,8 @@
};
static struct sh_eth_cpu_data sh771x_data = {
+ .register_type = SH_ETH_REG_FAST_SH3_SH2, /* later passed to sh_eth_get_register_offset() */
+
.eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
.tsu = 1,
};
@@ -2586,7 +2606,7 @@
struct resource *res;
struct net_device *ndev = NULL;
struct sh_eth_private *mdp = NULL;
- struct sh_eth_plat_data *pd = pdev->dev.platform_data;
+ struct sh_eth_plat_data *pd = dev_get_platdata(&pdev->dev);
const struct platform_device_id *id = platform_get_device_id(pdev);
/* get base addr */
@@ -2619,9 +2639,6 @@
SET_NETDEV_DEV(ndev, &pdev->dev);
- /* Fill in the fields of the device structure with ethernet values. */
- ether_setup(ndev);
-
mdp = netdev_priv(ndev);
mdp->num_tx_ring = TX_RING_SIZE;
mdp->num_rx_ring = RX_RING_SIZE;
@@ -2643,10 +2660,10 @@
mdp->edmac_endian = pd->edmac_endian;
mdp->no_ether_link = pd->no_ether_link;
mdp->ether_link_active_low = pd->ether_link_active_low;
- mdp->reg_offset = sh_eth_get_register_offset(pd->register_type);
/* set cpu data */
mdp->cd = (struct sh_eth_cpu_data *)id->driver_data;
+ mdp->reg_offset = sh_eth_get_register_offset(mdp->cd->register_type);
sh_eth_set_default_cpu_data(mdp->cd);
/* set function */
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index da93f5c..a0db02c 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -157,6 +157,13 @@
SH_ETH_MAX_REGISTER_OFFSET,
};
+enum { /* values stored in sh_eth_cpu_data.register_type and passed to sh_eth_get_register_offset() */
+ SH_ETH_REG_GIGABIT,
+ SH_ETH_REG_FAST_RCAR,
+ SH_ETH_REG_FAST_SH4,
+ SH_ETH_REG_FAST_SH3_SH2
+};
+
/* Driver's parameters */
#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
#define SH4_SKB_RX_ALIGN 32
@@ -454,6 +461,7 @@
void (*set_rate)(struct net_device *ndev);
/* mandatory initialize value */
+ int register_type;
unsigned long eesipr_value;
/* optional initialize value */
diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c
index 856e523..c765718 100644
--- a/drivers/net/ethernet/seeq/sgiseeq.c
+++ b/drivers/net/ethernet/seeq/sgiseeq.c
@@ -721,7 +721,7 @@
static int sgiseeq_probe(struct platform_device *pdev)
{
- struct sgiseeq_platform_data *pd = pdev->dev.platform_data;
+ struct sgiseeq_platform_data *pd = dev_get_platdata(&pdev->dev);
struct hpc3_regs *hpcregs = pd->hpc;
struct sgiseeq_init_block *sr;
unsigned int irq = pd->irq;
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index e8e01a1..e7dbd2d 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -33,9 +33,8 @@
int efx_nic_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buffer,
unsigned int len, gfp_t gfp_flags)
{
- buffer->addr = dma_alloc_coherent(&efx->pci_dev->dev, len,
- &buffer->dma_addr,
- gfp_flags | __GFP_ZERO);
+ buffer->addr = dma_zalloc_coherent(&efx->pci_dev->dev, len,
+ &buffer->dma_addr, gfp_flags);
if (!buffer->addr)
return -ENOMEM;
buffer->len = len;
diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c
index 9f5f35e..770036b 100644
--- a/drivers/net/ethernet/sgi/meth.c
+++ b/drivers/net/ethernet/sgi/meth.c
@@ -212,9 +212,8 @@
static int meth_init_tx_ring(struct meth_private *priv)
{
/* Init TX ring */
- priv->tx_ring = dma_alloc_coherent(NULL, TX_RING_BUFFER_SIZE,
- &priv->tx_ring_dma,
- GFP_ATOMIC | __GFP_ZERO);
+ priv->tx_ring = dma_zalloc_coherent(NULL, TX_RING_BUFFER_SIZE,
+ &priv->tx_ring_dma, GFP_ATOMIC);
if (!priv->tx_ring)
return -ENOMEM;
diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c
index 02df089..ee18e6f 100644
--- a/drivers/net/ethernet/sis/sis190.c
+++ b/drivers/net/ethernet/sis/sis190.c
@@ -1770,9 +1770,6 @@
struct sis190_private *tp = netdev_priv(dev);
unsigned long flags;
- if (regs->len > SIS190_REGS_SIZE)
- regs->len = SIS190_REGS_SIZE;
-
spin_lock_irqsave(&tp->lock, flags);
memcpy_fromio(p, tp->mmio_addr, regs->len);
spin_unlock_irqrestore(&tp->lock, flags);
diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index 345558f..afe01c4 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -2067,7 +2067,7 @@
lp->netdev = ndev;
#ifdef SMC_DYNAMIC_BUS_CONFIG
{
- struct smc911x_platdata *pd = pdev->dev.platform_data;
+ struct smc911x_platdata *pd = dev_get_platdata(&pdev->dev);
if (!pd) {
ret = -EINVAL;
goto release_both;
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index cde13be..73be7f3 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2202,7 +2202,7 @@
*/
static int smc_drv_probe(struct platform_device *pdev)
{
- struct smc91x_platdata *pd = pdev->dev.platform_data;
+ struct smc91x_platdata *pd = dev_get_platdata(&pdev->dev);
struct smc_local *lp;
struct net_device *ndev;
struct resource *res, *ires;
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index a1419211..5fdbc26 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -2374,7 +2374,7 @@
struct device_node *np = pdev->dev.of_node;
struct net_device *dev;
struct smsc911x_data *pdata;
- struct smsc911x_platform_config *config = pdev->dev.platform_data;
+ struct smsc911x_platform_config *config = dev_get_platdata(&pdev->dev);
struct resource *res, *irq_res;
unsigned int intcfg = 0;
int res_size, irq_flags;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index c922fde..f16a9bd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -70,7 +70,6 @@
struct net_device *dev;
struct device *device;
struct mac_device_info *hw;
- int no_csum_insertion;
spinlock_t lock;
struct phy_device *phydev ____cacheline_aligned_in_smp;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0a9bb9d..8d4ccd3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1224,8 +1224,9 @@
*/
static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
{
- if (likely(priv->plat->force_sf_dma_mode ||
- ((priv->plat->tx_coe) && (!priv->no_csum_insertion)))) {
+ if (priv->plat->force_thresh_dma_mode)
+ priv->hw->dma->dma_mode(priv->ioaddr, tc, tc);
+ else if (priv->plat->force_sf_dma_mode || priv->plat->tx_coe) {
/*
* In case of GMAC, SF mode can be enabled
* to perform the TX COE in HW. This depends on:
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index da8be6e..623ebc5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -79,6 +79,11 @@
of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl);
dma_cfg->fixed_burst = of_property_read_bool(np, "snps,fixed-burst");
dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst");
+ plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode");
+ if (plat->force_thresh_dma_mode) {
+ plat->force_sf_dma_mode = 0;
+ pr_warn("force_sf_dma_mode is ignored if force_thresh_dma_mode is set.");
+ }
return 0;
}
@@ -113,7 +118,7 @@
if (IS_ERR(addr))
return PTR_ERR(addr);
- plat_dat = pdev->dev.platform_data;
+ plat_dat = dev_get_platdata(&pdev->dev);
if (pdev->dev.of_node) {
if (!plat_dat)
plat_dat = devm_kzalloc(&pdev->dev,
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index fa32240..269c08b 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -9360,7 +9360,7 @@
struct device_attribute *attr, char *buf)
{
struct platform_device *plat_dev = to_platform_device(dev);
- struct niu_parent *p = plat_dev->dev.platform_data;
+ struct niu_parent *p = dev_get_platdata(&plat_dev->dev);
u32 port_phy = p->port_phy;
char *orig_buf = buf;
int i;
@@ -9390,7 +9390,7 @@
struct device_attribute *attr, char *buf)
{
struct platform_device *plat_dev = to_platform_device(dev);
- struct niu_parent *p = plat_dev->dev.platform_data;
+ struct niu_parent *p = dev_get_platdata(&plat_dev->dev);
const char *type_str;
switch (p->plat_type) {
@@ -9419,7 +9419,7 @@
int rx)
{
struct platform_device *plat_dev = to_platform_device(dev);
- struct niu_parent *p = plat_dev->dev.platform_data;
+ struct niu_parent *p = dev_get_platdata(&plat_dev->dev);
char *orig_buf = buf;
u8 *arr;
int i;
@@ -9452,7 +9452,7 @@
struct device_attribute *attr, char *buf)
{
struct platform_device *plat_dev = to_platform_device(dev);
- struct niu_parent *p = plat_dev->dev.platform_data;
+ struct niu_parent *p = dev_get_platdata(&plat_dev->dev);
return sprintf(buf, "%d\n", p->num_ports);
}
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index 0d43fa9..7217ee5 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -1239,7 +1239,7 @@
static int bigmac_sbus_remove(struct platform_device *op)
{
- struct bigmac *bp = dev_get_drvdata(&op->dev);
+ struct bigmac *bp = platform_get_drvdata(op);
struct device *parent = op->dev.parent;
struct net_device *net_dev = bp->dev;
struct platform_device *qec_op;
@@ -1259,8 +1259,6 @@
free_netdev(net_dev);
- dev_set_drvdata(&op->dev, NULL);
-
return 0;
}
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index 171f5b0..c67e683 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -3231,7 +3231,7 @@
static int hme_sbus_remove(struct platform_device *op)
{
- struct happy_meal *hp = dev_get_drvdata(&op->dev);
+ struct happy_meal *hp = platform_get_drvdata(op);
struct net_device *net_dev = hp->dev;
unregister_netdev(net_dev);
@@ -3250,8 +3250,6 @@
free_netdev(net_dev);
- dev_set_drvdata(&op->dev, NULL);
-
return 0;
}
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 31bbbca..2dc16b6 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -636,7 +636,7 @@
{
int i;
struct cpmac_priv *priv = netdev_priv(dev);
- struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data;
+ struct plat_cpmac_data *pdata = dev_get_platdata(&priv->pdev->dev);
ar7_device_reset(pdata->reset_bit);
cpmac_write(priv->regs, CPMAC_RX_CONTROL,
@@ -659,7 +659,7 @@
{
int i;
struct cpmac_priv *priv = netdev_priv(dev);
- struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data;
+ struct plat_cpmac_data *pdata = dev_get_platdata(&priv->pdev->dev);
ar7_device_reset(pdata->reset_bit);
for (i = 0; i < 8; i++) {
@@ -1118,7 +1118,7 @@
struct net_device *dev;
struct plat_cpmac_data *pdata;
- pdata = pdev->dev.platform_data;
+ pdata = dev_get_platdata(&pdev->dev);
if (external_switch || dumb_switch) {
strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE); /* fixed phys bus */
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 0fcf212..79974e3 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -34,9 +34,9 @@
#include <linux/of_device.h>
#include <linux/if_vlan.h>
-#include <linux/platform_data/cpsw.h>
#include <linux/pinctrl/consumer.h>
+#include "cpsw.h"
#include "cpsw_ale.h"
#include "cpts.h"
#include "davinci_cpdma.h"
@@ -1640,6 +1640,29 @@
return -EOPNOTSUPP;
}
+static void cpsw_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+ struct cpsw_priv *priv = netdev_priv(ndev);
+ int slave_no = cpsw_slave_index(priv);
+ /* Start from "nothing supported, nothing enabled"; these values stand when no PHY is attached. */
+ wol->supported = 0;
+ wol->wolopts = 0;
+ /* If the slave picked by cpsw_slave_index() has a PHY, let the PHY layer fill in *wol. */
+ if (priv->slaves[slave_no].phy)
+ phy_ethtool_get_wol(priv->slaves[slave_no].phy, wol);
+}
+
+static int cpsw_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+ struct cpsw_priv *priv = netdev_priv(ndev);
+ int slave_no = cpsw_slave_index(priv);
+ /* Forward the request to the selected slave's PHY; -EOPNOTSUPP when that slave has no PHY. */
+ if (priv->slaves[slave_no].phy)
+ return phy_ethtool_set_wol(priv->slaves[slave_no].phy, wol);
+ else
+ return -EOPNOTSUPP;
+}
+
static const struct ethtool_ops cpsw_ethtool_ops = {
.get_drvinfo = cpsw_get_drvinfo,
.get_msglevel = cpsw_get_msglevel,
@@ -1653,6 +1676,8 @@
.get_sset_count = cpsw_get_sset_count,
.get_strings = cpsw_get_strings,
.get_ethtool_stats = cpsw_get_ethtool_stats,
+ .get_wol = cpsw_get_wol,
+ .set_wol = cpsw_set_wol,
};
static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv,
diff --git a/include/linux/platform_data/cpsw.h b/drivers/net/ethernet/ti/cpsw.h
similarity index 85%
rename from include/linux/platform_data/cpsw.h
rename to drivers/net/ethernet/ti/cpsw.h
index bb3cd58..eb3e101 100644
--- a/include/linux/platform_data/cpsw.h
+++ b/drivers/net/ethernet/ti/cpsw.h
@@ -1,11 +1,10 @@
-/*
- * Texas Instruments Ethernet Switch Driver
+/* Texas Instruments Ethernet Switch Driver
*
- * Copyright (C) 2012 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments
*
* This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
*
* This program is distributed "as is" WITHOUT ANY WARRANTY of any
* kind, whether express or implied; without even the implied warranty
@@ -22,14 +21,13 @@
int phy_if;
u8 mac_addr[ETH_ALEN];
u16 dual_emac_res_vlan; /* Reserved VLAN for DualEMAC */
-
};
struct cpsw_platform_data {
+ struct cpsw_slave_data *slave_data;
u32 ss_reg_ofs; /* Subsystem control register offset */
u32 channels; /* number of cpdma channels (symmetric) */
u32 slaves; /* number of slave cpgmac ports */
- struct cpsw_slave_data *slave_data;
u32 active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */
u32 cpts_clock_mult; /* convert input clock ticks to nanoseconds */
u32 cpts_clock_shift; /* convert input clock ticks to nanoseconds */
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 031ebc8..90a7946 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -591,6 +591,7 @@
spin_unlock_irqrestore(&chan->lock, flags);
return 0;
}
+EXPORT_SYMBOL_GPL(cpdma_chan_get_stats);
int cpdma_chan_dump(struct cpdma_chan *chan)
{
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 1a222bce..67df09e 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1761,7 +1761,7 @@
const u8 *mac_addr;
if (!IS_ENABLED(CONFIG_OF) || !pdev->dev.of_node)
- return pdev->dev.platform_data;
+ return dev_get_platdata(&pdev->dev);
pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
if (!pdata)
diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index 16ddfc3..4ec9265 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -314,7 +314,7 @@
static int davinci_mdio_probe(struct platform_device *pdev)
{
- struct mdio_platform_data *pdata = pdev->dev.platform_data;
+ struct mdio_platform_data *pdata = dev_get_platdata(&pdev->dev);
struct device *dev = &pdev->dev;
struct davinci_mdio_data *data;
struct resource *res;
@@ -421,8 +421,7 @@
static int davinci_mdio_remove(struct platform_device *pdev)
{
- struct device *dev = &pdev->dev;
- struct davinci_mdio_data *data = dev_get_drvdata(dev);
+ struct davinci_mdio_data *data = platform_get_drvdata(pdev);
if (data->bus) {
mdiobus_unregister(data->bus);
@@ -434,8 +433,6 @@
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
- dev_set_drvdata(dev, NULL);
-
kfree(data);
return 0;
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index 01bdc6c..c4dbf98 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -1308,13 +1308,13 @@
data->id, dev->irq, dev->name);
}
- data->rxring = dma_alloc_coherent(NULL, rxring_size, &data->rxdma,
- GFP_KERNEL | __GFP_ZERO);
+ data->rxring = dma_zalloc_coherent(NULL, rxring_size, &data->rxdma,
+ GFP_KERNEL);
if (!data->rxring)
return -ENOMEM;
- data->txring = dma_alloc_coherent(NULL, txring_size, &data->txdma,
- GFP_KERNEL | __GFP_ZERO);
+ data->txring = dma_zalloc_coherent(NULL, txring_size, &data->txdma,
+ GFP_KERNEL);
if (!data->txring) {
pci_free_consistent(0, rxring_size, data->rxring, data->rxdma);
return -ENOMEM;
@@ -1558,7 +1558,7 @@
hw_info *einfo;
int err = 0;
- einfo = pdev->dev.platform_data;
+ einfo = dev_get_platdata(&pdev->dev);
if (NULL == einfo) {
printk(KERN_ERR "tsi-eth %d: Missing additional data!\n",
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index 30fed08..0df36c6 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -622,7 +622,7 @@
static int w5100_hw_probe(struct platform_device *pdev)
{
- struct wiznet_platform_data *data = pdev->dev.platform_data;
+ struct wiznet_platform_data *data = dev_get_platdata(&pdev->dev);
struct net_device *ndev = platform_get_drvdata(pdev);
struct w5100_priv *priv = netdev_priv(ndev);
const char *name = netdev_name(ndev);
diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c
index e928845..71c27b3 100644
--- a/drivers/net/ethernet/wiznet/w5300.c
+++ b/drivers/net/ethernet/wiznet/w5300.c
@@ -542,7 +542,7 @@
static int w5300_hw_probe(struct platform_device *pdev)
{
- struct wiznet_platform_data *data = pdev->dev.platform_data;
+ struct wiznet_platform_data *data = dev_get_platdata(&pdev->dev);
struct net_device *ndev = platform_get_drvdata(pdev);
struct w5300_priv *priv = netdev_priv(ndev);
const char *name = netdev_name(ndev);
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 58eb448..b88121f 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -243,15 +243,15 @@
/* allocate the tx and rx ring buffer descriptors. */
/* returns a virtual address and a physical address. */
- lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent,
- sizeof(*lp->tx_bd_v) * TX_BD_NUM,
- &lp->tx_bd_p, GFP_KERNEL | __GFP_ZERO);
+ lp->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
+ sizeof(*lp->tx_bd_v) * TX_BD_NUM,
+ &lp->tx_bd_p, GFP_KERNEL);
if (!lp->tx_bd_v)
goto out;
- lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent,
- sizeof(*lp->rx_bd_v) * RX_BD_NUM,
- &lp->rx_bd_p, GFP_KERNEL | __GFP_ZERO);
+ lp->rx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
+ sizeof(*lp->rx_bd_v) * RX_BD_NUM,
+ &lp->rx_bd_p, GFP_KERNEL);
if (!lp->rx_bd_v)
goto out;
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index fb7d1c2..b2ff038 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -201,17 +201,15 @@
/*
* Allocate the Tx and Rx buffer descriptors.
*/
- lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent,
- sizeof(*lp->tx_bd_v) * TX_BD_NUM,
- &lp->tx_bd_p,
- GFP_KERNEL | __GFP_ZERO);
+ lp->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
+ sizeof(*lp->tx_bd_v) * TX_BD_NUM,
+ &lp->tx_bd_p, GFP_KERNEL);
if (!lp->tx_bd_v)
goto out;
- lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent,
- sizeof(*lp->rx_bd_v) * RX_BD_NUM,
- &lp->rx_bd_p,
- GFP_KERNEL | __GFP_ZERO);
+ lp->rx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
+ sizeof(*lp->rx_bd_v) * RX_BD_NUM,
+ &lp->rx_bd_p, GFP_KERNEL);
if (!lp->rx_bd_v)
goto out;
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index fd4dbda..4c619ea 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1230,8 +1230,7 @@
*/
static int xemaclite_of_remove(struct platform_device *of_dev)
{
- struct device *dev = &of_dev->dev;
- struct net_device *ndev = dev_get_drvdata(dev);
+ struct net_device *ndev = platform_get_drvdata(of_dev);
struct net_local *lp = netdev_priv(ndev);
@@ -1250,7 +1249,6 @@
lp->phy_node = NULL;
xemaclite_remove_ndev(ndev, of_dev);
- dev_set_drvdata(dev, NULL);
return 0;
}
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 3d689fc..e78802e 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1384,7 +1384,7 @@
{
struct port *port;
struct net_device *dev;
- struct eth_plat_info *plat = pdev->dev.platform_data;
+ struct eth_plat_info *plat = dev_get_platdata(&pdev->dev);
u32 regs_phys;
char phy_id[MII_BUS_ID_SIZE + 3];
int err;
diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
index 4c8ddc9..0b40e1c 100644
--- a/drivers/net/fddi/defxx.c
+++ b/drivers/net/fddi/defxx.c
@@ -1068,9 +1068,9 @@
#endif
sizeof(PI_CONSUMER_BLOCK) +
(PI_ALIGN_K_DESC_BLK - 1);
- bp->kmalloced = top_v = dma_alloc_coherent(bp->bus_dev, alloc_size,
- &bp->kmalloced_dma,
- GFP_ATOMIC | __GFP_ZERO);
+ bp->kmalloced = top_v = dma_zalloc_coherent(bp->bus_dev, alloc_size,
+ &bp->kmalloced_dma,
+ GFP_ATOMIC);
if (top_v == NULL)
return DFX_K_FAILURE;
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index 3adb43c..7bbd318 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -351,16 +351,16 @@
/* Allocate memory if needed */
self->rx_buff.head =
- dma_alloc_coherent(NULL, self->rx_buff.truesize,
- &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO);
+ dma_zalloc_coherent(NULL, self->rx_buff.truesize,
+ &self->rx_buff_dma, GFP_KERNEL);
if (self->rx_buff.head == NULL) {
err = -ENOMEM;
goto err_out2;
}
self->tx_buff.head =
- dma_alloc_coherent(NULL, self->tx_buff.truesize,
- &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO);
+ dma_zalloc_coherent(NULL, self->tx_buff.truesize,
+ &self->tx_buff_dma, GFP_KERNEL);
if (self->tx_buff.head == NULL) {
err = -ENOMEM;
goto err_out3;
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index 9cf836b5..ceeb537 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -430,8 +430,8 @@
/* Allocate memory if needed */
self->rx_buff.head =
- dma_alloc_coherent(NULL, self->rx_buff.truesize,
- &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO);
+ dma_zalloc_coherent(NULL, self->rx_buff.truesize,
+ &self->rx_buff_dma, GFP_KERNEL);
if (self->rx_buff.head == NULL) {
err = -ENOMEM;
goto out2;
@@ -439,8 +439,8 @@
}
self->tx_buff.head =
- dma_alloc_coherent(NULL, self->tx_buff.truesize,
- &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO);
+ dma_zalloc_coherent(NULL, self->tx_buff.truesize,
+ &self->tx_buff_dma, GFP_KERNEL);
if (self->tx_buff.head == NULL) {
err = -ENOMEM;
goto out3;
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c
index 964b116..3eeaaf8 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/net/irda/pxaficp_ir.c
@@ -915,7 +915,7 @@
err = register_netdev(dev);
if (err == 0)
- dev_set_drvdata(&pdev->dev, dev);
+ platform_set_drvdata(pdev, dev);
if (err) {
if (si->pdata->shutdown)
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index aa05dad..0dcdf15 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -562,14 +562,14 @@
self->tx_buff.truesize = SMSC_IRCC2_TX_BUFF_TRUESIZE;
self->rx_buff.head =
- dma_alloc_coherent(NULL, self->rx_buff.truesize,
- &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO);
+ dma_zalloc_coherent(NULL, self->rx_buff.truesize,
+ &self->rx_buff_dma, GFP_KERNEL);
if (self->rx_buff.head == NULL)
goto err_out2;
self->tx_buff.head =
- dma_alloc_coherent(NULL, self->tx_buff.truesize,
- &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO);
+ dma_zalloc_coherent(NULL, self->tx_buff.truesize,
+ &self->tx_buff_dma, GFP_KERNEL);
if (self->tx_buff.head == NULL)
goto err_out3;
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index 51f2bc3..9abaec2 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -210,8 +210,7 @@
pci_write_config_byte(pcidev,0x42,(bTmp | 0xf0));
pci_write_config_byte(pcidev,0x5a,0xc0);
WriteLPCReg(0x28, 0x70 );
- if (via_ircc_open(pcidev, &info, 0x3076) == 0)
- rc=0;
+ rc = via_ircc_open(pcidev, &info, 0x3076);
} else
rc = -ENODEV; //IR not turn on
} else { //Not VT1211
@@ -249,8 +248,7 @@
info.irq=FirIRQ;
info.dma=FirDRQ1;
info.dma2=FirDRQ0;
- if (via_ircc_open(pcidev, &info, 0x3096) == 0)
- rc=0;
+ rc = via_ircc_open(pcidev, &info, 0x3096);
} else
rc = -ENODEV; //IR not turn on !!!!!
}//Not VT1211
@@ -363,16 +361,16 @@
/* Allocate memory if needed */
self->rx_buff.head =
- dma_alloc_coherent(&pdev->dev, self->rx_buff.truesize,
- &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO);
+ dma_zalloc_coherent(&pdev->dev, self->rx_buff.truesize,
+ &self->rx_buff_dma, GFP_KERNEL);
if (self->rx_buff.head == NULL) {
err = -ENOMEM;
goto err_out2;
}
self->tx_buff.head =
- dma_alloc_coherent(&pdev->dev, self->tx_buff.truesize,
- &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO);
+ dma_zalloc_coherent(&pdev->dev, self->tx_buff.truesize,
+ &self->tx_buff_dma, GFP_KERNEL);
if (self->tx_buff.head == NULL) {
err = -ENOMEM;
goto err_out3;
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index bb8857a..e641bb2 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -215,16 +215,16 @@
/* Allocate memory if needed */
self->rx_buff.head =
- dma_alloc_coherent(NULL, self->rx_buff.truesize,
- &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO);
+ dma_zalloc_coherent(NULL, self->rx_buff.truesize,
+ &self->rx_buff_dma, GFP_KERNEL);
if (self->rx_buff.head == NULL) {
err = -ENOMEM;
goto err_out1;
}
self->tx_buff.head =
- dma_alloc_coherent(NULL, self->tx_buff.truesize,
- &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO);
+ dma_zalloc_coherent(NULL, self->tx_buff.truesize,
+ &self->tx_buff_dma, GFP_KERNEL);
if (self->tx_buff.head == NULL) {
err = -ENOMEM;
goto err_out2;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 510a9b6..201ef17 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -686,7 +686,7 @@
dev->priv_flags |= IFF_UNICAST_FLT;
dev->netdev_ops = &macvlan_netdev_ops;
dev->destructor = free_netdev;
- dev->header_ops = &macvlan_hard_header_ops,
+ dev->header_ops = &macvlan_hard_header_ops;
dev->ethtool_ops = &macvlan_ethtool_ops;
}
EXPORT_SYMBOL_GPL(macvlan_common_setup);
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 1c6e111..9dccb1e 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -68,6 +68,8 @@
#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
NETIF_F_TSO6 | NETIF_F_UFO)
#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
+#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG)
+
/*
* RCU usage:
* The macvtap_queue and the macvlan_dev are loosely coupled, the
@@ -278,7 +280,8 @@
{
struct macvlan_dev *vlan = netdev_priv(dev);
struct macvtap_queue *q = macvtap_get_queue(dev, skb);
- netdev_features_t features;
+ netdev_features_t features = TAP_FEATURES;
+
if (!q)
goto drop;
@@ -287,9 +290,11 @@
skb->dev = dev;
/* Apply the forward feature mask so that we perform segmentation
- * according to users wishes.
+ * according to the user's wishes. This only works if VNET_HDR is
+ * enabled.
*/
- features = netif_skb_features(skb) & vlan->tap_features;
+ if (q->flags & IFF_VNET_HDR)
+ features |= vlan->tap_features;
if (netif_needs_gso(skb, features)) {
struct sk_buff *segs = __skb_gso_segment(skb, features, false);
@@ -961,8 +966,7 @@
/* tap_features are the same as features on tun/tap and
* reflect user expectations.
*/
- vlan->tap_features = vlan->dev->features &
- (feature_mask | ~TUN_OFFLOADS);
+ vlan->tap_features = feature_mask;
vlan->set_features = features;
netdev_update_features(vlan->dev);
@@ -1058,10 +1062,6 @@
TUN_F_TSO_ECN | TUN_F_UFO))
return -EINVAL;
- /* TODO: only accept frames with the features that
- got enabled for forwarded frames */
- if (!(q->flags & IFF_VNET_HDR))
- return -EINVAL;
rtnl_lock();
ret = set_offload(q, arg);
rtnl_unlock();
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index a47f923..8004acb 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -191,7 +191,7 @@
pdata = mdio_gpio_of_get_data(pdev);
bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio");
} else {
- pdata = pdev->dev.platform_data;
+ pdata = dev_get_platdata(&pdev->dev);
bus_id = pdev->id;
}
diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c
index e91d7d7..d2dd9e4 100644
--- a/drivers/net/phy/mdio-mux-gpio.c
+++ b/drivers/net/phy/mdio-mux-gpio.c
@@ -106,7 +106,7 @@
static int mdio_mux_gpio_remove(struct platform_device *pdev)
{
- struct mdio_mux_gpio_state *s = pdev->dev.platform_data;
+ struct mdio_mux_gpio_state *s = dev_get_platdata(&pdev->dev);
mdio_mux_uninit(s->mux_handle);
return 0;
}
diff --git a/drivers/net/phy/mdio-octeon.c b/drivers/net/phy/mdio-octeon.c
index b51fa1f..7f18f80 100644
--- a/drivers/net/phy/mdio-octeon.c
+++ b/drivers/net/phy/mdio-octeon.c
@@ -222,7 +222,7 @@
bus->mii_bus->read = octeon_mdiobus_read;
bus->mii_bus->write = octeon_mdiobus_write;
- dev_set_drvdata(&pdev->dev, bus);
+ platform_set_drvdata(pdev, bus);
err = of_mdiobus_register(bus->mii_bus, pdev->dev.of_node);
if (err)
diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c
index 7f25e49..18969b3 100644
--- a/drivers/net/phy/mdio-sun4i.c
+++ b/drivers/net/phy/mdio-sun4i.c
@@ -101,6 +101,7 @@
struct device_node *np = pdev->dev.of_node;
struct mii_bus *bus;
struct sun4i_mdio_data *data;
+ struct resource *res;
int ret, i;
bus = mdiobus_alloc_size(sizeof(*data));
@@ -114,7 +115,8 @@
snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(&pdev->dev));
bus->parent = &pdev->dev;
- bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL);
+ bus->irq = devm_kzalloc(&pdev->dev, sizeof(int) * PHY_MAX_ADDR,
+ GFP_KERNEL);
if (!bus->irq) {
ret = -ENOMEM;
goto err_out_free_mdiobus;
@@ -124,10 +126,11 @@
bus->irq[i] = PHY_POLL;
data = bus->priv;
- data->membase = of_iomap(np, 0);
- if (!data->membase) {
- ret = -ENOMEM;
- goto err_out_free_mdio_irq;
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ data->membase = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(data->membase)) {
+ ret = PTR_ERR(data->membase);
+ goto err_out_free_mdiobus;
}
data->regulator = devm_regulator_get(&pdev->dev, "phy");
@@ -139,7 +142,7 @@
} else {
ret = regulator_enable(data->regulator);
if (ret)
- goto err_out_free_mdio_irq;
+ goto err_out_free_mdiobus;
}
ret = of_mdiobus_register(bus, np);
@@ -152,8 +155,6 @@
err_out_disable_regulator:
regulator_disable(data->regulator);
-err_out_free_mdio_irq:
- kfree(bus->irq);
err_out_free_mdiobus:
mdiobus_free(bus);
return ret;
@@ -164,7 +165,6 @@
struct mii_bus *bus = platform_get_drvdata(pdev);
mdiobus_unregister(bus);
- kfree(bus->irq);
mdiobus_free(bus);
return 0;
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 9ca4945..c31aad0 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -25,6 +25,7 @@
#include <linux/module.h>
#include <linux/phy.h>
#include <linux/micrel_phy.h>
+#include <linux/of.h>
/* Operation Mode Strap Override */
#define MII_KSZPHY_OMSO 0x16
@@ -53,6 +54,20 @@
#define KS8737_CTRL_INT_ACTIVE_HIGH (1 << 14)
#define KSZ8051_RMII_50MHZ_CLK (1 << 7)
+/* Write/read to/from extended registers */
+#define MII_KSZPHY_EXTREG 0x0b
+#define KSZPHY_EXTREG_WRITE 0x8000
+
+#define MII_KSZPHY_EXTREG_WRITE 0x0c
+#define MII_KSZPHY_EXTREG_READ 0x0d
+
+/* Extended registers */
+#define MII_KSZPHY_CLK_CONTROL_PAD_SKEW 0x104
+#define MII_KSZPHY_RX_DATA_PAD_SKEW 0x105
+#define MII_KSZPHY_TX_DATA_PAD_SKEW 0x106
+
+#define PS_TO_REG 200
+
static int ksz_config_flags(struct phy_device *phydev)
{
int regval;
@@ -65,6 +80,20 @@
return 0;
}
+/*
+ * Write @val to the extended register @regnum.
+ *
+ * Extended registers are accessed indirectly: the register number, with
+ * KSZPHY_EXTREG_WRITE set, is written to MII_KSZPHY_EXTREG and the data is
+ * then written to MII_KSZPHY_EXTREG_WRITE.
+ *
+ * Returns the negative value from phy_write() if selecting the register
+ * fails, otherwise the result of the data write.
+ */
+static int kszphy_extended_write(struct phy_device *phydev,
+				 u32 regnum, u16 val)
+{
+	int ret;
+
+	ret = phy_write(phydev, MII_KSZPHY_EXTREG,
+			KSZPHY_EXTREG_WRITE | regnum);
+	/* Do not write data when the target register was not selected. */
+	if (ret < 0)
+		return ret;
+
+	return phy_write(phydev, MII_KSZPHY_EXTREG_WRITE, val);
+}
+
+/*
+ * Read the extended register @regnum.
+ *
+ * The register number is written to MII_KSZPHY_EXTREG and the contents are
+ * then read from MII_KSZPHY_EXTREG_READ.
+ *
+ * Returns the negative value from phy_write() if selecting the register
+ * fails, otherwise the result of phy_read().
+ */
+static int kszphy_extended_read(struct phy_device *phydev,
+				u32 regnum)
+{
+	int ret;
+
+	ret = phy_write(phydev, MII_KSZPHY_EXTREG, regnum);
+	/* A read after a failed select would return some other register. */
+	if (ret < 0)
+		return ret;
+
+	return phy_read(phydev, MII_KSZPHY_EXTREG_READ);
+}
+
static int kszphy_ack_interrupt(struct phy_device *phydev)
{
/* bit[7..0] int status, which is a read and clear register. */
@@ -141,6 +170,78 @@
return rc < 0 ? rc : 0;
}
+/*
+ * Program one KSZ9021 pad skew register from up to four device-tree
+ * properties.
+ *
+ * @field1..@field4 name the properties that control bits 3:0, 7:4, 11:8
+ * and 15:12 of extended register @reg.  Each property found in @of_node is
+ * divided by PS_TO_REG and truncated to 4 bits.  A field whose property is
+ * absent keeps the value currently held in the register.
+ *
+ * Returns 0 if none of the properties is present, the negative value from
+ * kszphy_extended_read() if the current contents cannot be read, otherwise
+ * the result of kszphy_extended_write().
+ */
+static int ksz9021_load_values_from_of(struct phy_device *phydev,
+				       struct device_node *of_node, u16 reg,
+				       char *field1, char *field2,
+				       char *field3, char *field4)
+{
+	char *fields[4];
+	u32 val;
+	int mask = 0;
+	int newval = 0;
+	int oldval;
+	int i;
+
+	fields[0] = field1;
+	fields[1] = field2;
+	fields[2] = field3;
+	fields[3] = field4;
+
+	for (i = 0; i < 4; i++) {
+		/*
+		 * Presence is taken from the return code rather than from a
+		 * sentinel left in the output variable, so every field is
+		 * handled the same way.
+		 */
+		if (of_property_read_u32(of_node, fields[i], &val))
+			continue;
+
+		mask |= 0xf << (4 * i);
+		newval |= ((val / PS_TO_REG) & 0xf) << (4 * i);
+	}
+
+	if (!mask)
+		return 0;
+
+	/* Read the register back only when some field must be preserved. */
+	if (mask != 0xffff) {
+		oldval = kszphy_extended_read(phydev, reg);
+		if (oldval < 0)
+			return oldval;
+
+		newval |= oldval & ~mask;
+	}
+
+	return kszphy_extended_write(phydev, reg, newval);
+}
+
+/*
+ * Apply the optional pad skew settings from the device tree to a KSZ9021.
+ *
+ * The properties are looked up in the PHY device's own OF node, falling
+ * back to the parent device's node when the PHY has none.  The results of
+ * the individual register updates are not checked; the function always
+ * returns 0.
+ */
+static int ksz9021_config_init(struct phy_device *phydev)
+{
+	/* One entry per skew register; fields are ordered from bits 3:0 up. */
+	static const struct {
+		u16 reg;
+		char *fields[4];
+	} skew_regs[] = {
+		{ MII_KSZPHY_CLK_CONTROL_PAD_SKEW,
+		  { "txen-skew-ps", "txc-skew-ps",
+		    "rxdv-skew-ps", "rxc-skew-ps" } },
+		{ MII_KSZPHY_RX_DATA_PAD_SKEW,
+		  { "rxd0-skew-ps", "rxd1-skew-ps",
+		    "rxd2-skew-ps", "rxd3-skew-ps" } },
+		{ MII_KSZPHY_TX_DATA_PAD_SKEW,
+		  { "txd0-skew-ps", "txd1-skew-ps",
+		    "txd2-skew-ps", "txd3-skew-ps" } },
+	};
+	struct device_node *of_node = phydev->dev.of_node;
+	unsigned int i;
+
+	if (!of_node)
+		of_node = phydev->dev.parent->of_node;
+	if (!of_node)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(skew_regs); i++)
+		ksz9021_load_values_from_of(phydev, of_node,
+					    skew_regs[i].reg,
+					    skew_regs[i].fields[0],
+					    skew_regs[i].fields[1],
+					    skew_regs[i].fields[2],
+					    skew_regs[i].fields[3]);
+
+	return 0;
+}
+
#define KSZ8873MLL_GLOBAL_CONTROL_4 0x06
#define KSZ8873MLL_GLOBAL_CONTROL_4_DUPLEX (1 << 6)
#define KSZ8873MLL_GLOBAL_CONTROL_4_SPEED (1 << 4)
@@ -281,7 +382,7 @@
.name = "Micrel KSZ9021 Gigabit PHY",
.features = (PHY_GBIT_FEATURES | SUPPORTED_Pause),
.flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
- .config_init = kszphy_config_init,
+ .config_init = ksz9021_config_init,
.config_aneg = genphy_config_aneg,
.read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 8e7af83..138de83 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -23,7 +23,7 @@
#define RTL821x_INER_INIT 0x6400
#define RTL821x_INSR 0x13
-#define RTL8211E_INER_LINK_STAT 0x10
+#define RTL8211E_INER_LINK_STATUS 0x400
MODULE_DESCRIPTION("Realtek PHY driver");
MODULE_AUTHOR("Johnson Leung");
@@ -57,7 +57,7 @@
if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
err = phy_write(phydev, RTL821x_INER,
- RTL8211E_INER_LINK_STAT);
+ RTL8211E_INER_LINK_STATUS);
else
err = phy_write(phydev, RTL821x_INER, 0);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7ed13cc..60a1e93 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -138,7 +138,10 @@
struct fasync_struct *fasync;
/* only used for fasnyc */
unsigned int flags;
- u16 queue_index;
+ union {
+ u16 queue_index;
+ unsigned int ifindex;
+ };
struct list_head next;
struct tun_struct *detached;
};
@@ -498,7 +501,7 @@
module_put(THIS_MODULE);
}
-static int tun_attach(struct tun_struct *tun, struct file *file)
+static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
{
struct tun_file *tfile = file->private_data;
int err;
@@ -523,7 +526,7 @@
err = 0;
/* Re-attach the filter to presist device */
- if (tun->filter_attached == true) {
+ if (!skip_filter && (tun->filter_attached == true)) {
err = sk_attach_filter(&tun->fprog, tfile->socket.sk);
if (!err)
goto out;
@@ -1554,7 +1557,7 @@
if (err < 0)
return err;
- err = tun_attach(tun, file);
+ err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER);
if (err < 0)
return err;
@@ -1601,6 +1604,7 @@
dev_net_set(dev, net);
dev->rtnl_link_ops = &tun_link_ops;
+ dev->ifindex = tfile->ifindex;
tun = netdev_priv(dev);
tun->dev = dev;
@@ -1627,7 +1631,7 @@
dev->vlan_features = dev->features;
INIT_LIST_HEAD(&tun->disabled);
- err = tun_attach(tun, file);
+ err = tun_attach(tun, file, false);
if (err < 0)
goto err_free_dev;
@@ -1791,7 +1795,7 @@
ret = security_tun_dev_attach_queue(tun->security);
if (ret < 0)
goto unlock;
- ret = tun_attach(tun, file);
+ ret = tun_attach(tun, file, false);
} else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
tun = rtnl_dereference(tfile->tun);
if (!tun || !(tun->flags & TUN_TAP_MQ) || tfile->detached)
@@ -1817,6 +1821,7 @@
kgid_t group;
int sndbuf;
int vnet_hdr_sz;
+ unsigned int ifindex;
int ret;
if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) {
@@ -1851,6 +1856,19 @@
ret = -EFAULT;
goto unlock;
}
+ if (cmd == TUNSETIFINDEX) {
+ ret = -EPERM;
+ if (tun)
+ goto unlock;
+
+ ret = -EFAULT;
+ if (copy_from_user(&ifindex, argp, sizeof(ifindex)))
+ goto unlock;
+
+ ret = 0;
+ tfile->ifindex = ifindex;
+ goto unlock;
+ }
ret = -EBADFD;
if (!tun)
@@ -1863,6 +1881,11 @@
case TUNGETIFF:
tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
+ if (tfile->detached)
+ ifr.ifr_flags |= IFF_DETACH_QUEUE;
+ if (!tfile->socket.sk->sk_filter)
+ ifr.ifr_flags |= IFF_NOFILTER;
+
if (copy_to_user(argp, &ifr, ifreq_len))
ret = -EFAULT;
break;
@@ -2019,6 +2042,16 @@
tun_detach_filter(tun, tun->numqueues);
break;
+ case TUNGETFILTER:
+ ret = -EINVAL;
+ if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
+ break;
+ ret = -EFAULT;
+ if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog)))
+ break;
+ ret = 0;
+ break;
+
default:
ret = -EINVAL;
break;
@@ -2099,6 +2132,7 @@
rcu_assign_pointer(tfile->tun, NULL);
tfile->net = get_net(current->nsproxy->net_ns);
tfile->flags = 0;
+ tfile->ifindex = 0;
rcu_assign_pointer(tfile->socket.wq, &tfile->wq);
init_waitqueue_head(&tfile->wq.wait);
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index cba1d46..86292e6 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2816,13 +2816,16 @@
static int hso_get_config_data(struct usb_interface *interface)
{
struct usb_device *usbdev = interface_to_usbdev(interface);
- u8 config_data[17];
+ u8 *config_data = kmalloc(17, GFP_KERNEL);
u32 if_num = interface->altsetting->desc.bInterfaceNumber;
s32 result;
+ if (!config_data)
+ return -ENOMEM;
if (usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0),
0x86, 0xC0, 0, 0, config_data, 17,
USB_CTRL_SET_TIMEOUT) != 0x11) {
+ kfree(config_data);
return -EIO;
}
@@ -2873,6 +2876,7 @@
if (config_data[16] & 0x1)
result |= HSO_INFO_CRC_BUG;
+ kfree(config_data);
return result;
}
@@ -2886,6 +2890,11 @@
struct hso_shared_int *shared_int;
struct hso_device *tmp_dev = NULL;
+ if (interface->cur_altsetting->desc.bInterfaceClass != 0xFF) {
+ dev_err(&interface->dev, "Not our interface\n");
+ return -ENODEV;
+ }
+
if_num = interface->altsetting->desc.bInterfaceNumber;
/* Get the interface/port specification from either driver_info or from
@@ -2895,10 +2904,6 @@
else
port_spec = hso_get_config_data(interface);
- if (interface->cur_altsetting->desc.bInterfaceClass != 0xFF) {
- dev_err(&interface->dev, "Not our interface\n");
- return -ENODEV;
- }
/* Check if we need to switch to alt interfaces prior to port
* configuration */
if (interface->num_altsetting > 1)
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 55a62ca..7e2788c 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -313,10 +313,10 @@
struct pci_dev *pdev)
{
if (tbi->map_type == VMXNET3_MAP_SINGLE)
- pci_unmap_single(pdev, tbi->dma_addr, tbi->len,
+ dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
PCI_DMA_TODEVICE);
else if (tbi->map_type == VMXNET3_MAP_PAGE)
- pci_unmap_page(pdev, tbi->dma_addr, tbi->len,
+ dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
PCI_DMA_TODEVICE);
else
BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
@@ -429,25 +429,29 @@
struct vmxnet3_adapter *adapter)
{
if (tq->tx_ring.base) {
- pci_free_consistent(adapter->pdev, tq->tx_ring.size *
- sizeof(struct Vmxnet3_TxDesc),
- tq->tx_ring.base, tq->tx_ring.basePA);
+ dma_free_coherent(&adapter->pdev->dev, tq->tx_ring.size *
+ sizeof(struct Vmxnet3_TxDesc),
+ tq->tx_ring.base, tq->tx_ring.basePA);
tq->tx_ring.base = NULL;
}
if (tq->data_ring.base) {
- pci_free_consistent(adapter->pdev, tq->data_ring.size *
- sizeof(struct Vmxnet3_TxDataDesc),
- tq->data_ring.base, tq->data_ring.basePA);
+ dma_free_coherent(&adapter->pdev->dev, tq->data_ring.size *
+ sizeof(struct Vmxnet3_TxDataDesc),
+ tq->data_ring.base, tq->data_ring.basePA);
tq->data_ring.base = NULL;
}
if (tq->comp_ring.base) {
- pci_free_consistent(adapter->pdev, tq->comp_ring.size *
- sizeof(struct Vmxnet3_TxCompDesc),
- tq->comp_ring.base, tq->comp_ring.basePA);
+ dma_free_coherent(&adapter->pdev->dev, tq->comp_ring.size *
+ sizeof(struct Vmxnet3_TxCompDesc),
+ tq->comp_ring.base, tq->comp_ring.basePA);
tq->comp_ring.base = NULL;
}
- kfree(tq->buf_info);
- tq->buf_info = NULL;
+ if (tq->buf_info) {
+ dma_free_coherent(&adapter->pdev->dev,
+ tq->tx_ring.size * sizeof(tq->buf_info[0]),
+ tq->buf_info, tq->buf_info_pa);
+ tq->buf_info = NULL;
+ }
}
@@ -496,37 +500,38 @@
vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
struct vmxnet3_adapter *adapter)
{
+ size_t sz;
+
BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
tq->comp_ring.base || tq->buf_info);
- tq->tx_ring.base = pci_alloc_consistent(adapter->pdev, tq->tx_ring.size
- * sizeof(struct Vmxnet3_TxDesc),
- &tq->tx_ring.basePA);
+ tq->tx_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
+ tq->tx_ring.size * sizeof(struct Vmxnet3_TxDesc),
+ &tq->tx_ring.basePA, GFP_KERNEL);
if (!tq->tx_ring.base) {
netdev_err(adapter->netdev, "failed to allocate tx ring\n");
goto err;
}
- tq->data_ring.base = pci_alloc_consistent(adapter->pdev,
- tq->data_ring.size *
- sizeof(struct Vmxnet3_TxDataDesc),
- &tq->data_ring.basePA);
+ tq->data_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
+ tq->data_ring.size * sizeof(struct Vmxnet3_TxDataDesc),
+ &tq->data_ring.basePA, GFP_KERNEL);
if (!tq->data_ring.base) {
netdev_err(adapter->netdev, "failed to allocate data ring\n");
goto err;
}
- tq->comp_ring.base = pci_alloc_consistent(adapter->pdev,
- tq->comp_ring.size *
- sizeof(struct Vmxnet3_TxCompDesc),
- &tq->comp_ring.basePA);
+ tq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
+ tq->comp_ring.size * sizeof(struct Vmxnet3_TxCompDesc),
+ &tq->comp_ring.basePA, GFP_KERNEL);
if (!tq->comp_ring.base) {
netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
goto err;
}
- tq->buf_info = kcalloc(tq->tx_ring.size, sizeof(tq->buf_info[0]),
- GFP_KERNEL);
+ sz = tq->tx_ring.size * sizeof(tq->buf_info[0]);
+ tq->buf_info = dma_zalloc_coherent(&adapter->pdev->dev, sz,
+ &tq->buf_info_pa, GFP_KERNEL);
if (!tq->buf_info)
goto err;
@@ -578,7 +583,8 @@
break;
}
- rbi->dma_addr = pci_map_single(adapter->pdev,
+ rbi->dma_addr = dma_map_single(
+ &adapter->pdev->dev,
rbi->skb->data, rbi->len,
PCI_DMA_FROMDEVICE);
} else {
@@ -595,7 +601,8 @@
rq->stats.rx_buf_alloc_failure++;
break;
}
- rbi->dma_addr = pci_map_page(adapter->pdev,
+ rbi->dma_addr = dma_map_page(
+ &adapter->pdev->dev,
rbi->page, 0, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
} else {
@@ -705,7 +712,7 @@
tbi = tq->buf_info + tq->tx_ring.next2fill;
tbi->map_type = VMXNET3_MAP_SINGLE;
- tbi->dma_addr = pci_map_single(adapter->pdev,
+ tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
skb->data + buf_offset, buf_size,
PCI_DMA_TODEVICE);
@@ -1221,7 +1228,8 @@
goto rcd_done;
}
- pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len,
+ dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr,
+ rbi->len,
PCI_DMA_FROMDEVICE);
#ifdef VMXNET3_RSS
@@ -1233,7 +1241,7 @@
/* Immediate refill */
rbi->skb = new_skb;
- rbi->dma_addr = pci_map_single(adapter->pdev,
+ rbi->dma_addr = dma_map_single(&adapter->pdev->dev,
rbi->skb->data, rbi->len,
PCI_DMA_FROMDEVICE);
rxd->addr = cpu_to_le64(rbi->dma_addr);
@@ -1267,7 +1275,7 @@
}
if (rcd->len) {
- pci_unmap_page(adapter->pdev,
+ dma_unmap_page(&adapter->pdev->dev,
rbi->dma_addr, rbi->len,
PCI_DMA_FROMDEVICE);
@@ -1276,7 +1284,8 @@
/* Immediate refill */
rbi->page = new_page;
- rbi->dma_addr = pci_map_page(adapter->pdev, rbi->page,
+ rbi->dma_addr = dma_map_page(&adapter->pdev->dev,
+ rbi->page,
0, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
rxd->addr = cpu_to_le64(rbi->dma_addr);
@@ -1352,13 +1361,13 @@
if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
rq->buf_info[ring_idx][i].skb) {
- pci_unmap_single(adapter->pdev, rxd->addr,
+ dma_unmap_single(&adapter->pdev->dev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
rq->buf_info[ring_idx][i].skb = NULL;
} else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
rq->buf_info[ring_idx][i].page) {
- pci_unmap_page(adapter->pdev, rxd->addr,
+ dma_unmap_page(&adapter->pdev->dev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
@@ -1400,25 +1409,31 @@
}
- kfree(rq->buf_info[0]);
-
for (i = 0; i < 2; i++) {
if (rq->rx_ring[i].base) {
- pci_free_consistent(adapter->pdev, rq->rx_ring[i].size
- * sizeof(struct Vmxnet3_RxDesc),
- rq->rx_ring[i].base,
- rq->rx_ring[i].basePA);
+ dma_free_coherent(&adapter->pdev->dev,
+ rq->rx_ring[i].size
+ * sizeof(struct Vmxnet3_RxDesc),
+ rq->rx_ring[i].base,
+ rq->rx_ring[i].basePA);
rq->rx_ring[i].base = NULL;
}
rq->buf_info[i] = NULL;
}
if (rq->comp_ring.base) {
- pci_free_consistent(adapter->pdev, rq->comp_ring.size *
- sizeof(struct Vmxnet3_RxCompDesc),
- rq->comp_ring.base, rq->comp_ring.basePA);
+ dma_free_coherent(&adapter->pdev->dev, rq->comp_ring.size
+ * sizeof(struct Vmxnet3_RxCompDesc),
+ rq->comp_ring.base, rq->comp_ring.basePA);
rq->comp_ring.base = NULL;
}
+
+ if (rq->buf_info[0]) {
+ size_t sz = sizeof(struct vmxnet3_rx_buf_info) *
+ (rq->rx_ring[0].size + rq->rx_ring[1].size);
+ dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
+ rq->buf_info_pa);
+ }
}
@@ -1503,8 +1518,10 @@
for (i = 0; i < 2; i++) {
sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
- rq->rx_ring[i].base = pci_alloc_consistent(adapter->pdev, sz,
- &rq->rx_ring[i].basePA);
+ rq->rx_ring[i].base = dma_alloc_coherent(
+ &adapter->pdev->dev, sz,
+ &rq->rx_ring[i].basePA,
+ GFP_KERNEL);
if (!rq->rx_ring[i].base) {
netdev_err(adapter->netdev,
"failed to allocate rx ring %d\n", i);
@@ -1513,8 +1530,9 @@
}
sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
- rq->comp_ring.base = pci_alloc_consistent(adapter->pdev, sz,
- &rq->comp_ring.basePA);
+ rq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz,
+ &rq->comp_ring.basePA,
+ GFP_KERNEL);
if (!rq->comp_ring.base) {
netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
goto err;
@@ -1522,7 +1540,8 @@
sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
rq->rx_ring[1].size);
- bi = kzalloc(sz, GFP_KERNEL);
+ bi = dma_zalloc_coherent(&adapter->pdev->dev, sz, &rq->buf_info_pa,
+ GFP_KERNEL);
if (!bi)
goto err;
@@ -2005,6 +2024,7 @@
struct Vmxnet3_RxFilterConf *rxConf =
&adapter->shared->devRead.rxFilterConf;
u8 *new_table = NULL;
+ dma_addr_t new_table_pa = 0;
u32 new_mode = VMXNET3_RXM_UCAST;
if (netdev->flags & IFF_PROMISC) {
@@ -2028,8 +2048,12 @@
new_mode |= VMXNET3_RXM_MCAST;
rxConf->mfTableLen = cpu_to_le16(
netdev_mc_count(netdev) * ETH_ALEN);
- rxConf->mfTablePA = cpu_to_le64(virt_to_phys(
- new_table));
+ new_table_pa = dma_map_single(
+ &adapter->pdev->dev,
+ new_table,
+ rxConf->mfTableLen,
+ PCI_DMA_TODEVICE);
+ rxConf->mfTablePA = cpu_to_le64(new_table_pa);
} else {
netdev_info(netdev, "failed to copy mcast list"
", setting ALL_MULTI\n");
@@ -2056,7 +2080,11 @@
VMXNET3_CMD_UPDATE_MAC_FILTERS);
spin_unlock_irqrestore(&adapter->cmd_lock, flags);
- kfree(new_table);
+ if (new_table) {
+ dma_unmap_single(&adapter->pdev->dev, new_table_pa,
+ rxConf->mfTableLen, PCI_DMA_TODEVICE);
+ kfree(new_table);
+ }
}
void
@@ -2096,7 +2124,7 @@
devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
- devRead->misc.ddPA = cpu_to_le64(virt_to_phys(adapter));
+ devRead->misc.ddPA = cpu_to_le64(adapter->adapter_pa);
devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
/* set up feature flags */
@@ -2125,7 +2153,7 @@
tqc->txRingBasePA = cpu_to_le64(tq->tx_ring.basePA);
tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
- tqc->ddPA = cpu_to_le64(virt_to_phys(tq->buf_info));
+ tqc->ddPA = cpu_to_le64(tq->buf_info_pa);
tqc->txRingSize = cpu_to_le32(tq->tx_ring.size);
tqc->dataRingSize = cpu_to_le32(tq->data_ring.size);
tqc->compRingSize = cpu_to_le32(tq->comp_ring.size);
@@ -2143,8 +2171,7 @@
rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
rqc->compRingBasePA = cpu_to_le64(rq->comp_ring.basePA);
- rqc->ddPA = cpu_to_le64(virt_to_phys(
- rq->buf_info));
+ rqc->ddPA = cpu_to_le64(rq->buf_info_pa);
rqc->rxRingSize[0] = cpu_to_le32(rq->rx_ring[0].size);
rqc->rxRingSize[1] = cpu_to_le32(rq->rx_ring[1].size);
rqc->compRingSize = cpu_to_le32(rq->comp_ring.size);
@@ -2184,8 +2211,9 @@
i, adapter->num_rx_queues);
devRead->rssConfDesc.confVer = 1;
- devRead->rssConfDesc.confLen = sizeof(*rssConf);
- devRead->rssConfDesc.confPA = virt_to_phys(rssConf);
+ devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf));
+ devRead->rssConfDesc.confPA =
+ cpu_to_le64(adapter->rss_conf_pa);
}
#endif /* VMXNET3_RSS */
@@ -2948,9 +2976,13 @@
adapter->pdev = pdev;
spin_lock_init(&adapter->cmd_lock);
- adapter->shared = pci_alloc_consistent(adapter->pdev,
- sizeof(struct Vmxnet3_DriverShared),
- &adapter->shared_pa);
+ adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
+ sizeof(struct vmxnet3_adapter),
+ PCI_DMA_TODEVICE);
+ adapter->shared = dma_alloc_coherent(
+ &adapter->pdev->dev,
+ sizeof(struct Vmxnet3_DriverShared),
+ &adapter->shared_pa, GFP_KERNEL);
if (!adapter->shared) {
dev_err(&pdev->dev, "Failed to allocate memory\n");
err = -ENOMEM;
@@ -2963,8 +2995,9 @@
size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
- adapter->tqd_start = pci_alloc_consistent(adapter->pdev, size,
- &adapter->queue_desc_pa);
+ adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
+ &adapter->queue_desc_pa,
+ GFP_KERNEL);
if (!adapter->tqd_start) {
dev_err(&pdev->dev, "Failed to allocate memory\n");
@@ -2974,7 +3007,10 @@
adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
adapter->num_tx_queues);
- adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL);
+ adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
+ sizeof(struct Vmxnet3_PMConf),
+ &adapter->pm_conf_pa,
+ GFP_KERNEL);
if (adapter->pm_conf == NULL) {
err = -ENOMEM;
goto err_alloc_pm;
@@ -2982,7 +3018,10 @@
#ifdef VMXNET3_RSS
- adapter->rss_conf = kmalloc(sizeof(struct UPT1_RSSConf), GFP_KERNEL);
+ adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
+ sizeof(struct UPT1_RSSConf),
+ &adapter->rss_conf_pa,
+ GFP_KERNEL);
if (adapter->rss_conf == NULL) {
err = -ENOMEM;
goto err_alloc_rss;
@@ -3077,17 +3116,22 @@
vmxnet3_free_pci_resources(adapter);
err_alloc_pci:
#ifdef VMXNET3_RSS
- kfree(adapter->rss_conf);
+ dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
+ adapter->rss_conf, adapter->rss_conf_pa);
err_alloc_rss:
#endif
- kfree(adapter->pm_conf);
+ dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
+ adapter->pm_conf, adapter->pm_conf_pa);
err_alloc_pm:
- pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
- adapter->queue_desc_pa);
+ dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
+ adapter->queue_desc_pa);
err_alloc_queue_desc:
- pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
- adapter->shared, adapter->shared_pa);
+ dma_free_coherent(&adapter->pdev->dev,
+ sizeof(struct Vmxnet3_DriverShared),
+ adapter->shared, adapter->shared_pa);
err_alloc_shared:
+ dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
+ sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
pci_set_drvdata(pdev, NULL);
free_netdev(netdev);
return err;
@@ -3118,16 +3162,21 @@
vmxnet3_free_intr_resources(adapter);
vmxnet3_free_pci_resources(adapter);
#ifdef VMXNET3_RSS
- kfree(adapter->rss_conf);
+ dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
+ adapter->rss_conf, adapter->rss_conf_pa);
#endif
- kfree(adapter->pm_conf);
+ dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
+ adapter->pm_conf, adapter->pm_conf_pa);
size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
- pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
- adapter->queue_desc_pa);
- pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
- adapter->shared, adapter->shared_pa);
+ dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
+ adapter->queue_desc_pa);
+ dma_free_coherent(&adapter->pdev->dev,
+ sizeof(struct Vmxnet3_DriverShared),
+ adapter->shared, adapter->shared_pa);
+ dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
+ sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
free_netdev(netdev);
}
@@ -3227,8 +3276,8 @@
adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
*pmConf));
- adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
- pmConf));
+ adapter->shared->devRead.pmConfDesc.confPA =
+ cpu_to_le64(adapter->pm_conf_pa);
spin_lock_irqsave(&adapter->cmd_lock, flags);
VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
@@ -3265,8 +3314,8 @@
adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
*pmConf));
- adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
- pmConf));
+ adapter->shared->devRead.pmConfDesc.confPA =
+ cpu_to_le64(adapter->pm_conf_pa);
netif_device_attach(netdev);
pci_set_power_state(pdev, PCI_D0);
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 3541814..a03f358 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -70,10 +70,10 @@
/*
* Version numbers
*/
-#define VMXNET3_DRIVER_VERSION_STRING "1.1.30.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING "1.2.0.0-k"
/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM 0x01011E00
+#define VMXNET3_DRIVER_VERSION_NUM 0x01020000
#if defined(CONFIG_PCI_MSI)
/* RSS only makes sense if MSI-X is supported. */
@@ -229,6 +229,7 @@
spinlock_t tx_lock;
struct vmxnet3_cmd_ring tx_ring;
struct vmxnet3_tx_buf_info *buf_info;
+ dma_addr_t buf_info_pa;
struct vmxnet3_tx_data_ring data_ring;
struct vmxnet3_comp_ring comp_ring;
struct Vmxnet3_TxQueueCtrl *shared;
@@ -277,6 +278,7 @@
u32 qid; /* rqID in RCD for buffer from 1st ring */
u32 qid2; /* rqID in RCD for buffer from 2nd ring */
struct vmxnet3_rx_buf_info *buf_info[2];
+ dma_addr_t buf_info_pa;
struct Vmxnet3_RxQueueCtrl *shared;
struct vmxnet3_rq_driver_stats stats;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
@@ -353,6 +355,10 @@
unsigned long state; /* VMXNET3_STATE_BIT_xxx */
int share_intr;
+
+ dma_addr_t adapter_pa;
+ dma_addr_t pm_conf_pa;
+ dma_addr_t rss_conf_pa;
};
#define VMXNET3_WRITE_BAR0_REG(adapter, reg, val) \
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index b9401b5..3b21aca 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -542,12 +542,6 @@
return 0;
}
-static void vxlan_fdb_free_rdst(struct rcu_head *head)
-{
- struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
- kfree(rd);
-}
-
static void vxlan_fdb_free(struct rcu_head *head)
{
struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
@@ -687,7 +681,7 @@
*/
if (rd && !list_is_singular(&f->remotes)) {
list_del_rcu(&rd->list);
- call_rcu(&rd->rcu, vxlan_fdb_free_rdst);
+ kfree_rcu(rd, rcu);
goto out;
}
diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index f7c70b3..c51d2dc 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -431,9 +431,9 @@
u16 ring_mem_size = (ring->type == B43_DMA_64BIT) ?
B43_DMA64_RINGMEMSIZE : B43_DMA32_RINGMEMSIZE;
- ring->descbase = dma_alloc_coherent(ring->dev->dev->dma_dev,
- ring_mem_size, &(ring->dmabase),
- GFP_KERNEL | __GFP_ZERO);
+ ring->descbase = dma_zalloc_coherent(ring->dev->dev->dma_dev,
+ ring_mem_size, &(ring->dmabase),
+ GFP_KERNEL);
if (!ring->descbase)
return -ENOMEM;
diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c
index faeafe2..42eb26c 100644
--- a/drivers/net/wireless/b43legacy/dma.c
+++ b/drivers/net/wireless/b43legacy/dma.c
@@ -331,10 +331,9 @@
static int alloc_ringmemory(struct b43legacy_dmaring *ring)
{
/* GFP flags must match the flags in free_ringmemory()! */
- ring->descbase = dma_alloc_coherent(ring->dev->dev->dma_dev,
- B43legacy_DMA_RINGMEMSIZE,
- &(ring->dmabase),
- GFP_KERNEL | __GFP_ZERO);
+ ring->descbase = dma_zalloc_coherent(ring->dev->dev->dma_dev,
+ B43legacy_DMA_RINGMEMSIZE,
+ &(ring->dmabase), GFP_KERNEL);
if (!ring->descbase)
return -ENOMEM;
diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c
index ac07473..e509030 100644
--- a/drivers/net/wireless/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/hostap/hostap_ioctl.c
@@ -523,9 +523,9 @@
data->length = prism2_ap_get_sta_qual(local, addr, qual, IW_MAX_AP, 1);
- memcpy(extra, &addr, sizeof(struct sockaddr) * data->length);
+ memcpy(extra, addr, sizeof(struct sockaddr) * data->length);
data->flags = 1; /* has quality information */
- memcpy(extra + sizeof(struct sockaddr) * data->length, &qual,
+ memcpy(extra + sizeof(struct sockaddr) * data->length, qual,
sizeof(struct iw_quality) * data->length);
kfree(addr);
diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
index f0a2c95..cae4d31 100644
--- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
@@ -1024,7 +1024,10 @@
if (test_bit(STATUS_EXIT_PENDING, &priv->status))
return;
- if (test_and_clear_bit(STATUS_CHANNEL_SWITCH_PENDING, &priv->status))
+ if (!test_and_clear_bit(STATUS_CHANNEL_SWITCH_PENDING, &priv->status))
+ return;
+
+ if (ctx->vif)
ieee80211_chswitch_done(ctx->vif, is_success);
}
diff --git a/drivers/net/wireless/iwlwifi/iwl-prph.h b/drivers/net/wireless/iwlwifi/iwl-prph.h
index a70c7b9..ff8cc75 100644
--- a/drivers/net/wireless/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/iwlwifi/iwl-prph.h
@@ -97,8 +97,6 @@
#define APMG_PCIDEV_STT_VAL_L1_ACT_DIS (0x00000800)
-#define APMG_RTC_INT_STT_RFKILL (0x10000000)
-
/* Device system time */
#define DEVICE_SYSTEM_TIME_REG 0xA0206C
diff --git a/drivers/net/wireless/iwlwifi/mvm/time-event.c b/drivers/net/wireless/iwlwifi/mvm/time-event.c
index ad9bbca..7fd6fbf 100644
--- a/drivers/net/wireless/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/iwlwifi/mvm/time-event.c
@@ -138,6 +138,20 @@
schedule_work(&mvm->roc_done_wk);
}
+/*
+ * Report a connection loss for a station vif that is not fully
+ * associated, i.e. bss_conf.assoc or bss_conf.dtim_period is unset.
+ * @errmsg, when non-NULL, is logged through IWL_ERR() first.
+ *
+ * Returns true if ieee80211_connection_loss() was called, false if the
+ * vif is not a station or is associated with a known dtim period.
+ */
+static bool iwl_mvm_te_check_disconnect(struct iwl_mvm *mvm,
+					struct ieee80211_vif *vif,
+					const char *errmsg)
+{
+	bool is_station = vif->type == NL80211_IFTYPE_STATION;
+
+	/* Nothing to do for non-stations or for a healthy association. */
+	if (!is_station ||
+	    (vif->bss_conf.assoc && vif->bss_conf.dtim_period))
+		return false;
+
+	if (errmsg)
+		IWL_ERR(mvm, "%s\n", errmsg);
+	ieee80211_connection_loss(vif);
+
+	return true;
+}
+
/*
* Handles a FW notification for an event that is known to the driver.
*
@@ -163,8 +177,13 @@
* P2P Device discoveribility, while there are other higher priority
* events in the system).
*/
- WARN_ONCE(!le32_to_cpu(notif->status),
- "Failed to schedule time event\n");
+ if (WARN_ONCE(!le32_to_cpu(notif->status),
+ "Failed to schedule time event\n")) {
+ if (iwl_mvm_te_check_disconnect(mvm, te_data->vif, NULL)) {
+ iwl_mvm_te_clear_data(mvm, te_data);
+ return;
+ }
+ }
if (le32_to_cpu(notif->action) & TE_NOTIF_HOST_EVENT_END) {
IWL_DEBUG_TE(mvm,
@@ -180,14 +199,8 @@
* By now, we should have finished association
* and know the dtim period.
*/
- if (te_data->vif->type == NL80211_IFTYPE_STATION &&
- (!te_data->vif->bss_conf.assoc ||
- !te_data->vif->bss_conf.dtim_period)) {
- IWL_ERR(mvm,
- "No assocation and the time event is over already...\n");
- ieee80211_connection_loss(te_data->vif);
- }
-
+ iwl_mvm_te_check_disconnect(mvm, te_data->vif,
+ "No assocation and the time event is over already...");
iwl_mvm_te_clear_data(mvm, te_data);
} else if (le32_to_cpu(notif->action) & TE_NOTIF_HOST_EVENT_START) {
te_data->running = true;
diff --git a/drivers/net/wireless/iwlwifi/pcie/rx.c b/drivers/net/wireless/iwlwifi/pcie/rx.c
index 68837d4..5fdb4ee 100644
--- a/drivers/net/wireless/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/rx.c
@@ -888,14 +888,6 @@
iwl_op_mode_hw_rf_kill(trans->op_mode, hw_rfkill);
if (hw_rfkill) {
- /*
- * Clear the interrupt in APMG if the NIC is going down.
- * Note that when the NIC exits RFkill (else branch), we
- * can't access prph and the NIC will be reset in
- * start_hw anyway.
- */
- iwl_write_prph(trans, APMG_RTC_INT_STT_REG,
- APMG_RTC_INT_STT_RFKILL);
set_bit(STATUS_RFKILL, &trans_pcie->status);
if (test_and_clear_bit(STATUS_HCMD_ACTIVE,
&trans_pcie->status))
diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c
index e52d1ce..eca4429 100644
--- a/drivers/net/wireless/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/iwlwifi/pcie/trans.c
@@ -1416,6 +1416,11 @@
goto out_no_pci;
}
+ /* W/A - seems to solve weird behavior. We need to remove this if we
+ * don't want to stay in L1 all the time. This wastes a lot of power */
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |
+ PCIE_LINK_STATE_CLKPM);
+
pci_set_master(pdev);
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(36));
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index 4941f20..b8ba1f9 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -98,10 +98,12 @@
goto exit;
err = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), 0x4,
- USB_DIR_IN | 0x40, 0,0, &ret, sizeof(ret), ZD1201_FW_TIMEOUT);
+ USB_DIR_IN | 0x40, 0, 0, buf, sizeof(ret), ZD1201_FW_TIMEOUT);
if (err < 0)
goto exit;
+ memcpy(&ret, buf, sizeof(ret));
+
if (ret & 0x80) {
err = -EIO;
goto exit;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 8a4d77e..a197743 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -45,31 +45,109 @@
#include <xen/grant_table.h>
#include <xen/xenbus.h>
-struct xen_netbk;
+typedef unsigned int pending_ring_idx_t;
+#define INVALID_PENDING_RING_IDX (~0U)
+
+/* For the head field in pending_tx_info: it is used to indicate
+ * whether this tx info is the head of one or more coalesced requests.
+ *
+ * When head != INVALID_PENDING_RING_IDX, it marks the start of a new
+ * tx request queue and the end of the previous queue.
+ *
+ * An example sequence of head fields (I = INVALID_PENDING_RING_IDX):
+ *
+ * ...|0 I I I|5 I|9 I I I|...
+ * -->|<-INUSE----------------
+ *
+ * After consuming the first slot(s) we have:
+ *
+ * ...|V V V V|5 I|9 I I I|...
+ * -----FREE->|<-INUSE--------
+ *
+ * where V stands for "valid pending ring index". Any number other
+ * than INVALID_PENDING_RING_IDX is OK. These entries are considered
+ * free and can contain any number other than
+ * INVALID_PENDING_RING_IDX. In practice we use 0.
+ *
+ * The in use non-INVALID_PENDING_RING_IDX (say 0, 5 and 9 in the
+ * above example) number is the index into pending_tx_info and
+ * mmap_pages arrays.
+ */
+struct pending_tx_info {
+ struct xen_netif_tx_request req; /* coalesced tx request */
+ pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
+ * if it is head of one or more tx
+ * reqs
+ */
+};
+
+#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
+#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
+
+struct xenvif_rx_meta {
+ int id; /* NOTE(review): presumably the rx request id this entry answers - confirm */
+ int size; /* byte count; the rx copy path accumulates into it (meta->size += bytes) */
+ int gso_size; /* NOTE(review): presumably the GSO size of the skb, 0 if none - confirm */
+};
+
+/* Discriminate from any valid pending_idx value. */
+#define INVALID_PENDING_IDX 0xFFFF
+
+#define MAX_BUFFER_OFFSET PAGE_SIZE
+
+#define MAX_PENDING_REQS 256
struct xenvif {
/* Unique identifier for this interface. */
domid_t domid;
unsigned int handle;
- /* Reference to netback processing backend. */
- struct xen_netbk *netbk;
-
- u8 fe_dev_addr[6];
-
+ /* Use NAPI for guest TX */
+ struct napi_struct napi;
/* When feature-split-event-channels = 0, tx_irq = rx_irq. */
unsigned int tx_irq;
- unsigned int rx_irq;
/* Only used when feature-split-event-channels = 1 */
char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */
- char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
-
- /* List of frontends to notify after a batch of frames sent. */
- struct list_head notify_list;
-
- /* The shared rings and indexes. */
struct xen_netif_tx_back_ring tx;
+ struct sk_buff_head tx_queue;
+ struct page *mmap_pages[MAX_PENDING_REQS];
+ pending_ring_idx_t pending_prod;
+ pending_ring_idx_t pending_cons;
+ u16 pending_ring[MAX_PENDING_REQS];
+ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+
+ /* Coalescing tx requests before copying makes number of grant
+ * copy ops greater or equal to number of slots required. In
+ * worst case a tx request consumes 2 gnttab_copy.
+ */
+ struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
+
+
+ /* Use kthread for guest RX */
+ struct task_struct *task;
+ wait_queue_head_t wq;
+ /* When feature-split-event-channels = 0, tx_irq = rx_irq. */
+ unsigned int rx_irq;
+ /* Only used when feature-split-event-channels = 1 */
+ char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
struct xen_netif_rx_back_ring rx;
+ struct sk_buff_head rx_queue;
+
+ /* Allow xenvif_start_xmit() to peek ahead in the rx request
+ * ring. This is a prediction of what rx_req_cons will be
+ * once all queued skbs are put on the ring.
+ */
+ RING_IDX rx_req_cons_peek;
+
+ /* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
+ * head/fragment page uses 2 copy operations because it
+ * straddles two buffers in the frontend.
+ */
+ struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
+ struct xenvif_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
+
+
+ u8 fe_dev_addr[6];
/* Frontend feature information. */
u8 can_sg:1;
@@ -80,13 +158,6 @@
/* Internal feature information. */
u8 can_queue:1; /* can queue packets for receiver? */
- /*
- * Allow xenvif_start_xmit() to peek ahead in the rx request
- * ring. This is a prediction of what rx_req_cons will be
- * once all queued skbs are put on the ring.
- */
- RING_IDX rx_req_cons_peek;
-
/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
unsigned long credit_bytes;
unsigned long credit_usec;
@@ -97,11 +168,7 @@
unsigned long rx_gso_checksum_fixup;
/* Miscellaneous private stuff. */
- struct list_head schedule_list;
- atomic_t refcnt;
struct net_device *dev;
-
- wait_queue_head_t waiting_to_free;
};
static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
@@ -109,9 +176,6 @@
return to_xenbus_device(vif->dev->dev.parent);
}
-#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
-#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
-
struct xenvif *xenvif_alloc(struct device *parent,
domid_t domid,
unsigned int handle);
@@ -121,39 +185,26 @@
unsigned int rx_evtchn);
void xenvif_disconnect(struct xenvif *vif);
-void xenvif_get(struct xenvif *vif);
-void xenvif_put(struct xenvif *vif);
-
int xenvif_xenbus_init(void);
void xenvif_xenbus_fini(void);
int xenvif_schedulable(struct xenvif *vif);
-int xen_netbk_rx_ring_full(struct xenvif *vif);
+int xenvif_rx_ring_full(struct xenvif *vif);
-int xen_netbk_must_stop_queue(struct xenvif *vif);
+int xenvif_must_stop_queue(struct xenvif *vif);
/* (Un)Map communication rings. */
-void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
-int xen_netbk_map_frontend_rings(struct xenvif *vif,
- grant_ref_t tx_ring_ref,
- grant_ref_t rx_ring_ref);
-
-/* (De)Register a xenvif with the netback backend. */
-void xen_netbk_add_xenvif(struct xenvif *vif);
-void xen_netbk_remove_xenvif(struct xenvif *vif);
-
-/* (De)Schedule backend processing for a xenvif */
-void xen_netbk_schedule_xenvif(struct xenvif *vif);
-void xen_netbk_deschedule_xenvif(struct xenvif *vif);
+void xenvif_unmap_frontend_rings(struct xenvif *vif);
+int xenvif_map_frontend_rings(struct xenvif *vif,
+ grant_ref_t tx_ring_ref,
+ grant_ref_t rx_ring_ref);
/* Check for SKBs from frontend and schedule backend processing */
-void xen_netbk_check_rx_xenvif(struct xenvif *vif);
-/* Receive an SKB from the frontend */
-void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb);
+void xenvif_check_rx_xenvif(struct xenvif *vif);
/* Queue an SKB for transmission to the frontend */
-void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
+void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
/* Notify xenvif that ring now has space to send an skb to the frontend */
void xenvif_notify_tx_completion(struct xenvif *vif);
@@ -161,7 +212,12 @@
void xenvif_carrier_off(struct xenvif *vif);
/* Returns number of ring slots required to send an skb to the frontend */
-unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
+unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
+
+int xenvif_tx_action(struct xenvif *vif, int budget);
+void xenvif_rx_action(struct xenvif *vif);
+
+int xenvif_kthread(void *data);
extern bool separate_tx_rx_irq;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 087d2db..625c6f4 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -30,6 +30,7 @@
#include "common.h"
+#include <linux/kthread.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
@@ -38,17 +39,7 @@
#include <asm/xen/hypercall.h>
#define XENVIF_QUEUE_LENGTH 32
-
-void xenvif_get(struct xenvif *vif)
-{
- atomic_inc(&vif->refcnt);
-}
-
-void xenvif_put(struct xenvif *vif)
-{
- if (atomic_dec_and_test(&vif->refcnt))
- wake_up(&vif->waiting_to_free);
-}
+#define XENVIF_NAPI_WEIGHT 64
int xenvif_schedulable(struct xenvif *vif)
{
@@ -57,28 +48,62 @@
static int xenvif_rx_schedulable(struct xenvif *vif)
{
- return xenvif_schedulable(vif) && !xen_netbk_rx_ring_full(vif);
+ return xenvif_schedulable(vif) && !xenvif_rx_ring_full(vif);
}
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
{
struct xenvif *vif = dev_id;
- if (vif->netbk == NULL)
- return IRQ_HANDLED;
-
- xen_netbk_schedule_xenvif(vif);
+ if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx))
+ napi_schedule(&vif->napi);
return IRQ_HANDLED;
}
+/* NAPI poll callback for guest TX.
+ *
+ * Hands @budget to xenvif_tx_action() and returns the amount of work it
+ * reports. When less than @budget was used, NAPI is completed unless
+ * RING_FINAL_CHECK_FOR_REQUESTS() still sees requests on the tx ring.
+ */
+static int xenvif_poll(struct napi_struct *napi, int budget)
+{
+	struct xenvif *vif = container_of(napi, struct xenvif, napi);
+	unsigned long irq_flags;
+	int pending = 0;
+	int done;
+
+	done = xenvif_tx_action(vif, budget);
+	if (done >= budget)
+		return done;
+
+	/* IRQs must be disabled across the final ring check and
+	 * __napi_complete(). Otherwise an event from the frontend can
+	 * be lost:
+	 *
+	 *   RING_HAS_UNCONSUMED_REQUESTS
+	 *   <frontend generates event to trigger napi_schedule>
+	 *   __napi_complete
+	 *
+	 * The handler is still in scheduled state when the event
+	 * arrives, so the event has no effect. After __napi_complete
+	 * the handler is descheduled and nothing schedules it again,
+	 * so the ring would stall completely.
+	 */
+	local_irq_save(irq_flags);
+
+	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, pending);
+	if (!pending)
+		__napi_complete(napi);
+
+	local_irq_restore(irq_flags);
+
+	return done;
+}
+
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
struct xenvif *vif = dev_id;
- if (vif->netbk == NULL)
- return IRQ_HANDLED;
-
if (xenvif_rx_schedulable(vif))
netif_wake_queue(vif->dev);
@@ -99,7 +124,8 @@
BUG_ON(skb->dev != dev);
- if (vif->netbk == NULL)
+ /* Drop the packet if vif is not ready */
+ if (vif->task == NULL)
goto drop;
/* Drop the packet if the target domain has no receive buffers. */
@@ -107,13 +133,12 @@
goto drop;
/* Reserve ring slots for the worst-case number of fragments. */
- vif->rx_req_cons_peek += xen_netbk_count_skb_slots(vif, skb);
- xenvif_get(vif);
+ vif->rx_req_cons_peek += xenvif_count_skb_slots(vif, skb);
- if (vif->can_queue && xen_netbk_must_stop_queue(vif))
+ if (vif->can_queue && xenvif_must_stop_queue(vif))
netif_stop_queue(dev);
- xen_netbk_queue_tx_skb(vif, skb);
+ xenvif_queue_tx_skb(vif, skb);
return NETDEV_TX_OK;
@@ -123,11 +148,6 @@
return NETDEV_TX_OK;
}
-void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb)
-{
- netif_rx_ni(skb);
-}
-
void xenvif_notify_tx_completion(struct xenvif *vif)
{
if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif))
@@ -142,21 +162,20 @@
static void xenvif_up(struct xenvif *vif)
{
- xen_netbk_add_xenvif(vif);
+ napi_enable(&vif->napi);
enable_irq(vif->tx_irq);
if (vif->tx_irq != vif->rx_irq)
enable_irq(vif->rx_irq);
- xen_netbk_check_rx_xenvif(vif);
+ xenvif_check_rx_xenvif(vif);
}
static void xenvif_down(struct xenvif *vif)
{
+ napi_disable(&vif->napi);
disable_irq(vif->tx_irq);
if (vif->tx_irq != vif->rx_irq)
disable_irq(vif->rx_irq);
del_timer_sync(&vif->credit_timeout);
- xen_netbk_deschedule_xenvif(vif);
- xen_netbk_remove_xenvif(vif);
}
static int xenvif_open(struct net_device *dev)
@@ -272,11 +291,12 @@
struct net_device *dev;
struct xenvif *vif;
char name[IFNAMSIZ] = {};
+ int i;
snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
if (dev == NULL) {
- pr_warn("Could not allocate netdev\n");
+ pr_warn("Could not allocate netdev for %s\n", name);
return ERR_PTR(-ENOMEM);
}
@@ -285,14 +305,9 @@
vif = netdev_priv(dev);
vif->domid = domid;
vif->handle = handle;
- vif->netbk = NULL;
vif->can_sg = 1;
vif->csum = 1;
- atomic_set(&vif->refcnt, 1);
- init_waitqueue_head(&vif->waiting_to_free);
vif->dev = dev;
- INIT_LIST_HEAD(&vif->schedule_list);
- INIT_LIST_HEAD(&vif->notify_list);
vif->credit_bytes = vif->remaining_credit = ~0UL;
vif->credit_usec = 0UL;
@@ -307,6 +322,16 @@
dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
+ skb_queue_head_init(&vif->rx_queue);
+ skb_queue_head_init(&vif->tx_queue);
+
+ vif->pending_cons = 0;
+ vif->pending_prod = MAX_PENDING_REQS;
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+ vif->pending_ring[i] = i;
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+ vif->mmap_pages[i] = NULL;
+
/*
* Initialise a dummy MAC address. We choose the numerically
* largest non-broadcast address to prevent the address getting
@@ -316,6 +341,8 @@
memset(dev->dev_addr, 0xFF, ETH_ALEN);
dev->dev_addr[0] &= ~0x01;
+ netif_napi_add(dev, &vif->napi, xenvif_poll, XENVIF_NAPI_WEIGHT);
+
netif_carrier_off(dev);
err = register_netdev(dev);
@@ -341,7 +368,7 @@
__module_get(THIS_MODULE);
- err = xen_netbk_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
+ err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
if (err < 0)
goto err;
@@ -377,7 +404,14 @@
disable_irq(vif->rx_irq);
}
- xenvif_get(vif);
+	init_waitqueue_head(&vif->wq);
+	/* kthread_create() takes a printf-style name format, so pass the
+	 * device name as an argument to "%s" rather than as the format.
+	 */
+	vif->task = kthread_create(xenvif_kthread,
+				   (void *)vif, "%s", vif->dev->name);
+	if (IS_ERR(vif->task)) {
+		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+		err = PTR_ERR(vif->task);
+		/* Do not leave an ERR_PTR behind: the xmit path and
+		 * xenvif_disconnect() treat any non-NULL vif->task as a
+		 * live thread (the latter calls kthread_stop() on it).
+		 */
+		vif->task = NULL;
+		goto err_rx_unbind;
+	}
rtnl_lock();
if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
@@ -388,12 +422,18 @@
xenvif_up(vif);
rtnl_unlock();
+ wake_up_process(vif->task);
+
return 0;
+
+err_rx_unbind:
+ unbind_from_irqhandler(vif->rx_irq, vif);
+ vif->rx_irq = 0;
err_tx_unbind:
unbind_from_irqhandler(vif->tx_irq, vif);
vif->tx_irq = 0;
err_unmap:
- xen_netbk_unmap_frontend_rings(vif);
+ xenvif_unmap_frontend_rings(vif);
err:
module_put(THIS_MODULE);
return err;
@@ -408,7 +448,6 @@
if (netif_running(dev))
xenvif_down(vif);
rtnl_unlock();
- xenvif_put(vif);
}
void xenvif_disconnect(struct xenvif *vif)
@@ -422,9 +461,6 @@
if (netif_carrier_ok(vif->dev))
xenvif_carrier_off(vif);
- atomic_dec(&vif->refcnt);
- wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0);
-
if (vif->tx_irq) {
if (vif->tx_irq == vif->rx_irq)
unbind_from_irqhandler(vif->tx_irq, vif);
@@ -438,9 +474,14 @@
need_module_put = 1;
}
+ if (vif->task)
+ kthread_stop(vif->task);
+
+ netif_napi_del(&vif->napi);
+
unregister_netdev(vif->dev);
- xen_netbk_unmap_frontend_rings(vif);
+ xenvif_unmap_frontend_rings(vif);
free_netdev(vif->dev);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 64828de..956130c 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -70,131 +70,26 @@
*/
#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
-typedef unsigned int pending_ring_idx_t;
-#define INVALID_PENDING_RING_IDX (~0U)
-
-struct pending_tx_info {
- struct xen_netif_tx_request req; /* coalesced tx request */
- struct xenvif *vif;
- pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
- * if it is head of one or more tx
- * reqs
- */
-};
-
-struct netbk_rx_meta {
- int id;
- int size;
- int gso_size;
-};
-
-#define MAX_PENDING_REQS 256
-
-/* Discriminate from any valid pending_idx value. */
-#define INVALID_PENDING_IDX 0xFFFF
-
-#define MAX_BUFFER_OFFSET PAGE_SIZE
-
-/* extra field used in struct page */
-union page_ext {
- struct {
-#if BITS_PER_LONG < 64
-#define IDX_WIDTH 8
-#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
- unsigned int group:GROUP_WIDTH;
- unsigned int idx:IDX_WIDTH;
-#else
- unsigned int group, idx;
-#endif
- } e;
- void *mapping;
-};
-
-struct xen_netbk {
- wait_queue_head_t wq;
- struct task_struct *task;
-
- struct sk_buff_head rx_queue;
- struct sk_buff_head tx_queue;
-
- struct timer_list net_timer;
-
- struct page *mmap_pages[MAX_PENDING_REQS];
-
- pending_ring_idx_t pending_prod;
- pending_ring_idx_t pending_cons;
- struct list_head net_schedule_list;
-
- /* Protect the net_schedule_list in netif. */
- spinlock_t net_schedule_list_lock;
-
- atomic_t netfront_count;
-
- struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
- /* Coalescing tx requests before copying makes number of grant
- * copy ops greater or equal to number of slots required. In
- * worst case a tx request consumes 2 gnttab_copy.
- */
- struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
-
- u16 pending_ring[MAX_PENDING_REQS];
-
- /*
- * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
- * head/fragment page uses 2 copy operations because it
- * straddles two buffers in the frontend.
- */
- struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
- struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
-};
-
-static struct xen_netbk *xen_netbk;
-static int xen_netbk_group_nr;
-
/*
* If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
* one or more merged tx requests, otherwise it is the continuation of
* previous tx request.
*/
-static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx)
+static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx)
{
- return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
+ return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
}
-void xen_netbk_add_xenvif(struct xenvif *vif)
-{
- int i;
- int min_netfront_count;
- int min_group = 0;
- struct xen_netbk *netbk;
+static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
+ u8 status);
- min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
- for (i = 0; i < xen_netbk_group_nr; i++) {
- int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
- if (netfront_count < min_netfront_count) {
- min_group = i;
- min_netfront_count = netfront_count;
- }
- }
-
- netbk = &xen_netbk[min_group];
-
- vif->netbk = netbk;
- atomic_inc(&netbk->netfront_count);
-}
-
-void xen_netbk_remove_xenvif(struct xenvif *vif)
-{
- struct xen_netbk *netbk = vif->netbk;
- vif->netbk = NULL;
- atomic_dec(&netbk->netfront_count);
-}
-
-static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
- u8 status);
static void make_tx_response(struct xenvif *vif,
struct xen_netif_tx_request *txp,
s8 st);
+
+static inline int tx_work_todo(struct xenvif *vif);
+static inline int rx_work_todo(struct xenvif *vif);
+
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
u16 id,
s8 st,
@@ -202,55 +97,16 @@
u16 size,
u16 flags);
-static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
+static inline unsigned long idx_to_pfn(struct xenvif *vif,
u16 idx)
{
- return page_to_pfn(netbk->mmap_pages[idx]);
+ return page_to_pfn(vif->mmap_pages[idx]);
}
-static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
+static inline unsigned long idx_to_kaddr(struct xenvif *vif,
u16 idx)
{
- return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
-}
-
-/* extra field used in struct page */
-static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
- unsigned int idx)
-{
- unsigned int group = netbk - xen_netbk;
- union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
-
- BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
- pg->mapping = ext.mapping;
-}
-
-static int get_page_ext(struct page *pg,
- unsigned int *pgroup, unsigned int *pidx)
-{
- union page_ext ext = { .mapping = pg->mapping };
- struct xen_netbk *netbk;
- unsigned int group, idx;
-
- group = ext.e.group - 1;
-
- if (group < 0 || group >= xen_netbk_group_nr)
- return 0;
-
- netbk = &xen_netbk[group];
-
- idx = ext.e.idx;
-
- if ((idx < 0) || (idx >= MAX_PENDING_REQS))
- return 0;
-
- if (netbk->mmap_pages[idx] != pg)
- return 0;
-
- *pgroup = group;
- *pidx = idx;
-
- return 1;
+ return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
}
/*
@@ -278,15 +134,10 @@
return i & (MAX_PENDING_REQS-1);
}
-static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
{
return MAX_PENDING_REQS -
- netbk->pending_prod + netbk->pending_cons;
-}
-
-static void xen_netbk_kick_thread(struct xen_netbk *netbk)
-{
- wake_up(&netbk->wq);
+ vif->pending_prod + vif->pending_cons;
}
static int max_required_rx_slots(struct xenvif *vif)
@@ -300,7 +151,7 @@
return max;
}
-int xen_netbk_rx_ring_full(struct xenvif *vif)
+int xenvif_rx_ring_full(struct xenvif *vif)
{
RING_IDX peek = vif->rx_req_cons_peek;
RING_IDX needed = max_required_rx_slots(vif);
@@ -309,16 +160,16 @@
((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
}
-int xen_netbk_must_stop_queue(struct xenvif *vif)
+int xenvif_must_stop_queue(struct xenvif *vif)
{
- if (!xen_netbk_rx_ring_full(vif))
+ if (!xenvif_rx_ring_full(vif))
return 0;
vif->rx.sring->req_event = vif->rx_req_cons_peek +
max_required_rx_slots(vif);
mb(); /* request notification /then/ check the queue */
- return xen_netbk_rx_ring_full(vif);
+ return xenvif_rx_ring_full(vif);
}
/*
@@ -364,9 +215,9 @@
/*
* Figure out how many ring slots we're going to need to send @skb to
* the guest. This function is essentially a dry run of
- * netbk_gop_frag_copy.
+ * xenvif_gop_frag_copy.
*/
-unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
+unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
{
unsigned int count;
int i, copy_off;
@@ -418,15 +269,15 @@
unsigned copy_prod, copy_cons;
unsigned meta_prod, meta_cons;
struct gnttab_copy *copy;
- struct netbk_rx_meta *meta;
+ struct xenvif_rx_meta *meta;
int copy_off;
grant_ref_t copy_gref;
};
-static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
- struct netrx_pending_operations *npo)
+static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
+ struct netrx_pending_operations *npo)
{
- struct netbk_rx_meta *meta;
+ struct xenvif_rx_meta *meta;
struct xen_netif_rx_request *req;
req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
@@ -446,19 +297,13 @@
* Set up the grant operations for this fragment. If it's a flipping
* interface, we also set up the unmap request from here.
*/
-static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
- struct netrx_pending_operations *npo,
- struct page *page, unsigned long size,
- unsigned long offset, int *head)
+static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
+ struct netrx_pending_operations *npo,
+ struct page *page, unsigned long size,
+ unsigned long offset, int *head)
{
struct gnttab_copy *copy_gop;
- struct netbk_rx_meta *meta;
- /*
- * These variables are used iff get_page_ext returns true,
- * in which case they are guaranteed to be initialized.
- */
- unsigned int uninitialized_var(group), uninitialized_var(idx);
- int foreign = get_page_ext(page, &group, &idx);
+ struct xenvif_rx_meta *meta;
unsigned long bytes;
/* Data must not cross a page boundary. */
@@ -494,26 +339,15 @@
copy_gop = npo->copy + npo->copy_prod++;
copy_gop->flags = GNTCOPY_dest_gref;
- if (foreign) {
- struct xen_netbk *netbk = &xen_netbk[group];
- struct pending_tx_info *src_pend;
+ copy_gop->len = bytes;
- src_pend = &netbk->pending_tx_info[idx];
-
- copy_gop->source.domid = src_pend->vif->domid;
- copy_gop->source.u.ref = src_pend->req.gref;
- copy_gop->flags |= GNTCOPY_source_gref;
- } else {
- void *vaddr = page_address(page);
- copy_gop->source.domid = DOMID_SELF;
- copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
- }
+ copy_gop->source.domid = DOMID_SELF;
+ copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
copy_gop->source.offset = offset;
- copy_gop->dest.domid = vif->domid;
+ copy_gop->dest.domid = vif->domid;
copy_gop->dest.offset = npo->copy_off;
copy_gop->dest.u.ref = npo->copy_gref;
- copy_gop->len = bytes;
npo->copy_off += bytes;
meta->size += bytes;
@@ -549,14 +383,14 @@
* zero GSO descriptors (for non-GSO packets) or one descriptor (for
* frontend-side LRO).
*/
-static int netbk_gop_skb(struct sk_buff *skb,
- struct netrx_pending_operations *npo)
+static int xenvif_gop_skb(struct sk_buff *skb,
+ struct netrx_pending_operations *npo)
{
struct xenvif *vif = netdev_priv(skb->dev);
int nr_frags = skb_shinfo(skb)->nr_frags;
int i;
struct xen_netif_rx_request *req;
- struct netbk_rx_meta *meta;
+ struct xenvif_rx_meta *meta;
unsigned char *data;
int head = 1;
int old_meta_prod;
@@ -593,30 +427,30 @@
if (data + len > skb_tail_pointer(skb))
len = skb_tail_pointer(skb) - data;
- netbk_gop_frag_copy(vif, skb, npo,
- virt_to_page(data), len, offset, &head);
+ xenvif_gop_frag_copy(vif, skb, npo,
+ virt_to_page(data), len, offset, &head);
data += len;
}
for (i = 0; i < nr_frags; i++) {
- netbk_gop_frag_copy(vif, skb, npo,
- skb_frag_page(&skb_shinfo(skb)->frags[i]),
- skb_frag_size(&skb_shinfo(skb)->frags[i]),
- skb_shinfo(skb)->frags[i].page_offset,
- &head);
+ xenvif_gop_frag_copy(vif, skb, npo,
+ skb_frag_page(&skb_shinfo(skb)->frags[i]),
+ skb_frag_size(&skb_shinfo(skb)->frags[i]),
+ skb_shinfo(skb)->frags[i].page_offset,
+ &head);
}
return npo->meta_prod - old_meta_prod;
}
/*
- * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
+ * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was
* used to set up the operations on the top of
* netrx_pending_operations, which have since been done. Check that
* they didn't give any errors and advance over them.
*/
-static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,
- struct netrx_pending_operations *npo)
+static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
+ struct netrx_pending_operations *npo)
{
struct gnttab_copy *copy_op;
int status = XEN_NETIF_RSP_OKAY;
@@ -635,9 +469,9 @@
return status;
}
-static void netbk_add_frag_responses(struct xenvif *vif, int status,
- struct netbk_rx_meta *meta,
- int nr_meta_slots)
+static void xenvif_add_frag_responses(struct xenvif *vif, int status,
+ struct xenvif_rx_meta *meta,
+ int nr_meta_slots)
{
int i;
unsigned long offset;
@@ -665,9 +499,13 @@
int meta_slots_used;
};
-static void xen_netbk_rx_action(struct xen_netbk *netbk)
+static void xenvif_kick_thread(struct xenvif *vif)
{
- struct xenvif *vif = NULL, *tmp;
+ wake_up(&vif->wq);
+}
+
+void xenvif_rx_action(struct xenvif *vif)
+{
s8 status;
u16 flags;
struct xen_netif_rx_response *resp;
@@ -679,22 +517,23 @@
int count;
unsigned long offset;
struct skb_cb_overlay *sco;
+ int need_to_notify = 0;
struct netrx_pending_operations npo = {
- .copy = netbk->grant_copy_op,
- .meta = netbk->meta,
+ .copy = vif->grant_copy_op,
+ .meta = vif->meta,
};
skb_queue_head_init(&rxq);
count = 0;
- while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
+ while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
vif = netdev_priv(skb->dev);
nr_frags = skb_shinfo(skb)->nr_frags;
sco = (struct skb_cb_overlay *)skb->cb;
- sco->meta_slots_used = netbk_gop_skb(skb, &npo);
+ sco->meta_slots_used = xenvif_gop_skb(skb, &npo);
count += nr_frags + 1;
@@ -706,27 +545,27 @@
break;
}
- BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+ BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));
if (!npo.copy_prod)
return;
- BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
- gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod);
+ BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op));
+ gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);
while ((skb = __skb_dequeue(&rxq)) != NULL) {
sco = (struct skb_cb_overlay *)skb->cb;
vif = netdev_priv(skb->dev);
- if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
+ if (vif->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
resp = RING_GET_RESPONSE(&vif->rx,
- vif->rx.rsp_prod_pvt++);
+ vif->rx.rsp_prod_pvt++);
resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
- resp->offset = netbk->meta[npo.meta_cons].gso_size;
- resp->id = netbk->meta[npo.meta_cons].id;
+ resp->offset = vif->meta[npo.meta_cons].gso_size;
+ resp->id = vif->meta[npo.meta_cons].id;
resp->status = sco->meta_slots_used;
npo.meta_cons++;
@@ -737,7 +576,7 @@
vif->dev->stats.tx_bytes += skb->len;
vif->dev->stats.tx_packets++;
- status = netbk_check_gop(vif, sco->meta_slots_used, &npo);
+ status = xenvif_check_gop(vif, sco->meta_slots_used, &npo);
if (sco->meta_slots_used == 1)
flags = 0;
@@ -751,12 +590,12 @@
flags |= XEN_NETRXF_data_validated;
offset = 0;
- resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
+ resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
status, offset,
- netbk->meta[npo.meta_cons].size,
+ vif->meta[npo.meta_cons].size,
flags);
- if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
+ if (vif->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
struct xen_netif_extra_info *gso =
(struct xen_netif_extra_info *)
RING_GET_RESPONSE(&vif->rx,
@@ -764,7 +603,7 @@
resp->flags |= XEN_NETRXF_extra_info;
- gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
+ gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
gso->u.gso.pad = 0;
gso->u.gso.features = 0;
@@ -773,123 +612,44 @@
gso->flags = 0;
}
- netbk_add_frag_responses(vif, status,
- netbk->meta + npo.meta_cons + 1,
- sco->meta_slots_used);
+ xenvif_add_frag_responses(vif, status,
+ vif->meta + npo.meta_cons + 1,
+ sco->meta_slots_used);
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
+ if (ret)
+ need_to_notify = 1;
+
xenvif_notify_tx_completion(vif);
- if (ret && list_empty(&vif->notify_list))
- list_add_tail(&vif->notify_list, &notify);
- else
- xenvif_put(vif);
npo.meta_cons += sco->meta_slots_used;
dev_kfree_skb(skb);
}
- list_for_each_entry_safe(vif, tmp, &notify, notify_list) {
+ if (need_to_notify)
notify_remote_via_irq(vif->rx_irq);
- list_del_init(&vif->notify_list);
- xenvif_put(vif);
- }
/* More work to do? */
- if (!skb_queue_empty(&netbk->rx_queue) &&
- !timer_pending(&netbk->net_timer))
- xen_netbk_kick_thread(netbk);
+ if (!skb_queue_empty(&vif->rx_queue))
+ xenvif_kick_thread(vif);
}
-void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
+void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
{
- struct xen_netbk *netbk = vif->netbk;
+ skb_queue_tail(&vif->rx_queue, skb);
- skb_queue_tail(&netbk->rx_queue, skb);
-
- xen_netbk_kick_thread(netbk);
+ xenvif_kick_thread(vif);
}
-static void xen_netbk_alarm(unsigned long data)
-{
- struct xen_netbk *netbk = (struct xen_netbk *)data;
- xen_netbk_kick_thread(netbk);
-}
-
-static int __on_net_schedule_list(struct xenvif *vif)
-{
- return !list_empty(&vif->schedule_list);
-}
-
-/* Must be called with net_schedule_list_lock held */
-static void remove_from_net_schedule_list(struct xenvif *vif)
-{
- if (likely(__on_net_schedule_list(vif))) {
- list_del_init(&vif->schedule_list);
- xenvif_put(vif);
- }
-}
-
-static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
-{
- struct xenvif *vif = NULL;
-
- spin_lock_irq(&netbk->net_schedule_list_lock);
- if (list_empty(&netbk->net_schedule_list))
- goto out;
-
- vif = list_first_entry(&netbk->net_schedule_list,
- struct xenvif, schedule_list);
- if (!vif)
- goto out;
-
- xenvif_get(vif);
-
- remove_from_net_schedule_list(vif);
-out:
- spin_unlock_irq(&netbk->net_schedule_list_lock);
- return vif;
-}
-
-void xen_netbk_schedule_xenvif(struct xenvif *vif)
-{
- unsigned long flags;
- struct xen_netbk *netbk = vif->netbk;
-
- if (__on_net_schedule_list(vif))
- goto kick;
-
- spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
- if (!__on_net_schedule_list(vif) &&
- likely(xenvif_schedulable(vif))) {
- list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);
- xenvif_get(vif);
- }
- spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
-
-kick:
- smp_mb();
- if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
- !list_empty(&netbk->net_schedule_list))
- xen_netbk_kick_thread(netbk);
-}
-
-void xen_netbk_deschedule_xenvif(struct xenvif *vif)
-{
- struct xen_netbk *netbk = vif->netbk;
- spin_lock_irq(&netbk->net_schedule_list_lock);
- remove_from_net_schedule_list(vif);
- spin_unlock_irq(&netbk->net_schedule_list_lock);
-}
-
-void xen_netbk_check_rx_xenvif(struct xenvif *vif)
+void xenvif_check_rx_xenvif(struct xenvif *vif)
{
int more_to_do;
RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
if (more_to_do)
- xen_netbk_schedule_xenvif(vif);
+ napi_schedule(&vif->napi);
}
static void tx_add_credit(struct xenvif *vif)
@@ -916,11 +676,11 @@
{
struct xenvif *vif = (struct xenvif *)data;
tx_add_credit(vif);
- xen_netbk_check_rx_xenvif(vif);
+ xenvif_check_rx_xenvif(vif);
}
-static void netbk_tx_err(struct xenvif *vif,
- struct xen_netif_tx_request *txp, RING_IDX end)
+static void xenvif_tx_err(struct xenvif *vif,
+ struct xen_netif_tx_request *txp, RING_IDX end)
{
RING_IDX cons = vif->tx.req_cons;
@@ -931,21 +691,18 @@
txp = RING_GET_REQUEST(&vif->tx, cons++);
} while (1);
vif->tx.req_cons = cons;
- xen_netbk_check_rx_xenvif(vif);
- xenvif_put(vif);
}
-static void netbk_fatal_tx_err(struct xenvif *vif)
+static void xenvif_fatal_tx_err(struct xenvif *vif)
{
netdev_err(vif->dev, "fatal error; disabling device\n");
xenvif_carrier_off(vif);
- xenvif_put(vif);
}
-static int netbk_count_requests(struct xenvif *vif,
- struct xen_netif_tx_request *first,
- struct xen_netif_tx_request *txp,
- int work_to_do)
+static int xenvif_count_requests(struct xenvif *vif,
+ struct xen_netif_tx_request *first,
+ struct xen_netif_tx_request *txp,
+ int work_to_do)
{
RING_IDX cons = vif->tx.req_cons;
int slots = 0;
@@ -962,7 +719,7 @@
netdev_err(vif->dev,
"Asked for %d slots but exceeds this limit\n",
work_to_do);
- netbk_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(vif);
return -ENODATA;
}
@@ -973,7 +730,7 @@
netdev_err(vif->dev,
"Malicious frontend using %d slots, threshold %u\n",
slots, fatal_skb_slots);
- netbk_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(vif);
return -E2BIG;
}
@@ -1021,7 +778,7 @@
if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
txp->offset, txp->size);
- netbk_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(vif);
return -EINVAL;
}
@@ -1033,30 +790,30 @@
} while (more_data);
if (drop_err) {
- netbk_tx_err(vif, first, cons + slots);
+ xenvif_tx_err(vif, first, cons + slots);
return drop_err;
}
return slots;
}
-static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
- u16 pending_idx)
+static struct page *xenvif_alloc_page(struct xenvif *vif,
+ u16 pending_idx)
{
struct page *page;
- page = alloc_page(GFP_KERNEL|__GFP_COLD);
+
+ page = alloc_page(GFP_ATOMIC|__GFP_COLD);
if (!page)
return NULL;
- set_page_ext(page, netbk, pending_idx);
- netbk->mmap_pages[pending_idx] = page;
+ vif->mmap_pages[pending_idx] = page;
+
return page;
}
-static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
- struct xenvif *vif,
- struct sk_buff *skb,
- struct xen_netif_tx_request *txp,
- struct gnttab_copy *gop)
+static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_copy *gop)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
@@ -1079,14 +836,14 @@
/* Coalesce tx requests, at this point the packet passed in
* should be <= 64K. Any packets larger than 64K have been
- * handled in netbk_count_requests().
+ * handled in xenvif_count_requests().
*/
for (shinfo->nr_frags = slot = start; slot < nr_slots;
shinfo->nr_frags++) {
struct pending_tx_info *pending_tx_info =
- netbk->pending_tx_info;
+ vif->pending_tx_info;
- page = alloc_page(GFP_KERNEL|__GFP_COLD);
+ page = alloc_page(GFP_ATOMIC|__GFP_COLD);
if (!page)
goto err;
@@ -1121,21 +878,18 @@
gop->len = txp->size;
dst_offset += gop->len;
- index = pending_index(netbk->pending_cons++);
+ index = pending_index(vif->pending_cons++);
- pending_idx = netbk->pending_ring[index];
+ pending_idx = vif->pending_ring[index];
memcpy(&pending_tx_info[pending_idx].req, txp,
sizeof(*txp));
- xenvif_get(vif);
-
- pending_tx_info[pending_idx].vif = vif;
/* Poison these fields, corresponding
* fields for head tx req will be set
* to correct values after the loop.
*/
- netbk->mmap_pages[pending_idx] = (void *)(~0UL);
+ vif->mmap_pages[pending_idx] = (void *)(~0UL);
pending_tx_info[pending_idx].head =
INVALID_PENDING_RING_IDX;
@@ -1155,8 +909,7 @@
first->req.offset = 0;
first->req.size = dst_offset;
first->head = start_idx;
- set_page_ext(page, netbk, head_idx);
- netbk->mmap_pages[head_idx] = page;
+ vif->mmap_pages[head_idx] = page;
frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
}
@@ -1166,20 +919,20 @@
err:
/* Unwind, freeing all pages and sending error responses. */
while (shinfo->nr_frags-- > start) {
- xen_netbk_idx_release(netbk,
+ xenvif_idx_release(vif,
frag_get_pending_idx(&frags[shinfo->nr_frags]),
XEN_NETIF_RSP_ERROR);
}
/* The head too, if necessary. */
if (start)
- xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);
+ xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
return NULL;
}
-static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
- struct sk_buff *skb,
- struct gnttab_copy **gopp)
+static int xenvif_tx_check_gop(struct xenvif *vif,
+ struct sk_buff *skb,
+ struct gnttab_copy **gopp)
{
struct gnttab_copy *gop = *gopp;
u16 pending_idx = *((u16 *)skb->data);
@@ -1192,7 +945,7 @@
/* Check status of header. */
err = gop->status;
if (unlikely(err))
- xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);
+ xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
/* Skip first skb fragment if it is on same page as header fragment. */
start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
@@ -1202,7 +955,7 @@
pending_ring_idx_t head;
pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
- tx_info = &netbk->pending_tx_info[pending_idx];
+ tx_info = &vif->pending_tx_info[pending_idx];
head = tx_info->head;
/* Check error status: if okay then remember grant handle. */
@@ -1210,18 +963,19 @@
newerr = (++gop)->status;
if (newerr)
break;
- peek = netbk->pending_ring[pending_index(++head)];
- } while (!pending_tx_is_head(netbk, peek));
+ peek = vif->pending_ring[pending_index(++head)];
+ } while (!pending_tx_is_head(vif, peek));
if (likely(!newerr)) {
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
- xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
+ xenvif_idx_release(vif, pending_idx,
+ XEN_NETIF_RSP_OKAY);
continue;
}
/* Error on this fragment: respond to client with an error. */
- xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);
+ xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
/* Not the first error? Preceding frags already invalidated. */
if (err)
@@ -1229,10 +983,11 @@
/* First error: invalidate header and preceding fragments. */
pending_idx = *((u16 *)skb->data);
- xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
+ xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
for (j = start; j < i; j++) {
pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
- xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
+ xenvif_idx_release(vif, pending_idx,
+ XEN_NETIF_RSP_OKAY);
}
/* Remember the error: invalidate all subsequent fragments. */
@@ -1243,7 +998,7 @@
return err;
}
-static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
int nr_frags = shinfo->nr_frags;
@@ -1257,20 +1012,20 @@
pending_idx = frag_get_pending_idx(frag);
- txp = &netbk->pending_tx_info[pending_idx].req;
- page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+ txp = &vif->pending_tx_info[pending_idx].req;
+ page = virt_to_page(idx_to_kaddr(vif, pending_idx));
__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
skb->len += txp->size;
skb->data_len += txp->size;
skb->truesize += txp->size;
- /* Take an extra reference to offset xen_netbk_idx_release */
- get_page(netbk->mmap_pages[pending_idx]);
- xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
+ /* Take an extra reference to offset xenvif_idx_release */
+ get_page(vif->mmap_pages[pending_idx]);
+ xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
-static int xen_netbk_get_extras(struct xenvif *vif,
+static int xenvif_get_extras(struct xenvif *vif,
struct xen_netif_extra_info *extras,
int work_to_do)
{
@@ -1280,7 +1035,7 @@
do {
if (unlikely(work_to_do-- <= 0)) {
netdev_err(vif->dev, "Missing extra info\n");
- netbk_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(vif);
return -EBADR;
}
@@ -1291,7 +1046,7 @@
vif->tx.req_cons = ++cons;
netdev_err(vif->dev,
"Invalid extra type: %d\n", extra.type);
- netbk_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(vif);
return -EINVAL;
}
@@ -1302,20 +1057,20 @@
return work_to_do;
}
-static int netbk_set_skb_gso(struct xenvif *vif,
- struct sk_buff *skb,
- struct xen_netif_extra_info *gso)
+static int xenvif_set_skb_gso(struct xenvif *vif,
+ struct sk_buff *skb,
+ struct xen_netif_extra_info *gso)
{
if (!gso->u.gso.size) {
netdev_err(vif->dev, "GSO size must not be zero.\n");
- netbk_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(vif);
return -EINVAL;
}
/* Currently only TCPv4 S.O. is supported. */
if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
- netbk_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(vif);
return -EINVAL;
}
@@ -1426,16 +1181,14 @@
return false;
}
-static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
+static unsigned xenvif_tx_build_gops(struct xenvif *vif)
{
- struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
+ struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
struct sk_buff *skb;
int ret;
- while ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX
- < MAX_PENDING_REQS) &&
- !list_empty(&netbk->net_schedule_list)) {
- struct xenvif *vif;
+ while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
+ < MAX_PENDING_REQS)) {
struct xen_netif_tx_request txreq;
struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
struct page *page;
@@ -1446,16 +1199,6 @@
unsigned int data_len;
pending_ring_idx_t index;
- /* Get a netif from the list with work to do. */
- vif = poll_net_schedule_list(netbk);
- /* This can sometimes happen because the test of
- * list_empty(net_schedule_list) at the top of the
- * loop is unlocked. Just go back and have another
- * look.
- */
- if (!vif)
- continue;
-
if (vif->tx.sring->req_prod - vif->tx.req_cons >
XEN_NETIF_TX_RING_SIZE) {
netdev_err(vif->dev,
@@ -1463,15 +1206,13 @@
"req_prod %d, req_cons %d, size %ld\n",
vif->tx.sring->req_prod, vif->tx.req_cons,
XEN_NETIF_TX_RING_SIZE);
- netbk_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(vif);
continue;
}
RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
- if (!work_to_do) {
- xenvif_put(vif);
- continue;
- }
+ if (!work_to_do)
+ break;
idx = vif->tx.req_cons;
rmb(); /* Ensure that we see the request before we copy it. */
@@ -1479,10 +1220,8 @@
/* Credit-based scheduling. */
if (txreq.size > vif->remaining_credit &&
- tx_credit_exceeded(vif, txreq.size)) {
- xenvif_put(vif);
- continue;
- }
+ tx_credit_exceeded(vif, txreq.size))
+ break;
vif->remaining_credit -= txreq.size;
@@ -1491,24 +1230,24 @@
memset(extras, 0, sizeof(extras));
if (txreq.flags & XEN_NETTXF_extra_info) {
- work_to_do = xen_netbk_get_extras(vif, extras,
- work_to_do);
+ work_to_do = xenvif_get_extras(vif, extras,
+ work_to_do);
idx = vif->tx.req_cons;
if (unlikely(work_to_do < 0))
- continue;
+ break;
}
- ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
+ ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
if (unlikely(ret < 0))
- continue;
+ break;
idx += ret;
if (unlikely(txreq.size < ETH_HLEN)) {
netdev_dbg(vif->dev,
"Bad packet size: %d\n", txreq.size);
- netbk_tx_err(vif, &txreq, idx);
- continue;
+ xenvif_tx_err(vif, &txreq, idx);
+ break;
}
/* No crossing a page as the payload mustn't fragment. */
@@ -1517,12 +1256,12 @@
"txreq.offset: %x, size: %u, end: %lu\n",
txreq.offset, txreq.size,
(txreq.offset&~PAGE_MASK) + txreq.size);
- netbk_fatal_tx_err(vif);
- continue;
+ xenvif_fatal_tx_err(vif);
+ break;
}
- index = pending_index(netbk->pending_cons);
- pending_idx = netbk->pending_ring[index];
+ index = pending_index(vif->pending_cons);
+ pending_idx = vif->pending_ring[index];
data_len = (txreq.size > PKT_PROT_LEN &&
ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
@@ -1533,7 +1272,7 @@
if (unlikely(skb == NULL)) {
netdev_dbg(vif->dev,
"Can't allocate a skb in start_xmit.\n");
- netbk_tx_err(vif, &txreq, idx);
+ xenvif_tx_err(vif, &txreq, idx);
break;
}
@@ -1544,19 +1283,19 @@
struct xen_netif_extra_info *gso;
gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
- if (netbk_set_skb_gso(vif, skb, gso)) {
- /* Failure in netbk_set_skb_gso is fatal. */
+ if (xenvif_set_skb_gso(vif, skb, gso)) {
+ /* Failure in xenvif_set_skb_gso is fatal. */
kfree_skb(skb);
- continue;
+ break;
}
}
/* XXX could copy straight to head */
- page = xen_netbk_alloc_page(netbk, pending_idx);
+ page = xenvif_alloc_page(vif, pending_idx);
if (!page) {
kfree_skb(skb);
- netbk_tx_err(vif, &txreq, idx);
- continue;
+ xenvif_tx_err(vif, &txreq, idx);
+ break;
}
gop->source.u.ref = txreq.gref;
@@ -1572,10 +1311,9 @@
gop++;
- memcpy(&netbk->pending_tx_info[pending_idx].req,
+ memcpy(&vif->pending_tx_info[pending_idx].req,
&txreq, sizeof(txreq));
- netbk->pending_tx_info[pending_idx].vif = vif;
- netbk->pending_tx_info[pending_idx].head = index;
+ vif->pending_tx_info[pending_idx].head = index;
*((u16 *)skb->data) = pending_idx;
__skb_put(skb, data_len);
@@ -1590,46 +1328,45 @@
INVALID_PENDING_IDX);
}
- netbk->pending_cons++;
+ vif->pending_cons++;
- request_gop = xen_netbk_get_requests(netbk, vif,
- skb, txfrags, gop);
+ request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
if (request_gop == NULL) {
kfree_skb(skb);
- netbk_tx_err(vif, &txreq, idx);
- continue;
+ xenvif_tx_err(vif, &txreq, idx);
+ break;
}
gop = request_gop;
- __skb_queue_tail(&netbk->tx_queue, skb);
+ __skb_queue_tail(&vif->tx_queue, skb);
vif->tx.req_cons = idx;
- xen_netbk_check_rx_xenvif(vif);
- if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
+ if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops))
break;
}
- return gop - netbk->tx_copy_ops;
+ return gop - vif->tx_copy_ops;
}
-static void xen_netbk_tx_submit(struct xen_netbk *netbk)
-{
- struct gnttab_copy *gop = netbk->tx_copy_ops;
- struct sk_buff *skb;
- while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+static int xenvif_tx_submit(struct xenvif *vif, int budget)
+{
+ struct gnttab_copy *gop = vif->tx_copy_ops;
+ struct sk_buff *skb;
+ int work_done = 0;
+
+ while (work_done < budget &&
+ (skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
struct xen_netif_tx_request *txp;
- struct xenvif *vif;
u16 pending_idx;
unsigned data_len;
pending_idx = *((u16 *)skb->data);
- vif = netbk->pending_tx_info[pending_idx].vif;
- txp = &netbk->pending_tx_info[pending_idx].req;
+ txp = &vif->pending_tx_info[pending_idx].req;
/* Check the remap error code. */
- if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
+ if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) {
netdev_dbg(vif->dev, "netback grant failed.\n");
skb_shinfo(skb)->nr_frags = 0;
kfree_skb(skb);
@@ -1638,7 +1375,7 @@
data_len = skb->len;
memcpy(skb->data,
- (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
+ (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
data_len);
if (data_len < txp->size) {
/* Append the packet payload as a fragment. */
@@ -1646,7 +1383,8 @@
txp->size -= data_len;
} else {
/* Schedule a response immediately. */
- xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
+ xenvif_idx_release(vif, pending_idx,
+ XEN_NETIF_RSP_OKAY);
}
if (txp->flags & XEN_NETTXF_csum_blank)
@@ -1654,7 +1392,7 @@
else if (txp->flags & XEN_NETTXF_data_validated)
skb->ip_summed = CHECKSUM_UNNECESSARY;
- xen_netbk_fill_frags(netbk, skb);
+ xenvif_fill_frags(vif, skb);
/*
* If the initial fragment was < PKT_PROT_LEN then
@@ -1682,53 +1420,61 @@
vif->dev->stats.rx_bytes += skb->len;
vif->dev->stats.rx_packets++;
- xenvif_receive_skb(vif, skb);
+ work_done++;
+
+ netif_receive_skb(skb);
}
+
+ return work_done;
}
/* Called after netfront has transmitted */
-static void xen_netbk_tx_action(struct xen_netbk *netbk)
+int xenvif_tx_action(struct xenvif *vif, int budget)
{
unsigned nr_gops;
+ int work_done;
- nr_gops = xen_netbk_tx_build_gops(netbk);
+ if (unlikely(!tx_work_todo(vif)))
+ return 0;
+
+ nr_gops = xenvif_tx_build_gops(vif);
if (nr_gops == 0)
- return;
+ return 0;
- gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);
+ gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
- xen_netbk_tx_submit(netbk);
+ work_done = xenvif_tx_submit(vif, nr_gops);
+
+ return work_done;
}
-static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
- u8 status)
+static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
+ u8 status)
{
- struct xenvif *vif;
struct pending_tx_info *pending_tx_info;
pending_ring_idx_t head;
u16 peek; /* peek into next tx request */
- BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL));
+ BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL));
/* Already complete? */
- if (netbk->mmap_pages[pending_idx] == NULL)
+ if (vif->mmap_pages[pending_idx] == NULL)
return;
- pending_tx_info = &netbk->pending_tx_info[pending_idx];
+ pending_tx_info = &vif->pending_tx_info[pending_idx];
- vif = pending_tx_info->vif;
head = pending_tx_info->head;
- BUG_ON(!pending_tx_is_head(netbk, head));
- BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx);
+ BUG_ON(!pending_tx_is_head(vif, head));
+ BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);
do {
pending_ring_idx_t index;
pending_ring_idx_t idx = pending_index(head);
- u16 info_idx = netbk->pending_ring[idx];
+ u16 info_idx = vif->pending_ring[idx];
- pending_tx_info = &netbk->pending_tx_info[info_idx];
+ pending_tx_info = &vif->pending_tx_info[info_idx];
make_tx_response(vif, &pending_tx_info->req, status);
/* Setting any number other than
@@ -1737,18 +1483,15 @@
*/
pending_tx_info->head = 0;
- index = pending_index(netbk->pending_prod++);
- netbk->pending_ring[index] = netbk->pending_ring[info_idx];
+ index = pending_index(vif->pending_prod++);
+ vif->pending_ring[index] = vif->pending_ring[info_idx];
- xenvif_put(vif);
+ peek = vif->pending_ring[pending_index(++head)];
- peek = netbk->pending_ring[pending_index(++head)];
+ } while (!pending_tx_is_head(vif, peek));
- } while (!pending_tx_is_head(netbk, peek));
-
- netbk->mmap_pages[pending_idx]->mapping = 0;
- put_page(netbk->mmap_pages[pending_idx]);
- netbk->mmap_pages[pending_idx] = NULL;
+ put_page(vif->mmap_pages[pending_idx]);
+ vif->mmap_pages[pending_idx] = NULL;
}
@@ -1796,46 +1539,23 @@
return resp;
}
-static inline int rx_work_todo(struct xen_netbk *netbk)
+static inline int rx_work_todo(struct xenvif *vif)
{
- return !skb_queue_empty(&netbk->rx_queue);
+ return !skb_queue_empty(&vif->rx_queue);
}
-static inline int tx_work_todo(struct xen_netbk *netbk)
+static inline int tx_work_todo(struct xenvif *vif)
{
- if ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX
- < MAX_PENDING_REQS) &&
- !list_empty(&netbk->net_schedule_list))
+ if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) &&
+ (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
+ < MAX_PENDING_REQS))
return 1;
return 0;
}
-static int xen_netbk_kthread(void *data)
-{
- struct xen_netbk *netbk = data;
- while (!kthread_should_stop()) {
- wait_event_interruptible(netbk->wq,
- rx_work_todo(netbk) ||
- tx_work_todo(netbk) ||
- kthread_should_stop());
- cond_resched();
-
- if (kthread_should_stop())
- break;
-
- if (rx_work_todo(netbk))
- xen_netbk_rx_action(netbk);
-
- if (tx_work_todo(netbk))
- xen_netbk_tx_action(netbk);
- }
-
- return 0;
-}
-
-void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
+void xenvif_unmap_frontend_rings(struct xenvif *vif)
{
if (vif->tx.sring)
xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
@@ -1845,9 +1565,9 @@
vif->rx.sring);
}
-int xen_netbk_map_frontend_rings(struct xenvif *vif,
- grant_ref_t tx_ring_ref,
- grant_ref_t rx_ring_ref)
+int xenvif_map_frontend_rings(struct xenvif *vif,
+ grant_ref_t tx_ring_ref,
+ grant_ref_t rx_ring_ref)
{
void *addr;
struct xen_netif_tx_sring *txs;
@@ -1876,15 +1596,33 @@
return 0;
err:
- xen_netbk_unmap_frontend_rings(vif);
+ xenvif_unmap_frontend_rings(vif);
return err;
}
+int xenvif_kthread(void *data)
+{
+ struct xenvif *vif = data;
+
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(vif->wq,
+ rx_work_todo(vif) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+
+ if (rx_work_todo(vif))
+ xenvif_rx_action(vif);
+
+ cond_resched();
+ }
+
+ return 0;
+}
+
static int __init netback_init(void)
{
- int i;
int rc = 0;
- int group;
if (!xen_domain())
return -ENODEV;
@@ -1895,48 +1633,6 @@
fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
}
- xen_netbk_group_nr = num_online_cpus();
- xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
- if (!xen_netbk)
- return -ENOMEM;
-
- for (group = 0; group < xen_netbk_group_nr; group++) {
- struct xen_netbk *netbk = &xen_netbk[group];
- skb_queue_head_init(&netbk->rx_queue);
- skb_queue_head_init(&netbk->tx_queue);
-
- init_timer(&netbk->net_timer);
- netbk->net_timer.data = (unsigned long)netbk;
- netbk->net_timer.function = xen_netbk_alarm;
-
- netbk->pending_cons = 0;
- netbk->pending_prod = MAX_PENDING_REQS;
- for (i = 0; i < MAX_PENDING_REQS; i++)
- netbk->pending_ring[i] = i;
-
- init_waitqueue_head(&netbk->wq);
- netbk->task = kthread_create(xen_netbk_kthread,
- (void *)netbk,
- "netback/%u", group);
-
- if (IS_ERR(netbk->task)) {
- pr_alert("kthread_create() fails at netback\n");
- del_timer(&netbk->net_timer);
- rc = PTR_ERR(netbk->task);
- goto failed_init;
- }
-
- kthread_bind(netbk->task, group);
-
- INIT_LIST_HEAD(&netbk->net_schedule_list);
-
- spin_lock_init(&netbk->net_schedule_list_lock);
-
- atomic_set(&netbk->netfront_count, 0);
-
- wake_up_process(netbk->task);
- }
-
rc = xenvif_xenbus_init();
if (rc)
goto failed_init;
@@ -1944,35 +1640,14 @@
return 0;
failed_init:
- while (--group >= 0) {
- struct xen_netbk *netbk = &xen_netbk[group];
- del_timer(&netbk->net_timer);
- kthread_stop(netbk->task);
- }
- vfree(xen_netbk);
return rc;
-
}
module_init(netback_init);
static void __exit netback_fini(void)
{
- int i, j;
-
xenvif_xenbus_fini();
-
- for (i = 0; i < xen_netbk_group_nr; i++) {
- struct xen_netbk *netbk = &xen_netbk[i];
- del_timer_sync(&netbk->net_timer);
- kthread_stop(netbk->task);
- for (j = 0; j < MAX_PENDING_REQS; j++) {
- if (netbk->mmap_pages[j])
- __free_page(netbk->mmap_pages[j]);
- }
- }
-
- vfree(xen_netbk);
}
module_exit(netback_fini);
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 6bb7cf2..b10ba00 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -392,6 +392,8 @@
mem = (unsigned long)
dt_alloc(size + 4, __alignof__(struct device_node));
+ memset((void *)mem, 0, size);
+
((__be32 *)mem)[size / 4] = cpu_to_be32(0xdeadbeef);
pr_debug(" unflattening %lx...\n", mem);
diff --git a/drivers/pinctrl/pinctrl-sunxi.c b/drivers/pinctrl/pinctrl-sunxi.c
index c47fd1e..94716c7 100644
--- a/drivers/pinctrl/pinctrl-sunxi.c
+++ b/drivers/pinctrl/pinctrl-sunxi.c
@@ -278,6 +278,7 @@
{
struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
struct sunxi_pinctrl_group *g = &pctl->groups[group];
+ unsigned long flags;
u32 val, mask;
u16 strength;
u8 dlevel;
@@ -295,22 +296,35 @@
* 3: 40mA
*/
dlevel = strength / 10 - 1;
+
+ spin_lock_irqsave(&pctl->lock, flags);
+
val = readl(pctl->membase + sunxi_dlevel_reg(g->pin));
mask = DLEVEL_PINS_MASK << sunxi_dlevel_offset(g->pin);
writel((val & ~mask) | dlevel << sunxi_dlevel_offset(g->pin),
pctl->membase + sunxi_dlevel_reg(g->pin));
+
+ spin_unlock_irqrestore(&pctl->lock, flags);
break;
case PIN_CONFIG_BIAS_PULL_UP:
+ spin_lock_irqsave(&pctl->lock, flags);
+
val = readl(pctl->membase + sunxi_pull_reg(g->pin));
mask = PULL_PINS_MASK << sunxi_pull_offset(g->pin);
writel((val & ~mask) | 1 << sunxi_pull_offset(g->pin),
pctl->membase + sunxi_pull_reg(g->pin));
+
+ spin_unlock_irqrestore(&pctl->lock, flags);
break;
case PIN_CONFIG_BIAS_PULL_DOWN:
+ spin_lock_irqsave(&pctl->lock, flags);
+
val = readl(pctl->membase + sunxi_pull_reg(g->pin));
mask = PULL_PINS_MASK << sunxi_pull_offset(g->pin);
writel((val & ~mask) | 2 << sunxi_pull_offset(g->pin),
pctl->membase + sunxi_pull_reg(g->pin));
+
+ spin_unlock_irqrestore(&pctl->lock, flags);
break;
default:
break;
@@ -360,11 +374,17 @@
u8 config)
{
struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+ unsigned long flags;
+ u32 val, mask;
- u32 val = readl(pctl->membase + sunxi_mux_reg(pin));
- u32 mask = MUX_PINS_MASK << sunxi_mux_offset(pin);
+ spin_lock_irqsave(&pctl->lock, flags);
+
+ val = readl(pctl->membase + sunxi_mux_reg(pin));
+ mask = MUX_PINS_MASK << sunxi_mux_offset(pin);
writel((val & ~mask) | config << sunxi_mux_offset(pin),
pctl->membase + sunxi_mux_reg(pin));
+
+ spin_unlock_irqrestore(&pctl->lock, flags);
}
static int sunxi_pmx_enable(struct pinctrl_dev *pctldev,
@@ -464,8 +484,21 @@
struct sunxi_pinctrl *pctl = dev_get_drvdata(chip->dev);
u32 reg = sunxi_data_reg(offset);
u8 index = sunxi_data_offset(offset);
+ unsigned long flags;
+ u32 regval;
- writel((value & DATA_PINS_MASK) << index, pctl->membase + reg);
+ spin_lock_irqsave(&pctl->lock, flags);
+
+ regval = readl(pctl->membase + reg);
+
+ if (value)
+ regval |= BIT(index);
+ else
+ regval &= ~(BIT(index));
+
+ writel(regval, pctl->membase + reg);
+
+ spin_unlock_irqrestore(&pctl->lock, flags);
}
static int sunxi_pinctrl_gpio_of_xlate(struct gpio_chip *gc,
@@ -526,6 +559,8 @@
struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
u32 reg = sunxi_irq_cfg_reg(d->hwirq);
u8 index = sunxi_irq_cfg_offset(d->hwirq);
+ unsigned long flags;
+ u32 regval;
u8 mode;
switch (type) {
@@ -548,7 +583,13 @@
return -EINVAL;
}
- writel((mode & IRQ_CFG_IRQ_MASK) << index, pctl->membase + reg);
+ spin_lock_irqsave(&pctl->lock, flags);
+
+ regval = readl(pctl->membase + reg);
+ regval &= ~IRQ_CFG_IRQ_MASK;
+ writel(regval | (mode << index), pctl->membase + reg);
+
+ spin_unlock_irqrestore(&pctl->lock, flags);
return 0;
}
@@ -560,14 +601,19 @@
u8 ctrl_idx = sunxi_irq_ctrl_offset(d->hwirq);
u32 status_reg = sunxi_irq_status_reg(d->hwirq);
u8 status_idx = sunxi_irq_status_offset(d->hwirq);
+ unsigned long flags;
u32 val;
+ spin_lock_irqsave(&pctl->lock, flags);
+
/* Mask the IRQ */
val = readl(pctl->membase + ctrl_reg);
writel(val & ~(1 << ctrl_idx), pctl->membase + ctrl_reg);
/* Clear the IRQ */
writel(1 << status_idx, pctl->membase + status_reg);
+
+ spin_unlock_irqrestore(&pctl->lock, flags);
}
static void sunxi_pinctrl_irq_mask(struct irq_data *d)
@@ -575,11 +621,16 @@
struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
u32 reg = sunxi_irq_ctrl_reg(d->hwirq);
u8 idx = sunxi_irq_ctrl_offset(d->hwirq);
+ unsigned long flags;
u32 val;
+ spin_lock_irqsave(&pctl->lock, flags);
+
/* Mask the IRQ */
val = readl(pctl->membase + reg);
writel(val & ~(1 << idx), pctl->membase + reg);
+
+ spin_unlock_irqrestore(&pctl->lock, flags);
}
static void sunxi_pinctrl_irq_unmask(struct irq_data *d)
@@ -588,6 +639,7 @@
struct sunxi_desc_function *func;
u32 reg = sunxi_irq_ctrl_reg(d->hwirq);
u8 idx = sunxi_irq_ctrl_offset(d->hwirq);
+ unsigned long flags;
u32 val;
func = sunxi_pinctrl_desc_find_function_by_pin(pctl,
@@ -597,9 +649,13 @@
/* Change muxing to INT mode */
sunxi_pmx_set(pctl->pctl_dev, pctl->irq_array[d->hwirq], func->muxval);
+ spin_lock_irqsave(&pctl->lock, flags);
+
/* Unmask the IRQ */
val = readl(pctl->membase + reg);
writel(val | (1 << idx), pctl->membase + reg);
+
+ spin_unlock_irqrestore(&pctl->lock, flags);
}
static struct irq_chip sunxi_pinctrl_irq_chip = {
@@ -752,6 +808,8 @@
return -ENOMEM;
platform_set_drvdata(pdev, pctl);
+ spin_lock_init(&pctl->lock);
+
pctl->membase = of_iomap(node, 0);
if (!pctl->membase)
return -ENOMEM;
diff --git a/drivers/pinctrl/pinctrl-sunxi.h b/drivers/pinctrl/pinctrl-sunxi.h
index d68047d..01c494f 100644
--- a/drivers/pinctrl/pinctrl-sunxi.h
+++ b/drivers/pinctrl/pinctrl-sunxi.h
@@ -14,6 +14,7 @@
#define __PINCTRL_SUNXI_H
#include <linux/kernel.h>
+#include <linux/spinlock.h>
#define PA_BASE 0
#define PB_BASE 32
@@ -407,6 +408,7 @@
unsigned ngroups;
int irq;
int irq_array[SUNXI_IRQ_NUMBER];
+ spinlock_t lock;
struct pinctrl_dev *pctl_dev;
};
diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c
index 0f9f859..f911952 100644
--- a/drivers/platform/olpc/olpc-ec.c
+++ b/drivers/platform/olpc/olpc-ec.c
@@ -330,7 +330,7 @@
return platform_driver_register(&olpc_ec_plat_driver);
}
-module_init(olpc_ec_init_module);
+arch_initcall(olpc_ec_init_module);
MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 97bb05e..d6970f4 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -53,7 +53,6 @@
#define HPWMI_ALS_QUERY 0x3
#define HPWMI_HARDWARE_QUERY 0x4
#define HPWMI_WIRELESS_QUERY 0x5
-#define HPWMI_BIOS_QUERY 0x9
#define HPWMI_HOTKEY_QUERY 0xc
#define HPWMI_WIRELESS2_QUERY 0x1b
#define HPWMI_POSTCODEERROR_QUERY 0x2a
@@ -293,19 +292,6 @@
return (state & 0x4) ? 1 : 0;
}
-static int hp_wmi_enable_hotkeys(void)
-{
- int ret;
- int query = 0x6e;
-
- ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &query, sizeof(query),
- 0);
-
- if (ret)
- return -EINVAL;
- return 0;
-}
-
static int hp_wmi_set_block(void *data, bool blocked)
{
enum hp_wmi_radio r = (enum hp_wmi_radio) data;
@@ -1009,8 +995,6 @@
err = hp_wmi_input_setup();
if (err)
return err;
-
- hp_wmi_enable_hotkeys();
}
if (bios_capable) {
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 2ac045f..3a1b6bf 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -2440,7 +2440,10 @@
if (pos < 0)
return pos;
- return snprintf(buffer, PAGE_SIZE, "%s\n", pos ? "speed" : "stamina");
+ return snprintf(buffer, PAGE_SIZE, "%s\n",
+ pos == SPEED ? "speed" :
+ pos == STAMINA ? "stamina" :
+ pos == AUTO ? "auto" : "unknown");
}
static int sony_nc_gfx_switch_setup(struct platform_device *pd,
@@ -4320,7 +4323,8 @@
goto err_free_resources;
}
- if (sonypi_compat_init())
+ result = sonypi_compat_init();
+ if (result)
goto err_remove_input;
/* request io port */
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e58cf00..448efe0 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -13,7 +13,7 @@
#include <linux/eventfd.h>
#include <linux/vhost.h>
-#include <linux/socket.h> /* memcpy_fromiovec */
+#include <linux/uio.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/miscdevice.h>
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index a58ac43..5e8be46 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -348,7 +348,7 @@
for_each_possible_cpu(i)
memset(per_cpu(cpu_evtchn_mask, i),
- (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i)));
+ (i == 0) ? ~0 : 0, NR_EVENT_CHANNELS/8);
}
static inline void clear_evtchn(int port)
@@ -1493,8 +1493,10 @@
/* Rebind an evtchn so that it gets delivered to a specific cpu */
static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
{
+ struct shared_info *s = HYPERVISOR_shared_info;
struct evtchn_bind_vcpu bind_vcpu;
int evtchn = evtchn_from_irq(irq);
+ int masked;
if (!VALID_EVTCHN(evtchn))
return -1;
@@ -1511,6 +1513,12 @@
bind_vcpu.vcpu = tcpu;
/*
+ * Mask the event while changing the VCPU binding to prevent
+ * it being delivered on an unexpected VCPU.
+ */
+ masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask));
+
+ /*
* If this fails, it usually just indicates that we're dealing with a
* virq or IPI channel, which don't actually need to be rebound. Ignore
* it, but don't do the xenlinux-level rebind in that case.
@@ -1518,6 +1526,9 @@
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
bind_evtchn_to_cpu(evtchn, tcpu);
+ if (!masked)
+ unmask_evtchn(evtchn);
+
return 0;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b577e45..0ab26fb 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2086,6 +2086,7 @@
extern void ext4_dirty_inode(struct inode *, int);
extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
+extern int ext4_inode_attach_jinode(struct inode *inode);
extern int ext4_can_truncate(struct inode *inode);
extern void ext4_truncate(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 72a3600..17ac112 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -255,10 +255,10 @@
set_buffer_prio(bh);
if (ext4_handle_valid(handle)) {
err = jbd2_journal_dirty_metadata(handle, bh);
- if (err) {
- /* Errors can only happen if there is a bug */
- handle->h_err = err;
- __ext4_journal_stop(where, line, handle);
+ /* Errors can only happen if there is a bug */
+ if (WARN_ON_ONCE(err)) {
+ ext4_journal_abort_handle(where, line, __func__, bh,
+ handle, err);
}
} else {
if (inode)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6f4cc56..319c9d2 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -219,7 +219,6 @@
{
struct super_block *sb = inode->i_sb;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct ext4_inode_info *ei = EXT4_I(inode);
struct vfsmount *mnt = filp->f_path.mnt;
struct path path;
char buf[64], *cp;
@@ -259,22 +258,10 @@
* Set up the jbd2_inode if we are opening the inode for
* writing and the journal is present
*/
- if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) {
- struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL);
-
- spin_lock(&inode->i_lock);
- if (!ei->jinode) {
- if (!jinode) {
- spin_unlock(&inode->i_lock);
- return -ENOMEM;
- }
- ei->jinode = jinode;
- jbd2_journal_init_jbd_inode(ei->jinode, inode);
- jinode = NULL;
- }
- spin_unlock(&inode->i_lock);
- if (unlikely(jinode != NULL))
- jbd2_free_inode(jinode);
+ if (filp->f_mode & FMODE_WRITE) {
+ int ret = ext4_inode_attach_jinode(inode);
+ if (ret < 0)
+ return ret;
}
return dquot_file_open(inode, filp);
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index dd32a2e..c2ca04e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3533,6 +3533,18 @@
offset;
}
+ if (offset & (sb->s_blocksize - 1) ||
+ (offset + length) & (sb->s_blocksize - 1)) {
+ /*
+ * Attach jinode to inode for jbd2 if we do any zeroing of
+ * partial block
+ */
+ ret = ext4_inode_attach_jinode(inode);
+ if (ret < 0)
+ goto out_mutex;
+
+ }
+
first_block_offset = round_up(offset, sb->s_blocksize);
last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
@@ -3601,6 +3613,31 @@
return ret;
}
+int ext4_inode_attach_jinode(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct jbd2_inode *jinode;
+
+ if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal)
+ return 0;
+
+ jinode = jbd2_alloc_inode(GFP_KERNEL);
+ spin_lock(&inode->i_lock);
+ if (!ei->jinode) {
+ if (!jinode) {
+ spin_unlock(&inode->i_lock);
+ return -ENOMEM;
+ }
+ ei->jinode = jinode;
+ jbd2_journal_init_jbd_inode(ei->jinode, inode);
+ jinode = NULL;
+ }
+ spin_unlock(&inode->i_lock);
+ if (unlikely(jinode != NULL))
+ jbd2_free_inode(jinode);
+ return 0;
+}
+
/*
* ext4_truncate()
*
@@ -3661,6 +3698,12 @@
return;
}
+ /* If we zero-out tail of the page, we have to create jinode for jbd2 */
+ if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
+ if (ext4_inode_attach_jinode(inode) < 0)
+ return;
+ }
+
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
credits = ext4_writepage_trans_blocks(inode);
else
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 9435384..544a809 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1838,14 +1838,14 @@
glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
WQ_HIGHPRI | WQ_FREEZABLE, 0);
- if (IS_ERR(glock_workqueue))
- return PTR_ERR(glock_workqueue);
+ if (!glock_workqueue)
+ return -ENOMEM;
gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
WQ_MEM_RECLAIM | WQ_FREEZABLE,
0);
- if (IS_ERR(gfs2_delete_workqueue)) {
+ if (!gfs2_delete_workqueue) {
destroy_workqueue(glock_workqueue);
- return PTR_ERR(gfs2_delete_workqueue);
+ return -ENOMEM;
}
register_shrinker(&glock_shrinker);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5f2e522..e2e0a90 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -47,7 +47,8 @@
* None of the buffers should be dirty, locked, or pinned.
*/
-static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
+static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
+ unsigned int nr_revokes)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct list_head *head = &gl->gl_ail_list;
@@ -57,7 +58,9 @@
gfs2_log_lock(sdp);
spin_lock(&sdp->sd_ail_lock);
- list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) {
+ list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
+ if (nr_revokes == 0)
+ break;
bh = bd->bd_bh;
if (bh->b_state & b_state) {
if (fsync)
@@ -65,6 +68,7 @@
gfs2_ail_error(gl, bh);
}
gfs2_trans_add_revoke(sdp, bd);
+ nr_revokes--;
}
GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
spin_unlock(&sdp->sd_ail_lock);
@@ -91,7 +95,7 @@
WARN_ON_ONCE(current->journal_info);
current->journal_info = &tr;
- __gfs2_ail_flush(gl, 0);
+ __gfs2_ail_flush(gl, 0, tr.tr_revokes);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
@@ -101,15 +105,19 @@
{
struct gfs2_sbd *sdp = gl->gl_sbd;
unsigned int revokes = atomic_read(&gl->gl_ail_count);
+ unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
int ret;
if (!revokes)
return;
- ret = gfs2_trans_begin(sdp, 0, revokes);
+ while (revokes > max_revokes)
+ max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
+
+ ret = gfs2_trans_begin(sdp, 0, max_revokes);
if (ret)
return;
- __gfs2_ail_flush(gl, fsync);
+ __gfs2_ail_flush(gl, fsync, max_revokes);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index bbb2715..64915ee 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -594,7 +594,7 @@
}
gfs2_glock_dq_uninit(ghs);
if (IS_ERR(d))
- return PTR_RET(d);
+ return PTR_ERR(d);
return error;
} else if (error != -ENOENT) {
goto fail_gunlock;
@@ -1750,6 +1750,10 @@
struct gfs2_holder gh;
int ret;
+ /* For selinux during lookup */
+ if (gfs2_glock_is_locked_by_me(ip->i_gl))
+ return generic_getxattr(dentry, name, data, size);
+
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
ret = gfs2_glock_nq(&gh);
if (ret == 0) {
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index e04d0e0..7b0f504 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -155,7 +155,7 @@
goto fail_wq;
gfs2_control_wq = alloc_workqueue("gfs2_control",
- WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
+ WQ_UNBOUND | WQ_FREEZABLE, 0);
if (!gfs2_control_wq)
goto fail_recovery;
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index dc9a913..2d8be51 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -345,8 +345,7 @@
if (err == -EOPNOTSUPP) {
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
- bio_put(bio);
- /* to be detected by submit_seg_bio() */
+ /* to be detected by nilfs_segbuf_submit_bio() */
}
if (!uptodate)
@@ -377,12 +376,12 @@
bio->bi_private = segbuf;
bio_get(bio);
submit_bio(mode, bio);
+ segbuf->sb_nbio++;
if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
bio_put(bio);
err = -EOPNOTSUPP;
goto failed;
}
- segbuf->sb_nbio++;
bio_put(bio);
wi->bio = NULL;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 94441a4..737e156 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -271,7 +271,7 @@
de = next;
} while (de);
spin_unlock(&proc_subdir_lock);
- return 0;
+ return 1;
}
int proc_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 229e366..e0a790d 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -205,7 +205,9 @@
static int proc_root_readdir(struct file *file, struct dir_context *ctx)
{
if (ctx->pos < FIRST_PROCESS_ENTRY) {
- proc_readdir(file, ctx);
+ int error = proc_readdir(file, ctx);
+ if (unlikely(error <= 0))
+ return error;
ctx->pos = FIRST_PROCESS_ENTRY;
}
diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h
index 089fe43..dc029db 100644
--- a/include/linux/can/platform/mcp251x.h
+++ b/include/linux/can/platform/mcp251x.h
@@ -9,26 +9,13 @@
#include <linux/spi/spi.h>
-/**
+/*
* struct mcp251x_platform_data - MCP251X SPI CAN controller platform data
* @oscillator_frequency: - oscillator frequency in Hz
- * @irq_flags: - IRQF configuration flags
- * @board_specific_setup: - called before probing the chip (power,reset)
- * @transceiver_enable: - called to power on/off the transceiver
- * @power_enable: - called to power on/off the mcp *and* the
- * transceiver
- *
- * Please note that you should define power_enable or transceiver_enable or
- * none of them. Defining both of them is no use.
- *
*/
struct mcp251x_platform_data {
unsigned long oscillator_frequency;
- unsigned long irq_flags;
- int (*board_specific_setup)(struct spi_device *spi);
- int (*transceiver_enable)(int enable);
- int (*power_enable) (int enable);
};
#endif /* __CAN_PLATFORM_MCP251X_H__ */
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 94af418..3a8d0a2 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -132,9 +132,8 @@
static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag)
{
- void *ret = dma_alloc_coherent(dev, size, dma_handle, flag);
- if (ret)
- memset(ret, 0, size);
+ void *ret = dma_alloc_coherent(dev, size, dma_handle,
+ flag | __GFP_ZERO);
return ret;
}
diff --git a/include/linux/fs_enet_pd.h b/include/linux/fs_enet_pd.h
index 343d82a..efb0596 100644
--- a/include/linux/fs_enet_pd.h
+++ b/include/linux/fs_enet_pd.h
@@ -16,6 +16,7 @@
#ifndef FS_ENET_PD_H
#define FS_ENET_PD_H
+#include <linux/clk.h>
#include <linux/string.h>
#include <linux/of_mdio.h>
#include <linux/if_ether.h>
@@ -143,6 +144,8 @@
int use_rmii; /* use RMII mode */
int has_phy; /* if the network is phy container as well...*/
+
+ struct clk *clk_per; /* 'per' clock for register access */
};
struct fs_mii_fec_platform_info {
u32 irq[32];
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index c796ce2..79640e01 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -5,47 +5,13 @@
#include <linux/bitmap.h>
#include <linux/if.h>
+#include <linux/ip.h>
#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <linux/timer.h>
#include <linux/sysctl.h>
#include <linux/rtnetlink.h>
-enum
-{
- IPV4_DEVCONF_FORWARDING=1,
- IPV4_DEVCONF_MC_FORWARDING,
- IPV4_DEVCONF_PROXY_ARP,
- IPV4_DEVCONF_ACCEPT_REDIRECTS,
- IPV4_DEVCONF_SECURE_REDIRECTS,
- IPV4_DEVCONF_SEND_REDIRECTS,
- IPV4_DEVCONF_SHARED_MEDIA,
- IPV4_DEVCONF_RP_FILTER,
- IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE,
- IPV4_DEVCONF_BOOTP_RELAY,
- IPV4_DEVCONF_LOG_MARTIANS,
- IPV4_DEVCONF_TAG,
- IPV4_DEVCONF_ARPFILTER,
- IPV4_DEVCONF_MEDIUM_ID,
- IPV4_DEVCONF_FORCE_IGMP_VERSION,
- IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL,
- IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL,
- IPV4_DEVCONF_NOXFRM,
- IPV4_DEVCONF_NOPOLICY,
- IPV4_DEVCONF_ARP_ANNOUNCE,
- IPV4_DEVCONF_ARP_IGNORE,
- IPV4_DEVCONF_PROMOTE_SECONDARIES,
- IPV4_DEVCONF_ARP_ACCEPT,
- IPV4_DEVCONF_ARP_NOTIFY,
- IPV4_DEVCONF_ACCEPT_LOCAL,
- IPV4_DEVCONF_SRC_VMARK,
- IPV4_DEVCONF_PROXY_ARP_PVLAN,
- IPV4_DEVCONF_ROUTE_LOCALNET,
- __IPV4_DEVCONF_MAX
-};
-
-#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1)
-
struct ipv4_devconf {
void *sysctl;
int data[IPV4_DEVCONF_MAX];
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 77a4784..28ea384 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -50,6 +50,7 @@
__s32 accept_dad;
__s32 force_tllao;
__s32 ndisc_notify;
+ __s32 suppress_frag_ndisc;
void *sysctl;
};
@@ -103,6 +104,7 @@
#define IP6SKB_FORWARDED 2
#define IP6SKB_REROUTED 4
#define IP6SKB_ROUTERALERT 8
+#define IP6SKB_FRAGMENTED 16
};
#define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb))
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fb425aa..faf4b7c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -332,6 +332,7 @@
unsigned long pgoff, unsigned long flags);
#endif
unsigned long mmap_base; /* base of mmap area */
+ unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
unsigned long task_size; /* size of task vm space */
unsigned long highest_vm_end; /* highest vma end address */
pgd_t * pgd;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 077363d..3ad49b8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1125,6 +1125,7 @@
struct list_head napi_list;
struct list_head unreg_list;
struct list_head upper_dev_list; /* List of upper devices */
+ struct list_head lower_dev_list;
/* currently active device features */
@@ -2767,6 +2768,16 @@
extern bool netdev_has_upper_dev(struct net_device *dev,
struct net_device *upper_dev);
extern bool netdev_has_any_upper_dev(struct net_device *dev);
+extern struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+ struct list_head **iter);
+
+/* iterate through upper list, must be called under RCU read lock */
+#define netdev_for_each_upper_dev_rcu(dev, upper, iter) \
+ for (iter = &(dev)->upper_dev_list, \
+ upper = netdev_upper_get_next_dev_rcu(dev, &(iter)); \
+ upper; \
+ upper = netdev_upper_get_next_dev_rcu(dev, &(iter)))
+
extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
extern int netdev_upper_dev_link(struct net_device *dev,
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index de70f7b..708fe72ab9 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -314,25 +314,24 @@
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu;
-extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
+extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
+extern void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
struct nf_conn;
+enum ip_conntrack_info;
struct nlattr;
struct nfq_ct_hook {
size_t (*build_size)(const struct nf_conn *ct);
int (*build)(struct sk_buff *skb, struct nf_conn *ct);
int (*parse)(const struct nlattr *attr, struct nf_conn *ct);
+ int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct,
+ u32 portid, u32 report);
+ void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo, s32 off);
};
extern struct nfq_ct_hook __rcu *nfq_ct_hook;
-
-struct nfq_ct_nat_hook {
- void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct,
- u32 ctinfo, int off);
-};
-extern struct nfq_ct_nat_hook __rcu *nfq_ct_nat_hook;
#else
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e9995eb..078066d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -314,7 +314,6 @@
struct user_namespace;
#ifdef CONFIG_MMU
-extern unsigned long mmap_legacy_base(void);
extern void arch_pick_mmap_layout(struct mm_struct *mm);
extern unsigned long
arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h
index 6205eeb..90b5e30 100644
--- a/include/linux/sh_eth.h
+++ b/include/linux/sh_eth.h
@@ -5,17 +5,10 @@
#include <linux/if_ether.h>
enum {EDMAC_LITTLE_ENDIAN, EDMAC_BIG_ENDIAN};
-enum {
- SH_ETH_REG_GIGABIT,
- SH_ETH_REG_FAST_RCAR,
- SH_ETH_REG_FAST_SH4,
- SH_ETH_REG_FAST_SH3_SH2
-};
struct sh_eth_plat_data {
int phy;
int edmac_endian;
- int register_type;
phy_interface_t phy_interface;
void (*set_mdio_gate)(void *addr);
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 9e495d31..bb5deb0f 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -108,6 +108,7 @@
int bugged_jumbo;
int pmt;
int force_sf_dma_mode;
+ int force_thresh_dma_mode;
int riwt_off;
void (*fix_mac_speed)(void *priv, unsigned int speed);
void (*bus_setup)(void __iomem *ioaddr);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 260f83f..f667248 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -135,6 +135,8 @@
extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk,
__be32 mtu);
extern void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark);
+extern void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
+ u32 mark);
extern void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk);
struct netlink_callback;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 5fe5649..7bdff04 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -41,6 +41,7 @@
#define NEXTHDR_ICMP 58 /* ICMP for IPv6. */
#define NEXTHDR_NONE 59 /* No next header */
#define NEXTHDR_DEST 60 /* Destination options header. */
+#define NEXTHDR_SCTP 132 /* SCTP message. */
#define NEXTHDR_MOBILITY 135 /* Mobility header. */
#define NEXTHDR_MAX 255
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 644d9c2..0c1288a 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -181,8 +181,7 @@
const struct nf_conntrack_tuple *tuple);
extern int nf_conntrack_hash_check_insert(struct nf_conn *ct);
-extern void nf_ct_delete_from_lists(struct nf_conn *ct);
-extern void nf_ct_dying_timeout(struct nf_conn *ct);
+bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report);
extern void nf_conntrack_flush_report(struct net *net, u32 portid, int report);
@@ -235,7 +234,7 @@
}
/* These are for NAT. Icky. */
-extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
+extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq);
@@ -249,7 +248,9 @@
/* Iterate over all conntracks: if iter returns true, it's deleted. */
extern void
-nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data);
+nf_ct_iterate_cleanup(struct net *net,
+ int (*iter)(struct nf_conn *i, void *data),
+ void *data, u32 portid, int report);
extern void nf_conntrack_free(struct nf_conn *ct);
extern struct nf_conn *
nf_conntrack_alloc(struct net *net, u16 zone,
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 977bc8a..ff95434 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -10,6 +10,7 @@
#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
NF_CT_EXT_NAT,
#endif
+ NF_CT_EXT_SEQADJ,
NF_CT_EXT_ACCT,
#ifdef CONFIG_NF_CONNTRACK_EVENTS
NF_CT_EXT_ECACHE,
@@ -26,17 +27,22 @@
#ifdef CONFIG_NF_CONNTRACK_LABELS
NF_CT_EXT_LABELS,
#endif
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ NF_CT_EXT_SYNPROXY,
+#endif
NF_CT_EXT_NUM,
};
#define NF_CT_EXT_HELPER_TYPE struct nf_conn_help
#define NF_CT_EXT_NAT_TYPE struct nf_conn_nat
+#define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj
#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp
#define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
+#define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy
/* Extensions: optional stuff which isn't permanently in struct. */
struct nf_ct_ext {
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 914d8d9..b411d7b 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -148,17 +148,10 @@
extern const struct nla_policy nf_ct_port_nla_policy[];
#ifdef CONFIG_SYSCTL
-#ifdef DEBUG_INVALID_PACKETS
#define LOG_INVALID(net, proto) \
((net)->ct.sysctl_log_invalid == (proto) || \
(net)->ct.sysctl_log_invalid == IPPROTO_RAW)
#else
-#define LOG_INVALID(net, proto) \
- (((net)->ct.sysctl_log_invalid == (proto) || \
- (net)->ct.sysctl_log_invalid == IPPROTO_RAW) \
- && net_ratelimit())
-#endif
-#else
static inline int LOG_INVALID(struct net *net, int proto) { return 0; }
#endif /* CONFIG_SYSCTL */
diff --git a/include/net/netfilter/nf_conntrack_seqadj.h b/include/net/netfilter/nf_conntrack_seqadj.h
new file mode 100644
index 0000000..f6177a5
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_seqadj.h
@@ -0,0 +1,51 @@
+#ifndef _NF_CONNTRACK_SEQADJ_H
+#define _NF_CONNTRACK_SEQADJ_H
+
+#include <net/netfilter/nf_conntrack_extend.h>
+
+/**
+ * struct nf_ct_seqadj - sequence number adjustment information
+ *
+ * @correction_pos: position of the last TCP sequence number modification
+ * @offset_before: sequence number offset before last modification
+ * @offset_after: sequence number offset after last modification
+ */
+struct nf_ct_seqadj {
+ u32 correction_pos;
+ s32 offset_before;
+ s32 offset_after;
+};
+
+struct nf_conn_seqadj {
+ struct nf_ct_seqadj seq[IP_CT_DIR_MAX];
+};
+
+static inline struct nf_conn_seqadj *nfct_seqadj(const struct nf_conn *ct)
+{
+ return nf_ct_ext_find(ct, NF_CT_EXT_SEQADJ);
+}
+
+static inline struct nf_conn_seqadj *nfct_seqadj_ext_add(struct nf_conn *ct)
+{
+ return nf_ct_ext_add(ct, NF_CT_EXT_SEQADJ, GFP_ATOMIC);
+}
+
+extern int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ s32 off);
+extern int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ __be32 seq, s32 off);
+extern void nf_ct_tcp_seqadj_set(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ s32 off);
+
+extern int nf_ct_seq_adjust(struct sk_buff *skb,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ unsigned int protoff);
+extern s32 nf_ct_seq_offset(const struct nf_conn *ct, enum ip_conntrack_dir,
+ u32 seq);
+
+extern int nf_conntrack_seqadj_init(void);
+extern void nf_conntrack_seqadj_fini(void);
+
+#endif /* _NF_CONNTRACK_SEQADJ_H */
diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h
new file mode 100644
index 0000000..806f54a
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_synproxy.h
@@ -0,0 +1,77 @@
+#ifndef _NF_CONNTRACK_SYNPROXY_H
+#define _NF_CONNTRACK_SYNPROXY_H
+
+#include <net/netns/generic.h>
+
+struct nf_conn_synproxy {
+ u32 isn;
+ u32 its;
+ u32 tsoff;
+};
+
+static inline struct nf_conn_synproxy *nfct_synproxy(const struct nf_conn *ct)
+{
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ return nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY);
+#else
+ return NULL;
+#endif
+}
+
+static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct nf_conn *ct)
+{
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ return nf_ct_ext_add(ct, NF_CT_EXT_SYNPROXY, GFP_ATOMIC);
+#else
+ return NULL;
+#endif
+}
+
+struct synproxy_stats {
+ unsigned int syn_received;
+ unsigned int cookie_invalid;
+ unsigned int cookie_valid;
+ unsigned int cookie_retrans;
+ unsigned int conn_reopened;
+};
+
+struct synproxy_net {
+ struct nf_conn *tmpl;
+ struct synproxy_stats __percpu *stats;
+};
+
+extern int synproxy_net_id;
+static inline struct synproxy_net *synproxy_pernet(struct net *net)
+{
+ return net_generic(net, synproxy_net_id);
+}
+
+struct synproxy_options {
+ u8 options;
+ u8 wscale;
+ u16 mss;
+ u32 tsval;
+ u32 tsecr;
+};
+
+struct tcphdr;
+struct xt_synproxy_info;
+extern void synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
+ const struct tcphdr *th,
+ struct synproxy_options *opts);
+extern unsigned int synproxy_options_size(const struct synproxy_options *opts);
+extern void synproxy_build_options(struct tcphdr *th,
+ const struct synproxy_options *opts);
+
+extern void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
+ struct synproxy_options *opts);
+extern void synproxy_check_timestamp_cookie(struct synproxy_options *opts);
+
+extern unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
+ unsigned int protoff,
+ struct tcphdr *th,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_conn_synproxy *synproxy);
+
+#endif /* _NF_CONNTRACK_SYNPROXY_H */
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index ad14a79..59a1924 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -13,15 +13,6 @@
#define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POST_ROUTING && \
(hooknum) != NF_INET_LOCAL_IN)
-/* NAT sequence number modifications */
-struct nf_nat_seq {
- /* position of the last TCP sequence number modification (if any) */
- u_int32_t correction_pos;
-
- /* sequence number offset before and after last modification */
- int16_t offset_before, offset_after;
-};
-
#include <linux/list.h>
#include <linux/netfilter/nf_conntrack_pptp.h>
#include <net/netfilter/nf_conntrack_extend.h>
@@ -39,7 +30,6 @@
/* The structure embedded in the conntrack structure. */
struct nf_conn_nat {
struct hlist_node bysource;
- struct nf_nat_seq seq[IP_CT_DIR_MAX];
struct nf_conn *ct;
union nf_conntrack_nat_help help;
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h
index b4d6bfc..404324d 100644
--- a/include/net/netfilter/nf_nat_helper.h
+++ b/include/net/netfilter/nf_nat_helper.h
@@ -39,28 +39,9 @@
const char *rep_buffer,
unsigned int rep_len);
-extern void nf_nat_set_seq_adjust(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- __be32 seq, s16 off);
-extern int nf_nat_seq_adjust(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff);
-extern int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff);
-
/* Setup NAT on this expected conntrack so it follows master, but goes
* to port ct->master->saved_proto. */
extern void nf_nat_follow_master(struct nf_conn *ct,
struct nf_conntrack_expect *this);
-extern s16 nf_nat_get_offset(const struct nf_conn *ct,
- enum ip_conntrack_dir dir,
- u32 seq);
-
-extern void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
- u32 dir, int off);
-
#endif
diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h
deleted file mode 100644
index 36d9379..0000000
--- a/include/net/netfilter/nf_tproxy_core.h
+++ /dev/null
@@ -1,210 +0,0 @@
-#ifndef _NF_TPROXY_CORE_H
-#define _NF_TPROXY_CORE_H
-
-#include <linux/types.h>
-#include <linux/in.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/inet_hashtables.h>
-#include <net/inet6_hashtables.h>
-#include <net/tcp.h>
-
-#define NFT_LOOKUP_ANY 0
-#define NFT_LOOKUP_LISTENER 1
-#define NFT_LOOKUP_ESTABLISHED 2
-
-/* look up and get a reference to a matching socket */
-
-
-/* This function is used by the 'TPROXY' target and the 'socket'
- * match. The following lookups are supported:
- *
- * Explicit TProxy target rule
- * ===========================
- *
- * This is used when the user wants to intercept a connection matching
- * an explicit iptables rule. In this case the sockets are assumed
- * matching in preference order:
- *
- * - match: if there's a fully established connection matching the
- * _packet_ tuple, it is returned, assuming the redirection
- * already took place and we process a packet belonging to an
- * established connection
- *
- * - match: if there's a listening socket matching the redirection
- * (e.g. on-port & on-ip of the connection), it is returned,
- * regardless if it was bound to 0.0.0.0 or an explicit
- * address. The reasoning is that if there's an explicit rule, it
- * does not really matter if the listener is bound to an interface
- * or to 0. The user already stated that he wants redirection
- * (since he added the rule).
- *
- * "socket" match based redirection (no specific rule)
- * ===================================================
- *
- * There are connections with dynamic endpoints (e.g. FTP data
- * connection) that the user is unable to add explicit rules
- * for. These are taken care of by a generic "socket" rule. It is
- * assumed that the proxy application is trusted to open such
- * connections without explicit iptables rule (except of course the
- * generic 'socket' rule). In this case the following sockets are
- * matched in preference order:
- *
- * - match: if there's a fully established connection matching the
- * _packet_ tuple
- *
- * - match: if there's a non-zero bound listener (possibly with a
- * non-local address) We don't accept zero-bound listeners, since
- * then local services could intercept traffic going through the
- * box.
- *
- * Please note that there's an overlap between what a TPROXY target
- * and a socket match will match. Normally if you have both rules the
- * "socket" match will be the first one, effectively all packets
- * belonging to established connections going through that one.
- */
-static inline struct sock *
-nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
- const __be32 saddr, const __be32 daddr,
- const __be16 sport, const __be16 dport,
- const struct net_device *in, int lookup_type)
-{
- struct sock *sk;
-
- /* look up socket */
- switch (protocol) {
- case IPPROTO_TCP:
- switch (lookup_type) {
- case NFT_LOOKUP_ANY:
- sk = __inet_lookup(net, &tcp_hashinfo,
- saddr, sport, daddr, dport,
- in->ifindex);
- break;
- case NFT_LOOKUP_LISTENER:
- sk = inet_lookup_listener(net, &tcp_hashinfo,
- saddr, sport,
- daddr, dport,
- in->ifindex);
-
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too */
-
- break;
- case NFT_LOOKUP_ESTABLISHED:
- sk = inet_lookup_established(net, &tcp_hashinfo,
- saddr, sport, daddr, dport,
- in->ifindex);
- break;
- default:
- WARN_ON(1);
- sk = NULL;
- break;
- }
- break;
- case IPPROTO_UDP:
- sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
- if (sk && lookup_type != NFT_LOOKUP_ANY) {
- int connected = (sk->sk_state == TCP_ESTABLISHED);
- int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
-
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too */
- if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
- (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
- sock_put(sk);
- sk = NULL;
- }
- }
- break;
- default:
- WARN_ON(1);
- sk = NULL;
- }
-
- pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
- protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
-
- return sk;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static inline struct sock *
-nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
- const struct in6_addr *saddr, const struct in6_addr *daddr,
- const __be16 sport, const __be16 dport,
- const struct net_device *in, int lookup_type)
-{
- struct sock *sk;
-
- /* look up socket */
- switch (protocol) {
- case IPPROTO_TCP:
- switch (lookup_type) {
- case NFT_LOOKUP_ANY:
- sk = inet6_lookup(net, &tcp_hashinfo,
- saddr, sport, daddr, dport,
- in->ifindex);
- break;
- case NFT_LOOKUP_LISTENER:
- sk = inet6_lookup_listener(net, &tcp_hashinfo,
- saddr, sport,
- daddr, ntohs(dport),
- in->ifindex);
-
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too */
-
- break;
- case NFT_LOOKUP_ESTABLISHED:
- sk = __inet6_lookup_established(net, &tcp_hashinfo,
- saddr, sport, daddr, ntohs(dport),
- in->ifindex);
- break;
- default:
- WARN_ON(1);
- sk = NULL;
- break;
- }
- break;
- case IPPROTO_UDP:
- sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
- if (sk && lookup_type != NFT_LOOKUP_ANY) {
- int connected = (sk->sk_state == TCP_ESTABLISHED);
- int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
-
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too */
- if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
- (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
- sock_put(sk);
- sk = NULL;
- }
- }
- break;
- default:
- WARN_ON(1);
- sk = NULL;
- }
-
- pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
- protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
-
- return sk;
-}
-#endif
-
-/* assign a socket to the skb -- consumes sk */
-void
-nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk);
-
-#endif
diff --git a/include/net/netfilter/nfnetlink_queue.h b/include/net/netfilter/nfnetlink_queue.h
index 86267a5..aff88ba 100644
--- a/include/net/netfilter/nfnetlink_queue.h
+++ b/include/net/netfilter/nfnetlink_queue.h
@@ -15,6 +15,8 @@
enum ip_conntrack_info ctinfo);
void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, int diff);
+int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
+ u32 portid, u32 report);
#else
inline struct nf_conn *
nfqnl_ct_get(struct sk_buff *entskb, size_t *size, enum ip_conntrack_info *ctinfo)
@@ -39,5 +41,11 @@
enum ip_conntrack_info ctinfo, int diff)
{
}
+
+inline int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
+ u32 portid, u32 report)
+{
+ return 0;
+}
#endif /* NF_CONNTRACK */
#endif
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index f7c24f8..59ec3cd 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -85,6 +85,9 @@
int register_qdisc(struct Qdisc_ops *qops);
int unregister_qdisc(struct Qdisc_ops *qops);
+void qdisc_get_default(char *id, size_t len);
+int qdisc_set_default(const char *id);
+
void qdisc_list_del(struct Qdisc *q);
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 76368c9..f4eb365 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -316,6 +316,7 @@
extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_fast_ops;
extern struct Qdisc_ops mq_qdisc_ops;
+extern const struct Qdisc_ops *default_qdisc_ops;
struct Qdisc_class_common {
u32 classid;
@@ -369,9 +370,9 @@
void qdisc_destroy(struct Qdisc *qdisc);
void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
- struct Qdisc_ops *ops);
+ const struct Qdisc_ops *ops);
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
- struct Qdisc_ops *ops, u32 parentid);
+ const struct Qdisc_ops *ops, u32 parentid);
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
const struct qdisc_size_table *stab);
void tcf_destroy(struct tcf_proto *tp);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 422db6c..2174d8d 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -113,29 +113,27 @@
/* The SCTP globals structure. */
extern struct sctp_globals {
- /* The following variables are implementation specific. */
-
- /* Default initialization values to be applied to new associations. */
- __u16 max_instreams;
- __u16 max_outstreams;
-
/* This is a list of groups of functions for each address
* family that we support.
*/
struct list_head address_families;
/* This is the hash of all endpoints. */
- int ep_hashsize;
struct sctp_hashbucket *ep_hashtable;
-
/* This is the hash of all associations. */
- int assoc_hashsize;
struct sctp_hashbucket *assoc_hashtable;
-
/* This is the sctp port control hash. */
- int port_hashsize;
struct sctp_bind_hashbucket *port_hashtable;
+ /* Sizes of above hashtables. */
+ int ep_hashsize;
+ int assoc_hashsize;
+ int port_hashsize;
+
+ /* Default initialization values to be applied to new associations. */
+ __u16 max_instreams;
+ __u16 max_outstreams;
+
/* Flag to indicate whether computing and verifying checksum
* is disabled. */
bool checksum_disable;
diff --git a/include/net/sock.h b/include/net/sock.h
index e4bbcbf..6ba2e7b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -232,6 +232,7 @@
* @sk_napi_id: id of the last napi context to receive data for sk
* @sk_ll_usec: usecs to busypoll when there is no data
* @sk_allocation: allocation mode
+ * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
* @sk_sndbuf: size of send buffer in bytes
* @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
* %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@@ -361,6 +362,7 @@
kmemcheck_bitfield_end(flags);
int sk_wmem_queued;
gfp_t sk_allocation;
+ u32 sk_pacing_rate; /* bytes per second */
netdev_features_t sk_route_caps;
netdev_features_t sk_route_nocaps;
int sk_gso_type;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 09cb5c1..6a6a88d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -281,6 +281,7 @@
extern int sysctl_tcp_limit_output_bytes;
extern int sysctl_tcp_challenge_ack_limit;
extern unsigned int sysctl_tcp_notsent_lowat;
+extern int sysctl_tcp_min_tso_segs;
extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
@@ -476,9 +477,13 @@
/* From syncookies.c */
extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
+extern int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
+ u32 cookie);
extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt);
#ifdef CONFIG_SYN_COOKIES
+extern u32 __cookie_v4_init_sequence(const struct iphdr *iph,
+ const struct tcphdr *th, u16 *mssp);
extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
__u16 *mss);
#else
@@ -495,8 +500,12 @@
struct net *net, bool *ecn_ok);
/* From net/ipv6/syncookies.c */
+extern int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
+ u32 cookie);
extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES
+extern u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
+ const struct tcphdr *th, u16 *mssp);
extern __u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb,
__u16 *mss);
#else
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 94ce082..89d3d8a 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1548,7 +1548,7 @@
int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info);
u32 xfrm_get_acqseq(void);
extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
-struct xfrm_state *xfrm_find_acq(struct net *net, struct xfrm_mark *mark,
+struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
u8 mode, u32 reqid, u8 proto,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr, int create,
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index b950c02..dbf0666 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -56,6 +56,7 @@
#define PACKET_FANOUT_LB 1
#define PACKET_FANOUT_CPU 2
#define PACKET_FANOUT_ROLLOVER 3
+#define PACKET_FANOUT_RND 4
#define PACKET_FANOUT_FLAG_ROLLOVER 0x1000
#define PACKET_FANOUT_FLAG_DEFRAG 0x8000
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 1870ee2..e9502dd 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -56,6 +56,8 @@
#define TUNGETVNETHDRSZ _IOR('T', 215, int)
#define TUNSETVNETHDRSZ _IOW('T', 216, int)
#define TUNSETQUEUE _IOW('T', 217, int)
+#define TUNSETIFINDEX _IOW('T', 218, unsigned int)
+#define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
@@ -70,6 +72,7 @@
#define IFF_DETACH_QUEUE 0x0400
/* read-only flag */
#define IFF_PERSIST 0x0800
+#define IFF_NOFILTER 0x1000
/* Socket options */
#define TUN_TX_TIMESTAMP 1
diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index 6cf06bf..4119594 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -133,4 +133,40 @@
__u8 reserved;
};
+/* index values for the variables in ipv4_devconf */
+enum
+{
+ IPV4_DEVCONF_FORWARDING=1,
+ IPV4_DEVCONF_MC_FORWARDING,
+ IPV4_DEVCONF_PROXY_ARP,
+ IPV4_DEVCONF_ACCEPT_REDIRECTS,
+ IPV4_DEVCONF_SECURE_REDIRECTS,
+ IPV4_DEVCONF_SEND_REDIRECTS,
+ IPV4_DEVCONF_SHARED_MEDIA,
+ IPV4_DEVCONF_RP_FILTER,
+ IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE,
+ IPV4_DEVCONF_BOOTP_RELAY,
+ IPV4_DEVCONF_LOG_MARTIANS,
+ IPV4_DEVCONF_TAG,
+ IPV4_DEVCONF_ARPFILTER,
+ IPV4_DEVCONF_MEDIUM_ID,
+ IPV4_DEVCONF_NOXFRM,
+ IPV4_DEVCONF_NOPOLICY,
+ IPV4_DEVCONF_FORCE_IGMP_VERSION,
+ IPV4_DEVCONF_ARP_ANNOUNCE,
+ IPV4_DEVCONF_ARP_IGNORE,
+ IPV4_DEVCONF_PROMOTE_SECONDARIES,
+ IPV4_DEVCONF_ARP_ACCEPT,
+ IPV4_DEVCONF_ARP_NOTIFY,
+ IPV4_DEVCONF_ACCEPT_LOCAL,
+ IPV4_DEVCONF_SRC_VMARK,
+ IPV4_DEVCONF_PROXY_ARP_PVLAN,
+ IPV4_DEVCONF_ROUTE_LOCALNET,
+ IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL,
+ IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL,
+ __IPV4_DEVCONF_MAX
+};
+
+#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1)
+
#endif /* _UAPI_LINUX_IP_H */
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index d07ac69..593b0e3 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -162,6 +162,7 @@
DEVCONF_NDISC_NOTIFY,
DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL,
DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL,
+ DEVCONF_SUPPRESS_FRAG_NDISC,
DEVCONF_MAX
};
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
index 4111577..1749154 100644
--- a/include/uapi/linux/netfilter/Kbuild
+++ b/include/uapi/linux/netfilter/Kbuild
@@ -22,6 +22,7 @@
header-y += xt_CONNSECMARK.h
header-y += xt_CT.h
header-y += xt_DSCP.h
+header-y += xt_HMARK.h
header-y += xt_IDLETIMER.h
header-y += xt_LED.h
header-y += xt_LOG.h
@@ -68,6 +69,7 @@
header-y += xt_rateest.h
header-y += xt_realm.h
header-y += xt_recent.h
+header-y += xt_rpfilter.h
header-y += xt_sctp.h
header-y += xt_set.h
header-y += xt_socket.h
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index d69483f..8dd8038 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -99,7 +99,8 @@
IPCT_PROTOINFO, /* protocol information has changed */
IPCT_HELPER, /* new helper has been set */
IPCT_MARK, /* new mark has been set */
- IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */
+ IPCT_SEQADJ, /* sequence adjustment has changed */
+ IPCT_NATSEQADJ = IPCT_SEQADJ,
IPCT_SECMARK, /* new security mark has been set */
IPCT_LABEL, /* new connlabel has been set */
};
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 08fabc6..acad6c5 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -42,8 +42,10 @@
CTA_ID,
CTA_NAT_DST,
CTA_TUPLE_MASTER,
- CTA_NAT_SEQ_ADJ_ORIG,
- CTA_NAT_SEQ_ADJ_REPLY,
+ CTA_SEQ_ADJ_ORIG,
+ CTA_NAT_SEQ_ADJ_ORIG = CTA_SEQ_ADJ_ORIG,
+ CTA_SEQ_ADJ_REPLY,
+ CTA_NAT_SEQ_ADJ_REPLY = CTA_SEQ_ADJ_REPLY,
CTA_SECMARK, /* obsolete */
CTA_ZONE,
CTA_SECCTX,
@@ -165,6 +167,15 @@
};
#define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1)
+enum ctattr_seqadj {
+ CTA_SEQADJ_UNSPEC,
+ CTA_SEQADJ_CORRECTION_POS,
+ CTA_SEQADJ_OFFSET_BEFORE,
+ CTA_SEQADJ_OFFSET_AFTER,
+ __CTA_SEQADJ_MAX
+};
+#define CTA_SEQADJ_MAX (__CTA_SEQADJ_MAX - 1)
+
enum ctattr_natseq {
CTA_NAT_SEQ_UNSPEC,
CTA_NAT_SEQ_CORRECTION_POS,
diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
index 3a9b921..0132bad 100644
--- a/include/uapi/linux/netfilter/nfnetlink_queue.h
+++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
@@ -46,6 +46,7 @@
NFQA_CT_INFO, /* enum ip_conntrack_info */
NFQA_CAP_LEN, /* __u32 length of captured packet */
NFQA_SKB_INFO, /* __u32 skb meta information */
+ NFQA_EXP, /* nf_conntrack_netlink.h */
__NFQA_MAX
};
diff --git a/include/linux/netfilter/xt_HMARK.h b/include/uapi/linux/netfilter/xt_HMARK.h
similarity index 100%
rename from include/linux/netfilter/xt_HMARK.h
rename to include/uapi/linux/netfilter/xt_HMARK.h
diff --git a/include/uapi/linux/netfilter/xt_SYNPROXY.h b/include/uapi/linux/netfilter/xt_SYNPROXY.h
new file mode 100644
index 0000000..2d59fba
--- /dev/null
+++ b/include/uapi/linux/netfilter/xt_SYNPROXY.h
@@ -0,0 +1,16 @@
+#ifndef _XT_SYNPROXY_H
+#define _XT_SYNPROXY_H
+
+#define XT_SYNPROXY_OPT_MSS 0x01
+#define XT_SYNPROXY_OPT_WSCALE 0x02
+#define XT_SYNPROXY_OPT_SACK_PERM 0x04
+#define XT_SYNPROXY_OPT_TIMESTAMP 0x08
+#define XT_SYNPROXY_OPT_ECN 0x10
+
+struct xt_synproxy_info {
+ __u8 options;
+ __u8 wscale;
+ __u16 mss;
+};
+
+#endif /* _XT_SYNPROXY_H */
diff --git a/include/linux/netfilter/xt_rpfilter.h b/include/uapi/linux/netfilter/xt_rpfilter.h
similarity index 100%
rename from include/linux/netfilter/xt_rpfilter.h
rename to include/uapi/linux/netfilter/xt_rpfilter.h
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 52490b0..a74d375 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2007-2011 Nicira Networks.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -259,6 +259,7 @@
OVS_KEY_ATTR_ND, /* struct ovs_key_nd */
OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */
OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */
+ OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */
#ifdef __KERNEL__
OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */
@@ -333,6 +334,11 @@
__be16 udp_dst;
};
+struct ovs_key_sctp {
+ __be16 sctp_src;
+ __be16 sctp_dst;
+};
+
struct ovs_key_icmp {
__u8 icmp_type;
__u8 icmp_code;
@@ -379,6 +385,12 @@
* @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
* last-used time, accumulated TCP flags, and statistics for this flow.
* Otherwise ignored in requests. Never present in notifications.
+ * @OVS_FLOW_ATTR_MASK: Nested %OVS_KEY_ATTR_* attributes specifying the
+ * mask bits for wildcarded flow match. Mask bit value '1' specifies exact
+ * match with corresponding flow key bit, while mask bit value '0' specifies
+ * a wildcarded match. Omitting a key attribute from the mask wildcards all of
+ * its fields. Optional for all requests. If the mask itself is not present,
+ * all flow key bits are exact match bits.
*
* These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_FLOW_* commands.
@@ -391,6 +403,7 @@
OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */
OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */
+ OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */
__OVS_FLOW_ATTR_MAX
};
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 09d62b92..9b82913 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -744,4 +744,45 @@
};
};
+/* FQ */
+
+enum {
+ TCA_FQ_UNSPEC,
+
+ TCA_FQ_PLIMIT, /* limit of total number of packets in queue */
+
+ TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */
+
+ TCA_FQ_QUANTUM, /* RR quantum */
+
+ TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */
+
+ TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */
+
+ TCA_FQ_FLOW_DEFAULT_RATE,/* for sockets with unspecified sk_pacing_rate,
+ * use the following rate
+ */
+
+ TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */
+
+ TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */
+ __TCA_FQ_MAX
+};
+
+#define TCA_FQ_MAX (__TCA_FQ_MAX - 1)
+
+struct tc_fq_qd_stats {
+ __u64 gc_flows;
+ __u64 highprio_packets;
+ __u64 tcp_retrans;
+ __u64 throttled;
+ __u64 flows_plimit;
+ __u64 pkts_too_long;
+ __u64 allocation_errors;
+ __s64 time_next_delayed_flow;
+ __u32 flows;
+ __u32 inactive_flows;
+ __u32 throttled_flows;
+ __u32 pad;
+};
#endif
diff --git a/init/Kconfig b/init/Kconfig
index 247084b..fed81b5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -955,7 +955,7 @@
Memory Resource Controller Swap Extension comes with its price in
a bigger memory consumption. General purpose distribution kernels
which want to enable the feature but keep it disabled by default
- and let the user enable it by swapaccount boot command line
+ and let the user enable it by swapaccount=1 boot command line
parameter should have this option unselected.
For those who want to have the feature enabled by default should
select this option (if, for some reason, they need to disable it
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e565778..010a008 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1608,11 +1608,13 @@
{
struct cpuset *cs = cgroup_cs(cgrp);
cpuset_filetype_t type = cft->private;
- int retval = -ENODEV;
+ int retval = 0;
mutex_lock(&cpuset_mutex);
- if (!is_cpuset_online(cs))
+ if (!is_cpuset_online(cs)) {
+ retval = -ENODEV;
goto out_unlock;
+ }
switch (type) {
case FILE_CPU_EXCLUSIVE:
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index a326f27..0b479a6 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -121,7 +121,7 @@
BUG_ON(bits > 32);
WARN_ON(!irqs_disabled());
read_sched_clock = read;
- sched_clock_mask = (1 << bits) - 1;
+ sched_clock_mask = (1ULL << bits) - 1;
cd.rate = rate;
/* calculate the mult/shift to convert counter ticks to ns. */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e77edc9..e8a1516 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -182,7 +182,8 @@
* Don't allow the user to think they can get
* full NO_HZ with this machine.
*/
- WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock");
+ WARN_ONCE(have_nohz_full_mask,
+ "NO_HZ FULL will not work with unstable sched clock");
return false;
}
#endif
@@ -343,8 +344,6 @@
void __init tick_nohz_init(void)
{
- int cpu;
-
if (!have_nohz_full_mask) {
if (tick_nohz_init_all() < 0)
return;
diff --git a/kernel/wait.c b/kernel/wait.c
index dec68bd..d550920 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -363,8 +363,7 @@
/**
* wake_up_atomic_t - Wake up a waiter on a atomic_t
- * @word: The word being waited on, a kernel virtual address
- * @bit: The bit of the word being waited on
+ * @p: The atomic_t being waited on, a kernel virtual address
*
* Wake up anyone waiting for the atomic_t to go to zero.
*
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index fd94058..28321d8 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -437,7 +437,7 @@
exit:
return ret;
}
-EXPORT_SYMBOL_GPL(lz4_compress);
+EXPORT_SYMBOL(lz4_compress);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("LZ4 compressor");
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index d3414ea..411be80 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -299,7 +299,7 @@
return ret;
}
#ifndef STATIC
-EXPORT_SYMBOL_GPL(lz4_decompress);
+EXPORT_SYMBOL(lz4_decompress);
#endif
int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
@@ -319,8 +319,8 @@
return ret;
}
#ifndef STATIC
-EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize);
+EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("LZ4 Decompressor");
#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index eb1a74f..f344f76 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -533,7 +533,7 @@
exit:
return ret;
}
-EXPORT_SYMBOL_GPL(lz4hc_compress);
+EXPORT_SYMBOL(lz4hc_compress);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("LZ4HC compressor");
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c5792a5..0878ff7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6969,7 +6969,6 @@
#ifdef CONFIG_MEMCG_SWAP
static int __init enable_swap_account(char *s)
{
- /* consider enabled if no parameter or 1 is given */
if (!strcmp(s, "1"))
really_do_swap_account = 1;
else if (!strcmp(s, "0"))
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 62da527..0a8a80c 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -28,6 +28,22 @@
#include "bat_algo.h"
#include "network-coding.h"
+
+/**
+ * batadv_dup_status - duplicate status
+ * @BATADV_NO_DUP: the packet is no duplicate
+ * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
+ * neighbor)
+ * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
+ * @BATADV_PROTECTED: originator is currently protected (after reboot)
+ */
+enum batadv_dup_status {
+ BATADV_NO_DUP = 0,
+ BATADV_ORIG_DUP,
+ BATADV_NEIGH_DUP,
+ BATADV_PROTECTED,
+};
+
/**
* batadv_ring_buffer_set - update the ring buffer with the given value
* @lq_recv: pointer to the ring buffer
@@ -71,21 +87,6 @@
return (uint8_t)(sum / count);
}
-/*
- * batadv_dup_status - duplicate status
- * @BATADV_NO_DUP: the packet is a duplicate
- * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
- * neighbor)
- * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
- * @BATADV_PROTECTED: originator is currently protected (after reboot)
- */
-enum batadv_dup_status {
- BATADV_NO_DUP = 0,
- BATADV_ORIG_DUP,
- BATADV_NEIGH_DUP,
- BATADV_PROTECTED,
-};
-
static struct batadv_neigh_node *
batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
const uint8_t *neigh_addr,
@@ -478,6 +479,7 @@
kfree(forw_packet_aggr);
goto out;
}
+ forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 7614af3..1ce4b87 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -190,6 +190,33 @@
return curr_gw;
}
+/**
+ * batadv_gw_check_client_stop - check if client mode has been switched off
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * This function assumes the caller has checked that the gw state *is actually
+ * changing*. This function is not supposed to be called when there is no state
+ * change.
+ */
+void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
+{
+ struct batadv_gw_node *curr_gw;
+
+ if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
+ return;
+
+ curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+ if (!curr_gw)
+ return;
+
+ /* if batman-adv is switching the gw client mode off and a gateway was
+ * already selected, send a DEL uevent
+ */
+ batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL);
+
+ batadv_gw_node_free_ref(curr_gw);
+}
+
void batadv_gw_election(struct batadv_priv *bat_priv)
{
struct batadv_gw_node *curr_gw = NULL, *next_gw = NULL;
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 1037d75..ceef4eb 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -20,6 +20,7 @@
#ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
#define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
+void batadv_gw_check_client_stop(struct batadv_priv *bat_priv);
void batadv_gw_deselect(struct batadv_priv *bat_priv);
void batadv_gw_election(struct batadv_priv *bat_priv);
struct batadv_orig_node *
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index b27508b..5a99bb4 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -183,6 +183,7 @@
goto out;
}
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 08125f3..c72d1bc 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -19,6 +19,10 @@
#include <linux/crc32c.h>
#include <linux/highmem.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/dsfield.h>
#include "main.h"
#include "sysfs.h"
#include "debugfs.h"
@@ -249,6 +253,60 @@
return primary_if;
}
+/**
+ * batadv_skb_set_priority - sets skb priority according to packet content
+ * @skb: the packet to be sent
+ * @offset: offset to the packet content
+ *
+ * This function sets a value between 256 and 263 (802.1d priority), which
+ * can be interpreted by the cfg80211 or other drivers.
+ */
+void batadv_skb_set_priority(struct sk_buff *skb, int offset)
+{
+ struct iphdr ip_hdr_tmp, *ip_hdr;
+ struct ipv6hdr ip6_hdr_tmp, *ip6_hdr;
+ struct ethhdr ethhdr_tmp, *ethhdr;
+ struct vlan_ethhdr *vhdr, vhdr_tmp;
+ u32 prio;
+
+ /* already set, do nothing */
+ if (skb->priority >= 256 && skb->priority <= 263)
+ return;
+
+ ethhdr = skb_header_pointer(skb, offset, sizeof(*ethhdr), &ethhdr_tmp);
+ if (!ethhdr)
+ return;
+
+ switch (ethhdr->h_proto) {
+ case htons(ETH_P_8021Q):
+ vhdr = skb_header_pointer(skb, offset + sizeof(*vhdr),
+ sizeof(*vhdr), &vhdr_tmp);
+ if (!vhdr)
+ return;
+ prio = ntohs(vhdr->h_vlan_TCI) & VLAN_PRIO_MASK;
+ prio = prio >> VLAN_PRIO_SHIFT;
+ break;
+ case htons(ETH_P_IP):
+ ip_hdr = skb_header_pointer(skb, offset + sizeof(*ethhdr),
+ sizeof(*ip_hdr), &ip_hdr_tmp);
+ if (!ip_hdr)
+ return;
+ prio = (ipv4_get_dsfield(ip_hdr) & 0xfc) >> 5;
+ break;
+ case htons(ETH_P_IPV6):
+ ip6_hdr = skb_header_pointer(skb, offset + sizeof(*ethhdr),
+ sizeof(*ip6_hdr), &ip6_hdr_tmp);
+ if (!ip6_hdr)
+ return;
+ prio = (ipv6_get_dsfield(ip6_hdr) & 0xfc) >> 5;
+ break;
+ default:
+ return;
+ }
+
+ skb->priority = prio + 256;
+}
+
static int batadv_recv_unhandled_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
{
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 5e9aebb..5d00f23 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2013.3.0"
+#define BATADV_SOURCE_VERSION "2013.4.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -184,6 +184,7 @@
int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
struct batadv_hard_iface *
batadv_seq_print_text_primary_if_get(struct seq_file *seq);
+void batadv_skb_set_priority(struct sk_buff *skb, int offset);
int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *ptype,
struct net_device *orig_dev);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 2f0bd3f..0439395 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -775,7 +775,7 @@
struct batadv_neigh_node *neigh_node = NULL;
struct batadv_unicast_packet *unicast_packet;
struct ethhdr *ethhdr = eth_hdr(skb);
- int res, ret = NET_RX_DROP;
+ int res, hdr_len, ret = NET_RX_DROP;
struct sk_buff *new_skb;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -835,6 +835,22 @@
/* decrement ttl */
unicast_packet->header.ttl--;
+ switch (unicast_packet->header.packet_type) {
+ case BATADV_UNICAST_4ADDR:
+ hdr_len = sizeof(struct batadv_unicast_4addr_packet);
+ break;
+ case BATADV_UNICAST:
+ hdr_len = sizeof(struct batadv_unicast_packet);
+ break;
+ default:
+ /* other packet types not supported - yet */
+ hdr_len = -1;
+ break;
+ }
+
+ if (hdr_len > 0)
+ batadv_skb_set_priority(skb, hdr_len);
+
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
/* translate transmit result into receive result */
@@ -1193,6 +1209,8 @@
if (batadv_bla_check_bcast_duplist(bat_priv, skb))
goto out;
+ batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet));
+
/* rebroadcast packet */
batadv_add_bcast_packet_to_list(bat_priv, skb, 1);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index e9ff8d8..0266edd 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -67,7 +67,6 @@
ethhdr->h_proto = __constant_htons(ETH_P_BATMAN);
skb_set_network_header(skb, ETH_HLEN);
- skb->priority = TC_PRIO_CONTROL;
skb->protocol = __constant_htons(ETH_P_BATMAN);
skb->dev = hard_iface->net_dev;
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0f04e1c..4493913 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -229,6 +229,8 @@
*/
}
+ batadv_skb_set_priority(skb, 0);
+
/* ethernet packet should be broadcasted */
if (do_bcast) {
primary_if = batadv_primary_if_get_selected(bat_priv);
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 929e304..4114b96 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -385,6 +385,10 @@
curr_gw_mode_str, buff);
batadv_gw_deselect(bat_priv);
+ /* always call batadv_gw_check_client_stop() before changing the gateway
+ * state
+ */
+ batadv_gw_check_client_stop(bat_priv);
atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp);
return count;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 429aeef..34510f3 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1626,6 +1626,7 @@
if (!skb)
goto out;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len);
tt_response->ttvn = ttvn;
@@ -1691,6 +1692,7 @@
if (!skb)
goto out;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
tt_req_len = sizeof(*tt_request);
@@ -1788,6 +1790,7 @@
if (!skb)
goto unlock;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
packet_pos = skb_put(skb, len);
tt_response = (struct batadv_tt_query_packet *)packet_pos;
@@ -1906,6 +1909,7 @@
if (!skb)
goto unlock;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
packet_pos = skb_put(skb, len);
tt_response = (struct batadv_tt_query_packet *)packet_pos;
@@ -2240,6 +2244,7 @@
if (!skb)
goto out;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len);
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 688a041..48b31d3 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -242,6 +242,8 @@
frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
if (!frag_skb)
goto dropped;
+
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(frag_skb, ucf_hdr_len);
unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -432,12 +434,16 @@
switch (packet_type) {
case BATADV_UNICAST:
- batadv_unicast_prepare_skb(skb, orig_node);
+ if (!batadv_unicast_prepare_skb(skb, orig_node))
+ goto out;
+
header_len = sizeof(struct batadv_unicast_packet);
break;
case BATADV_UNICAST_4ADDR:
- batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node,
- packet_subtype);
+ if (!batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node,
+ packet_subtype))
+ goto out;
+
header_len = sizeof(struct batadv_unicast_4addr_packet);
break;
default:
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 4983340..d8ea31a 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -397,6 +397,7 @@
kfree(info);
return NULL;
}
+ info->skb_packet->priority = TC_PRIO_CONTROL;
skb_reserve(info->skb_packet, ETH_HLEN);
packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len);
@@ -861,6 +862,7 @@
if (!bat_priv->vis.my_info->skb_packet)
goto free_info;
+ bat_priv->vis.my_info->skb_packet->priority = TC_PRIO_CONTROL;
skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN);
tmp_skb = bat_priv->vis.my_info->skb_packet;
packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet));
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 60aca91..ffd5874 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -161,7 +161,7 @@
if (!pv)
return;
- for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) {
f = __br_fdb_get(br, br->dev->dev_addr, vid);
if (f && f->is_local && !f->dst)
fdb_delete(br, f);
@@ -730,7 +730,7 @@
/* VID was specified, so use it. */
err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
} else {
- if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+ if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) {
err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
goto out;
}
@@ -739,7 +739,7 @@
* specify a VLAN. To be nice, add/update entry for every
* vlan on this port.
*/
- for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
if (err)
goto out;
@@ -817,7 +817,7 @@
err = __br_fdb_delete(p, addr, vid);
} else {
- if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+ if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) {
err = __br_fdb_delete(p, addr, 0);
goto out;
}
@@ -827,7 +827,7 @@
* vlan on this port.
*/
err = -ENOENT;
- for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
err &= __br_fdb_delete(p, addr, vid);
}
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index aa6c9a8..c41d5fb 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -383,6 +383,9 @@
netdev_update_features(br->dev);
+ if (br->dev->needed_headroom < dev->needed_headroom)
+ br->dev->needed_headroom = dev->needed_headroom;
+
spin_lock_bh(&br->lock);
changed_addr = br_stp_recalculate_bridge_id(br);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 1fc30ab..b9259ef 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -132,7 +132,7 @@
else
pv = br_get_vlan_info(br);
- if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN))
+ if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID))
goto done;
af = nla_nest_start(skb, IFLA_AF_SPEC);
@@ -140,7 +140,7 @@
goto nla_put_failure;
pvid = br_get_pvid(pv);
- for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
vinfo.vid = vid;
vinfo.flags = 0;
if (vid == pvid)
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index bd58b45..9a9ffe7 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -108,7 +108,7 @@
clear_bit(vid, v->vlan_bitmap);
v->num_vlans--;
- if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+ if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) {
if (v->port_idx)
rcu_assign_pointer(v->parent.port->vlan_info, NULL);
else
@@ -122,7 +122,7 @@
{
smp_wmb();
v->pvid = 0;
- bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+ bitmap_zero(v->vlan_bitmap, VLAN_N_VID);
if (v->port_idx)
rcu_assign_pointer(v->parent.port->vlan_info, NULL);
else
diff --git a/net/core/dev.c b/net/core/dev.c
index 1ed2b66..6fbb0c9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4367,59 +4367,50 @@
goto out;
}
-struct netdev_upper {
+struct netdev_adjacent {
struct net_device *dev;
+
+ /* upper master flag, there can only be one master device per list */
bool master;
+
+ /* indicates that this dev is our first-level lower/upper device */
+ bool neighbour;
+
+ /* counter for the number of times this device was added to us */
+ u16 ref_nr;
+
struct list_head list;
struct rcu_head rcu;
- struct list_head search_list;
};
-static void __append_search_uppers(struct list_head *search_list,
- struct net_device *dev)
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
+ struct net_device *adj_dev,
+ bool upper)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *adj;
+ struct list_head *dev_list;
- list_for_each_entry(upper, &dev->upper_dev_list, list) {
- /* check if this upper is not already in search list */
- if (list_empty(&upper->search_list))
- list_add_tail(&upper->search_list, search_list);
- }
-}
+ dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
-static bool __netdev_search_upper_dev(struct net_device *dev,
- struct net_device *upper_dev)
-{
- LIST_HEAD(search_list);
- struct netdev_upper *upper;
- struct netdev_upper *tmp;
- bool ret = false;
-
- __append_search_uppers(&search_list, dev);
- list_for_each_entry(upper, &search_list, search_list) {
- if (upper->dev == upper_dev) {
- ret = true;
- break;
- }
- __append_search_uppers(&search_list, upper->dev);
- }
- list_for_each_entry_safe(upper, tmp, &search_list, search_list)
- INIT_LIST_HEAD(&upper->search_list);
- return ret;
-}
-
-static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
- struct net_device *upper_dev)
-{
- struct netdev_upper *upper;
-
- list_for_each_entry(upper, &dev->upper_dev_list, list) {
- if (upper->dev == upper_dev)
- return upper;
+ list_for_each_entry(adj, dev_list, list) {
+ if (adj->dev == adj_dev)
+ return adj;
}
return NULL;
}
+static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
+ struct net_device *udev)
+{
+ return __netdev_find_adj(dev, udev, true);
+}
+
+static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
+ struct net_device *ldev)
+{
+ return __netdev_find_adj(dev, ldev, false);
+}
+
/**
* netdev_has_upper_dev - Check if device is linked to an upper device
* @dev: device
@@ -4462,7 +4453,7 @@
*/
struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *upper;
ASSERT_RTNL();
@@ -4470,13 +4461,38 @@
return NULL;
upper = list_first_entry(&dev->upper_dev_list,
- struct netdev_upper, list);
+ struct netdev_adjacent, list);
if (likely(upper->master))
return upper->dev;
return NULL;
}
EXPORT_SYMBOL(netdev_master_upper_dev_get);
+/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next device from the dev's upper list, starting from iter
+ * position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *upper;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+ if (&upper->list == &dev->upper_dev_list)
+ return NULL;
+
+ *iter = &upper->list;
+
+ return upper->dev;
+}
+EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
/**
* netdev_master_upper_dev_get_rcu - Get master upper device
* @dev: device
@@ -4486,20 +4502,158 @@
*/
struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *upper;
upper = list_first_or_null_rcu(&dev->upper_dev_list,
- struct netdev_upper, list);
+ struct netdev_adjacent, list);
if (upper && likely(upper->master))
return upper->dev;
return NULL;
}
EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
+static int __netdev_adjacent_dev_insert(struct net_device *dev,
+ struct net_device *adj_dev,
+ bool neighbour, bool master,
+ bool upper)
+{
+ struct netdev_adjacent *adj;
+
+ adj = __netdev_find_adj(dev, adj_dev, upper);
+
+ if (adj) {
+ BUG_ON(neighbour);
+ adj->ref_nr++;
+ return 0;
+ }
+
+ adj = kmalloc(sizeof(*adj), GFP_KERNEL);
+ if (!adj)
+ return -ENOMEM;
+
+ adj->dev = adj_dev;
+ adj->master = master;
+ adj->neighbour = neighbour;
+ adj->ref_nr = 1;
+
+ dev_hold(adj_dev);
+ pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
+ adj_dev->name, upper ? "upper" : "lower", dev->name,
+ adj_dev->name);
+
+ if (!upper) {
+ list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
+ return 0;
+ }
+
+ /* Ensure that master upper link is always the first item in list. */
+ if (master)
+ list_add_rcu(&adj->list, &dev->upper_dev_list);
+ else
+ list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
+
+ return 0;
+}
+
+static inline int __netdev_upper_dev_insert(struct net_device *dev,
+ struct net_device *udev,
+ bool master, bool neighbour)
+{
+ return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
+ true);
+}
+
+static inline int __netdev_lower_dev_insert(struct net_device *dev,
+ struct net_device *ldev,
+ bool neighbour)
+{
+ return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
+ false);
+}
+
+void __netdev_adjacent_dev_remove(struct net_device *dev,
+ struct net_device *adj_dev, bool upper)
+{
+ struct netdev_adjacent *adj;
+
+ if (upper)
+ adj = __netdev_find_upper(dev, adj_dev);
+ else
+ adj = __netdev_find_lower(dev, adj_dev);
+
+ if (!adj)
+ BUG();
+
+ if (adj->ref_nr > 1) {
+ adj->ref_nr--;
+ return;
+ }
+
+ list_del_rcu(&adj->list);
+ pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
+ adj_dev->name, upper ? "upper" : "lower", dev->name,
+ adj_dev->name);
+ dev_put(adj_dev);
+ kfree_rcu(adj, rcu);
+}
+
+static inline void __netdev_upper_dev_remove(struct net_device *dev,
+ struct net_device *udev)
+{
+ return __netdev_adjacent_dev_remove(dev, udev, true);
+}
+
+static inline void __netdev_lower_dev_remove(struct net_device *dev,
+ struct net_device *ldev)
+{
+ return __netdev_adjacent_dev_remove(dev, ldev, false);
+}
+
+int __netdev_adjacent_dev_insert_link(struct net_device *dev,
+ struct net_device *upper_dev,
+ bool master, bool neighbour)
+{
+ int ret;
+
+ ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
+ if (ret)
+ return ret;
+
+ ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
+ if (ret) {
+ __netdev_upper_dev_remove(dev, upper_dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static inline int __netdev_adjacent_dev_link(struct net_device *dev,
+ struct net_device *udev)
+{
+ return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
+}
+
+static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+ struct net_device *udev,
+ bool master)
+{
+ return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
+}
+
+void __netdev_adjacent_dev_unlink(struct net_device *dev,
+ struct net_device *upper_dev)
+{
+ __netdev_upper_dev_remove(dev, upper_dev);
+ __netdev_lower_dev_remove(upper_dev, dev);
+}
+
+
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *i, *j, *to_i, *to_j;
+ int ret = 0;
ASSERT_RTNL();
@@ -4507,7 +4661,7 @@
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_search_upper_dev(upper_dev, dev))
+ if (__netdev_find_upper(upper_dev, dev))
return -EBUSY;
if (__netdev_find_upper(dev, upper_dev))
@@ -4516,22 +4670,76 @@
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- upper = kmalloc(sizeof(*upper), GFP_KERNEL);
- if (!upper)
- return -ENOMEM;
+ ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
+ if (ret)
+ return ret;
- upper->dev = upper_dev;
- upper->master = master;
- INIT_LIST_HEAD(&upper->search_list);
+ /* Now that we linked these devs, make all the upper_dev's
+ * upper_dev_list visible to every dev's lower_dev_list and vice
+ * versa, and don't forget the devices themselves. All of these
+ * links are non-neighbours.
+ */
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ list_for_each_entry(j, &dev->lower_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(i->dev, j->dev);
+ if (ret)
+ goto rollback_mesh;
+ }
+ }
- /* Ensure that master upper link is always the first item in list. */
- if (master)
- list_add_rcu(&upper->list, &dev->upper_dev_list);
- else
- list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
- dev_hold(upper_dev);
+ /* add dev to every upper_dev's upper device */
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(dev, i->dev);
+ if (ret)
+ goto rollback_upper_mesh;
+ }
+
+ /* add upper_dev to every dev's lower device */
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
+ if (ret)
+ goto rollback_lower_mesh;
+ }
+
call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
return 0;
+
+rollback_lower_mesh:
+ to_i = i;
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ if (i == to_i)
+ break;
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+ }
+
+ i = NULL;
+
+rollback_upper_mesh:
+ to_i = i;
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ if (i == to_i)
+ break;
+ __netdev_adjacent_dev_unlink(dev, i->dev);
+ }
+
+ i = j = NULL;
+
+rollback_mesh:
+ to_i = i;
+ to_j = j;
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+ if (i == to_i && j == to_j)
+ break;
+ __netdev_adjacent_dev_unlink(i->dev, j->dev);
+ }
+ if (i == to_i)
+ break;
+ }
+
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+
+ return ret;
}
/**
@@ -4580,16 +4788,28 @@
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- struct netdev_upper *upper;
-
+ struct netdev_adjacent *i, *j;
ASSERT_RTNL();
- upper = __netdev_find_upper(dev, upper_dev);
- if (!upper)
- return;
- list_del_rcu(&upper->list);
- dev_put(upper_dev);
- kfree_rcu(upper, rcu);
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+
+ /* Here is the tricky part. We must remove all dev's lower
+ * devices from all upper_dev's upper devices and vice
+ * versa, to maintain the graph relationship.
+ */
+ list_for_each_entry(i, &dev->lower_dev_list, list)
+ list_for_each_entry(j, &upper_dev->upper_dev_list, list)
+ __netdev_adjacent_dev_unlink(i->dev, j->dev);
+
+ /* also remove the devices themselves from lower/upper device
+ * list
+ */
+ list_for_each_entry(i, &dev->lower_dev_list, list)
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list)
+ __netdev_adjacent_dev_unlink(dev, i->dev);
+
call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -5850,6 +6070,7 @@
INIT_LIST_HEAD(&dev->unreg_list);
INIT_LIST_HEAD(&dev->link_watch_list);
INIT_LIST_HEAD(&dev->upper_dev_list);
+ INIT_LIST_HEAD(&dev->lower_dev_list);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 31107ab..cca4441 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -20,6 +20,7 @@
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
+#include <net/pkt_sched.h>
static int zero = 0;
static int one = 1;
@@ -193,6 +194,26 @@
}
#endif /* CONFIG_NET_FLOW_LIMIT */
+#ifdef CONFIG_NET_SCHED
+static int set_default_qdisc(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ char id[IFNAMSIZ];
+ struct ctl_table tbl = {
+ .data = id,
+ .maxlen = IFNAMSIZ,
+ };
+ int ret;
+
+ qdisc_get_default(id, IFNAMSIZ);
+
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ ret = qdisc_set_default(id);
+ return ret;
+}
+#endif
+
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
{
@@ -315,7 +336,14 @@
.mode = 0644,
.proc_handler = proc_dointvec
},
-#
+#endif
+#ifdef CONFIG_NET_SCHED
+ {
+ .procname = "default_qdisc",
+ .mode = 0644,
+ .maxlen = IFNAMSIZ,
+ .proc_handler = set_default_qdisc
+ },
#endif
#endif /* CONFIG_NET */
{
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 3b9d5f2..c85e71e 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -67,39 +67,6 @@
static LIST_HEAD(lowpan_devices);
-/*
- * Uncompression of linklocal:
- * 0 -> 16 bytes from packet
- * 1 -> 2 bytes from prefix - bunch of zeroes and 8 from packet
- * 2 -> 2 bytes from prefix - zeroes + 2 from packet
- * 3 -> 2 bytes from prefix - infer 8 bytes from lladdr
- *
- * NOTE: => the uncompress function does change 0xf to 0x10
- * NOTE: 0x00 => no-autoconfig => unspecified
- */
-static const u8 lowpan_unc_llconf[] = {0x0f, 0x28, 0x22, 0x20};
-
-/*
- * Uncompression of ctx-based:
- * 0 -> 0 bits from packet [unspecified / reserved]
- * 1 -> 8 bytes from prefix - bunch of zeroes and 8 from packet
- * 2 -> 8 bytes from prefix - zeroes + 2 from packet
- * 3 -> 8 bytes from prefix - infer 8 bytes from lladdr
- */
-static const u8 lowpan_unc_ctxconf[] = {0x00, 0x88, 0x82, 0x80};
-
-/*
- * Uncompression of ctx-base
- * 0 -> 0 bits from packet
- * 1 -> 2 bytes from prefix - bunch of zeroes 5 from packet
- * 2 -> 2 bytes from prefix - zeroes + 3 from packet
- * 3 -> 2 bytes from prefix - infer 1 bytes from lladdr
- */
-static const u8 lowpan_unc_mxconf[] = {0x0f, 0x25, 0x23, 0x21};
-
-/* Link local prefix */
-static const u8 lowpan_llprefix[] = {0xfe, 0x80};
-
/* private device info */
struct lowpan_dev_info {
struct net_device *real_dev; /* real WPAN device ptr */
@@ -191,55 +158,177 @@
return rol8(val, shift);
}
-static void
-lowpan_uip_ds6_set_addr_iid(struct in6_addr *ipaddr, unsigned char *lladdr)
-{
- memcpy(&ipaddr->s6_addr[8], lladdr, IEEE802154_ADDR_LEN);
- /* second bit-flip (Universe/Local) is done according RFC2464 */
- ipaddr->s6_addr[8] ^= 0x02;
-}
-
/*
- * Uncompress addresses based on a prefix and a postfix with zeroes in
- * between. If the postfix is zero in length it will use the link address
- * to configure the IP address (autoconf style).
- * pref_post_count takes a byte where the first nibble specify prefix count
- * and the second postfix count (NOTE: 15/0xf => 16 bytes copy).
+ * Uncompress address function for source and
+ * destination address(non-multicast).
+ *
+ * address_mode is sam value or dam value.
*/
static int
-lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr,
- u8 const *prefix, u8 pref_post_count, unsigned char *lladdr)
+lowpan_uncompress_addr(struct sk_buff *skb,
+ struct in6_addr *ipaddr,
+ const u8 address_mode,
+ const struct ieee802154_addr *lladdr)
{
- u8 prefcount = pref_post_count >> 4;
- u8 postcount = pref_post_count & 0x0f;
+ bool fail;
- /* full nibble 15 => 16 */
- prefcount = (prefcount == 15 ? 16 : prefcount);
- postcount = (postcount == 15 ? 16 : postcount);
-
- if (lladdr)
- lowpan_raw_dump_inline(__func__, "linklocal address",
- lladdr, IEEE802154_ADDR_LEN);
- if (prefcount > 0)
- memcpy(ipaddr, prefix, prefcount);
-
- if (prefcount + postcount < 16)
- memset(&ipaddr->s6_addr[prefcount], 0,
- 16 - (prefcount + postcount));
-
- if (postcount > 0) {
- memcpy(&ipaddr->s6_addr[16 - postcount], skb->data, postcount);
- skb_pull(skb, postcount);
- } else if (prefcount > 0) {
- if (lladdr == NULL)
+ switch (address_mode) {
+ case LOWPAN_IPHC_ADDR_00:
+ /* for global link addresses */
+ fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
+ break;
+ case LOWPAN_IPHC_ADDR_01:
+ /* fe:80::XXXX:XXXX:XXXX:XXXX */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8);
+ break;
+ case LOWPAN_IPHC_ADDR_02:
+ /* fe:80::ff:fe00:XXXX */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ ipaddr->s6_addr[11] = 0xFF;
+ ipaddr->s6_addr[12] = 0xFE;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2);
+ break;
+ case LOWPAN_IPHC_ADDR_03:
+ fail = false;
+ switch (lladdr->addr_type) {
+ case IEEE802154_ADDR_LONG:
+ /* fe:80::XXXX:XXXX:XXXX:XXXX
+ * \_________________/
+ * hwaddr
+ */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ memcpy(&ipaddr->s6_addr[8], lladdr->hwaddr,
+ IEEE802154_ADDR_LEN);
+ /* second bit-flip (Universal/Local)
+ * is done according to RFC2464
+ */
+ ipaddr->s6_addr[8] ^= 0x02;
+ break;
+ case IEEE802154_ADDR_SHORT:
+ /* fe:80::ff:fe00:XXXX
+ * \__/
+ * short_addr
+ *
+ * Universal/Local bit is zero.
+ */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ ipaddr->s6_addr[11] = 0xFF;
+ ipaddr->s6_addr[12] = 0xFE;
+ ipaddr->s6_addr16[7] = htons(lladdr->short_addr);
+ break;
+ default:
+ pr_debug("Invalid addr_type set\n");
return -EINVAL;
-
- /* no IID based configuration if no prefix and no data */
- lowpan_uip_ds6_set_addr_iid(ipaddr, lladdr);
+ }
+ break;
+ default:
+ pr_debug("Invalid address mode value: 0x%x\n", address_mode);
+ return -EINVAL;
}
- pr_debug("uncompressing %d + %d => ", prefcount, postcount);
- lowpan_raw_dump_inline(NULL, NULL, ipaddr->s6_addr, 16);
+ if (fail) {
+ pr_debug("Failed to fetch skb data\n");
+ return -EIO;
+ }
+
+ lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 addr is:\n",
+ ipaddr->s6_addr, 16);
+
+ return 0;
+}
+
+/* Uncompress address function for source context
+ * based address(non-multicast).
+ */
+static int
+lowpan_uncompress_context_based_src_addr(struct sk_buff *skb,
+ struct in6_addr *ipaddr,
+ const u8 sam)
+{
+ switch (sam) {
+ case LOWPAN_IPHC_ADDR_00:
+ /* unspec address ::
+ * Do nothing, address is already ::
+ */
+ break;
+ case LOWPAN_IPHC_ADDR_01:
+ /* TODO */
+ case LOWPAN_IPHC_ADDR_02:
+ /* TODO */
+ case LOWPAN_IPHC_ADDR_03:
+ /* TODO */
+ netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam);
+ return -EINVAL;
+ default:
+ pr_debug("Invalid sam value: 0x%x\n", sam);
+ return -EINVAL;
+ }
+
+ lowpan_raw_dump_inline(NULL,
+ "Reconstructed context based ipv6 src addr is:\n",
+ ipaddr->s6_addr, 16);
+
+ return 0;
+}
+
+/* Uncompress function for multicast destination address,
+ * when M bit is set.
+ */
+static int
+lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
+ struct in6_addr *ipaddr,
+ const u8 dam)
+{
+ bool fail;
+
+ switch (dam) {
+ case LOWPAN_IPHC_DAM_00:
+ /* 00: 128 bits. The full address
+ * is carried in-line.
+ */
+ fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
+ break;
+ case LOWPAN_IPHC_DAM_01:
+ /* 01: 48 bits. The address takes
+ * the form ffXX::00XX:XXXX:XXXX.
+ */
+ ipaddr->s6_addr[0] = 0xFF;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1);
+ fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[11], 5);
+ break;
+ case LOWPAN_IPHC_DAM_10:
+ /* 10: 32 bits. The address takes
+ * the form ffXX::00XX:XXXX.
+ */
+ ipaddr->s6_addr[0] = 0xFF;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1);
+ fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[13], 3);
+ break;
+ case LOWPAN_IPHC_DAM_11:
+ /* 11: 8 bits. The address takes
+ * the form ff02::00XX.
+ */
+ ipaddr->s6_addr[0] = 0xFF;
+ ipaddr->s6_addr[1] = 0x02;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1);
+ break;
+ default:
+ pr_debug("DAM value has a wrong value: 0x%x\n", dam);
+ return -EINVAL;
+ }
+
+ if (fail) {
+ pr_debug("Failed to fetch skb data\n");
+ return -EIO;
+ }
+
+ lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is:\n",
+ ipaddr->s6_addr, 16);
return 0;
}
@@ -702,6 +791,12 @@
skb_reserve(frame->skb, sizeof(struct ipv6hdr));
skb_put(frame->skb, frame->length);
+ /* copy the first control block to keep a
+ * trace of the link-layer addresses in case
+ * of a link-local compressed address
+ */
+ memcpy(frame->skb->cb, skb->cb, sizeof(skb->cb));
+
init_timer(&frame->timer);
/* time out is the same as for ipv6 - 60 sec */
frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT;
@@ -723,9 +818,9 @@
static int
lowpan_process_data(struct sk_buff *skb)
{
- struct ipv6hdr hdr;
+ struct ipv6hdr hdr = {};
u8 tmp, iphc0, iphc1, num_context = 0;
- u8 *_saddr, *_daddr;
+ const struct ieee802154_addr *_saddr, *_daddr;
int err;
lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data,
@@ -828,8 +923,8 @@
if (lowpan_fetch_skb_u8(skb, &iphc1))
goto drop;
- _saddr = mac_cb(skb)->sa.hwaddr;
- _daddr = mac_cb(skb)->da.hwaddr;
+ _saddr = &mac_cb(skb)->sa;
+ _daddr = &mac_cb(skb)->da;
pr_debug("iphc0 = %02x, iphc1 = %02x\n", iphc0, iphc1);
@@ -868,8 +963,6 @@
hdr.priority = ((tmp >> 2) & 0x0f);
hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30);
- hdr.flow_lbl[1] = 0;
- hdr.flow_lbl[2] = 0;
break;
/*
* Flow Label carried in-line
@@ -885,10 +978,6 @@
break;
/* Traffic Class and Flow Label are elided */
case 3: /* 11b */
- hdr.priority = 0;
- hdr.flow_lbl[0] = 0;
- hdr.flow_lbl[1] = 0;
- hdr.flow_lbl[2] = 0;
break;
default:
break;
@@ -915,10 +1004,18 @@
/* Extract SAM to the tmp variable */
tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03;
- /* Source address uncompression */
- pr_debug("source address stateless compression\n");
- err = lowpan_uncompress_addr(skb, &hdr.saddr, lowpan_llprefix,
- lowpan_unc_llconf[tmp], skb->data);
+ if (iphc1 & LOWPAN_IPHC_SAC) {
+ /* Source address context based uncompression */
+ pr_debug("SAC bit is set. Handle context based source address.\n");
+ err = lowpan_uncompress_context_based_src_addr(
+ skb, &hdr.saddr, tmp);
+ } else {
+ /* Source address uncompression */
+ pr_debug("source address stateless compression\n");
+ err = lowpan_uncompress_addr(skb, &hdr.saddr, tmp, _saddr);
+ }
+
+ /* Check on error of previous branch */
if (err)
goto drop;
@@ -931,23 +1028,14 @@
pr_debug("dest: context-based mcast compression\n");
/* TODO: implement this */
} else {
- u8 prefix[] = {0xff, 0x02};
-
- pr_debug("dest: non context-based mcast compression\n");
- if (0 < tmp && tmp < 3) {
- if (lowpan_fetch_skb_u8(skb, &prefix[1]))
- goto drop;
- }
-
- err = lowpan_uncompress_addr(skb, &hdr.daddr, prefix,
- lowpan_unc_mxconf[tmp], NULL);
+ err = lowpan_uncompress_multicast_daddr(
+ skb, &hdr.daddr, tmp);
if (err)
goto drop;
}
} else {
pr_debug("dest: stateless compression\n");
- err = lowpan_uncompress_addr(skb, &hdr.daddr, lowpan_llprefix,
- lowpan_unc_llconf[tmp], skb->data);
+ err = lowpan_uncompress_addr(skb, &hdr.daddr, tmp, _daddr);
if (err)
goto drop;
}
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
index 4b8f917..2869c05 100644
--- a/net/ieee802154/6lowpan.h
+++ b/net/ieee802154/6lowpan.h
@@ -193,10 +193,12 @@
/* Values of fields within the IPHC encoding second byte */
#define LOWPAN_IPHC_CID 0x80
+#define LOWPAN_IPHC_ADDR_00 0x00
+#define LOWPAN_IPHC_ADDR_01 0x01
+#define LOWPAN_IPHC_ADDR_02 0x02
+#define LOWPAN_IPHC_ADDR_03 0x03
+
#define LOWPAN_IPHC_SAC 0x40
-#define LOWPAN_IPHC_SAM_00 0x00
-#define LOWPAN_IPHC_SAM_01 0x10
-#define LOWPAN_IPHC_SAM_10 0x20
#define LOWPAN_IPHC_SAM 0x30
#define LOWPAN_IPHC_SAM_BIT 4
@@ -230,4 +232,16 @@
dest = 16 bit inline */
#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */
+static inline bool lowpan_fetch_skb(struct sk_buff *skb,
+ void *data, const unsigned int len)
+{
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return true;
+
+ skb_copy_from_linear_data(skb, data, len);
+ skb_pull(skb, len);
+
+ return false;
+}
+
#endif /* __6LOWPAN_H__ */
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index a4d9126..830de3f 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -857,13 +857,11 @@
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ if (!IS_ERR(itn->fb_tunnel_dev))
+ itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
rtnl_unlock();
- if (IS_ERR(itn->fb_tunnel_dev))
- return PTR_ERR(itn->fb_tunnel_dev);
-
- return 0;
+ return PTR_RET(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 4e90280..1657e39b 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -110,6 +110,19 @@
To compile it as a module, choose M here. If unsure, say N.
+config IP_NF_TARGET_SYNPROXY
+ tristate "SYNPROXY target support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
+ help
+ The SYNPROXY target allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This avoids conntrack and server resource usage
+ during SYN-flood attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_NF_TARGET_ULOG
tristate "ULOG target support (obsolete)"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 007b128..3622b24 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -46,6 +46,7 @@
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
+obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
# generic ARP tables
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 30e4de9..00352ce 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -118,7 +118,7 @@
NF_CT_ASSERT(dev->ifindex != 0);
nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex);
+ (void *)(long)dev->ifindex, 0, 0);
}
return NOTIFY_DONE;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 04b18c1..b969131 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -119,7 +119,26 @@
nf_ct_attach(nskb, oldskb);
- ip_local_out(nskb);
+#ifdef CONFIG_BRIDGE_NETFILTER
+ /* If we use ip_local_out for bridged traffic, the MAC source on
+ * the RST will be ours, instead of the destination's. This confuses
+ * some routers/firewalls, and they drop the packet. So we need to
+ * build the eth header using the original destination's MAC as the
+ * source, and send the RST packet directly.
+ */
+ if (oldskb->nf_bridge) {
+ struct ethhdr *oeth = eth_hdr(oldskb);
+ nskb->dev = oldskb->nf_bridge->physindev;
+ niph->tot_len = htons(nskb->len);
+ ip_send_check(niph);
+ if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+ oeth->h_source, oeth->h_dest, nskb->len) < 0)
+ goto free_nskb;
+ dev_queue_xmit(nskb);
+ } else
+#endif
+ ip_local_out(nskb);
+
return;
free_nskb:
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
new file mode 100644
index 0000000..94371db
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+static struct iphdr *
+synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr)
+{
+ struct iphdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) / 4;
+ iph->tos = 0;
+ iph->id = 0;
+ iph->frag_off = htons(IP_DF);
+ iph->ttl = sysctl_ip_default_ttl;
+ iph->protocol = IPPROTO_TCP;
+ iph->check = 0;
+ iph->saddr = saddr;
+ iph->daddr = daddr;
+
+ return iph;
+}
+
+static void
+synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct iphdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ skb_dst_set_noref(nskb, skb_dst(skb));
+ nskb->protocol = htons(ETH_P_IP);
+ if (ip_route_me_harder(nskb, RTN_UNSPEC))
+ goto free_nskb;
+
+ if (nfct) {
+ nskb->nfct = nfct;
+ nskb->nfctinfo = ctinfo;
+ nf_conntrack_get(nfct);
+ }
+
+ ip_local_out(nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
+
+static void
+synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ u16 mss = opts->mss;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_syn(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack(const struct synproxy_net *snet,
+ const struct ip_ct_tcp *state,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(ntohl(th->seq) + 1);
+ nth->ack_seq = th->ack_seq;
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = ntohs(htons(th->window) >> opts->wscale);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static bool
+synproxy_recv_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ struct synproxy_options *opts, u32 recv_seq)
+{
+ int mss;
+
+ mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ if (mss == 0) {
+ this_cpu_inc(snet->stats->cookie_invalid);
+ return false;
+ }
+
+ this_cpu_inc(snet->stats->cookie_valid);
+ opts->mss = mss;
+
+ if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_check_timestamp_cookie(opts);
+
+ synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+ return true;
+}
+
+static unsigned int
+synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_synproxy_info *info = par->targinfo;
+ struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_options opts = {};
+ struct tcphdr *th, _th;
+
+ if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+ return NF_DROP;
+
+ th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ synproxy_parse_options(skb, par->thoff, th, &opts);
+
+ if (th->syn && !th->ack) {
+ /* Initial SYN from client */
+ this_cpu_inc(snet->stats->syn_received);
+
+ if (th->ece && th->cwr)
+ opts.options |= XT_SYNPROXY_OPT_ECN;
+
+ opts.options &= info->options;
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_init_timestamp_cookie(info, &opts);
+ else
+ opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM |
+ XT_SYNPROXY_OPT_ECN);
+
+ synproxy_send_client_synack(skb, th, &opts);
+ } else if (th->ack && !(th->fin || th->rst))
+ /* ACK from client */
+ synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+
+ return NF_DROP;
+}
+
+static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ unsigned int thoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (synproxy == NULL)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb))
+ return NF_ACCEPT;
+
+ thoff = ip_hdrlen(skb);
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments, they will get initialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+
+ /* fall through */
+ case TCP_CONNTRACK_SYN_SENT:
+ synproxy_parse_options(skb, thoff, th, &opts);
+
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match that of the first SYN.
+ */
+ if (synproxy_recv_client_ack(snet, skb, th, &opts,
+ ntohl(th->seq) + 1))
+ this_cpu_inc(snet->stats->cookie_retrans);
+
+ return NF_DROP;
+ }
+
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ synproxy_parse_options(skb, thoff, th, &opts);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->tsoff = opts.tsval - synproxy->its;
+
+ opts.options &= ~(XT_SYNPROXY_OPT_MSS |
+ XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack(snet, state, skb, th, &opts);
+
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack(snet, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+
+static int synproxy_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct ipt_entry *e = par->entryinfo;
+
+ if (e->ip.proto != IPPROTO_TCP ||
+ e->ip.invflags & XT_INV_PROTO)
+ return -EINVAL;
+
+ return nf_ct_l3proto_try_module_get(par->family);
+}
+
+static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+static struct xt_target synproxy_tg4_reg __read_mostly = {
+ .name = "SYNPROXY",
+ .family = NFPROTO_IPV4,
+ .target = synproxy_tg4,
+ .targetsize = sizeof(struct xt_synproxy_info),
+ .checkentry = synproxy_tg4_check,
+ .destroy = synproxy_tg4_destroy,
+ .me = THIS_MODULE,
+};
+
+static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
+static int __init synproxy_tg4_init(void)
+{
+ int err;
+
+ err = nf_register_hooks(ipv4_synproxy_ops,
+ ARRAY_SIZE(ipv4_synproxy_ops));
+ if (err < 0)
+ goto err1;
+
+ err = xt_register_target(&synproxy_tg4_reg);
+ if (err < 0)
+ goto err2;
+
+ return 0;
+
+err2:
+ nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
+err1:
+ return err;
+}
+
+static void __exit synproxy_tg4_exit(void)
+{
+ xt_unregister_target(&synproxy_tg4_reg);
+ nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
+}
+
+module_init(synproxy_tg4_init);
+module_exit(synproxy_tg4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 0a2e0e3..86f5b34 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -25,6 +25,7 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
@@ -136,11 +137,7 @@
/* adjust seqs for loopback traffic only in outgoing direction */
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
- typeof(nf_nat_seq_adjust_hook) seq_adjust;
-
- seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
- if (!seq_adjust ||
- !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e805481..727f436 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,7 +112,8 @@
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
-#define IP_MAX_MTU 0xFFF0
+/* IPv4 datagram length is stored in a 16-bit field (tot_len) */
+#define IP_MAX_MTU 0xFFFF
#define RT_GC_TIMEOUT (300*HZ)
@@ -1227,10 +1228,7 @@
mtu = 576;
}
- if (mtu > IP_MAX_MTU)
- mtu = IP_MAX_MTU;
-
- return mtu;
+ return min_t(unsigned int, mtu, IP_MAX_MTU);
}
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b05c96e..14a15c4 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -160,26 +160,33 @@
* Generate a syncookie. mssp points to the mss, which is returned
* rounded down to the value encoded in the cookie.
*/
-__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
+ u16 *mssp)
{
- const struct iphdr *iph = ip_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
int mssind;
const __u16 mss = *mssp;
- tcp_synq_overflow(sk);
-
for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
if (mss >= msstab[mssind])
break;
*mssp = msstab[mssind];
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
th->source, th->dest, ntohl(th->seq),
jiffies / (HZ * 60), mssind);
}
+EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
+
+__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ tcp_synq_overflow(sk);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+
+ return __cookie_v4_init_sequence(iph, th, mssp);
+}
/*
* This (misnamed) value is the age of syncookie which is permitted.
@@ -192,10 +199,9 @@
* Check if a ack sequence number is a valid syncookie.
* Return the decoded mss if it is, or 0 if not.
*/
-static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
+ u32 cookie)
{
- const struct iphdr *iph = ip_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
__u32 seq = ntohl(th->seq) - 1;
__u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
th->source, th->dest, seq,
@@ -204,6 +210,7 @@
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
+EXPORT_SYMBOL_GPL(__cookie_v4_check);
static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
@@ -284,7 +291,7 @@
goto out;
if (tcp_synq_no_recent_overflow(sk) ||
- (mss = cookie_check(skb, cookie)) == 0) {
+ (mss = __cookie_v4_check(ip_hdr(skb), th, cookie)) == 0) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 8ed7c32..540279f 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -29,6 +29,7 @@
static int zero;
static int one = 1;
static int four = 4;
+static int gso_max_segs = GSO_MAX_SEGS;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -761,6 +762,15 @@
.extra2 = &four,
},
{
+ .procname = "tcp_min_tso_segs",
+ .data = &sysctl_tcp_min_tso_segs,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &gso_max_segs,
+ },
+ {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab64eea..fdf7409 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -283,6 +283,8 @@
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
+int sysctl_tcp_min_tso_segs __read_mostly = 2;
+
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -785,12 +787,28 @@
xmit_size_goal = mss_now;
if (large_allowed && sk_can_gso(sk)) {
- xmit_size_goal = ((sk->sk_gso_max_size - 1) -
- inet_csk(sk)->icsk_af_ops->net_header_len -
- inet_csk(sk)->icsk_ext_hdr_len -
- tp->tcp_header_len);
+ u32 gso_size, hlen;
- /* TSQ : try to have two TSO segments in flight */
+ /* Maybe we should/could use sk->sk_prot->max_header here ? */
+ hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+ inet_csk(sk)->icsk_ext_hdr_len +
+ tp->tcp_header_len;
+
+ /* Goal is to send at least one packet per ms,
+ * not one big TSO packet every 100 ms.
+ * This preserves ACK clocking and is consistent
+ * with tcp_tso_should_defer() heuristic.
+ */
+ gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
+ gso_size = max_t(u32, gso_size,
+ sysctl_tcp_min_tso_segs * mss_now);
+
+ xmit_size_goal = min_t(u32, gso_size,
+ sk->sk_gso_max_size - 1 - hlen);
+
+ /* TSQ : try to have at least two segments in flight
+ * (one in NIC TX ring, another in Qdisc)
+ */
xmit_size_goal = min_t(u32, xmit_size_goal,
sysctl_tcp_limit_output_bytes >> 1);
@@ -1117,6 +1135,13 @@
goto wait_for_memory;
/*
+ * All packets are restored as if they have
+ * already been sent.
+ */
+ if (tp->repair)
+ TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
+ /*
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e965cc7..1a84fff 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -688,6 +688,34 @@
}
}
+/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
+ * Note: TCP stack does not yet implement pacing.
+ * FQ packet scheduler can be used to implement cheap but effective
+ * TCP pacing, to smooth the burst on large writes when packets
+ * in flight is significantly lower than cwnd (or rwin)
+ */
+static void tcp_update_pacing_rate(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ u64 rate;
+
+ /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
+ rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+
+ rate *= max(tp->snd_cwnd, tp->packets_out);
+
+ /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
+ * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+ * We probably need usec resolution in the future.
+ * Note: This also takes care of possible srtt=0 case,
+ * when tcp_rtt_estimator() was not yet called.
+ */
+ if (tp->srtt > 8 + 2)
+ do_div(rate, tp->srtt);
+
+ sk->sk_pacing_rate = min_t(u64, rate, ~0U);
+}
+
/* Calculate rto without backoff. This is the second half of Van Jacobson's
* routine referred to above.
*/
@@ -2485,8 +2513,6 @@
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_try_keep_open(sk);
- if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
- tcp_moderate_cwnd(tp);
} else {
tcp_cwnd_reduction(sk, prior_unsacked, 0);
}
@@ -3128,11 +3154,24 @@
inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
}
+/* Decide whether to run the increase function of congestion control. */
static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
- !tcp_in_cwnd_reduction(sk);
+ if (tcp_in_cwnd_reduction(sk))
+ return false;
+
+ /* If reordering is high then always grow cwnd whenever data is
+ * delivered regardless of its ordering. Otherwise stay conservative
+ * and only grow cwnd on in-order delivery in Open state, and retain
+ * cwnd in Disordered state (RFC5681). A stretched ACK with
+ * new SACK or ECE mark may first advance cwnd here and later reduce
+ * cwnd in tcp_fastretrans_alert() based on more states.
+ */
+ if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
+ return flag & FLAG_FORWARD_PROGRESS;
+
+ return inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
+ flag & FLAG_DATA_ACKED;
}
/* Check that window update is acceptable.
@@ -3267,7 +3306,7 @@
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
- u32 prior_in_flight;
+ u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
u32 prior_fackets;
int prior_packets = tp->packets_out;
const int prior_unsacked = tp->packets_out - tp->sacked_out;
@@ -3352,18 +3391,15 @@
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
acked -= tp->packets_out;
+ /* Advance cwnd if state allows */
+ if (tcp_may_raise_cwnd(sk, flag))
+ tcp_cong_avoid(sk, ack, prior_in_flight);
+
if (tcp_ack_is_dubious(sk, flag)) {
- /* Advance CWND, if state allows this. */
- if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, prior_in_flight);
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
- } else {
- if (flag & FLAG_DATA_ACKED)
- tcp_cong_avoid(sk, ack, prior_in_flight);
}
-
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -3375,6 +3411,8 @@
if (icsk->icsk_pending == ICSK_TIME_RETRANS)
tcp_schedule_loss_probe(sk);
+ if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
+ tcp_update_pacing_rate(sk);
return 1;
no_queue:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 05a3d45..09d45d7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -821,8 +821,7 @@
*/
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- u16 queue_mapping,
- bool nocache)
+ u16 queue_mapping)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct flowi4 fl4;
@@ -852,7 +851,7 @@
static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
- int res = tcp_v4_send_synack(sk, NULL, req, 0, false);
+ int res = tcp_v4_send_synack(sk, NULL, req, 0);
if (!res)
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f6a005c..273ed73 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -443,7 +443,7 @@
struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_metrics_block *tm;
- u32 val;
+ u32 val, crtt = 0; /* cached RTT scaled by 8 */
if (dst == NULL)
goto reset;
@@ -478,40 +478,18 @@
tp->reordering = val;
}
- val = tcp_metric_get(tm, TCP_METRIC_RTT);
- if (val == 0 || tp->srtt == 0) {
- rcu_read_unlock();
- goto reset;
- }
- /* Initial rtt is determined from SYN,SYN-ACK.
- * The segment is small and rtt may appear much
- * less than real one. Use per-dst memory
- * to make it more realistic.
- *
- * A bit of theory. RTT is time passed after "normal" sized packet
- * is sent until it is ACKed. In normal circumstances sending small
- * packets force peer to delay ACKs and calculation is correct too.
- * The algorithm is adaptive and, provided we follow specs, it
- * NEVER underestimate RTT. BUT! If peer tries to make some clever
- * tricks sort of "quick acks" for time long enough to decrease RTT
- * to low value, and then abruptly stops to do it and starts to delay
- * ACKs, wait for troubles.
- */
- val = msecs_to_jiffies(val);
- if (val > tp->srtt) {
- tp->srtt = val;
- tp->rtt_seq = tp->snd_nxt;
- }
- val = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
- if (val > tp->mdev) {
- tp->mdev = val;
- tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
- }
+ crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
rcu_read_unlock();
-
- tcp_set_rto(sk);
reset:
- if (tp->srtt == 0) {
+ if (crtt > tp->srtt) {
+ /* Initial RTT (tp->srtt) from SYN usually doesn't measure
+ * serialization delay on low BW links well, so RTO may be
+ * under-estimated. Stay conservative and seed RTO with
+ * the RTTs from past data exchanges, using the same seeding
+ * formula as in tcp_rtt_estimator().
+ */
+ inet_csk(sk)->icsk_rto = crtt + max(crtt >> 2, tcp_rto_min(sk));
+ } else if (tp->srtt == 0) {
/* RFC6298: 5.7 We've failed to get a valid RTT sample from
* 3WHS. This is most likely due to retransmission,
* including spurious one. Reset the RTO back to 3secs
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 884efff..e63ae4c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1631,7 +1631,7 @@
/* If a full-sized TSO skb can be sent, do it. */
if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
- sk->sk_gso_max_segs * tp->mss_cache))
+ tp->xmit_size_goal_segs * tp->mss_cache))
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index d4943f6..622a437 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -46,6 +46,10 @@
MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
module_param(bufsize, uint, 0);
+static unsigned int fwmark __read_mostly = 0;
+MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
+module_param(fwmark, uint, 0);
+
static int full __read_mostly;
MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
module_param(full, int, 0);
@@ -54,12 +58,16 @@
struct tcp_log {
ktime_t tstamp;
- __be32 saddr, daddr;
- __be16 sport, dport;
+ union {
+ struct sockaddr raw;
+ struct sockaddr_in v4;
+ struct sockaddr_in6 v6;
+ } src, dst;
u16 length;
u32 snd_nxt;
u32 snd_una;
u32 snd_wnd;
+ u32 rcv_wnd;
u32 snd_cwnd;
u32 ssthresh;
u32 srtt;
@@ -86,19 +94,45 @@
return bufsize - tcp_probe_used() - 1;
}
+#define tcp_probe_copy_fl_to_si4(inet, si4, mem) \
+ do { \
+ si4.sin_family = AF_INET; \
+ si4.sin_port = inet->inet_##mem##port; \
+ si4.sin_addr.s_addr = inet->inet_##mem##addr; \
+ } while (0) \
+
+#if IS_ENABLED(CONFIG_IPV6)
+#define tcp_probe_copy_fl_to_si6(inet, si6, mem) \
+ do { \
+ struct ipv6_pinfo *pi6 = inet->pinet6; \
+ si6.sin6_family = AF_INET6; \
+ si6.sin6_port = inet->inet_##mem##port; \
+ si6.sin6_addr = pi6->mem##addr; \
+ si6.sin6_flowinfo = 0; /* No need here. */ \
+ si6.sin6_scope_id = 0; /* No need here. */ \
+ } while (0)
+#else
+#define tcp_probe_copy_fl_to_si6(fl, si6, mem) \
+ do { \
+ memset(&si6, 0, sizeof(si6)); \
+ } while (0)
+#endif
+
/*
* Hook inserted to be called before each receive packet.
* Note: arguments must match tcp_rcv_established()!
*/
static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- struct tcphdr *th, unsigned int len)
+ const struct tcphdr *th, unsigned int len)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
- /* Only update if port matches */
- if ((port == 0 || ntohs(inet->inet_dport) == port ||
- ntohs(inet->inet_sport) == port) &&
+ /* Only update if port or skb mark matches */
+ if (((port == 0 && fwmark == 0) ||
+ ntohs(inet->inet_dport) == port ||
+ ntohs(inet->inet_sport) == port ||
+ (fwmark > 0 && skb->mark == fwmark)) &&
(full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
spin_lock(&tcp_probe.lock);
@@ -107,15 +141,25 @@
struct tcp_log *p = tcp_probe.log + tcp_probe.head;
p->tstamp = ktime_get();
- p->saddr = inet->inet_saddr;
- p->sport = inet->inet_sport;
- p->daddr = inet->inet_daddr;
- p->dport = inet->inet_dport;
+ switch (sk->sk_family) {
+ case AF_INET:
+ tcp_probe_copy_fl_to_si4(inet, p->src.v4, s);
+ tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d);
+ break;
+ case AF_INET6:
+ tcp_probe_copy_fl_to_si6(inet, p->src.v6, s);
+ tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d);
+ break;
+ default:
+ BUG();
+ }
+
p->length = skb->len;
p->snd_nxt = tp->snd_nxt;
p->snd_una = tp->snd_una;
p->snd_cwnd = tp->snd_cwnd;
p->snd_wnd = tp->snd_wnd;
+ p->rcv_wnd = tp->rcv_wnd;
p->ssthresh = tcp_current_ssthresh(sk);
p->srtt = tp->srtt >> 3;
@@ -157,13 +201,11 @@
= ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));
return scnprintf(tbuf, n,
- "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n",
+ "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n",
(unsigned long) tv.tv_sec,
(unsigned long) tv.tv_nsec,
- &p->saddr, ntohs(p->sport),
- &p->daddr, ntohs(p->dport),
- p->length, p->snd_nxt, p->snd_una,
- p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt);
+ &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una,
+ p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd);
}
static ssize_t tcpprobe_read(struct file *file, char __user *buf,
@@ -223,6 +265,13 @@
{
int ret = -ENOMEM;
+ /* Warning: if the function signature of tcp_rcv_established
+ * has been changed, you also have to change the signature of
+ * jtcp_rcv_established, otherwise you end up right here!
+ */
+ BUILD_BUG_ON(__same_type(tcp_rcv_established,
+ jtcp_rcv_established) == 0);
+
init_waitqueue_head(&tcp_probe.wait);
spin_lock_init(&tcp_probe.lock);
@@ -241,7 +290,8 @@
if (ret)
goto err1;
- pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize);
+ pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
+ port, fwmark, bufsize);
return 0;
err1:
remove_proc_entry(procname, init_net.proc_net);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ad12f7c..a7183fc 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -99,9 +99,9 @@
#define ACONF_DEBUG 2
#if ACONF_DEBUG >= 3
-#define ADBG(x) printk x
+#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
#else
-#define ADBG(x)
+#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
#endif
#define INFINITY_LIFE_TIME 0xFFFFFFFF
@@ -204,6 +204,7 @@
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
.accept_dad = 1,
+ .suppress_frag_ndisc = 1,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -241,6 +242,7 @@
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
.accept_dad = 1,
+ .suppress_frag_ndisc = 1,
};
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
@@ -374,9 +376,9 @@
dev_hold(dev);
if (snmp6_alloc_dev(ndev) < 0) {
- ADBG((KERN_WARNING
+ ADBG(KERN_WARNING
"%s: cannot allocate memory for statistics; dev=%s.\n",
- __func__, dev->name));
+ __func__, dev->name);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
@@ -384,9 +386,9 @@
}
if (snmp6_register_dev(ndev) < 0) {
- ADBG((KERN_WARNING
+ ADBG(KERN_WARNING
"%s: cannot create /proc/net/dev_snmp6/%s\n",
- __func__, dev->name));
+ __func__, dev->name);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
ndev->dead = 1;
in6_dev_finish_destroy(ndev);
@@ -849,7 +851,7 @@
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
- ADBG(("ipv6_add_addr: already assigned\n"));
+ ADBG("ipv6_add_addr: already assigned\n");
err = -EEXIST;
goto out;
}
@@ -857,7 +859,7 @@
ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
if (ifa == NULL) {
- ADBG(("ipv6_add_addr: malloc failed\n"));
+ ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
goto out;
}
@@ -1131,12 +1133,10 @@
if (ifp->flags & IFA_F_OPTIMISTIC)
addr_flags |= IFA_F_OPTIMISTIC;
- ift = !max_addresses ||
- ipv6_count_addresses(idev) < max_addresses ?
- ipv6_add_addr(idev, &addr, NULL, tmp_plen,
- ipv6_addr_scope(&addr), addr_flags,
- tmp_valid_lft, tmp_prefered_lft) : NULL;
- if (IS_ERR_OR_NULL(ift)) {
+ ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
+ ipv6_addr_scope(&addr), addr_flags,
+ tmp_valid_lft, tmp_prefered_lft);
+ if (IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
pr_info("%s: retry temporary address regeneration\n", __func__);
@@ -1814,6 +1814,16 @@
return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
}
+static int addrconf_ifid_ip6tnl(u8 *eui, struct net_device *dev)
+{
+ memcpy(eui, dev->perm_addr, 3);
+ memcpy(eui + 5, dev->perm_addr + 3, 3);
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[0] ^= 2;
+ return 0;
+}
+
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
{
switch (dev->type) {
@@ -1832,6 +1842,8 @@
return addrconf_ifid_eui64(eui, dev);
case ARPHRD_IEEE1394:
return addrconf_ifid_ieee1394(eui, dev);
+ case ARPHRD_TUNNEL6:
+ return addrconf_ifid_ip6tnl(eui, dev);
}
return -1;
}
@@ -2057,7 +2069,7 @@
pinfo = (struct prefix_info *) opt;
if (len < sizeof(struct prefix_info)) {
- ADBG(("addrconf: prefix option too short\n"));
+ ADBG("addrconf: prefix option too short\n");
return;
}
@@ -2709,7 +2721,8 @@
(dev->type != ARPHRD_ARCNET) &&
(dev->type != ARPHRD_INFINIBAND) &&
(dev->type != ARPHRD_IEEE802154) &&
- (dev->type != ARPHRD_IEEE1394)) {
+ (dev->type != ARPHRD_IEEE1394) &&
+ (dev->type != ARPHRD_TUNNEL6)) {
/* Alas, we support only Ethernet autoconfiguration. */
return;
}
@@ -2795,44 +2808,6 @@
return -1;
}
-static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
-{
- struct net_device *link_dev;
- struct net *net = dev_net(idev->dev);
-
- /* first try to inherit the link-local address from the link device */
- if (idev->dev->iflink &&
- (link_dev = __dev_get_by_index(net, idev->dev->iflink))) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- /* then try to inherit it from any device */
- for_each_netdev(net, link_dev) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- pr_debug("init ip6-ip6: add_linklocal failed\n");
-}
-
-/*
- * Autoconfigure tunnel with a link-local address so routing protocols,
- * DHCPv6, MLD etc. can be run over the virtual link
- */
-
-static void addrconf_ip6_tnl_config(struct net_device *dev)
-{
- struct inet6_dev *idev;
-
- ASSERT_RTNL();
-
- idev = addrconf_add_dev(dev);
- if (IS_ERR(idev)) {
- pr_debug("init ip6-ip6: add_dev failed\n");
- return;
- }
- ip6_tnl_add_linklocal(idev);
-}
-
static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
@@ -2900,9 +2875,6 @@
addrconf_gre_config(dev);
break;
#endif
- case ARPHRD_TUNNEL6:
- addrconf_ip6_tnl_config(dev);
- break;
case ARPHRD_LOOPBACK:
init_loopback(dev);
break;
@@ -3637,8 +3609,8 @@
if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
- ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
- now, next, next_sec, next_sched));
+ ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched);
addr_chk_timer.expires = next_sched;
add_timer(&addr_chk_timer);
@@ -4218,6 +4190,7 @@
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
+ array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
}
static inline size_t inet6_ifla6_size(void)
@@ -5032,6 +5005,13 @@
.proc_handler = proc_dointvec
},
{
+ .procname = "suppress_frag_ndisc",
+ .data = &ipv6_devconf.suppress_frag_ndisc,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
/* sentinel */
}
},
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index cc3bb20..d6e00a3 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -41,6 +41,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
#include <linux/hash.h>
+#include <linux/etherdevice.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
@@ -1471,6 +1472,9 @@
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ /* This perm addr will be used as interface identifier by IPv6 */
+ dev->addr_assign_type = NET_ADDR_RANDOM;
+ eth_random_addr(dev->perm_addr);
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 6c76df9..98ead2b 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -107,9 +107,12 @@
static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
struct inet6_dev *idev);
-
#define MLD_QRV_DEFAULT 2
+/* RFC3810, 8.1 Query Version Distinctions */
+#define MLD_V1_QUERY_LEN 24
+#define MLD_V2_QUERY_LEN_MIN 28
+
#define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \
(idev)->cnf.force_mld_version == 1 || \
((idev)->mc_v1_seen && \
@@ -996,24 +999,24 @@
static void mld_gq_start_timer(struct inet6_dev *idev)
{
- int tv = net_random() % idev->mc_maxdelay;
+ unsigned long tv = net_random() % idev->mc_maxdelay;
idev->mc_gq_running = 1;
if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
in6_dev_hold(idev);
}
-static void mld_ifc_start_timer(struct inet6_dev *idev, int delay)
+static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)
{
- int tv = net_random() % delay;
+ unsigned long tv = net_random() % delay;
if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
in6_dev_hold(idev);
}
-static void mld_dad_start_timer(struct inet6_dev *idev, int delay)
+static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)
{
- int tv = net_random() % delay;
+ unsigned long tv = net_random() % delay;
if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))
in6_dev_hold(idev);
@@ -1146,13 +1149,11 @@
!(group_type&IPV6_ADDR_MULTICAST))
return -EINVAL;
- if (len == 24) {
+ if (len == MLD_V1_QUERY_LEN) {
int switchback;
/* MLDv1 router present */
- /* Translate milliseconds to jiffies */
- max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000;
-
+ max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay));
switchback = (idev->mc_qrv + 1) * max_delay;
idev->mc_v1_seen = jiffies + switchback;
@@ -1162,17 +1163,18 @@
__in6_dev_put(idev);
/* clear deleted report items */
mld_clear_delrec(idev);
- } else if (len >= 28) {
+ } else if (len >= MLD_V2_QUERY_LEN_MIN) {
int srcs_offset = sizeof(struct mld2_query) -
sizeof(struct icmp6hdr);
if (!pskb_may_pull(skb, srcs_offset))
return -EINVAL;
mlh2 = (struct mld2_query *)skb_transport_header(skb);
- max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000;
- if (!max_delay)
- max_delay = 1;
+
+ max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mlh2->mld2q_mrc))), 1UL);
+
idev->mc_maxdelay = max_delay;
+
if (mlh2->mld2q_qrv)
idev->mc_qrv = mlh2->mld2q_qrv;
if (group_type == IPV6_ADDR_ANY) { /* general query */
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 79aa965..bb6fd95 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -428,7 +428,6 @@
type = icmp6h->icmp6_type;
if (!dst) {
- struct sock *sk = net->ipv6.ndisc_sk;
struct flowi6 fl6;
icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
@@ -1369,8 +1368,10 @@
if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
return;
- if (!ndopts.nd_opts_rh)
+ if (!ndopts.nd_opts_rh) {
+ ip6_redirect_no_header(skb, dev_net(skb->dev), 0, 0);
return;
+ }
hdr = (u8 *)ndopts.nd_opts_rh;
hdr += 8;
@@ -1517,10 +1518,27 @@
kfree_skb(skb);
}
+static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
+{
+ struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+ if (!idev)
+ return true;
+ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
+ idev->cnf.suppress_frag_ndisc) {
+ net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
+ return true;
+ }
+ return false;
+}
+
int ndisc_rcv(struct sk_buff *skb)
{
struct nd_msg *msg;
+ if (ndisc_suppress_frag_ndisc(skb))
+ return 0;
+
if (skb_linearize(skb))
return 0;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 4433ab40..a7f842b 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -153,6 +153,19 @@
To compile it as a module, choose M here. If unsure, say N.
+config IP6_NF_TARGET_SYNPROXY
+ tristate "SYNPROXY target support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
+ help
+ The SYNPROXY target allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This makes it possible to avoid conntrack and server resource usage
+ during SYN-flood attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2d11fcc..2b53738 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -14,7 +14,7 @@
nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
# l3 independent conntrack
-obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
+obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
@@ -37,3 +37,4 @@
obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
+obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 47bff61..3e4e92d 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -76,7 +76,7 @@
if (event == NETDEV_DOWN)
nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex);
+ (void *)(long)dev->ifindex, 0, 0);
return NOTIFY_DONE;
}
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 70f9abc..56eef30 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -169,7 +169,25 @@
nf_ct_attach(nskb, oldskb);
- ip6_local_out(nskb);
+#ifdef CONFIG_BRIDGE_NETFILTER
+ /* If we use ip6_local_out for bridged traffic, the MAC source on
+ * the RST will be ours, instead of the destination's. This confuses
+ * some routers/firewalls, and they drop the packet. So we need to
+ * build the eth header using the original destination's MAC as the
+ * source, and send the RST packet directly.
+ */
+ if (oldskb->nf_bridge) {
+ struct ethhdr *oeth = eth_hdr(oldskb);
+ nskb->dev = oldskb->nf_bridge->physindev;
+ nskb->protocol = htons(ETH_P_IPV6);
+ ip6h->payload_len = htons(sizeof(struct tcphdr));
+ if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+ oeth->h_source, oeth->h_dest, nskb->len) < 0)
+ return;
+ dev_queue_xmit(nskb);
+ } else
+#endif
+ ip6_local_out(nskb);
}
static inline void
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
new file mode 100644
index 0000000..4270a9b
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+static struct ipv6hdr *
+synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
+{
+ struct ipv6hdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph));
+ ip6_flow_hdr(iph, 0, 0);
+ iph->hop_limit = 64; //XXX
+ iph->nexthdr = IPPROTO_TCP;
+ iph->saddr = *saddr;
+ iph->daddr = *daddr;
+
+ return iph;
+}
+
+static void
+synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct ipv6hdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ struct net *net = nf_ct_net((struct nf_conn *)nfct);
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.saddr = niph->saddr;
+ fl6.daddr = niph->daddr;
+ fl6.fl6_sport = nth->source;
+ fl6.fl6_dport = nth->dest;
+ security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst == NULL || dst->error) {
+ dst_release(dst);
+ goto free_nskb;
+ }
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(dst))
+ goto free_nskb;
+
+ skb_dst_set(nskb, dst);
+
+ if (nfct) {
+ nskb->nfct = nfct;
+ nskb->nfctinfo = ctinfo;
+ nf_conntrack_get(nfct);
+ }
+
+ ip6_local_out(nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
+
+static void
+synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ u16 mss = opts->mss;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(__cookie_v6_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_syn(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack(const struct synproxy_net *snet,
+ const struct ip_ct_tcp *state,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(ntohl(th->seq) + 1);
+ nth->ack_seq = th->ack_seq;
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = ntohs(htons(th->window) >> opts->wscale);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static bool
+synproxy_recv_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ struct synproxy_options *opts, u32 recv_seq)
+{
+ int mss;
+
+ mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ if (mss == 0) {
+ this_cpu_inc(snet->stats->cookie_invalid);
+ return false;
+ }
+
+ this_cpu_inc(snet->stats->cookie_valid);
+ opts->mss = mss;
+
+ if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_check_timestamp_cookie(opts);
+
+ synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+ return true;
+}
+
+static unsigned int
+synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_synproxy_info *info = par->targinfo;
+ struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_options opts = {};
+ struct tcphdr *th, _th;
+
+ if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+ return NF_DROP;
+
+ th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ synproxy_parse_options(skb, par->thoff, th, &opts);
+
+ if (th->syn) {
+ /* Initial SYN from client */
+ this_cpu_inc(snet->stats->syn_received);
+
+ if (th->ece && th->cwr)
+ opts.options |= XT_SYNPROXY_OPT_ECN;
+
+ opts.options &= info->options;
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_init_timestamp_cookie(info, &opts);
+ else
+ opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM |
+ XT_SYNPROXY_OPT_ECN);
+
+ synproxy_send_client_synack(skb, th, &opts);
+ } else if (th->ack && !(th->fin || th->rst))
+ /* ACK from client */
+ synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+
+ return NF_DROP;
+}
+
+static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ __be16 frag_off;
+ u8 nexthdr;
+ int thoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (synproxy == NULL)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb))
+ return NF_ACCEPT;
+
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
+ if (thoff < 0)
+ return NF_ACCEPT;
+
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments, they will get initialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+
+ /* fall through */
+ case TCP_CONNTRACK_SYN_SENT:
+ synproxy_parse_options(skb, thoff, th, &opts);
+
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match that of the first SYN.
+ */
+ if (synproxy_recv_client_ack(snet, skb, th, &opts,
+ ntohl(th->seq) + 1))
+ this_cpu_inc(snet->stats->cookie_retrans);
+
+ return NF_DROP;
+ }
+
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ synproxy_parse_options(skb, thoff, th, &opts);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->tsoff = opts.tsval - synproxy->its;
+
+ opts.options &= ~(XT_SYNPROXY_OPT_MSS |
+ XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack(snet, state, skb, th, &opts);
+
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack(snet, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+
+static int synproxy_tg6_check(const struct xt_tgchk_param *par)
+{
+ const struct ip6t_entry *e = par->entryinfo;
+
+ if (!(e->ipv6.flags & IP6T_F_PROTO) ||
+ e->ipv6.proto != IPPROTO_TCP ||
+ e->ipv6.invflags & XT_INV_PROTO)
+ return -EINVAL;
+
+ return nf_ct_l3proto_try_module_get(par->family);
+}
+
+static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+static struct xt_target synproxy_tg6_reg __read_mostly = {
+ .name = "SYNPROXY",
+ .family = NFPROTO_IPV6,
+ .target = synproxy_tg6,
+ .targetsize = sizeof(struct xt_synproxy_info),
+ .checkentry = synproxy_tg6_check,
+ .destroy = synproxy_tg6_destroy,
+ .me = THIS_MODULE,
+};
+
+static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
+ {
+ .hook = ipv6_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv6_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
+static int __init synproxy_tg6_init(void)
+{
+ int err;
+
+ err = nf_register_hooks(ipv6_synproxy_ops,
+ ARRAY_SIZE(ipv6_synproxy_ops));
+ if (err < 0)
+ goto err1;
+
+ err = xt_register_target(&synproxy_tg6_reg);
+ if (err < 0)
+ goto err2;
+
+ return 0;
+
+err2:
+ nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
+err1:
+ return err;
+}
+
+static void __exit synproxy_tg6_exit(void)
+{
+ xt_unregister_target(&synproxy_tg6_reg);
+ nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
+}
+
+module_init(synproxy_tg6_init);
+module_exit(synproxy_tg6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index c9b6a6e..d6e4dd8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -28,6 +28,7 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
@@ -158,11 +159,7 @@
/* adjust seqs for loopback traffic only in outgoing direction */
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
- typeof(nf_nat_seq_adjust_hook) seq_adjust;
-
- seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
- if (!seq_adjust ||
- !seq_adjust(skb, ct, ctinfo, protoff)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 790d9f4..1aeb473 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -490,6 +490,7 @@
ipv6_hdr(head)->payload_len = htons(payload_len);
ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->nhoff = nhoff;
+ IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -524,6 +525,9 @@
struct net *net = dev_net(skb_dst(skb)->dev);
int evicted;
+ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
+ goto fail_hdr;
+
IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
/* Jumbo payload inhibits frag. header */
@@ -544,6 +548,7 @@
ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
+ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
return 1;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e22c4db..55236a8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1177,6 +1177,27 @@
}
EXPORT_SYMBOL_GPL(ip6_redirect);
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
+ u32 mark) /* route lookup + rt6_do_redirect() keyed on the rd_msg carried by skb itself */
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb); /* ICMPv6 header of skb viewed as a struct rd_msg */
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = mark;
+ fl6.flowi6_flags = 0; /* already zero after the memset; kept explicit */
+ fl6.daddr = msg->dest; /* flow destination is taken from the rd_msg */
+ fl6.saddr = iph->daddr; /* flow source is the address this packet was sent to */
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error) /* only act when the lookup reported no error */
+ rt6_do_redirect(dst, NULL, skb);
+ dst_release(dst); /* released on both paths */
+}
+
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d5dda20..bf63ac8 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -112,32 +112,38 @@
& COOKIEMASK;
}
-__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
+u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
+ const struct tcphdr *th, __u16 *mssp) /* socket-free variant: works on the headers only; *mssp is overwritten with the selected msstab[] entry */
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
int mssind;
const __u16 mss = *mssp;
- tcp_synq_overflow(sk);
-
for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
if (mss >= msstab[mssind])
break;
*mssp = msstab[mssind];
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
th->dest, ntohl(th->seq),
jiffies / (HZ * 60), mssind);
}
+EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); /* exported for GPL modules */
-static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
+__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) /* socket-aware wrapper around __cookie_v6_init_sequence() */
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
+
+ tcp_synq_overflow(sk); /* per-socket bookkeeping stays in the wrapper */
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); /* counted here, not in the socket-free helper */
+
+ return __cookie_v6_init_sequence(iph, th, mssp);
+}
+
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
+ __u32 cookie)
+{
__u32 seq = ntohl(th->seq) - 1;
__u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
th->source, th->dest, seq,
@@ -145,6 +151,7 @@
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
+EXPORT_SYMBOL_GPL(__cookie_v6_check);
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
{
@@ -167,7 +174,7 @@
goto out;
if (tcp_synq_no_recent_overflow(sk) ||
- (mss = cookie_check(skb, cookie)) == 0) {
+ (mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ab8bd2c..9d58537 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -45,7 +45,7 @@
static DEFINE_MUTEX(pfkey_mutex);
#define DUMMY_MARK 0
-static struct xfrm_mark dummy_mark = {0, 0};
+static const struct xfrm_mark dummy_mark = {0, 0};
struct pfkey_sock {
/* struct sock must be the first member of struct pfkey_sock */
struct sock sk;
@@ -338,7 +338,7 @@
return 0;
}
-static u8 sadb_ext_min_len[] = {
+static const u8 sadb_ext_min_len[] = {
[SADB_EXT_RESERVED] = (u8) 0,
[SADB_EXT_SA] = (u8) sizeof(struct sadb_sa),
[SADB_EXT_LIFETIME_CURRENT] = (u8) sizeof(struct sadb_lifetime),
@@ -1196,10 +1196,6 @@
x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
&x->props.saddr);
- if (!x->props.family) {
- err = -EAFNOSUPPORT;
- goto out;
- }
pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1],
&x->id.daddr);
@@ -2205,10 +2201,6 @@
sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr);
- if (!xp->family) {
- err = -EINVAL;
- goto out;
- }
xp->selector.family = xp->family;
xp->selector.prefixlen_s = sa->sadb_address_prefixlen;
xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
@@ -2737,7 +2729,7 @@
typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb,
const struct sadb_msg *hdr, void * const *ext_hdrs);
-static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
+static const pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
[SADB_RESERVED] = pfkey_reserved,
[SADB_GETSPI] = pfkey_getspi,
[SADB_UPDATE] = pfkey_add,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 56d22ca..62a171a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -408,22 +408,11 @@
depends on NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_TFTP
+config NETFILTER_SYNPROXY
+ tristate
+
endif # NF_CONNTRACK
-# transparent proxy support
-config NETFILTER_TPROXY
- tristate "Transparent proxying support"
- depends on IP_NF_MANGLE
- depends on NETFILTER_ADVANCED
- help
- This option enables transparent proxying support, that is,
- support for handling non-locally bound IPv4 TCP and UDP sockets.
- For it to work you will have to configure certain iptables rules
- and use policy routing. For more information on how to set it up
- see Documentation/networking/tproxy.txt.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
@@ -720,10 +709,10 @@
this clone be rerouted to another nexthop.
config NETFILTER_XT_TARGET_TPROXY
- tristate '"TPROXY" target support'
- depends on NETFILTER_TPROXY
+ tristate '"TPROXY" target transparent proxying support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
+ depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
@@ -731,6 +720,9 @@
REDIRECT. It can only be used in the mangle table and is useful
to redirect traffic to a transparent proxy. It does _not_ depend
on Netfilter connection tracking and NAT, unlike REDIRECT.
+ For it to work you will have to configure certain iptables rules
+ and use policy routing. For more information on how to set it up
+ see Documentation/networking/tproxy.txt.
To compile it as a module, choose M here. If unsure, say N.
@@ -1180,7 +1172,6 @@
config NETFILTER_XT_MATCH_SOCKET
tristate '"socket" match support'
- depends on NETFILTER_TPROXY
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a1abf87..c3a0a12 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,6 @@
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
-nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
+nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
@@ -61,8 +61,8 @@
obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
-# transparent proxy support
-obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
+# SYNPROXY
+obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 2217363..593b16e 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -234,12 +234,13 @@
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
+void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
+ __rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);
-void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
+void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{
- void (*attach)(struct sk_buff *, struct sk_buff *);
+ void (*attach)(struct sk_buff *, const struct sk_buff *);
if (skb->nfct) {
rcu_read_lock();
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 3cd85b2..5199448 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -414,7 +414,7 @@
spin_lock_bh(&svc->sched_lock);
tbl->dead = 1;
- for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblcr_free(en);
}
@@ -440,7 +440,7 @@
struct ip_vs_lblcr_entry *en;
struct hlist_node *next;
- for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
spin_lock(&svc->sched_lock);
@@ -495,7 +495,7 @@
if (goal > tbl->max_size/2)
goal = tbl->max_size/2;
- for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
spin_lock(&svc->sched_lock);
@@ -536,7 +536,7 @@
/*
* Initialize the hash buckets
*/
- for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
INIT_HLIST_HEAD(&tbl->bucket[i]);
}
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index f16c027..3588fae 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -269,14 +269,20 @@
switch (iph->protocol) {
case IPPROTO_TCP:
th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+ if (unlikely(th == NULL))
+ return 0;
port = th->source;
break;
case IPPROTO_UDP:
uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+ if (unlikely(uh == NULL))
+ return 0;
port = uh->source;
break;
case IPPROTO_SCTP:
sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+ if (unlikely(sh == NULL))
+ return 0;
port = sh->source;
break;
default:
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0283bae..5d892fe 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -39,6 +39,7 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
@@ -47,6 +48,7 @@
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
@@ -238,7 +240,7 @@
nf_conntrack_free(ct);
}
-void nf_ct_delete_from_lists(struct nf_conn *ct)
+static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
@@ -253,7 +255,6 @@
&net->ct.dying);
spin_unlock_bh(&nf_conntrack_lock);
}
-EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
static void death_by_event(unsigned long ul_conntrack)
{
@@ -275,7 +276,7 @@
nf_ct_put(ct);
}
-void nf_ct_dying_timeout(struct nf_conn *ct)
+static void nf_ct_dying_timeout(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
@@ -288,27 +289,33 @@
(prandom_u32() % net->ct.sysctl_events_retry_timeout);
add_timer(&ecache->timeout);
}
-EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
-static void death_by_timeout(unsigned long ul_conntrack)
+bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) /* false: IPCT_DESTROY could not be delivered; true: ct was marked dying, unlinked and put */
{
- struct nf_conn *ct = (void *)ul_conntrack;
struct nf_conn_tstamp *tstamp;
tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_to_ns(ktime_get_real());
- if (!test_bit(IPS_DYING_BIT, &ct->status) &&
- unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
+ if (!nf_ct_is_dying(ct) && /* no destroy event is sent for an entry that is already dying */
+ unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct,
+ portid, report) < 0)) { /* portid/report are forwarded unchanged to the event report */
/* destroy event was not delivered */
nf_ct_delete_from_lists(ct);
nf_ct_dying_timeout(ct);
- return;
+ return false; /* ct was unlinked and nf_ct_dying_timeout() armed; no put on this path */
}
set_bit(IPS_DYING_BIT, &ct->status);
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
+ return true;
+}
+EXPORT_SYMBOL_GPL(nf_ct_delete); /* exported for GPL modules */
+
+static void death_by_timeout(unsigned long ul_conntrack) /* thin wrapper: ul_conntrack is a struct nf_conn pointer passed as unsigned long */
+{
+ nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0); /* portid 0, report 0; the bool result is ignored */
}
/*
@@ -643,10 +650,7 @@
return dropped;
if (del_timer(&ct->timeout)) {
- death_by_timeout((unsigned long)ct);
- /* Check if we indeed killed this entry. Reliable event
- delivery may have inserted it into the dying list. */
- if (test_bit(IPS_DYING_BIT, &ct->status)) {
+ if (nf_ct_delete(ct, 0, 0)) {
dropped = 1;
NF_CT_STAT_INC_ATOMIC(net, early_drop);
}
@@ -796,6 +800,11 @@
if (IS_ERR(ct))
return (struct nf_conntrack_tuple_hash *)ct;
+ if (tmpl && nfct_synproxy(tmpl)) {
+ nfct_seqadj_ext_add(ct);
+ nfct_synproxy_ext_add(ct);
+ }
+
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
if (timeout_ext)
timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
@@ -1192,7 +1201,7 @@
#endif
/* Used by ipt_REJECT and ip6t_REJECT. */
-static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
+static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
@@ -1244,7 +1253,7 @@
void nf_ct_iterate_cleanup(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
- void *data)
+ void *data, u32 portid, int report)
{
struct nf_conn *ct;
unsigned int bucket = 0;
@@ -1252,7 +1261,8 @@
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
if (del_timer(&ct->timeout))
- death_by_timeout((unsigned long)ct);
+ nf_ct_delete(ct, portid, report);
+
/* ... else the timer will get him soon. */
nf_ct_put(ct);
@@ -1260,30 +1270,6 @@
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
-struct __nf_ct_flush_report {
- u32 portid;
- int report;
-};
-
-static int kill_report(struct nf_conn *i, void *data)
-{
- struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
- struct nf_conn_tstamp *tstamp;
-
- tstamp = nf_conn_tstamp_find(i);
- if (tstamp && tstamp->stop == 0)
- tstamp->stop = ktime_to_ns(ktime_get_real());
-
- /* If we fail to deliver the event, death_by_timeout() will retry */
- if (nf_conntrack_event_report(IPCT_DESTROY, i,
- fr->portid, fr->report) < 0)
- return 1;
-
- /* Avoid the delivery of the destroy event in death_by_timeout(). */
- set_bit(IPS_DYING_BIT, &i->status);
- return 1;
-}
-
static int kill_all(struct nf_conn *i, void *data)
{
return 1;
@@ -1301,11 +1287,7 @@
void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
{
- struct __nf_ct_flush_report fr = {
- .portid = portid,
- .report = report,
- };
- nf_ct_iterate_cleanup(net, kill_report, &fr);
+ nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report);
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
@@ -1351,6 +1333,7 @@
nf_ct_extend_unregister(&nf_ct_zone_extend);
#endif
nf_conntrack_proto_fini();
+ nf_conntrack_seqadj_fini();
nf_conntrack_labels_fini();
nf_conntrack_helper_fini();
nf_conntrack_timeout_fini();
@@ -1386,7 +1369,7 @@
i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
- nf_ct_iterate_cleanup(net, kill_all, NULL);
+ nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
nf_ct_release_dying_list(net);
if (atomic_read(&net->ct.count) != 0)
busy = 1;
@@ -1556,6 +1539,10 @@
if (ret < 0)
goto err_labels;
+ ret = nf_conntrack_seqadj_init();
+ if (ret < 0)
+ goto err_seqadj;
+
#ifdef CONFIG_NF_CONNTRACK_ZONES
ret = nf_ct_extend_register(&nf_ct_zone_extend);
if (ret < 0)
@@ -1580,6 +1567,8 @@
nf_ct_extend_unregister(&nf_ct_zone_extend);
err_extend:
#endif
+ nf_conntrack_seqadj_fini();
+err_seqadj:
nf_conntrack_labels_fini();
err_labels:
nf_conntrack_helper_fini();
@@ -1602,9 +1591,6 @@
/* For use by REJECT target */
RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
-
- /* Howto get NAT offsets */
- RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
}
/*
@@ -1691,8 +1677,3 @@
err_stat:
return ret;
}
-
-s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
- enum ip_conntrack_dir dir,
- u32 seq);
-EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 355d2ef..bb53f12 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -8,12 +8,8 @@
* published by the Free Software Foundation.
*/
-#include <linux/ctype.h>
#include <linux/export.h>
-#include <linux/jhash.h>
-#include <linux/spinlock.h>
#include <linux/types.h>
-#include <linux/slab.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index edc410e..eea936b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -37,6 +37,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
@@ -381,9 +382,8 @@
return -1;
}
-#ifdef CONFIG_NF_NAT_NEEDED
static int
-dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
+dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type)
{
struct nlattr *nest_parms;
@@ -391,12 +391,12 @@
if (!nest_parms)
goto nla_put_failure;
- if (nla_put_be32(skb, CTA_NAT_SEQ_CORRECTION_POS,
- htonl(natseq->correction_pos)) ||
- nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_BEFORE,
- htonl(natseq->offset_before)) ||
- nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_AFTER,
- htonl(natseq->offset_after)))
+ if (nla_put_be32(skb, CTA_SEQADJ_CORRECTION_POS,
+ htonl(seq->correction_pos)) ||
+ nla_put_be32(skb, CTA_SEQADJ_OFFSET_BEFORE,
+ htonl(seq->offset_before)) ||
+ nla_put_be32(skb, CTA_SEQADJ_OFFSET_AFTER,
+ htonl(seq->offset_after)))
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
@@ -408,27 +408,24 @@
}
static inline int
-ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
+ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) /* dumps both directions of ct's seqadj data; 0 on success or nothing to dump, -1 on failure */
{
- struct nf_nat_seq *natseq;
- struct nf_conn_nat *nat = nfct_nat(ct);
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+ struct nf_ct_seqadj *seq;
- if (!(ct->status & IPS_SEQ_ADJUST) || !nat)
+ if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj) /* nothing to dump without the status bit or the seqadj data */
return 0;
- natseq = &nat->seq[IP_CT_DIR_ORIGINAL];
- if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_ORIG) == -1)
+ seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
+ if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1) /* original direction first */
return -1;
- natseq = &nat->seq[IP_CT_DIR_REPLY];
- if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_REPLY) == -1)
+ seq = &seqadj->seq[IP_CT_DIR_REPLY];
+ if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1) /* then the reply direction */
return -1;
return 0;
}
-#else
-#define ctnetlink_dump_nat_seq_adj(a, b) (0)
-#endif
static inline int
ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -502,7 +499,7 @@
ctnetlink_dump_id(skb, ct) < 0 ||
ctnetlink_dump_use(skb, ct) < 0 ||
ctnetlink_dump_master(skb, ct) < 0 ||
- ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -707,8 +704,8 @@
ctnetlink_dump_master(skb, ct) < 0)
goto nla_put_failure;
- if (events & (1 << IPCT_NATSEQADJ) &&
- ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+ if (events & (1 << IPCT_SEQADJ) &&
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
goto nla_put_failure;
}
@@ -1038,21 +1035,9 @@
}
}
- if (del_timer(&ct->timeout)) {
- if (nf_conntrack_event_report(IPCT_DESTROY, ct,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh)) < 0) {
- nf_ct_delete_from_lists(ct);
- /* we failed to report the event, try later */
- nf_ct_dying_timeout(ct);
- nf_ct_put(ct);
- return 0;
- }
- /* death_by_timeout would report the event again */
- set_bit(IPS_DYING_BIT, &ct->status);
- nf_ct_delete_from_lists(ct);
- nf_ct_put(ct);
- }
+ if (del_timer(&ct->timeout))
+ nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
+
nf_ct_put(ct);
return 0;
@@ -1451,66 +1436,65 @@
return err;
}
-#ifdef CONFIG_NF_NAT_NEEDED
-static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = {
- [CTA_NAT_SEQ_CORRECTION_POS] = { .type = NLA_U32 },
- [CTA_NAT_SEQ_OFFSET_BEFORE] = { .type = NLA_U32 },
- [CTA_NAT_SEQ_OFFSET_AFTER] = { .type = NLA_U32 },
+static const struct nla_policy seqadj_policy[CTA_SEQADJ_MAX+1] = { /* policy passed to nla_parse_nested() in change_seq_adj() */
+ [CTA_SEQADJ_CORRECTION_POS] = { .type = NLA_U32 },
+ [CTA_SEQADJ_OFFSET_BEFORE] = { .type = NLA_U32 },
+ [CTA_SEQADJ_OFFSET_AFTER] = { .type = NLA_U32 },
};
static inline int
-change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr)
+change_seq_adj(struct nf_ct_seqadj *seq, const struct nlattr * const attr) /* fills *seq from the nested attribute; all three CTA_SEQADJ_* fields are required */
{
int err;
- struct nlattr *cda[CTA_NAT_SEQ_MAX+1];
+ struct nlattr *cda[CTA_SEQADJ_MAX+1];
- err = nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy);
+ err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy);
if (err < 0)
return err;
- if (!cda[CTA_NAT_SEQ_CORRECTION_POS])
+ if (!cda[CTA_SEQADJ_CORRECTION_POS])
return -EINVAL;
- natseq->correction_pos =
- ntohl(nla_get_be32(cda[CTA_NAT_SEQ_CORRECTION_POS]));
+ seq->correction_pos =
+ ntohl(nla_get_be32(cda[CTA_SEQADJ_CORRECTION_POS])); /* NOTE(review): written before the remaining attributes are checked, so -EINVAL below can leave *seq partly updated */
- if (!cda[CTA_NAT_SEQ_OFFSET_BEFORE])
+ if (!cda[CTA_SEQADJ_OFFSET_BEFORE])
return -EINVAL;
- natseq->offset_before =
- ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_BEFORE]));
+ seq->offset_before =
+ ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_BEFORE]));
- if (!cda[CTA_NAT_SEQ_OFFSET_AFTER])
+ if (!cda[CTA_SEQADJ_OFFSET_AFTER])
return -EINVAL;
- natseq->offset_after =
- ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_AFTER]));
+ seq->offset_after =
+ ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_AFTER]));
return 0;
}
static int
-ctnetlink_change_nat_seq_adj(struct nf_conn *ct,
- const struct nlattr * const cda[])
+ctnetlink_change_seq_adj(struct nf_conn *ct,
+ const struct nlattr * const cda[])
{
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
int ret = 0;
- struct nf_conn_nat *nat = nfct_nat(ct);
- if (!nat)
+ if (!seqadj)
return 0;
- if (cda[CTA_NAT_SEQ_ADJ_ORIG]) {
- ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_ORIGINAL],
- cda[CTA_NAT_SEQ_ADJ_ORIG]);
+ if (cda[CTA_SEQ_ADJ_ORIG]) {
+ ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
+ cda[CTA_SEQ_ADJ_ORIG]);
if (ret < 0)
return ret;
ct->status |= IPS_SEQ_ADJUST;
}
- if (cda[CTA_NAT_SEQ_ADJ_REPLY]) {
- ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_REPLY],
- cda[CTA_NAT_SEQ_ADJ_REPLY]);
+ if (cda[CTA_SEQ_ADJ_REPLY]) {
+ ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
+ cda[CTA_SEQ_ADJ_REPLY]);
if (ret < 0)
return ret;
@@ -1519,7 +1503,6 @@
return 0;
}
-#endif
static int
ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[])
@@ -1585,13 +1568,12 @@
ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
#endif
-#ifdef CONFIG_NF_NAT_NEEDED
- if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
- err = ctnetlink_change_nat_seq_adj(ct, cda);
+ if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
+ err = ctnetlink_change_seq_adj(ct, cda);
if (err < 0)
return err;
}
-#endif
+
if (cda[CTA_LABELS]) {
err = ctnetlink_attach_labels(ct, cda);
if (err < 0)
@@ -1696,13 +1678,11 @@
goto err2;
}
-#ifdef CONFIG_NF_NAT_NEEDED
- if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
- err = ctnetlink_change_nat_seq_adj(ct, cda);
+ if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
+ err = ctnetlink_change_seq_adj(ct, cda);
if (err < 0)
goto err2;
}
-#endif
memset(&ct->proto, 0, sizeof(ct->proto));
if (cda[CTA_PROTOINFO]) {
@@ -1816,7 +1796,7 @@
(1 << IPCT_ASSURED) |
(1 << IPCT_HELPER) |
(1 << IPCT_PROTOINFO) |
- (1 << IPCT_NATSEQADJ) |
+ (1 << IPCT_SEQADJ) |
(1 << IPCT_MARK) | events,
ct, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
@@ -1839,7 +1819,7 @@
(1 << IPCT_HELPER) |
(1 << IPCT_LABEL) |
(1 << IPCT_PROTOINFO) |
- (1 << IPCT_NATSEQADJ) |
+ (1 << IPCT_SEQADJ) |
(1 << IPCT_MARK),
ct, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
@@ -1999,6 +1979,27 @@
return err == -EAGAIN ? -ENOBUFS : err;
}
+static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { /* policy passed to nla_parse_nested() for CTA_EXPECT_* attributes */
+ [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
+ [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
+ [CTA_EXPECT_MASK] = { .type = NLA_NESTED },
+ [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
+ [CTA_EXPECT_ID] = { .type = NLA_U32 },
+ [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
+ .len = NF_CT_HELPER_NAME_LEN - 1 },
+ [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
+ [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
+ [CTA_EXPECT_CLASS] = { .type = NLA_U32 },
+ [CTA_EXPECT_NAT] = { .type = NLA_NESTED },
+ [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING }, /* NOTE(review): no .len given here, unlike CTA_EXPECT_HELP_NAME - confirm intended */
+};
+
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
+ struct nf_conntrack_helper *helper,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask); /* forward declaration: ctnetlink_nfqueue_attach_expect() calls this before its definition */
+
#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
static size_t
ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
@@ -2073,7 +2074,7 @@
goto nla_put_failure;
if ((ct->status & IPS_SEQ_ADJUST) &&
- ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
goto nla_put_failure;
#ifdef CONFIG_NF_CONNTRACK_MARK
@@ -2139,10 +2140,70 @@
return ret;
}
+static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
+ const struct nf_conn *ct,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask) /* fills *tuple and *mask from cda[]; returns the first negative error */
+{
+ int err;
+
+ err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
+ nf_ct_l3num(ct)); /* the l3 protocol number is taken from ct */
+ if (err < 0)
+ return err;
+
+ return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
+ nf_ct_l3num(ct)); /* mask is parsed with the same l3 protocol number */
+}
+
+static int
+ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
+ u32 portid, u32 report) /* builds an expectation for ct from the nested attr and registers it; 0 or a negative errno */
+{
+ struct nlattr *cda[CTA_EXPECT_MAX+1];
+ struct nf_conntrack_tuple tuple, mask;
+ struct nf_conntrack_helper *helper = NULL;
+ struct nf_conntrack_expect *exp;
+ int err;
+
+ err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy);
+ if (err < 0)
+ return err;
+
+ err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda,
+ ct, &tuple, &mask);
+ if (err < 0)
+ return err;
+
+ if (cda[CTA_EXPECT_HELP_NAME]) { /* the helper is optional; stays NULL when no name is given */
+ const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+ helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+ nf_ct_protonum(ct));
+ if (helper == NULL)
+ return -EOPNOTSUPP; /* no module load is attempted here */
+ }
+
+ exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
+ helper, &tuple, &mask);
+ if (IS_ERR(exp))
+ return PTR_ERR(exp);
+
+ err = nf_ct_expect_related_report(exp, portid, report);
+ /* Drop the allocation reference on success as well as on failure, otherwise exp leaks. */
+ nf_ct_expect_put(exp);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
.build_size = ctnetlink_nfqueue_build_size,
.build = ctnetlink_nfqueue_build,
.parse = ctnetlink_nfqueue_parse,
+ .attach_expect = ctnetlink_nfqueue_attach_expect, /* builds and registers an expectation from a nested attribute */
+ .seq_adjust = nf_ct_tcp_seqadj_set, /* points straight at the seqadj helper, no ctnetlink wrapper */
};
#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */
@@ -2510,21 +2571,6 @@
return err;
}
-static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
- [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
- [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
- [CTA_EXPECT_MASK] = { .type = NLA_NESTED },
- [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
- [CTA_EXPECT_ID] = { .type = NLA_U32 },
- [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
- .len = NF_CT_HELPER_NAME_LEN - 1 },
- [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
- [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
- [CTA_EXPECT_CLASS] = { .type = NLA_U32 },
- [CTA_EXPECT_NAT] = { .type = NLA_NESTED },
- [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
-};
-
static int
ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
@@ -2747,76 +2793,26 @@
#endif
}
-static int
-ctnetlink_create_expect(struct net *net, u16 zone,
- const struct nlattr * const cda[],
- u_int8_t u3,
- u32 portid, int report)
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
+ struct nf_conntrack_helper *helper,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask)
{
- struct nf_conntrack_tuple tuple, mask, master_tuple;
- struct nf_conntrack_tuple_hash *h = NULL;
- struct nf_conntrack_expect *exp;
- struct nf_conn *ct;
- struct nf_conn_help *help;
- struct nf_conntrack_helper *helper = NULL;
u_int32_t class = 0;
- int err = 0;
-
- /* caller guarantees that those three CTA_EXPECT_* exist */
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
- if (err < 0)
- return err;
-
- /* Look for master conntrack of this expectation */
- h = nf_conntrack_find_get(net, zone, &master_tuple);
- if (!h)
- return -ENOENT;
- ct = nf_ct_tuplehash_to_ctrack(h);
-
- /* Look for helper of this expectation */
- if (cda[CTA_EXPECT_HELP_NAME]) {
- const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
-
- helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
- nf_ct_protonum(ct));
- if (helper == NULL) {
-#ifdef CONFIG_MODULES
- if (request_module("nfct-helper-%s", helpname) < 0) {
- err = -EOPNOTSUPP;
- goto out;
- }
-
- helper = __nf_conntrack_helper_find(helpname,
- nf_ct_l3num(ct),
- nf_ct_protonum(ct));
- if (helper) {
- err = -EAGAIN;
- goto out;
- }
-#endif
- err = -EOPNOTSUPP;
- goto out;
- }
- }
+ struct nf_conntrack_expect *exp;
+ struct nf_conn_help *help;
+ int err;
if (cda[CTA_EXPECT_CLASS] && helper) {
class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS]));
- if (class > helper->expect_class_max) {
- err = -EINVAL;
- goto out;
- }
+ if (class > helper->expect_class_max)
+ return ERR_PTR(-EINVAL);
}
exp = nf_ct_expect_alloc(ct);
- if (!exp) {
- err = -ENOMEM;
- goto out;
- }
+ if (!exp)
+ return ERR_PTR(-ENOMEM);
+
help = nfct_help(ct);
if (!help) {
if (!cda[CTA_EXPECT_TIMEOUT]) {
@@ -2854,21 +2850,89 @@
exp->class = class;
exp->master = ct;
exp->helper = helper;
- memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
- memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3));
- exp->mask.src.u.all = mask.src.u.all;
+ exp->tuple = *tuple;
+ exp->mask.src.u3 = mask->src.u3;
+ exp->mask.src.u.all = mask->src.u.all;
if (cda[CTA_EXPECT_NAT]) {
err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT],
- exp, u3);
+ exp, nf_ct_l3num(ct));
if (err < 0)
goto err_out;
}
- err = nf_ct_expect_related_report(exp, portid, report);
+ return exp;
err_out:
nf_ct_expect_put(exp);
-out:
- nf_ct_put(nf_ct_tuplehash_to_ctrack(h));
+ return ERR_PTR(err);
+}
+
+/*
+ * Create an expectation requested via netlink and register it.
+ *
+ * Parses the expected tuple, its mask and the master tuple from @cda, looks
+ * up the master conntrack in @net/@zone, optionally resolves the helper named
+ * by CTA_EXPECT_HELP_NAME (requesting its module when CONFIG_MODULES is set),
+ * builds the expectation with ctnetlink_alloc_expect() and registers it with
+ * nf_ct_expect_related_report().
+ *
+ * Returns 0 on success or a negative errno: a tuple parsing error, -ENOENT
+ * when the master conntrack is not found, -EOPNOTSUPP when the helper is
+ * unavailable, -EAGAIN when the helper only became available after the module
+ * request, or the error from allocation/registration.
+ */
+static int
+ctnetlink_create_expect(struct net *net, u16 zone,
+			const struct nlattr * const cda[],
+			u_int8_t u3, u32 portid, int report)
+{
+	struct nf_conntrack_tuple tuple, mask, master_tuple;
+	struct nf_conntrack_tuple_hash *h = NULL;
+	struct nf_conntrack_helper *helper = NULL;
+	struct nf_conntrack_expect *exp;
+	struct nf_conn *ct;
+	int err;
+
+	/* caller guarantees that those three CTA_EXPECT_* exist */
+	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+	if (err < 0)
+		return err;
+	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
+	if (err < 0)
+		return err;
+	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
+	if (err < 0)
+		return err;
+
+	/* Look for master conntrack of this expectation */
+	h = nf_conntrack_find_get(net, zone, &master_tuple);
+	if (!h)
+		return -ENOENT;
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	/* Look for helper of this expectation */
+	if (cda[CTA_EXPECT_HELP_NAME]) {
+		const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+		helper = __nf_conntrack_helper_find(helpname, u3,
+						    nf_ct_protonum(ct));
+		if (helper == NULL) {
+#ifdef CONFIG_MODULES
+			if (request_module("nfct-helper-%s", helpname) < 0) {
+				err = -EOPNOTSUPP;
+				goto err_ct;
+			}
+			helper = __nf_conntrack_helper_find(helpname, u3,
+							    nf_ct_protonum(ct));
+			if (helper) {
+				/* Helper is loaded now: ask for a retry. */
+				err = -EAGAIN;
+				goto err_ct;
+			}
+#endif
+			err = -EOPNOTSUPP;
+			goto err_ct;
+		}
+	}
+
+	exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
+	if (IS_ERR(exp)) {
+		err = PTR_ERR(exp);
+		goto err_ct;
+	}
+
+	err = nf_ct_expect_related_report(exp, portid, report);
+	/* The references taken here (expectation allocation and master
+	 * conntrack lookup) are local to this function and must be dropped
+	 * on success as well as on failure; returning early on success
+	 * would leak both.
+	 */
+	nf_ct_expect_put(exp);
+err_ct:
+	nf_ct_put(ct);
 	return err;
 }
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 0ab9636..ce30041 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -281,7 +281,7 @@
nf_ct_l3proto_unregister_sysctl(net, proto);
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(net, kill_l3proto, proto);
+ nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
@@ -476,7 +476,7 @@
nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
+ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 2f80107..44d1ea3 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -27,6 +27,8 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_log.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -495,21 +497,6 @@
}
}
-#ifdef CONFIG_NF_NAT_NEEDED
-static inline s16 nat_offset(const struct nf_conn *ct,
- enum ip_conntrack_dir dir,
- u32 seq)
-{
- typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
-
- return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
-}
-#define NAT_OFFSET(ct, dir, seq) \
- (nat_offset(ct, dir, seq))
-#else
-#define NAT_OFFSET(ct, dir, seq) 0
-#endif
-
static bool tcp_in_window(const struct nf_conn *ct,
struct ip_ct_tcp *state,
enum ip_conntrack_dir dir,
@@ -525,7 +512,7 @@
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin;
- s16 receiver_offset;
+ s32 receiver_offset;
bool res, in_recv_win;
/*
@@ -540,7 +527,7 @@
tcp_sack(skb, dataoff, tcph, &sack);
/* Take into account NAT sequence number mangling */
- receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
+ receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
ack -= receiver_offset;
sack -= receiver_offset;
@@ -960,6 +947,21 @@
"state %s ", tcp_conntrack_names[old_state]);
return NF_ACCEPT;
case TCP_CONNTRACK_MAX:
+ /* Special case for SYN proxy: when the SYN to the server or
+ * the SYN/ACK from the server is lost, the client may transmit
+ * a keep-alive packet while in SYN_SENT state. This needs to
+ * be associated with the original conntrack entry in order to
+ * generate a new SYN with the correct sequence number.
+ */
+ if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
+ index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
+ ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
+ ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
+ pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
+ spin_unlock_bh(&ct->lock);
+ return NF_ACCEPT;
+ }
+
/* Invalid packet */
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
new file mode 100644
index 0000000..5f9bfd0
--- /dev/null
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -0,0 +1,238 @@
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+
+/*
+ * Give one direction of @ct a constant sequence offset: both the "before"
+ * and the "after" offset of the direction derived from @ctinfo are set to
+ * @off and the conntrack is flagged with IPS_SEQ_ADJUST_BIT. A zero @off is
+ * a no-op. Always returns 0.
+ *
+ * NOTE(review): the seqadj extension is dereferenced without a NULL check;
+ * callers presumably add it beforehand - confirm.
+ */
+int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+		      s32 off)
+{
+	struct nf_ct_seqadj *adj;
+
+	if (!off)
+		return 0;
+
+	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+
+	adj = &nfct_seqadj(ct)->seq[CTINFO2DIR(ctinfo)];
+	adj->offset_before = off;
+	adj->offset_after = off;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seqadj_init);
+
+/*
+ * Record that the payload of @ct changed length by @off bytes at sequence
+ * number @seq (network byte order, as found in the TCP header) in the
+ * direction derived from @ctinfo.
+ *
+ * The adjustment is only recorded when no adjustment is pending for the
+ * direction (offset_before == offset_after) or when @seq lies after the last
+ * recorded correction position; otherwise it is ignored. A zero @off is a
+ * no-op. Always returns 0.
+ */
+int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+		     __be32 seq, s32 off)
+{
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_seqadj *this_way;
+	/* correction_pos is kept in host byte order: the adjust/offset
+	 * paths compare it against ntohl()'d header values, so convert the
+	 * wire value before comparing or storing it.
+	 */
+	u32 pos = ntohl(seq);
+
+	if (off == 0)
+		return 0;
+
+	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+
+	spin_lock_bh(&ct->lock);
+	this_way = &seqadj->seq[dir];
+	if (this_way->offset_before == this_way->offset_after ||
+	    before(this_way->correction_pos, pos)) {
+		this_way->correction_pos = pos;
+		this_way->offset_before = this_way->offset_after;
+		this_way->offset_after += off;
+	}
+	spin_unlock_bh(&ct->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seqadj_set);
+
+/*
+ * Convenience wrapper around nf_ct_seqadj_set() that takes the sequence
+ * number from the TCP header of @skb. Does nothing unless @ct tracks TCP.
+ * The TCP header is located with ip_hdrlen() past the network header.
+ */
+void nf_ct_tcp_seqadj_set(struct sk_buff *skb,
+			  struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+			  s32 off)
+{
+	const struct tcphdr *tcp;
+
+	if (nf_ct_protonum(ct) == IPPROTO_TCP) {
+		tcp = (struct tcphdr *)(skb_network_header(skb) +
+					ip_hdrlen(skb));
+		nf_ct_seqadj_set(ct, ctinfo, tcp->seq, off);
+	}
+}
+EXPORT_SYMBOL_GPL(nf_ct_tcp_seqadj_set);
+
+/*
+ * Translate one SACK edge (network byte order) back by the offset that
+ * applies to it: offset_after when the un-offset edge lies past the
+ * correction position, offset_before otherwise.
+ */
+static __be32 nf_ct_sack_edge_adjust(__be32 edge,
+				     const struct nf_ct_seqadj *seq)
+{
+	u32 val = ntohl(edge);
+
+	if (after(val - seq->offset_before, seq->correction_pos))
+		return htonl(val - seq->offset_after);
+
+	return htonl(val - seq->offset_before);
+}
+
+/*
+ * Adjust every block of one SACK option, including checksum correction.
+ *
+ * @sackoff/@sackend delimit the block array as offsets from skb->data;
+ * @seq is the adjustment state applied to the block edges. The TCP checksum
+ * in @tcph is updated incrementally for each rewritten edge.
+ */
+static void nf_ct_sack_block_adjust(struct sk_buff *skb,
+				    struct tcphdr *tcph,
+				    unsigned int sackoff,
+				    unsigned int sackend,
+				    struct nf_ct_seqadj *seq)
+{
+	while (sackoff < sackend) {
+		struct tcp_sack_block_wire *sack;
+		__be32 new_start_seq, new_end_seq;
+
+		sack = (void *)skb->data + sackoff;
+		new_start_seq = nf_ct_sack_edge_adjust(sack->start_seq, seq);
+		new_end_seq = nf_ct_sack_edge_adjust(sack->end_seq, seq);
+
+		/* Print all four values in host order, as unsigned. */
+		pr_debug("sack_adjust: start_seq: %u->%u, end_seq: %u->%u\n",
+			 ntohl(sack->start_seq), ntohl(new_start_seq),
+			 ntohl(sack->end_seq), ntohl(new_end_seq));
+
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->start_seq, new_start_seq, 0);
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->end_seq, new_end_seq, 0);
+		sack->start_seq = new_start_seq;
+		sack->end_seq = new_end_seq;
+		sackoff += sizeof(*sack);
+	}
+}
+
+/*
+ * TCP SACK sequence number adjustment.
+ *
+ * Walks the TCP options of the header at @protoff and rewrites every
+ * well-formed SACK option using the adjustment state of the opposite
+ * direction. Returns 1 on success (including "no SACK option present") and
+ * 0 when the header cannot be made writable or an option is truncated or
+ * has an invalid length.
+ */
+static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
+				      unsigned int protoff,
+				      struct tcphdr *tcph,
+				      struct nf_conn *ct,
+				      enum ip_conntrack_info ctinfo)
+{
+	unsigned int dir, optoff, optend;
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+
+	optoff = protoff + sizeof(struct tcphdr);
+	optend = protoff + tcph->doff * 4;
+
+	if (!skb_make_writable(skb, optend))
+		return 0;
+
+	/* Making the options writable may have moved the packet data, which
+	 * would leave the caller's header pointer stale; re-derive it so the
+	 * checksum updates below hit the live header.
+	 */
+	tcph = (void *)skb->data + protoff;
+
+	dir = CTINFO2DIR(ctinfo);
+
+	while (optoff < optend) {
+		/* Usually: option, length. */
+		unsigned char *op = skb->data + optoff;
+
+		switch (op[0]) {
+		case TCPOPT_EOL:
+			return 1;
+		case TCPOPT_NOP:
+			optoff++;
+			continue;
+		default:
+			/* no partial options */
+			if (optoff + 1 == optend ||
+			    optoff + op[1] > optend ||
+			    op[1] < 2)
+				return 0;
+			if (op[0] == TCPOPT_SACK &&
+			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
+			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
+				nf_ct_sack_block_adjust(skb, tcph, optoff + 2,
+							optoff+op[1],
+							&seqadj->seq[!dir]);
+			optoff += op[1];
+		}
+	}
+	return 1;
+}
+
+/*
+ * TCP sequence number adjustment. Returns 1 on success, 0 on failure.
+ *
+ * Rewrites the sequence and acknowledgement numbers of the TCP header found
+ * at @protoff in @skb: the sequence number is shifted by the offset recorded
+ * for the packet's own direction, the acknowledgement number by the offset
+ * recorded for the opposite direction. The TCP checksum is updated
+ * incrementally, then any SACK option is adjusted via nf_ct_sack_adjust().
+ * Returns 0 when the header cannot be made writable, otherwise the result of
+ * nf_ct_sack_adjust().
+ *
+ * NOTE(review): the seqadj extension is dereferenced without a NULL check;
+ * callers presumably only call this when the extension exists - confirm.
+ */
+int nf_ct_seq_adjust(struct sk_buff *skb,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ unsigned int protoff)
+{
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ struct tcphdr *tcph;
+ __be32 newseq, newack;
+ s32 seqoff, ackoff;
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+ struct nf_ct_seqadj *this_way, *other_way;
+ int res;
+
+ this_way = &seqadj->seq[dir];
+ other_way = &seqadj->seq[!dir];
+
+ if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
+ return 0;
+
+ tcph = (void *)skb->data + protoff;
+ /* The offsets are read and applied under the conntrack lock. */
+ spin_lock_bh(&ct->lock);
+ /* Sequence numbers past the correction point get the newer offset. */
+ if (after(ntohl(tcph->seq), this_way->correction_pos))
+ seqoff = this_way->offset_after;
+ else
+ seqoff = this_way->offset_before;
+
+ /* The ack is compared after removing the older offset from it. */
+ if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+ other_way->correction_pos))
+ ackoff = other_way->offset_after;
+ else
+ ackoff = other_way->offset_before;
+
+ newseq = htonl(ntohl(tcph->seq) + seqoff);
+ newack = htonl(ntohl(tcph->ack_seq) - ackoff);
+
+ inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+ inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
+
+ pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+ ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
+ ntohl(newack));
+
+ tcph->seq = newseq;
+ tcph->ack_seq = newack;
+
+ /* SACK blocks are rewritten while still holding the lock. */
+ res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+ spin_unlock_bh(&ct->lock);
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seq_adjust);
+
+/*
+ * Return the sequence offset that applies to @seq in direction @dir of @ct:
+ * the "after" offset when @seq lies past the recorded correction position,
+ * the "before" offset otherwise. Returns 0 when @ct carries no seqadj
+ * extension.
+ */
+s32 nf_ct_seq_offset(const struct nf_conn *ct,
+		     enum ip_conntrack_dir dir,
+		     u32 seq)
+{
+	struct nf_conn_seqadj *ext = nfct_seqadj(ct);
+	struct nf_ct_seqadj *adj;
+
+	if (ext == NULL)
+		return 0;
+
+	adj = &ext->seq[dir];
+	if (after(seq, adj->correction_pos))
+		return adj->offset_after;
+
+	return adj->offset_before;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seq_offset);
+
+/* Conntrack extension type describing struct nf_conn_seqadj storage. */
+static struct nf_ct_ext_type nf_ct_seqadj_extend __read_mostly = {
+ .len = sizeof(struct nf_conn_seqadj),
+ .align = __alignof__(struct nf_conn_seqadj),
+ .id = NF_CT_EXT_SEQADJ,
+};
+
+/* Register the seqadj extension type; returns nf_ct_extend_register()'s result. */
+int nf_conntrack_seqadj_init(void)
+{
+ return nf_ct_extend_register(&nf_ct_seqadj_extend);
+}
+
+/* Unregister the seqadj extension type registered by nf_conntrack_seqadj_init(). */
+void nf_conntrack_seqadj_fini(void)
+{
+ nf_ct_extend_unregister(&nf_ct_seqadj_extend);
+}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 038eee5..6f0f4f7 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -25,6 +25,7 @@
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
@@ -402,6 +403,9 @@
ct->status |= IPS_SRC_NAT;
else
ct->status |= IPS_DST_NAT;
+
+ if (nfct_help(ct))
+ nfct_seqadj_ext_add(ct);
}
if (maniptype == NF_NAT_MANIP_SRC) {
@@ -497,7 +501,7 @@
rtnl_lock();
for_each_net(net)
- nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
+ nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
rtnl_unlock();
}
@@ -511,7 +515,7 @@
rtnl_lock();
for_each_net(net)
- nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
+ nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
rtnl_unlock();
}
@@ -749,7 +753,7 @@
{
struct nf_nat_proto_clean clean = {};
- nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean);
+ nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
synchronize_rcu();
nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
@@ -764,10 +768,6 @@
.expectfn = nf_nat_follow_master,
};
-static struct nfq_ct_nat_hook nfq_ct_nat = {
- .seq_adjust = nf_nat_tcp_seq_adjust,
-};
-
static int __init nf_nat_init(void)
{
int ret;
@@ -787,14 +787,9 @@
/* Initialize fake conntrack so that NAT will skip it */
nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
- BUG_ON(nf_nat_seq_adjust_hook != NULL);
- RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
nfnetlink_parse_nat_setup);
- BUG_ON(nf_ct_nat_offset != NULL);
- RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
- RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat);
#ifdef CONFIG_XFRM
BUG_ON(nf_nat_decode_session_hook != NULL);
RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
@@ -813,10 +808,7 @@
unregister_pernet_subsys(&nf_nat_net_ops);
nf_ct_extend_unregister(&nat_extend);
nf_ct_helper_expectfn_unregister(&follow_master_nat);
- RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
- RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
- RCU_INIT_POINTER(nfq_ct_nat_hook, NULL);
#ifdef CONFIG_XFRM
RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
#endif
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 85e20a9..2840abb 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -20,74 +20,13 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
-#define DUMP_OFFSET(x) \
- pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
- x->offset_before, x->offset_after, x->correction_pos);
-
-static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
-
-/* Setup TCP sequence correction given this change at this sequence */
-static inline void
-adjust_tcp_sequence(u32 seq,
- int sizediff,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- struct nf_conn_nat *nat = nfct_nat(ct);
- struct nf_nat_seq *this_way = &nat->seq[dir];
-
- pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
- seq, sizediff);
-
- pr_debug("adjust_tcp_sequence: Seq_offset before: ");
- DUMP_OFFSET(this_way);
-
- spin_lock_bh(&nf_nat_seqofs_lock);
-
- /* SYN adjust. If it's uninitialized, or this is after last
- * correction, record it: we don't handle more than one
- * adjustment in the window, but do deal with common case of a
- * retransmit */
- if (this_way->offset_before == this_way->offset_after ||
- before(this_way->correction_pos, seq)) {
- this_way->correction_pos = seq;
- this_way->offset_before = this_way->offset_after;
- this_way->offset_after += sizediff;
- }
- spin_unlock_bh(&nf_nat_seqofs_lock);
-
- pr_debug("adjust_tcp_sequence: Seq_offset after: ");
- DUMP_OFFSET(this_way);
-}
-
-/* Get the offset value, for conntrack */
-s16 nf_nat_get_offset(const struct nf_conn *ct,
- enum ip_conntrack_dir dir,
- u32 seq)
-{
- struct nf_conn_nat *nat = nfct_nat(ct);
- struct nf_nat_seq *this_way;
- s16 offset;
-
- if (!nat)
- return 0;
-
- this_way = &nat->seq[dir];
- spin_lock_bh(&nf_nat_seqofs_lock);
- offset = after(seq, this_way->correction_pos)
- ? this_way->offset_after : this_way->offset_before;
- spin_unlock_bh(&nf_nat_seqofs_lock);
-
- return offset;
-}
-
/* Frobs data inside this packet, which is linear. */
static void mangle_contents(struct sk_buff *skb,
unsigned int dataoff,
@@ -142,30 +81,6 @@
return 1;
}
-void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- __be32 seq, s16 off)
-{
- if (!off)
- return;
- set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
- adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
- nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
-}
-EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
-
-void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
- u32 ctinfo, int off)
-{
- const struct tcphdr *th;
-
- if (nf_ct_protonum(ct) != IPPROTO_TCP)
- return;
-
- th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb));
- nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
-}
-EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust);
-
/* Generic function for mangling variable-length address changes inside
* NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
* command in FTP).
@@ -210,8 +125,8 @@
datalen, oldlen);
if (adjust && rep_len != match_len)
- nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
- (int)rep_len - (int)match_len);
+ nf_ct_seqadj_set(ct, ctinfo, tcph->seq,
+ (int)rep_len - (int)match_len);
return 1;
}
@@ -271,145 +186,6 @@
}
EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
-/* Adjust one found SACK option including checksum correction */
-static void
-sack_adjust(struct sk_buff *skb,
- struct tcphdr *tcph,
- unsigned int sackoff,
- unsigned int sackend,
- struct nf_nat_seq *natseq)
-{
- while (sackoff < sackend) {
- struct tcp_sack_block_wire *sack;
- __be32 new_start_seq, new_end_seq;
-
- sack = (void *)skb->data + sackoff;
- if (after(ntohl(sack->start_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_start_seq = htonl(ntohl(sack->start_seq)
- - natseq->offset_after);
- else
- new_start_seq = htonl(ntohl(sack->start_seq)
- - natseq->offset_before);
-
- if (after(ntohl(sack->end_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_end_seq = htonl(ntohl(sack->end_seq)
- - natseq->offset_after);
- else
- new_end_seq = htonl(ntohl(sack->end_seq)
- - natseq->offset_before);
-
- pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
- ntohl(sack->start_seq), new_start_seq,
- ntohl(sack->end_seq), new_end_seq);
-
- inet_proto_csum_replace4(&tcph->check, skb,
- sack->start_seq, new_start_seq, 0);
- inet_proto_csum_replace4(&tcph->check, skb,
- sack->end_seq, new_end_seq, 0);
- sack->start_seq = new_start_seq;
- sack->end_seq = new_end_seq;
- sackoff += sizeof(*sack);
- }
-}
-
-/* TCP SACK sequence number adjustment */
-static inline unsigned int
-nf_nat_sack_adjust(struct sk_buff *skb,
- unsigned int protoff,
- struct tcphdr *tcph,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- unsigned int dir, optoff, optend;
- struct nf_conn_nat *nat = nfct_nat(ct);
-
- optoff = protoff + sizeof(struct tcphdr);
- optend = protoff + tcph->doff * 4;
-
- if (!skb_make_writable(skb, optend))
- return 0;
-
- dir = CTINFO2DIR(ctinfo);
-
- while (optoff < optend) {
- /* Usually: option, length. */
- unsigned char *op = skb->data + optoff;
-
- switch (op[0]) {
- case TCPOPT_EOL:
- return 1;
- case TCPOPT_NOP:
- optoff++;
- continue;
- default:
- /* no partial options */
- if (optoff + 1 == optend ||
- optoff + op[1] > optend ||
- op[1] < 2)
- return 0;
- if (op[0] == TCPOPT_SACK &&
- op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
- ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
- sack_adjust(skb, tcph, optoff+2,
- optoff+op[1], &nat->seq[!dir]);
- optoff += op[1];
- }
- }
- return 1;
-}
-
-/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
-int
-nf_nat_seq_adjust(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff)
-{
- struct tcphdr *tcph;
- int dir;
- __be32 newseq, newack;
- s16 seqoff, ackoff;
- struct nf_conn_nat *nat = nfct_nat(ct);
- struct nf_nat_seq *this_way, *other_way;
-
- dir = CTINFO2DIR(ctinfo);
-
- this_way = &nat->seq[dir];
- other_way = &nat->seq[!dir];
-
- if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
- return 0;
-
- tcph = (void *)skb->data + protoff;
- if (after(ntohl(tcph->seq), this_way->correction_pos))
- seqoff = this_way->offset_after;
- else
- seqoff = this_way->offset_before;
-
- if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
- other_way->correction_pos))
- ackoff = other_way->offset_after;
- else
- ackoff = other_way->offset_before;
-
- newseq = htonl(ntohl(tcph->seq) + seqoff);
- newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
- inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
- inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
-
- pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
- ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
- ntohl(newack));
-
- tcph->seq = newseq;
- tcph->ack_seq = newack;
-
- return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
-}
-
/* Setup NAT on this expected conntrack so it follows master. */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
void nf_nat_follow_master(struct nf_conn *ct,
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index dac11f7..f979040 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -20,6 +20,7 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <linux/netfilter/nf_conntrack_sip.h>
MODULE_LICENSE("GPL");
@@ -308,7 +309,7 @@
return;
th = (struct tcphdr *)(skb->data + protoff);
- nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
+ nf_ct_seqadj_set(ct, ctinfo, th->seq, off);
}
/* Handles expected signalling connections and media streams */
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
new file mode 100644
index 0000000..d23dc79
--- /dev/null
+++ b/net/netfilter/nf_synproxy_core.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <asm/unaligned.h>
+#include <net/tcp.h>
+#include <net/netns/generic.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_tcpudp.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+/* Per-net id slot for the synproxy state; wired up via synproxy_net_ops.id. */
+int synproxy_net_id;
+EXPORT_SYMBOL_GPL(synproxy_net_id);
+
+/*
+ * Parse the TCP options following @th (located at offset @doff in @skb)
+ * into @opts.
+ *
+ * opts->options is reset and then receives one XT_SYNPROXY_OPT_* flag per
+ * recognised, correctly sized option (MSS, window scale, timestamp,
+ * SACK-permitted); the matching value fields are filled in alongside.
+ * Value fields of options that are absent are left untouched. Parsing stops
+ * silently at the first malformed option, and when the option area cannot
+ * be read from the packet no options are reported.
+ */
+void
+synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
+		       const struct tcphdr *th, struct synproxy_options *opts)
+{
+	int length = (th->doff * 4) - sizeof(*th);
+	u8 buf[40], *ptr;
+
+	opts->options = 0;
+
+	/* No options, or a data offset smaller than the fixed header. */
+	if (length <= 0)
+		return;
+
+	/* The header contents come from the wire: an unreadable option area
+	 * must not bring the machine down, so report "no options" instead.
+	 */
+	ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
+	if (ptr == NULL)
+		return;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return;
+		case TCPOPT_NOP:
+			length--;
+			continue;
+		default:
+			/* The length byte itself must lie inside the option
+			 * area before it may be read.
+			 */
+			if (length < 2)
+				return;
+			opsize = *ptr++;
+			if (opsize < 2)
+				return;
+			if (opsize > length)
+				return;
+
+			switch (opcode) {
+			case TCPOPT_MSS:
+				if (opsize == TCPOLEN_MSS) {
+					opts->mss = get_unaligned_be16(ptr);
+					opts->options |= XT_SYNPROXY_OPT_MSS;
+				}
+				break;
+			case TCPOPT_WINDOW:
+				if (opsize == TCPOLEN_WINDOW) {
+					opts->wscale = *ptr;
+					/* clamp the shift count to 14 */
+					if (opts->wscale > 14)
+						opts->wscale = 14;
+					opts->options |= XT_SYNPROXY_OPT_WSCALE;
+				}
+				break;
+			case TCPOPT_TIMESTAMP:
+				if (opsize == TCPOLEN_TIMESTAMP) {
+					opts->tsval = get_unaligned_be32(ptr);
+					opts->tsecr = get_unaligned_be32(ptr + 4);
+					opts->options |= XT_SYNPROXY_OPT_TIMESTAMP;
+				}
+				break;
+			case TCPOPT_SACK_PERM:
+				if (opsize == TCPOLEN_SACK_PERM)
+					opts->options |= XT_SYNPROXY_OPT_SACK_PERM;
+				break;
+			}
+
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(synproxy_parse_options);
+
+/*
+ * Return the number of TCP option bytes needed to encode the options
+ * flagged in @opts. SACK-permitted only adds space of its own when no
+ * timestamp option is present.
+ */
+unsigned int synproxy_options_size(const struct synproxy_options *opts)
+{
+	const u8 flags = opts->options;
+	unsigned int bytes = 0;
+
+	if (flags & XT_SYNPROXY_OPT_WSCALE)
+		bytes += TCPOLEN_WSCALE_ALIGNED;
+
+	if (flags & XT_SYNPROXY_OPT_TIMESTAMP)
+		bytes += TCPOLEN_TSTAMP_ALIGNED;
+	else if (flags & XT_SYNPROXY_OPT_SACK_PERM)
+		bytes += TCPOLEN_SACKPERM_ALIGNED;
+
+	if (flags & XT_SYNPROXY_OPT_MSS)
+		bytes += TCPOLEN_MSS_ALIGNED;
+
+	return bytes;
+}
+EXPORT_SYMBOL_GPL(synproxy_options_size);
+
+/*
+ * Write the TCP options flagged in @opts directly behind @th, one 32-bit
+ * word at a time, in the order MSS, timestamp (with SACK-permitted folded
+ * into its leading word when both are set), stand-alone SACK-permitted,
+ * window scale. Unused bytes of a word are filled with TCPOPT_NOP.
+ *
+ * NOTE(review): no bounds are checked here; the caller presumably reserves
+ * synproxy_options_size() bytes behind the header - confirm.
+ */
+void
+synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
+{
+ __be32 *ptr = (__be32 *)(th + 1);
+ u8 options = opts->options;
+
+ if (options & XT_SYNPROXY_OPT_MSS)
+ *ptr++ = htonl((TCPOPT_MSS << 24) |
+ (TCPOLEN_MSS << 16) |
+ opts->mss);
+
+ if (options & XT_SYNPROXY_OPT_TIMESTAMP) {
+ /* SACK-permitted takes the place of the two padding NOPs. */
+ if (options & XT_SYNPROXY_OPT_SACK_PERM)
+ *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
+ (TCPOLEN_SACK_PERM << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ else
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+
+ *ptr++ = htonl(opts->tsval);
+ *ptr++ = htonl(opts->tsecr);
+ } else if (options & XT_SYNPROXY_OPT_SACK_PERM)
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_SACK_PERM << 8) |
+ TCPOLEN_SACK_PERM);
+
+ if (options & XT_SYNPROXY_OPT_WSCALE)
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_WINDOW << 16) |
+ (TCPOLEN_WINDOW << 8) |
+ opts->wscale);
+}
+EXPORT_SYMBOL_GPL(synproxy_build_options);
+
+/*
+ * Turn the parsed timestamp of @opts into a reply timestamp that also
+ * carries option state: the received tsval becomes tsecr, and the new tsval
+ * is the current tcp_time_stamp with its low six bits replaced by
+ *   bits 0-3: @info->wscale, or 0xf when window scaling is not flagged,
+ *   bit 4:    SACK-permitted flagged,
+ *   bit 5:    ECN flagged.
+ */
+void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
+				    struct synproxy_options *opts)
+{
+	const u8 flags = opts->options;
+
+	opts->tsecr = opts->tsval;
+	opts->tsval = tcp_time_stamp & ~0x3f;
+
+	opts->tsval |= (flags & XT_SYNPROXY_OPT_WSCALE) ? info->wscale : 0xf;
+
+	if (flags & XT_SYNPROXY_OPT_SACK_PERM)
+		opts->tsval |= 1 << 4;
+	if (flags & XT_SYNPROXY_OPT_ECN)
+		opts->tsval |= 1 << 5;
+}
+EXPORT_SYMBOL_GPL(synproxy_init_timestamp_cookie);
+
+/*
+ * Recover option state from the low bits of the echoed timestamp in @opts:
+ * bits 0-3 give the window scale (0xf meaning "none"), bit 4 flags
+ * SACK-permitted and bit 5 flags ECN. Matching XT_SYNPROXY_OPT_* flags are
+ * added to opts->options.
+ */
+void synproxy_check_timestamp_cookie(struct synproxy_options *opts)
+{
+	opts->wscale = opts->tsecr & 0xf;
+
+	if (opts->wscale != 0xf)
+		opts->options |= XT_SYNPROXY_OPT_WSCALE;
+	if (opts->tsecr & (1 << 4))
+		opts->options |= XT_SYNPROXY_OPT_SACK_PERM;
+	if (opts->tsecr & (1 << 5))
+		opts->options |= XT_SYNPROXY_OPT_ECN;
+}
+EXPORT_SYMBOL_GPL(synproxy_check_timestamp_cookie);
+
+/*
+ * Shift the TCP timestamp option of the header at @protoff in @skb by
+ * @synproxy->tsoff and fix up the TCP checksum.
+ *
+ * In the reply direction the first timestamp field (bytes 2-5 of the
+ * option) is decreased by tsoff; otherwise the second field (bytes 6-9) is
+ * increased by it. Only the first timestamp option found is rewritten.
+ *
+ * Returns 1 on success (including tsoff == 0 and "no timestamp option") and
+ * 0 when the header cannot be made writable or an option is truncated or
+ * has an invalid length.
+ */
+unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
+				    unsigned int protoff,
+				    struct tcphdr *th,
+				    struct nf_conn *ct,
+				    enum ip_conntrack_info ctinfo,
+				    const struct nf_conn_synproxy *synproxy)
+{
+	unsigned int optoff, optend;
+	__be32 *ptr, old, new;
+
+	if (synproxy->tsoff == 0)
+		return 1;
+
+	optoff = protoff + sizeof(struct tcphdr);
+	optend = protoff + th->doff * 4;
+
+	if (!skb_make_writable(skb, optend))
+		return 0;
+
+	/* The packet data may have moved while being made writable, and the
+	 * caller's pointer may not reference the packet itself; use the live
+	 * header so the checksum update lands in the packet.
+	 */
+	th = (struct tcphdr *)(skb->data + protoff);
+
+	while (optoff < optend) {
+		unsigned char *op = skb->data + optoff;
+
+		switch (op[0]) {
+		case TCPOPT_EOL:
+			return 1;
+		case TCPOPT_NOP:
+			optoff++;
+			continue;
+		default:
+			/* no partial options */
+			if (optoff + 1 == optend ||
+			    optoff + op[1] > optend ||
+			    op[1] < 2)
+				return 0;
+			if (op[0] == TCPOPT_TIMESTAMP &&
+			    op[1] == TCPOLEN_TIMESTAMP) {
+				/* Options carry no alignment guarantee, so
+				 * go through the unaligned accessors.
+				 */
+				if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+					ptr = (__be32 *)&op[2];
+					old = get_unaligned(ptr);
+					new = htonl(ntohl(old) -
+						    synproxy->tsoff);
+				} else {
+					ptr = (__be32 *)&op[6];
+					old = get_unaligned(ptr);
+					new = htonl(ntohl(old) +
+						    synproxy->tsoff);
+				}
+				put_unaligned(new, ptr);
+				inet_proto_csum_replace4(&th->check, skb,
+							 old, new, 0);
+				return 1;
+			}
+			optoff += op[1];
+		}
+	}
+	return 1;
+}
+EXPORT_SYMBOL_GPL(synproxy_tstamp_adjust);
+
+/* Conntrack extension type describing struct nf_conn_synproxy storage. */
+static struct nf_ct_ext_type nf_ct_synproxy_extend __read_mostly = {
+ .len = sizeof(struct nf_conn_synproxy),
+ .align = __alignof__(struct nf_conn_synproxy),
+ .id = NF_CT_EXT_SYNPROXY,
+};
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file ->start(): position 0 yields the header token; position N > 0
+ * yields the per-CPU statistics of the first possible CPU with index
+ * >= N - 1, and *pos is set to that CPU's index + 1. Returns NULL when no
+ * such CPU exists.
+ */
+static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
+ int cpu;
+
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ for (cpu = *pos - 1; cpu < nr_cpu_ids; cpu++) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu + 1;
+ return per_cpu_ptr(snet->stats, cpu);
+ }
+
+ return NULL;
+}
+
+/*
+ * seq_file ->next(): return the per-CPU statistics of the next possible CPU
+ * with index >= *pos and set *pos to that CPU's index + 1, or return NULL
+ * when the CPUs are exhausted.
+ */
+static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
+	int cpu;
+
+	for (cpu = *pos; cpu < nr_cpu_ids; cpu++) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu + 1;
+		return per_cpu_ptr(snet->stats, cpu);
+	}
+
+	/* The iterator contract asks ->next() to advance the position even
+	 * when it runs off the end; otherwise a later ->start() at the same
+	 * position would hand out the last CPU again.
+	 */
+	(*pos)++;
+	return NULL;
+}
+
+/* seq_file ->stop(): the iterator holds no state, so there is nothing to release. */
+static void synproxy_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+/*
+ * seq_file ->show(): print the column header for the start token, or one
+ * line of hexadecimal counters for a per-CPU statistics record. The first
+ * column ("entries") is always printed as 0. Always returns 0.
+ */
+static int synproxy_cpu_seq_show(struct seq_file *seq, void *v)
+{
+	const struct synproxy_stats *cpu_stats = v;
+
+	if (v != SEQ_START_TOKEN)
+		seq_printf(seq, "%08x\t%08x\t%08x\t%08x\t%08x\t%08x\n", 0,
+			   cpu_stats->syn_received,
+			   cpu_stats->cookie_invalid,
+			   cpu_stats->cookie_valid,
+			   cpu_stats->cookie_retrans,
+			   cpu_stats->conn_reopened);
+	else
+		seq_printf(seq, "entries\t\tsyn_received\t"
+				"cookie_invalid\tcookie_valid\t"
+				"cookie_retrans\tconn_reopened\n");
+
+	return 0;
+}
+
+/* Iterator callbacks for the per-CPU synproxy statistics file. */
+static const struct seq_operations synproxy_cpu_seq_ops = {
+ .start = synproxy_cpu_seq_start,
+ .next = synproxy_cpu_seq_next,
+ .stop = synproxy_cpu_seq_stop,
+ .show = synproxy_cpu_seq_show,
+};
+
+/* ->open(): set up a net-namespace-aware seq_file using synproxy_cpu_seq_ops. */
+static int synproxy_cpu_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &synproxy_cpu_seq_ops,
+ sizeof(struct seq_net_private));
+}
+
+/* File operations of the "synproxy" statistics proc entry. */
+static const struct file_operations synproxy_cpu_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = synproxy_cpu_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+/*
+ * Create the read-only "synproxy" entry under @net's proc_net_stat
+ * directory. Returns 0 on success and -ENOMEM when creation fails.
+ */
+static int __net_init synproxy_proc_init(struct net *net)
+{
+	return proc_create("synproxy", S_IRUGO, net->proc_net_stat,
+			   &synproxy_cpu_seq_fops) ? 0 : -ENOMEM;
+}
+
+/* Remove the "synproxy" entry created by synproxy_proc_init(). */
+static void __net_exit synproxy_proc_exit(struct net *net)
+{
+ remove_proc_entry("synproxy", net->proc_net_stat);
+}
+#else
+/* Without CONFIG_PROC_FS there is no proc entry to create; report success. */
+static int __net_init synproxy_proc_init(struct net *net)
+{
+ return 0;
+}
+
+/* Without CONFIG_PROC_FS there is no proc entry to remove. */
+static void __net_exit synproxy_proc_exit(struct net *net)
+{
+}
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Per-net setup: allocate a conntrack from an all-zero tuple, mark it as a
+ * confirmed template carrying the seqadj and synproxy extensions, allocate
+ * the per-CPU statistics and create the proc entry.
+ *
+ * Returns 0 on success. On failure everything allocated so far is released
+ * and a negative errno is returned: the conntrack allocation error, the
+ * proc setup error, or -ENOMEM for the extension and per-CPU allocations.
+ */
+static int __net_init synproxy_net_init(struct net *net)
+{
+ struct synproxy_net *snet = synproxy_pernet(net);
+ struct nf_conntrack_tuple t;
+ struct nf_conn *ct;
+ int err = -ENOMEM;
+
+ memset(&t, 0, sizeof(t));
+ ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL);
+ if (IS_ERR(ct)) {
+ err = PTR_ERR(ct);
+ goto err1;
+ }
+
+ __set_bit(IPS_TEMPLATE_BIT, &ct->status);
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ /* err is still -ENOMEM for the two extension failures below. */
+ if (!nfct_seqadj_ext_add(ct))
+ goto err2;
+ if (!nfct_synproxy_ext_add(ct))
+ goto err2;
+
+ snet->tmpl = ct;
+
+ snet->stats = alloc_percpu(struct synproxy_stats);
+ if (snet->stats == NULL)
+ goto err2;
+
+ err = synproxy_proc_init(net);
+ if (err < 0)
+ goto err3;
+
+ return 0;
+
+err3:
+ free_percpu(snet->stats);
+err2:
+ nf_conntrack_free(ct);
+err1:
+ return err;
+}
+
+/* Per-net teardown: free the template conntrack, the proc entry and the per-CPU statistics. */
+static void __net_exit synproxy_net_exit(struct net *net)
+{
+ struct synproxy_net *snet = synproxy_pernet(net);
+
+ nf_conntrack_free(snet->tmpl);
+ synproxy_proc_exit(net);
+ free_percpu(snet->stats);
+}
+
+/* Per-net hooks; .id and .size describe the struct synproxy_net storage. */
+static struct pernet_operations synproxy_net_ops = {
+ .init = synproxy_net_init,
+ .exit = synproxy_net_exit,
+ .id = &synproxy_net_id,
+ .size = sizeof(struct synproxy_net),
+};
+
+/*
+ * Module init: register the synproxy conntrack extension, then the per-net
+ * operations. The extension is unregistered again when the per-net
+ * registration fails. Returns 0 or the negative errno of the failing step.
+ */
+static int __init synproxy_core_init(void)
+{
+	int ret;
+
+	ret = nf_ct_extend_register(&nf_ct_synproxy_extend);
+	if (ret < 0)
+		return ret;
+
+	ret = register_pernet_subsys(&synproxy_net_ops);
+	if (ret < 0) {
+		nf_ct_extend_unregister(&nf_ct_synproxy_extend);
+		return ret;
+	}
+
+	return 0;
+}
+
+/* Module exit: undo synproxy_core_init() in reverse order. */
+static void __exit synproxy_core_exit(void)
+{
+ unregister_pernet_subsys(&synproxy_net_ops);
+ nf_ct_extend_unregister(&nf_ct_synproxy_extend);
+}
+
+module_init(synproxy_core_init);
+module_exit(synproxy_core_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
deleted file mode 100644
index 474d621..0000000
--- a/net/netfilter/nf_tproxy_core.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Transparent proxy support for Linux/iptables
- *
- * Copyright (c) 2006-2007 BalaBit IT Ltd.
- * Author: Balazs Scheidler, Krisztian Kovacs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-
-#include <linux/net.h>
-#include <linux/if.h>
-#include <linux/netdevice.h>
-#include <net/udp.h>
-#include <net/netfilter/nf_tproxy_core.h>
-
-
-static void
-nf_tproxy_destructor(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
-
- skb->sk = NULL;
- skb->destructor = NULL;
-
- if (sk)
- sock_put(sk);
-}
-
-/* consumes sk */
-void
-nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
-{
- /* assigning tw sockets complicates things; most
- * skb->sk->X checks would have to test sk->sk_state first */
- if (sk->sk_state == TCP_TIME_WAIT) {
- inet_twsk_put(inet_twsk(sk));
- return;
- }
-
- skb_orphan(skb);
- skb->sk = sk;
- skb->destructor = nf_tproxy_destructor;
-}
-EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
-
-static int __init nf_tproxy_init(void)
-{
- pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n");
- pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n");
- return 0;
-}
-
-module_init(nf_tproxy_init);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Krisztian Kovacs");
-MODULE_DESCRIPTION("Transparent proxy support core routines");
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 8a703c3..95a98c8 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -862,6 +862,7 @@
[NFQA_MARK] = { .type = NLA_U32 },
[NFQA_PAYLOAD] = { .type = NLA_UNSPEC },
[NFQA_CT] = { .type = NLA_UNSPEC },
+ [NFQA_EXP] = { .type = NLA_UNSPEC },
};
static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
@@ -990,9 +991,14 @@
if (entry == NULL)
return -ENOENT;
- rcu_read_lock();
- if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK))
+ if (nfqa[NFQA_CT]) {
ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo);
+ if (ct && nfqa[NFQA_EXP]) {
+ nfqnl_attach_expect(ct, nfqa[NFQA_EXP],
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
+ }
+ }
if (nfqa[NFQA_PAYLOAD]) {
u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
@@ -1005,7 +1011,6 @@
if (ct)
nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff);
}
- rcu_read_unlock();
if (nfqa[NFQA_MARK])
entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c
index ab61d66..96cac50 100644
--- a/net/netfilter/nfnetlink_queue_ct.c
+++ b/net/netfilter/nfnetlink_queue_ct.c
@@ -87,12 +87,27 @@
void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, int diff)
{
- struct nfq_ct_nat_hook *nfq_nat_ct;
+ struct nfq_ct_hook *nfq_ct;
- nfq_nat_ct = rcu_dereference(nfq_ct_nat_hook);
- if (nfq_nat_ct == NULL)
+ nfq_ct = rcu_dereference(nfq_ct_hook);
+ if (nfq_ct == NULL)
return;
if ((ct->status & IPS_NAT_MASK) && diff)
- nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff);
+ nfq_ct->seq_adjust(skb, ct, ctinfo, diff);
+}
+/* Hand attr to the registered nfq_ct_hook's attach_expect() callback and return its result. */
+int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
+ u32 portid, u32 report)
+{
+ struct nfq_ct_hook *nfq_ct;
+
+ if (nf_ct_is_untracked(ct)) /* untracked ct: nothing is attached, success is reported */
+ return 0;
+ /* NOTE(review): rcu_dereference() assumes an RCU read-side section -- confirm the caller provides one */
+ nfq_ct = rcu_dereference(nfq_ct_hook);
+ if (nfq_ct == NULL) /* no hook registered */
+ return -EOPNOTSUPP;
+
+ return nfq_ct->attach_expect(attr, ct, portid, report);
}
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d7f1953..5d8a3a3 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -15,7 +15,9 @@
#include <linux/ip.h>
#include <net/checksum.h>
#include <net/udp.h>
+#include <net/tcp.h>
#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
#include <linux/inetdevice.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -26,13 +28,18 @@
#define XT_TPROXY_HAVE_IPV6 1
#include <net/if_inet6.h>
#include <net/addrconf.h>
+#include <net/inet6_hashtables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
-#include <net/netfilter/nf_tproxy_core.h>
#include <linux/netfilter/xt_TPROXY.h>
+enum nf_tproxy_lookup_t {
+ NFT_LOOKUP_LISTENER,
+ NFT_LOOKUP_ESTABLISHED,
+};
+
static bool tproxy_sk_is_transparent(struct sock *sk)
{
if (sk->sk_state != TCP_TIME_WAIT) {
@@ -68,6 +75,157 @@
return laddr ? laddr : daddr;
}
+/*
+ * This is used when the user wants to intercept a connection matching
+ * an explicit iptables rule. In this case sockets are matched in the
+ * following order of preference:
+ *
+ * - match: if there's a fully established connection matching the
+ * _packet_ tuple, it is returned, assuming the redirection
+ * already took place and we process a packet belonging to an
+ * established connection
+ *
+ * - match: if there's a listening socket matching the redirection
+ * (e.g. on-port & on-ip of the connection), it is returned,
+ * regardless of whether it was bound to 0.0.0.0 or an explicit
+ * address. The reasoning is that if there's an explicit rule, it
+ * does not really matter if the listener is bound to an interface
+ * or to 0. The user already stated that they want redirection
+ * (since they added the rule).
+ *
+ * Please note that there's an overlap between what a TPROXY target
+ * and a socket match will match. Normally if you have both rules the
+ * "socket" match will be the first one, effectively all packets
+ * belonging to established connections going through that one.
+ */
+static inline struct sock *
+nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+ const __be32 saddr, const __be32 daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in,
+ const enum nf_tproxy_lookup_t lookup_type)
+{
+ struct sock *sk;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ switch (lookup_type) {
+ case NFT_LOOKUP_LISTENER:
+ sk = inet_lookup_listener(net, &tcp_hashinfo,
+ saddr, sport,
+ daddr, dport,
+ in->ifindex);
+
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ break;
+ case NFT_LOOKUP_ESTABLISHED:
+ sk = inet_lookup_established(net, &tcp_hashinfo,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ break;
+ default:
+ BUG(); /* lookup_type is neither LISTENER nor ESTABLISHED */
+ }
+ break;
+ case IPPROTO_UDP:
+ sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ if (sk) {
+ int connected = (sk->sk_state == TCP_ESTABLISHED);
+ int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
+
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
+ (lookup_type == NFT_LOOKUP_LISTENER && connected)) { /* socket does not fit the requested lookup type */
+ sock_put(sk); /* drop the socket reference before discarding the result */
+ sk = NULL;
+ }
+ }
+ break;
+ default:
+ WARN_ON(1); /* only TCP and UDP are handled here */
+ sk = NULL;
+ }
+
+ pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
+ protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
+
+ return sk;
+}
+
+#ifdef XT_TPROXY_HAVE_IPV6
+static inline struct sock *
+nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in,
+ const enum nf_tproxy_lookup_t lookup_type)
+{
+ struct sock *sk;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ switch (lookup_type) {
+ case NFT_LOOKUP_LISTENER:
+ sk = inet6_lookup_listener(net, &tcp_hashinfo,
+ saddr, sport,
+ daddr, ntohs(dport), /* dport is converted to host byte order for this call */
+ in->ifindex);
+
+ /* NOTE: we return listeners even if bound to
+ * the unspecified address (::), those are
+ * filtered out in xt_socket, since xt_TPROXY
+ * needs wildcard-bound listeners too
+ */
+ break;
+ case NFT_LOOKUP_ESTABLISHED:
+ sk = __inet6_lookup_established(net, &tcp_hashinfo,
+ saddr, sport, daddr, ntohs(dport),
+ in->ifindex);
+ break;
+ default:
+ BUG(); /* lookup_type is neither LISTENER nor ESTABLISHED */
+ }
+ break;
+ case IPPROTO_UDP:
+ sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ if (sk) {
+ int connected = (sk->sk_state == TCP_ESTABLISHED);
+ int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
+
+ /* NOTE: we return listeners even if bound to
+ * the unspecified address (::), those are
+ * filtered out in xt_socket, since xt_TPROXY
+ * needs wildcard-bound listeners too
+ */
+ if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
+ (lookup_type == NFT_LOOKUP_LISTENER && connected)) { /* socket does not fit the requested lookup type */
+ sock_put(sk); /* drop the socket reference before discarding the result */
+ sk = NULL;
+ }
+ }
+ break;
+ default:
+ WARN_ON(1); /* only TCP and UDP are handled here */
+ sk = NULL;
+ }
+
+ pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
+ protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
+
+ return sk;
+}
+#endif
+
/**
* tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
* @skb: The skb being processed.
@@ -117,6 +275,15 @@
return sk;
}
+/* assign a socket to the skb -- consumes sk; sock_edemux becomes the skb destructor */
+static void
+nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb); /* called before skb->sk and skb->destructor are overwritten */
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+}
+
static unsigned int
tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
u_int32_t mark_mask, u_int32_t mark_value)
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 68ff29f..fab6eea 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -202,7 +202,7 @@
return -EINVAL;
}
if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
- pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n");
+ pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
return -EINVAL;
}
if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 20b1591..06df2b9 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -19,12 +19,12 @@
#include <net/icmp.h>
#include <net/sock.h>
#include <net/inet_sock.h>
-#include <net/netfilter/nf_tproxy_core.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#define XT_SOCKET_HAVE_IPV6 1
#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/inet6_hashtables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
@@ -101,6 +101,43 @@
return 0;
}
+/* "socket" match based redirection (no specific rule)
+ * ===================================================
+ *
+ * There are connections with dynamic endpoints (e.g. FTP data
+ * connection) that the user is unable to add explicit rules
+ * for. These are taken care of by a generic "socket" rule. It is
+ * assumed that the proxy application is trusted to open such
+ * connections without an explicit iptables rule (except of course the
+ * generic 'socket' rule). In this case the following sockets are
+ * matched in preference order:
+ *
+ * - match: if there's a fully established connection matching the
+ * _packet_ tuple
+ *
+ * - match: if there's a non-zero bound listener (possibly with a
+ * non-local address). We don't accept zero-bound listeners, since
+ * then local services could intercept traffic going through the
+ * box.
+ */
+static struct sock *
+xt_socket_get_sock_v4(struct net *net, const u8 protocol,
+ const __be32 saddr, const __be32 daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ return __inet_lookup(net, &tcp_hashinfo,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ case IPPROTO_UDP:
+ return udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ }
+ return NULL; /* neither TCP nor UDP: no lookup is attempted */
+}
+
static bool
socket_match(const struct sk_buff *skb, struct xt_action_param *par,
const struct xt_socket_mtinfo1 *info)
@@ -156,9 +193,9 @@
#endif
if (!sk)
- sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
+ sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
saddr, daddr, sport, dport,
- par->in, NFT_LOOKUP_ANY);
+ par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -265,6 +302,25 @@
return 0;
}
+static struct sock *
+xt_socket_get_sock_v6(struct net *net, const u8 protocol,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in)
+{
+ switch (protocol) { /* IPv6 socket lookup for the "socket" match: TCP and UDP only */
+ case IPPROTO_TCP:
+ return inet6_lookup(net, &tcp_hashinfo,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ case IPPROTO_UDP:
+ return udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ }
+ /* any other protocol: no lookup is attempted */
+ return NULL;
+}
+
static bool
socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
{
@@ -302,9 +358,9 @@
}
if (!sk)
- sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+ sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
saddr, daddr, sport, dport,
- par->in, NFT_LOOKUP_ANY);
+ par->in);
if (sk) {
bool wildcard;
bool transparent = true;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f85f8a2..512718a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -789,10 +789,6 @@
struct net *net = sock_net(skb->sk);
int chains_to_skip = cb->args[0];
int fams_to_skip = cb->args[1];
- bool need_locking = chains_to_skip || fams_to_skip;
-
- if (need_locking)
- genl_lock();
for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) {
n = 0;
@@ -814,9 +810,6 @@
cb->args[0] = i;
cb->args[1] = n;
- if (need_locking)
- genl_unlock();
-
return skb->len;
}
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index bed30e6..6ecf491 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -4,6 +4,7 @@
config OPENVSWITCH
tristate "Open vSwitch"
+ select LIBCRC32C
---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 82e4ee5..ea36e99 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -10,10 +10,13 @@
dp_notify.o \
flow.o \
vport.o \
- vport-gre.o \
vport-internal_dev.o \
vport-netdev.o
ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
openvswitch-y += vport-vxlan.o
endif
+
+ifneq ($(CONFIG_OPENVSWITCH_GRE),)
+openvswitch-y += vport-gre.o
+endif
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ab101f7..65cfaa8 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -22,6 +22,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
+#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
@@ -31,6 +32,7 @@
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
+#include <net/sctp/checksum.h>
#include "datapath.h"
#include "vport.h"
@@ -352,6 +354,39 @@
return 0;
}
+static int set_sctp(struct sk_buff *skb,
+ const struct ovs_key_sctp *sctp_port_key)
+{
+ struct sctphdr *sh;
+ int err;
+ unsigned int sctphoff = skb_transport_offset(skb);
+ /* Rewrite the SCTP ports from sctp_port_key; returns 0 or the error from make_writable(). */
+ err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
+ if (unlikely(err))
+ return err;
+
+ sh = sctp_hdr(skb);
+ if (sctp_port_key->sctp_src != sh->source ||
+ sctp_port_key->sctp_dst != sh->dest) { /* only touch the packet when a port differs */
+ __le32 old_correct_csum, new_csum, old_csum;
+
+ old_csum = sh->checksum; /* value carried in the header */
+ old_correct_csum = sctp_compute_cksum(skb, sctphoff); /* computed before the ports change */
+
+ sh->source = sctp_port_key->sctp_src;
+ sh->dest = sctp_port_key->sctp_dst;
+
+ new_csum = sctp_compute_cksum(skb, sctphoff); /* computed after the ports change */
+
+ /* Carry any checksum errors through: old_csum ^ old_correct_csum is zero only if the two were equal. */
+ sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
+
+ skb->rxhash = 0; /* reset the rxhash field now that the ports changed */
+ }
+
+ return 0;
+}
+
static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
struct vport *vport;
@@ -376,8 +411,10 @@
const struct nlattr *a;
int rem;
+ BUG_ON(!OVS_CB(skb)->pkt_key);
+
upcall.cmd = OVS_PACKET_CMD_ACTION;
- upcall.key = &OVS_CB(skb)->flow->key;
+ upcall.key = OVS_CB(skb)->pkt_key;
upcall.userdata = NULL;
upcall.portid = 0;
@@ -459,6 +496,10 @@
case OVS_KEY_ATTR_UDP:
err = set_udp(skb, nla_data(nested_attr));
break;
+
+ case OVS_KEY_ATTR_SCTP:
+ err = set_sctp(skb, nla_data(nested_attr));
+ break;
}
return err;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f2ed760..2aa13bd 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -165,7 +165,7 @@
{
struct datapath *dp = container_of(rcu, struct datapath, rcu);
- ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
+ ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false);
free_percpu(dp->stats_percpu);
release_net(ovs_dp_get_net(dp));
kfree(dp->ports);
@@ -226,19 +226,18 @@
struct sw_flow_key key;
u64 *stats_counter;
int error;
- int key_len;
stats = this_cpu_ptr(dp->stats_percpu);
/* Extract flow from 'skb' into 'key'. */
- error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
+ error = ovs_flow_extract(skb, p->port_no, &key);
if (unlikely(error)) {
kfree_skb(skb);
return;
}
/* Look up flow. */
- flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
+ flow = ovs_flow_lookup(rcu_dereference(dp->table), &key);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
@@ -253,6 +252,7 @@
}
OVS_CB(skb)->flow = flow;
+ OVS_CB(skb)->pkt_key = &key;
stats_counter = &stats->n_hit;
ovs_flow_used(OVS_CB(skb)->flow, skb);
@@ -435,7 +435,7 @@
upcall->dp_ifindex = dp_ifindex;
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
- ovs_flow_to_nlattrs(upcall_info->key, user_skb);
+ ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb);
nla_nest_end(user_skb, nla);
if (upcall_info->userdata)
@@ -468,7 +468,7 @@
rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_deferred_destroy(old_table);
+ ovs_flow_tbl_destroy(old_table, true);
return 0;
}
@@ -611,10 +611,12 @@
static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_actions **sfa)
{
- struct ovs_key_ipv4_tunnel tun_key;
+ struct sw_flow_match match;
+ struct sw_flow_key key;
int err, start;
- err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key);
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false);
if (err)
return err;
@@ -622,7 +624,8 @@
if (start < 0)
return start;
- err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key));
+ err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
+ sizeof(match.key->tun_key));
add_nested_action_end(*sfa, start);
return err;
@@ -709,6 +712,12 @@
return validate_tp_port(flow_key);
+ case OVS_KEY_ATTR_SCTP:
+ if (flow_key->ip.proto != IPPROTO_SCTP)
+ return -EINVAL;
+
+ return validate_tp_port(flow_key);
+
default:
return -EINVAL;
}
@@ -857,7 +866,6 @@
struct ethhdr *eth;
int len;
int err;
- int key_len;
err = -EINVAL;
if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
@@ -890,11 +898,11 @@
if (IS_ERR(flow))
goto err_kfree_skb;
- err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
+ err = ovs_flow_extract(packet, -1, &flow->key);
if (err)
goto err_flow_free;
- err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]);
+ err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]);
if (err)
goto err_flow_free;
acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
@@ -908,6 +916,7 @@
goto err_flow_free;
OVS_CB(packet)->flow = flow;
+ OVS_CB(packet)->pkt_key = &flow->key;
packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark;
@@ -922,13 +931,13 @@
local_bh_enable();
rcu_read_unlock();
- ovs_flow_free(flow);
+ ovs_flow_free(flow, false);
return err;
err_unlock:
rcu_read_unlock();
err_flow_free:
- ovs_flow_free(flow);
+ ovs_flow_free(flow, false);
err_kfree_skb:
kfree_skb(packet);
err:
@@ -951,9 +960,10 @@
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
{
+ struct flow_table *table;
int i;
- struct flow_table *table = ovsl_dereference(dp->table);
+ table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held());
stats->n_flows = ovs_flow_tbl_count(table);
stats->n_hit = stats->n_missed = stats->n_lost = 0;
@@ -1044,7 +1054,8 @@
if (!start)
return -EMSGSIZE;
- err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key));
+ err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
+ nla_data(ovs_key));
if (err)
return err;
nla_nest_end(skb, start);
@@ -1092,6 +1103,7 @@
{
return NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+ + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -1104,7 +1116,6 @@
u32 seq, u32 flags, u8 cmd)
{
const int skb_orig_len = skb->len;
- const struct sw_flow_actions *sf_acts;
struct nlattr *start;
struct ovs_flow_stats stats;
struct ovs_header *ovs_header;
@@ -1113,22 +1124,33 @@
u8 tcp_flags;
int err;
- sf_acts = ovsl_dereference(flow->sf_acts);
-
ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
if (!ovs_header)
return -EMSGSIZE;
ovs_header->dp_ifindex = get_dpifindex(dp);
+ /* Fill flow key. */
nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
if (!nla)
goto nla_put_failure;
- err = ovs_flow_to_nlattrs(&flow->key, skb);
+
+ err = ovs_flow_to_nlattrs(&flow->unmasked_key,
+ &flow->unmasked_key, skb);
if (err)
goto error;
nla_nest_end(skb, nla);
+ nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
+ if (!nla)
+ goto nla_put_failure;
+
+ err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb);
+ if (err)
+ goto error;
+
+ nla_nest_end(skb, nla);
+
spin_lock_bh(&flow->lock);
used = flow->used;
stats.n_packets = flow->packet_count;
@@ -1161,6 +1183,11 @@
*/
start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
if (start) {
+ const struct sw_flow_actions *sf_acts;
+
+ sf_acts = rcu_dereference_check(flow->sf_acts,
+ lockdep_ovsl_is_held());
+
err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
if (!err)
nla_nest_end(skb, start);
@@ -1211,20 +1238,24 @@
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
- struct sw_flow_key key;
- struct sw_flow *flow;
+ struct sw_flow_key key, masked_key;
+ struct sw_flow *flow = NULL;
+ struct sw_flow_mask mask;
struct sk_buff *reply;
struct datapath *dp;
struct flow_table *table;
struct sw_flow_actions *acts = NULL;
+ struct sw_flow_match match;
int error;
- int key_len;
/* Extract key. */
error = -EINVAL;
if (!a[OVS_FLOW_ATTR_KEY])
goto error;
- error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+
+ ovs_match_init(&match, &key, &mask);
+ error = ovs_match_from_nlattrs(&match,
+ a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
if (error)
goto error;
@@ -1235,9 +1266,13 @@
if (IS_ERR(acts))
goto error;
- error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts);
- if (error)
+ ovs_flow_key_mask(&masked_key, &key, &mask);
+ error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
+ &masked_key, 0, &acts);
+ if (error) {
+ OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
goto err_kfree;
+ }
} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
error = -EINVAL;
goto error;
@@ -1250,8 +1285,11 @@
goto err_unlock_ovs;
table = ovsl_dereference(dp->table);
- flow = ovs_flow_tbl_lookup(table, &key, key_len);
+
+ /* Check if this is a duplicate flow */
+ flow = ovs_flow_lookup(table, &key);
if (!flow) {
+ struct sw_flow_mask *mask_p;
/* Bail out if we're not allowed to create a new flow. */
error = -ENOENT;
if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
@@ -1264,7 +1302,7 @@
new_table = ovs_flow_tbl_expand(table);
if (!IS_ERR(new_table)) {
rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_deferred_destroy(table);
+ ovs_flow_tbl_destroy(table, true);
table = ovsl_dereference(dp->table);
}
}
@@ -1277,14 +1315,30 @@
}
clear_stats(flow);
+ flow->key = masked_key;
+ flow->unmasked_key = key;
+
+ /* Make sure mask is unique in the system */
+ mask_p = ovs_sw_flow_mask_find(table, &mask);
+ if (!mask_p) {
+ /* Allocate a new mask if none exists. */
+ mask_p = ovs_sw_flow_mask_alloc();
+ if (!mask_p)
+ goto err_flow_free;
+ mask_p->key = mask.key;
+ mask_p->range = mask.range;
+ ovs_sw_flow_mask_insert(table, mask_p);
+ }
+
+ ovs_sw_flow_mask_add_ref(mask_p);
+ flow->mask = mask_p;
rcu_assign_pointer(flow->sf_acts, acts);
/* Put flow in bucket. */
- ovs_flow_tbl_insert(table, flow, &key, key_len);
+ ovs_flow_insert(table, flow);
reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
- info->snd_seq,
- OVS_FLOW_CMD_NEW);
+ info->snd_seq, OVS_FLOW_CMD_NEW);
} else {
/* We found a matching flow. */
struct sw_flow_actions *old_acts;
@@ -1300,6 +1354,13 @@
info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
goto err_unlock_ovs;
+ /* The unmasked key has to be the same for flow updates. */
+ error = -EINVAL;
+ if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) {
+ OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
+ goto err_unlock_ovs;
+ }
+
/* Update actions. */
old_acts = ovsl_dereference(flow->sf_acts);
rcu_assign_pointer(flow->sf_acts, acts);
@@ -1324,6 +1385,8 @@
ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
return 0;
+err_flow_free:
+ ovs_flow_free(flow, false);
err_unlock_ovs:
ovs_unlock();
err_kfree:
@@ -1341,12 +1404,16 @@
struct sw_flow *flow;
struct datapath *dp;
struct flow_table *table;
+ struct sw_flow_match match;
int err;
- int key_len;
- if (!a[OVS_FLOW_ATTR_KEY])
+ if (!a[OVS_FLOW_ATTR_KEY]) {
+ OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
return -EINVAL;
- err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+ }
+
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
return err;
@@ -1358,7 +1425,7 @@
}
table = ovsl_dereference(dp->table);
- flow = ovs_flow_tbl_lookup(table, &key, key_len);
+ flow = ovs_flow_lookup_unmasked_key(table, &match);
if (!flow) {
err = -ENOENT;
goto unlock;
@@ -1387,8 +1454,8 @@
struct sw_flow *flow;
struct datapath *dp;
struct flow_table *table;
+ struct sw_flow_match match;
int err;
- int key_len;
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1401,12 +1468,14 @@
err = flush_flows(dp);
goto unlock;
}
- err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
goto unlock;
table = ovsl_dereference(dp->table);
- flow = ovs_flow_tbl_lookup(table, &key, key_len);
+ flow = ovs_flow_lookup_unmasked_key(table, &match);
if (!flow) {
err = -ENOENT;
goto unlock;
@@ -1418,13 +1487,13 @@
goto unlock;
}
- ovs_flow_tbl_remove(table, flow);
+ ovs_flow_remove(table, flow);
err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_FLOW_CMD_DEL);
BUG_ON(err < 0);
- ovs_flow_deferred_free(flow);
+ ovs_flow_free(flow, true);
ovs_unlock();
ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
@@ -1440,22 +1509,21 @@
struct datapath *dp;
struct flow_table *table;
- ovs_lock();
+ rcu_read_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp) {
- ovs_unlock();
+ rcu_read_unlock();
return -ENODEV;
}
- table = ovsl_dereference(dp->table);
-
+ table = rcu_dereference(dp->table);
for (;;) {
struct sw_flow *flow;
u32 bucket, obj;
bucket = cb->args[0];
obj = cb->args[1];
- flow = ovs_flow_tbl_next(table, &bucket, &obj);
+ flow = ovs_flow_dump_next(table, &bucket, &obj);
if (!flow)
break;
@@ -1468,7 +1536,7 @@
cb->args[0] = bucket;
cb->args[1] = obj;
}
- ovs_unlock();
+ rcu_read_unlock();
return skb->len;
}
@@ -1664,7 +1732,7 @@
goto err_destroy_local_port;
ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
- list_add_tail(&dp->list_node, &ovs_net->dps);
+ list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
ovs_unlock();
@@ -1678,7 +1746,7 @@
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
- ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
+ ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false);
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
@@ -1702,7 +1770,7 @@
ovs_dp_detach_port(vport);
}
- list_del(&dp->list_node);
+ list_del_rcu(&dp->list_node);
/* OVSP_LOCAL is datapath internal port. We need to make sure that
* all port in datapath are destroyed first before freeing datapath.
@@ -1807,8 +1875,8 @@
int skip = cb->args[0];
int i = 0;
- ovs_lock();
- list_for_each_entry(dp, &ovs_net->dps, list_node) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
if (i >= skip &&
ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
@@ -1816,7 +1884,7 @@
break;
i++;
}
- ovs_unlock();
+ rcu_read_unlock();
cb->args[0] = i;
@@ -2285,7 +2353,7 @@
new_table = ovs_flow_tbl_rehash(old_table);
if (!IS_ERR(new_table)) {
rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_deferred_destroy(old_table);
+ ovs_flow_tbl_destroy(old_table, true);
}
}
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index a914864..4d109c1 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -88,11 +88,13 @@
/**
* struct ovs_skb_cb - OVS data in skb CB
* @flow: The flow associated with this packet. May be %NULL if no flow.
+ * @pkt_key: The flow information extracted from the packet. Must be nonnull.
* @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
* packet is not being tunneled.
*/
struct ovs_skb_cb {
struct sw_flow *flow;
+ struct sw_flow_key *pkt_key;
struct ovs_key_ipv4_tunnel *tun_key;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -183,4 +185,8 @@
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
void ovs_dp_notify_wq(struct work_struct *work);
+
+#define OVS_NLERR(fmt, ...) \
+ pr_info_once("netlink: " fmt, ##__VA_ARGS__)
+
#endif /* datapath.h */
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 1aa84dc..ad1aeeb 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2011 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -34,6 +34,7 @@
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
@@ -46,6 +47,202 @@
static struct kmem_cache *flow_cache;
+static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
+ struct sw_flow_key_range *range, u8 val);
+
+/* Extend the byte range tracked for 'match' so that it covers the
+ * long-aligned span surrounding [offset, offset + size).  The key range is
+ * updated when 'is_mask' is false; otherwise the range of the attached mask
+ * is updated, and nothing happens if no mask is attached.
+ */
+static void update_range__(struct sw_flow_match *match,
+			   size_t offset, size_t size, bool is_mask)
+{
+	size_t lo = rounddown(offset, sizeof(long));
+	size_t hi = roundup(offset + size, sizeof(long));
+	struct sw_flow_key_range *r;
+
+	if (is_mask) {
+		if (!match->mask)
+			return;
+		r = &match->mask->range;
+	} else {
+		r = &match->range;
+	}
+
+	if (r->start == r->end) {
+		/* Empty range so far: adopt the new span as is. */
+		r->start = lo;
+		r->end = hi;
+	} else {
+		/* Otherwise only ever grow the existing span. */
+		if (lo < r->start)
+			r->start = lo;
+		if (hi > r->end)
+			r->end = hi;
+	}
+}
+
+/* Assign 'value' to 'field' of the flow key being assembled in 'match' and
+ * widen the tracked byte range over that field via update_range__().
+ * When 'is_mask' is true the value is written to (match)->mask->key, and is
+ * dropped if no mask is attached; otherwise it is written to (match)->key.
+ * 'match' and 'is_mask' are expanded more than once, so they must be free of
+ * side effects.
+ */
+#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
+ do { \
+ update_range__(match, offsetof(struct sw_flow_key, field), \
+ sizeof((match)->key->field), is_mask); \
+ if (is_mask) { \
+ if ((match)->mask) \
+ (match)->mask->key.field = value; \
+ } else { \
+ (match)->key->field = value; \
+ } \
+ } while (0)
+
+/* memcpy() variant of the key store: copies 'len' bytes from 'value_p' into
+ * 'field' and widens the tracked byte range by 'len' bytes starting at the
+ * field's offset.  As with the assignment form, a mask write is dropped when
+ * no mask is attached.  'match', 'len' and 'is_mask' are expanded more than
+ * once, so they must be free of side effects.
+ */
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+ do { \
+ update_range__(match, offsetof(struct sw_flow_key, field), \
+ len, is_mask); \
+ if (is_mask) { \
+ if ((match)->mask) \
+ memcpy(&(match)->mask->key.field, value_p, len);\
+ } else { \
+ memcpy(&(match)->key->field, value_p, len); \
+ } \
+ } while (0)
+
+/* Number of bytes between the start and end offsets of 'range'. */
+static u16 range_n_bytes(const struct sw_flow_key_range *range)
+{
+	u16 n_bytes = range->end - range->start;
+
+	return n_bytes;
+}
+
+/* Prepare 'match' for parsing: clear it, clear 'key', and bind 'key' and
+ * the optional 'mask' to it.  When a mask is supplied its key and its byte
+ * range are reset as well.
+ */
+void ovs_match_init(struct sw_flow_match *match,
+		    struct sw_flow_key *key,
+		    struct sw_flow_mask *mask)
+{
+	memset(match, 0, sizeof(*match));
+	memset(key, 0, sizeof(*key));
+
+	match->key = key;
+	match->mask = mask;
+
+	if (!mask)
+		return;
+
+	memset(&mask->key, 0, sizeof(mask->key));
+	mask->range.end = 0;
+	mask->range.start = 0;
+}
+
+/* Check that the attribute sets supplied for a match are consistent with the
+ * key contents.
+ *
+ * @match:      match whose key (and optional mask) has already been filled in.
+ * @key_attrs:  bit set of OVS_KEY_ATTR_* present in the key.
+ * @mask_attrs: bit set of OVS_KEY_ATTR_* present in the mask.
+ *
+ * Two sets are derived from the key: 'key_expected', the attributes the key
+ * must carry given its ethertype / IP protocol / ICMPv6 type, and
+ * 'mask_allowed', the attributes the mask may carry.  A protocol-specific
+ * mask attribute is only allowed when the field that selects that protocol
+ * is exact-matched (all ones) in the mask.
+ *
+ * Returns true when every expected key attribute is present and the mask
+ * carries nothing outside the allowed set; otherwise logs and returns false.
+ */
+static bool ovs_match_validate(const struct sw_flow_match *match,
+ u64 key_attrs, u64 mask_attrs)
+{
+ u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
+ u64 mask_allowed = key_attrs; /* At most allow all key attributes */
+
+ /* The following mask attributes allowed only if they
+ * pass the validation tests. */
+ mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
+ | (1 << OVS_KEY_ATTR_IPV6)
+ | (1 << OVS_KEY_ATTR_TCP)
+ | (1 << OVS_KEY_ATTR_UDP)
+ | (1 << OVS_KEY_ATTR_SCTP)
+ | (1 << OVS_KEY_ATTR_ICMP)
+ | (1 << OVS_KEY_ATTR_ICMPV6)
+ | (1 << OVS_KEY_ATTR_ARP)
+ | (1 << OVS_KEY_ATTR_ND));
+
+ /* Always allowed mask fields. */
+ mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
+ | (1 << OVS_KEY_ATTR_IN_PORT)
+ | (1 << OVS_KEY_ATTR_ETHERTYPE));
+
+ /* Check key attributes. */
+ if (match->key->eth.type == htons(ETH_P_ARP)
+ || match->key->eth.type == htons(ETH_P_RARP)) {
+ key_expected |= 1 << OVS_KEY_ATTR_ARP;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
+ }
+
+ if (match->key->eth.type == htons(ETH_P_IP)) {
+ key_expected |= 1 << OVS_KEY_ATTR_IPV4;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
+
+ /* Transport attributes are not expected for later fragments. */
+ if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+ if (match->key->ip.proto == IPPROTO_UDP) {
+ key_expected |= 1 << OVS_KEY_ATTR_UDP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_SCTP) {
+ key_expected |= 1 << OVS_KEY_ATTR_SCTP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_TCP) {
+ key_expected |= 1 << OVS_KEY_ATTR_TCP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_ICMP) {
+ key_expected |= 1 << OVS_KEY_ATTR_ICMP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
+ }
+ }
+ }
+
+ if (match->key->eth.type == htons(ETH_P_IPV6)) {
+ key_expected |= 1 << OVS_KEY_ATTR_IPV6;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
+
+ /* Same transport rules as the IPv4 branch, with ICMPv6/ND instead of ICMP. */
+ if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+ if (match->key->ip.proto == IPPROTO_UDP) {
+ key_expected |= 1 << OVS_KEY_ATTR_UDP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_SCTP) {
+ key_expected |= 1 << OVS_KEY_ATTR_SCTP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_TCP) {
+ key_expected |= 1 << OVS_KEY_ATTR_TCP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_ICMPV6) {
+ key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
+
+ /* ipv6.tp.src holds the ICMPv6 type; ND attributes are
+ * expected for neighbour solicitation/advertisement. */
+ if (match->key->ipv6.tp.src ==
+ htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ key_expected |= 1 << OVS_KEY_ATTR_ND;
+ if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ND;
+ }
+ }
+ }
+ }
+
+ if ((key_attrs & key_expected) != key_expected) {
+ /* Key attributes check failed. */
+ OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
+ key_attrs, key_expected);
+ return false;
+ }
+
+ if ((mask_attrs & mask_allowed) != mask_attrs) {
+ /* Mask attributes check failed. */
+ OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
+ mask_attrs, mask_allowed);
+ return false;
+ }
+
+ return true;
+}
+
static int check_header(struct sk_buff *skb, int len)
{
if (unlikely(skb->len < len))
@@ -102,6 +299,12 @@
sizeof(struct udphdr));
}
+/* Result of pskb_may_pull() for the transport offset of 'skb' plus the size
+ * of an SCTP header.
+ */
+static bool sctphdr_ok(struct sk_buff *skb)
+{
+	unsigned int need = skb_transport_offset(skb) + sizeof(struct sctphdr);
+
+	return pskb_may_pull(skb, need);
+}
+
static bool icmphdr_ok(struct sk_buff *skb)
{
return pskb_may_pull(skb, skb_transport_offset(skb) +
@@ -121,12 +324,7 @@
return cur_ms - idle_ms;
}
-#define SW_FLOW_KEY_OFFSET(field) \
- (offsetof(struct sw_flow_key, field) + \
- FIELD_SIZEOF(struct sw_flow_key, field))
-
-static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
- int *key_lenp)
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
{
unsigned int nh_ofs = skb_network_offset(skb);
unsigned int nh_len;
@@ -136,8 +334,6 @@
__be16 frag_off;
int err;
- *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label);
-
err = check_header(skb, nh_ofs + sizeof(*nh));
if (unlikely(err))
return err;
@@ -176,6 +372,22 @@
sizeof(struct icmp6hdr));
}
+/* Write 'src' AND-ed with the key of 'mask' into 'dst', one long at a time,
+ * over the byte range recorded in 'mask'.
+ *
+ * Bytes of 'dst' outside 'mask->range' are deliberately left untouched:
+ * later operations on 'dst' only look at the contents inside that range.
+ */
+void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
+		       const struct sw_flow_mask *mask)
+{
+	const long *mask_words = (const long *)((const u8 *)&mask->key + mask->range.start);
+	const long *src_words = (const long *)((const u8 *)src + mask->range.start);
+	long *dst_words = (long *)((u8 *)dst + mask->range.start);
+	const int n_bytes = range_n_bytes(&mask->range);
+	int off, w;
+
+	for (off = 0, w = 0; off < n_bytes; off += sizeof(long), w++)
+		dst_words[w] = src_words[w] & mask_words[w];
+}
+
#define TCP_FLAGS_OFFSET 13
#define TCP_FLAG_MASK 0x3f
@@ -224,6 +436,7 @@
spin_lock_init(&flow->lock);
flow->sf_acts = NULL;
+ flow->mask = NULL;
return flow;
}
@@ -263,7 +476,7 @@
flex_array_free(buckets);
}
-struct flow_table *ovs_flow_tbl_alloc(int new_size)
+static struct flow_table *__flow_tbl_alloc(int new_size)
{
struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
@@ -281,17 +494,15 @@
table->node_ver = 0;
table->keep_flows = false;
get_random_bytes(&table->hash_seed, sizeof(u32));
+ table->mask_list = NULL;
return table;
}
-void ovs_flow_tbl_destroy(struct flow_table *table)
+static void __flow_tbl_destroy(struct flow_table *table)
{
int i;
- if (!table)
- return;
-
if (table->keep_flows)
goto skip_flows;
@@ -302,32 +513,56 @@
int ver = table->node_ver;
hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
- hlist_del_rcu(&flow->hash_node[ver]);
- ovs_flow_free(flow);
+ hlist_del(&flow->hash_node[ver]);
+ ovs_flow_free(flow, false);
}
}
+ BUG_ON(!list_empty(table->mask_list));
+ kfree(table->mask_list);
+
skip_flows:
free_buckets(table->buckets);
kfree(table);
}
+/* Allocate a flow table with 'new_size' buckets together with a fresh, empty
+ * mask list.  Returns NULL if either allocation fails.
+ */
+struct flow_table *ovs_flow_tbl_alloc(int new_size)
+{
+	struct flow_table *table;
+	struct list_head *masks;
+
+	table = __flow_tbl_alloc(new_size);
+	if (!table)
+		return NULL;
+
+	masks = kmalloc(sizeof(struct list_head), GFP_KERNEL);
+	table->mask_list = masks;
+	if (!masks) {
+		/* keep_flows makes __flow_tbl_destroy() jump past the flow
+		 * walk and the mask_list checks, which need a valid list. */
+		table->keep_flows = true;
+		__flow_tbl_destroy(table);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(masks);
+	return table;
+}
+
static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
{
struct flow_table *table = container_of(rcu, struct flow_table, rcu);
- ovs_flow_tbl_destroy(table);
+ __flow_tbl_destroy(table);
}
-void ovs_flow_tbl_deferred_destroy(struct flow_table *table)
+/* Destroy 'table'; a NULL table is ignored.  With 'deferred' set the
+ * teardown is queued through call_rcu() (flow_tbl_destroy_rcu_cb), otherwise
+ * __flow_tbl_destroy() runs immediately.
+ */
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
{
if (!table)
return;
- call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+ if (deferred)
+ call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+ else
+ __flow_tbl_destroy(table);
}
-struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last)
+struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last)
{
struct sw_flow *flow;
struct hlist_head *head;
@@ -353,11 +588,13 @@
return NULL;
}
-static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
+static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
{
struct hlist_head *head;
+
head = find_bucket(table, flow->hash);
hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
+
table->count++;
}
@@ -377,8 +614,10 @@
head = flex_array_get(old->buckets, i);
hlist_for_each_entry(flow, head, hash_node[old_ver])
- __flow_tbl_insert(new, flow);
+ __tbl_insert(new, flow);
}
+
+ new->mask_list = old->mask_list;
old->keep_flows = true;
}
@@ -386,7 +625,7 @@
{
struct flow_table *new_table;
- new_table = ovs_flow_tbl_alloc(n_buckets);
+ new_table = __flow_tbl_alloc(n_buckets);
if (!new_table)
return ERR_PTR(-ENOMEM);
@@ -405,28 +644,30 @@
return __flow_tbl_rehash(table, table->n_buckets * 2);
}
-void ovs_flow_free(struct sw_flow *flow)
+static void __flow_free(struct sw_flow *flow)
{
- if (unlikely(!flow))
- return;
-
kfree((struct sf_flow_acts __force *)flow->sf_acts);
kmem_cache_free(flow_cache, flow);
}
-/* RCU callback used by ovs_flow_deferred_free. */
static void rcu_free_flow_callback(struct rcu_head *rcu)
{
struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
- ovs_flow_free(flow);
+ __flow_free(flow);
}
-/* Schedules 'flow' to be freed after the next RCU grace period.
- * The caller must hold rcu_read_lock for this to be sensible. */
-void ovs_flow_deferred_free(struct sw_flow *flow)
+/* Release 'flow'; a NULL flow is ignored.  The reference the flow holds on
+ * its mask is dropped first (passing 'deferred' through), then the flow is
+ * freed either via call_rcu() when 'deferred' is set or immediately.
+ */
+void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
- call_rcu(&flow->rcu, rcu_free_flow_callback);
+ if (!flow)
+ return;
+
+ ovs_sw_flow_mask_del_ref(flow->mask, deferred);
+
+ if (deferred)
+ call_rcu(&flow->rcu, rcu_free_flow_callback);
+ else
+ __flow_free(flow);
}
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
@@ -497,18 +738,15 @@
}
static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
- int *key_lenp, int nh_len)
+ int nh_len)
{
struct icmp6hdr *icmp = icmp6_hdr(skb);
- int error = 0;
- int key_len;
/* The ICMPv6 type and code fields use the 16-bit transport port
* fields, so we need to store them in 16-bit network byte order.
*/
key->ipv6.tp.src = htons(icmp->icmp6_type);
key->ipv6.tp.dst = htons(icmp->icmp6_code);
- key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -517,21 +755,17 @@
struct nd_msg *nd;
int offset;
- key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
-
/* In order to process neighbor discovery options, we need the
* entire packet.
*/
if (unlikely(icmp_len < sizeof(*nd)))
- goto out;
- if (unlikely(skb_linearize(skb))) {
- error = -ENOMEM;
- goto out;
- }
+ return 0;
+
+ if (unlikely(skb_linearize(skb)))
+ return -ENOMEM;
nd = (struct nd_msg *)skb_transport_header(skb);
key->ipv6.nd.target = nd->target;
- key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
icmp_len -= sizeof(*nd);
offset = 0;
@@ -541,7 +775,7 @@
int opt_len = nd_opt->nd_opt_len * 8;
if (unlikely(!opt_len || opt_len > icmp_len))
- goto invalid;
+ return 0;
/* Store the link layer address if the appropriate
* option is provided. It is considered an error if
@@ -566,16 +800,14 @@
}
}
- goto out;
+ return 0;
invalid:
memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
-out:
- *key_lenp = key_len;
- return error;
+ return 0;
}
/**
@@ -584,7 +816,6 @@
* Ethernet header
* @in_port: port number on which @skb was received.
* @key: output flow key
- * @key_lenp: length of output flow key
*
* The caller must ensure that skb->len >= ETH_HLEN.
*
@@ -602,11 +833,9 @@
* of a correct length, otherwise the same as skb->network_header.
* For other key->eth.type values it is left untouched.
*/
-int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
- int *key_lenp)
+int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
{
- int error = 0;
- int key_len = SW_FLOW_KEY_OFFSET(eth);
+ int error;
struct ethhdr *eth;
memset(key, 0, sizeof(*key));
@@ -649,15 +878,13 @@
struct iphdr *nh;
__be16 offset;
- key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
-
error = check_iphdr(skb);
if (unlikely(error)) {
if (error == -EINVAL) {
skb->transport_header = skb->network_header;
error = 0;
}
- goto out;
+ return error;
}
nh = ip_hdr(skb);
@@ -671,7 +898,7 @@
offset = nh->frag_off & htons(IP_OFFSET);
if (offset) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
- goto out;
+ return 0;
}
if (nh->frag_off & htons(IP_MF) ||
skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
@@ -679,21 +906,24 @@
/* Transport layer. */
if (key->ip.proto == IPPROTO_TCP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv4.tp.src = tcp->source;
key->ipv4.tp.dst = tcp->dest;
}
} else if (key->ip.proto == IPPROTO_UDP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv4.tp.src = udp->source;
key->ipv4.tp.dst = udp->dest;
}
+ } else if (key->ip.proto == IPPROTO_SCTP) {
+ if (sctphdr_ok(skb)) {
+ struct sctphdr *sctp = sctp_hdr(skb);
+ key->ipv4.tp.src = sctp->source;
+ key->ipv4.tp.dst = sctp->dest;
+ }
} else if (key->ip.proto == IPPROTO_ICMP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (icmphdr_ok(skb)) {
struct icmphdr *icmp = icmp_hdr(skb);
/* The ICMP type and code fields use the 16-bit
@@ -722,102 +952,175 @@
memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
- key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
}
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
- nh_len = parse_ipv6hdr(skb, key, &key_len);
+ nh_len = parse_ipv6hdr(skb, key);
if (unlikely(nh_len < 0)) {
- if (nh_len == -EINVAL)
+ if (nh_len == -EINVAL) {
skb->transport_header = skb->network_header;
- else
+ error = 0;
+ } else {
error = nh_len;
- goto out;
+ }
+ return error;
}
if (key->ip.frag == OVS_FRAG_TYPE_LATER)
- goto out;
+ return 0;
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
/* Transport layer. */
if (key->ip.proto == NEXTHDR_TCP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv6.tp.src = tcp->source;
key->ipv6.tp.dst = tcp->dest;
}
} else if (key->ip.proto == NEXTHDR_UDP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv6.tp.src = udp->source;
key->ipv6.tp.dst = udp->dest;
}
+ } else if (key->ip.proto == NEXTHDR_SCTP) {
+ if (sctphdr_ok(skb)) {
+ struct sctphdr *sctp = sctp_hdr(skb);
+ key->ipv6.tp.src = sctp->source;
+ key->ipv6.tp.dst = sctp->dest;
+ }
} else if (key->ip.proto == NEXTHDR_ICMP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
if (icmp6hdr_ok(skb)) {
- error = parse_icmpv6(skb, key, &key_len, nh_len);
- if (error < 0)
- goto out;
+ error = parse_icmpv6(skb, key, nh_len);
+ if (error)
+ return error;
}
}
}
-out:
- *key_lenp = key_len;
- return error;
+ return 0;
}
-static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len)
+/* Hash the bytes of 'key' from offset 'key_start' up to 'key_end' with
+ * jhash2(), using an initial value of 0.  The byte count is converted to
+ * whole u32 words by shifting right by two, so any remainder is ignored.
+ */
+static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start,
+ int key_end)
{
- return jhash2((u32 *)((u8 *)key + key_start),
- DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0);
+ u32 *hash_key = (u32 *)((u8 *)key + key_start);
+ int hash_u32s = (key_end - key_start) >> 2;
+
+ /* Make sure number of hash bytes are multiple of u32. */
+ BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+
+ return jhash2(hash_key, hash_u32s, 0);
}
-static int flow_key_start(struct sw_flow_key *key)
+/* Byte offset at which comparison of 'key' starts: 0 when the tunnel
+ * destination address is non-zero, otherwise the offset of the 'phy' member
+ * rounded down to a multiple of sizeof(long).
+ */
+static int flow_key_start(const struct sw_flow_key *key)
{
if (key->tun_key.ipv4_dst)
return 0;
else
- return offsetof(struct sw_flow_key, phy);
+ return rounddown(offsetof(struct sw_flow_key, phy),
+ sizeof(long));
}
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
- struct sw_flow_key *key, int key_len)
+/* Compare two flow keys long by long over the byte offsets
+ * [key_start, key_end).  Every word in the span is read; the result is true
+ * only when no bit differs.
+ */
+static bool __cmp_key(const struct sw_flow_key *key1,
+		const struct sw_flow_key *key2, int key_start, int key_end)
+{
+	const long *lhs = (const long *)((const u8 *)key1 + key_start);
+	const long *rhs = (const long *)((const u8 *)key2 + key_start);
+	long acc = 0;
+	int off, w;
+
+	for (off = key_start, w = 0; off < key_end; off += sizeof(long), w++)
+		acc |= lhs[w] ^ rhs[w];
+
+	return !acc;
+}
+
+/* Compare flow->key against 'key' over [key_start, key_end). */
+static bool __flow_cmp_masked_key(const struct sw_flow *flow,
+		const struct sw_flow_key *key, int key_start, int key_end)
+{
+	const struct sw_flow_key *stored = &flow->key;
+
+	return __cmp_key(stored, key, key_start, key_end);
+}
+
+/* Compare flow->unmasked_key against 'key' over [key_start, key_end). */
+static bool __flow_cmp_unmasked_key(const struct sw_flow *flow,
+		const struct sw_flow_key *key, int key_start, int key_end)
+{
+	const struct sw_flow_key *stored = &flow->unmasked_key;
+
+	return __cmp_key(stored, key, key_start, key_end);
+}
+
+/* Compare the unmasked key of 'flow' with 'key', from the start offset that
+ * flow_key_start() yields for 'key' up to 'key_end'.
+ */
+bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+		const struct sw_flow_key *key, int key_end)
+{
+	return __flow_cmp_unmasked_key(flow, key, flow_key_start(key), key_end);
+}
+
+/* Look up the key of 'match' in 'table' and accept the hit only if the
+ * flow's unmasked key also equals it up to the end of the match range.
+ * Returns the flow, or NULL when nothing qualifies.
+ */
+struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
+		struct sw_flow_match *match)
+{
+	struct sw_flow *found;
+
+	found = ovs_flow_lookup(table, match->key);
+	if (!found)
+		return NULL;
+
+	if (!ovs_flow_cmp_unmasked_key(found, match->key, match->range.end))
+		return NULL;
+
+	return found;
+}
+
+/* Look for a flow in 'table' that was installed with 'mask' and matches
+ * 'unmasked' once that mask is applied.  The key is masked into a local
+ * copy, hashed over the mask's byte range to pick a bucket, and each flow in
+ * the bucket is accepted only if it uses this very mask object and its key
+ * equals the masked key over that range.  Returns the flow or NULL.
+ */
+static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table,
+ const struct sw_flow_key *unmasked,
+ struct sw_flow_mask *mask)
{
struct sw_flow *flow;
struct hlist_head *head;
- u8 *_key;
- int key_start;
+ int key_start = mask->range.start;
+ int key_end = mask->range.end;
u32 hash;
+ struct sw_flow_key masked_key;
- key_start = flow_key_start(key);
- hash = ovs_flow_hash(key, key_start, key_len);
-
- _key = (u8 *) key + key_start;
+ ovs_flow_key_mask(&masked_key, unmasked, mask);
+ hash = ovs_flow_hash(&masked_key, key_start, key_end);
head = find_bucket(table, hash);
hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
-
- if (flow->hash == hash &&
- !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) {
+ if (flow->mask == mask &&
+ __flow_cmp_masked_key(flow, &masked_key,
+ key_start, key_end))
return flow;
- }
}
return NULL;
}
-void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- struct sw_flow_key *key, int key_len)
+/* Find a flow matching 'key' by trying every mask on tbl->mask_list in list
+ * order.  Returns the first flow found, or NULL if no mask produces a hit.
+ */
+struct sw_flow *ovs_flow_lookup(struct flow_table *tbl,
+ const struct sw_flow_key *key)
{
- flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len);
- memcpy(&flow->key, key, sizeof(flow->key));
- __flow_tbl_insert(table, flow);
+ struct sw_flow *flow = NULL;
+ struct sw_flow_mask *mask;
+
+ list_for_each_entry_rcu(mask, tbl->mask_list, list) {
+ flow = ovs_masked_flow_lookup(tbl, key, mask);
+ if (flow) /* Found */
+ break;
+ }
+
+ return flow;
}
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+
+/* Compute the hash of 'flow' from its key over the byte range of its mask,
+ * then link the flow into 'table'.
+ */
+void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow)
+{
+	const struct sw_flow_key_range *range = &flow->mask->range;
+
+	flow->hash = ovs_flow_hash(&flow->key, range->start, range->end);
+	__tbl_insert(table, flow);
+}
+
+void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow)
{
BUG_ON(table->count == 0);
hlist_del_rcu(&flow->hash_node[table->node_ver]);
@@ -837,6 +1140,7 @@
[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
+ [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
@@ -844,149 +1148,84 @@
[OVS_KEY_ATTR_TUNNEL] = -1,
};
-static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
- const struct nlattr *a[], u32 *attrs)
+/* Return true when the 'size' bytes starting at 'fp' are all zero.  A NULL
+ * 'fp' yields false.
+ */
+static bool is_all_zero(const u8 *fp, size_t size)
{
- const struct ovs_key_icmp *icmp_key;
- const struct ovs_key_tcp *tcp_key;
- const struct ovs_key_udp *udp_key;
+ int i;
- switch (swkey->ip.proto) {
- case IPPROTO_TCP:
- if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+ if (!fp)
+ return false;
- *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
- tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
- swkey->ipv4.tp.src = tcp_key->tcp_src;
- swkey->ipv4.tp.dst = tcp_key->tcp_dst;
- break;
+ /* Any non-zero byte ends the scan early. */
+ for (i = 0; i < size; i++)
+ if (fp[i])
+ return false;
- case IPPROTO_UDP:
- if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_UDP);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
- udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
- swkey->ipv4.tp.src = udp_key->udp_src;
- swkey->ipv4.tp.dst = udp_key->udp_dst;
- break;
-
- case IPPROTO_ICMP:
- if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
- icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
- swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
- swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
- break;
- }
-
- return 0;
+ return true;
}
-static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
- const struct nlattr *a[], u32 *attrs)
-{
- const struct ovs_key_icmpv6 *icmpv6_key;
- const struct ovs_key_tcp *tcp_key;
- const struct ovs_key_udp *udp_key;
-
- switch (swkey->ip.proto) {
- case IPPROTO_TCP:
- if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_TCP);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
- tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
- swkey->ipv6.tp.src = tcp_key->tcp_src;
- swkey->ipv6.tp.dst = tcp_key->tcp_dst;
- break;
-
- case IPPROTO_UDP:
- if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_UDP);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
- udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
- swkey->ipv6.tp.src = udp_key->udp_src;
- swkey->ipv6.tp.dst = udp_key->udp_dst;
- break;
-
- case IPPROTO_ICMPV6:
- if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
- icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
- swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type);
- swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code);
-
- if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
- swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
- const struct ovs_key_nd *nd_key;
-
- if (!(*attrs & (1 << OVS_KEY_ATTR_ND)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_ND);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
- nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
- memcpy(&swkey->ipv6.nd.target, nd_key->nd_target,
- sizeof(swkey->ipv6.nd.target));
- memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
- memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
- }
- break;
- }
-
- return 0;
-}
-
-static int parse_flow_nlattrs(const struct nlattr *attr,
- const struct nlattr *a[], u32 *attrsp)
+/* Walk the nested OVS_KEY_ATTR_* attributes in 'attr'.  Each accepted
+ * attribute is stored in a[type] and its bit is set in the 64-bit set that
+ * starts out as *attrsp and is written back there on success.
+ *
+ * @nz: when true, attributes whose payload is entirely zero are skipped
+ *      (neither recorded in a[] nor flagged in the bit set).
+ *
+ * Returns 0 on success, or -EINVAL for an out-of-range attribute type, a
+ * duplicate attribute, a payload of unexpected length, or trailing bytes.
+ */
+static int __parse_flow_nlattrs(const struct nlattr *attr,
+ const struct nlattr *a[],
+ u64 *attrsp, bool nz)
{
const struct nlattr *nla;
- u32 attrs;
+ u64 attrs; /* Same width as *attrsp so no bit is truncated. */
int rem;
- attrs = 0;
+ attrs = *attrsp;
nla_for_each_nested(nla, attr, rem) {
u16 type = nla_type(nla);
int expected_len;
- if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type))
+ if (type > OVS_KEY_ATTR_MAX) {
+ OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
+ type, OVS_KEY_ATTR_MAX);
+ /* Must bail out here: 'type' indexes ovs_key_lens[] and
+ * a[] and is used as a shift count below. */
+ return -EINVAL;
+ }
+
+ if (attrs & (1ULL << type)) {
+ OVS_NLERR("Duplicate key attribute (type %d).\n", type);
return -EINVAL;
+ }
expected_len = ovs_key_lens[type];
- if (nla_len(nla) != expected_len && expected_len != -1)
+ if (nla_len(nla) != expected_len && expected_len != -1) {
+ OVS_NLERR("Key attribute has unexpected length (type=%d"
+ ", length=%d, expected=%d).\n", type,
+ nla_len(nla), expected_len);
return -EINVAL;
+ }
- attrs |= 1 << type;
- a[type] = nla;
+ /* Size the zero check by the real payload length: expected_len
+ * is -1 for variable-length attributes and must not be used as
+ * a byte count. */
+ if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
+ attrs |= 1ULL << type;
+ a[type] = nla;
+ }
}
- if (rem)
+ if (rem) {
+ OVS_NLERR("Message has %d unknown bytes.\n", rem);
return -EINVAL;
+ }
*attrsp = attrs;
return 0;
}
+/* Parse mask attributes: same as the key parser, except that attributes
+ * with an all-zero payload are skipped.
+ */
+static int parse_flow_mask_nlattrs(const struct nlattr *attr,
+		const struct nlattr *a[], u64 *attrsp)
+{
+	const bool skip_all_zero = true;
+
+	return __parse_flow_nlattrs(attr, a, attrsp, skip_all_zero);
+}
+
+/* Parse key attributes: every well-formed attribute is recorded, including
+ * those with an all-zero payload.
+ */
+static int parse_flow_nlattrs(const struct nlattr *attr,
+		const struct nlattr *a[], u64 *attrsp)
+{
+	const bool skip_all_zero = false;
+
+	return __parse_flow_nlattrs(attr, a, attrsp, skip_all_zero);
+}
+
int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct ovs_key_ipv4_tunnel *tun_key)
+ struct sw_flow_match *match, bool is_mask)
{
struct nlattr *a;
int rem;
bool ttl = false;
-
- memset(tun_key, 0, sizeof(*tun_key));
+ __be16 tun_flags = 0;
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
@@ -1000,53 +1239,78 @@
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
};
- if (type > OVS_TUNNEL_KEY_ATTR_MAX ||
- ovs_tunnel_key_lens[type] != nla_len(a))
+ if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
+ OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
+ type, OVS_TUNNEL_KEY_ATTR_MAX);
return -EINVAL;
+ }
+
+ if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+ OVS_NLERR("IPv4 tunnel attribute type has unexpected "
+ " length (type=%d, length=%d, expected=%d).\n",
+ type, nla_len(a), ovs_tunnel_key_lens[type]);
+ return -EINVAL;
+ }
switch (type) {
case OVS_TUNNEL_KEY_ATTR_ID:
- tun_key->tun_id = nla_get_be64(a);
- tun_key->tun_flags |= TUNNEL_KEY;
+ SW_FLOW_KEY_PUT(match, tun_key.tun_id,
+ nla_get_be64(a), is_mask);
+ tun_flags |= TUNNEL_KEY;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
- tun_key->ipv4_src = nla_get_be32(a);
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
+ nla_get_be32(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
- tun_key->ipv4_dst = nla_get_be32(a);
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
+ nla_get_be32(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TOS:
- tun_key->ipv4_tos = nla_get_u8(a);
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
+ nla_get_u8(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TTL:
- tun_key->ipv4_ttl = nla_get_u8(a);
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
+ nla_get_u8(a), is_mask);
ttl = true;
break;
case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
- tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT;
+ tun_flags |= TUNNEL_DONT_FRAGMENT;
break;
case OVS_TUNNEL_KEY_ATTR_CSUM:
- tun_key->tun_flags |= TUNNEL_CSUM;
+ tun_flags |= TUNNEL_CSUM;
break;
default:
return -EINVAL;
-
}
}
- if (rem > 0)
- return -EINVAL;
- if (!tun_key->ipv4_dst)
- return -EINVAL;
+ SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
- if (!ttl)
+ if (rem > 0) {
+ OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
return -EINVAL;
+ }
+
+ if (!is_mask) {
+ if (!match->key->tun_key.ipv4_dst) {
+ OVS_NLERR("IPv4 tunnel destination address is zero.\n");
+ return -EINVAL;
+ }
+
+ if (!ttl) {
+ OVS_NLERR("IPv4 tunnel TTL not specified.\n");
+ return -EINVAL;
+ }
+ }
return 0;
}
int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *tun_key)
+ const struct ovs_key_ipv4_tunnel *tun_key,
+ const struct ovs_key_ipv4_tunnel *output)
{
struct nlattr *nla;
@@ -1054,23 +1318,24 @@
if (!nla)
return -EMSGSIZE;
- if (tun_key->tun_flags & TUNNEL_KEY &&
- nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id))
+ if (output->tun_flags & TUNNEL_KEY &&
+ nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
return -EMSGSIZE;
- if (tun_key->ipv4_src &&
- nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src))
+ if (output->ipv4_src &&
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
return -EMSGSIZE;
- if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst))
+ if (output->ipv4_dst &&
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
return -EMSGSIZE;
- if (tun_key->ipv4_tos &&
- nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos))
+ if (output->ipv4_tos &&
+ nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
return -EMSGSIZE;
- if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl))
+ if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
return -EMSGSIZE;
- if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
return -EMSGSIZE;
- if ((tun_key->tun_flags & TUNNEL_CSUM) &&
+ if ((output->tun_flags & TUNNEL_CSUM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
@@ -1078,176 +1343,390 @@
return 0;
}
-/**
- * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key.
- * @swkey: receives the extracted flow key.
- * @key_lenp: number of bytes used in @swkey.
- * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
- * sequence.
- */
-int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
- const struct nlattr *attr)
+/* Copy the metadata attributes (priority, input port, skb mark, tunnel) from
+ * the parsed attribute table 'a' into the key or, when 'is_mask' is set, the
+ * mask of 'match'.  Each attribute consumed has its bit cleared in *attrs.
+ * Returns 0 on success, or -EINVAL when a key in_port is out of range or the
+ * tunnel attribute fails to parse.
+ */
+static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
+ const struct nlattr **a, bool is_mask)
{
- const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
- const struct ovs_key_ethernet *eth_key;
- int key_len;
- u32 attrs;
+ if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
+ SW_FLOW_KEY_PUT(match, phy.priority,
+ nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
+ }
+
+ if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
+ u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
+
+ if (is_mask)
+ in_port = 0xffffffff; /* Always exact match in_port. */
+ else if (in_port >= DP_MAX_PORTS)
+ return -EINVAL;
+
+ SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
+ } else if (!is_mask) {
+ /* No in_port attribute in the key: store DP_MAX_PORTS instead. */
+ SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
+ }
+
+ if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
+ uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
+
+ SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
+ }
+ if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
+ /* Any tunnel parse failure is reported as -EINVAL. */
+ if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+ is_mask))
+ return -EINVAL;
+ *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
+ }
+ return 0;
+}
+
+static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
+ const struct nlattr **a, bool is_mask)
+{
int err;
+ u64 orig_attrs = attrs;
- memset(swkey, 0, sizeof(struct sw_flow_key));
- key_len = SW_FLOW_KEY_OFFSET(eth);
-
- err = parse_flow_nlattrs(attr, a, &attrs);
+ err = metadata_from_nlattrs(match, &attrs, a, is_mask);
if (err)
return err;
- /* Metadata attributes. */
- if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
- swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]);
- attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
- }
- if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
- u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
- if (in_port >= DP_MAX_PORTS)
- return -EINVAL;
- swkey->phy.in_port = in_port;
- attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
- } else {
- swkey->phy.in_port = DP_MAX_PORTS;
- }
- if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
- swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
- attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
+ if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
+ const struct ovs_key_ethernet *eth_key;
+
+ eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+ SW_FLOW_KEY_MEMCPY(match, eth.src,
+ eth_key->eth_src, ETH_ALEN, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, eth.dst,
+ eth_key->eth_dst, ETH_ALEN, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
}
- if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
- err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key);
- if (err)
- return err;
-
- attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
- }
-
- /* Data attributes. */
- if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
- return -EINVAL;
- attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
-
- eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
- memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
- memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);
-
- if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) &&
- nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
- const struct nlattr *encap;
+ if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
__be16 tci;
- if (attrs != ((1 << OVS_KEY_ATTR_VLAN) |
- (1 << OVS_KEY_ATTR_ETHERTYPE) |
- (1 << OVS_KEY_ATTR_ENCAP)))
- return -EINVAL;
-
- encap = a[OVS_KEY_ATTR_ENCAP];
tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
- if (tci & htons(VLAN_TAG_PRESENT)) {
- swkey->eth.tci = tci;
+ if (!(tci & htons(VLAN_TAG_PRESENT))) {
+ if (is_mask)
+ OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
+ else
+ OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
- err = parse_flow_nlattrs(encap, a, &attrs);
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+ } else if (!is_mask)
+ SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+
+ if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
+ __be16 eth_type;
+
+ eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+ if (is_mask) {
+ /* Always exact match EtherType. */
+ eth_type = htons(0xffff);
+ } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
+ OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
+ ntohs(eth_type), ETH_P_802_3_MIN);
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+ } else if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ const struct ovs_key_ipv4 *ipv4_key;
+
+ ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
+ if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
+ OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
+ ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
+ return -EINVAL;
+ }
+ SW_FLOW_KEY_PUT(match, ip.proto,
+ ipv4_key->ipv4_proto, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.tos,
+ ipv4_key->ipv4_tos, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.ttl,
+ ipv4_key->ipv4_ttl, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.frag,
+ ipv4_key->ipv4_frag, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+ ipv4_key->ipv4_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+ ipv4_key->ipv4_dst, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
+ const struct ovs_key_ipv6 *ipv6_key;
+
+ ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
+ if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
+ OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
+ ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
+ return -EINVAL;
+ }
+ SW_FLOW_KEY_PUT(match, ipv6.label,
+ ipv6_key->ipv6_label, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.proto,
+ ipv6_key->ipv6_proto, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.tos,
+ ipv6_key->ipv6_tclass, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.ttl,
+ ipv6_key->ipv6_hlimit, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.frag,
+ ipv6_key->ipv6_frag, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
+ ipv6_key->ipv6_src,
+ sizeof(match->key->ipv6.addr.src),
+ is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
+ ipv6_key->ipv6_dst,
+ sizeof(match->key->ipv6.addr.dst),
+ is_mask);
+
+ attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
+ const struct ovs_key_arp *arp_key;
+
+ arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
+ if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
+ OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
+ arp_key->arp_op);
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+ arp_key->arp_sip, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+ arp_key->arp_tip, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.proto,
+ ntohs(arp_key->arp_op), is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
+ arp_key->arp_sha, ETH_ALEN, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
+ arp_key->arp_tha, ETH_ALEN, is_mask);
+
+ attrs &= ~(1 << OVS_KEY_ATTR_ARP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
+ const struct ovs_key_tcp *tcp_key;
+
+ tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+ if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ tcp_key->tcp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ tcp_key->tcp_dst, is_mask);
+ } else {
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ tcp_key->tcp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ tcp_key->tcp_dst, is_mask);
+ }
+ attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
+ const struct ovs_key_udp *udp_key;
+
+ udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+ if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ udp_key->udp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ udp_key->udp_dst, is_mask);
+ } else {
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ udp_key->udp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ udp_key->udp_dst, is_mask);
+ }
+ attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
+ const struct ovs_key_sctp *sctp_key;
+
+ sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
+ if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ sctp_key->sctp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ sctp_key->sctp_dst, is_mask);
+ } else {
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ sctp_key->sctp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ sctp_key->sctp_dst, is_mask);
+ }
+ attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
+ const struct ovs_key_icmp *icmp_key;
+
+ icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ htons(icmp_key->icmp_type), is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ htons(icmp_key->icmp_code), is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
+ const struct ovs_key_icmpv6 *icmpv6_key;
+
+ icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ htons(icmpv6_key->icmpv6_type), is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ htons(icmpv6_key->icmpv6_code), is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ND)) {
+ const struct ovs_key_nd *nd_key;
+
+ nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
+ nd_key->nd_target,
+ sizeof(match->key->ipv6.nd.target),
+ is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
+ nd_key->nd_sll, ETH_ALEN, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
+ nd_key->nd_tll, ETH_ALEN, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ND);
+ }
+
+ if (attrs != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and
+ * mask. If the 'mask' is NULL, the flow is treated as an exact match
+ * flow. Otherwise, it is treated as a wildcarded flow, except the mask
+ * does not include any don't care bit.
+ * @match: receives the extracted flow match information.
+ * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence. The fields should be those of the packet that triggered the
+ * creation of this flow.
+ * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
+ * attribute that specifies the mask field of the wildcarded flow.
+ */
+int ovs_match_from_nlattrs(struct sw_flow_match *match,
+ const struct nlattr *key,
+ const struct nlattr *mask)
+{
+ const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+ const struct nlattr *encap;
+ u64 key_attrs = 0;
+ u64 mask_attrs = 0;
+ bool encap_valid = false;
+ int err;
+
+ err = parse_flow_nlattrs(key, a, &key_attrs);
+ if (err)
+ return err;
+
+ if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
+ (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
+ (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
+ __be16 tci;
+
+ if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
+ (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
+ OVS_NLERR("Invalid Vlan frame.\n");
+ return -EINVAL;
+ }
+
+ key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+ encap = a[OVS_KEY_ATTR_ENCAP];
+ key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+ encap_valid = true;
+
+ if (tci & htons(VLAN_TAG_PRESENT)) {
+ err = parse_flow_nlattrs(encap, a, &key_attrs);
if (err)
return err;
} else if (!tci) {
/* Corner case for truncated 802.1Q header. */
- if (nla_len(encap))
+ if (nla_len(encap)) {
+ OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
return -EINVAL;
-
- swkey->eth.type = htons(ETH_P_8021Q);
- *key_lenp = key_len;
- return 0;
+ }
} else {
- return -EINVAL;
+ OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
+ return -EINVAL;
}
}
- if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
- swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
- if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN)
- return -EINVAL;
- attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+ err = ovs_key_from_nlattrs(match, key_attrs, a, false);
+ if (err)
+ return err;
+
+ if (mask) {
+ err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
+ if (err)
+ return err;
+
+ if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
+ __be16 eth_type = 0;
+ __be16 tci = 0;
+
+ if (!encap_valid) {
+ OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
+ return -EINVAL;
+ }
+
+ mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+ if (a[OVS_KEY_ATTR_ETHERTYPE])
+ eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+ if (eth_type == htons(0xffff)) {
+ mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+ encap = a[OVS_KEY_ATTR_ENCAP];
+ err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
+ } else {
+ OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
+ ntohs(eth_type));
+ return -EINVAL;
+ }
+
+ if (a[OVS_KEY_ATTR_VLAN])
+ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+ if (!(tci & htons(VLAN_TAG_PRESENT))) {
+ OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
+ return -EINVAL;
+ }
+ }
+
+ err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
+ if (err)
+ return err;
} else {
- swkey->eth.type = htons(ETH_P_802_2);
+ /* Populate exact match flow's key mask. */
+ if (match->mask)
+ ovs_sw_flow_mask_set(match->mask, &match->range, 0xff);
}
- if (swkey->eth.type == htons(ETH_P_IP)) {
- const struct ovs_key_ipv4 *ipv4_key;
-
- if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
- return -EINVAL;
- attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
-
- key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
- ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
- if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX)
- return -EINVAL;
- swkey->ip.proto = ipv4_key->ipv4_proto;
- swkey->ip.tos = ipv4_key->ipv4_tos;
- swkey->ip.ttl = ipv4_key->ipv4_ttl;
- swkey->ip.frag = ipv4_key->ipv4_frag;
- swkey->ipv4.addr.src = ipv4_key->ipv4_src;
- swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
-
- if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
- err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs);
- if (err)
- return err;
- }
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- const struct ovs_key_ipv6 *ipv6_key;
-
- if (!(attrs & (1 << OVS_KEY_ATTR_IPV6)))
- return -EINVAL;
- attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
-
- key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
- ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
- if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX)
- return -EINVAL;
- swkey->ipv6.label = ipv6_key->ipv6_label;
- swkey->ip.proto = ipv6_key->ipv6_proto;
- swkey->ip.tos = ipv6_key->ipv6_tclass;
- swkey->ip.ttl = ipv6_key->ipv6_hlimit;
- swkey->ip.frag = ipv6_key->ipv6_frag;
- memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
- sizeof(swkey->ipv6.addr.src));
- memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
- sizeof(swkey->ipv6.addr.dst));
-
- if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
- err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs);
- if (err)
- return err;
- }
- } else if (swkey->eth.type == htons(ETH_P_ARP) ||
- swkey->eth.type == htons(ETH_P_RARP)) {
- const struct ovs_key_arp *arp_key;
-
- if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
- return -EINVAL;
- attrs &= ~(1 << OVS_KEY_ATTR_ARP);
-
- key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
- arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
- swkey->ipv4.addr.src = arp_key->arp_sip;
- swkey->ipv4.addr.dst = arp_key->arp_tip;
- if (arp_key->arp_op & htons(0xff00))
- return -EINVAL;
- swkey->ip.proto = ntohs(arp_key->arp_op);
- memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
- memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
- }
-
- if (attrs)
+ if (!ovs_match_validate(match, key_attrs, mask_attrs))
return -EINVAL;
- *key_lenp = key_len;
return 0;
}
@@ -1255,7 +1734,6 @@
/**
* ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
* @flow: Receives extracted in_port, priority, tun_key and skb_mark.
- * @key_len: Length of key in @flow. Used for calculating flow hash.
* @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
* sequence.
*
@@ -1264,102 +1742,100 @@
* get the metadata, that is, the parts of the flow key that cannot be
* extracted from the packet itself.
*/
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len,
- const struct nlattr *attr)
+
+int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
+ const struct nlattr *attr)
{
struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
- const struct nlattr *nla;
- int rem;
+ const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+ u64 attrs = 0;
+ int err;
+ struct sw_flow_match match;
flow->key.phy.in_port = DP_MAX_PORTS;
flow->key.phy.priority = 0;
flow->key.phy.skb_mark = 0;
memset(tun_key, 0, sizeof(flow->key.tun_key));
- nla_for_each_nested(nla, attr, rem) {
- int type = nla_type(nla);
-
- if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) {
- int err;
-
- if (nla_len(nla) != ovs_key_lens[type])
- return -EINVAL;
-
- switch (type) {
- case OVS_KEY_ATTR_PRIORITY:
- flow->key.phy.priority = nla_get_u32(nla);
- break;
-
- case OVS_KEY_ATTR_TUNNEL:
- err = ovs_ipv4_tun_from_nlattr(nla, tun_key);
- if (err)
- return err;
- break;
-
- case OVS_KEY_ATTR_IN_PORT:
- if (nla_get_u32(nla) >= DP_MAX_PORTS)
- return -EINVAL;
- flow->key.phy.in_port = nla_get_u32(nla);
- break;
-
- case OVS_KEY_ATTR_SKB_MARK:
- flow->key.phy.skb_mark = nla_get_u32(nla);
- break;
- }
- }
- }
- if (rem)
+ err = parse_flow_nlattrs(attr, a, &attrs);
+ if (err)
return -EINVAL;
- flow->hash = ovs_flow_hash(&flow->key,
- flow_key_start(&flow->key), key_len);
+ memset(&match, 0, sizeof(match));
+ match.key = &flow->key;
+
+ err = metadata_from_nlattrs(&match, &attrs, a, false);
+ if (err)
+ return err;
return 0;
}
-int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
+int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
+ const struct sw_flow_key *output, struct sk_buff *skb)
{
struct ovs_key_ethernet *eth_key;
struct nlattr *nla, *encap;
+ bool is_mask = (swkey != output);
- if (swkey->phy.priority &&
- nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
- if (swkey->tun_key.ipv4_dst &&
- ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key))
+ if ((swkey->tun_key.ipv4_dst || is_mask) &&
+ ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
goto nla_put_failure;
- if (swkey->phy.in_port != DP_MAX_PORTS &&
- nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
- goto nla_put_failure;
+ if (swkey->phy.in_port == DP_MAX_PORTS) {
+ if (is_mask && (output->phy.in_port == 0xffff))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
+ goto nla_put_failure;
+ } else {
+ u16 upper_u16;
+ upper_u16 = !is_mask ? 0 : 0xffff;
- if (swkey->phy.skb_mark &&
- nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
+ (upper_u16 << 16) | output->phy.in_port))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
goto nla_put_failure;
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
+
eth_key = nla_data(nla);
- memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN);
- memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN);
+ memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
+ memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
- if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)) ||
- nla_put_be16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci))
+ __be16 eth_type;
+ eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
+ nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
goto nla_put_failure;
encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
if (!swkey->eth.tci)
goto unencap;
- } else {
+ } else
encap = NULL;
+
+ if (swkey->eth.type == htons(ETH_P_802_2)) {
+ /*
+ * Ethertype 802.2 is represented in Netlink by omitting
+ * OVS_KEY_ATTR_ETHERTYPE from the flow key attribute and
+ * by using 0xffff in the mask attribute. Ethertype can also
+ * be wildcarded.
+ */
+ if (is_mask && output->eth.type)
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
+ output->eth.type))
+ goto nla_put_failure;
+ goto unencap;
}
- if (swkey->eth.type == htons(ETH_P_802_2))
- goto unencap;
-
- if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type))
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
goto nla_put_failure;
if (swkey->eth.type == htons(ETH_P_IP)) {
@@ -1369,12 +1845,12 @@
if (!nla)
goto nla_put_failure;
ipv4_key = nla_data(nla);
- ipv4_key->ipv4_src = swkey->ipv4.addr.src;
- ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
- ipv4_key->ipv4_proto = swkey->ip.proto;
- ipv4_key->ipv4_tos = swkey->ip.tos;
- ipv4_key->ipv4_ttl = swkey->ip.ttl;
- ipv4_key->ipv4_frag = swkey->ip.frag;
+ ipv4_key->ipv4_src = output->ipv4.addr.src;
+ ipv4_key->ipv4_dst = output->ipv4.addr.dst;
+ ipv4_key->ipv4_proto = output->ip.proto;
+ ipv4_key->ipv4_tos = output->ip.tos;
+ ipv4_key->ipv4_ttl = output->ip.ttl;
+ ipv4_key->ipv4_frag = output->ip.frag;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
struct ovs_key_ipv6 *ipv6_key;
@@ -1382,15 +1858,15 @@
if (!nla)
goto nla_put_failure;
ipv6_key = nla_data(nla);
- memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src,
+ memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
sizeof(ipv6_key->ipv6_src));
- memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst,
+ memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
sizeof(ipv6_key->ipv6_dst));
- ipv6_key->ipv6_label = swkey->ipv6.label;
- ipv6_key->ipv6_proto = swkey->ip.proto;
- ipv6_key->ipv6_tclass = swkey->ip.tos;
- ipv6_key->ipv6_hlimit = swkey->ip.ttl;
- ipv6_key->ipv6_frag = swkey->ip.frag;
+ ipv6_key->ipv6_label = output->ipv6.label;
+ ipv6_key->ipv6_proto = output->ip.proto;
+ ipv6_key->ipv6_tclass = output->ip.tos;
+ ipv6_key->ipv6_hlimit = output->ip.ttl;
+ ipv6_key->ipv6_frag = output->ip.frag;
} else if (swkey->eth.type == htons(ETH_P_ARP) ||
swkey->eth.type == htons(ETH_P_RARP)) {
struct ovs_key_arp *arp_key;
@@ -1400,11 +1876,11 @@
goto nla_put_failure;
arp_key = nla_data(nla);
memset(arp_key, 0, sizeof(struct ovs_key_arp));
- arp_key->arp_sip = swkey->ipv4.addr.src;
- arp_key->arp_tip = swkey->ipv4.addr.dst;
- arp_key->arp_op = htons(swkey->ip.proto);
- memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
- memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
+ arp_key->arp_sip = output->ipv4.addr.src;
+ arp_key->arp_tip = output->ipv4.addr.dst;
+ arp_key->arp_op = htons(output->ip.proto);
+ memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
+ memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
}
if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1419,11 +1895,11 @@
goto nla_put_failure;
tcp_key = nla_data(nla);
if (swkey->eth.type == htons(ETH_P_IP)) {
- tcp_key->tcp_src = swkey->ipv4.tp.src;
- tcp_key->tcp_dst = swkey->ipv4.tp.dst;
+ tcp_key->tcp_src = output->ipv4.tp.src;
+ tcp_key->tcp_dst = output->ipv4.tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- tcp_key->tcp_src = swkey->ipv6.tp.src;
- tcp_key->tcp_dst = swkey->ipv6.tp.dst;
+ tcp_key->tcp_src = output->ipv6.tp.src;
+ tcp_key->tcp_dst = output->ipv6.tp.dst;
}
} else if (swkey->ip.proto == IPPROTO_UDP) {
struct ovs_key_udp *udp_key;
@@ -1433,11 +1909,25 @@
goto nla_put_failure;
udp_key = nla_data(nla);
if (swkey->eth.type == htons(ETH_P_IP)) {
- udp_key->udp_src = swkey->ipv4.tp.src;
- udp_key->udp_dst = swkey->ipv4.tp.dst;
+ udp_key->udp_src = output->ipv4.tp.src;
+ udp_key->udp_dst = output->ipv4.tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- udp_key->udp_src = swkey->ipv6.tp.src;
- udp_key->udp_dst = swkey->ipv6.tp.dst;
+ udp_key->udp_src = output->ipv6.tp.src;
+ udp_key->udp_dst = output->ipv6.tp.dst;
+ }
+ } else if (swkey->ip.proto == IPPROTO_SCTP) {
+ struct ovs_key_sctp *sctp_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
+ if (!nla)
+ goto nla_put_failure;
+ sctp_key = nla_data(nla);
+ if (swkey->eth.type == htons(ETH_P_IP)) {
+ sctp_key->sctp_src = swkey->ipv4.tp.src;
+ sctp_key->sctp_dst = swkey->ipv4.tp.dst;
+ } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+ sctp_key->sctp_src = swkey->ipv6.tp.src;
+ sctp_key->sctp_dst = swkey->ipv6.tp.dst;
}
} else if (swkey->eth.type == htons(ETH_P_IP) &&
swkey->ip.proto == IPPROTO_ICMP) {
@@ -1447,8 +1937,8 @@
if (!nla)
goto nla_put_failure;
icmp_key = nla_data(nla);
- icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src);
- icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst);
+ icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
+ icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
swkey->ip.proto == IPPROTO_ICMPV6) {
struct ovs_key_icmpv6 *icmpv6_key;
@@ -1458,8 +1948,8 @@
if (!nla)
goto nla_put_failure;
icmpv6_key = nla_data(nla);
- icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src);
- icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst);
+ icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
+ icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
@@ -1469,10 +1959,10 @@
if (!nla)
goto nla_put_failure;
nd_key = nla_data(nla);
- memcpy(nd_key->nd_target, &swkey->ipv6.nd.target,
+ memcpy(nd_key->nd_target, &output->ipv6.nd.target,
sizeof(nd_key->nd_target));
- memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN);
- memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN);
+ memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
+ memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
}
}
}
@@ -1491,6 +1981,8 @@
* Returns zero if successful or a negative error code. */
int ovs_flow_init(void)
{
+ BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
+
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
0, NULL);
if (flow_cache == NULL)
@@ -1504,3 +1996,84 @@
{
kmem_cache_destroy(flow_cache);
}
+
+struct sw_flow_mask *ovs_sw_flow_mask_alloc(void)
+{
+ struct sw_flow_mask *mask;
+
+ mask = kmalloc(sizeof(*mask), GFP_KERNEL);
+ if (mask)
+ mask->ref_count = 0;
+
+ return mask;
+}
+
+void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask)
+{
+ mask->ref_count++;
+}
+
+void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
+{
+ if (!mask)
+ return;
+
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count) {
+ list_del_rcu(&mask->list);
+ if (deferred)
+ kfree_rcu(mask, rcu);
+ else
+ kfree(mask);
+ }
+}
+
+static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a,
+ const struct sw_flow_mask *b)
+{
+ u8 *a_ = (u8 *)&a->key + a->range.start;
+ u8 *b_ = (u8 *)&b->key + b->range.start;
+
+ return (a->range.end == b->range.end)
+ && (a->range.start == b->range.start)
+ && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
+}
+
+struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl,
+ const struct sw_flow_mask *mask)
+{
+ struct list_head *ml;
+
+ list_for_each(ml, tbl->mask_list) {
+ struct sw_flow_mask *m;
+ m = container_of(ml, struct sw_flow_mask, list);
+ if (ovs_sw_flow_mask_equal(mask, m))
+ return m;
+ }
+
+ return NULL;
+}
+
+/**
+ * Add a new mask into the mask list.
+ * The caller needs to make sure that 'mask' is not the same
+ * as any masks that are already on the list.
+ */
+void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+ list_add_rcu(&mask->list, tbl->mask_list);
+}
+
+/**
+ * Set 'range' fields in the mask to the value of 'val'.
+ */
+static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
+ struct sw_flow_key_range *range, u8 val)
+{
+ u8 *m = (u8 *)&mask->key + range->start;
+
+ mask->range = *range;
+ memset(m, val, range_n_bytes(range));
+}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 66ef722..b65f885 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2011 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -33,6 +33,8 @@
#include <net/inet_ecn.h>
struct sk_buff;
+struct sw_flow_mask;
+struct flow_table;
struct sw_flow_actions {
struct rcu_head rcu;
@@ -97,8 +99,8 @@
} addr;
union {
struct {
- __be16 src; /* TCP/UDP source port. */
- __be16 dst; /* TCP/UDP destination port. */
+ __be16 src; /* TCP/UDP/SCTP source port. */
+ __be16 dst; /* TCP/UDP/SCTP destination port. */
} tp;
struct {
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
@@ -113,8 +115,8 @@
} addr;
__be32 label; /* IPv6 flow label. */
struct {
- __be16 src; /* TCP/UDP source port. */
- __be16 dst; /* TCP/UDP destination port. */
+ __be16 src; /* TCP/UDP/SCTP source port. */
+ __be16 dst; /* TCP/UDP/SCTP destination port. */
} tp;
struct {
struct in6_addr target; /* ND target address. */
@@ -123,7 +125,7 @@
} nd;
} ipv6;
};
-};
+} __aligned(__alignof__(long));
struct sw_flow {
struct rcu_head rcu;
@@ -131,6 +133,8 @@
u32 hash;
struct sw_flow_key key;
+ struct sw_flow_key unmasked_key;
+ struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
spinlock_t lock; /* Lock for values below. */
@@ -140,6 +144,20 @@
u8 tcp_flags; /* Union of seen TCP flags. */
};
+struct sw_flow_key_range {
+ size_t start;
+ size_t end;
+};
+
+struct sw_flow_match {
+ struct sw_flow_key *key;
+ struct sw_flow_key_range range;
+ struct sw_flow_mask *mask;
+};
+
+void ovs_match_init(struct sw_flow_match *match,
+ struct sw_flow_key *key, struct sw_flow_mask *mask);
+
struct arp_eth_header {
__be16 ar_hrd; /* format of hardware address */
__be16 ar_pro; /* format of protocol address */
@@ -159,21 +177,21 @@
struct sw_flow *ovs_flow_alloc(void);
void ovs_flow_deferred_free(struct sw_flow *);
-void ovs_flow_free(struct sw_flow *flow);
+void ovs_flow_free(struct sw_flow *, bool deferred);
struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len);
void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
-int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
- int *key_lenp);
+int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
void ovs_flow_used(struct sw_flow *, struct sk_buff *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
-
-int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
-int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+int ovs_flow_to_nlattrs(const struct sw_flow_key *,
+ const struct sw_flow_key *, struct sk_buff *);
+int ovs_match_from_nlattrs(struct sw_flow_match *match,
+ const struct nlattr *,
const struct nlattr *);
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len,
- const struct nlattr *attr);
+int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
+ const struct nlattr *attr);
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
#define TBL_MIN_BUCKETS 1024
@@ -182,6 +200,7 @@
struct flex_array *buckets;
unsigned int count, n_buckets;
struct rcu_head rcu;
+ struct list_head *mask_list;
int node_ver;
u32 hash_seed;
bool keep_flows;
@@ -197,22 +216,44 @@
return (table->count > table->n_buckets);
}
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
- struct sw_flow_key *key, int len);
-void ovs_flow_tbl_destroy(struct flow_table *table);
-void ovs_flow_tbl_deferred_destroy(struct flow_table *table);
+struct sw_flow *ovs_flow_lookup(struct flow_table *,
+ const struct sw_flow_key *);
+struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
+ struct sw_flow_match *match);
+
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
struct flow_table *ovs_flow_tbl_alloc(int new_size);
struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
-void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- struct sw_flow_key *key, int key_len);
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
-struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx);
+void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow);
+void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow);
+
+struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx);
extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct ovs_key_ipv4_tunnel *tun_key);
+ struct sw_flow_match *match, bool is_mask);
int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *tun_key);
+ const struct ovs_key_ipv4_tunnel *tun_key,
+ const struct ovs_key_ipv4_tunnel *output);
+bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+ const struct sw_flow_key *key, int key_end);
+
+struct sw_flow_mask {
+ int ref_count;
+ struct rcu_head rcu;
+ struct list_head list;
+ struct sw_flow_key_range range;
+ struct sw_flow_key key;
+};
+
+struct sw_flow_mask *ovs_sw_flow_mask_alloc(void);
+void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *);
+void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred);
+void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *);
+struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *,
+ const struct sw_flow_mask *);
+void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
+ const struct sw_flow_mask *mask);
#endif /* flow.h */
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 493e977..21d5073 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -16,7 +16,6 @@
* 02110-1301, USA
*/
-#ifdef CONFIG_OPENVSWITCH_GRE
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/if.h>
@@ -271,5 +270,3 @@
.get_name = gre_get_name,
.send = gre_tnl_send,
};
-
-#endif /* OPENVSWITCH_GRE */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 5982f3f..09d93c1 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -25,6 +25,7 @@
#include <linux/llc.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/openvswitch.h>
#include <net/llc.h>
@@ -74,6 +75,15 @@
return RX_HANDLER_CONSUMED;
}
+/* Return the net_device of the datapath's OVSP_LOCAL vport.
+ * BUGs when the datapath has no such vport.
+ */
+static struct net_device *get_dpdev(struct datapath *dp)
+{
+	struct vport *local_port = ovs_vport_ovsl(dp, OVSP_LOCAL);
+
+	BUG_ON(!local_port);
+
+	return netdev_vport_priv(local_port)->dev;
+}
+
static struct vport *netdev_create(const struct vport_parms *parms)
{
struct vport *vport;
@@ -103,10 +113,15 @@
}
rtnl_lock();
+ err = netdev_master_upper_dev_link(netdev_vport->dev,
+ get_dpdev(vport->dp));
+ if (err)
+ goto error_unlock;
+
err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
vport);
if (err)
- goto error_unlock;
+ goto error_master_upper_dev_unlink;
dev_set_promiscuity(netdev_vport->dev, 1);
netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
@@ -114,6 +129,8 @@
return vport;
+error_master_upper_dev_unlink:
+ netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
error_unlock:
rtnl_unlock();
error_put:
@@ -140,6 +157,7 @@
rtnl_lock();
netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
netdev_rx_handler_unregister(netdev_vport->dev);
+ netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
dev_set_promiscuity(netdev_vport->dev, -1);
rtnl_unlock();
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index d69e0c0..6f65dbe 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -203,7 +203,7 @@
* ovs_vport_set_options - modify existing vport device (for kernel callers)
*
* @vport: vport to modify.
- * @port: New configuration.
+ * @options: New configuration.
*
* Modifies an existing device with the specified configuration (which is
* dependent on device type). ovs_mutex must be held.
@@ -328,6 +328,7 @@
*
* @vport: vport that received the packet
* @skb: skb that was received
+ * @tun_key: tunnel (if any) that carried packet
*
* Must be called with rcu_read_lock. The packet cannot be shared and
* skb->data should point to the Ethernet header.
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6c53dd9..2e8286b 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -88,7 +88,7 @@
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
-
+#include <linux/reciprocal_div.h>
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
@@ -1135,7 +1135,7 @@
struct sk_buff *skb,
unsigned int num)
{
- return (((u64)skb->rxhash) * num) >> 32;
+ return reciprocal_divide(skb->rxhash, num);
}
static unsigned int fanout_demux_lb(struct packet_fanout *f,
@@ -1158,6 +1158,13 @@
return smp_processor_id() % num;
}
+/* PACKET_FANOUT_RND demux: derive the socket index from a pseudo-random
+ * 32-bit value scaled with reciprocal_divide(). @f and @skb are unused.
+ */
+static unsigned int fanout_demux_rnd(struct packet_fanout *f,
+				     struct sk_buff *skb,
+				     unsigned int num)
+{
+	u32 rnd = prandom_u32();
+
+	return reciprocal_divide(rnd, num);
+}
+
static unsigned int fanout_demux_rollover(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int idx, unsigned int skip,
@@ -1215,6 +1222,9 @@
case PACKET_FANOUT_CPU:
idx = fanout_demux_cpu(f, skb, num);
break;
+ case PACKET_FANOUT_RND:
+ idx = fanout_demux_rnd(f, skb, num);
+ break;
case PACKET_FANOUT_ROLLOVER:
idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
break;
@@ -1284,6 +1294,7 @@
case PACKET_FANOUT_HASH:
case PACKET_FANOUT_LB:
case PACKET_FANOUT_CPU:
+ case PACKET_FANOUT_RND:
break;
default:
return -EINVAL;
@@ -3215,9 +3226,11 @@
if (po->tp_version == TPACKET_V3) {
lv = sizeof(struct tpacket_stats_v3);
+ st.stats3.tp_packets += st.stats3.tp_drops;
data = &st.stats3;
} else {
lv = sizeof(struct tpacket_stats);
+ st.stats1.tp_packets += st.stats1.tp_drops;
data = &st.stats1;
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 235e01a..c03a32a 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -272,6 +272,20 @@
If unsure, say N.
+config NET_SCH_FQ
+ tristate "Fair Queue"
+ help
+ Say Y here if you want to use the FQ packet scheduling algorithm.
+
+ FQ does flow separation, and is able to respect pacing requirements
+ set by the TCP stack in sk->sk_pacing_rate (for locally generated
+ traffic).
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_fq.
+
+ If unsure, say N.
+
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 978cbf0..e5f9abe 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -39,6 +39,7 @@
obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o
obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
+obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 51b968d..2adda7f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -200,6 +200,58 @@
}
EXPORT_SYMBOL(unregister_qdisc);
+/* Copy the id of the current default qdisc into @name (bounded by @len
+ * through strlcpy()) while holding qdisc_mod_lock for reading.
+ */
+void qdisc_get_default(char *name, size_t len)
+{
+	const struct Qdisc_ops *dflt;
+
+	read_lock(&qdisc_mod_lock);
+	dflt = default_qdisc_ops;
+	strlcpy(name, dflt->id, len);
+	read_unlock(&qdisc_mod_lock);
+}
+
+/* Look up a qdisc by id on the qdisc_base list and take a reference on its
+ * owning module. Returns NULL when the id is not found or when the module
+ * reference cannot be taken. Only the first entry with a matching id is
+ * considered.
+ */
+static struct Qdisc_ops *qdisc_lookup_default(const char *name)
+{
+	struct Qdisc_ops *ops;
+
+	for (ops = qdisc_base; ops != NULL; ops = ops->next) {
+		if (strcmp(name, ops->id) != 0)
+			continue;
+
+		return try_module_get(ops->owner) ? ops : NULL;
+	}
+
+	return NULL;
+}
+
+/* Make the qdisc called @name the default one.
+ *
+ * Requires CAP_NET_ADMIN (-EPERM otherwise). If the qdisc is not registered
+ * yet, the lock is dropped, module "sch_<name>" is requested and the lookup
+ * is retried once. On success the module reference held for the previous
+ * default is released. Returns 0, or -ENOENT when the qdisc cannot be found.
+ */
+int qdisc_set_default(const char *name)
+{
+	const struct Qdisc_ops *ops;
+	int err = -ENOENT;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	write_lock(&qdisc_mod_lock);
+
+	ops = qdisc_lookup_default(name);
+	if (ops == NULL) {
+		/* request_module() must not run under the rwlock */
+		write_unlock(&qdisc_mod_lock);
+		request_module("sch_%s", name);
+		write_lock(&qdisc_mod_lock);
+
+		ops = qdisc_lookup_default(name);
+	}
+
+	if (ops != NULL) {
+		/* swap the default, dropping the old module reference */
+		module_put(default_qdisc_ops->owner);
+		default_qdisc_ops = ops;
+		err = 0;
+	}
+
+	write_unlock(&qdisc_mod_lock);
+
+	return err;
+}
+
/* We know handle. Find qdisc among all qdisc's attached to device
(root qdisc, all its children, children of children etc.)
*/
@@ -1854,6 +1906,7 @@
return err;
}
+ register_qdisc(&pfifo_fast_ops);
register_qdisc(&pfifo_qdisc_ops);
register_qdisc(&bfifo_qdisc_ops);
register_qdisc(&pfifo_head_drop_qdisc_ops);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
new file mode 100644
index 0000000..32ad015
--- /dev/null
+++ b/net/sched/sch_fq.c
@@ -0,0 +1,793 @@
+/*
+ * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
+ *
+ * Copyright (C) 2013 Eric Dumazet <edumazet@google.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Meant to be mostly used for locally generated traffic :
+ * Fast classification depends on skb->sk being set before reaching us.
+ * If not, (router workload), we use rxhash as fallback, with 32 bits wide hash.
+ * All packets belonging to a socket are considered as a 'flow'.
+ *
+ * Flows are dynamically allocated and stored in a hash table of RB trees
+ * They are also part of one Round Robin 'queues' (new or old flows)
+ *
+ * Burst avoidance (aka pacing) capability :
+ *
+ * Transport (eg TCP) can set in sk->sk_pacing_rate a rate, enqueue a
+ * bunch of packets, and this packet scheduler adds delay between
+ * packets to respect rate limitation.
+ *
+ * enqueue() :
+ * - lookup one RB tree (out of 1024 or more) to find the flow.
+ * If non existent flow, create it, add it to the tree.
+ * Add skb to the per flow list of skb (fifo).
+ * - Use a special fifo for high prio packets
+ *
+ * dequeue() : serves flows in Round Robin
+ * Note : When a flow becomes empty, we do not immediately remove it from
+ * rb trees, for performance reasons (it is expected to send additional
+ * packets, or the SLAB cache will reuse the socket for another flow)
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/rbtree.h>
+#include <linux/hash.h>
+#include <linux/prefetch.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+
+/*
+ * Per flow structure, dynamically allocated
+ * (from fq_flow_cachep in fq_classify(); one more instance, 'internal',
+ * is embedded in struct fq_sched_data)
+ */
+struct fq_flow {
+ struct sk_buff *head; /* list of skbs for this flow : first skb */
+ union {
+ struct sk_buff *tail; /* last skb in the list */
+ unsigned long age; /* jiffies when flow was emptied, for gc */
+ };
+ struct rb_node fq_node; /* anchor in fq_root[] trees */
+ struct sock *sk; /* tree key: socket pointer, or (rxhash | 1) when skb->sk is NULL */
+ int qlen; /* number of packets in flow queue */
+ int credit; /* reduced by packet length in fq_dequeue(), refilled with quantum when <= 0 */
+ u32 socket_hash; /* sk_hash */
+ struct fq_flow *next; /* next pointer in RR lists, or &detached / &throttled */
+
+ struct rb_node rate_node; /* anchor in q->delayed tree */
+ u64 time_next_packet; /* ns (ktime_get() based); fq_dequeue() throttles the flow until then */
+};
+
+/* Singly linked FIFO of flows, chained through fq_flow::next
+ * (see fq_flow_add_tail()).
+ */
+struct fq_flow_head {
+ struct fq_flow *first; /* NULL when the list is empty */
+ struct fq_flow *last; /* only read while first != NULL */
+};
+
+/* Private data of one fq qdisc instance (obtained with qdisc_priv()). */
+struct fq_sched_data {
+ struct fq_flow_head new_flows; /* flows that were detached when a packet arrived (fq_enqueue) */
+
+ struct fq_flow_head old_flows; /* flows re-queued by fq_dequeue()/fq_check_throttled() */
+
+ struct rb_root delayed; /* for rate limited flows */
+ u64 time_next_delayed_flow; /* lowest time_next_packet of delayed flows; ~0ULL once the tree was found empty */
+
+ struct fq_flow internal; /* for non classified or high prio packets */
+ u32 quantum; /* credit added to a flow whose credit is <= 0 in fq_dequeue() */
+ u32 initial_quantum; /* credit given to a flow created (or re-keyed) in fq_classify() */
+ u32 flow_default_rate;/* rate per flow : bytes per second */
+ u32 flow_max_rate; /* optional max rate per flow */
+ u32 flow_plimit; /* max packets per flow */
+ struct rb_root *fq_root; /* array of (1 << fq_trees_log) rb-tree roots */
+ u8 rate_enable; /* 0 or 1; pacing is only applied in fq_dequeue() when set */
+ u8 fq_trees_log; /* log2 of the number of trees in fq_root[] */
+
+ u32 flows; /* flows currently linked in fq_root[] trees */
+ u32 inactive_flows; /* of those, flows currently marked detached */
+ u32 throttled_flows; /* flows currently in the delayed tree */
+
+ u64 stat_gc_flows; /* flows freed by fq_gc()/fq_rehash() */
+ u64 stat_internal_packets; /* packets enqueued on the internal flow */
+ u64 stat_tcp_retrans; /* packets for which skb_is_retransmit() was true */
+ u64 stat_throttled; /* number of times a flow was put in the delayed tree */
+ u64 stat_flows_plimit; /* packets dropped because of flow_plimit */
+ u64 stat_pkts_too_long; /* pacing delays clamped to 125 ms in fq_dequeue() */
+ u64 stat_allocation_errors; /* flow allocations that failed in fq_classify() */
+ struct qdisc_watchdog watchdog; /* armed by fq_dequeue() for time_next_delayed_flow */
+};
+
+/* special values stored in fq_flow::next to mark a flow that is not on the
+ * old/new lists: &detached (see fq_flow_set_detached()) or &throttled
+ * (flow sits in the q->delayed tree, see fq_flow_set_throttled())
+ */
+static struct fq_flow detached, throttled;
+
+/* Mark @f as detached by pointing its RR link at the &detached sentinel. */
+static void fq_flow_set_detached(struct fq_flow *f)
+{
+	struct fq_flow *sentinel = &detached;
+
+	f->next = sentinel;
+}
+
+/* True when @f carries the &detached sentinel in its RR link. */
+static bool fq_flow_is_detached(const struct fq_flow *f)
+{
+	const struct fq_flow *link = f->next;
+
+	return link == &detached;
+}
+
+/* Park flow @f in the q->delayed rb-tree, keyed by time_next_packet.
+ * A flow whose key equals an existing one is placed to the right of it.
+ * The flow is tagged with the &throttled sentinel, and
+ * q->time_next_delayed_flow is lowered when @f is due earlier.
+ */
+static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
+{
+	struct rb_node **link = &q->delayed.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link != NULL) {
+		struct fq_flow *cur;
+
+		parent = *link;
+		cur = container_of(parent, struct fq_flow, rate_node);
+		link = (f->time_next_packet >= cur->time_next_packet) ?
+			&parent->rb_right : &parent->rb_left;
+	}
+	rb_link_node(&f->rate_node, parent, link);
+	rb_insert_color(&f->rate_node, &q->delayed);
+
+	q->throttled_flows++;
+	q->stat_throttled++;
+	f->next = &throttled;
+
+	if (f->time_next_packet < q->time_next_delayed_flow)
+		q->time_next_delayed_flow = f->time_next_packet;
+}
+
+
+static struct kmem_cache *fq_flow_cachep __read_mostly;
+
+/* Append @flow at the tail of @head and terminate the list after it. */
+static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
+{
+	if (!head->first)
+		head->first = flow;
+	else
+		head->last->next = flow;
+
+	head->last = flow;
+	/* kept last, exactly as the list link above may alias @flow */
+	flow->next = NULL;
+}
+
+/* limit number of collected flows per round */
+#define FQ_GC_MAX 8
+#define FQ_GC_AGE (3*HZ)
+
+/* A flow may be garbage collected when it is detached and was emptied more
+ * than FQ_GC_AGE jiffies ago. f->age shares storage with f->tail, so it is
+ * only looked at for detached flows.
+ */
+static bool fq_gc_candidate(const struct fq_flow *f)
+{
+	if (!fq_flow_is_detached(f))
+		return false;
+
+	return time_after(jiffies, f->age + FQ_GC_AGE);
+}
+
+/* Opportunistic garbage collection on one tree.
+ *
+ * Descend @root along the path a lookup of @sk would take and collect up to
+ * FQ_GC_MAX flows on that path that satisfy fq_gc_candidate(). The walk stops
+ * at the flow keyed by @sk (which is never collected), at a leaf, or when the
+ * batch is full. Collected flows are then erased from the tree and freed, and
+ * the flow counters are adjusted.
+ */
+static void fq_gc(struct fq_sched_data *q,
+		  struct rb_root *root,
+		  struct sock *sk)
+{
+	struct fq_flow *victims[FQ_GC_MAX];
+	struct rb_node *node = root->rb_node;
+	int nr = 0;
+	int i;
+
+	while (node != NULL) {
+		struct fq_flow *cur = container_of(node, struct fq_flow, fq_node);
+
+		if (cur->sk == sk)
+			break;
+
+		if (fq_gc_candidate(cur)) {
+			victims[nr++] = cur;
+			if (nr == FQ_GC_MAX)
+				break;
+		}
+
+		node = (cur->sk > sk) ? node->rb_right : node->rb_left;
+	}
+
+	q->flows -= nr;
+	q->inactive_flows -= nr;
+	q->stat_gc_flows += nr;
+
+	/* release in reverse order of collection */
+	for (i = nr - 1; i >= 0; i--) {
+		rb_erase(&victims[i]->fq_node, root);
+		kmem_cache_free(fq_flow_cachep, victims[i]);
+	}
+}
+
+/* Band for each value of (skb->priority & TC_PRIO_MAX). fq_classify() routes
+ * band 0 to the internal flow; bands 1 and 2 are treated alike by the code
+ * in this file.
+ */
+static const u8 prio2band[TC_PRIO_MAX + 1] = {
+ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+/* Find, or create, the flow @skb belongs to.
+ *
+ * Returns &q->internal for band 0 priorities and when a new flow cannot be
+ * allocated; otherwise the flow keyed by skb->sk (or by rxhash | 1 when the
+ * skb has no socket). A newly created flow is returned detached, with
+ * initial_quantum credit, and is counted in q->flows / q->inactive_flows.
+ */
+static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
+{
+ struct rb_node **p, *parent;
+ struct sock *sk = skb->sk;
+ struct rb_root *root;
+ struct fq_flow *f;
+ int band;
+
+ /* warning: no starvation prevention... */
+ band = prio2band[skb->priority & TC_PRIO_MAX];
+ if (unlikely(band == 0))
+ return &q->internal;
+
+ if (unlikely(!sk)) {
+ /* By forcing low order bit to 1, we make sure to not
+ * collide with a local flow (socket pointers are word aligned)
+ */
+ sk = (struct sock *)(skb_get_rxhash(skb) | 1L);
+ }
+
+ root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
+
+ /* gc runs before the lookup, on the tree we are about to search, and
+ * only when at least half of a large flow population is inactive
+ */
+ if (q->flows >= (2U << q->fq_trees_log) &&
+ q->inactive_flows > q->flows/2)
+ fq_gc(q, root, sk);
+
+ p = &root->rb_node;
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+
+ f = container_of(parent, struct fq_flow, fq_node);
+ if (f->sk == sk) {
+ /* socket might have been reallocated, so check
+ * if its sk_hash is the same.
+ * If not, we need to refill credit with
+ * initial quantum
+ */
+ if (unlikely(skb->sk &&
+ f->socket_hash != sk->sk_hash)) {
+ f->credit = q->initial_quantum;
+ f->socket_hash = sk->sk_hash;
+ }
+ return f;
+ }
+ if (f->sk > sk)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+
+ /* not found: parent/p now describe the insertion point */
+ f = kmem_cache_zalloc(fq_flow_cachep, GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!f)) {
+ q->stat_allocation_errors++;
+ return &q->internal;
+ }
+ fq_flow_set_detached(f);
+ f->sk = sk;
+ /* sk is only a real socket (safe to dereference) when skb->sk was set */
+ if (skb->sk)
+ f->socket_hash = sk->sk_hash;
+ f->credit = q->initial_quantum;
+
+ rb_link_node(&f->fq_node, parent, p);
+ rb_insert_color(&f->fq_node, root);
+
+ q->flows++;
+ q->inactive_flows++;
+ return f;
+}
+
+
+/* Unlink and return the first skb of @flow, or NULL when the flow holds no
+ * packet. Only flow->head and flow->qlen are updated here; flow->tail is
+ * left as it is.
+ */
+static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
+{
+	struct sk_buff *first = flow->head;
+
+	if (!first)
+		return NULL;
+
+	flow->head = first->next;
+	first->next = NULL;
+	flow->qlen--;
+
+	return first;
+}
+
+/* We might add in the future detection of retransmits
+ * For the time being, just return false
+ * (so the retransmit-only paths in flow_queue_add() and fq_enqueue()
+ * are currently never taken)
+ */
+static bool skb_is_retransmit(struct sk_buff *skb)
+{
+ return false;
+}
+
+/* add skb to flow queue
+ * flow queue is a linked list, kind of FIFO, except for TCP retransmits
+ * We special case tcp retransmits to be transmitted before other packets.
+ * We rely on fact that TCP retransmits are unlikely, so we do not waste
+ * a separate queue or a pointer.
+ * head-> [retrans pkt 1]
+ * [retrans pkt 2]
+ * [ normal pkt 1]
+ * [ normal pkt 2]
+ * [ normal pkt 3]
+ * tail-> [ normal pkt 4]
+ *
+ * Only the skb list (head/tail and skb->next) is modified here; the caller
+ * accounts for flow->qlen.
+ */
+static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
+{
+ struct sk_buff *prev, *head = flow->head;
+
+ skb->next = NULL;
+ /* empty flow: skb is both head and tail */
+ if (!head) {
+ flow->head = skb;
+ flow->tail = skb;
+ return;
+ }
+ /* common case: plain FIFO append */
+ if (likely(!skb_is_retransmit(skb))) {
+ flow->tail->next = skb;
+ flow->tail = skb;
+ return;
+ }
+
+ /* This skb is a tcp retransmit,
+ * find the last retrans packet in the queue
+ */
+ prev = NULL;
+ while (skb_is_retransmit(head)) {
+ prev = head;
+ head = head->next;
+ if (!head)
+ break;
+ }
+ if (!prev) { /* no rtx packet in queue, become the new head */
+ skb->next = flow->head;
+ flow->head = skb;
+ } else {
+ /* insert right after the last retransmit; skb->next stays NULL
+ * when that packet was the tail
+ */
+ if (prev == flow->tail)
+ flow->tail = skb;
+ else
+ skb->next = prev->next;
+ prev->next = skb;
+ }
+}
+
+/* Qdisc ->enqueue.
+ *
+ * Drops the packet when the qdisc already holds sch->limit packets, or when
+ * its flow (other than the internal one) already holds flow_plimit packets.
+ * Otherwise the skb is appended to its flow; a flow that was detached is
+ * put on the new_flows list with at least one quantum of credit.
+ * Returns NET_XMIT_SUCCESS or the value of qdisc_drop().
+ */
+static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct fq_sched_data *q = qdisc_priv(sch);
+	struct fq_flow *flow;
+	bool is_internal;
+
+	if (unlikely(sch->q.qlen >= sch->limit))
+		return qdisc_drop(skb, sch);
+
+	flow = fq_classify(skb, q);
+	is_internal = (flow == &q->internal);
+
+	if (unlikely(flow->qlen >= q->flow_plimit && !is_internal)) {
+		q->stat_flows_plimit++;
+		return qdisc_drop(skb, sch);
+	}
+
+	flow->qlen++;
+	flow_queue_add(flow, skb);
+	if (skb_is_retransmit(skb))
+		q->stat_tcp_retrans++;
+	sch->qstats.backlog += qdisc_pkt_len(skb);
+
+	if (fq_flow_is_detached(flow)) {
+		/* the flow becomes schedulable again */
+		fq_flow_add_tail(&q->new_flows, flow);
+		if (q->quantum > flow->credit)
+			flow->credit = q->quantum;
+		q->inactive_flows--;
+		qdisc_unthrottled(sch);
+	}
+
+	if (unlikely(is_internal)) {
+		q->stat_internal_packets++;
+		qdisc_unthrottled(sch);
+	}
+
+	sch->q.qlen++;
+
+	return NET_XMIT_SUCCESS;
+}
+
+/* Move every delayed flow whose time_next_packet is not after @now from the
+ * q->delayed tree to the tail of old_flows. Nothing is done while
+ * q->time_next_delayed_flow is still in the future. On return that field
+ * holds the due time of the earliest flow left in the tree, or ~0ULL when
+ * the tree is empty.
+ */
+static void fq_check_throttled(struct fq_sched_data *q, u64 now)
+{
+	struct rb_node *node;
+
+	if (now < q->time_next_delayed_flow)
+		return;
+
+	q->time_next_delayed_flow = ~0ULL;
+
+	for (node = rb_first(&q->delayed); node != NULL;
+	     node = rb_first(&q->delayed)) {
+		struct fq_flow *flow = container_of(node, struct fq_flow,
+						    rate_node);
+
+		if (now < flow->time_next_packet) {
+			q->time_next_delayed_flow = flow->time_next_packet;
+			break;
+		}
+
+		rb_erase(node, &q->delayed);
+		q->throttled_flows--;
+		fq_flow_add_tail(&q->old_flows, flow);
+	}
+}
+
+/* Qdisc ->dequeue.
+ *
+ * Packets of the internal flow are served first. Otherwise flows are served
+ * round robin, new_flows before old_flows. Returns the next skb, or NULL
+ * when no flow on either list can send now; in that case the watchdog is
+ * armed for q->time_next_delayed_flow unless that is ~0ULL.
+ */
+static struct sk_buff *fq_dequeue(struct Qdisc *sch)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
+ u64 now = ktime_to_ns(ktime_get());
+ struct fq_flow_head *head;
+ struct sk_buff *skb;
+ struct fq_flow *f;
+
+ /* internal flow bypasses credit and pacing */
+ skb = fq_dequeue_head(&q->internal);
+ if (skb)
+ goto out;
+ fq_check_throttled(q, now);
+begin:
+ head = &q->new_flows;
+ if (!head->first) {
+ head = &q->old_flows;
+ if (!head->first) {
+ if (q->time_next_delayed_flow != ~0ULL)
+ qdisc_watchdog_schedule_ns(&q->watchdog,
+ q->time_next_delayed_flow);
+ return NULL;
+ }
+ }
+ f = head->first;
+
+ /* out of credit: refill by one quantum and send to the back of old_flows */
+ if (f->credit <= 0) {
+ f->credit += q->quantum;
+ head->first = f->next;
+ fq_flow_add_tail(&q->old_flows, f);
+ goto begin;
+ }
+
+ /* flow has packets but is not allowed to send yet: park it */
+ if (unlikely(f->head && now < f->time_next_packet)) {
+ head->first = f->next;
+ fq_flow_set_throttled(q, f);
+ goto begin;
+ }
+
+ skb = fq_dequeue_head(f);
+ if (!skb) {
+ head->first = f->next;
+ /* force a pass through old_flows to prevent starvation */
+ if ((head == &q->new_flows) && q->old_flows.first) {
+ fq_flow_add_tail(&q->old_flows, f);
+ } else {
+ fq_flow_set_detached(f);
+ /* age overlays tail, which is unused while the flow is empty */
+ f->age = jiffies;
+ q->inactive_flows++;
+ }
+ goto begin;
+ }
+ prefetch(&skb->end);
+ f->time_next_packet = now;
+ f->credit -= qdisc_pkt_len(skb);
+
+ /* pacing: only computed once the credit is used up, and only for
+ * packets that still have a socket not in TCP_TIME_WAIT
+ */
+ if (f->credit <= 0 &&
+ q->rate_enable &&
+ skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
+ u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
+
+ rate = min(rate, q->flow_max_rate);
+ if (rate) {
+ /* transmit time of this packet at 'rate' bytes per second, in ns */
+ u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC;
+
+ do_div(len, rate);
+ /* Since socket rate can change later,
+ * clamp the delay to 125 ms.
+ * TODO: maybe segment the too big skb, as in commit
+ * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
+ */
+ if (unlikely(len > 125 * NSEC_PER_MSEC)) {
+ len = 125 * NSEC_PER_MSEC;
+ q->stat_pkts_too_long++;
+ }
+
+ f->time_next_packet = now + len;
+ }
+ }
+out:
+ /* qdisc level accounting, shared with the internal flow path */
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_bstats_update(sch, skb);
+ sch->q.qlen--;
+ qdisc_unthrottled(sch);
+ return skb;
+}
+
+/* Qdisc ->reset: drop every queued packet and forget every flow.
+ *
+ * Draining through fq_dequeue() is not sufficient: a flow parked in the
+ * q->delayed tree is on neither RR list, so fq_dequeue() returns NULL while
+ * that flow still owns packets. It would also arm the watchdog and account
+ * the dropped packets as transmitted. Walk the internal flow and every
+ * fq_root[] tree directly instead, then reset the bookkeeping that described
+ * the flows.
+ */
+static void fq_reset(struct Qdisc *sch)
+{
+	struct fq_sched_data *q = qdisc_priv(sch);
+	struct rb_root *root;
+	struct sk_buff *skb;
+	struct rb_node *p;
+	struct fq_flow *f;
+	unsigned int idx;
+
+	while ((skb = fq_dequeue_head(&q->internal)) != NULL)
+		kfree_skb(skb);
+
+	if (q->fq_root) {
+		for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
+			root = &q->fq_root[idx];
+			while ((p = rb_first(root)) != NULL) {
+				f = container_of(p, struct fq_flow, fq_node);
+				rb_erase(p, root);
+
+				while ((skb = fq_dequeue_head(f)) != NULL)
+					kfree_skb(skb);
+
+				kmem_cache_free(fq_flow_cachep, f);
+			}
+		}
+	}
+
+	/* all flows are gone: nothing may still reference them */
+	q->new_flows.first = NULL;
+	q->old_flows.first = NULL;
+	q->delayed = RB_ROOT;
+	q->time_next_delayed_flow = ~0ULL;
+	q->flows = 0;
+	q->inactive_flows = 0;
+	q->throttled_flows = 0;
+
+	/* normally maintained by fq_dequeue(), which was bypassed above */
+	sch->q.qlen = 0;
+	sch->qstats.backlog = 0;
+}
+
+/* Move all flows from @old_array (1 << @old_log trees) into @new_array
+ * (1 << @new_log trees). Flows that satisfy fq_gc_candidate() are freed
+ * instead of being moved and are accounted as garbage collected. Every tree
+ * of @old_array is empty on return; the array itself is not freed here.
+ */
+static void fq_rehash(struct fq_sched_data *q,
+ struct rb_root *old_array, u32 old_log,
+ struct rb_root *new_array, u32 new_log)
+{
+ struct rb_node *op, **np, *parent;
+ struct rb_root *oroot, *nroot;
+ struct fq_flow *of, *nf;
+ int fcnt = 0;
+ u32 idx;
+
+ for (idx = 0; idx < (1U << old_log); idx++) {
+ oroot = &old_array[idx];
+ while ((op = rb_first(oroot)) != NULL) {
+ rb_erase(op, oroot);
+ of = container_of(op, struct fq_flow, fq_node);
+ if (fq_gc_candidate(of)) {
+ fcnt++;
+ kmem_cache_free(fq_flow_cachep, of);
+ continue;
+ }
+ /* same hash and ordering as the lookup in fq_classify() */
+ nroot = &new_array[hash_32((u32)(long)of->sk, new_log)];
+
+ np = &nroot->rb_node;
+ parent = NULL;
+ while (*np) {
+ parent = *np;
+
+ nf = container_of(parent, struct fq_flow, fq_node);
+ /* two flows with the same key would be a corrupted table */
+ BUG_ON(nf->sk == of->sk);
+
+ if (nf->sk > of->sk)
+ np = &parent->rb_right;
+ else
+ np = &parent->rb_left;
+ }
+
+ rb_link_node(&of->fq_node, parent, np);
+ rb_insert_color(&of->fq_node, nroot);
+ }
+ }
+ q->flows -= fcnt;
+ q->inactive_flows -= fcnt;
+ q->stat_gc_flows += fcnt;
+}
+
+/* Allocate an array of (1 << @log) empty rb-tree roots.
+ * Uses GFP_KERNEL, so it must not be called with a spinlock held.
+ * Returns NULL when the allocation fails.
+ */
+static struct rb_root *fq_alloc_roots(u32 log)
+{
+	struct rb_root *array;
+	u32 idx;
+
+	array = kmalloc(sizeof(struct rb_root) << log, GFP_KERNEL);
+	if (!array)
+		return NULL;
+
+	for (idx = 0; idx < (1U << log); idx++)
+		array[idx] = RB_ROOT;
+
+	return array;
+}
+
+/* Make @array (1 << @log roots) the flow table of @q, moving the existing
+ * flows over with fq_rehash(). Does not allocate. Returns the previous
+ * array (NULL if there was none), which the caller has to kfree().
+ */
+static struct rb_root *fq_install_roots(struct fq_sched_data *q,
+					struct rb_root *array, u32 log)
+{
+	struct rb_root *old = q->fq_root;
+
+	if (old)
+		fq_rehash(q, old, q->fq_trees_log, array, log);
+	q->fq_root = array;
+	q->fq_trees_log = log;
+
+	return old;
+}
+
+/* Resize the flow table of @q to (1 << @log) trees; a no-op when a table of
+ * that size already exists. Allocates with GFP_KERNEL, so this is only for
+ * callers that may sleep. Returns 0 or -ENOMEM.
+ */
+static int fq_resize(struct fq_sched_data *q, u32 log)
+{
+	struct rb_root *array;
+
+	if (q->fq_root && log == q->fq_trees_log)
+		return 0;
+
+	array = fq_alloc_roots(log);
+	if (!array)
+		return -ENOMEM;
+
+	kfree(fq_install_roots(q, array, log));
+
+	return 0;
+}
+
+static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
+	[TCA_FQ_PLIMIT]			= { .type = NLA_U32 },
+	[TCA_FQ_FLOW_PLIMIT]		= { .type = NLA_U32 },
+	[TCA_FQ_QUANTUM]		= { .type = NLA_U32 },
+	[TCA_FQ_INITIAL_QUANTUM]	= { .type = NLA_U32 },
+	[TCA_FQ_RATE_ENABLE]		= { .type = NLA_U32 },
+	[TCA_FQ_FLOW_DEFAULT_RATE]	= { .type = NLA_U32 },
+	[TCA_FQ_FLOW_MAX_RATE]		= { .type = NLA_U32 },
+	[TCA_FQ_BUCKETS_LOG]		= { .type = NLA_U32 },
+};
+
+/* Qdisc ->change: apply the netlink options nested in @opt.
+ *
+ * Returns 0, the error of nla_parse_nested(), -EINVAL for an out of range
+ * TCA_FQ_BUCKETS_LOG or TCA_FQ_RATE_ENABLE, or -ENOMEM when a new flow table
+ * cannot be allocated. As before, the other (valid) attributes are still
+ * applied when -EINVAL or -ENOMEM is returned.
+ *
+ * Fixes over the previous version:
+ *  - TCA_FQ_INITIAL_QUANTUM now sets q->initial_quantum (it used to
+ *    overwrite q->quantum);
+ *  - the GFP_KERNEL allocation of the flow table happens before the qdisc
+ *    tree lock is taken, instead of under it;
+ *  - the trim loop stops when fq_dequeue() has nothing to give (e.g. only
+ *    throttled flows are left) instead of spinning with the lock held.
+ */
+static int fq_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct fq_sched_data *q = qdisc_priv(sch);
+	struct rb_root *new_root = NULL, *old_root = NULL;
+	struct nlattr *tb[TCA_FQ_MAX + 1];
+	int err, drop_count = 0;
+	u32 rate_enable = 0;
+	u32 fq_log;
+
+	if (!opt)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy);
+	if (err < 0)
+		return err;
+
+	/* Validate and allocate while we may still sleep.
+	 * NOTE(review): reading fq_root/fq_trees_log without the tree lock
+	 * assumes ->init/->change, their only writers in this file, are
+	 * serialised by the caller - confirm.
+	 */
+	fq_log = q->fq_trees_log;
+	if (tb[TCA_FQ_BUCKETS_LOG]) {
+		u32 nval = nla_get_u32(tb[TCA_FQ_BUCKETS_LOG]);
+
+		if (nval >= 1 && nval <= ilog2(256*1024))
+			fq_log = nval;
+		else
+			err = -EINVAL;
+	}
+
+	if (tb[TCA_FQ_RATE_ENABLE]) {
+		rate_enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
+		if (rate_enable > 1)
+			err = -EINVAL;
+	}
+
+	if (!err && (!q->fq_root || fq_log != q->fq_trees_log)) {
+		new_root = fq_alloc_roots(fq_log);
+		if (!new_root)
+			err = -ENOMEM;
+	}
+
+	sch_tree_lock(sch);
+
+	if (tb[TCA_FQ_PLIMIT])
+		sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]);
+
+	if (tb[TCA_FQ_FLOW_PLIMIT])
+		q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
+
+	if (tb[TCA_FQ_QUANTUM])
+		q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
+
+	if (tb[TCA_FQ_INITIAL_QUANTUM])
+		q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+
+	if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
+		q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
+
+	if (tb[TCA_FQ_FLOW_MAX_RATE])
+		q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
+
+	if (tb[TCA_FQ_RATE_ENABLE] && rate_enable <= 1)
+		q->rate_enable = rate_enable;
+
+	/* swapping tables must be atomic with respect to enqueue/dequeue */
+	if (new_root)
+		old_root = fq_install_roots(q, new_root, fq_log);
+
+	while (sch->q.qlen > sch->limit) {
+		struct sk_buff *skb = fq_dequeue(sch);
+
+		/* nothing can be dequeued right now */
+		if (!skb)
+			break;
+		kfree_skb(skb);
+		drop_count++;
+	}
+	qdisc_tree_decrease_qlen(sch, drop_count);
+
+	sch_tree_unlock(sch);
+
+	kfree(old_root);
+
+	return err;
+}
+
+/* Qdisc ->destroy: free every flow, the fq_root[] array, and stop the
+ * watchdog.
+ *
+ * Do not assume that every flow is already empty at this point: a flow that
+ * sits in the q->delayed tree cannot be reached through fq_dequeue(), so it
+ * may still own packets. Free any skb still queued on a flow together with
+ * the flow; freeing only the flow would leak them.
+ */
+static void fq_destroy(struct Qdisc *sch)
+{
+	struct fq_sched_data *q = qdisc_priv(sch);
+	struct rb_root *root;
+	struct sk_buff *skb;
+	struct rb_node *p;
+	struct fq_flow *f;
+	unsigned int idx;
+
+	while ((skb = fq_dequeue_head(&q->internal)) != NULL)
+		kfree_skb(skb);
+
+	if (q->fq_root) {
+		for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
+			root = &q->fq_root[idx];
+			while ((p = rb_first(root)) != NULL) {
+				f = container_of(p, struct fq_flow, fq_node);
+				rb_erase(p, root);
+
+				while ((skb = fq_dequeue_head(f)) != NULL)
+					kfree_skb(skb);
+
+				kmem_cache_free(fq_flow_cachep, f);
+			}
+		}
+		kfree(q->fq_root);
+	}
+	qdisc_watchdog_cancel(&q->watchdog);
+}
+
+/* Qdisc ->init: install the defaults (10000 packet limit, 100 packets per
+ * flow, quantum of 2 MTU, initial quantum of 10 MTU, pacing enabled with no
+ * default rate and no rate cap, 1024 trees), then either apply the supplied
+ * options through fq_change() or just allocate the default flow table.
+ * Returns 0 or a negative errno from fq_change()/fq_resize().
+ */
+static int fq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct fq_sched_data *q = qdisc_priv(sch);
+	int mtu = psched_mtu(qdisc_dev(sch));
+
+	sch->limit = 10000;
+
+	q->flow_plimit = 100;
+	q->quantum = 2 * mtu;
+	q->initial_quantum = 10 * mtu;
+	q->flow_default_rate = 0;
+	q->flow_max_rate = ~0U;
+	q->rate_enable = 1;
+
+	q->new_flows.first = NULL;
+	q->old_flows.first = NULL;
+	q->delayed = RB_ROOT;
+	q->fq_root = NULL;
+	q->fq_trees_log = ilog2(1024);
+
+	qdisc_watchdog_init(&q->watchdog, sch);
+
+	if (!opt)
+		return fq_resize(q, q->fq_trees_log);
+
+	return fq_change(sch, opt);
+}
+
+/* Qdisc ->dump: emit the current configuration as u32 attributes nested in
+ * TCA_OPTIONS. Returns skb->len on success, -1 when the nest cannot be
+ * started or an attribute does not fit.
+ */
+static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct fq_sched_data *q = qdisc_priv(sch);
+	struct nlattr *nest = nla_nest_start(skb, TCA_OPTIONS);
+
+	if (!nest)
+		return -1;
+
+	if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
+	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
+	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
+	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
+	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
+	    nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) ||
+	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
+	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
+		return -1;
+
+	nla_nest_end(skb, nest);
+
+	return skb->len;
+}
+
+/* Qdisc ->dump_stats: snapshot the counters of this qdisc into a
+ * struct tc_fq_qd_stats and hand it to gnet_stats_copy_app(), whose result
+ * is returned.
+ */
+static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
+ u64 now = ktime_to_ns(ktime_get());
+ struct tc_fq_qd_stats st = {
+ .gc_flows = q->stat_gc_flows,
+ .highprio_packets = q->stat_internal_packets,
+ .tcp_retrans = q->stat_tcp_retrans,
+ .throttled = q->stat_throttled,
+ .flows_plimit = q->stat_flows_plimit,
+ .pkts_too_long = q->stat_pkts_too_long,
+ .allocation_errors = q->stat_allocation_errors,
+ .flows = q->flows,
+ .inactive_flows = q->inactive_flows,
+ .throttled_flows = q->throttled_flows,
+ /* reported relative to now. NOTE(review): the source value is ~0ULL
+ * when fq_check_throttled() found no delayed flow, so this is
+ * presumably only meaningful while throttled_flows != 0 - confirm
+ */
+ .time_next_delayed_flow = q->time_next_delayed_flow - now,
+ };
+
+ return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+/* Operations table of the "fq" qdisc, registered in fq_module_init().
+ * ->peek is the generic qdisc_peek_dequeued(); every other handler is
+ * defined in this file.
+ */
+static struct Qdisc_ops fq_qdisc_ops __read_mostly = {
+ .id = "fq",
+ .priv_size = sizeof(struct fq_sched_data),
+
+ .enqueue = fq_enqueue,
+ .dequeue = fq_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .init = fq_init,
+ .reset = fq_reset,
+ .destroy = fq_destroy,
+ .change = fq_change,
+ .dump = fq_dump,
+ .dump_stats = fq_dump_stats,
+ .owner = THIS_MODULE,
+};
+
+/* Module init: create the slab cache for struct fq_flow, then register the
+ * qdisc. The cache is destroyed again when registration fails.
+ * Returns 0, -ENOMEM, or the error of register_qdisc().
+ */
+static int __init fq_module_init(void)
+{
+	int err;
+
+	fq_flow_cachep = kmem_cache_create("fq_flow_cache",
+					   sizeof(struct fq_flow),
+					   0, 0, NULL);
+	if (fq_flow_cachep == NULL)
+		return -ENOMEM;
+
+	err = register_qdisc(&fq_qdisc_ops);
+	if (err == 0)
+		return 0;
+
+	kmem_cache_destroy(fq_flow_cachep);
+	return err;
+}
+
+/* Module exit: undo fq_module_init() in reverse order - unregister the
+ * qdisc first, then destroy the flow cache it allocated from.
+ */
+static void __exit fq_module_exit(void)
+{
+ unregister_qdisc(&fq_qdisc_ops);
+ kmem_cache_destroy(fq_flow_cachep);
+}
+
+module_init(fq_module_init)
+module_exit(fq_module_exit)
+MODULE_AUTHOR("Eric Dumazet");
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 48be3d5..a74e278 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -30,6 +30,10 @@
#include <net/pkt_sched.h>
#include <net/dst.h>
+/* Qdisc to use by default */
+const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
+EXPORT_SYMBOL(default_qdisc_ops);
+
/* Main transmission queue. */
/* Modifications to data participating in scheduling must be protected with
@@ -530,12 +534,11 @@
.dump = pfifo_fast_dump,
.owner = THIS_MODULE,
};
-EXPORT_SYMBOL(pfifo_fast_ops);
static struct lock_class_key qdisc_tx_busylock;
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
- struct Qdisc_ops *ops)
+ const struct Qdisc_ops *ops)
{
void *p;
struct Qdisc *sch;
@@ -579,10 +582,14 @@
}
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
- struct Qdisc_ops *ops, unsigned int parentid)
+ const struct Qdisc_ops *ops,
+ unsigned int parentid)
{
struct Qdisc *sch;
+ if (!try_module_get(ops->owner))
+ goto errout;
+
sch = qdisc_alloc(dev_queue, ops);
if (IS_ERR(sch))
goto errout;
@@ -686,7 +693,7 @@
if (dev->tx_queue_len) {
qdisc = qdisc_create_dflt(dev_queue,
- &pfifo_fast_ops, TC_H_ROOT);
+ default_qdisc_ops, TC_H_ROOT);
if (!qdisc) {
netdev_info(dev, "activation failed\n");
return;
@@ -739,9 +746,8 @@
int need_watchdog;
/* No queueing discipline is attached to device;
- create default one i.e. pfifo_fast for devices,
- which need queueing and noqueue_qdisc for
- virtual interfaces
+ * create default one for devices, which need queueing
+ * and noqueue_qdisc for virtual interfaces
*/
if (dev->qdisc == &noop_qdisc)
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 5da78a1..2e56185 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -57,7 +57,7 @@
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
dev_queue = netdev_get_tx_queue(dev, ntx);
- qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
+ qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(ntx + 1)));
if (qdisc == NULL)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index accec33..d44c868 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -124,7 +124,7 @@
for (i = 0; i < dev->num_tx_queues; i++) {
dev_queue = netdev_get_tx_queue(dev, i);
- qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
+ qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(i + 1)));
if (qdisc == NULL) {
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index e62c225..cd72ae5 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -155,13 +155,8 @@
if (sp == asoc->peer.primary_path)
printl("*");
- if (sp->ipaddr.sa.sa_family == AF_INET)
- printl("%pI4 ", &sp->ipaddr.v4.sin_addr);
- else
- printl("%pI6 ", &sp->ipaddr.v6.sin6_addr);
-
- printl("%2u %8u %8u %8u %8u %8u ",
- sp->state, sp->cwnd, sp->ssthresh,
+ printl("%pISc %2u %8u %8u %8u %8u %8u ",
+ &sp->ipaddr, sp->state, sp->cwnd, sp->ssthresh,
sp->flight_size, sp->partial_bytes_acked,
sp->pathmtu);
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 01e9783..d244a23 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2240,25 +2240,23 @@
struct sctp_chunk **errp)
{
union sctp_params param;
- int has_cookie = 0;
+ bool has_cookie = false;
int result;
- /* Verify stream values are non-zero. */
- if ((0 == peer_init->init_hdr.num_outbound_streams) ||
- (0 == peer_init->init_hdr.num_inbound_streams) ||
- (0 == peer_init->init_hdr.init_tag) ||
- (SCTP_DEFAULT_MINWINDOW > ntohl(peer_init->init_hdr.a_rwnd))) {
-
+ /* Check for missing mandatory parameters. Note: Initial TSN is
+ * also mandatory, but is not checked here since the valid range
+ * is 0..2**32-1. RFC4960, section 3.3.3.
+ */
+ if (peer_init->init_hdr.num_outbound_streams == 0 ||
+ peer_init->init_hdr.num_inbound_streams == 0 ||
+ peer_init->init_hdr.init_tag == 0 ||
+ ntohl(peer_init->init_hdr.a_rwnd) < SCTP_DEFAULT_MINWINDOW)
return sctp_process_inv_mandatory(asoc, chunk, errp);
- }
- /* Check for missing mandatory parameters. */
sctp_walk_params(param, peer_init, init_hdr.params) {
-
- if (SCTP_PARAM_STATE_COOKIE == param.p->type)
- has_cookie = 1;
-
- } /* for (loop through all parameters) */
+ if (param.p->type == SCTP_PARAM_STATE_COOKIE)
+ has_cookie = true;
+ }
/* There is a possibility that a parameter length was bad and
* in that case we would have stoped walking the parameters.
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index adf1e98..170c0ab 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2664,8 +2664,8 @@
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_NEW_KEY);
- if (IS_ERR(hdr))
- return PTR_ERR(hdr);
+ if (!hdr)
+ return -ENOBUFS;
cookie.msg = msg;
cookie.idx = key_idx;
@@ -6670,6 +6670,9 @@
NL80211_CMD_TESTMODE);
struct nlattr *tmdata;
+ if (!hdr)
+ break;
+
if (nla_put_u32(skb, NL80211_ATTR_WIPHY, phy_idx)) {
genlmsg_cancel(skb, hdr);
break;
@@ -7114,9 +7117,8 @@
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_REMAIN_ON_CHANNEL);
-
- if (IS_ERR(hdr)) {
- err = PTR_ERR(hdr);
+ if (!hdr) {
+ err = -ENOBUFS;
goto free_msg;
}
@@ -7414,9 +7416,8 @@
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_FRAME);
-
- if (IS_ERR(hdr)) {
- err = PTR_ERR(hdr);
+ if (!hdr) {
+ err = -ENOBUFS;
goto free_msg;
}
}
@@ -8551,9 +8552,8 @@
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_PROBE_CLIENT);
-
- if (IS_ERR(hdr)) {
- err = PTR_ERR(hdr);
+ if (!hdr) {
+ err = -ENOBUFS;
goto free_msg;
}
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 81c8a10..20e86a9 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -976,21 +976,19 @@
struct net_device *dev, u16 reason, bool wextev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err;
+ int err = 0;
ASSERT_WDEV_LOCK(wdev);
kfree(wdev->connect_keys);
wdev->connect_keys = NULL;
- if (wdev->conn) {
+ if (wdev->conn)
err = cfg80211_sme_disconnect(wdev, reason);
- } else if (!rdev->ops->disconnect) {
+ else if (!rdev->ops->disconnect)
cfg80211_mlme_down(rdev, dev);
- err = 0;
- } else {
+ else if (wdev->current_bss)
err = rdev_disconnect(rdev, dev, reason);
- }
return err;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d8da6b8..ad8cc7b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -308,7 +308,7 @@
{
BUG_ON(!policy->walk.dead);
- if (del_timer(&policy->timer))
+ if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
BUG();
security_xfrm_policy_free(policy->security);
@@ -2132,8 +2132,6 @@
* have the xfrm_state's. We need to wait for KM to
* negotiate new SA's or bail out with error.*/
if (net->xfrm.sysctl_larval_drop) {
- /* EREMOTE tells the caller to generate
- * a one-shot blackhole route. */
dst_release(dst);
xfrm_pols_put(pols, drop_pols);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 78f66fa..4f8ace8 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -499,7 +499,8 @@
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
- tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
+ CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
setup_timer(&x->rtimer, xfrm_replay_timer_handler,
(unsigned long)x);
x->curlft.add_time = get_seconds();
@@ -990,11 +991,13 @@
EXPORT_SYMBOL(xfrm_state_insert);
/* xfrm_state_lock is held */
-static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
+static struct xfrm_state *__find_acq_core(struct net *net,
+ const struct xfrm_mark *m,
unsigned short family, u8 mode,
u32 reqid, u8 proto,
const xfrm_address_t *daddr,
- const xfrm_address_t *saddr, int create)
+ const xfrm_address_t *saddr,
+ int create)
{
unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
struct xfrm_state *x;
@@ -1399,9 +1402,9 @@
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
struct xfrm_state *
-xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto,
- const xfrm_address_t *daddr, const xfrm_address_t *saddr,
- int create, unsigned short family)
+xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
+ u8 proto, const xfrm_address_t *daddr,
+ const xfrm_address_t *saddr, int create, unsigned short family)
{
struct xfrm_state *x;