net: Add Generic Receive Offload infrastructure
This patch adds the top-level GRO (Generic Receive Offload) infrastructure.
This is pretty similar to LRO except that this is protocol-independent.
Instead of holding packets in an lro_mgr structure, they're now held in
napi_struct.
For drivers that intend to use this, they can set the NETIF_F_GRO bit and
call napi_gro_receive instead of netif_receive_skb or just call netif_rx.
The latter will call napi_receive_skb automatically. When napi_gro_receive
is used, the driver must either call napi_complete/napi_rx_complete, or
call napi_gro_flush in softirq context if the driver uses the primitives
__napi_complete/__napi_rx_complete.
Protocols will set the gro_receive and gro_complete function pointers in
order to participate in this scheme.
In addition to the packet, gro_receive will get a list of currently held
packets. Each packet in the list has a same_flow field which is non-zero
if it is a potential match for the new packet. For each packet that may
match, they also have a flush field which is non-zero if the held packet
must not be merged with the new packet.
Once gro_receive has determined that the new skb matches a held packet,
the held packet may be processed immediately if the new skb cannot be
merged with it. In this case gro_receive should return the pointer to
the existing skb in gro_list. Otherwise the new skb should be merged into
the existing packet and NULL should be returned, unless the new skb makes
it impossible for any further merges to be made (e.g., FIN packet) where
the merged skb should be returned.
Whenever the skb is merged into an existing entry, the gro_receive
function should set NAPI_GRO_CB(skb)->same_flow. Note that if an skb
merely matches an existing entry but can't be merged with it, then
this shouldn't be set.
If gro_receive finds it pointless to hold the new skb for future merging,
it should set NAPI_GRO_CB(skb)->flush.
Held packets will be flushed by napi_gro_flush which is called by
napi_complete and napi_rx_complete.
Currently held packets are stored in a singly liked list just like LRO.
The list is limited to a maximum of 8 entries. In future, this may be
expanded to use a hash table to allow more flows to be held for merging.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bdf5465..58856b6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -314,8 +314,9 @@
spinlock_t poll_lock;
int poll_owner;
struct net_device *dev;
- struct list_head dev_list;
#endif
+ struct list_head dev_list;
+ struct sk_buff *gro_list;
};
enum
@@ -376,22 +377,8 @@
*
* Mark NAPI processing as complete.
*/
-static inline void __napi_complete(struct napi_struct *n)
-{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- list_del(&n->poll_list);
- smp_mb__before_clear_bit();
- clear_bit(NAPI_STATE_SCHED, &n->state);
-}
-
-static inline void napi_complete(struct napi_struct *n)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __napi_complete(n);
- local_irq_restore(flags);
-}
+extern void __napi_complete(struct napi_struct *n);
+extern void napi_complete(struct napi_struct *n);
/**
* napi_disable - prevent NAPI from scheduling
@@ -640,9 +627,7 @@
unsigned long state;
struct list_head dev_list;
-#ifdef CONFIG_NETPOLL
struct list_head napi_list;
-#endif
/* Net device features */
unsigned long features;
@@ -661,6 +646,7 @@
#define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */
/* do not use LLTX in new drivers */
#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */
+#define NETIF_F_GRO 16384 /* Generic receive offload */
#define NETIF_F_LRO 32768 /* large receive offload */
/* Segmentation offload features */
@@ -984,22 +970,8 @@
* netif_napi_add() must be used to initialize a napi context prior to calling
* *any* of the other napi related functions.
*/
-static inline void netif_napi_add(struct net_device *dev,
- struct napi_struct *napi,
- int (*poll)(struct napi_struct *, int),
- int weight)
-{
- INIT_LIST_HEAD(&napi->poll_list);
- napi->poll = poll;
- napi->weight = weight;
-#ifdef CONFIG_NETPOLL
- napi->dev = dev;
- list_add(&napi->dev_list, &dev->napi_list);
- spin_lock_init(&napi->poll_lock);
- napi->poll_owner = -1;
-#endif
- set_bit(NAPI_STATE_SCHED, &napi->state);
-}
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight);
/**
* netif_napi_del - remove a napi context
@@ -1007,12 +979,20 @@
*
* netif_napi_del() removes a napi context from the network device napi list
*/
-static inline void netif_napi_del(struct napi_struct *napi)
-{
-#ifdef CONFIG_NETPOLL
- list_del(&napi->dev_list);
-#endif
-}
+void netif_napi_del(struct napi_struct *napi);
+
+struct napi_gro_cb {
+ /* This is non-zero if the packet may be of the same flow. */
+ int same_flow;
+
+ /* This is non-zero if the packet cannot be merged with the new skb. */
+ int flush;
+
+ /* Number of segments aggregated. */
+ int count;
+};
+
+#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
struct packet_type {
__be16 type; /* This is really htons(ether_type). */
@@ -1024,6 +1004,9 @@
struct sk_buff *(*gso_segment)(struct sk_buff *skb,
int features);
int (*gso_send_check)(struct sk_buff *skb);
+ struct sk_buff **(*gro_receive)(struct sk_buff **head,
+ struct sk_buff *skb);
+ int (*gro_complete)(struct sk_buff *skb);
void *af_packet_priv;
struct list_head list;
};
@@ -1377,6 +1360,9 @@
extern int netif_rx_ni(struct sk_buff *skb);
#define HAVE_NETIF_RECEIVE_SKB 1
extern int netif_receive_skb(struct sk_buff *skb);
+extern void napi_gro_flush(struct napi_struct *napi);
+extern int napi_gro_receive(struct napi_struct *napi,
+ struct sk_buff *skb);
extern void netif_nit_deliver(struct sk_buff *skb);
extern int dev_valid_name(const char *name);
extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
@@ -1621,17 +1607,7 @@
static inline void netif_rx_complete(struct net_device *dev,
struct napi_struct *napi)
{
- unsigned long flags;
-
- /*
- * don't let napi dequeue from the cpu poll list
- * just in case its running on a different cpu
- */
- if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state)))
- return;
- local_irq_save(flags);
- __netif_rx_complete(dev, napi);
- local_irq_restore(flags);
+ napi_complete(napi);
}
static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)