Merge branch 'xmit_list'
David Miller says:
====================
net: Make dev_hard_start_xmit() work fundamentally on lists
After this patch set, dev_hard_start_xmit() will work fundemantally on
any and all SKB lists.
This opens the path for a clean implementation of pulling multiple
packets out during qdisc_restart(), and then passing that blob in one
shot to dev_hard_start_xmit().
There were two main architectural blockers to this:
1) The GSO handling, we kept the original GSO head SKB around simply
because dev_hard_start_xmit() had no way to communicate to the
caller how far into the segmented list it was able to go. Now it
can, so the head GSO can be liberated immediately.
All of the special GSO head SKB destructor et al. handling goes
away too.
2) Validate of VLAN, CSUM, and segmentation characteristics was being
performed inside of dev_hard_start_xmit(). If want to truly batch,
we have to let the higher levels to this. In particular, this is
now dequeue_skb()'s job.
And with those two issues out of the way, it should now be trivial to
build experiments on top of this patch set, all of the framework
should be there now. You could do something as simple as:
skb = q->dequeue(q);
if (skb)
skb = validate_xmit_skb(skb, qdisc_dev(q));
if (skb) {
struct sk_buff *new, *head = skb;
int limit = 5;
do {
new = q->dequeue(q);
if (new)
new = validate_xmit_skb(new, qdisc_dev(q));
if (new) {
skb->next = new;
skb = new;
}
} while (new && --limit);
skb = head;
}
inside of the else branch of dequeue_skb().
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index 81b22a1..ae6ecf4 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -192,8 +192,10 @@
{
struct dlci_local *dlp = netdev_priv(dev);
- if (skb)
- netdev_start_xmit(skb, dlp->slave);
+ if (skb) {
+ struct netdev_queue *txq = skb_get_tx_queue(dev, skb);
+ netdev_start_xmit(skb, dlp->slave, txq, false);
+ }
return NETDEV_TX_OK;
}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 456eb1f..202c25a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2827,8 +2827,9 @@
int dev_change_carrier(struct net_device *, bool new_carrier);
int dev_get_phys_port_id(struct net_device *dev,
struct netdev_phys_port_id *ppid);
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
- struct netdev_queue *txq);
+struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev);
+struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+ struct netdev_queue *txq, int *ret);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb);
@@ -3431,17 +3432,24 @@
#endif
static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
- struct sk_buff *skb, struct net_device *dev)
+ struct sk_buff *skb, struct net_device *dev,
+ bool more)
{
- skb->xmit_more = 0;
+ skb->xmit_more = more ? 1 : 0;
return ops->ndo_start_xmit(skb, dev);
}
-static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev,
+ struct netdev_queue *txq, bool more)
{
const struct net_device_ops *ops = dev->netdev_ops;
+ int rc;
- return __netdev_start_xmit(ops, skb, dev);
+ rc = __netdev_start_xmit(ops, skb, dev, more);
+ if (rc == NETDEV_TX_OK)
+ txq_trans_update(txq);
+
+ return rc;
}
int netdev_class_create_file_ns(struct class_attribute *class_attr,
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index d662da1..0e98222 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -599,7 +599,7 @@
}
non_ip:
- return __netdev_start_xmit(mpc->old_ops, skb, dev);
+ return __netdev_start_xmit(mpc->old_ops, skb, dev, false);
}
static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
diff --git a/net/core/dev.c b/net/core/dev.c
index a6077ef..6857d57 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2485,52 +2485,6 @@
return 0;
}
-struct dev_gso_cb {
- void (*destructor)(struct sk_buff *skb);
-};
-
-#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
-
-static void dev_gso_skb_destructor(struct sk_buff *skb)
-{
- struct dev_gso_cb *cb;
-
- kfree_skb_list(skb->next);
- skb->next = NULL;
-
- cb = DEV_GSO_CB(skb);
- if (cb->destructor)
- cb->destructor(skb);
-}
-
-/**
- * dev_gso_segment - Perform emulated hardware segmentation on skb.
- * @skb: buffer to segment
- * @features: device features as applicable to this skb
- *
- * This function segments the given skb and stores the list of segments
- * in skb->next.
- */
-static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
-{
- struct sk_buff *segs;
-
- segs = skb_gso_segment(skb, features);
-
- /* Verifying header integrity only. */
- if (!segs)
- return 0;
-
- if (IS_ERR(segs))
- return PTR_ERR(segs);
-
- skb->next = segs;
- DEV_GSO_CB(skb)->destructor = skb->destructor;
- skb->destructor = dev_gso_skb_destructor;
-
- return 0;
-}
-
/* If MPLS offload request, verify we are testing hardware MPLS features
* instead of standard features for the netdev.
*/
@@ -2599,118 +2553,125 @@
}
EXPORT_SYMBOL(netif_skb_features);
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
- struct netdev_queue *txq)
+static int xmit_one(struct sk_buff *skb, struct net_device *dev,
+ struct netdev_queue *txq, bool more)
{
- int rc = NETDEV_TX_OK;
- unsigned int skb_len;
+ unsigned int len;
+ int rc;
- if (likely(!skb->next)) {
- netdev_features_t features;
+ if (!list_empty(&ptype_all))
+ dev_queue_xmit_nit(skb, dev);
- /*
- * If device doesn't need skb->dst, release it right now while
- * its hot in this cpu cache
- */
- if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
- skb_dst_drop(skb);
+ len = skb->len;
+ trace_net_dev_start_xmit(skb, dev);
+ rc = netdev_start_xmit(skb, dev, txq, more);
+ trace_net_dev_xmit(skb, rc, dev, len);
- features = netif_skb_features(skb);
-
- if (vlan_tx_tag_present(skb) &&
- !vlan_hw_offload_capable(features, skb->vlan_proto)) {
- skb = __vlan_put_tag(skb, skb->vlan_proto,
- vlan_tx_tag_get(skb));
- if (unlikely(!skb))
- goto out;
-
- skb->vlan_tci = 0;
- }
-
- /* If encapsulation offload request, verify we are testing
- * hardware encapsulation features instead of standard
- * features for the netdev
- */
- if (skb->encapsulation)
- features &= dev->hw_enc_features;
-
- if (netif_needs_gso(skb, features)) {
- if (unlikely(dev_gso_segment(skb, features)))
- goto out_kfree_skb;
- if (skb->next)
- goto gso;
- } else {
- if (skb_needs_linearize(skb, features) &&
- __skb_linearize(skb))
- goto out_kfree_skb;
-
- /* If packet is not checksummed and device does not
- * support checksumming for this protocol, complete
- * checksumming here.
- */
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (skb->encapsulation)
- skb_set_inner_transport_header(skb,
- skb_checksum_start_offset(skb));
- else
- skb_set_transport_header(skb,
- skb_checksum_start_offset(skb));
- if (!(features & NETIF_F_ALL_CSUM) &&
- skb_checksum_help(skb))
- goto out_kfree_skb;
- }
- }
-
- if (!list_empty(&ptype_all))
- dev_queue_xmit_nit(skb, dev);
-
- skb_len = skb->len;
- trace_net_dev_start_xmit(skb, dev);
- rc = netdev_start_xmit(skb, dev);
- trace_net_dev_xmit(skb, rc, dev, skb_len);
- if (rc == NETDEV_TX_OK)
- txq_trans_update(txq);
- return rc;
- }
-
-gso:
- do {
- struct sk_buff *nskb = skb->next;
-
- skb->next = nskb->next;
- nskb->next = NULL;
-
- if (!list_empty(&ptype_all))
- dev_queue_xmit_nit(nskb, dev);
-
- skb_len = nskb->len;
- trace_net_dev_start_xmit(nskb, dev);
- rc = netdev_start_xmit(nskb, dev);
- trace_net_dev_xmit(nskb, rc, dev, skb_len);
- if (unlikely(rc != NETDEV_TX_OK)) {
- if (rc & ~NETDEV_TX_MASK)
- goto out_kfree_gso_skb;
- nskb->next = skb->next;
- skb->next = nskb;
- return rc;
- }
- txq_trans_update(txq);
- if (unlikely(netif_xmit_stopped(txq) && skb->next))
- return NETDEV_TX_BUSY;
- } while (skb->next);
-
-out_kfree_gso_skb:
- if (likely(skb->next == NULL)) {
- skb->destructor = DEV_GSO_CB(skb)->destructor;
- consume_skb(skb);
- return rc;
- }
-out_kfree_skb:
- kfree_skb(skb);
-out:
return rc;
}
-EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
+
+struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
+ struct netdev_queue *txq, int *ret)
+{
+ struct sk_buff *skb = first;
+ int rc = NETDEV_TX_OK;
+
+ while (skb) {
+ struct sk_buff *next = skb->next;
+
+ skb->next = NULL;
+ rc = xmit_one(skb, dev, txq, next != NULL);
+ if (unlikely(!dev_xmit_complete(rc))) {
+ skb->next = next;
+ goto out;
+ }
+
+ skb = next;
+ if (netif_xmit_stopped(txq) && skb) {
+ rc = NETDEV_TX_BUSY;
+ break;
+ }
+ }
+
+out:
+ *ret = rc;
+ return skb;
+}
+
+struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features)
+{
+ if (vlan_tx_tag_present(skb) &&
+ !vlan_hw_offload_capable(features, skb->vlan_proto)) {
+ skb = __vlan_put_tag(skb, skb->vlan_proto,
+ vlan_tx_tag_get(skb));
+ if (skb)
+ skb->vlan_tci = 0;
+ }
+ return skb;
+}
+
+struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+{
+ netdev_features_t features;
+
+ if (skb->next)
+ return skb;
+
+ /* If device doesn't need skb->dst, release it right now while
+ * its hot in this cpu cache
+ */
+ if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+ skb_dst_drop(skb);
+
+ features = netif_skb_features(skb);
+ skb = validate_xmit_vlan(skb, features);
+ if (unlikely(!skb))
+ goto out_null;
+
+ /* If encapsulation offload request, verify we are testing
+ * hardware encapsulation features instead of standard
+ * features for the netdev
+ */
+ if (skb->encapsulation)
+ features &= dev->hw_enc_features;
+
+ if (netif_needs_gso(skb, features)) {
+ struct sk_buff *segs;
+
+ segs = skb_gso_segment(skb, features);
+ kfree_skb(skb);
+ if (IS_ERR(segs))
+ segs = NULL;
+ skb = segs;
+ } else {
+ if (skb_needs_linearize(skb, features) &&
+ __skb_linearize(skb))
+ goto out_kfree_skb;
+
+ /* If packet is not checksummed and device does not
+ * support checksumming for this protocol, complete
+ * checksumming here.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ if (skb->encapsulation)
+ skb_set_inner_transport_header(skb,
+ skb_checksum_start_offset(skb));
+ else
+ skb_set_transport_header(skb,
+ skb_checksum_start_offset(skb));
+ if (!(features & NETIF_F_ALL_CSUM) &&
+ skb_checksum_help(skb))
+ goto out_kfree_skb;
+ }
+ }
+
+ return skb;
+
+out_kfree_skb:
+ kfree_skb(skb);
+out_null:
+ return NULL;
+}
static void qdisc_pkt_len_init(struct sk_buff *skb)
{
@@ -2922,7 +2883,7 @@
if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
- rc = dev_hard_start_xmit(skb, dev, txq);
+ skb = dev_hard_start_xmit(skb, dev, txq, &rc);
__this_cpu_dec(xmit_recursion);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 12b1df9..e6645b4 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -91,9 +91,7 @@
skb->vlan_tci = 0;
}
- status = netdev_start_xmit(skb, dev);
- if (status == NETDEV_TX_OK)
- txq_trans_update(txq);
+ status = netdev_start_xmit(skb, dev, txq, false);
out:
return status;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index d81b540..5b36a94 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3335,11 +3335,10 @@
goto unlock;
}
atomic_inc(&(pkt_dev->skb->users));
- ret = netdev_start_xmit(pkt_dev->skb, odev);
+ ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false);
switch (ret) {
case NETDEV_TX_OK:
- txq_trans_update(txq);
pkt_dev->last_ok = 1;
pkt_dev->sofar++;
pkt_dev->seq_num++;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b7a7f5a..87d20f4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -258,11 +258,8 @@
local_bh_disable();
HARD_TX_LOCK(dev, txq, smp_processor_id());
- if (!netif_xmit_frozen_or_drv_stopped(txq)) {
- ret = netdev_start_xmit(skb, dev);
- if (ret == NETDEV_TX_OK)
- txq_trans_update(txq);
- }
+ if (!netif_xmit_frozen_or_drv_stopped(txq))
+ ret = netdev_start_xmit(skb, dev, txq, false);
HARD_TX_UNLOCK(dev, txq);
local_bh_enable();
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 05b3f5d..a8bf9f9 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -70,8 +70,11 @@
} else
skb = NULL;
} else {
- if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq))
+ if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) {
skb = q->dequeue(q);
+ if (skb)
+ skb = validate_xmit_skb(skb, qdisc_dev(q));
+ }
}
return skb;
@@ -126,7 +129,7 @@
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_stopped(txq))
- ret = dev_hard_start_xmit(skb, dev, txq);
+ skb = dev_hard_start_xmit(skb, dev, txq, &ret);
HARD_TX_UNLOCK(dev, txq);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 64cd93c..aaa8d03 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -316,8 +316,8 @@
unsigned int length = qdisc_pkt_len(skb);
if (!netif_xmit_frozen_or_stopped(slave_txq) &&
- netdev_start_xmit(skb, slave) == NETDEV_TX_OK) {
- txq_trans_update(slave_txq);
+ netdev_start_xmit(skb, slave, slave_txq, false) ==
+ NETDEV_TX_OK) {
__netif_tx_unlock(slave_txq);
master->slaves = NEXT_SLAVE(q);
netif_wake_queue(dev);