veth: avoid a NULL deref in veth_stats_one
commit 2681128f0ced8a (veth: extend device features) added a NULL deref
in veth_stats_one(), as veth_get_stats64() was not testing if the peer
device was setup or not.
At init time, we call dev_get_stats() before veth pair is fully setup.
[ 178.854758] [<ffffffffa00f5677>] veth_get_stats64+0x47/0x70 [veth]
[ 178.861013] [<ffffffff814f0a2d>] dev_get_stats+0x6d/0x130
[ 178.866486] [<ffffffff81504efc>] rtnl_fill_ifinfo+0x47c/0x930
[ 178.872299] [<ffffffff81505b93>] rtmsg_ifinfo+0x83/0x100
[ 178.877678] [<ffffffff81505cc6>] rtnl_configure_link+0x76/0xa0
[ 178.883580] [<ffffffffa00f52fa>] veth_newlink+0x16a/0x350 [veth]
[ 178.889654] [<ffffffff815061cc>] rtnl_newlink+0x4dc/0x5e0
[ 178.895128] [<ffffffff81505e1e>] ? rtnl_newlink+0x12e/0x5e0
[ 178.900769] [<ffffffff8150587d>] rtnetlink_rcv_msg+0x11d/0x310
[ 178.906669] [<ffffffff81505760>] ? __rtnl_unlock+0x20/0x20
[ 178.912225] [<ffffffff81521f89>] netlink_rcv_skb+0xa9/0xd0
[ 178.917779] [<ffffffff81502d55>] rtnetlink_rcv+0x25/0x40
[ 178.923159] [<ffffffff815218d1>] netlink_unicast+0x1b1/0x230
[ 178.928887] [<ffffffff81521c4e>] netlink_sendmsg+0x2fe/0x3b0
[ 178.934615] [<ffffffff814dbe22>] sock_sendmsg+0xd2/0xf0
So we must check if peer was setup in veth_get_stats64()
As pointed out by Ben Hutchings, priv->peer is missing proper
synchronization. Adding RCU protection is a safe and well documented
way to make sure we don't access about to be freed or already
freed data.
Reported-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 8b2e112..0f71a4f 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -32,7 +32,7 @@
};
struct veth_priv {
- struct net_device *peer;
+ struct net_device __rcu *peer;
atomic64_t dropped;
};
@@ -89,10 +89,10 @@
static void veth_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
- struct veth_priv *priv;
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer = rtnl_dereference(priv->peer);
- priv = netdev_priv(dev);
- data[0] = priv->peer->ifindex;
+ data[0] = peer ? peer->ifindex : 0;
}
static const struct ethtool_ops veth_ethtool_ops = {
@@ -107,9 +107,15 @@
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
- struct net_device *rcv = priv->peer;
+ struct net_device *rcv;
int length = skb->len;
+ rcu_read_lock();
+ rcv = rcu_dereference(priv->peer);
+ if (unlikely(!rcv)) {
+ kfree_skb(skb);
+ goto drop;
+ }
/* don't change ip_summed == CHECKSUM_PARTIAL, as that
* will cause bad checksum on forwarded packets
*/
@@ -125,9 +131,10 @@
stats->packets++;
u64_stats_update_end(&stats->syncp);
} else {
+drop:
atomic64_inc(&priv->dropped);
}
-
+ rcu_read_unlock();
return NETDEV_TX_OK;
}
@@ -162,30 +169,36 @@
struct rtnl_link_stats64 *tot)
{
struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer;
struct pcpu_vstats one;
tot->tx_dropped = veth_stats_one(&one, dev);
tot->tx_bytes = one.bytes;
tot->tx_packets = one.packets;
- tot->rx_dropped = veth_stats_one(&one, priv->peer);
- tot->rx_bytes = one.bytes;
- tot->rx_packets = one.packets;
+ rcu_read_lock();
+ peer = rcu_dereference(priv->peer);
+ if (peer) {
+ tot->rx_dropped = veth_stats_one(&one, peer);
+ tot->rx_bytes = one.bytes;
+ tot->rx_packets = one.packets;
+ }
+ rcu_read_unlock();
return tot;
}
static int veth_open(struct net_device *dev)
{
- struct veth_priv *priv;
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer = rtnl_dereference(priv->peer);
- priv = netdev_priv(dev);
- if (priv->peer == NULL)
+ if (!peer)
return -ENOTCONN;
- if (priv->peer->flags & IFF_UP) {
+ if (peer->flags & IFF_UP) {
netif_carrier_on(dev);
- netif_carrier_on(priv->peer);
+ netif_carrier_on(peer);
}
return 0;
}
@@ -195,7 +208,7 @@
struct veth_priv *priv = netdev_priv(dev);
netif_carrier_off(dev);
- netif_carrier_off(priv->peer);
+ netif_carrier_off(rtnl_dereference(priv->peer));
return 0;
}
@@ -380,10 +393,10 @@
*/
priv = netdev_priv(dev);
- priv->peer = peer;
+ rcu_assign_pointer(priv->peer, peer);
priv = netdev_priv(peer);
- priv->peer = dev;
+ rcu_assign_pointer(priv->peer, dev);
return 0;
err_register_dev:
@@ -404,7 +417,16 @@
struct net_device *peer;
priv = netdev_priv(dev);
- peer = priv->peer;
+ peer = rtnl_dereference(priv->peer);
+
+ /* Note : dellink() is called from default_device_exit_batch(),
+ * before a rcu_synchronize() point. The devices are guaranteed
+ * not being freed before one RCU grace period.
+ */
+ RCU_INIT_POINTER(priv->peer, NULL);
+
+ priv = netdev_priv(peer);
+ RCU_INIT_POINTER(priv->peer, NULL);
unregister_netdevice_queue(dev, head);
unregister_netdevice_queue(peer, head);