/*
 * NET3	Protocol independent device support routines.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *	Authors:	Ross Biro
 *			Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *			Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain:	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell	:	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *				  	- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/mpls.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
#include <linux/hashtable.h>
#include <linux/vmalloc.h>
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

static DEFINE_SPINLOCK(ptype_lock);
static DEFINE_SPINLOCK(offload_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly;	/* Taps */
static struct list_head offload_base __read_mostly;

static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
					 struct net_device *dev,
					 struct netdev_notifier_info *info);

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);

static unsigned int napi_gen_id;
static DEFINE_HASHTABLE(napi_hash, 8);

static seqcount_t devnet_rename_seq;

static inline void dev_base_seq_inc(struct net *net)
{
	while (++net->dev_base_seq == 0);
}

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));

	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_lock(&sd->input_pkt_queue.lock);
#endif
}

static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_unlock(&sd->input_pkt_queue.lock);
#endif
}

/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(net);
}

/* Device list removal
 * caller must respect a RCU grace period before freeing/reusing dev
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	hlist_del_rcu(&dev->name_hlist);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(dev_net(dev));
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
	 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
	 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
	 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
	 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *	--ANK (980803)
 */

static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))
		return pt->dev ? &pt->dev->ptype_all : &ptype_all;
	else
		return pt->dev ? &pt->dev->ptype_specific :
				 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep, therefore it cannot guarantee that all
 *	CPUs that are in the middle of receiving packets will see the new
 *	packet type (until the next received packet).
391 */
392
393void dev_add_pack(struct packet_type *pt)
394{
Eric Dumazetc07b68e2010-09-02 03:53:46 +0000395 struct list_head *head = ptype_head(pt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396
Eric Dumazetc07b68e2010-09-02 03:53:46 +0000397 spin_lock(&ptype_lock);
398 list_add_rcu(&pt->list, head);
399 spin_unlock(&ptype_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400}
Eric Dumazetd1b19df2009-09-03 01:29:39 -0700401EXPORT_SYMBOL(dev_add_pack);
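
/*
 * Usage sketch (illustrative only; the my_* names are hypothetical and not
 * part of this file): a module that wants to tap every IPv4 frame fills in
 * a struct packet_type and registers it with dev_add_pack(), then removes
 * it with dev_remove_pack() on unload.  The handler must release its
 * reference on the skb when it is done with it:
 *
 *	static int my_ipv4_rcv(struct sk_buff *skb, struct net_device *dev,
 *			       struct packet_type *pt,
 *			       struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type my_ipv4_pt __read_mostly = {
 *		.type	= cpu_to_be16(ETH_P_IP),
 *		.func	= my_ipv4_rcv,
 *	};
 *
 *	module init:	dev_add_pack(&my_ipv4_pt);
 *	module exit:	dev_remove_pack(&my_ipv4_pt);
 */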

/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);
	struct packet_type *pt1;

	spin_lock(&ptype_lock);

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	pr_warn("dev_remove_pack: %p not found\n", pt);
out:
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);

/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);


/**
 *	dev_add_offload - register offload handlers
 *	@po: protocol offload declaration
 *
 *	Add protocol offload handlers to the networking stack. The passed
 *	&proto_offload is linked into kernel lists and may not be freed until
 *	it has been removed from the kernel lists.
 *
 *	This call does not sleep, therefore it cannot guarantee that all
 *	CPUs that are in the middle of receiving packets will see the new
 *	offload handlers (until the next received packet).
 */
void dev_add_offload(struct packet_offload *po)
{
	struct list_head *head = &offload_base;

	spin_lock(&offload_lock);
	list_add_rcu(&po->list, head);
	spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(dev_add_offload);

/**
 *	__dev_remove_offload - remove offload handler
 *	@po: packet offload declaration
 *
 *	Remove a protocol offload handler that was previously added to the
 *	kernel offload handlers by dev_add_offload(). The passed &offload_type
 *	is removed from the kernel lists and can be freed or reused once this
 *	function returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
static void __dev_remove_offload(struct packet_offload *po)
{
	struct list_head *head = &offload_base;
	struct packet_offload *po1;

	spin_lock(&offload_lock);

	list_for_each_entry(po1, head, list) {
		if (po == po1) {
			list_del_rcu(&po->list);
			goto out;
		}
	}

	pr_warn("dev_remove_offload: %p not found\n", po);
out:
	spin_unlock(&offload_lock);
}

/**
 *	dev_remove_offload - remove packet offload handler
 *	@po: packet offload declaration
 *
 *	Remove a packet offload handler that was previously added to the kernel
 *	offload handlers by dev_add_offload(). The passed &offload_type is
 *	removed from the kernel lists and can be freed or reused once this
 *	function returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_offload(struct packet_offload *po)
{
	__dev_remove_offload(po);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_offload);

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine
 *	for all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq = s[i].map.irq;
			dev->base_addr = s[i].map.base_addr;
			dev->mem_start = s[i].map.mem_start;
			dev->mem_end = s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);


/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry(dev, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

/**
 *	dev_get_by_name_rcu	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.
 *	If the name is found a pointer to the device is returned.
 *	If the name is not found then %NULL is returned.
 *	The reference counters are not incremented so the caller must be
 *	careful with locks. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);

/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);
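
/*
 * Usage sketch (illustrative; "eth0" and the message are made up): since
 * dev_get_by_name() takes a reference on success, every hit must be
 * paired with dev_put() once the caller is done with the device:
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *
 *	if (dev) {
 *		pr_info("%s has ifindex %d\n", dev->name, dev->ifindex);
 *		dev_put(dev);
 *	}
 */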

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry(dev, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);

/**
 *	dev_get_by_index_rcu - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry_rcu(dev, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);
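
/*
 * Usage sketch (illustrative; the ifindex value is arbitrary): with the
 * _rcu variant the caller supplies the RCU read-side critical section and
 * must not use the returned pointer outside it without taking its own
 * reference (e.g. via dev_hold()):
 *
 *	struct net_device *dev;
 *
 *	rcu_read_lock();
 *	dev = dev_get_by_index_rcu(&init_net, 2);
 *	if (dev)
 *		pr_info("ifindex 2 is %s\n", dev->name);
 *	rcu_read_unlock();
 */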


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);

/**
 *	netdev_get_name - get a netdevice name, knowing its ifindex.
 *	@net: network namespace
 *	@name: a pointer to the buffer where the name will be stored.
 *	@ifindex: the ifindex of the interface to get the name from.
 *
 *	The use of raw_seqcount_begin() and cond_resched() before
 *	retrying is required as we want to give the writers a chance
 *	to complete when CONFIG_PREEMPT is not set.
 */
int netdev_get_name(struct net *net, char *name, int ifindex)
{
	struct net_device *dev;
	unsigned int seq;

retry:
	seq = raw_seqcount_begin(&devnet_rename_seq);
	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev) {
		rcu_read_unlock();
		return -ENODEV;
	}

	strcpy(name, dev->name);
	rcu_read_unlock();
	if (read_seqcount_retry(&devnet_rename_seq, seq)) {
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 *	dev_getbyhwaddr_rcu - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device.
 *	The caller must hold RCU or RTNL.
 *	The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking
 *
 */

struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
				       const char *ha)
{
	struct net_device *dev;

	for_each_netdev_rcu(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr_rcu);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev, *ret = NULL;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev)
		if (dev->type == type) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	__dev_get_by_flags - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. Must be called inside
 *	rtnl_lock(), and result refcount is unchanged.
 */

struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
				      unsigned short mask)
{
	struct net_device *dev, *ret;

	ASSERT_RTNL();

	ret = NULL;
	for_each_netdev(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			ret = dev;
			break;
		}
	}
	return ret;
}
EXPORT_SYMBOL(__dev_get_by_flags);

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
bool dev_valid_name(const char *name)
{
	if (*name == '\0')
		return false;
	if (strlen(name) >= IFNAMSIZ)
		return false;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return false;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return false;
		name++;
	}
	return true;
}
EXPORT_SYMBOL(dev_valid_name);

/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	if (buf != name)
		snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
EXPORT_SYMBOL(dev_alloc_name);
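
/*
 * Usage sketch (illustrative; "wlan%d" is just an example pattern): a
 * driver holding the RTNL lock can ask for the next free unit number
 * before registering its device:
 *
 *	rtnl_lock();
 *	if (dev_alloc_name(dev, "wlan%d") >= 0)
 *		pr_info("assigned name %s\n", dev->name);
 *	rtnl_unlock();
 */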

static int dev_alloc_name_ns(struct net *net,
			     struct net_device *dev,
			     const char *name)
{
	char buf[IFNAMSIZ];
	int ret;

	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}

static int dev_get_valid_name(struct net *net,
			      struct net_device *dev,
			      const char *name)
{
	BUG_ON(!net);

	if (!dev_valid_name(name))
		return -EINVAL;

	if (strchr(name, '%'))
		return dev_alloc_name_ns(net, dev, name);
	else if (__dev_get_by_name(net, name))
		return -EEXIST;
	else if (dev->name != name)
		strlcpy(dev->name, name, IFNAMSIZ);

	return 0;
}

/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d"
 *	for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	unsigned char old_assign_type;
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	write_seqcount_begin(&devnet_rename_seq);

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
		write_seqcount_end(&devnet_rename_seq);
		return 0;
	}

	memcpy(oldname, dev->name, IFNAMSIZ);

	err = dev_get_valid_name(net, dev, newname);
	if (err < 0) {
		write_seqcount_end(&devnet_rename_seq);
		return err;
	}

	if (oldname[0] && !strchr(oldname, '%'))
		netdev_info(dev, "renamed from %s\n", oldname);

	old_assign_type = dev->name_assign_type;
	dev->name_assign_type = NET_NAME_RENAMED;

rollback:
	ret = device_rename(&dev->dev, dev->name);
	if (ret) {
		memcpy(dev->name, oldname, IFNAMSIZ);
		dev->name_assign_type = old_assign_type;
		write_seqcount_end(&devnet_rename_seq);
		return ret;
	}

	write_seqcount_end(&devnet_rename_seq);

	netdev_adjacent_rename_links(dev, oldname);

	write_lock_bh(&dev_base_lock);
	hlist_del_rcu(&dev->name_hlist);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			write_seqcount_begin(&devnet_rename_seq);
			memcpy(dev->name, oldname, IFNAMSIZ);
			memcpy(oldname, newname, IFNAMSIZ);
			dev->name_assign_type = old_assign_type;
			old_assign_type = NET_NAME_RENAMED;
			goto rollback;
		} else {
			pr_err("%s: name change rollback failed: %d\n",
			       dev->name, ret);
		}
	}

	return err;
}

/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from info
 *
 *	Set ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	char *new_ifalias;

	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		kfree(dev->ifalias);
		dev->ifalias = NULL;
		return 0;
	}

	new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!new_ifalias)
		return -ENOMEM;
	dev->ifalias = new_ifalias;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}
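
/*
 * Usage sketch (illustrative; the "uplink0" label is made up): a caller
 * that already holds the RTNL lock can attach a human-readable alias to
 * an interface; dev_set_alias() returns the alias length on success or a
 * negative errno:
 *
 *	err = dev_set_alias(dev, "uplink0", strlen("uplink0"));
 *	if (err < 0)
 *		return err;
 */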
1204
1205
1206/**
Stephen Hemminger3041a062006-05-26 13:25:24 -07001207 * netdev_features_change - device changes features
Stephen Hemmingerd8a33ac2005-05-29 14:13:47 -07001208 * @dev: device to cause notification
1209 *
1210 * Called to indicate a device has changed features.
1211 */
1212void netdev_features_change(struct net_device *dev)
1213{
Pavel Emelyanov056925a2007-09-16 15:42:43 -07001214 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
Stephen Hemmingerd8a33ac2005-05-29 14:13:47 -07001215}
1216EXPORT_SYMBOL(netdev_features_change);
1217
1218/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 * netdev_state_change - device changes state
1220 * @dev: device to cause notification
1221 *
1222 * Called to indicate a device has changed state. This function calls
1223 * the notifier chains for netdev_chain and sends a NEWLINK message
1224 * to the routing socket.
1225 */
1226void netdev_state_change(struct net_device *dev)
1227{
1228 if (dev->flags & IFF_UP) {
Loic Prylli5495119462014-07-01 21:39:43 -07001229 struct netdev_notifier_change_info change_info;
1230
1231 change_info.flags_changed = 0;
1232 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
1233 &change_info.info);
Alexei Starovoitov7f294052013-10-23 16:02:42 -07001234 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 }
1236}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001237EXPORT_SYMBOL(netdev_state_change);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238
Amerigo Wangee89bab2012-08-09 22:14:56 +00001239/**
1240 * netdev_notify_peers - notify network peers about existence of @dev
1241 * @dev: network device
1242 *
1243 * Generate traffic such that interested network peers are aware of
1244 * @dev, such as by generating a gratuitous ARP. This may be used when
1245 * a device wants to inform the rest of the network about some sort of
1246 * reconfiguration such as a failover event or virtual machine
1247 * migration.
1248 */
1249void netdev_notify_peers(struct net_device *dev)
Or Gerlitzc1da4ac2008-06-13 18:12:00 -07001250{
Amerigo Wangee89bab2012-08-09 22:14:56 +00001251 rtnl_lock();
1252 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1253 rtnl_unlock();
Or Gerlitzc1da4ac2008-06-13 18:12:00 -07001254}
Amerigo Wangee89bab2012-08-09 22:14:56 +00001255EXPORT_SYMBOL(netdev_notify_peers);
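/*
 * Illustrative usage sketch (not part of dev.c): a virtualization or
 * failover driver can announce the device once a migration completes,
 * from process context, since the RTNL lock is taken internally:
 *
 *	if (migration_finished)			// assumed condition
 *		netdev_notify_peers(dev);
 */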
Or Gerlitzc1da4ac2008-06-13 18:12:00 -07001256
Patrick McHardybd380812010-02-26 06:34:53 +00001257static int __dev_open(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08001259 const struct net_device_ops *ops = dev->netdev_ops;
Johannes Berg3b8bcfd2009-05-30 01:39:53 +02001260 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261
Ben Hutchingse46b66b2008-05-08 02:53:17 -07001262 ASSERT_RTNL();
1263
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264 if (!netif_device_present(dev))
1265 return -ENODEV;
1266
Neil Hormanca99ca12013-02-05 08:05:43 +00001267 /* Block netpoll from trying to do any rx path servicing.
1268 * If we don't do this there is a chance ndo_poll_controller
1269 * or ndo_poll may be running while we open the device
1270 */
Eric W. Biederman66b55522014-03-27 15:39:03 -07001271 netpoll_poll_disable(dev);
Neil Hormanca99ca12013-02-05 08:05:43 +00001272
Johannes Berg3b8bcfd2009-05-30 01:39:53 +02001273 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1274 ret = notifier_to_errno(ret);
1275 if (ret)
1276 return ret;
1277
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278 set_bit(__LINK_STATE_START, &dev->state);
Jeff Garzikbada3392007-10-23 20:19:37 -07001279
Stephen Hemmingerd3147742008-11-19 21:32:24 -08001280 if (ops->ndo_validate_addr)
1281 ret = ops->ndo_validate_addr(dev);
Jeff Garzikbada3392007-10-23 20:19:37 -07001282
Stephen Hemmingerd3147742008-11-19 21:32:24 -08001283 if (!ret && ops->ndo_open)
1284 ret = ops->ndo_open(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285
Eric W. Biederman66b55522014-03-27 15:39:03 -07001286 netpoll_poll_enable(dev);
Neil Hormanca99ca12013-02-05 08:05:43 +00001287
Jeff Garzikbada3392007-10-23 20:19:37 -07001288 if (ret)
1289 clear_bit(__LINK_STATE_START, &dev->state);
1290 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291 dev->flags |= IFF_UP;
Patrick McHardy4417da62007-06-27 01:28:10 -07001292 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293 dev_activate(dev);
Theodore Ts'o7bf23572012-07-04 21:23:25 -04001294 add_device_randomness(dev->dev_addr, dev->addr_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 }
Jeff Garzikbada3392007-10-23 20:19:37 -07001296
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 return ret;
1298}
Patrick McHardybd380812010-02-26 06:34:53 +00001299
1300/**
1301 * dev_open - prepare an interface for use.
1302 * @dev: device to open
1303 *
1304 * Takes a device from down to up state. The device's private open
1305 * function is invoked and then the multicast lists are loaded. Finally
1306 * the device is moved into the up state and a %NETDEV_UP message is
1307 * sent to the netdev notifier chain.
1308 *
1309 * Calling this function on an active interface is a nop. On a failure
1310 * a negative errno code is returned.
1311 */
1312int dev_open(struct net_device *dev)
1313{
1314 int ret;
1315
Patrick McHardybd380812010-02-26 06:34:53 +00001316 if (dev->flags & IFF_UP)
1317 return 0;
1318
Patrick McHardybd380812010-02-26 06:34:53 +00001319 ret = __dev_open(dev);
1320 if (ret < 0)
1321 return ret;
1322
Alexei Starovoitov7f294052013-10-23 16:02:42 -07001323 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
Patrick McHardybd380812010-02-26 06:34:53 +00001324 call_netdevice_notifiers(NETDEV_UP, dev);
1325
1326 return ret;
1327}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001328EXPORT_SYMBOL(dev_open);
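/*
 * Illustrative usage sketch (not part of dev.c): kernel callers such as
 * bonding or bridging bring a lower device up and down under RTNL.
 * "slave_dev" is an assumed variable name for the example.
 *
 *	ASSERT_RTNL();
 *	err = dev_open(slave_dev);
 *	if (err < 0)
 *		pr_err("%s: failed to open: %d\n", slave_dev->name, err);
 *	...
 *	dev_close(slave_dev);
 */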
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329
Octavian Purdila44345722010-12-13 12:44:07 +00001330static int __dev_close_many(struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331{
Octavian Purdila44345722010-12-13 12:44:07 +00001332 struct net_device *dev;
Patrick McHardybd380812010-02-26 06:34:53 +00001333
Ben Hutchingse46b66b2008-05-08 02:53:17 -07001334 ASSERT_RTNL();
David S. Miller9d5010d2007-09-12 14:33:25 +02001335 might_sleep();
1336
Eric W. Biederman5cde2822013-10-05 19:26:05 -07001337 list_for_each_entry(dev, head, close_list) {
Eric W. Biederman3f4df202014-03-27 15:38:17 -07001338 /* Temporarily disable netpoll until the interface is down */
Eric W. Biederman66b55522014-03-27 15:39:03 -07001339 netpoll_poll_disable(dev);
Eric W. Biederman3f4df202014-03-27 15:38:17 -07001340
Octavian Purdila44345722010-12-13 12:44:07 +00001341 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342
Octavian Purdila44345722010-12-13 12:44:07 +00001343 clear_bit(__LINK_STATE_START, &dev->state);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344
Octavian Purdila44345722010-12-13 12:44:07 +00001345 /* Synchronize to the scheduled poll. We cannot touch the poll list;
1346 * it can even be on a different CPU. So just clear netif_running().
1347 *
1348 * dev->stop() will invoke napi_disable() on all of its
1349 * napi_struct instances on this device.
1350 */
Peter Zijlstra4e857c52014-03-17 18:06:10 +01001351 smp_mb__after_atomic(); /* Commit netif_running(). */
Octavian Purdila44345722010-12-13 12:44:07 +00001352 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001353
Octavian Purdila44345722010-12-13 12:44:07 +00001354 dev_deactivate_many(head);
1355
Eric W. Biederman5cde2822013-10-05 19:26:05 -07001356 list_for_each_entry(dev, head, close_list) {
Octavian Purdila44345722010-12-13 12:44:07 +00001357 const struct net_device_ops *ops = dev->netdev_ops;
1358
1359 /*
1360 * Call the device-specific close. This cannot fail and is
1361 * only done for devices that were UP.
1362 *
1363 * We allow it to be called even after a DETACH hot-plug
1364 * event.
1365 */
1366 if (ops->ndo_stop)
1367 ops->ndo_stop(dev);
1368
Octavian Purdila44345722010-12-13 12:44:07 +00001369 dev->flags &= ~IFF_UP;
Eric W. Biederman66b55522014-03-27 15:39:03 -07001370 netpoll_poll_enable(dev);
Octavian Purdila44345722010-12-13 12:44:07 +00001371 }
1372
1373 return 0;
1374}
1375
1376static int __dev_close(struct net_device *dev)
1377{
Linus Torvaldsf87e6f42011-02-17 22:54:38 +00001378 int retval;
Octavian Purdila44345722010-12-13 12:44:07 +00001379 LIST_HEAD(single);
1380
Eric W. Biederman5cde2822013-10-05 19:26:05 -07001381 list_add(&dev->close_list, &single);
Linus Torvaldsf87e6f42011-02-17 22:54:38 +00001382 retval = __dev_close_many(&single);
1383 list_del(&single);
Neil Hormanca99ca12013-02-05 08:05:43 +00001384
Linus Torvaldsf87e6f42011-02-17 22:54:38 +00001385 return retval;
Octavian Purdila44345722010-12-13 12:44:07 +00001386}
1387
Eric Dumazet3fbd8752011-01-19 21:23:22 +00001388static int dev_close_many(struct list_head *head)
Octavian Purdila44345722010-12-13 12:44:07 +00001389{
1390 struct net_device *dev, *tmp;
Octavian Purdila44345722010-12-13 12:44:07 +00001391
Eric W. Biederman5cde2822013-10-05 19:26:05 -07001392 /* Remove the devices that don't need to be closed */
1393 list_for_each_entry_safe(dev, tmp, head, close_list)
Octavian Purdila44345722010-12-13 12:44:07 +00001394 if (!(dev->flags & IFF_UP))
Eric W. Biederman5cde2822013-10-05 19:26:05 -07001395 list_del_init(&dev->close_list);
Octavian Purdila44345722010-12-13 12:44:07 +00001396
1397 __dev_close_many(head);
Matti Linnanvuorid8b2a4d2008-02-12 23:10:11 -08001398
Eric W. Biederman5cde2822013-10-05 19:26:05 -07001399 list_for_each_entry_safe(dev, tmp, head, close_list) {
Alexei Starovoitov7f294052013-10-23 16:02:42 -07001400 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
Octavian Purdila44345722010-12-13 12:44:07 +00001401 call_netdevice_notifiers(NETDEV_DOWN, dev);
Eric W. Biederman5cde2822013-10-05 19:26:05 -07001402 list_del_init(&dev->close_list);
Octavian Purdila44345722010-12-13 12:44:07 +00001403 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405 return 0;
1406}
Patrick McHardybd380812010-02-26 06:34:53 +00001407
1408/**
1409 * dev_close - shutdown an interface.
1410 * @dev: device to shutdown
1411 *
1412 * This function moves an active device into down state. A
1413 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1414 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1415 * chain.
1416 */
1417int dev_close(struct net_device *dev)
1418{
Eric Dumazete14a5992011-05-10 12:26:06 -07001419 if (dev->flags & IFF_UP) {
1420 LIST_HEAD(single);
Patrick McHardybd380812010-02-26 06:34:53 +00001421
Eric W. Biederman5cde2822013-10-05 19:26:05 -07001422 list_add(&dev->close_list, &single);
Eric Dumazete14a5992011-05-10 12:26:06 -07001423 dev_close_many(&single);
1424 list_del(&single);
1425 }
dingtianhongda6e3782013-05-27 19:53:31 +00001426 return 0;
Patrick McHardybd380812010-02-26 06:34:53 +00001427}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001428EXPORT_SYMBOL(dev_close);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001429
1430
Ben Hutchings0187bdf2008-06-19 16:15:47 -07001431/**
1432 * dev_disable_lro - disable Large Receive Offload on a device
1433 * @dev: device
1434 *
1435 * Disable Large Receive Offload (LRO) on a net device. Must be
1436 * called under RTNL. This is needed if received packets may be
1437 * forwarded to another interface.
1438 */
1439void dev_disable_lro(struct net_device *dev)
1440{
Michal Kubečekfbe168b2014-11-13 07:54:50 +01001441 struct net_device *lower_dev;
1442 struct list_head *iter;
Michal Kubeček529d0482013-11-15 06:18:50 +01001443
Michał Mirosławbc5787c62011-11-15 15:29:55 +00001444 dev->wanted_features &= ~NETIF_F_LRO;
1445 netdev_update_features(dev);
Michał Mirosław27660512011-03-18 16:56:34 +00001446
Michał Mirosław22d59692011-04-21 12:42:15 +00001447 if (unlikely(dev->features & NETIF_F_LRO))
1448 netdev_WARN(dev, "failed to disable LRO!\n");
Michal Kubečekfbe168b2014-11-13 07:54:50 +01001449
1450 netdev_for_each_lower_dev(dev, lower_dev, iter)
1451 dev_disable_lro(lower_dev);
Ben Hutchings0187bdf2008-06-19 16:15:47 -07001452}
1453EXPORT_SYMBOL(dev_disable_lro);
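/*
 * Illustrative usage sketch (not part of dev.c): forwarding setups disable
 * LRO on a port before frames received on it may be forwarded, e.g. when a
 * device is enslaved to a bridge. "port_dev" is an assumed name.
 *
 *	ASSERT_RTNL();
 *	dev_disable_lro(port_dev);
 */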
1454
Jiri Pirko351638e2013-05-28 01:30:21 +00001455static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1456 struct net_device *dev)
1457{
1458 struct netdev_notifier_info info;
1459
1460 netdev_notifier_info_init(&info, dev);
1461 return nb->notifier_call(nb, val, &info);
1462}
Ben Hutchings0187bdf2008-06-19 16:15:47 -07001463
Eric W. Biederman881d9662007-09-17 11:56:21 -07001464static int dev_boot_phase = 1;
1465
Linus Torvalds1da177e2005-04-16 15:20:36 -07001466/**
1467 * register_netdevice_notifier - register a network notifier block
1468 * @nb: notifier
1469 *
1470 * Register a notifier to be called when network device events occur.
1471 * The notifier passed is linked into the kernel structures and must
1472 * not be reused until it has been unregistered. A negative errno code
1473 * is returned on a failure.
1474 *
1475 * When registered, all registration and up events are replayed
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001476 * to the new notifier to allow it to have a race-free
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 * view of the network device list.
1478 */
1479
1480int register_netdevice_notifier(struct notifier_block *nb)
1481{
1482 struct net_device *dev;
Herbert Xufcc5a032007-07-30 17:03:38 -07001483 struct net_device *last;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001484 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485 int err;
1486
1487 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001488 err = raw_notifier_chain_register(&netdev_chain, nb);
Herbert Xufcc5a032007-07-30 17:03:38 -07001489 if (err)
1490 goto unlock;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001491 if (dev_boot_phase)
1492 goto unlock;
1493 for_each_net(net) {
1494 for_each_netdev(net, dev) {
Jiri Pirko351638e2013-05-28 01:30:21 +00001495 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
Eric W. Biederman881d9662007-09-17 11:56:21 -07001496 err = notifier_to_errno(err);
1497 if (err)
1498 goto rollback;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499
Eric W. Biederman881d9662007-09-17 11:56:21 -07001500 if (!(dev->flags & IFF_UP))
1501 continue;
Herbert Xufcc5a032007-07-30 17:03:38 -07001502
Jiri Pirko351638e2013-05-28 01:30:21 +00001503 call_netdevice_notifier(nb, NETDEV_UP, dev);
Eric W. Biederman881d9662007-09-17 11:56:21 -07001504 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505 }
Herbert Xufcc5a032007-07-30 17:03:38 -07001506
1507unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 rtnl_unlock();
1509 return err;
Herbert Xufcc5a032007-07-30 17:03:38 -07001510
1511rollback:
1512 last = dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001513 for_each_net(net) {
1514 for_each_netdev(net, dev) {
1515 if (dev == last)
RongQing.Li8f891482011-11-30 23:43:07 -05001516 goto outroll;
Herbert Xufcc5a032007-07-30 17:03:38 -07001517
Eric W. Biederman881d9662007-09-17 11:56:21 -07001518 if (dev->flags & IFF_UP) {
Jiri Pirko351638e2013-05-28 01:30:21 +00001519 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1520 dev);
1521 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
Eric W. Biederman881d9662007-09-17 11:56:21 -07001522 }
Jiri Pirko351638e2013-05-28 01:30:21 +00001523 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07001524 }
Herbert Xufcc5a032007-07-30 17:03:38 -07001525 }
Pavel Emelyanovc67625a2007-11-14 15:53:16 -08001526
RongQing.Li8f891482011-11-30 23:43:07 -05001527outroll:
Pavel Emelyanovc67625a2007-11-14 15:53:16 -08001528 raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xufcc5a032007-07-30 17:03:38 -07001529 goto unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001531EXPORT_SYMBOL(register_netdevice_notifier);
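/*
 * Illustrative usage sketch (not part of dev.c): a module that wants to
 * track interfaces coming up could register a notifier like the one below.
 * The function and variable names are assumptions made for the example.
 *
 *	static int example_netdev_event(struct notifier_block *nb,
 *					unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 *
 *		if (event == NETDEV_UP)
 *			pr_info("%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_nb = {
 *		.notifier_call = example_netdev_event,
 *	};
 *
 *	err = register_netdevice_notifier(&example_nb);
 *	...
 *	unregister_netdevice_notifier(&example_nb);
 */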
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532
1533/**
1534 * unregister_netdevice_notifier - unregister a network notifier block
1535 * @nb: notifier
1536 *
1537 * Unregister a notifier previously registered by
1538 * register_netdevice_notifier(). The notifier is unlinked from the
1539 * kernel structures and may then be reused. A negative errno code
1540 * is returned on a failure.
Eric W. Biederman7d3d43d2012-04-06 15:33:35 +00001541 *
1542 * After unregistering, unregister and down device events are synthesized
1543 * for all devices on the device list and delivered to the removed notifier,
1544 * removing the need for special-case cleanup code.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545 */
1546
1547int unregister_netdevice_notifier(struct notifier_block *nb)
1548{
Eric W. Biederman7d3d43d2012-04-06 15:33:35 +00001549 struct net_device *dev;
1550 struct net *net;
Herbert Xu9f514952006-03-25 01:24:25 -08001551 int err;
1552
1553 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001554 err = raw_notifier_chain_unregister(&netdev_chain, nb);
Eric W. Biederman7d3d43d2012-04-06 15:33:35 +00001555 if (err)
1556 goto unlock;
1557
1558 for_each_net(net) {
1559 for_each_netdev(net, dev) {
1560 if (dev->flags & IFF_UP) {
Jiri Pirko351638e2013-05-28 01:30:21 +00001561 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1562 dev);
1563 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
Eric W. Biederman7d3d43d2012-04-06 15:33:35 +00001564 }
Jiri Pirko351638e2013-05-28 01:30:21 +00001565 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
Eric W. Biederman7d3d43d2012-04-06 15:33:35 +00001566 }
1567 }
1568unlock:
Herbert Xu9f514952006-03-25 01:24:25 -08001569 rtnl_unlock();
1570 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001572EXPORT_SYMBOL(unregister_netdevice_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573
1574/**
Jiri Pirko351638e2013-05-28 01:30:21 +00001575 * call_netdevice_notifiers_info - call all network notifier blocks
1576 * @val: value passed unmodified to notifier function
1577 * @dev: net_device pointer passed unmodified to notifier function
1578 * @info: notifier information data
1579 *
1580 * Call all network notifier blocks. Parameters and return value
1581 * are as for raw_notifier_call_chain().
1582 */
1583
stephen hemminger1d143d92013-12-29 14:01:29 -08001584static int call_netdevice_notifiers_info(unsigned long val,
1585 struct net_device *dev,
1586 struct netdev_notifier_info *info)
Jiri Pirko351638e2013-05-28 01:30:21 +00001587{
1588 ASSERT_RTNL();
1589 netdev_notifier_info_init(info, dev);
1590 return raw_notifier_call_chain(&netdev_chain, val, info);
1591}
Jiri Pirko351638e2013-05-28 01:30:21 +00001592
1593/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 * call_netdevice_notifiers - call all network notifier blocks
1595 * @val: value passed unmodified to notifier function
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07001596 * @dev: net_device pointer passed unmodified to notifier function
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 *
1598 * Call all network notifier blocks. Parameters and return value
Alan Sternf07d5b92006-05-09 15:23:03 -07001599 * are as for raw_notifier_call_chain().
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 */
1601
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001602int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603{
Jiri Pirko351638e2013-05-28 01:30:21 +00001604 struct netdev_notifier_info info;
1605
1606 return call_netdevice_notifiers_info(val, dev, &info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607}
stephen hemmingeredf947f2011-03-24 13:24:01 +00001608EXPORT_SYMBOL(call_netdevice_notifiers);
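/*
 * Illustrative usage sketch (not part of this function): core code raises
 * events through this helper after a state change, for example after an
 * MTU update (as dev_set_mtu() does):
 *
 *	dev->mtu = new_mtu;
 *	call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
 */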
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609
Ingo Molnarc5905af2012-02-24 08:31:31 +01001610static struct static_key netstamp_needed __read_mostly;
Eric Dumazetb90e5792011-11-28 11:16:50 +00001611#ifdef HAVE_JUMP_LABEL
Ingo Molnarc5905af2012-02-24 08:31:31 +01001612/* We are not allowed to call static_key_slow_dec() from irq context
Eric Dumazetb90e5792011-11-28 11:16:50 +00001613 * If net_disable_timestamp() is called from irq context, defer the
Ingo Molnarc5905af2012-02-24 08:31:31 +01001614 * static_key_slow_dec() calls.
Eric Dumazetb90e5792011-11-28 11:16:50 +00001615 */
1616static atomic_t netstamp_needed_deferred;
1617#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618
1619void net_enable_timestamp(void)
1620{
Eric Dumazetb90e5792011-11-28 11:16:50 +00001621#ifdef HAVE_JUMP_LABEL
1622 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1623
1624 if (deferred) {
1625 while (--deferred)
Ingo Molnarc5905af2012-02-24 08:31:31 +01001626 static_key_slow_dec(&netstamp_needed);
Eric Dumazetb90e5792011-11-28 11:16:50 +00001627 return;
1628 }
1629#endif
Ingo Molnarc5905af2012-02-24 08:31:31 +01001630 static_key_slow_inc(&netstamp_needed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001632EXPORT_SYMBOL(net_enable_timestamp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633
1634void net_disable_timestamp(void)
1635{
Eric Dumazetb90e5792011-11-28 11:16:50 +00001636#ifdef HAVE_JUMP_LABEL
1637 if (in_interrupt()) {
1638 atomic_inc(&netstamp_needed_deferred);
1639 return;
1640 }
1641#endif
Ingo Molnarc5905af2012-02-24 08:31:31 +01001642 static_key_slow_dec(&netstamp_needed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001644EXPORT_SYMBOL(net_disable_timestamp);
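/*
 * Illustrative usage sketch (not part of dev.c): subsystems that need
 * packet timestamps (e.g. packet sockets) pair the two calls around the
 * lifetime of that need:
 *
 *	net_enable_timestamp();		// timestamps now taken in the fast path
 *	...
 *	net_disable_timestamp();	// drop the reference again
 */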
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645
Eric Dumazet3b098e22010-05-15 23:57:10 -07001646static inline void net_timestamp_set(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647{
Eric Dumazet588f0332011-11-15 04:12:55 +00001648 skb->tstamp.tv64 = 0;
Ingo Molnarc5905af2012-02-24 08:31:31 +01001649 if (static_key_false(&netstamp_needed))
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001650 __net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651}
1652
Eric Dumazet588f0332011-11-15 04:12:55 +00001653#define net_timestamp_check(COND, SKB) \
Ingo Molnarc5905af2012-02-24 08:31:31 +01001654 if (static_key_false(&netstamp_needed)) { \
Eric Dumazet588f0332011-11-15 04:12:55 +00001655 if ((COND) && !(SKB)->tstamp.tv64) \
1656 __net_timestamp(SKB); \
1657 } \
Eric Dumazet3b098e22010-05-15 23:57:10 -07001658
Vlad Yasevich1ee481f2014-03-27 17:32:29 -04001659bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
Daniel Lezcano79b569f2011-03-30 02:42:17 -07001660{
1661 unsigned int len;
1662
1663 if (!(dev->flags & IFF_UP))
1664 return false;
1665
1666 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1667 if (skb->len <= len)
1668 return true;
1669
1670 /* if TSO is enabled, we don't care about the length as the packet
1671 * could be forwarded without being segmented before
1672 */
1673 if (skb_is_gso(skb))
1674 return true;
1675
1676 return false;
1677}
Vlad Yasevich1ee481f2014-03-27 17:32:29 -04001678EXPORT_SYMBOL_GPL(is_skb_forwardable);
Daniel Lezcano79b569f2011-03-30 02:42:17 -07001679
Herbert Xua0265d22014-04-17 13:45:03 +08001680int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1681{
1682 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1683 if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1684 atomic_long_inc(&dev->rx_dropped);
1685 kfree_skb(skb);
1686 return NET_RX_DROP;
1687 }
1688 }
1689
1690 if (unlikely(!is_skb_forwardable(dev, skb))) {
1691 atomic_long_inc(&dev->rx_dropped);
1692 kfree_skb(skb);
1693 return NET_RX_DROP;
1694 }
1695
1696 skb_scrub_packet(skb, true);
1697 skb->protocol = eth_type_trans(skb, dev);
Jay Vosburgh2c26d342014-12-19 15:32:00 -08001698 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
Herbert Xua0265d22014-04-17 13:45:03 +08001699
1700 return 0;
1701}
1702EXPORT_SYMBOL_GPL(__dev_forward_skb);
1703
Arnd Bergmann44540962009-11-26 06:07:08 +00001704/**
1705 * dev_forward_skb - loopback an skb to another netif
1706 *
1707 * @dev: destination network device
1708 * @skb: buffer to forward
1709 *
1710 * return values:
1711 * NET_RX_SUCCESS (no congestion)
Eric Dumazet6ec82562010-05-06 00:53:53 -07001712 * NET_RX_DROP (packet was dropped, but freed)
Arnd Bergmann44540962009-11-26 06:07:08 +00001713 *
1714 * dev_forward_skb can be used for injecting an skb from the
1715 * start_xmit function of one device into the receive queue
1716 * of another device.
1717 *
1718 * The receiving device may be in another namespace, so
1719 * we have to clear all information in the skb that could
1720 * impact namespace isolation.
1721 */
1722int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1723{
Herbert Xua0265d22014-04-17 13:45:03 +08001724 return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
Arnd Bergmann44540962009-11-26 06:07:08 +00001725}
1726EXPORT_SYMBOL_GPL(dev_forward_skb);
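/*
 * Illustrative usage sketch (not part of dev.c): a veth-like pair device
 * can hand a transmitted skb straight to its peer's receive path. The
 * "example_priv" structure and its "peer" field are assumptions made for
 * the example; dev_forward_skb() consumes the skb in all cases.
 *
 *	static netdev_tx_t example_pair_xmit(struct sk_buff *skb,
 *					     struct net_device *dev)
 *	{
 *		struct example_priv *priv = netdev_priv(dev);
 *		struct net_device *peer;
 *
 *		rcu_read_lock();
 *		peer = rcu_dereference(priv->peer);
 *		if (likely(peer))
 *			dev_forward_skb(peer, skb);
 *		else
 *			kfree_skb(skb);
 *		rcu_read_unlock();
 *		return NETDEV_TX_OK;
 *	}
 */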
1727
Changli Gao71d9dec2010-12-15 19:57:25 +00001728static inline int deliver_skb(struct sk_buff *skb,
1729 struct packet_type *pt_prev,
1730 struct net_device *orig_dev)
1731{
Michael S. Tsirkin1080e512012-07-20 09:23:17 +00001732 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1733 return -ENOMEM;
Changli Gao71d9dec2010-12-15 19:57:25 +00001734 atomic_inc(&skb->users);
1735 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1736}
1737
Salam Noureddine7866a622015-01-27 11:35:48 -08001738static inline void deliver_ptype_list_skb(struct sk_buff *skb,
1739 struct packet_type **pt,
1740 struct net_device *dev, __be16 type,
1741 struct list_head *ptype_list)
1742{
1743 struct packet_type *ptype, *pt_prev = *pt;
1744
1745 list_for_each_entry_rcu(ptype, ptype_list, list) {
1746 if (ptype->type != type)
1747 continue;
1748 if (pt_prev)
1749 deliver_skb(skb, pt_prev, dev);
1750 pt_prev = ptype;
1751 }
1752 *pt = pt_prev;
1753}
1754
Eric Leblondc0de08d2012-08-16 22:02:58 +00001755static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1756{
Eric Leblonda3d744e2012-11-06 02:10:10 +00001757 if (!ptype->af_packet_priv || !skb->sk)
Eric Leblondc0de08d2012-08-16 22:02:58 +00001758 return false;
1759
1760 if (ptype->id_match)
1761 return ptype->id_match(ptype, skb->sk);
1762 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1763 return true;
1764
1765 return false;
1766}
1767
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768/*
1769 * Support routine. Sends outgoing frames to any network
1770 * taps currently in use.
1771 */
1772
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001773static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774{
1775 struct packet_type *ptype;
Changli Gao71d9dec2010-12-15 19:57:25 +00001776 struct sk_buff *skb2 = NULL;
1777 struct packet_type *pt_prev = NULL;
Salam Noureddine7866a622015-01-27 11:35:48 -08001778 struct list_head *ptype_list = &ptype_all;
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001779
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780 rcu_read_lock();
Salam Noureddine7866a622015-01-27 11:35:48 -08001781again:
1782 list_for_each_entry_rcu(ptype, ptype_list, list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 /* Never send packets back to the socket
1784 * they originated from - MvS (miquels@drinkel.ow.org)
1785 */
Salam Noureddine7866a622015-01-27 11:35:48 -08001786 if (skb_loop_sk(ptype, skb))
1787 continue;
Changli Gao71d9dec2010-12-15 19:57:25 +00001788
Salam Noureddine7866a622015-01-27 11:35:48 -08001789 if (pt_prev) {
1790 deliver_skb(skb2, pt_prev, skb->dev);
Changli Gao71d9dec2010-12-15 19:57:25 +00001791 pt_prev = ptype;
Salam Noureddine7866a622015-01-27 11:35:48 -08001792 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793 }
Salam Noureddine7866a622015-01-27 11:35:48 -08001794
1795 /* need to clone skb, done only once */
1796 skb2 = skb_clone(skb, GFP_ATOMIC);
1797 if (!skb2)
1798 goto out_unlock;
1799
1800 net_timestamp_set(skb2);
1801
1802 /* skb->nh should be correctly
1803 * set by sender, so that the second statement is
1804 * just protection against buggy protocols.
1805 */
1806 skb_reset_mac_header(skb2);
1807
1808 if (skb_network_header(skb2) < skb2->data ||
1809 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1810 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1811 ntohs(skb2->protocol),
1812 dev->name);
1813 skb_reset_network_header(skb2);
1814 }
1815
1816 skb2->transport_header = skb2->network_header;
1817 skb2->pkt_type = PACKET_OUTGOING;
1818 pt_prev = ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819 }
Salam Noureddine7866a622015-01-27 11:35:48 -08001820
1821 if (ptype_list == &ptype_all) {
1822 ptype_list = &dev->ptype_all;
1823 goto again;
1824 }
1825out_unlock:
Changli Gao71d9dec2010-12-15 19:57:25 +00001826 if (pt_prev)
1827 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828 rcu_read_unlock();
1829}
1830
Ben Hutchings2c530402012-07-10 10:55:09 +00001831/**
1832 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
John Fastabend4f57c082011-01-17 08:06:04 +00001833 * @dev: Network device
1834 * @txq: number of queues available
1835 *
1836 * If real_num_tx_queues is changed the tc mappings may no longer be
1837 * valid. To resolve this, verify the tc mapping remains valid and, if
1838 * not, NULL the mapping. With no priorities mapping to this
1839 * offset/count pair it will no longer be used. In the worst case, if
1840 * TC0 is invalid, nothing can be done, so disable priority mappings. It
1841 * is expected that drivers will fix this mapping if they can before
1842 * calling netif_set_real_num_tx_queues.
1843 */
Eric Dumazetbb134d22011-01-20 19:18:08 +00001844static void netif_setup_tc(struct net_device *dev, unsigned int txq)
John Fastabend4f57c082011-01-17 08:06:04 +00001845{
1846 int i;
1847 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1848
1849 /* If TC0 is invalidated disable TC mapping */
1850 if (tc->offset + tc->count > txq) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00001851 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
John Fastabend4f57c082011-01-17 08:06:04 +00001852 dev->num_tc = 0;
1853 return;
1854 }
1855
1856 /* Invalidated prio to tc mappings set to TC0 */
1857 for (i = 1; i < TC_BITMASK + 1; i++) {
1858 int q = netdev_get_prio_tc_map(dev, i);
1859
1860 tc = &dev->tc_to_txq[q];
1861 if (tc->offset + tc->count > txq) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00001862 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1863 i, q);
John Fastabend4f57c082011-01-17 08:06:04 +00001864 netdev_set_prio_tc_map(dev, i, 0);
1865 }
1866 }
1867}
1868
Alexander Duyck537c00d2013-01-10 08:57:02 +00001869#ifdef CONFIG_XPS
1870static DEFINE_MUTEX(xps_map_mutex);
1871#define xmap_dereference(P) \
1872 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
1873
Alexander Duyck10cdc3f2013-01-10 08:57:17 +00001874static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
1875 int cpu, u16 index)
1876{
1877 struct xps_map *map = NULL;
1878 int pos;
1879
1880 if (dev_maps)
1881 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1882
1883 for (pos = 0; map && pos < map->len; pos++) {
1884 if (map->queues[pos] == index) {
1885 if (map->len > 1) {
1886 map->queues[pos] = map->queues[--map->len];
1887 } else {
1888 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
1889 kfree_rcu(map, rcu);
1890 map = NULL;
1891 }
1892 break;
1893 }
1894 }
1895
1896 return map;
1897}
1898
Alexander Duyck024e9672013-01-10 08:57:46 +00001899static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
Alexander Duyck537c00d2013-01-10 08:57:02 +00001900{
1901 struct xps_dev_maps *dev_maps;
Alexander Duyck024e9672013-01-10 08:57:46 +00001902 int cpu, i;
Alexander Duyck10cdc3f2013-01-10 08:57:17 +00001903 bool active = false;
Alexander Duyck537c00d2013-01-10 08:57:02 +00001904
1905 mutex_lock(&xps_map_mutex);
1906 dev_maps = xmap_dereference(dev->xps_maps);
1907
1908 if (!dev_maps)
1909 goto out_no_maps;
1910
Alexander Duyck10cdc3f2013-01-10 08:57:17 +00001911 for_each_possible_cpu(cpu) {
Alexander Duyck024e9672013-01-10 08:57:46 +00001912 for (i = index; i < dev->num_tx_queues; i++) {
1913 if (!remove_xps_queue(dev_maps, cpu, i))
1914 break;
1915 }
1916 if (i == dev->num_tx_queues)
Alexander Duyck10cdc3f2013-01-10 08:57:17 +00001917 active = true;
Alexander Duyck537c00d2013-01-10 08:57:02 +00001918 }
1919
Alexander Duyck10cdc3f2013-01-10 08:57:17 +00001920 if (!active) {
Alexander Duyck537c00d2013-01-10 08:57:02 +00001921 RCU_INIT_POINTER(dev->xps_maps, NULL);
1922 kfree_rcu(dev_maps, rcu);
1923 }
1924
Alexander Duyck024e9672013-01-10 08:57:46 +00001925 for (i = index; i < dev->num_tx_queues; i++)
1926 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
1927 NUMA_NO_NODE);
1928
Alexander Duyck537c00d2013-01-10 08:57:02 +00001929out_no_maps:
1930 mutex_unlock(&xps_map_mutex);
1931}
1932
Alexander Duyck01c5f862013-01-10 08:57:35 +00001933static struct xps_map *expand_xps_map(struct xps_map *map,
1934 int cpu, u16 index)
1935{
1936 struct xps_map *new_map;
1937 int alloc_len = XPS_MIN_MAP_ALLOC;
1938 int i, pos;
1939
1940 for (pos = 0; map && pos < map->len; pos++) {
1941 if (map->queues[pos] != index)
1942 continue;
1943 return map;
1944 }
1945
1946 /* Need to add queue to this CPU's existing map */
1947 if (map) {
1948 if (pos < map->alloc_len)
1949 return map;
1950
1951 alloc_len = map->alloc_len * 2;
1952 }
1953
1954 /* Need to allocate new map to store queue on this CPU's map */
1955 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
1956 cpu_to_node(cpu));
1957 if (!new_map)
1958 return NULL;
1959
1960 for (i = 0; i < pos; i++)
1961 new_map->queues[i] = map->queues[i];
1962 new_map->alloc_len = alloc_len;
1963 new_map->len = pos;
1964
1965 return new_map;
1966}
1967
Michael S. Tsirkin35735402013-10-02 09:14:06 +03001968int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
1969 u16 index)
Alexander Duyck537c00d2013-01-10 08:57:02 +00001970{
Alexander Duyck01c5f862013-01-10 08:57:35 +00001971 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
Alexander Duyck537c00d2013-01-10 08:57:02 +00001972 struct xps_map *map, *new_map;
Alexander Duyck537c00d2013-01-10 08:57:02 +00001973 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
Alexander Duyck01c5f862013-01-10 08:57:35 +00001974 int cpu, numa_node_id = -2;
1975 bool active = false;
Alexander Duyck537c00d2013-01-10 08:57:02 +00001976
1977 mutex_lock(&xps_map_mutex);
1978
1979 dev_maps = xmap_dereference(dev->xps_maps);
1980
Alexander Duyck01c5f862013-01-10 08:57:35 +00001981 /* allocate memory for queue storage */
1982 for_each_online_cpu(cpu) {
1983 if (!cpumask_test_cpu(cpu, mask))
1984 continue;
Alexander Duyck537c00d2013-01-10 08:57:02 +00001985
Alexander Duyck01c5f862013-01-10 08:57:35 +00001986 if (!new_dev_maps)
1987 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
Alexander Duyck2bb60cb2013-02-22 06:38:44 +00001988 if (!new_dev_maps) {
1989 mutex_unlock(&xps_map_mutex);
Alexander Duyck01c5f862013-01-10 08:57:35 +00001990 return -ENOMEM;
Alexander Duyck2bb60cb2013-02-22 06:38:44 +00001991 }
Alexander Duyck01c5f862013-01-10 08:57:35 +00001992
1993 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
1994 NULL;
1995
1996 map = expand_xps_map(map, cpu, index);
1997 if (!map)
1998 goto error;
1999
2000 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
2001 }
2002
2003 if (!new_dev_maps)
2004 goto out_no_new_maps;
2005
2006 for_each_possible_cpu(cpu) {
2007 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
2008 /* add queue to CPU maps */
2009 int pos = 0;
2010
2011 map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2012 while ((pos < map->len) && (map->queues[pos] != index))
2013 pos++;
2014
2015 if (pos == map->len)
2016 map->queues[map->len++] = index;
Alexander Duyck537c00d2013-01-10 08:57:02 +00002017#ifdef CONFIG_NUMA
Alexander Duyck537c00d2013-01-10 08:57:02 +00002018 if (numa_node_id == -2)
2019 numa_node_id = cpu_to_node(cpu);
2020 else if (numa_node_id != cpu_to_node(cpu))
2021 numa_node_id = -1;
Alexander Duyck537c00d2013-01-10 08:57:02 +00002022#endif
Alexander Duyck01c5f862013-01-10 08:57:35 +00002023 } else if (dev_maps) {
2024 /* fill in the new device map from the old device map */
2025 map = xmap_dereference(dev_maps->cpu_map[cpu]);
2026 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
Alexander Duyck537c00d2013-01-10 08:57:02 +00002027 }
Alexander Duyck01c5f862013-01-10 08:57:35 +00002028
Alexander Duyck537c00d2013-01-10 08:57:02 +00002029 }
2030
Alexander Duyck01c5f862013-01-10 08:57:35 +00002031 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
2032
Alexander Duyck537c00d2013-01-10 08:57:02 +00002033 /* Cleanup old maps */
Alexander Duyck01c5f862013-01-10 08:57:35 +00002034 if (dev_maps) {
2035 for_each_possible_cpu(cpu) {
2036 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2037 map = xmap_dereference(dev_maps->cpu_map[cpu]);
2038 if (map && map != new_map)
2039 kfree_rcu(map, rcu);
2040 }
Alexander Duyck537c00d2013-01-10 08:57:02 +00002041
Alexander Duyck537c00d2013-01-10 08:57:02 +00002042 kfree_rcu(dev_maps, rcu);
Alexander Duyck01c5f862013-01-10 08:57:35 +00002043 }
Alexander Duyck537c00d2013-01-10 08:57:02 +00002044
Alexander Duyck01c5f862013-01-10 08:57:35 +00002045 dev_maps = new_dev_maps;
2046 active = true;
2047
2048out_no_new_maps:
2049 /* update Tx queue numa node */
Alexander Duyck537c00d2013-01-10 08:57:02 +00002050 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2051 (numa_node_id >= 0) ? numa_node_id :
2052 NUMA_NO_NODE);
2053
Alexander Duyck01c5f862013-01-10 08:57:35 +00002054 if (!dev_maps)
2055 goto out_no_maps;
2056
2057 /* removes queue from unused CPUs */
2058 for_each_possible_cpu(cpu) {
2059 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
2060 continue;
2061
2062 if (remove_xps_queue(dev_maps, cpu, index))
2063 active = true;
2064 }
2065
2066 /* free map if not active */
2067 if (!active) {
2068 RCU_INIT_POINTER(dev->xps_maps, NULL);
2069 kfree_rcu(dev_maps, rcu);
2070 }
2071
2072out_no_maps:
Alexander Duyck537c00d2013-01-10 08:57:02 +00002073 mutex_unlock(&xps_map_mutex);
2074
2075 return 0;
2076error:
Alexander Duyck01c5f862013-01-10 08:57:35 +00002077 /* remove any maps that we added */
2078 for_each_possible_cpu(cpu) {
2079 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2080 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2081 NULL;
2082 if (new_map && new_map != map)
2083 kfree(new_map);
2084 }
2085
Alexander Duyck537c00d2013-01-10 08:57:02 +00002086 mutex_unlock(&xps_map_mutex);
2087
Alexander Duyck537c00d2013-01-10 08:57:02 +00002088 kfree(new_dev_maps);
2089 return -ENOMEM;
2090}
2091EXPORT_SYMBOL(netif_set_xps_queue);
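/*
 * Illustrative usage sketch (not part of dev.c): a multiqueue driver can
 * steer transmits from CPU i to TX queue i, assuming one queue per online
 * CPU:
 *
 *	for (i = 0; i < dev->real_num_tx_queues; i++)
 *		netif_set_xps_queue(dev, cpumask_of(i), i);
 */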
2092
2093#endif
John Fastabendf0796d52010-07-01 13:21:57 +00002094/*
2095 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
2096 * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
2097 */
Tom Herberte6484932010-10-18 18:04:39 +00002098int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
John Fastabendf0796d52010-07-01 13:21:57 +00002099{
Tom Herbert1d24eb42010-11-21 13:17:27 +00002100 int rc;
2101
Tom Herberte6484932010-10-18 18:04:39 +00002102 if (txq < 1 || txq > dev->num_tx_queues)
2103 return -EINVAL;
John Fastabendf0796d52010-07-01 13:21:57 +00002104
Ben Hutchings5c565802011-02-15 19:39:21 +00002105 if (dev->reg_state == NETREG_REGISTERED ||
2106 dev->reg_state == NETREG_UNREGISTERING) {
Tom Herberte6484932010-10-18 18:04:39 +00002107 ASSERT_RTNL();
2108
Tom Herbert1d24eb42010-11-21 13:17:27 +00002109 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
2110 txq);
Tom Herbertbf264142010-11-26 08:36:09 +00002111 if (rc)
2112 return rc;
2113
John Fastabend4f57c082011-01-17 08:06:04 +00002114 if (dev->num_tc)
2115 netif_setup_tc(dev, txq);
2116
Alexander Duyck024e9672013-01-10 08:57:46 +00002117 if (txq < dev->real_num_tx_queues) {
Tom Herberte6484932010-10-18 18:04:39 +00002118 qdisc_reset_all_tx_gt(dev, txq);
Alexander Duyck024e9672013-01-10 08:57:46 +00002119#ifdef CONFIG_XPS
2120 netif_reset_xps_queues_gt(dev, txq);
2121#endif
2122 }
John Fastabendf0796d52010-07-01 13:21:57 +00002123 }
Tom Herberte6484932010-10-18 18:04:39 +00002124
2125 dev->real_num_tx_queues = txq;
2126 return 0;
John Fastabendf0796d52010-07-01 13:21:57 +00002127}
2128EXPORT_SYMBOL(netif_set_real_num_tx_queues);
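/*
 * Illustrative usage sketch (not part of dev.c): after reconfiguring its
 * hardware channels a driver shrinks or grows the set of queues actually
 * in use (under RTNL once the device is registered). "new_txq_count" and
 * the error label are assumptions made for the example.
 *
 *	err = netif_set_real_num_tx_queues(dev, new_txq_count);
 *	if (err)
 *		goto revert_channels;
 */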
Denis Vlasenko56079432006-03-29 15:57:29 -08002129
Michael Daltona953be52014-01-16 22:23:28 -08002130#ifdef CONFIG_SYSFS
Ben Hutchings62fe0b42010-09-27 08:24:33 +00002131/**
2132 * netif_set_real_num_rx_queues - set actual number of RX queues used
2133 * @dev: Network device
2134 * @rxq: Actual number of RX queues
2135 *
2136 * This must be called either with the rtnl_lock held or before
2137 * registration of the net device. Returns 0 on success, or a
Ben Hutchings4e7f7952010-10-08 10:33:39 -07002138 * negative error code. If called before registration, it always
2139 * succeeds.
Ben Hutchings62fe0b42010-09-27 08:24:33 +00002140 */
2141int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
2142{
2143 int rc;
2144
Tom Herbertbd25fa72010-10-18 18:00:16 +00002145 if (rxq < 1 || rxq > dev->num_rx_queues)
2146 return -EINVAL;
2147
Ben Hutchings62fe0b42010-09-27 08:24:33 +00002148 if (dev->reg_state == NETREG_REGISTERED) {
2149 ASSERT_RTNL();
2150
Ben Hutchings62fe0b42010-09-27 08:24:33 +00002151 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
2152 rxq);
2153 if (rc)
2154 return rc;
Ben Hutchings62fe0b42010-09-27 08:24:33 +00002155 }
2156
2157 dev->real_num_rx_queues = rxq;
2158 return 0;
2159}
2160EXPORT_SYMBOL(netif_set_real_num_rx_queues);
2161#endif
2162
Ben Hutchings2c530402012-07-10 10:55:09 +00002163/**
2164 * netif_get_num_default_rss_queues - default number of RSS queues
Yuval Mintz16917b82012-07-01 03:18:50 +00002165 *
2166 * This routine should set an upper limit on the number of RSS queues
2167 * used by default by multiqueue devices.
2168 */
Ben Hutchingsa55b1382012-07-10 10:54:38 +00002169int netif_get_num_default_rss_queues(void)
Yuval Mintz16917b82012-07-01 03:18:50 +00002170{
2171 return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
2172}
2173EXPORT_SYMBOL(netif_get_num_default_rss_queues);
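/*
 * Illustrative usage sketch (not part of dev.c): drivers typically clamp
 * the default against their own hardware limit when sizing RSS.
 * "hw_max_rss_queues" is an assumed variable name.
 *
 *	num_rss = min_t(int, netif_get_num_default_rss_queues(),
 *			hw_max_rss_queues);
 */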
2174
Jarek Poplawskidef82a12008-08-17 21:54:43 -07002175static inline void __netif_reschedule(struct Qdisc *q)
2176{
2177 struct softnet_data *sd;
2178 unsigned long flags;
2179
2180 local_irq_save(flags);
Christoph Lameter903ceff2014-08-17 12:30:35 -05002181 sd = this_cpu_ptr(&softnet_data);
Changli Gaoa9cbd582010-04-26 23:06:24 +00002182 q->next_sched = NULL;
2183 *sd->output_queue_tailp = q;
2184 sd->output_queue_tailp = &q->next_sched;
Jarek Poplawskidef82a12008-08-17 21:54:43 -07002185 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2186 local_irq_restore(flags);
2187}
2188
David S. Miller37437bb2008-07-16 02:15:04 -07002189void __netif_schedule(struct Qdisc *q)
Denis Vlasenko56079432006-03-29 15:57:29 -08002190{
Jarek Poplawskidef82a12008-08-17 21:54:43 -07002191 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
2192 __netif_reschedule(q);
Denis Vlasenko56079432006-03-29 15:57:29 -08002193}
2194EXPORT_SYMBOL(__netif_schedule);
2195
Eric Dumazete6247022013-12-05 04:45:08 -08002196struct dev_kfree_skb_cb {
2197 enum skb_free_reason reason;
2198};
2199
2200static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
Denis Vlasenko56079432006-03-29 15:57:29 -08002201{
Eric Dumazete6247022013-12-05 04:45:08 -08002202 return (struct dev_kfree_skb_cb *)skb->cb;
Denis Vlasenko56079432006-03-29 15:57:29 -08002203}
Denis Vlasenko56079432006-03-29 15:57:29 -08002204
John Fastabend46e5da42014-09-12 20:04:52 -07002205void netif_schedule_queue(struct netdev_queue *txq)
2206{
2207 rcu_read_lock();
2208 if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
2209 struct Qdisc *q = rcu_dereference(txq->qdisc);
2210
2211 __netif_schedule(q);
2212 }
2213 rcu_read_unlock();
2214}
2215EXPORT_SYMBOL(netif_schedule_queue);
2216
2217/**
2218 * netif_wake_subqueue - allow sending packets on subqueue
2219 * @dev: network device
2220 * @queue_index: sub queue index
2221 *
2222 * Resume individual transmit queue of a device with multiple transmit queues.
2223 */
2224void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
2225{
2226 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
2227
2228 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) {
2229 struct Qdisc *q;
2230
2231 rcu_read_lock();
2232 q = rcu_dereference(txq->qdisc);
2233 __netif_schedule(q);
2234 rcu_read_unlock();
2235 }
2236}
2237EXPORT_SYMBOL(netif_wake_subqueue);
2238
2239void netif_tx_wake_queue(struct netdev_queue *dev_queue)
2240{
2241 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
2242 struct Qdisc *q;
2243
2244 rcu_read_lock();
2245 q = rcu_dereference(dev_queue->qdisc);
2246 __netif_schedule(q);
2247 rcu_read_unlock();
2248 }
2249}
2250EXPORT_SYMBOL(netif_tx_wake_queue);
2251
Eric Dumazete6247022013-12-05 04:45:08 -08002252void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
2253{
2254 unsigned long flags;
2255
2256 if (likely(atomic_read(&skb->users) == 1)) {
2257 smp_rmb();
2258 atomic_set(&skb->users, 0);
2259 } else if (likely(!atomic_dec_and_test(&skb->users))) {
2260 return;
2261 }
2262 get_kfree_skb_cb(skb)->reason = reason;
2263 local_irq_save(flags);
2264 skb->next = __this_cpu_read(softnet_data.completion_queue);
2265 __this_cpu_write(softnet_data.completion_queue, skb);
2266 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2267 local_irq_restore(flags);
2268}
2269EXPORT_SYMBOL(__dev_kfree_skb_irq);
2270
2271void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
Denis Vlasenko56079432006-03-29 15:57:29 -08002272{
2273 if (in_irq() || irqs_disabled())
Eric Dumazete6247022013-12-05 04:45:08 -08002274 __dev_kfree_skb_irq(skb, reason);
Denis Vlasenko56079432006-03-29 15:57:29 -08002275 else
2276 dev_kfree_skb(skb);
2277}
Eric Dumazete6247022013-12-05 04:45:08 -08002278EXPORT_SYMBOL(__dev_kfree_skb_any);
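/*
 * Illustrative usage sketch (not part of dev.c): TX completion handlers
 * that may run in hard IRQ context use the _any wrappers from
 * <linux/netdevice.h> rather than kfree_skb() directly:
 *
 *	dev_consume_skb_any(skb);	// skb was transmitted successfully
 *	dev_kfree_skb_any(skb);		// skb is being dropped
 */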
Denis Vlasenko56079432006-03-29 15:57:29 -08002279
2280
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002281/**
2282 * netif_device_detach - mark device as removed
2283 * @dev: network device
2284 *
2285 * Mark device as removed from the system and therefore no longer available.
2286 */
Denis Vlasenko56079432006-03-29 15:57:29 -08002287void netif_device_detach(struct net_device *dev)
2288{
2289 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
2290 netif_running(dev)) {
Alexander Duyckd5431032009-04-08 13:15:22 +00002291 netif_tx_stop_all_queues(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08002292 }
2293}
2294EXPORT_SYMBOL(netif_device_detach);
2295
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002296/**
2297 * netif_device_attach - mark device as attached
2298 * @dev: network device
2299 *
2300 * Mark device as attached to the system and restart if needed.
2301 */
Denis Vlasenko56079432006-03-29 15:57:29 -08002302void netif_device_attach(struct net_device *dev)
2303{
2304 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
2305 netif_running(dev)) {
Alexander Duyckd5431032009-04-08 13:15:22 +00002306 netif_tx_wake_all_queues(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002307 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08002308 }
2309}
2310EXPORT_SYMBOL(netif_device_attach);
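/*
 * Illustrative usage sketch (not part of dev.c): PCI drivers commonly pair
 * these helpers in their suspend/resume callbacks:
 *
 *	// suspend path
 *	netif_device_detach(netdev);
 *	...
 *	// resume path
 *	netif_device_attach(netdev);
 */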
2311
Ben Hutchings36c92472012-01-17 07:57:56 +00002312static void skb_warn_bad_offload(const struct sk_buff *skb)
2313{
Michał Mirosław65e9d2f2012-01-17 10:00:40 +00002314 static const netdev_features_t null_features = 0;
Ben Hutchings36c92472012-01-17 07:57:56 +00002315 struct net_device *dev = skb->dev;
2316 const char *driver = "";
2317
Ben Greearc846ad92013-04-19 10:45:52 +00002318 if (!net_ratelimit())
2319 return;
2320
Ben Hutchings36c92472012-01-17 07:57:56 +00002321 if (dev && dev->dev.parent)
2322 driver = dev_driver_string(dev->dev.parent);
2323
2324 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
2325 "gso_type=%d ip_summed=%d\n",
Michał Mirosław65e9d2f2012-01-17 10:00:40 +00002326 driver, dev ? &dev->features : &null_features,
2327 skb->sk ? &skb->sk->sk_route_caps : &null_features,
Ben Hutchings36c92472012-01-17 07:57:56 +00002328 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
2329 skb_shinfo(skb)->gso_type, skb->ip_summed);
2330}
2331
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332/*
2333 * Invalidate hardware checksum when packet is to be mangled, and
2334 * complete checksum manually on outgoing path.
2335 */
Patrick McHardy84fa7932006-08-29 16:44:56 -07002336int skb_checksum_help(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337{
Al Virod3bc23e2006-11-14 21:24:49 -08002338 __wsum csum;
Herbert Xu663ead32007-04-09 11:59:07 -07002339 int ret = 0, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002340
Patrick McHardy84fa7932006-08-29 16:44:56 -07002341 if (skb->ip_summed == CHECKSUM_COMPLETE)
Herbert Xua430a432006-07-08 13:34:56 -07002342 goto out_set_summed;
2343
2344 if (unlikely(skb_shinfo(skb)->gso_size)) {
Ben Hutchings36c92472012-01-17 07:57:56 +00002345 skb_warn_bad_offload(skb);
2346 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347 }
2348
Eric Dumazetcef401d2013-01-25 20:34:37 +00002349 /* Before computing a checksum, we should make sure no frag could
2350 * be modified by an external entity : checksum could be wrong.
2351 */
2352 if (skb_has_shared_frag(skb)) {
2353 ret = __skb_linearize(skb);
2354 if (ret)
2355 goto out;
2356 }
2357
Michał Mirosław55508d62010-12-14 15:24:08 +00002358 offset = skb_checksum_start_offset(skb);
Herbert Xua0308472007-10-15 01:47:15 -07002359 BUG_ON(offset >= skb_headlen(skb));
2360 csum = skb_checksum(skb, offset, skb->len - offset, 0);
2361
2362 offset += skb->csum_offset;
2363 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
2364
2365 if (skb_cloned(skb) &&
2366 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2368 if (ret)
2369 goto out;
2370 }
2371
Herbert Xua0308472007-10-15 01:47:15 -07002372 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
Herbert Xua430a432006-07-08 13:34:56 -07002373out_set_summed:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374 skb->ip_summed = CHECKSUM_NONE;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002375out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376 return ret;
2377}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002378EXPORT_SYMBOL(skb_checksum_help);
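/*
 * Illustrative usage sketch (not part of dev.c): a driver whose hardware
 * cannot checksum a particular packet can fall back to software before
 * queueing it. The "drop" label is an assumption made for the example.
 *
 *	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *	    skb_checksum_help(skb))
 *		goto drop;
 */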
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379
Vlad Yasevich53d64712014-03-27 17:26:18 -04002380__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
Pravin B Shelarec5f0612013-03-07 09:28:01 +00002381{
2382 __be16 type = skb->protocol;
2383
Pravin B Shelar19acc322013-05-07 20:41:07 +00002384 /* Tunnel gso handlers can set protocol to ethernet. */
2385 if (type == htons(ETH_P_TEB)) {
2386 struct ethhdr *eth;
2387
2388 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
2389 return 0;
2390
2391 eth = (struct ethhdr *)skb_mac_header(skb);
2392 type = eth->h_proto;
2393 }
2394
Toshiaki Makitad4bcef32015-01-29 20:37:07 +09002395 return __vlan_get_protocol(skb, type, depth);
Pravin B Shelarec5f0612013-03-07 09:28:01 +00002396}
2397
Pravin B Shelar05e8ef42013-02-14 09:44:55 +00002398/**
2399 * skb_mac_gso_segment - mac layer segmentation handler.
2400 * @skb: buffer to segment
2401 * @features: features for the output path (see dev->features)
2402 */
2403struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2404 netdev_features_t features)
2405{
2406 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2407 struct packet_offload *ptype;
Vlad Yasevich53d64712014-03-27 17:26:18 -04002408 int vlan_depth = skb->mac_len;
2409 __be16 type = skb_network_protocol(skb, &vlan_depth);
Pravin B Shelar05e8ef42013-02-14 09:44:55 +00002410
Pravin B Shelarec5f0612013-03-07 09:28:01 +00002411 if (unlikely(!type))
2412 return ERR_PTR(-EINVAL);
Pravin B Shelar05e8ef42013-02-14 09:44:55 +00002413
Vlad Yasevich53d64712014-03-27 17:26:18 -04002414 __skb_pull(skb, vlan_depth);
Pravin B Shelar05e8ef42013-02-14 09:44:55 +00002415
2416 rcu_read_lock();
2417 list_for_each_entry_rcu(ptype, &offload_base, list) {
2418 if (ptype->type == type && ptype->callbacks.gso_segment) {
Pravin B Shelar05e8ef42013-02-14 09:44:55 +00002419 segs = ptype->callbacks.gso_segment(skb, features);
2420 break;
2421 }
2422 }
2423 rcu_read_unlock();
2424
2425 __skb_push(skb, skb->data - skb_mac_header(skb));
2426
2427 return segs;
2428}
2429EXPORT_SYMBOL(skb_mac_gso_segment);
2430
2431
Cong Wang12b00042013-02-05 16:36:38 +00002432/* openvswitch calls this on rx path, so we need a different check.
2433 */
2434static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
2435{
2436 if (tx_path)
2437 return skb->ip_summed != CHECKSUM_PARTIAL;
2438 else
2439 return skb->ip_summed == CHECKSUM_NONE;
2440}
2441
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002442/**
Cong Wang12b00042013-02-05 16:36:38 +00002443 * __skb_gso_segment - Perform segmentation on skb.
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002444 * @skb: buffer to segment
Herbert Xu576a30e2006-06-27 13:22:38 -07002445 * @features: features for the output path (see dev->features)
Cong Wang12b00042013-02-05 16:36:38 +00002446 * @tx_path: whether it is called in TX path
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002447 *
2448 * This function segments the given skb and returns a list of segments.
Herbert Xu576a30e2006-06-27 13:22:38 -07002449 *
2450 * It may return NULL if the skb requires no segmentation. This is
2451 * only possible when GSO is used for verifying header integrity.
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002452 */
Cong Wang12b00042013-02-05 16:36:38 +00002453struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2454 netdev_features_t features, bool tx_path)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002455{
Cong Wang12b00042013-02-05 16:36:38 +00002456 if (unlikely(skb_needs_check(skb, tx_path))) {
Pravin B Shelar05e8ef42013-02-14 09:44:55 +00002457 int err;
2458
Ben Hutchings36c92472012-01-17 07:57:56 +00002459 skb_warn_bad_offload(skb);
Herbert Xu67fd1a72009-01-19 16:26:44 -08002460
françois romieua40e0a62014-07-15 23:55:35 +02002461 err = skb_cow_head(skb, 0);
2462 if (err < 0)
Herbert Xua430a432006-07-08 13:34:56 -07002463 return ERR_PTR(err);
2464 }
2465
Pravin B Shelar68c33162013-02-14 14:02:41 +00002466 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
Eric Dumazet3347c962013-10-19 11:42:56 -07002467 SKB_GSO_CB(skb)->encap_level = 0;
2468
Pravin B Shelar05e8ef42013-02-14 09:44:55 +00002469 skb_reset_mac_header(skb);
2470 skb_reset_mac_len(skb);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002471
Pravin B Shelar05e8ef42013-02-14 09:44:55 +00002472 return skb_mac_gso_segment(skb, features);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002473}
Cong Wang12b00042013-02-05 16:36:38 +00002474EXPORT_SYMBOL(__skb_gso_segment);
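/*
 * Illustrative usage sketch (not part of this function): callers normally
 * go through the skb_gso_segment() wrapper and then walk the returned
 * segment list, e.g. on a software transmit path. "example_xmit_one()" and
 * the "drop" label are assumptions made for the example.
 *
 *	segs = skb_gso_segment(skb, features);
 *	if (IS_ERR(segs))
 *		goto drop;
 *	if (segs) {
 *		consume_skb(skb);
 *		skb = segs;
 *	}
 *	while (skb) {
 *		struct sk_buff *next = skb->next;
 *
 *		skb->next = NULL;
 *		example_xmit_one(skb);
 *		skb = next;
 *	}
 */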
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002475
Herbert Xufb286bb2005-11-10 13:01:24 -08002476/* Take action when hardware reception checksum errors are detected. */
2477#ifdef CONFIG_BUG
2478void netdev_rx_csum_fault(struct net_device *dev)
2479{
2480 if (net_ratelimit()) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00002481 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
Herbert Xufb286bb2005-11-10 13:01:24 -08002482 dump_stack();
2483 }
2484}
2485EXPORT_SYMBOL(netdev_rx_csum_fault);
2486#endif
2487
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488/* Actually, we should eliminate this check as soon as we know that:
2489 * 1. IOMMU is present and allows mapping all the memory.
2490 * 2. No high memory really exists on this machine.
2491 */
2492
Florian Westphalc1e756b2014-05-05 15:00:44 +02002493static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494{
Herbert Xu3d3a8532006-06-27 13:33:10 -07002495#ifdef CONFIG_HIGHMEM
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496 int i;
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002497 if (!(dev->features & NETIF_F_HIGHDMA)) {
Ian Campbellea2ab692011-08-22 23:44:58 +00002498 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2499 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2500 if (PageHighMem(skb_frag_page(frag)))
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002501 return 1;
Ian Campbellea2ab692011-08-22 23:44:58 +00002502 }
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002503 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002505 if (PCI_DMA_BUS_IS_PHYS) {
2506 struct device *pdev = dev->dev.parent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002507
Eric Dumazet9092c652010-04-02 13:34:49 -07002508 if (!pdev)
2509 return 0;
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002510 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
Ian Campbellea2ab692011-08-22 23:44:58 +00002511 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2512 dma_addr_t addr = page_to_phys(skb_frag_page(frag));
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002513 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2514 return 1;
2515 }
2516 }
Herbert Xu3d3a8532006-06-27 13:33:10 -07002517#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002518 return 0;
2519}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520
Simon Horman3b392dd2014-06-04 08:53:17 +09002521/* For an MPLS offload request, verify we are testing hardware MPLS features
2522 * instead of the standard features for the netdev.
2523 */
Pravin B Shelard0edc7b2014-12-23 16:20:11 -08002524#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
Simon Horman3b392dd2014-06-04 08:53:17 +09002525static netdev_features_t net_mpls_features(struct sk_buff *skb,
2526 netdev_features_t features,
2527 __be16 type)
2528{
Simon Horman25cd9ba2014-10-06 05:05:13 -07002529 if (eth_p_mpls(type))
Simon Horman3b392dd2014-06-04 08:53:17 +09002530 features &= skb->dev->mpls_features;
2531
2532 return features;
2533}
2534#else
2535static netdev_features_t net_mpls_features(struct sk_buff *skb,
2536 netdev_features_t features,
2537 __be16 type)
2538{
2539 return features;
2540}
2541#endif
2542
Michał Mirosławc8f44af2011-11-15 15:29:55 +00002543static netdev_features_t harmonize_features(struct sk_buff *skb,
Florian Westphalc1e756b2014-05-05 15:00:44 +02002544 netdev_features_t features)
Jesse Grossf01a5232011-01-09 06:23:31 +00002545{
Vlad Yasevich53d64712014-03-27 17:26:18 -04002546 int tmp;
Simon Horman3b392dd2014-06-04 08:53:17 +09002547 __be16 type;
2548
2549 type = skb_network_protocol(skb, &tmp);
2550 features = net_mpls_features(skb, features, type);
Vlad Yasevich53d64712014-03-27 17:26:18 -04002551
Ed Cashinc0d680e2012-09-19 15:49:00 +00002552 if (skb->ip_summed != CHECKSUM_NONE &&
Simon Horman3b392dd2014-06-04 08:53:17 +09002553 !can_checksum_protocol(features, type)) {
Jesse Grossf01a5232011-01-09 06:23:31 +00002554 features &= ~NETIF_F_ALL_CSUM;
Florian Westphalc1e756b2014-05-05 15:00:44 +02002555 } else if (illegal_highdma(skb->dev, skb)) {
Jesse Grossf01a5232011-01-09 06:23:31 +00002556 features &= ~NETIF_F_SG;
2557 }
2558
2559 return features;
2560}
2561
Florian Westphalc1e756b2014-05-05 15:00:44 +02002562netdev_features_t netif_skb_features(struct sk_buff *skb)
Jesse Gross58e998c2010-10-29 12:14:55 +00002563{
Jesse Gross5f352272014-12-23 22:37:26 -08002564 struct net_device *dev = skb->dev;
Eric Dumazetfcbeb972014-10-05 10:11:27 -07002565 netdev_features_t features = dev->features;
2566 u16 gso_segs = skb_shinfo(skb)->gso_segs;
Jesse Gross58e998c2010-10-29 12:14:55 +00002567 __be16 protocol = skb->protocol;
2568
Eric Dumazetfcbeb972014-10-05 10:11:27 -07002569 if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
Ben Hutchings30b678d2012-07-30 15:57:00 +00002570 features &= ~NETIF_F_GSO_MASK;
2571
Jesse Gross5f352272014-12-23 22:37:26 -08002572	/* If this is an encapsulation offload request, verify we are
2573	 * testing hardware encapsulation features instead of the
2574	 * standard features for the netdev.
2575	 */
2576 if (skb->encapsulation)
2577 features &= dev->hw_enc_features;
2578
Jiri Pirkodf8a39d2015-01-13 17:13:44 +01002579 if (!skb_vlan_tag_present(skb)) {
Toshiaki Makita796f2da2014-12-22 19:04:14 +09002580 if (unlikely(protocol == htons(ETH_P_8021Q) ||
2581 protocol == htons(ETH_P_8021AD))) {
2582 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2583 protocol = veh->h_vlan_encapsulated_proto;
2584 } else {
Jesse Gross5f352272014-12-23 22:37:26 -08002585 goto finalize;
Toshiaki Makita796f2da2014-12-22 19:04:14 +09002586 }
Jesse Grossf01a5232011-01-09 06:23:31 +00002587 }
Jesse Gross58e998c2010-10-29 12:14:55 +00002588
Michal Kubečekdb115032014-08-25 15:16:22 +02002589 features = netdev_intersect_features(features,
Eric Dumazetfcbeb972014-10-05 10:11:27 -07002590 dev->vlan_features |
Michal Kubečekdb115032014-08-25 15:16:22 +02002591 NETIF_F_HW_VLAN_CTAG_TX |
2592 NETIF_F_HW_VLAN_STAG_TX);
Jesse Grossf01a5232011-01-09 06:23:31 +00002593
Alexander Duyckcdbaa0b2013-07-10 17:05:06 -07002594 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
Michal Kubečekdb115032014-08-25 15:16:22 +02002595 features = netdev_intersect_features(features,
2596 NETIF_F_SG |
2597 NETIF_F_HIGHDMA |
2598 NETIF_F_FRAGLIST |
2599 NETIF_F_GEN_CSUM |
2600 NETIF_F_HW_VLAN_CTAG_TX |
2601 NETIF_F_HW_VLAN_STAG_TX);
Alexander Duyckcdbaa0b2013-07-10 17:05:06 -07002602
Jesse Gross5f352272014-12-23 22:37:26 -08002603finalize:
2604 if (dev->netdev_ops->ndo_features_check)
2605 features &= dev->netdev_ops->ndo_features_check(skb, dev,
2606 features);
2607
Florian Westphalc1e756b2014-05-05 15:00:44 +02002608 return harmonize_features(skb, features);
Jesse Gross58e998c2010-10-29 12:14:55 +00002609}
Florian Westphalc1e756b2014-05-05 15:00:44 +02002610EXPORT_SYMBOL(netif_skb_features);
Jesse Gross58e998c2010-10-29 12:14:55 +00002611
David S. Miller2ea25512014-08-29 21:10:01 -07002612static int xmit_one(struct sk_buff *skb, struct net_device *dev,
David S. Miller95f6b3d2014-08-29 21:57:30 -07002613 struct netdev_queue *txq, bool more)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002614{
David S. Miller2ea25512014-08-29 21:10:01 -07002615 unsigned int len;
2616 int rc;
Stephen Hemminger00829822008-11-20 20:14:53 -08002617
Salam Noureddine7866a622015-01-27 11:35:48 -08002618 if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
David S. Miller2ea25512014-08-29 21:10:01 -07002619 dev_queue_xmit_nit(skb, dev);
Jesse Grossfc741212011-01-09 06:23:32 +00002620
David S. Miller2ea25512014-08-29 21:10:01 -07002621 len = skb->len;
2622 trace_net_dev_start_xmit(skb, dev);
David S. Miller95f6b3d2014-08-29 21:57:30 -07002623 rc = netdev_start_xmit(skb, dev, txq, more);
David S. Miller2ea25512014-08-29 21:10:01 -07002624 trace_net_dev_xmit(skb, rc, dev, len);
Eric Dumazetadf30902009-06-02 05:19:30 +00002625
Patrick McHardy572a9d72009-11-10 06:14:14 +00002626 return rc;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002627}
David S. Miller2ea25512014-08-29 21:10:01 -07002628
David S. Miller8dcda222014-09-01 15:06:40 -07002629struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
2630 struct netdev_queue *txq, int *ret)
David S. Miller7f2e8702014-08-29 21:19:14 -07002631{
2632 struct sk_buff *skb = first;
2633 int rc = NETDEV_TX_OK;
2634
2635 while (skb) {
2636 struct sk_buff *next = skb->next;
2637
2638 skb->next = NULL;
David S. Miller95f6b3d2014-08-29 21:57:30 -07002639 rc = xmit_one(skb, dev, txq, next != NULL);
David S. Miller7f2e8702014-08-29 21:19:14 -07002640 if (unlikely(!dev_xmit_complete(rc))) {
2641 skb->next = next;
2642 goto out;
2643 }
2644
2645 skb = next;
2646 if (netif_xmit_stopped(txq) && skb) {
2647 rc = NETDEV_TX_BUSY;
2648 break;
2649 }
2650 }
2651
2652out:
2653 *ret = rc;
2654 return skb;
2655}
2656
Eric Dumazet1ff0dc92014-10-06 11:26:27 -07002657static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
2658 netdev_features_t features)
David S. Millereae3f882014-08-30 15:17:13 -07002659{
Jiri Pirkodf8a39d2015-01-13 17:13:44 +01002660 if (skb_vlan_tag_present(skb) &&
Jiri Pirko59682502014-11-19 14:04:59 +01002661 !vlan_hw_offload_capable(features, skb->vlan_proto))
2662 skb = __vlan_hwaccel_push_inside(skb);
David S. Millereae3f882014-08-30 15:17:13 -07002663 return skb;
2664}
2665
Eric Dumazet55a93b32014-10-03 15:31:07 -07002666static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
David S. Millereae3f882014-08-30 15:17:13 -07002667{
2668 netdev_features_t features;
2669
2670 if (skb->next)
2671 return skb;
2672
David S. Millereae3f882014-08-30 15:17:13 -07002673 features = netif_skb_features(skb);
2674 skb = validate_xmit_vlan(skb, features);
2675 if (unlikely(!skb))
2676 goto out_null;
2677
Tom Herbert04ffcb22014-10-14 15:19:06 -07002678 if (netif_needs_gso(dev, skb, features)) {
David S. Millerce937182014-08-30 19:22:20 -07002679 struct sk_buff *segs;
2680
2681 segs = skb_gso_segment(skb, features);
Jason Wangcecda692014-09-19 16:04:38 +08002682 if (IS_ERR(segs)) {
Jason Wangaf6dabc2014-12-19 11:09:13 +08002683 goto out_kfree_skb;
Jason Wangcecda692014-09-19 16:04:38 +08002684 } else if (segs) {
2685 consume_skb(skb);
2686 skb = segs;
2687 }
David S. Millereae3f882014-08-30 15:17:13 -07002688 } else {
2689 if (skb_needs_linearize(skb, features) &&
2690 __skb_linearize(skb))
2691 goto out_kfree_skb;
2692
2693		/* If the packet is not checksummed and the device does not
2694		 * support checksumming for this protocol, complete the
2695		 * checksum here.
2696		 */
2697 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2698 if (skb->encapsulation)
2699 skb_set_inner_transport_header(skb,
2700 skb_checksum_start_offset(skb));
2701 else
2702 skb_set_transport_header(skb,
2703 skb_checksum_start_offset(skb));
2704 if (!(features & NETIF_F_ALL_CSUM) &&
2705 skb_checksum_help(skb))
2706 goto out_kfree_skb;
2707 }
2708 }
2709
2710 return skb;
2711
2712out_kfree_skb:
2713 kfree_skb(skb);
2714out_null:
2715 return NULL;
2716}
2717
Eric Dumazet55a93b32014-10-03 15:31:07 -07002718struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
2719{
2720 struct sk_buff *next, *head = NULL, *tail;
2721
Eric Dumazetbec3cfd2014-10-03 20:59:19 -07002722 for (; skb != NULL; skb = next) {
Eric Dumazet55a93b32014-10-03 15:31:07 -07002723 next = skb->next;
2724 skb->next = NULL;
Eric Dumazet55a93b32014-10-03 15:31:07 -07002725
Eric Dumazetbec3cfd2014-10-03 20:59:19 -07002726		/* In case the skb won't be segmented, point it to itself. */
2727 skb->prev = skb;
2728
2729 skb = validate_xmit_skb(skb, dev);
2730 if (!skb)
2731 continue;
2732
2733 if (!head)
2734 head = skb;
2735 else
2736 tail->next = skb;
2737 /* If skb was segmented, skb->prev points to
2738 * the last segment. If not, it still contains skb.
2739 */
2740 tail = skb->prev;
Eric Dumazet55a93b32014-10-03 15:31:07 -07002741 }
2742 return head;
2743}
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002744
Eric Dumazet1def9232013-01-10 12:36:42 +00002745static void qdisc_pkt_len_init(struct sk_buff *skb)
2746{
2747 const struct skb_shared_info *shinfo = skb_shinfo(skb);
2748
2749 qdisc_skb_cb(skb)->pkt_len = skb->len;
2750
2751	/* To get a more precise estimate of the bytes sent on the wire,
2752	 * we add the header size of all segments to pkt_len.
2753	 */
2754 if (shinfo->gso_size) {
Eric Dumazet757b8b12013-01-15 21:14:21 -08002755 unsigned int hdr_len;
Jason Wang15e5a032013-03-25 20:19:59 +00002756 u16 gso_segs = shinfo->gso_segs;
Eric Dumazet1def9232013-01-10 12:36:42 +00002757
Eric Dumazet757b8b12013-01-15 21:14:21 -08002758 /* mac layer + network layer */
2759 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
2760
2761 /* + transport layer */
Eric Dumazet1def9232013-01-10 12:36:42 +00002762 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
2763 hdr_len += tcp_hdrlen(skb);
2764 else
2765 hdr_len += sizeof(struct udphdr);
Jason Wang15e5a032013-03-25 20:19:59 +00002766
2767 if (shinfo->gso_type & SKB_GSO_DODGY)
2768 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
2769 shinfo->gso_size);
2770
2771 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
Eric Dumazet1def9232013-01-10 12:36:42 +00002772 }
2773}
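
/*
 * Worked example of the estimate above (illustrative numbers, not taken
 * from this file): a TCP GSO skb with skb->len = 14546, gso_size = 1448
 * and gso_segs = 10, whose MAC + IP + TCP headers add up to hdr_len = 66.
 * Each of the 10 wire segments carries its own 66-byte header, so
 * pkt_len = 14546 + (10 - 1) * 66 = 15140 bytes, matching the
 * 10 * (1448 + 66) bytes actually sent on the wire.
 */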
2774
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002775static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2776 struct net_device *dev,
2777 struct netdev_queue *txq)
2778{
2779 spinlock_t *root_lock = qdisc_lock(q);
Eric Dumazeta2da5702011-01-20 03:48:19 +00002780 bool contended;
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002781 int rc;
2782
Eric Dumazet1def9232013-01-10 12:36:42 +00002783 qdisc_pkt_len_init(skb);
Eric Dumazeta2da5702011-01-20 03:48:19 +00002784 qdisc_calculate_pkt_len(skb, q);
Eric Dumazet79640a42010-06-02 05:09:29 -07002785	/*
2786	 * Heuristic to force contended enqueues to serialize on a
2787	 * separate lock before trying to get the qdisc main lock.
Ying Xue9bf2b8c2014-06-26 15:56:31 +08002788	 * This permits the __QDISC___STATE_RUNNING owner to get the lock more
2789	 * often and dequeue packets faster.
Eric Dumazet79640a42010-06-02 05:09:29 -07002790	 */
Eric Dumazeta2da5702011-01-20 03:48:19 +00002791 contended = qdisc_is_running(q);
Eric Dumazet79640a42010-06-02 05:09:29 -07002792 if (unlikely(contended))
2793 spin_lock(&q->busylock);
2794
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002795 spin_lock(root_lock);
2796 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2797 kfree_skb(skb);
2798 rc = NET_XMIT_DROP;
2799 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
Eric Dumazetbc135b22010-06-02 03:23:51 -07002800 qdisc_run_begin(q)) {
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002801 /*
2802 * This is a work-conserving queue; there are no old skbs
2803 * waiting to be sent out; and the qdisc is not running -
2804 * xmit the skb directly.
2805 */
Eric Dumazetbfe0d022011-01-09 08:30:54 +00002806
Eric Dumazetbfe0d022011-01-09 08:30:54 +00002807 qdisc_bstats_update(q, skb);
2808
Eric Dumazet55a93b32014-10-03 15:31:07 -07002809 if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
Eric Dumazet79640a42010-06-02 05:09:29 -07002810 if (unlikely(contended)) {
2811 spin_unlock(&q->busylock);
2812 contended = false;
2813 }
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002814 __qdisc_run(q);
Eric Dumazet79640a42010-06-02 05:09:29 -07002815 } else
Eric Dumazetbc135b22010-06-02 03:23:51 -07002816 qdisc_run_end(q);
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002817
2818 rc = NET_XMIT_SUCCESS;
2819 } else {
Eric Dumazeta2da5702011-01-20 03:48:19 +00002820 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
Eric Dumazet79640a42010-06-02 05:09:29 -07002821 if (qdisc_run_begin(q)) {
2822 if (unlikely(contended)) {
2823 spin_unlock(&q->busylock);
2824 contended = false;
2825 }
2826 __qdisc_run(q);
2827 }
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002828 }
2829 spin_unlock(root_lock);
Eric Dumazet79640a42010-06-02 05:09:29 -07002830 if (unlikely(contended))
2831 spin_unlock(&q->busylock);
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002832 return rc;
2833}
2834
Daniel Borkmann86f85152013-12-29 17:27:11 +01002835#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
Neil Horman5bc14212011-11-22 05:10:51 +00002836static void skb_update_prio(struct sk_buff *skb)
2837{
Igor Maravic6977a792011-11-25 07:44:54 +00002838 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
Neil Horman5bc14212011-11-22 05:10:51 +00002839
Eric Dumazet91c68ce2012-07-08 21:45:10 +00002840 if (!skb->priority && skb->sk && map) {
2841 unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
2842
2843 if (prioidx < map->priomap_len)
2844 skb->priority = map->priomap[prioidx];
2845 }
Neil Horman5bc14212011-11-22 05:10:51 +00002846}
2847#else
2848#define skb_update_prio(skb)
2849#endif
2850
Eric Dumazet745e20f2010-09-29 13:23:09 -07002851static DEFINE_PER_CPU(int, xmit_recursion);
David S. Miller11a766c2010-10-25 12:51:55 -07002852#define RECURSION_LIMIT 10
Eric Dumazet745e20f2010-09-29 13:23:09 -07002853
Dave Jonesd29f7492008-07-22 14:09:06 -07002854/**
Michel Machado95603e22012-06-12 10:16:35 +00002855 * dev_loopback_xmit - loop back @skb
2856 * @skb: buffer to transmit
2857 */
2858int dev_loopback_xmit(struct sk_buff *skb)
2859{
2860 skb_reset_mac_header(skb);
2861 __skb_pull(skb, skb_network_offset(skb));
2862 skb->pkt_type = PACKET_LOOPBACK;
2863 skb->ip_summed = CHECKSUM_UNNECESSARY;
2864 WARN_ON(!skb_dst(skb));
2865 skb_dst_force(skb);
2866 netif_rx_ni(skb);
2867 return 0;
2868}
2869EXPORT_SYMBOL(dev_loopback_xmit);
2870
2871/**
Jason Wang9d08dd32014-01-20 11:25:13 +08002872 * __dev_queue_xmit - transmit a buffer
Dave Jonesd29f7492008-07-22 14:09:06 -07002873 * @skb: buffer to transmit
Jason Wang9d08dd32014-01-20 11:25:13 +08002874 * @accel_priv: private data used for L2 forwarding offload
Dave Jonesd29f7492008-07-22 14:09:06 -07002875 *
2876 * Queue a buffer for transmission to a network device. The caller must
2877 * have set the device and priority and built the buffer before calling
2878 * this function. The function can be called from an interrupt.
2879 *
2880 * A negative errno code is returned on a failure. A success does not
2881 * guarantee the frame will be transmitted as it may be dropped due
2882 * to congestion or traffic shaping.
2883 *
2884 * Note that this function can also return errors from the queue
2885 * disciplines, including NET_XMIT_DROP, which is a positive value,
2886 * so errors can also be positive.
2888 *
2889 * Regardless of the return value, the skb is consumed, so it is currently
2890 * difficult to retry a send to this method. (You can bump the ref count
2891 * before sending to hold a reference for retry if you are careful.)
2892 *
2893 * When calling this method, interrupts MUST be enabled. This is because
2894 * the BH enable code must have IRQs enabled so that it will not deadlock.
2895 * --BLG
2896 */
Rashika Kheria0a59f3a2014-02-09 20:26:25 +05302897static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002898{
2899 struct net_device *dev = skb->dev;
David S. Millerdc2b4842008-07-08 17:18:23 -07002900 struct netdev_queue *txq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002901 struct Qdisc *q;
2902 int rc = -ENOMEM;
2903
Eric Dumazet6d1ccff2013-02-05 20:22:20 +00002904 skb_reset_mac_header(skb);
2905
Willem de Bruijne7fd2882014-08-04 22:11:48 -04002906 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
2907 __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
2908
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002909 /* Disable soft irqs for various locks below. Also
2910 * stops preemption for RCU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002911 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002912 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002913
Neil Horman5bc14212011-11-22 05:10:51 +00002914 skb_update_prio(skb);
2915
Eric Dumazet02875872014-10-05 18:38:35 -07002916	/* If the device/qdisc doesn't need skb->dst, release it right now
2917	 * while it's hot in this CPU's cache.
2918	 */
2919 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2920 skb_dst_drop(skb);
2921 else
2922 skb_dst_force(skb);
2923
Jason Wangf663dd92014-01-10 16:18:26 +08002924 txq = netdev_pick_tx(dev, skb, accel_priv);
Paul E. McKenneya898def2010-02-22 17:04:49 -08002925 q = rcu_dereference_bh(txq->qdisc);
David S. Miller37437bb2008-07-16 02:15:04 -07002926
Linus Torvalds1da177e2005-04-16 15:20:36 -07002927#ifdef CONFIG_NET_CLS_ACT
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002928 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002929#endif
Koki Sanagicf66ba52010-08-23 18:45:02 +09002930 trace_net_dev_queue(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002931 if (q->enqueue) {
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002932 rc = __dev_xmit_skb(skb, q, dev, txq);
David S. Miller37437bb2008-07-16 02:15:04 -07002933 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002934 }
2935
2936	/* The device has no queue. This is the common case for software
2937	   devices: loopback, all sorts of tunnels...
2938
Herbert Xu932ff272006-06-09 12:20:56 -07002939	   Really, it is unlikely that netif_tx_lock protection is necessary
2940	   here (e.g. loopback and IP tunnels are clean, ignoring statistics
Linus Torvalds1da177e2005-04-16 15:20:36 -07002941	   counters).
2942	   However, it is possible that they rely on the protection
2943	   we provide here.
2944
2945	   Check this and shoot the lock; it is not prone to deadlocks.
2946	   Or shoot the noqueue qdisc, which is even simpler 8)
2947	 */
2948 if (dev->flags & IFF_UP) {
2949 int cpu = smp_processor_id(); /* ok because BHs are off */
2950
David S. Millerc773e842008-07-08 23:13:53 -07002951 if (txq->xmit_lock_owner != cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002952
Eric Dumazet745e20f2010-09-29 13:23:09 -07002953 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2954 goto recursion_alert;
2955
Jesper Dangaard Brouer1f595332014-09-03 17:56:09 +02002956 skb = validate_xmit_skb(skb, dev);
2957 if (!skb)
2958 goto drop;
2959
David S. Millerc773e842008-07-08 23:13:53 -07002960 HARD_TX_LOCK(dev, txq, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002961
Tom Herbert734664982011-11-28 16:32:44 +00002962 if (!netif_xmit_stopped(txq)) {
Eric Dumazet745e20f2010-09-29 13:23:09 -07002963 __this_cpu_inc(xmit_recursion);
David S. Millerce937182014-08-30 19:22:20 -07002964 skb = dev_hard_start_xmit(skb, dev, txq, &rc);
Eric Dumazet745e20f2010-09-29 13:23:09 -07002965 __this_cpu_dec(xmit_recursion);
Patrick McHardy572a9d72009-11-10 06:14:14 +00002966 if (dev_xmit_complete(rc)) {
David S. Millerc773e842008-07-08 23:13:53 -07002967 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002968 goto out;
2969 }
2970 }
David S. Millerc773e842008-07-08 23:13:53 -07002971 HARD_TX_UNLOCK(dev, txq);
Joe Perchese87cc472012-05-13 21:56:26 +00002972 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
2973 dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002974 } else {
2975			/* Recursion detected! It is possible,
Eric Dumazet745e20f2010-09-29 13:23:09 -07002976			 * unfortunately.
2977			 */
2978recursion_alert:
Joe Perchese87cc472012-05-13 21:56:26 +00002979 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
2980 dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002981 }
2982 }
2983
2984 rc = -ENETDOWN;
Jesper Dangaard Brouer1f595332014-09-03 17:56:09 +02002985drop:
Herbert Xud4828d82006-06-22 02:28:18 -07002986 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002987
Eric Dumazet015f0682014-03-27 08:45:56 -07002988 atomic_long_inc(&dev->tx_dropped);
Jesper Dangaard Brouer1f595332014-09-03 17:56:09 +02002989 kfree_skb_list(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002990 return rc;
2991out:
Herbert Xud4828d82006-06-22 02:28:18 -07002992 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002993 return rc;
2994}
Jason Wangf663dd92014-01-10 16:18:26 +08002995
2996int dev_queue_xmit(struct sk_buff *skb)
2997{
2998 return __dev_queue_xmit(skb, NULL);
2999}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003000EXPORT_SYMBOL(dev_queue_xmit);
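
/*
 * Minimal usage sketch for the contract documented above: the caller sets
 * skb->dev, treats positive NET_XMIT_* codes as qdisc verdicts rather than
 * hard errors, and never touches the skb again once it has been queued.
 * The surrounding tunnel-style caller and its error policy are hypothetical.
 */
#if 0	/* illustration only, never compiled */
static int example_tunnel_xmit(struct sk_buff *skb, struct net_device *out_dev)
{
	int err;

	skb->dev = out_dev;		/* caller must set the device */
	err = dev_queue_xmit(skb);	/* the skb is consumed here */

	if (err < 0)			/* negative values are errno failures */
		return err;
	if (err == NET_XMIT_DROP)	/* positive qdisc verdict, not an errno */
		return -ENOBUFS;	/* hypothetical policy of this caller */
	return 0;
}
#endif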
Linus Torvalds1da177e2005-04-16 15:20:36 -07003001
Jason Wangf663dd92014-01-10 16:18:26 +08003002int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
3003{
3004 return __dev_queue_xmit(skb, accel_priv);
3005}
3006EXPORT_SYMBOL(dev_queue_xmit_accel);
3007
Linus Torvalds1da177e2005-04-16 15:20:36 -07003008
3009/*=======================================================================
3010 Receiver routines
3011 =======================================================================*/
3012
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07003013int netdev_max_backlog __read_mostly = 1000;
Eric Dumazetc9e6bc62012-09-27 19:29:05 +00003014EXPORT_SYMBOL(netdev_max_backlog);
3015
Eric Dumazet3b098e22010-05-15 23:57:10 -07003016int netdev_tstamp_prequeue __read_mostly = 1;
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07003017int netdev_budget __read_mostly = 300;
3018int weight_p __read_mostly = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003019
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07003020/* Called with irq disabled */
3021static inline void ____napi_schedule(struct softnet_data *sd,
3022 struct napi_struct *napi)
3023{
3024 list_add_tail(&napi->poll_list, &sd->poll_list);
3025 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3026}
3027
Eric Dumazetdf334542010-03-24 19:13:54 +00003028#ifdef CONFIG_RPS
Tom Herbertfec5e652010-04-16 16:01:27 -07003029
3030/* One global table that all flow-based protocols share. */
Eric Dumazet6e3f7fa2010-10-25 03:02:02 +00003031struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
Tom Herbertfec5e652010-04-16 16:01:27 -07003032EXPORT_SYMBOL(rps_sock_flow_table);
3033
Ingo Molnarc5905af2012-02-24 08:31:31 +01003034struct static_key rps_needed __read_mostly;
Eric Dumazetadc93002011-11-17 03:13:26 +00003035
Ben Hutchingsc4454772011-01-19 11:03:53 +00003036static struct rps_dev_flow *
3037set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3038 struct rps_dev_flow *rflow, u16 next_cpu)
3039{
Ben Hutchings09994d12011-10-03 04:42:46 +00003040 if (next_cpu != RPS_NO_CPU) {
Ben Hutchingsc4454772011-01-19 11:03:53 +00003041#ifdef CONFIG_RFS_ACCEL
3042 struct netdev_rx_queue *rxqueue;
3043 struct rps_dev_flow_table *flow_table;
3044 struct rps_dev_flow *old_rflow;
3045 u32 flow_id;
3046 u16 rxq_index;
3047 int rc;
3048
3049 /* Should we steer this flow to a different hardware queue? */
Ben Hutchings69a19ee2011-02-15 20:32:04 +00003050 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
3051 !(dev->features & NETIF_F_NTUPLE))
Ben Hutchingsc4454772011-01-19 11:03:53 +00003052 goto out;
3053 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
3054 if (rxq_index == skb_get_rx_queue(skb))
3055 goto out;
3056
3057 rxqueue = dev->_rx + rxq_index;
3058 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3059 if (!flow_table)
3060 goto out;
Tom Herbert61b905d2014-03-24 15:34:47 -07003061 flow_id = skb_get_hash(skb) & flow_table->mask;
Ben Hutchingsc4454772011-01-19 11:03:53 +00003062 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
3063 rxq_index, flow_id);
3064 if (rc < 0)
3065 goto out;
3066 old_rflow = rflow;
3067 rflow = &flow_table->flows[flow_id];
Ben Hutchingsc4454772011-01-19 11:03:53 +00003068 rflow->filter = rc;
3069 if (old_rflow->filter == rflow->filter)
3070 old_rflow->filter = RPS_NO_FILTER;
3071 out:
3072#endif
3073 rflow->last_qtail =
Ben Hutchings09994d12011-10-03 04:42:46 +00003074 per_cpu(softnet_data, next_cpu).input_queue_head;
Ben Hutchingsc4454772011-01-19 11:03:53 +00003075 }
3076
Ben Hutchings09994d12011-10-03 04:42:46 +00003077 rflow->cpu = next_cpu;
Ben Hutchingsc4454772011-01-19 11:03:53 +00003078 return rflow;
3079}
3080
Tom Herbert0a9627f2010-03-16 08:03:29 +00003081/*
3082 * get_rps_cpu is called from netif_receive_skb and returns the target
3083 * CPU from the RPS map of the receiving queue for a given skb.
Eric Dumazetb0e28f12010-04-15 00:14:07 -07003084 * rcu_read_lock must be held on entry.
Tom Herbert0a9627f2010-03-16 08:03:29 +00003085 */
Tom Herbertfec5e652010-04-16 16:01:27 -07003086static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3087 struct rps_dev_flow **rflowp)
Tom Herbert0a9627f2010-03-16 08:03:29 +00003088{
Tom Herbert0a9627f2010-03-16 08:03:29 +00003089 struct netdev_rx_queue *rxqueue;
Eric Dumazet6e3f7fa2010-10-25 03:02:02 +00003090 struct rps_map *map;
Tom Herbertfec5e652010-04-16 16:01:27 -07003091 struct rps_dev_flow_table *flow_table;
3092 struct rps_sock_flow_table *sock_flow_table;
Tom Herbert0a9627f2010-03-16 08:03:29 +00003093 int cpu = -1;
Tom Herbertfec5e652010-04-16 16:01:27 -07003094 u16 tcpu;
Tom Herbert61b905d2014-03-24 15:34:47 -07003095 u32 hash;
Tom Herbert0a9627f2010-03-16 08:03:29 +00003096
Tom Herbert0a9627f2010-03-16 08:03:29 +00003097 if (skb_rx_queue_recorded(skb)) {
3098 u16 index = skb_get_rx_queue(skb);
Ben Hutchings62fe0b42010-09-27 08:24:33 +00003099 if (unlikely(index >= dev->real_num_rx_queues)) {
3100 WARN_ONCE(dev->real_num_rx_queues > 1,
3101 "%s received packet on queue %u, but number "
3102 "of RX queues is %u\n",
3103 dev->name, index, dev->real_num_rx_queues);
Tom Herbert0a9627f2010-03-16 08:03:29 +00003104 goto done;
3105 }
3106 rxqueue = dev->_rx + index;
3107 } else
3108 rxqueue = dev->_rx;
3109
Eric Dumazet6e3f7fa2010-10-25 03:02:02 +00003110 map = rcu_dereference(rxqueue->rps_map);
3111 if (map) {
Tom Herbert85875232011-01-31 16:23:42 -08003112 if (map->len == 1 &&
Eric Dumazet33d480c2011-08-11 19:30:52 +00003113 !rcu_access_pointer(rxqueue->rps_flow_table)) {
Changli Gao6febfca2010-09-03 23:12:37 +00003114 tcpu = map->cpus[0];
3115 if (cpu_online(tcpu))
3116 cpu = tcpu;
Tom Herbert0a9627f2010-03-16 08:03:29 +00003117 goto done;
Eric Dumazetb249dcb2010-04-19 21:56:38 +00003118 }
Eric Dumazet33d480c2011-08-11 19:30:52 +00003119 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
Tom Herbert0a9627f2010-03-16 08:03:29 +00003120 goto done;
Tom Herbert0a9627f2010-03-16 08:03:29 +00003121 }
3122
Changli Gao2d47b452010-08-17 19:00:56 +00003123 skb_reset_network_header(skb);
Tom Herbert61b905d2014-03-24 15:34:47 -07003124 hash = skb_get_hash(skb);
3125 if (!hash)
Tom Herbert0a9627f2010-03-16 08:03:29 +00003126 goto done;
Tom Herbert0a9627f2010-03-16 08:03:29 +00003127
Tom Herbertfec5e652010-04-16 16:01:27 -07003128 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3129 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3130 if (flow_table && sock_flow_table) {
3131 u16 next_cpu;
3132 struct rps_dev_flow *rflow;
3133
Tom Herbert61b905d2014-03-24 15:34:47 -07003134 rflow = &flow_table->flows[hash & flow_table->mask];
Tom Herbertfec5e652010-04-16 16:01:27 -07003135 tcpu = rflow->cpu;
3136
Tom Herbert61b905d2014-03-24 15:34:47 -07003137 next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
Tom Herbertfec5e652010-04-16 16:01:27 -07003138
3139 /*
3140 * If the desired CPU (where last recvmsg was done) is
3141 * different from current CPU (one in the rx-queue flow
3142 * table entry), switch if one of the following holds:
3143 * - Current CPU is unset (equal to RPS_NO_CPU).
3144 * - Current CPU is offline.
3145 * - The current CPU's queue tail has advanced beyond the
3146 * last packet that was enqueued using this table entry.
3147 * This guarantees that all previous packets for the flow
3148 * have been dequeued, thus preserving in order delivery.
3149 */
3150 if (unlikely(tcpu != next_cpu) &&
3151 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
3152 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
Tom Herbertbaefa312012-11-16 09:04:15 +00003153 rflow->last_qtail)) >= 0)) {
3154 tcpu = next_cpu;
Ben Hutchingsc4454772011-01-19 11:03:53 +00003155 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
Tom Herbertbaefa312012-11-16 09:04:15 +00003156 }
Ben Hutchingsc4454772011-01-19 11:03:53 +00003157
Tom Herbertfec5e652010-04-16 16:01:27 -07003158 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
3159 *rflowp = rflow;
3160 cpu = tcpu;
3161 goto done;
3162 }
3163 }
3164
Tom Herbert0a9627f2010-03-16 08:03:29 +00003165 if (map) {
Daniel Borkmann8fc54f62014-08-23 20:58:54 +02003166 tcpu = map->cpus[reciprocal_scale(hash, map->len)];
Tom Herbert0a9627f2010-03-16 08:03:29 +00003167 if (cpu_online(tcpu)) {
3168 cpu = tcpu;
3169 goto done;
3170 }
3171 }
3172
3173done:
Tom Herbert0a9627f2010-03-16 08:03:29 +00003174 return cpu;
3175}
3176
Ben Hutchingsc4454772011-01-19 11:03:53 +00003177#ifdef CONFIG_RFS_ACCEL
3178
3179/**
3180 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
3181 * @dev: Device on which the filter was set
3182 * @rxq_index: RX queue index
3183 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
3184 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
3185 *
3186 * Drivers that implement ndo_rx_flow_steer() should periodically call
3187 * this function for each installed filter and remove the filters for
3188 * which it returns %true.
3189 */
3190bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3191 u32 flow_id, u16 filter_id)
3192{
3193 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
3194 struct rps_dev_flow_table *flow_table;
3195 struct rps_dev_flow *rflow;
3196 bool expire = true;
3197 int cpu;
3198
3199 rcu_read_lock();
3200 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3201 if (flow_table && flow_id <= flow_table->mask) {
3202 rflow = &flow_table->flows[flow_id];
3203 cpu = ACCESS_ONCE(rflow->cpu);
3204 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
3205 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3206 rflow->last_qtail) <
3207 (int)(10 * flow_table->mask)))
3208 expire = false;
3209 }
3210 rcu_read_unlock();
3211 return expire;
3212}
3213EXPORT_SYMBOL(rps_may_expire_flow);
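
/*
 * Sketch of the periodic scan described above, as a driver might run it
 * from a timer or service task.  The filter table, its fields and the
 * example_hw_del_filter() helper are hypothetical; the driver is assumed
 * to have returned the table index as the filter_id from
 * ndo_rx_flow_steer().
 */
#if 0	/* illustration only, never compiled */
static void example_expire_rfs_filters(struct net_device *dev,
					struct example_filter *tbl,
					unsigned int n_filters)
{
	unsigned int i;

	for (i = 0; i < n_filters; i++) {
		if (!tbl[i].in_use)
			continue;
		if (rps_may_expire_flow(dev, tbl[i].rxq_index,
					tbl[i].flow_id, i)) {
			example_hw_del_filter(dev, &tbl[i]);	/* drop the HW rule */
			tbl[i].in_use = false;
		}
	}
}
#endif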
3214
3215#endif /* CONFIG_RFS_ACCEL */
3216
Tom Herbert0a9627f2010-03-16 08:03:29 +00003217/* Called from hardirq (IPI) context */
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003218static void rps_trigger_softirq(void *data)
Tom Herbert0a9627f2010-03-16 08:03:29 +00003219{
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003220 struct softnet_data *sd = data;
3221
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07003222 ____napi_schedule(sd, &sd->backlog);
Changli Gaodee42872010-05-02 05:42:16 +00003223 sd->received_rps++;
Tom Herbert0a9627f2010-03-16 08:03:29 +00003224}
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003225
Tom Herbertfec5e652010-04-16 16:01:27 -07003226#endif /* CONFIG_RPS */
Tom Herbert0a9627f2010-03-16 08:03:29 +00003227
3228/*
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003229 * Check if this softnet_data structure belongs to another CPU.
3230 * If yes, queue it to our IPI list and return 1.
3231 * If no, return 0.
3232 */
3233static int rps_ipi_queued(struct softnet_data *sd)
3234{
3235#ifdef CONFIG_RPS
Christoph Lameter903ceff2014-08-17 12:30:35 -05003236 struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003237
3238 if (sd != mysd) {
3239 sd->rps_ipi_next = mysd->rps_ipi_list;
3240 mysd->rps_ipi_list = sd;
3241
3242 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3243 return 1;
3244 }
3245#endif /* CONFIG_RPS */
3246 return 0;
3247}
3248
Willem de Bruijn99bbc702013-05-20 04:02:32 +00003249#ifdef CONFIG_NET_FLOW_LIMIT
3250int netdev_flow_limit_table_len __read_mostly = (1 << 12);
3251#endif
3252
3253static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3254{
3255#ifdef CONFIG_NET_FLOW_LIMIT
3256 struct sd_flow_limit *fl;
3257 struct softnet_data *sd;
3258 unsigned int old_flow, new_flow;
3259
3260 if (qlen < (netdev_max_backlog >> 1))
3261 return false;
3262
Christoph Lameter903ceff2014-08-17 12:30:35 -05003263 sd = this_cpu_ptr(&softnet_data);
Willem de Bruijn99bbc702013-05-20 04:02:32 +00003264
3265 rcu_read_lock();
3266 fl = rcu_dereference(sd->flow_limit);
3267 if (fl) {
Tom Herbert3958afa1b2013-12-15 22:12:06 -08003268 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
Willem de Bruijn99bbc702013-05-20 04:02:32 +00003269 old_flow = fl->history[fl->history_head];
3270 fl->history[fl->history_head] = new_flow;
3271
3272 fl->history_head++;
3273 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
3274
3275 if (likely(fl->buckets[old_flow]))
3276 fl->buckets[old_flow]--;
3277
3278 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
3279 fl->count++;
3280 rcu_read_unlock();
3281 return true;
3282 }
3283 }
3284 rcu_read_unlock();
3285#endif
3286 return false;
3287}
3288
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003289/*
Tom Herbert0a9627f2010-03-16 08:03:29 +00003290 * enqueue_to_backlog is called to queue an skb to a per-CPU backlog
3291 * queue (which may be a remote CPU's queue).
3292 */
Tom Herbertfec5e652010-04-16 16:01:27 -07003293static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3294 unsigned int *qtail)
Tom Herbert0a9627f2010-03-16 08:03:29 +00003295{
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003296 struct softnet_data *sd;
Tom Herbert0a9627f2010-03-16 08:03:29 +00003297 unsigned long flags;
Willem de Bruijn99bbc702013-05-20 04:02:32 +00003298 unsigned int qlen;
Tom Herbert0a9627f2010-03-16 08:03:29 +00003299
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003300 sd = &per_cpu(softnet_data, cpu);
Tom Herbert0a9627f2010-03-16 08:03:29 +00003301
3302 local_irq_save(flags);
Tom Herbert0a9627f2010-03-16 08:03:29 +00003303
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003304 rps_lock(sd);
Willem de Bruijn99bbc702013-05-20 04:02:32 +00003305 qlen = skb_queue_len(&sd->input_pkt_queue);
3306 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
Li RongQinge008f3f2014-12-08 09:42:55 +08003307 if (qlen) {
Tom Herbert0a9627f2010-03-16 08:03:29 +00003308enqueue:
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003309 __skb_queue_tail(&sd->input_pkt_queue, skb);
Tom Herbert76cc8b12010-05-20 18:37:59 +00003310 input_queue_tail_incr_save(sd, qtail);
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003311 rps_unlock(sd);
Changli Gao152102c2010-03-30 20:16:22 +00003312 local_irq_restore(flags);
Tom Herbert0a9627f2010-03-16 08:03:29 +00003313 return NET_RX_SUCCESS;
3314 }
3315
Eric Dumazetebda37c22010-05-06 23:51:21 +00003316		/* Schedule NAPI for the backlog device.
3317		 * We can use a non-atomic operation since we own the queue lock.
3318 */
3319 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003320 if (!rps_ipi_queued(sd))
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07003321 ____napi_schedule(sd, &sd->backlog);
Tom Herbert0a9627f2010-03-16 08:03:29 +00003322 }
3323 goto enqueue;
3324 }
3325
Changli Gaodee42872010-05-02 05:42:16 +00003326 sd->dropped++;
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003327 rps_unlock(sd);
Tom Herbert0a9627f2010-03-16 08:03:29 +00003328
Tom Herbert0a9627f2010-03-16 08:03:29 +00003329 local_irq_restore(flags);
3330
Eric Dumazetcaf586e2010-09-30 21:06:55 +00003331 atomic_long_inc(&skb->dev->rx_dropped);
Tom Herbert0a9627f2010-03-16 08:03:29 +00003332 kfree_skb(skb);
3333 return NET_RX_DROP;
3334}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003335
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00003336static int netif_rx_internal(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003337{
Eric Dumazetb0e28f12010-04-15 00:14:07 -07003338 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003339
Eric Dumazet588f0332011-11-15 04:12:55 +00003340 net_timestamp_check(netdev_tstamp_prequeue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003341
Koki Sanagicf66ba52010-08-23 18:45:02 +09003342 trace_netif_rx(skb);
Eric Dumazetdf334542010-03-24 19:13:54 +00003343#ifdef CONFIG_RPS
Ingo Molnarc5905af2012-02-24 08:31:31 +01003344 if (static_key_false(&rps_needed)) {
Tom Herbertfec5e652010-04-16 16:01:27 -07003345 struct rps_dev_flow voidflow, *rflow = &voidflow;
Eric Dumazetb0e28f12010-04-15 00:14:07 -07003346 int cpu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003347
Changli Gaocece1942010-08-07 20:35:43 -07003348 preempt_disable();
Eric Dumazetb0e28f12010-04-15 00:14:07 -07003349 rcu_read_lock();
Tom Herbertfec5e652010-04-16 16:01:27 -07003350
3351 cpu = get_rps_cpu(skb->dev, skb, &rflow);
Eric Dumazetb0e28f12010-04-15 00:14:07 -07003352 if (cpu < 0)
3353 cpu = smp_processor_id();
Tom Herbertfec5e652010-04-16 16:01:27 -07003354
3355 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3356
Eric Dumazetb0e28f12010-04-15 00:14:07 -07003357 rcu_read_unlock();
Changli Gaocece1942010-08-07 20:35:43 -07003358 preempt_enable();
Eric Dumazetadc93002011-11-17 03:13:26 +00003359 } else
3360#endif
Tom Herbertfec5e652010-04-16 16:01:27 -07003361 {
3362 unsigned int qtail;
3363 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3364 put_cpu();
3365 }
Eric Dumazetb0e28f12010-04-15 00:14:07 -07003366 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003367}
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00003368
3369/**
3370 * netif_rx - post buffer to the network code
3371 * @skb: buffer to post
3372 *
3373 * This function receives a packet from a device driver and queues it for
3374 * the upper (protocol) levels to process. It always succeeds. The buffer
3375 * may be dropped during processing for congestion control or by the
3376 * protocol layers.
3377 *
3378 * return values:
3379 * NET_RX_SUCCESS (no congestion)
3380 * NET_RX_DROP (packet was dropped)
3381 *
3382 */
3383
3384int netif_rx(struct sk_buff *skb)
3385{
3386 trace_netif_rx_entry(skb);
3387
3388 return netif_rx_internal(skb);
3389}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003390EXPORT_SYMBOL(netif_rx);
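
/*
 * Usage sketch for the interface documented above: a non-NAPI driver hands
 * a received frame to the stack from its interrupt handler.  The copy-based
 * receive path shown here is hypothetical and only illustrates the calling
 * convention.
 */
#if 0	/* illustration only, never compiled */
static void example_rx_frame(struct net_device *dev, const void *data,
			     unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, len);

	if (!skb) {
		dev->stats.rx_dropped++;
		return;
	}
	memcpy(skb_put(skb, len), data, len);
	skb->protocol = eth_type_trans(skb, dev);	/* also sets skb->dev */
	netif_rx(skb);	/* always "succeeds"; may still drop under congestion */
}
#endif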
Linus Torvalds1da177e2005-04-16 15:20:36 -07003391
3392int netif_rx_ni(struct sk_buff *skb)
3393{
3394 int err;
3395
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00003396 trace_netif_rx_ni_entry(skb);
3397
Linus Torvalds1da177e2005-04-16 15:20:36 -07003398 preempt_disable();
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00003399 err = netif_rx_internal(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003400 if (local_softirq_pending())
3401 do_softirq();
3402 preempt_enable();
3403
3404 return err;
3405}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003406EXPORT_SYMBOL(netif_rx_ni);
3407
Linus Torvalds1da177e2005-04-16 15:20:36 -07003408static void net_tx_action(struct softirq_action *h)
3409{
Christoph Lameter903ceff2014-08-17 12:30:35 -05003410 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003411
3412 if (sd->completion_queue) {
3413 struct sk_buff *clist;
3414
3415 local_irq_disable();
3416 clist = sd->completion_queue;
3417 sd->completion_queue = NULL;
3418 local_irq_enable();
3419
3420 while (clist) {
3421 struct sk_buff *skb = clist;
3422 clist = clist->next;
3423
Ilpo Järvinen547b7922008-07-25 21:43:18 -07003424 WARN_ON(atomic_read(&skb->users));
Eric Dumazete6247022013-12-05 04:45:08 -08003425 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
3426 trace_consume_skb(skb);
3427 else
3428 trace_kfree_skb(skb, net_tx_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003429 __kfree_skb(skb);
3430 }
3431 }
3432
3433 if (sd->output_queue) {
David S. Miller37437bb2008-07-16 02:15:04 -07003434 struct Qdisc *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003435
3436 local_irq_disable();
3437 head = sd->output_queue;
3438 sd->output_queue = NULL;
Changli Gaoa9cbd582010-04-26 23:06:24 +00003439 sd->output_queue_tailp = &sd->output_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003440 local_irq_enable();
3441
3442 while (head) {
David S. Miller37437bb2008-07-16 02:15:04 -07003443 struct Qdisc *q = head;
3444 spinlock_t *root_lock;
3445
Linus Torvalds1da177e2005-04-16 15:20:36 -07003446 head = head->next_sched;
3447
David S. Miller5fb66222008-08-02 20:02:43 -07003448 root_lock = qdisc_lock(q);
David S. Miller37437bb2008-07-16 02:15:04 -07003449 if (spin_trylock(root_lock)) {
Peter Zijlstra4e857c52014-03-17 18:06:10 +01003450 smp_mb__before_atomic();
Jarek Poplawskidef82a12008-08-17 21:54:43 -07003451 clear_bit(__QDISC_STATE_SCHED,
3452 &q->state);
David S. Miller37437bb2008-07-16 02:15:04 -07003453 qdisc_run(q);
3454 spin_unlock(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003455 } else {
David S. Miller195648b2008-08-19 04:00:36 -07003456 if (!test_bit(__QDISC_STATE_DEACTIVATED,
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07003457 &q->state)) {
David S. Miller195648b2008-08-19 04:00:36 -07003458 __netif_reschedule(q);
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07003459 } else {
Peter Zijlstra4e857c52014-03-17 18:06:10 +01003460 smp_mb__before_atomic();
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07003461 clear_bit(__QDISC_STATE_SCHED,
3462 &q->state);
3463 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003464 }
3465 }
3466 }
3467}
3468
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003469#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3470 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
Michał Mirosławda678292009-06-05 05:35:28 +00003471/* This hook is defined here for ATM LANE */
3472int (*br_fdb_test_addr_hook)(struct net_device *dev,
3473 unsigned char *addr) __read_mostly;
Stephen Hemminger4fb019a2009-09-11 11:50:08 -07003474EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
Michał Mirosławda678292009-06-05 05:35:28 +00003475#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07003476
Linus Torvalds1da177e2005-04-16 15:20:36 -07003477#ifdef CONFIG_NET_CLS_ACT
3478/* TODO: Maybe we should just force sch_ingress to be compiled in
3479 * whenever CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
3480 * instructions (a compare and two extra stores) when sch_ingress is
3481 * not built but CONFIG_NET_CLS_ACT is.
Lucas De Marchi25985ed2011-03-30 22:57:33 -03003482 * NOTE: This doesn't stop any functionality; if you don't have
3483 * the ingress scheduler, you just can't add policies on ingress.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003484 *
3485 */
Eric Dumazet24824a02010-10-02 06:11:55 +00003486static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003487{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003488 struct net_device *dev = skb->dev;
Herbert Xuf697c3e2007-10-14 00:38:47 -07003489 u32 ttl = G_TC_RTTL(skb->tc_verd);
David S. Miller555353c2008-07-08 17:33:13 -07003490 int result = TC_ACT_OK;
3491 struct Qdisc *q;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003492
Stephen Hemmingerde384832010-08-01 00:33:23 -07003493 if (unlikely(MAX_RED_LOOP < ttl++)) {
Joe Perchese87cc472012-05-13 21:56:26 +00003494 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3495 skb->skb_iif, dev->ifindex);
Herbert Xuf697c3e2007-10-14 00:38:47 -07003496 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003497 }
3498
Herbert Xuf697c3e2007-10-14 00:38:47 -07003499 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3500 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3501
John Fastabend46e5da42014-09-12 20:04:52 -07003502 q = rcu_dereference(rxq->qdisc);
David S. Miller8d50b532008-07-30 02:37:46 -07003503 if (q != &noop_qdisc) {
David S. Miller83874002008-07-17 00:53:03 -07003504 spin_lock(qdisc_lock(q));
David S. Millera9312ae2008-08-17 21:51:03 -07003505 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3506 result = qdisc_enqueue_root(skb, q);
David S. Miller83874002008-07-17 00:53:03 -07003507 spin_unlock(qdisc_lock(q));
3508 }
Herbert Xuf697c3e2007-10-14 00:38:47 -07003509
Linus Torvalds1da177e2005-04-16 15:20:36 -07003510 return result;
3511}
Herbert Xuf697c3e2007-10-14 00:38:47 -07003512
3513static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3514 struct packet_type **pt_prev,
3515 int *ret, struct net_device *orig_dev)
3516{
Eric Dumazet24824a02010-10-02 06:11:55 +00003517 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3518
John Fastabend46e5da42014-09-12 20:04:52 -07003519 if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
Herbert Xuf697c3e2007-10-14 00:38:47 -07003520 goto out;
3521
3522 if (*pt_prev) {
3523 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3524 *pt_prev = NULL;
Herbert Xuf697c3e2007-10-14 00:38:47 -07003525 }
3526
Eric Dumazet24824a02010-10-02 06:11:55 +00003527 switch (ing_filter(skb, rxq)) {
Herbert Xuf697c3e2007-10-14 00:38:47 -07003528 case TC_ACT_SHOT:
3529 case TC_ACT_STOLEN:
3530 kfree_skb(skb);
3531 return NULL;
3532 }
3533
3534out:
3535 skb->tc_verd = 0;
3536 return skb;
3537}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003538#endif
3539
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003540/**
3541 * netdev_rx_handler_register - register receive handler
3542 * @dev: device to register a handler for
3543 * @rx_handler: receive handler to register
Jiri Pirko93e2c322010-06-10 03:34:59 +00003544 * @rx_handler_data: data pointer that is used by rx handler
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003545 *
Masanari Iidae2278672014-02-18 22:54:36 +09003546 * Register a receive handler for a device. This handler will then be
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003547 * called from __netif_receive_skb. A negative errno code is returned
3548 * on a failure.
3549 *
3550 * The caller must hold the rtnl_mutex.
Jiri Pirko8a4eb572011-03-12 03:14:39 +00003551 *
3552 * For a general description of rx_handler, see enum rx_handler_result.
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003553 */
3554int netdev_rx_handler_register(struct net_device *dev,
Jiri Pirko93e2c322010-06-10 03:34:59 +00003555 rx_handler_func_t *rx_handler,
3556 void *rx_handler_data)
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003557{
3558 ASSERT_RTNL();
3559
3560 if (dev->rx_handler)
3561 return -EBUSY;
3562
Eric Dumazet00cfec32013-03-29 03:01:22 +00003563 /* Note: rx_handler_data must be set before rx_handler */
Jiri Pirko93e2c322010-06-10 03:34:59 +00003564 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003565 rcu_assign_pointer(dev->rx_handler, rx_handler);
3566
3567 return 0;
3568}
3569EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
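
/*
 * Sketch of a typical rx_handler user such as a bonding- or bridge-like
 * upper device.  The example_port structure and its helpers are
 * hypothetical; the point is the rx_handler_data flow and the
 * rx_handler_result_t verdicts.
 */
#if 0	/* illustration only, never compiled */
static rx_handler_result_t example_handle_frame(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct example_port *port = rcu_dereference(skb->dev->rx_handler_data);

	if (example_port_should_steal(port, skb)) {
		example_port_enqueue(port, skb);
		return RX_HANDLER_CONSUMED;	/* the port now owns the skb */
	}
	return RX_HANDLER_PASS;			/* let the stack process it */
}

static int example_port_attach(struct net_device *dev,
			       struct example_port *port)
{
	ASSERT_RTNL();	/* register/unregister require the rtnl_mutex */
	return netdev_rx_handler_register(dev, example_handle_frame, port);
}
#endif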
3570
3571/**
3572 * netdev_rx_handler_unregister - unregister receive handler
3573 * @dev: device to unregister a handler from
3574 *
Kusanagi Kouichi166ec362013-03-18 02:59:52 +00003575 * Unregister a receive handler from a device.
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003576 *
3577 * The caller must hold the rtnl_mutex.
3578 */
3579void netdev_rx_handler_unregister(struct net_device *dev)
3580{
3581
3582 ASSERT_RTNL();
Stephen Hemmingera9b3cd72011-08-01 16:19:00 +00003583 RCU_INIT_POINTER(dev->rx_handler, NULL);
Eric Dumazet00cfec32013-03-29 03:01:22 +00003584	/* A reader seeing a non-NULL rx_handler in an rcu_read_lock()
3585	 * section is guaranteed to see a non-NULL rx_handler_data
3586 * as well.
3587 */
3588 synchronize_net();
Stephen Hemmingera9b3cd72011-08-01 16:19:00 +00003589 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003590}
3591EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3592
Mel Gormanb4b9e352012-07-31 16:44:26 -07003593/*
3594 * Limit the use of PFMEMALLOC reserves to those protocols that implement
3595 * the special handling of PFMEMALLOC skbs.
3596 */
3597static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3598{
3599 switch (skb->protocol) {
Joe Perches2b8837a2014-03-12 10:04:17 -07003600 case htons(ETH_P_ARP):
3601 case htons(ETH_P_IP):
3602 case htons(ETH_P_IPV6):
3603 case htons(ETH_P_8021Q):
3604 case htons(ETH_P_8021AD):
Mel Gormanb4b9e352012-07-31 16:44:26 -07003605 return true;
3606 default:
3607 return false;
3608 }
3609}
3610
David S. Miller9754e292013-02-14 15:57:38 -05003611static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003612{
3613 struct packet_type *ptype, *pt_prev;
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003614 rx_handler_func_t *rx_handler;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07003615 struct net_device *orig_dev;
Jiri Pirko8a4eb572011-03-12 03:14:39 +00003616 bool deliver_exact = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003617 int ret = NET_RX_DROP;
Al Viro252e3342006-11-14 20:48:11 -08003618 __be16 type;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003619
Eric Dumazet588f0332011-11-15 04:12:55 +00003620 net_timestamp_check(!netdev_tstamp_prequeue, skb);
Eric Dumazet81bbb3d2009-09-30 16:42:42 -07003621
Koki Sanagicf66ba52010-08-23 18:45:02 +09003622 trace_netif_receive_skb(skb);
Patrick McHardy9b22ea52008-11-04 14:49:57 -08003623
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07003624 orig_dev = skb->dev;
Jiri Pirko1765a572011-02-12 06:48:36 +00003625
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07003626 skb_reset_network_header(skb);
Eric Dumazetfda55ec2013-01-07 09:28:21 +00003627 if (!skb_transport_header_was_set(skb))
3628 skb_reset_transport_header(skb);
Jiri Pirko0b5c9db2011-06-10 06:56:58 +00003629 skb_reset_mac_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003630
3631 pt_prev = NULL;
3632
3633 rcu_read_lock();
3634
David S. Miller63d8ea72011-02-28 10:48:59 -08003635another_round:
David S. Millerb6858172012-07-23 16:27:54 -07003636 skb->skb_iif = skb->dev->ifindex;
David S. Miller63d8ea72011-02-28 10:48:59 -08003637
3638 __this_cpu_inc(softnet_data.processed);
3639
Patrick McHardy8ad227f2013-04-19 02:04:31 +00003640 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
3641 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
Vlad Yasevich0d5501c2014-08-08 14:42:13 -04003642 skb = skb_vlan_untag(skb);
Jiri Pirkobcc6d472011-04-07 19:48:33 +00003643 if (unlikely(!skb))
Mel Gormanb4b9e352012-07-31 16:44:26 -07003644 goto unlock;
Jiri Pirkobcc6d472011-04-07 19:48:33 +00003645 }
3646
Linus Torvalds1da177e2005-04-16 15:20:36 -07003647#ifdef CONFIG_NET_CLS_ACT
3648 if (skb->tc_verd & TC_NCLS) {
3649 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3650 goto ncls;
3651 }
3652#endif
3653
David S. Miller9754e292013-02-14 15:57:38 -05003654 if (pfmemalloc)
Mel Gormanb4b9e352012-07-31 16:44:26 -07003655 goto skip_taps;
3656
Linus Torvalds1da177e2005-04-16 15:20:36 -07003657 list_for_each_entry_rcu(ptype, &ptype_all, list) {
Salam Noureddine7866a622015-01-27 11:35:48 -08003658 if (pt_prev)
3659 ret = deliver_skb(skb, pt_prev, orig_dev);
3660 pt_prev = ptype;
3661 }
3662
3663 list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
3664 if (pt_prev)
3665 ret = deliver_skb(skb, pt_prev, orig_dev);
3666 pt_prev = ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003667 }
3668
Mel Gormanb4b9e352012-07-31 16:44:26 -07003669skip_taps:
Linus Torvalds1da177e2005-04-16 15:20:36 -07003670#ifdef CONFIG_NET_CLS_ACT
Herbert Xuf697c3e2007-10-14 00:38:47 -07003671 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3672 if (!skb)
Mel Gormanb4b9e352012-07-31 16:44:26 -07003673 goto unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003674ncls:
3675#endif
3676
David S. Miller9754e292013-02-14 15:57:38 -05003677 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
Mel Gormanb4b9e352012-07-31 16:44:26 -07003678 goto drop;
3679
Jiri Pirkodf8a39d2015-01-13 17:13:44 +01003680 if (skb_vlan_tag_present(skb)) {
John Fastabend24257172011-10-10 09:16:41 +00003681 if (pt_prev) {
3682 ret = deliver_skb(skb, pt_prev, orig_dev);
3683 pt_prev = NULL;
3684 }
Florian Zumbiehl48cc32d32012-10-07 15:51:58 +00003685 if (vlan_do_receive(&skb))
John Fastabend24257172011-10-10 09:16:41 +00003686 goto another_round;
3687 else if (unlikely(!skb))
Mel Gormanb4b9e352012-07-31 16:44:26 -07003688 goto unlock;
John Fastabend24257172011-10-10 09:16:41 +00003689 }
3690
Florian Zumbiehl48cc32d32012-10-07 15:51:58 +00003691 rx_handler = rcu_dereference(skb->dev->rx_handler);
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003692 if (rx_handler) {
3693 if (pt_prev) {
3694 ret = deliver_skb(skb, pt_prev, orig_dev);
3695 pt_prev = NULL;
3696 }
Jiri Pirko8a4eb572011-03-12 03:14:39 +00003697 switch (rx_handler(&skb)) {
3698 case RX_HANDLER_CONSUMED:
Cristian Bercaru3bc1b1a2013-03-08 07:03:38 +00003699 ret = NET_RX_SUCCESS;
Mel Gormanb4b9e352012-07-31 16:44:26 -07003700 goto unlock;
Jiri Pirko8a4eb572011-03-12 03:14:39 +00003701 case RX_HANDLER_ANOTHER:
David S. Miller63d8ea72011-02-28 10:48:59 -08003702 goto another_round;
Jiri Pirko8a4eb572011-03-12 03:14:39 +00003703 case RX_HANDLER_EXACT:
3704 deliver_exact = true;
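			/* fall through */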
3705 case RX_HANDLER_PASS:
3706 break;
3707 default:
3708 BUG();
3709 }
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003710 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003711
Jiri Pirkodf8a39d2015-01-13 17:13:44 +01003712 if (unlikely(skb_vlan_tag_present(skb))) {
3713 if (skb_vlan_tag_get_id(skb))
Eric Dumazetd4b812d2013-07-18 07:19:26 -07003714 skb->pkt_type = PACKET_OTHERHOST;
3715 /* Note: we might in the future use prio bits
3716 * and set skb->priority like in vlan_do_receive()
3717 * For the time being, just ignore Priority Code Point
3718 */
3719 skb->vlan_tci = 0;
3720 }
Florian Zumbiehl48cc32d32012-10-07 15:51:58 +00003721
Linus Torvalds1da177e2005-04-16 15:20:36 -07003722 type = skb->protocol;
Salam Noureddine7866a622015-01-27 11:35:48 -08003723
3724 /* deliver only exact match when indicated */
3725 if (likely(!deliver_exact)) {
3726 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
3727 &ptype_base[ntohs(type) &
3728 PTYPE_HASH_MASK]);
3729 }
3730
3731 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
3732 &orig_dev->ptype_specific);
3733
3734 if (unlikely(skb->dev != orig_dev)) {
3735 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
3736 &skb->dev->ptype_specific);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003737 }
3738
3739 if (pt_prev) {
Michael S. Tsirkin1080e512012-07-20 09:23:17 +00003740 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
Michael S. Tsirkin0e698bf2012-09-15 22:44:16 +00003741 goto drop;
Michael S. Tsirkin1080e512012-07-20 09:23:17 +00003742 else
3743 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003744 } else {
Mel Gormanb4b9e352012-07-31 16:44:26 -07003745drop:
Eric Dumazetcaf586e2010-09-30 21:06:55 +00003746 atomic_long_inc(&skb->dev->rx_dropped);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003747 kfree_skb(skb);
3748		/* Jamal, now you will not be able to escape explaining
3749		 * to me how you were going to use this. :-)
3750 */
3751 ret = NET_RX_DROP;
3752 }
3753
Mel Gormanb4b9e352012-07-31 16:44:26 -07003754unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07003755 rcu_read_unlock();
David S. Miller9754e292013-02-14 15:57:38 -05003756 return ret;
3757}
3758
3759static int __netif_receive_skb(struct sk_buff *skb)
3760{
3761 int ret;
3762
3763 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
3764 unsigned long pflags = current->flags;
3765
3766 /*
3767 * PFMEMALLOC skbs are special, they should
3768 * - be delivered to SOCK_MEMALLOC sockets only
3769 * - stay away from userspace
3770 * - have bounded memory usage
3771 *
3772 * Use PF_MEMALLOC as this saves us from propagating the allocation
3773 * context down to all allocation sites.
3774 */
3775 current->flags |= PF_MEMALLOC;
3776 ret = __netif_receive_skb_core(skb, true);
3777 tsk_restore_flags(current, pflags, PF_MEMALLOC);
3778 } else
3779 ret = __netif_receive_skb_core(skb, false);
3780
Linus Torvalds1da177e2005-04-16 15:20:36 -07003781 return ret;
3782}
Tom Herbert0a9627f2010-03-16 08:03:29 +00003783
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00003784static int netif_receive_skb_internal(struct sk_buff *skb)
Tom Herbert0a9627f2010-03-16 08:03:29 +00003785{
Eric Dumazet588f0332011-11-15 04:12:55 +00003786 net_timestamp_check(netdev_tstamp_prequeue, skb);
Eric Dumazet3b098e22010-05-15 23:57:10 -07003787
Richard Cochranc1f19b52010-07-17 08:49:36 +00003788 if (skb_defer_rx_timestamp(skb))
3789 return NET_RX_SUCCESS;
3790
Eric Dumazetdf334542010-03-24 19:13:54 +00003791#ifdef CONFIG_RPS
Ingo Molnarc5905af2012-02-24 08:31:31 +01003792 if (static_key_false(&rps_needed)) {
Eric Dumazet3b098e22010-05-15 23:57:10 -07003793 struct rps_dev_flow voidflow, *rflow = &voidflow;
3794 int cpu, ret;
Tom Herbert0a9627f2010-03-16 08:03:29 +00003795
Eric Dumazet3b098e22010-05-15 23:57:10 -07003796 rcu_read_lock();
Tom Herbert0a9627f2010-03-16 08:03:29 +00003797
Eric Dumazet3b098e22010-05-15 23:57:10 -07003798 cpu = get_rps_cpu(skb->dev, skb, &rflow);
Tom Herbertfec5e652010-04-16 16:01:27 -07003799
Eric Dumazet3b098e22010-05-15 23:57:10 -07003800 if (cpu >= 0) {
3801 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3802 rcu_read_unlock();
Eric Dumazetadc93002011-11-17 03:13:26 +00003803 return ret;
Eric Dumazet3b098e22010-05-15 23:57:10 -07003804 }
Eric Dumazetadc93002011-11-17 03:13:26 +00003805 rcu_read_unlock();
Tom Herbertfec5e652010-04-16 16:01:27 -07003806 }
Tom Herbert1e94d722010-03-18 17:45:44 -07003807#endif
Eric Dumazetadc93002011-11-17 03:13:26 +00003808 return __netif_receive_skb(skb);
Tom Herbert0a9627f2010-03-16 08:03:29 +00003809}
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00003810
3811/**
3812 * netif_receive_skb - process receive buffer from network
3813 * @skb: buffer to process
3814 *
3815 * netif_receive_skb() is the main receive data processing function.
3816 * It always succeeds. The buffer may be dropped during processing
3817 * for congestion control or by the protocol layers.
3818 *
3819 * This function may only be called from softirq context and interrupts
3820 * should be enabled.
3821 *
3822 * Return values (usually ignored):
3823 * NET_RX_SUCCESS: no congestion
3824 * NET_RX_DROP: packet was dropped
3825 */
3826int netif_receive_skb(struct sk_buff *skb)
3827{
3828 trace_netif_receive_skb_entry(skb);
3829
3830 return netif_receive_skb_internal(skb);
3831}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003832EXPORT_SYMBOL(netif_receive_skb);
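/*
 * Illustrative sketch, not part of this file: delivering one received frame
 * from a hypothetical driver's NAPI poll path via netif_receive_skb().
 * mydrv_rx_one() and its arguments are assumptions made up for the example.
 */
#include <linux/netdevice.h>
#include <linux/etherdevice.h>

static void mydrv_rx_one(struct net_device *netdev, struct sk_buff *skb)
{
	/* classify the frame before handing it to the stack */
	skb->protocol = eth_type_trans(skb, netdev);
	/* returns NET_RX_SUCCESS or NET_RX_DROP; usually ignored */
	netif_receive_skb(skb);
}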
Linus Torvalds1da177e2005-04-16 15:20:36 -07003833
Eric Dumazet88751272010-04-19 05:07:33 +00003834/* Network device is going away; flush any packets still pending.
3835 * Called with irqs disabled.
3836 */
Changli Gao152102c2010-03-30 20:16:22 +00003837static void flush_backlog(void *arg)
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003838{
Changli Gao152102c2010-03-30 20:16:22 +00003839 struct net_device *dev = arg;
Christoph Lameter903ceff2014-08-17 12:30:35 -05003840 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003841 struct sk_buff *skb, *tmp;
3842
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003843 rps_lock(sd);
Changli Gao6e7676c2010-04-27 15:07:33 -07003844 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003845 if (skb->dev == dev) {
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003846 __skb_unlink(skb, &sd->input_pkt_queue);
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003847 kfree_skb(skb);
Tom Herbert76cc8b12010-05-20 18:37:59 +00003848 input_queue_head_incr(sd);
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003849 }
Changli Gao6e7676c2010-04-27 15:07:33 -07003850 }
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003851 rps_unlock(sd);
Changli Gao6e7676c2010-04-27 15:07:33 -07003852
3853 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3854 if (skb->dev == dev) {
3855 __skb_unlink(skb, &sd->process_queue);
3856 kfree_skb(skb);
Tom Herbert76cc8b12010-05-20 18:37:59 +00003857 input_queue_head_incr(sd);
Changli Gao6e7676c2010-04-27 15:07:33 -07003858 }
3859 }
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003860}
3861
Herbert Xud565b0a2008-12-15 23:38:52 -08003862static int napi_gro_complete(struct sk_buff *skb)
3863{
Vlad Yasevich22061d82012-11-15 08:49:11 +00003864 struct packet_offload *ptype;
Herbert Xud565b0a2008-12-15 23:38:52 -08003865 __be16 type = skb->protocol;
Vlad Yasevich22061d82012-11-15 08:49:11 +00003866 struct list_head *head = &offload_base;
Herbert Xud565b0a2008-12-15 23:38:52 -08003867 int err = -ENOENT;
3868
Eric Dumazetc3c7c252012-12-06 13:54:59 +00003869 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
3870
Herbert Xufc59f9a2009-04-14 15:11:06 -07003871 if (NAPI_GRO_CB(skb)->count == 1) {
3872 skb_shinfo(skb)->gso_size = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08003873 goto out;
Herbert Xufc59f9a2009-04-14 15:11:06 -07003874 }
Herbert Xud565b0a2008-12-15 23:38:52 -08003875
3876 rcu_read_lock();
3877 list_for_each_entry_rcu(ptype, head, list) {
Vlad Yasevichf191a1d2012-11-15 08:49:23 +00003878 if (ptype->type != type || !ptype->callbacks.gro_complete)
Herbert Xud565b0a2008-12-15 23:38:52 -08003879 continue;
3880
Jerry Chu299603e82013-12-11 20:53:45 -08003881 err = ptype->callbacks.gro_complete(skb, 0);
Herbert Xud565b0a2008-12-15 23:38:52 -08003882 break;
3883 }
3884 rcu_read_unlock();
3885
3886 if (err) {
3887 WARN_ON(&ptype->list == head);
3888 kfree_skb(skb);
3889 return NET_RX_SUCCESS;
3890 }
3891
3892out:
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00003893 return netif_receive_skb_internal(skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08003894}
3895
Eric Dumazet2e71a6f2012-10-06 08:08:49 +00003896/* napi->gro_list contains packets ordered by age.
3897 * The youngest packets are at its head.
3898 * Complete skbs in reverse order to reduce latencies.
3899 */
3900void napi_gro_flush(struct napi_struct *napi, bool flush_old)
Herbert Xud565b0a2008-12-15 23:38:52 -08003901{
Eric Dumazet2e71a6f2012-10-06 08:08:49 +00003902 struct sk_buff *skb, *prev = NULL;
Herbert Xud565b0a2008-12-15 23:38:52 -08003903
Eric Dumazet2e71a6f2012-10-06 08:08:49 +00003904 /* scan list and build reverse chain */
3905 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
3906 skb->prev = prev;
3907 prev = skb;
Herbert Xud565b0a2008-12-15 23:38:52 -08003908 }
3909
Eric Dumazet2e71a6f2012-10-06 08:08:49 +00003910 for (skb = prev; skb; skb = prev) {
3911 skb->next = NULL;
3912
3913 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
3914 return;
3915
3916 prev = skb->prev;
3917 napi_gro_complete(skb);
3918 napi->gro_count--;
3919 }
3920
Herbert Xud565b0a2008-12-15 23:38:52 -08003921 napi->gro_list = NULL;
3922}
Eric Dumazet86cac582010-08-31 18:25:32 +00003923EXPORT_SYMBOL(napi_gro_flush);
Herbert Xud565b0a2008-12-15 23:38:52 -08003924
Eric Dumazet89c5fa32012-12-10 13:28:16 +00003925static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3926{
3927 struct sk_buff *p;
3928 unsigned int maclen = skb->dev->hard_header_len;
Tom Herbert0b4cec82014-01-15 08:58:06 -08003929 u32 hash = skb_get_hash_raw(skb);
Eric Dumazet89c5fa32012-12-10 13:28:16 +00003930
3931 for (p = napi->gro_list; p; p = p->next) {
3932 unsigned long diffs;
3933
Tom Herbert0b4cec82014-01-15 08:58:06 -08003934 NAPI_GRO_CB(p)->flush = 0;
3935
3936 if (hash != skb_get_hash_raw(p)) {
3937 NAPI_GRO_CB(p)->same_flow = 0;
3938 continue;
3939 }
3940
Eric Dumazet89c5fa32012-12-10 13:28:16 +00003941 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3942 diffs |= p->vlan_tci ^ skb->vlan_tci;
3943 if (maclen == ETH_HLEN)
3944 diffs |= compare_ether_header(skb_mac_header(p),
Eric Dumazeta50e2332014-03-29 21:28:21 -07003945 skb_mac_header(skb));
Eric Dumazet89c5fa32012-12-10 13:28:16 +00003946 else if (!diffs)
3947 diffs = memcmp(skb_mac_header(p),
Eric Dumazeta50e2332014-03-29 21:28:21 -07003948 skb_mac_header(skb),
Eric Dumazet89c5fa32012-12-10 13:28:16 +00003949 maclen);
3950 NAPI_GRO_CB(p)->same_flow = !diffs;
Eric Dumazet89c5fa32012-12-10 13:28:16 +00003951 }
3952}
3953
Jerry Chu299603e82013-12-11 20:53:45 -08003954static void skb_gro_reset_offset(struct sk_buff *skb)
3955{
3956 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3957 const skb_frag_t *frag0 = &pinfo->frags[0];
3958
3959 NAPI_GRO_CB(skb)->data_offset = 0;
3960 NAPI_GRO_CB(skb)->frag0 = NULL;
3961 NAPI_GRO_CB(skb)->frag0_len = 0;
3962
3963 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3964 pinfo->nr_frags &&
3965 !PageHighMem(skb_frag_page(frag0))) {
3966 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3967 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
Herbert Xud565b0a2008-12-15 23:38:52 -08003968 }
3969}
3970
Eric Dumazeta50e2332014-03-29 21:28:21 -07003971static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
3972{
3973 struct skb_shared_info *pinfo = skb_shinfo(skb);
3974
3975 BUG_ON(skb->end - skb->tail < grow);
3976
3977 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3978
3979 skb->data_len -= grow;
3980 skb->tail += grow;
3981
3982 pinfo->frags[0].page_offset += grow;
3983 skb_frag_size_sub(&pinfo->frags[0], grow);
3984
3985 if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
3986 skb_frag_unref(skb, 0);
3987 memmove(pinfo->frags, pinfo->frags + 1,
3988 --pinfo->nr_frags * sizeof(pinfo->frags[0]));
3989 }
3990}
3991
Rami Rosenbb728822012-11-28 21:55:25 +00003992static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xud565b0a2008-12-15 23:38:52 -08003993{
3994 struct sk_buff **pp = NULL;
Vlad Yasevich22061d82012-11-15 08:49:11 +00003995 struct packet_offload *ptype;
Herbert Xud565b0a2008-12-15 23:38:52 -08003996 __be16 type = skb->protocol;
Vlad Yasevich22061d82012-11-15 08:49:11 +00003997 struct list_head *head = &offload_base;
Herbert Xu0da2afd52008-12-26 14:57:42 -08003998 int same_flow;
Ben Hutchings5b252f02009-10-29 07:17:09 +00003999 enum gro_result ret;
Eric Dumazeta50e2332014-03-29 21:28:21 -07004000 int grow;
Herbert Xud565b0a2008-12-15 23:38:52 -08004001
Eric W. Biederman9c62a682014-03-14 20:51:52 -07004002 if (!(skb->dev->features & NETIF_F_GRO))
Herbert Xud565b0a2008-12-15 23:38:52 -08004003 goto normal;
4004
Tom Herbert5a212322014-08-31 15:12:41 -07004005 if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
Herbert Xuf17f5c92009-01-14 14:36:12 -08004006 goto normal;
4007
Eric Dumazet89c5fa32012-12-10 13:28:16 +00004008 gro_list_prepare(napi, skb);
4009
Herbert Xud565b0a2008-12-15 23:38:52 -08004010 rcu_read_lock();
4011 list_for_each_entry_rcu(ptype, head, list) {
Vlad Yasevichf191a1d2012-11-15 08:49:23 +00004012 if (ptype->type != type || !ptype->callbacks.gro_receive)
Herbert Xud565b0a2008-12-15 23:38:52 -08004013 continue;
4014
Herbert Xu86911732009-01-29 14:19:50 +00004015 skb_set_network_header(skb, skb_gro_offset(skb));
Eric Dumazetefd94502013-02-14 17:31:48 +00004016 skb_reset_mac_len(skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08004017 NAPI_GRO_CB(skb)->same_flow = 0;
4018 NAPI_GRO_CB(skb)->flush = 0;
Herbert Xu5d38a072009-01-04 16:13:40 -08004019 NAPI_GRO_CB(skb)->free = 0;
Or Gerlitzb582ef02014-01-20 13:59:19 +02004020 NAPI_GRO_CB(skb)->udp_mark = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08004021
Tom Herbert662880f2014-08-27 21:26:56 -07004022 /* Setup for GRO checksum validation */
4023 switch (skb->ip_summed) {
4024 case CHECKSUM_COMPLETE:
4025 NAPI_GRO_CB(skb)->csum = skb->csum;
4026 NAPI_GRO_CB(skb)->csum_valid = 1;
4027 NAPI_GRO_CB(skb)->csum_cnt = 0;
4028 break;
4029 case CHECKSUM_UNNECESSARY:
4030 NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
4031 NAPI_GRO_CB(skb)->csum_valid = 0;
4032 break;
4033 default:
4034 NAPI_GRO_CB(skb)->csum_cnt = 0;
4035 NAPI_GRO_CB(skb)->csum_valid = 0;
4036 }
Herbert Xud565b0a2008-12-15 23:38:52 -08004037
Vlad Yasevichf191a1d2012-11-15 08:49:23 +00004038 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08004039 break;
4040 }
4041 rcu_read_unlock();
4042
4043 if (&ptype->list == head)
4044 goto normal;
4045
Herbert Xu0da2afd52008-12-26 14:57:42 -08004046 same_flow = NAPI_GRO_CB(skb)->same_flow;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004047 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
Herbert Xu0da2afd52008-12-26 14:57:42 -08004048
Herbert Xud565b0a2008-12-15 23:38:52 -08004049 if (pp) {
4050 struct sk_buff *nskb = *pp;
4051
4052 *pp = nskb->next;
4053 nskb->next = NULL;
4054 napi_gro_complete(nskb);
Herbert Xu4ae55442009-02-08 18:00:36 +00004055 napi->gro_count--;
Herbert Xud565b0a2008-12-15 23:38:52 -08004056 }
4057
Herbert Xu0da2afd52008-12-26 14:57:42 -08004058 if (same_flow)
Herbert Xud565b0a2008-12-15 23:38:52 -08004059 goto ok;
4060
Eric Dumazet600adc12014-01-09 14:12:19 -08004061 if (NAPI_GRO_CB(skb)->flush)
Herbert Xud565b0a2008-12-15 23:38:52 -08004062 goto normal;
Herbert Xud565b0a2008-12-15 23:38:52 -08004063
Eric Dumazet600adc12014-01-09 14:12:19 -08004064 if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
4065 struct sk_buff *nskb = napi->gro_list;
4066
4067 /* locate the end of the list to select the 'oldest' flow */
4068 while (nskb->next) {
4069 pp = &nskb->next;
4070 nskb = *pp;
4071 }
4072 *pp = NULL;
4073 nskb->next = NULL;
4074 napi_gro_complete(nskb);
4075 } else {
4076 napi->gro_count++;
4077 }
Herbert Xud565b0a2008-12-15 23:38:52 -08004078 NAPI_GRO_CB(skb)->count = 1;
Eric Dumazet2e71a6f2012-10-06 08:08:49 +00004079 NAPI_GRO_CB(skb)->age = jiffies;
Eric Dumazet29e98242014-05-16 11:34:37 -07004080 NAPI_GRO_CB(skb)->last = skb;
Herbert Xu86911732009-01-29 14:19:50 +00004081 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08004082 skb->next = napi->gro_list;
4083 napi->gro_list = skb;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004084 ret = GRO_HELD;
Herbert Xud565b0a2008-12-15 23:38:52 -08004085
Herbert Xuad0f9902009-02-01 01:24:55 -08004086pull:
Eric Dumazeta50e2332014-03-29 21:28:21 -07004087 grow = skb_gro_offset(skb) - skb_headlen(skb);
4088 if (grow > 0)
4089 gro_pull_from_frag0(skb, grow);
Herbert Xud565b0a2008-12-15 23:38:52 -08004090ok:
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004091 return ret;
Herbert Xud565b0a2008-12-15 23:38:52 -08004092
4093normal:
Herbert Xuad0f9902009-02-01 01:24:55 -08004094 ret = GRO_NORMAL;
4095 goto pull;
Herbert Xu5d38a072009-01-04 16:13:40 -08004096}
Herbert Xu96e93ea2009-01-06 10:49:34 -08004097
Jerry Chubf5a7552014-01-07 10:23:19 -08004098struct packet_offload *gro_find_receive_by_type(__be16 type)
4099{
4100 struct list_head *offload_head = &offload_base;
4101 struct packet_offload *ptype;
4102
4103 list_for_each_entry_rcu(ptype, offload_head, list) {
4104 if (ptype->type != type || !ptype->callbacks.gro_receive)
4105 continue;
4106 return ptype;
4107 }
4108 return NULL;
4109}
Or Gerlitze27a2f82014-01-20 13:59:20 +02004110EXPORT_SYMBOL(gro_find_receive_by_type);
Jerry Chubf5a7552014-01-07 10:23:19 -08004111
4112struct packet_offload *gro_find_complete_by_type(__be16 type)
4113{
4114 struct list_head *offload_head = &offload_base;
4115 struct packet_offload *ptype;
4116
4117 list_for_each_entry_rcu(ptype, offload_head, list) {
4118 if (ptype->type != type || !ptype->callbacks.gro_complete)
4119 continue;
4120 return ptype;
4121 }
4122 return NULL;
4123}
Or Gerlitze27a2f82014-01-20 13:59:20 +02004124EXPORT_SYMBOL(gro_find_complete_by_type);
Herbert Xu96e93ea2009-01-06 10:49:34 -08004125
Rami Rosenbb728822012-11-28 21:55:25 +00004126static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
Herbert Xu5d38a072009-01-04 16:13:40 -08004127{
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004128 switch (ret) {
4129 case GRO_NORMAL:
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00004130 if (netif_receive_skb_internal(skb))
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07004131 ret = GRO_DROP;
4132 break;
Herbert Xu5d38a072009-01-04 16:13:40 -08004133
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004134 case GRO_DROP:
Herbert Xu5d38a072009-01-04 16:13:40 -08004135 kfree_skb(skb);
4136 break;
Ben Hutchings5b252f02009-10-29 07:17:09 +00004137
Eric Dumazetdaa86542012-04-19 07:07:40 +00004138 case GRO_MERGED_FREE:
Eric Dumazetd7e88832012-04-30 08:10:34 +00004139 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
4140 kmem_cache_free(skbuff_head_cache, skb);
4141 else
4142 __kfree_skb(skb);
Eric Dumazetdaa86542012-04-19 07:07:40 +00004143 break;
4144
Ben Hutchings5b252f02009-10-29 07:17:09 +00004145 case GRO_HELD:
4146 case GRO_MERGED:
4147 break;
Herbert Xu5d38a072009-01-04 16:13:40 -08004148 }
4149
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07004150 return ret;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004151}
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004152
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07004153gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004154{
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00004155 trace_napi_gro_receive_entry(skb);
Herbert Xu86911732009-01-29 14:19:50 +00004156
Eric Dumazeta50e2332014-03-29 21:28:21 -07004157 skb_gro_reset_offset(skb);
4158
Eric Dumazet89c5fa32012-12-10 13:28:16 +00004159 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08004160}
4161EXPORT_SYMBOL(napi_gro_receive);
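/*
 * Illustrative sketch, not part of this file: a NAPI driver normally prefers
 * napi_gro_receive() over netif_receive_skb() so dev_gro_receive() gets a
 * chance to merge consecutive packets of the same flow.  The mydrv_ name is
 * an assumption.
 */
#include <linux/netdevice.h>
#include <linux/etherdevice.h>

static void mydrv_rx_gro(struct napi_struct *napi, struct sk_buff *skb)
{
	skb->protocol = eth_type_trans(skb, napi->dev);
	/* GRO_MERGED/GRO_HELD mean the skb now belongs to the GRO layer */
	napi_gro_receive(napi, skb);
}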
4162
stephen hemmingerd0c2b0d2010-10-19 07:12:10 +00004163static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xu96e93ea2009-01-06 10:49:34 -08004164{
Eric Dumazet93a35f52014-10-23 06:30:30 -07004165 if (unlikely(skb->pfmemalloc)) {
4166 consume_skb(skb);
4167 return;
4168 }
Herbert Xu96e93ea2009-01-06 10:49:34 -08004169 __skb_pull(skb, skb_headlen(skb));
Eric Dumazet2a2a4592012-03-21 06:58:03 +00004170 /* restore the reserve we had after netdev_alloc_skb_ip_align() */
4171 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
Jesse Gross3701e512010-10-20 13:56:06 +00004172 skb->vlan_tci = 0;
Herbert Xu66c46d72011-01-29 20:44:54 -08004173 skb->dev = napi->dev;
Andy Gospodarek6d152e22011-02-02 14:53:25 -08004174 skb->skb_iif = 0;
Jerry Chuc3caf112014-07-14 15:54:46 -07004175 skb->encapsulation = 0;
4176 skb_shinfo(skb)->gso_type = 0;
Eric Dumazete33d0ba2014-04-03 09:28:10 -07004177 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
Herbert Xu96e93ea2009-01-06 10:49:34 -08004178
4179 napi->skb = skb;
4180}
Herbert Xu96e93ea2009-01-06 10:49:34 -08004181
Herbert Xu76620aa2009-04-16 02:02:07 -07004182struct sk_buff *napi_get_frags(struct napi_struct *napi)
Herbert Xu5d38a072009-01-04 16:13:40 -08004183{
Herbert Xu5d38a072009-01-04 16:13:40 -08004184 struct sk_buff *skb = napi->skb;
Herbert Xu5d38a072009-01-04 16:13:40 -08004185
4186 if (!skb) {
Alexander Duyckfd11a832014-12-09 19:40:49 -08004187 skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
Eric Dumazet84b9cd62013-12-05 21:44:27 -08004188 napi->skb = skb;
Herbert Xu5d38a072009-01-04 16:13:40 -08004189 }
Herbert Xu96e93ea2009-01-06 10:49:34 -08004190 return skb;
4191}
Herbert Xu76620aa2009-04-16 02:02:07 -07004192EXPORT_SYMBOL(napi_get_frags);
Herbert Xu96e93ea2009-01-06 10:49:34 -08004193
Eric Dumazeta50e2332014-03-29 21:28:21 -07004194static gro_result_t napi_frags_finish(struct napi_struct *napi,
4195 struct sk_buff *skb,
4196 gro_result_t ret)
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004197{
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004198 switch (ret) {
4199 case GRO_NORMAL:
Eric Dumazeta50e2332014-03-29 21:28:21 -07004200 case GRO_HELD:
4201 __skb_push(skb, ETH_HLEN);
4202 skb->protocol = eth_type_trans(skb, skb->dev);
4203 if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07004204 ret = GRO_DROP;
Herbert Xu86911732009-01-29 14:19:50 +00004205 break;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004206
4207 case GRO_DROP:
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004208 case GRO_MERGED_FREE:
4209 napi_reuse_skb(napi, skb);
4210 break;
Ben Hutchings5b252f02009-10-29 07:17:09 +00004211
4212 case GRO_MERGED:
4213 break;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004214 }
4215
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07004216 return ret;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004217}
Herbert Xu5d0d9be2009-01-29 14:19:48 +00004218
Eric Dumazeta50e2332014-03-29 21:28:21 -07004219/* The upper GRO stack assumes the network header starts at gro_offset=0.
4220 * Drivers could call both napi_gro_frags() and napi_gro_receive().
4221 * We copy the Ethernet header into skb->data to have a common layout.
4222 */
Eric Dumazet4adb9c42012-05-18 20:49:06 +00004223static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
Herbert Xu96e93ea2009-01-06 10:49:34 -08004224{
Herbert Xu76620aa2009-04-16 02:02:07 -07004225 struct sk_buff *skb = napi->skb;
Eric Dumazeta50e2332014-03-29 21:28:21 -07004226 const struct ethhdr *eth;
4227 unsigned int hlen = sizeof(*eth);
Herbert Xu76620aa2009-04-16 02:02:07 -07004228
4229 napi->skb = NULL;
4230
Eric Dumazeta50e2332014-03-29 21:28:21 -07004231 skb_reset_mac_header(skb);
4232 skb_gro_reset_offset(skb);
4233
4234 eth = skb_gro_header_fast(skb, 0);
4235 if (unlikely(skb_gro_header_hard(skb, hlen))) {
4236 eth = skb_gro_header_slow(skb, hlen, 0);
4237 if (unlikely(!eth)) {
4238 napi_reuse_skb(napi, skb);
4239 return NULL;
4240 }
4241 } else {
4242 gro_pull_from_frag0(skb, hlen);
4243 NAPI_GRO_CB(skb)->frag0 += hlen;
4244 NAPI_GRO_CB(skb)->frag0_len -= hlen;
Herbert Xu76620aa2009-04-16 02:02:07 -07004245 }
Eric Dumazeta50e2332014-03-29 21:28:21 -07004246 __skb_pull(skb, hlen);
4247
4248 /*
4249 * This works because the only protocols we care about don't require
4250 * special handling.
4251 * We'll fix it up properly in napi_frags_finish()
4252 */
4253 skb->protocol = eth->h_proto;
Herbert Xu76620aa2009-04-16 02:02:07 -07004254
Herbert Xu76620aa2009-04-16 02:02:07 -07004255 return skb;
4256}
Herbert Xu76620aa2009-04-16 02:02:07 -07004257
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07004258gro_result_t napi_gro_frags(struct napi_struct *napi)
Herbert Xu76620aa2009-04-16 02:02:07 -07004259{
4260 struct sk_buff *skb = napi_frags_skb(napi);
Herbert Xu96e93ea2009-01-06 10:49:34 -08004261
4262 if (!skb)
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07004263 return GRO_DROP;
Herbert Xu96e93ea2009-01-06 10:49:34 -08004264
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00004265 trace_napi_gro_frags_entry(skb);
4266
Eric Dumazet89c5fa32012-12-10 13:28:16 +00004267 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
Herbert Xu5d38a072009-01-04 16:13:40 -08004268}
4269EXPORT_SYMBOL(napi_gro_frags);
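/*
 * Illustrative sketch, not part of this file: the napi_get_frags() /
 * napi_gro_frags() pairing used by drivers that receive directly into pages.
 * The mydrv_ name, the page/offset/len arguments and the recycling policy
 * are assumptions.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/mm.h>

static void mydrv_rx_page(struct napi_struct *napi, struct page *page,
			  unsigned int offset, unsigned int len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb)) {
		put_page(page);		/* drop the frame */
		return;
	}
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, len, len);
	/* napi_frags_skb() pulls the Ethernet header and sets skb->protocol */
	napi_gro_frags(napi);
}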
4270
Tom Herbert573e8fc2014-08-22 13:33:47 -07004271/* Compute the checksum from gro_offset and return the folded value
4272 * after adding in any pseudo checksum.
4273 */
4274__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
4275{
4276 __wsum wsum;
4277 __sum16 sum;
4278
4279 wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
4280
4281 /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
4282 sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
4283 if (likely(!sum)) {
4284 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
4285 !skb->csum_complete_sw)
4286 netdev_rx_csum_fault(skb->dev);
4287 }
4288
4289 NAPI_GRO_CB(skb)->csum = wsum;
4290 NAPI_GRO_CB(skb)->csum_valid = 1;
4291
4292 return sum;
4293}
4294EXPORT_SYMBOL(__skb_gro_checksum_complete);
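/*
 * Illustrative sketch, not part of this file: how a protocol gro_receive
 * handler could use __skb_gro_checksum_complete() after stashing its
 * pseudo-header checksum in NAPI_GRO_CB(skb)->csum.  The myproto_ name is
 * hypothetical; in-tree users normally go through the
 * skb_gro_checksum_validate() helpers instead.
 */
#include <linux/netdevice.h>

static bool myproto_gro_csum_ok(struct sk_buff *skb, __wsum pseudo)
{
	NAPI_GRO_CB(skb)->csum = pseudo;	/* pseudo-header checksum */
	return __skb_gro_checksum_complete(skb) == 0;
}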
4295
Eric Dumazete326bed2010-04-22 00:22:45 -07004296/*
Zhi Yong Wu855abcf2014-01-01 04:34:50 +08004297 * net_rps_action_and_irq_enable() sends any pending IPIs for RPS.
Eric Dumazete326bed2010-04-22 00:22:45 -07004298 * Note: called with local irq disabled, but exits with local irq enabled.
4299 */
4300static void net_rps_action_and_irq_enable(struct softnet_data *sd)
4301{
4302#ifdef CONFIG_RPS
4303 struct softnet_data *remsd = sd->rps_ipi_list;
4304
4305 if (remsd) {
4306 sd->rps_ipi_list = NULL;
4307
4308 local_irq_enable();
4309
4310		/* Send pending IPIs to kick RPS processing on remote CPUs. */
4311 while (remsd) {
4312 struct softnet_data *next = remsd->rps_ipi_next;
4313
4314 if (cpu_online(remsd->cpu))
Frederic Weisbeckerc46fff22014-02-24 16:40:02 +01004315 smp_call_function_single_async(remsd->cpu,
Frederic Weisbeckerfce8ad12014-02-24 16:40:01 +01004316 &remsd->csd);
Eric Dumazete326bed2010-04-22 00:22:45 -07004317 remsd = next;
4318 }
4319 } else
4320#endif
4321 local_irq_enable();
4322}
4323
Eric Dumazetd75b1ad2014-11-02 06:19:33 -08004324static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
4325{
4326#ifdef CONFIG_RPS
4327 return sd->rps_ipi_list != NULL;
4328#else
4329 return false;
4330#endif
4331}
4332
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004333static int process_backlog(struct napi_struct *napi, int quota)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004334{
4335 int work = 0;
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07004336 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004337
Eric Dumazete326bed2010-04-22 00:22:45 -07004338	/* Check if we have pending IPIs; it's better to send them now
4339	 * than to wait for net_rx_action() to end.
4340 */
Eric Dumazetd75b1ad2014-11-02 06:19:33 -08004341 if (sd_has_rps_ipi_waiting(sd)) {
Eric Dumazete326bed2010-04-22 00:22:45 -07004342 local_irq_disable();
4343 net_rps_action_and_irq_enable(sd);
4344 }
Eric Dumazetd75b1ad2014-11-02 06:19:33 -08004345
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004346 napi->weight = weight_p;
Changli Gao6e7676c2010-04-27 15:07:33 -07004347 local_irq_disable();
Tom Herbert11ef7a82014-06-30 09:50:40 -07004348 while (1) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004349 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004350
Changli Gao6e7676c2010-04-27 15:07:33 -07004351 while ((skb = __skb_dequeue(&sd->process_queue))) {
Eric Dumazete4008272010-04-05 15:42:39 -07004352 local_irq_enable();
Changli Gao6e7676c2010-04-27 15:07:33 -07004353 __netif_receive_skb(skb);
Changli Gao6e7676c2010-04-27 15:07:33 -07004354 local_irq_disable();
Tom Herbert76cc8b12010-05-20 18:37:59 +00004355 input_queue_head_incr(sd);
4356 if (++work >= quota) {
4357 local_irq_enable();
4358 return work;
4359 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004360 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004361
Changli Gao6e7676c2010-04-27 15:07:33 -07004362 rps_lock(sd);
Tom Herbert11ef7a82014-06-30 09:50:40 -07004363 if (skb_queue_empty(&sd->input_pkt_queue)) {
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07004364 /*
4365 * Inline a custom version of __napi_complete().
4366			 * Only the current CPU owns and manipulates this napi,
Tom Herbert11ef7a82014-06-30 09:50:40 -07004367 * and NAPI_STATE_SCHED is the only possible flag set
4368 * on backlog.
4369 * We can use a plain write instead of clear_bit(),
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07004370			 * and we don't need an smp_mb() memory barrier.
4371 */
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07004372 napi->state = 0;
Tom Herbert11ef7a82014-06-30 09:50:40 -07004373 rps_unlock(sd);
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07004374
Tom Herbert11ef7a82014-06-30 09:50:40 -07004375 break;
Changli Gao6e7676c2010-04-27 15:07:33 -07004376 }
Tom Herbert11ef7a82014-06-30 09:50:40 -07004377
4378 skb_queue_splice_tail_init(&sd->input_pkt_queue,
4379 &sd->process_queue);
Changli Gao6e7676c2010-04-27 15:07:33 -07004380 rps_unlock(sd);
4381 }
4382 local_irq_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004383
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004384 return work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004385}
4386
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004387/**
4388 * __napi_schedule - schedule for receive
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004389 * @n: entry to schedule
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004390 *
Eric Dumazetbc9ad162014-10-28 18:05:13 -07004391 * The entry's receive function will be scheduled to run.
4392 * Consider using __napi_schedule_irqoff() if hard irqs are masked.
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004393 */
Harvey Harrisonb5606c22008-02-13 15:03:16 -08004394void __napi_schedule(struct napi_struct *n)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004395{
4396 unsigned long flags;
4397
4398 local_irq_save(flags);
Christoph Lameter903ceff2014-08-17 12:30:35 -05004399 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004400 local_irq_restore(flags);
4401}
4402EXPORT_SYMBOL(__napi_schedule);
4403
Eric Dumazetbc9ad162014-10-28 18:05:13 -07004404/**
4405 * __napi_schedule_irqoff - schedule for receive
4406 * @n: entry to schedule
4407 *
4408 * Variant of __napi_schedule() assuming hard irqs are masked
4409 */
4410void __napi_schedule_irqoff(struct napi_struct *n)
4411{
4412 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
4413}
4414EXPORT_SYMBOL(__napi_schedule_irqoff);
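/*
 * Illustrative sketch, not part of this file: the usual hard-irq handler
 * pattern that pairs with the scheduling helpers above.  Since hard irqs are
 * masked here, __napi_schedule_irqoff() can be used; drivers that do not
 * care can simply call napi_schedule().  The mydrv_ name and the device irq
 * masking step are assumptions.
 */
#include <linux/interrupt.h>
#include <linux/netdevice.h>

static irqreturn_t mydrv_interrupt(int irq, void *data)
{
	struct napi_struct *napi = data;

	if (napi_schedule_prep(napi)) {
		/* mask further RX interrupts here (device specific), then: */
		__napi_schedule_irqoff(napi);
	}
	return IRQ_HANDLED;
}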
4415
Herbert Xud565b0a2008-12-15 23:38:52 -08004416void __napi_complete(struct napi_struct *n)
4417{
4418 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
Herbert Xud565b0a2008-12-15 23:38:52 -08004419
Eric Dumazetd75b1ad2014-11-02 06:19:33 -08004420 list_del_init(&n->poll_list);
Peter Zijlstra4e857c52014-03-17 18:06:10 +01004421 smp_mb__before_atomic();
Herbert Xud565b0a2008-12-15 23:38:52 -08004422 clear_bit(NAPI_STATE_SCHED, &n->state);
4423}
4424EXPORT_SYMBOL(__napi_complete);
4425
Eric Dumazet3b47d302014-11-06 21:09:44 -08004426void napi_complete_done(struct napi_struct *n, int work_done)
Herbert Xud565b0a2008-12-15 23:38:52 -08004427{
4428 unsigned long flags;
4429
4430 /*
4431	 * don't let napi dequeue from the CPU poll list
4432	 * just in case it's running on a different CPU
4433 */
4434 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
4435 return;
4436
Eric Dumazet3b47d302014-11-06 21:09:44 -08004437 if (n->gro_list) {
4438 unsigned long timeout = 0;
Eric Dumazetd75b1ad2014-11-02 06:19:33 -08004439
Eric Dumazet3b47d302014-11-06 21:09:44 -08004440 if (work_done)
4441 timeout = n->dev->gro_flush_timeout;
4442
4443 if (timeout)
4444 hrtimer_start(&n->timer, ns_to_ktime(timeout),
4445 HRTIMER_MODE_REL_PINNED);
4446 else
4447 napi_gro_flush(n, false);
4448 }
Eric Dumazetd75b1ad2014-11-02 06:19:33 -08004449 if (likely(list_empty(&n->poll_list))) {
4450 WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
4451 } else {
4452 /* If n->poll_list is not empty, we need to mask irqs */
4453 local_irq_save(flags);
4454 __napi_complete(n);
4455 local_irq_restore(flags);
4456 }
Herbert Xud565b0a2008-12-15 23:38:52 -08004457}
Eric Dumazet3b47d302014-11-06 21:09:44 -08004458EXPORT_SYMBOL(napi_complete_done);
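/*
 * Illustrative sketch, not part of this file: ending a poll round with
 * napi_complete_done() so the gro_flush_timeout handling above applies.
 * mydrv_clean_rx() is a hypothetical stand-in for the driver's real
 * receive processing.
 */
#include <linux/netdevice.h>

int mydrv_clean_rx(struct napi_struct *napi, int budget);	/* hypothetical */

static int mydrv_poll(struct napi_struct *napi, int budget)
{
	int work_done = mydrv_clean_rx(napi, budget);

	/* only complete when the budget was not exhausted */
	if (work_done < budget)
		napi_complete_done(napi, work_done);
	return work_done;
}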
Herbert Xud565b0a2008-12-15 23:38:52 -08004459
Eliezer Tamiraf12fa62013-06-10 11:39:41 +03004460/* must be called under rcu_read_lock(), as we dont take a reference */
4461struct napi_struct *napi_by_id(unsigned int napi_id)
4462{
4463 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
4464 struct napi_struct *napi;
4465
4466 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
4467 if (napi->napi_id == napi_id)
4468 return napi;
4469
4470 return NULL;
4471}
4472EXPORT_SYMBOL_GPL(napi_by_id);
4473
4474void napi_hash_add(struct napi_struct *napi)
4475{
4476 if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
4477
4478 spin_lock(&napi_hash_lock);
4479
4480		/* 0 is not a valid id; we also skip an id that is already taken.
4481		 * We expect both events to be extremely rare.
4482 */
4483 napi->napi_id = 0;
4484 while (!napi->napi_id) {
4485 napi->napi_id = ++napi_gen_id;
4486 if (napi_by_id(napi->napi_id))
4487 napi->napi_id = 0;
4488 }
4489
4490 hlist_add_head_rcu(&napi->napi_hash_node,
4491 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
4492
4493 spin_unlock(&napi_hash_lock);
4494 }
4495}
4496EXPORT_SYMBOL_GPL(napi_hash_add);
4497
4498/* Warning: the caller is responsible for making sure an RCU grace period
4499 * elapses before freeing the memory containing @napi
4500 */
4501void napi_hash_del(struct napi_struct *napi)
4502{
4503 spin_lock(&napi_hash_lock);
4504
4505 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
4506 hlist_del_rcu(&napi->napi_hash_node);
4507
4508 spin_unlock(&napi_hash_lock);
4509}
4510EXPORT_SYMBOL_GPL(napi_hash_del);
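/*
 * Illustrative sketch, not part of this file: honouring the RCU requirement
 * documented above when tearing down a hashed NAPI context.  The mydrv_rxq
 * layout and the surrounding teardown path are assumptions.
 */
#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct mydrv_rxq {
	struct napi_struct napi;
	/* ... hypothetical ring state ... */
};

static void mydrv_free_rxq(struct mydrv_rxq *rxq)
{
	napi_hash_del(&rxq->napi);
	synchronize_rcu();		/* napi_by_id() readers may still see it */
	netif_napi_del(&rxq->napi);
	kfree(rxq);
}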
4511
Eric Dumazet3b47d302014-11-06 21:09:44 -08004512static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
4513{
4514 struct napi_struct *napi;
4515
4516 napi = container_of(timer, struct napi_struct, timer);
4517 if (napi->gro_list)
4518 napi_schedule(napi);
4519
4520 return HRTIMER_NORESTART;
4521}
4522
Herbert Xud565b0a2008-12-15 23:38:52 -08004523void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
4524 int (*poll)(struct napi_struct *, int), int weight)
4525{
4526 INIT_LIST_HEAD(&napi->poll_list);
Eric Dumazet3b47d302014-11-06 21:09:44 -08004527 hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
4528 napi->timer.function = napi_watchdog;
Herbert Xu4ae55442009-02-08 18:00:36 +00004529 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08004530 napi->gro_list = NULL;
Herbert Xu5d38a072009-01-04 16:13:40 -08004531 napi->skb = NULL;
Herbert Xud565b0a2008-12-15 23:38:52 -08004532 napi->poll = poll;
Eric Dumazet82dc3c62013-03-05 15:57:22 +00004533 if (weight > NAPI_POLL_WEIGHT)
4534 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
4535 weight, dev->name);
Herbert Xud565b0a2008-12-15 23:38:52 -08004536 napi->weight = weight;
4537 list_add(&napi->dev_list, &dev->napi_list);
Herbert Xud565b0a2008-12-15 23:38:52 -08004538 napi->dev = dev;
Herbert Xu5d38a072009-01-04 16:13:40 -08004539#ifdef CONFIG_NETPOLL
Herbert Xud565b0a2008-12-15 23:38:52 -08004540 spin_lock_init(&napi->poll_lock);
4541 napi->poll_owner = -1;
4542#endif
4543 set_bit(NAPI_STATE_SCHED, &napi->state);
4544}
4545EXPORT_SYMBOL(netif_napi_add);
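/*
 * Illustrative sketch, not part of this file: registering and enabling a
 * NAPI context at setup time with the recommended NAPI_POLL_WEIGHT.  The
 * mydrv_ names are assumptions and mydrv_poll() is presumed to be defined
 * elsewhere in the driver.
 */
#include <linux/netdevice.h>

struct mydrv_priv {
	struct napi_struct napi;
};

int mydrv_poll(struct napi_struct *napi, int budget);	/* hypothetical */

static void mydrv_setup_napi(struct net_device *netdev)
{
	struct mydrv_priv *priv = netdev_priv(netdev);

	netif_napi_add(netdev, &priv->napi, mydrv_poll, NAPI_POLL_WEIGHT);
	napi_enable(&priv->napi);	/* typically done in ndo_open */
}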
4546
Eric Dumazet3b47d302014-11-06 21:09:44 -08004547void napi_disable(struct napi_struct *n)
4548{
4549 might_sleep();
4550 set_bit(NAPI_STATE_DISABLE, &n->state);
4551
4552 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
4553 msleep(1);
4554
4555 hrtimer_cancel(&n->timer);
4556
4557 clear_bit(NAPI_STATE_DISABLE, &n->state);
4558}
4559EXPORT_SYMBOL(napi_disable);
4560
Herbert Xud565b0a2008-12-15 23:38:52 -08004561void netif_napi_del(struct napi_struct *napi)
4562{
Peter P Waskiewicz Jrd7b06632008-12-26 01:35:35 -08004563 list_del_init(&napi->dev_list);
Herbert Xu76620aa2009-04-16 02:02:07 -07004564 napi_free_frags(napi);
Herbert Xud565b0a2008-12-15 23:38:52 -08004565
Eric Dumazet289dccb2013-12-20 14:29:08 -08004566 kfree_skb_list(napi->gro_list);
Herbert Xud565b0a2008-12-15 23:38:52 -08004567 napi->gro_list = NULL;
Herbert Xu4ae55442009-02-08 18:00:36 +00004568 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08004569}
4570EXPORT_SYMBOL(netif_napi_del);
4571
Herbert Xu726ce702014-12-21 07:16:21 +11004572static int napi_poll(struct napi_struct *n, struct list_head *repoll)
4573{
4574 void *have;
4575 int work, weight;
4576
4577 list_del_init(&n->poll_list);
4578
4579 have = netpoll_poll_lock(n);
4580
4581 weight = n->weight;
4582
4583 /* This NAPI_STATE_SCHED test is for avoiding a race
4584 * with netpoll's poll_napi(). Only the entity which
4585 * obtains the lock and sees NAPI_STATE_SCHED set will
4586 * actually make the ->poll() call. Therefore we avoid
4587 * accidentally calling ->poll() when NAPI is not scheduled.
4588 */
4589 work = 0;
4590 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
4591 work = n->poll(n, weight);
4592 trace_napi_poll(n);
4593 }
4594
4595 WARN_ON_ONCE(work > weight);
4596
4597 if (likely(work < weight))
4598 goto out_unlock;
4599
4600 /* Drivers must not modify the NAPI state if they
4601 * consume the entire weight. In such cases this code
4602 * still "owns" the NAPI instance and therefore can
4603 * move the instance around on the list at-will.
4604 */
4605 if (unlikely(napi_disable_pending(n))) {
4606 napi_complete(n);
4607 goto out_unlock;
4608 }
4609
4610 if (n->gro_list) {
4611		/* Flush packets that are too old.
4612 * If HZ < 1000, flush all packets.
4613 */
4614 napi_gro_flush(n, HZ >= 1000);
4615 }
4616
Herbert Xu001ce542014-12-21 07:16:22 +11004617 /* Some drivers may have called napi_schedule
4618 * prior to exhausting their budget.
4619 */
4620 if (unlikely(!list_empty(&n->poll_list))) {
4621 pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
4622 n->dev ? n->dev->name : "backlog");
4623 goto out_unlock;
4624 }
4625
Herbert Xu726ce702014-12-21 07:16:21 +11004626 list_add_tail(&n->poll_list, repoll);
4627
4628out_unlock:
4629 netpoll_poll_unlock(have);
4630
4631 return work;
4632}
4633
Linus Torvalds1da177e2005-04-16 15:20:36 -07004634static void net_rx_action(struct softirq_action *h)
4635{
Christoph Lameter903ceff2014-08-17 12:30:35 -05004636 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
Stephen Hemminger24f8b232008-11-03 17:14:38 -08004637 unsigned long time_limit = jiffies + 2;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07004638 int budget = netdev_budget;
Eric Dumazetd75b1ad2014-11-02 06:19:33 -08004639 LIST_HEAD(list);
4640 LIST_HEAD(repoll);
Matt Mackall53fb95d2005-08-11 19:27:43 -07004641
Linus Torvalds1da177e2005-04-16 15:20:36 -07004642 local_irq_disable();
Eric Dumazetd75b1ad2014-11-02 06:19:33 -08004643 list_splice_init(&sd->poll_list, &list);
4644 local_irq_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004645
Herbert Xuceb8d5b2014-12-21 07:16:25 +11004646 for (;;) {
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004647 struct napi_struct *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004648
Herbert Xuceb8d5b2014-12-21 07:16:25 +11004649 if (list_empty(&list)) {
4650 if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
4651 return;
4652 break;
4653 }
4654
Herbert Xu6bd373e2014-12-21 07:16:24 +11004655 n = list_first_entry(&list, struct napi_struct, poll_list);
4656 budget -= napi_poll(n, &repoll);
4657
Eric Dumazetd75b1ad2014-11-02 06:19:33 -08004658 /* If softirq window is exhausted then punt.
Stephen Hemminger24f8b232008-11-03 17:14:38 -08004659		 * Allow this to run for 2 jiffies, which allows
4660 * an average latency of 1.5/HZ.
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004661 */
Herbert Xuceb8d5b2014-12-21 07:16:25 +11004662 if (unlikely(budget <= 0 ||
4663 time_after_eq(jiffies, time_limit))) {
4664 sd->time_squeeze++;
4665 break;
4666 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004667 }
Eric Dumazetd75b1ad2014-11-02 06:19:33 -08004668
Eric Dumazetd75b1ad2014-11-02 06:19:33 -08004669 local_irq_disable();
4670
4671 list_splice_tail_init(&sd->poll_list, &list);
4672 list_splice_tail(&repoll, &list);
4673 list_splice(&list, &sd->poll_list);
4674 if (!list_empty(&sd->poll_list))
4675 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4676
Eric Dumazete326bed2010-04-22 00:22:45 -07004677 net_rps_action_and_irq_enable(sd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004678}
4679
Veaceslav Falicoaa9d8562013-08-28 23:25:04 +02004680struct netdev_adjacent {
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004681 struct net_device *dev;
Veaceslav Falico5d261912013-08-28 23:25:05 +02004682
4683 /* upper master flag, there can only be one master device per list */
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004684 bool master;
Veaceslav Falico5d261912013-08-28 23:25:05 +02004685
Veaceslav Falico5d261912013-08-28 23:25:05 +02004686 /* counter for the number of times this device was added to us */
4687 u16 ref_nr;
4688
Veaceslav Falico402dae92013-09-25 09:20:09 +02004689 /* private field for the users */
4690 void *private;
4691
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004692 struct list_head list;
4693 struct rcu_head rcu;
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004694};
4695
Veaceslav Falico5d261912013-08-28 23:25:05 +02004696static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
4697 struct net_device *adj_dev,
Veaceslav Falico2f268f12013-09-25 09:20:07 +02004698 struct list_head *adj_list)
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004699{
Veaceslav Falico5d261912013-08-28 23:25:05 +02004700 struct netdev_adjacent *adj;
Veaceslav Falico5d261912013-08-28 23:25:05 +02004701
Veaceslav Falico2f268f12013-09-25 09:20:07 +02004702 list_for_each_entry(adj, adj_list, list) {
Veaceslav Falico5d261912013-08-28 23:25:05 +02004703 if (adj->dev == adj_dev)
4704 return adj;
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004705 }
4706 return NULL;
4707}
4708
4709/**
4710 * netdev_has_upper_dev - Check if device is linked to an upper device
4711 * @dev: device
4712 * @upper_dev: upper device to check
4713 *
4714 * Find out if a device is linked to the specified upper device and return
4715 * true if it is. Note that this checks only the immediate upper device,
4716 * not through a complete stack of devices. The caller must hold the RTNL lock.
4717 */
4718bool netdev_has_upper_dev(struct net_device *dev,
4719 struct net_device *upper_dev)
4720{
4721 ASSERT_RTNL();
4722
Veaceslav Falico2f268f12013-09-25 09:20:07 +02004723 return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004724}
4725EXPORT_SYMBOL(netdev_has_upper_dev);
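/*
 * Illustrative sketch, not part of this file: checking under RTNL whether a
 * port is already linked to a given master before trying to enslave it
 * again.  The function name and calling context are assumptions.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static bool mydrv_already_enslaved(struct net_device *port,
				   struct net_device *master)
{
	ASSERT_RTNL();
	return netdev_has_upper_dev(port, master);
}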
4726
4727/**
4728 * netdev_has_any_upper_dev - Check if device is linked to some device
4729 * @dev: device
4730 *
4731 * Find out if a device is linked to an upper device and return true if
4732 * it is. The caller must hold the RTNL lock.
4733 */
stephen hemminger1d143d92013-12-29 14:01:29 -08004734static bool netdev_has_any_upper_dev(struct net_device *dev)
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004735{
4736 ASSERT_RTNL();
4737
Veaceslav Falico2f268f12013-09-25 09:20:07 +02004738 return !list_empty(&dev->all_adj_list.upper);
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004739}
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004740
4741/**
4742 * netdev_master_upper_dev_get - Get master upper device
4743 * @dev: device
4744 *
4745 * Find a master upper device and return a pointer to it, or NULL if
4746 * there is none. The caller must hold the RTNL lock.
4747 */
4748struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4749{
Veaceslav Falicoaa9d8562013-08-28 23:25:04 +02004750 struct netdev_adjacent *upper;
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004751
4752 ASSERT_RTNL();
4753
Veaceslav Falico2f268f12013-09-25 09:20:07 +02004754 if (list_empty(&dev->adj_list.upper))
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004755 return NULL;
4756
Veaceslav Falico2f268f12013-09-25 09:20:07 +02004757 upper = list_first_entry(&dev->adj_list.upper,
Veaceslav Falicoaa9d8562013-08-28 23:25:04 +02004758 struct netdev_adjacent, list);
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004759 if (likely(upper->master))
4760 return upper->dev;
4761 return NULL;
4762}
4763EXPORT_SYMBOL(netdev_master_upper_dev_get);
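/*
 * Illustrative sketch, not part of this file: looking up a port's master
 * (e.g. its bond or bridge) under RTNL.  The logging and function name are
 * assumptions.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static void mydrv_show_master(struct net_device *port)
{
	struct net_device *master;

	ASSERT_RTNL();
	master = netdev_master_upper_dev_get(port);
	if (master)
		netdev_info(port, "master is %s\n", master->name);
}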
4764
Veaceslav Falicob6ccba42013-09-25 09:20:23 +02004765void *netdev_adjacent_get_private(struct list_head *adj_list)
4766{
4767 struct netdev_adjacent *adj;
4768
4769 adj = list_entry(adj_list, struct netdev_adjacent, list);
4770
4771 return adj->private;
4772}
4773EXPORT_SYMBOL(netdev_adjacent_get_private);
4774
Veaceslav Falico31088a12013-09-25 09:20:12 +02004775/**
Vlad Yasevich44a40852014-05-16 17:20:38 -04004776 * netdev_upper_get_next_dev_rcu - Get the next dev from upper list
4777 * @dev: device
4778 * @iter: list_head ** of the current position
4779 *
4780 * Gets the next device from the dev's upper list, starting from iter
4781 * position. The caller must hold RCU read lock.
4782 */
4783struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
4784 struct list_head **iter)
4785{
4786 struct netdev_adjacent *upper;
4787
4788 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
4789
4790 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4791
4792 if (&upper->list == &dev->adj_list.upper)
4793 return NULL;
4794
4795 *iter = &upper->list;
4796
4797 return upper->dev;
4798}
4799EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
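/*
 * Illustrative sketch, not part of this file: walking the immediate upper
 * devices under rcu_read_lock() with the iterator above (the
 * netdev_for_each_upper_dev_rcu() wrapper in <linux/netdevice.h> expands to
 * the same loop).  Counting the uppers is only an assumption to give the
 * loop a body.
 */
#include <linux/netdevice.h>
#include <linux/rcupdate.h>

static unsigned int mydrv_count_uppers(struct net_device *dev)
{
	struct list_head *iter = &dev->adj_list.upper;
	struct net_device *upper;
	unsigned int n = 0;

	rcu_read_lock();
	while ((upper = netdev_upper_get_next_dev_rcu(dev, &iter)) != NULL)
		n++;
	rcu_read_unlock();
	return n;
}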
4800
4801/**
Veaceslav Falico31088a12013-09-25 09:20:12 +02004802 * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
Veaceslav Falico48311f42013-08-28 23:25:07 +02004803 * @dev: device
4804 * @iter: list_head ** of the current position
4805 *
4806 * Gets the next device from the dev's upper list, starting from iter
4807 * position. The caller must hold RCU read lock.
4808 */
Veaceslav Falico2f268f12013-09-25 09:20:07 +02004809struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
4810 struct list_head **iter)
Veaceslav Falico48311f42013-08-28 23:25:07 +02004811{
4812 struct netdev_adjacent *upper;
4813
John Fastabend85328242013-11-26 06:33:52 +00004814 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
Veaceslav Falico48311f42013-08-28 23:25:07 +02004815
4816 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4817
Veaceslav Falico2f268f12013-09-25 09:20:07 +02004818 if (&upper->list == &dev->all_adj_list.upper)
Veaceslav Falico48311f42013-08-28 23:25:07 +02004819 return NULL;
4820
4821 *iter = &upper->list;
4822
4823 return upper->dev;
4824}
Veaceslav Falico2f268f12013-09-25 09:20:07 +02004825EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
Veaceslav Falico48311f42013-08-28 23:25:07 +02004826
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004827/**
Veaceslav Falico31088a12013-09-25 09:20:12 +02004828 * netdev_lower_get_next_private - Get the next ->private from the
4829 * lower neighbour list
4830 * @dev: device
4831 * @iter: list_head ** of the current position
4832 *
4833 * Gets the next netdev_adjacent->private from the dev's lower neighbour
4834 * list, starting from iter position. The caller must hold either the
4835 * RTNL lock or its own locking that guarantees that the neighbour lower
4836 * list will remain unchanged.
4837 */
4838void *netdev_lower_get_next_private(struct net_device *dev,
4839 struct list_head **iter)
4840{
4841 struct netdev_adjacent *lower;
4842
4843 lower = list_entry(*iter, struct netdev_adjacent, list);
4844
4845 if (&lower->list == &dev->adj_list.lower)
4846 return NULL;
4847
Veaceslav Falico6859e7d2014-04-07 11:25:12 +02004848 *iter = lower->list.next;
Veaceslav Falico31088a12013-09-25 09:20:12 +02004849
4850 return lower->private;
4851}
4852EXPORT_SYMBOL(netdev_lower_get_next_private);
4853
4854/**
4855 * netdev_lower_get_next_private_rcu - Get the next ->private from the
4856 * lower neighbour list, RCU
4857 * variant
4858 * @dev: device
4859 * @iter: list_head ** of the current position
4860 *
4861 * Gets the next netdev_adjacent->private from the dev's lower neighbour
4862 * list, starting from iter position. The caller must hold RCU read lock.
4863 */
4864void *netdev_lower_get_next_private_rcu(struct net_device *dev,
4865 struct list_head **iter)
4866{
4867 struct netdev_adjacent *lower;
4868
4869 WARN_ON_ONCE(!rcu_read_lock_held());
4870
4871 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4872
4873 if (&lower->list == &dev->adj_list.lower)
4874 return NULL;
4875
Veaceslav Falico6859e7d2014-04-07 11:25:12 +02004876 *iter = &lower->list;
Veaceslav Falico31088a12013-09-25 09:20:12 +02004877
4878 return lower->private;
4879}
4880EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
4881
4882/**
Vlad Yasevich4085ebe2014-05-16 17:04:53 -04004883 * netdev_lower_get_next - Get the next device from the lower neighbour
4884 * list
4885 * @dev: device
4886 * @iter: list_head ** of the current position
4887 *
4888 * Gets the next netdev_adjacent from the dev's lower neighbour
4889 * list, starting from iter position. The caller must hold the RTNL lock or
4890 * its own locking that guarantees that the neighbour lower
4891 * list will remain unchanged.
4892 */
4893void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
4894{
4895 struct netdev_adjacent *lower;
4896
4897 lower = list_entry((*iter)->next, struct netdev_adjacent, list);
4898
4899 if (&lower->list == &dev->adj_list.lower)
4900 return NULL;
4901
4902 *iter = &lower->list;
4903
4904 return lower->dev;
4905}
4906EXPORT_SYMBOL(netdev_lower_get_next);
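/*
 * Illustrative sketch, not part of this file: walking the immediate lower
 * devices under RTNL with netdev_lower_get_next().  The minimum-MTU
 * computation is only an assumption to give the loop something to do.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_ether.h>

static unsigned int mydrv_min_lower_mtu(struct net_device *dev)
{
	struct list_head *iter = &dev->adj_list.lower;
	struct net_device *lower;
	unsigned int mtu = ETH_DATA_LEN;

	ASSERT_RTNL();
	while ((lower = netdev_lower_get_next(dev, &iter)) != NULL)
		mtu = min(mtu, lower->mtu);
	return mtu;
}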
4907
4908/**
dingtianhonge001bfa2013-12-13 10:19:55 +08004909 * netdev_lower_get_first_private_rcu - Get the first ->private from the
4910 * lower neighbour list, RCU
4911 * variant
4912 * @dev: device
4913 *
4914 * Gets the first netdev_adjacent->private from the dev's lower neighbour
4915 * list. The caller must hold RCU read lock.
4916 */
4917void *netdev_lower_get_first_private_rcu(struct net_device *dev)
4918{
4919 struct netdev_adjacent *lower;
4920
4921 lower = list_first_or_null_rcu(&dev->adj_list.lower,
4922 struct netdev_adjacent, list);
4923 if (lower)
4924 return lower->private;
4925 return NULL;
4926}
4927EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
4928
4929/**
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004930 * netdev_master_upper_dev_get_rcu - Get master upper device
4931 * @dev: device
4932 *
4933 * Find a master upper device and return a pointer to it, or NULL if
4934 * there is none. The caller must hold the RCU read lock.
4935 */
4936struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4937{
Veaceslav Falicoaa9d8562013-08-28 23:25:04 +02004938 struct netdev_adjacent *upper;
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004939
Veaceslav Falico2f268f12013-09-25 09:20:07 +02004940 upper = list_first_or_null_rcu(&dev->adj_list.upper,
Veaceslav Falicoaa9d8562013-08-28 23:25:04 +02004941 struct netdev_adjacent, list);
Jiri Pirko9ff162a2013-01-03 22:48:49 +00004942 if (upper && likely(upper->master))
4943 return upper->dev;
4944 return NULL;
4945}
4946EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4947
Rashika Kheria0a59f3a2014-02-09 20:26:25 +05304948static int netdev_adjacent_sysfs_add(struct net_device *dev,
Veaceslav Falico3ee32702014-01-14 21:58:50 +01004949 struct net_device *adj_dev,
4950 struct list_head *dev_list)
4951{
4952 char linkname[IFNAMSIZ+7];
4953 sprintf(linkname, dev_list == &dev->adj_list.upper ?
4954 "upper_%s" : "lower_%s", adj_dev->name);
4955 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
4956 linkname);
4957}
Rashika Kheria0a59f3a2014-02-09 20:26:25 +05304958static void netdev_adjacent_sysfs_del(struct net_device *dev,
Veaceslav Falico3ee32702014-01-14 21:58:50 +01004959 char *name,
4960 struct list_head *dev_list)
4961{
4962 char linkname[IFNAMSIZ+7];
4963 sprintf(linkname, dev_list == &dev->adj_list.upper ?
4964 "upper_%s" : "lower_%s", name);
4965 sysfs_remove_link(&(dev->dev.kobj), linkname);
4966}
4967
Alexander Y. Fomichev7ce64c72014-09-15 14:22:35 +04004968static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
4969 struct net_device *adj_dev,
4970 struct list_head *dev_list)
4971{
4972 return (dev_list == &dev->adj_list.upper ||
4973 dev_list == &dev->adj_list.lower) &&
4974 net_eq(dev_net(dev), dev_net(adj_dev));
4975}
Veaceslav Falico3ee32702014-01-14 21:58:50 +01004976
Veaceslav Falico5d261912013-08-28 23:25:05 +02004977static int __netdev_adjacent_dev_insert(struct net_device *dev,
4978 struct net_device *adj_dev,
Veaceslav Falico7863c052013-09-25 09:20:06 +02004979 struct list_head *dev_list,
Veaceslav Falico402dae92013-09-25 09:20:09 +02004980 void *private, bool master)
Veaceslav Falico5d261912013-08-28 23:25:05 +02004981{
4982 struct netdev_adjacent *adj;
Veaceslav Falico842d67a2013-09-25 09:20:31 +02004983 int ret;
Veaceslav Falico5d261912013-08-28 23:25:05 +02004984
Veaceslav Falico7863c052013-09-25 09:20:06 +02004985 adj = __netdev_find_adj(dev, adj_dev, dev_list);
Veaceslav Falico5d261912013-08-28 23:25:05 +02004986
4987 if (adj) {
Veaceslav Falico5d261912013-08-28 23:25:05 +02004988 adj->ref_nr++;
4989 return 0;
4990 }
4991
4992 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
4993 if (!adj)
4994 return -ENOMEM;
4995
4996 adj->dev = adj_dev;
4997 adj->master = master;
Veaceslav Falico5d261912013-08-28 23:25:05 +02004998 adj->ref_nr = 1;
Veaceslav Falico402dae92013-09-25 09:20:09 +02004999 adj->private = private;
Veaceslav Falico5d261912013-08-28 23:25:05 +02005000 dev_hold(adj_dev);
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005001
5002 pr_debug("dev_hold for %s, because of link added from %s to %s\n",
5003 adj_dev->name, dev->name, adj_dev->name);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005004
Alexander Y. Fomichev7ce64c72014-09-15 14:22:35 +04005005 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
Veaceslav Falico3ee32702014-01-14 21:58:50 +01005006 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
Veaceslav Falico5831d66e2013-09-25 09:20:32 +02005007 if (ret)
5008 goto free_adj;
5009 }
5010
Veaceslav Falico7863c052013-09-25 09:20:06 +02005011 /* Ensure that master link is always the first item in list. */
Veaceslav Falico842d67a2013-09-25 09:20:31 +02005012 if (master) {
5013 ret = sysfs_create_link(&(dev->dev.kobj),
5014 &(adj_dev->dev.kobj), "master");
5015 if (ret)
Veaceslav Falico5831d66e2013-09-25 09:20:32 +02005016 goto remove_symlinks;
Veaceslav Falico842d67a2013-09-25 09:20:31 +02005017
Veaceslav Falico7863c052013-09-25 09:20:06 +02005018 list_add_rcu(&adj->list, dev_list);
Veaceslav Falico842d67a2013-09-25 09:20:31 +02005019 } else {
Veaceslav Falico7863c052013-09-25 09:20:06 +02005020 list_add_tail_rcu(&adj->list, dev_list);
Veaceslav Falico842d67a2013-09-25 09:20:31 +02005021 }
Veaceslav Falico5d261912013-08-28 23:25:05 +02005022
5023 return 0;
Veaceslav Falico842d67a2013-09-25 09:20:31 +02005024
Veaceslav Falico5831d66e2013-09-25 09:20:32 +02005025remove_symlinks:
Alexander Y. Fomichev7ce64c72014-09-15 14:22:35 +04005026 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
Veaceslav Falico3ee32702014-01-14 21:58:50 +01005027 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
Veaceslav Falico842d67a2013-09-25 09:20:31 +02005028free_adj:
5029 kfree(adj);
Nikolay Aleksandrov974daef2013-10-23 15:28:56 +02005030 dev_put(adj_dev);
Veaceslav Falico842d67a2013-09-25 09:20:31 +02005031
5032 return ret;
Veaceslav Falico5d261912013-08-28 23:25:05 +02005033}
5034
stephen hemminger1d143d92013-12-29 14:01:29 -08005035static void __netdev_adjacent_dev_remove(struct net_device *dev,
5036 struct net_device *adj_dev,
5037 struct list_head *dev_list)
Veaceslav Falico5d261912013-08-28 23:25:05 +02005038{
5039 struct netdev_adjacent *adj;
5040
Veaceslav Falico7863c052013-09-25 09:20:06 +02005041 adj = __netdev_find_adj(dev, adj_dev, dev_list);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005042
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005043 if (!adj) {
5044 pr_err("tried to remove device %s from %s\n",
5045 dev->name, adj_dev->name);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005046 BUG();
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005047 }
Veaceslav Falico5d261912013-08-28 23:25:05 +02005048
5049 if (adj->ref_nr > 1) {
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005050 pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
5051 adj->ref_nr-1);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005052 adj->ref_nr--;
5053 return;
5054 }
5055
Veaceslav Falico842d67a2013-09-25 09:20:31 +02005056 if (adj->master)
5057 sysfs_remove_link(&(dev->dev.kobj), "master");
5058
Alexander Y. Fomichev7ce64c72014-09-15 14:22:35 +04005059 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
Veaceslav Falico3ee32702014-01-14 21:58:50 +01005060 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
Veaceslav Falico5831d66e2013-09-25 09:20:32 +02005061
Veaceslav Falico5d261912013-08-28 23:25:05 +02005062 list_del_rcu(&adj->list);
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005063 pr_debug("dev_put for %s, because link removed from %s to %s\n",
5064 adj_dev->name, dev->name, adj_dev->name);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005065 dev_put(adj_dev);
5066 kfree_rcu(adj, rcu);
5067}
5068
stephen hemminger1d143d92013-12-29 14:01:29 -08005069static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
5070 struct net_device *upper_dev,
5071 struct list_head *up_list,
5072 struct list_head *down_list,
5073 void *private, bool master)
Veaceslav Falico5d261912013-08-28 23:25:05 +02005074{
5075 int ret;
5076
Veaceslav Falico402dae92013-09-25 09:20:09 +02005077 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
5078 master);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005079 if (ret)
5080 return ret;
5081
Veaceslav Falico402dae92013-09-25 09:20:09 +02005082 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
5083 false);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005084 if (ret) {
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005085 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005086 return ret;
5087 }
5088
5089 return 0;
5090}
5091
stephen hemminger1d143d92013-12-29 14:01:29 -08005092static int __netdev_adjacent_dev_link(struct net_device *dev,
5093 struct net_device *upper_dev)
Veaceslav Falico5d261912013-08-28 23:25:05 +02005094{
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005095 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
5096 &dev->all_adj_list.upper,
5097 &upper_dev->all_adj_list.lower,
Veaceslav Falico402dae92013-09-25 09:20:09 +02005098 NULL, false);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005099}
5100
stephen hemminger1d143d92013-12-29 14:01:29 -08005101static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
5102 struct net_device *upper_dev,
5103 struct list_head *up_list,
5104 struct list_head *down_list)
Veaceslav Falico5d261912013-08-28 23:25:05 +02005105{
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005106 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
5107 __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005108}
5109
stephen hemminger1d143d92013-12-29 14:01:29 -08005110static void __netdev_adjacent_dev_unlink(struct net_device *dev,
5111 struct net_device *upper_dev)
Veaceslav Falico5d261912013-08-28 23:25:05 +02005112{
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005113 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
5114 &dev->all_adj_list.upper,
5115 &upper_dev->all_adj_list.lower);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005116}
5117
stephen hemminger1d143d92013-12-29 14:01:29 -08005118static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
5119 struct net_device *upper_dev,
5120 void *private, bool master)
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005121{
5122 int ret = __netdev_adjacent_dev_link(dev, upper_dev);
5123
5124 if (ret)
5125 return ret;
5126
5127 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
5128 &dev->adj_list.upper,
5129 &upper_dev->adj_list.lower,
Veaceslav Falico402dae92013-09-25 09:20:09 +02005130 private, master);
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005131 if (ret) {
5132 __netdev_adjacent_dev_unlink(dev, upper_dev);
5133 return ret;
5134 }
5135
5136 return 0;
5137}
5138
stephen hemminger1d143d92013-12-29 14:01:29 -08005139static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
5140 struct net_device *upper_dev)
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005141{
5142 __netdev_adjacent_dev_unlink(dev, upper_dev);
5143 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
5144 &dev->adj_list.upper,
5145 &upper_dev->adj_list.lower);
5146}
Veaceslav Falico5d261912013-08-28 23:25:05 +02005147
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005148static int __netdev_upper_dev_link(struct net_device *dev,
Veaceslav Falico402dae92013-09-25 09:20:09 +02005149 struct net_device *upper_dev, bool master,
5150 void *private)
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005151{
Veaceslav Falico5d261912013-08-28 23:25:05 +02005152 struct netdev_adjacent *i, *j, *to_i, *to_j;
5153 int ret = 0;
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005154
5155 ASSERT_RTNL();
5156
5157 if (dev == upper_dev)
5158 return -EBUSY;
5159
5160 /* To prevent loops, check if dev is not upper device to upper_dev. */
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005161 if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005162 return -EBUSY;
5163
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005164 if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005165 return -EEXIST;
5166
5167 if (master && netdev_master_upper_dev_get(dev))
5168 return -EBUSY;
5169
Veaceslav Falico402dae92013-09-25 09:20:09 +02005170 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
5171 master);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005172 if (ret)
5173 return ret;
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005174
	/* Now that we linked these devs, make all the upper_dev's
	 * all_adj_list.upper visible to every dev's all_adj_list.lower and
	 * vice versa, and don't forget the devices themselves. All of these
	 * links are non-neighbours.
	 */
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005180 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5181 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
5182 pr_debug("Interlinking %s with %s, non-neighbour\n",
5183 i->dev->name, j->dev->name);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005184 ret = __netdev_adjacent_dev_link(i->dev, j->dev);
5185 if (ret)
5186 goto rollback_mesh;
5187 }
5188 }
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005189
Veaceslav Falico5d261912013-08-28 23:25:05 +02005190 /* add dev to every upper_dev's upper device */
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005191 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
5192 pr_debug("linking %s's upper device %s with %s\n",
5193 upper_dev->name, i->dev->name, dev->name);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005194 ret = __netdev_adjacent_dev_link(dev, i->dev);
5195 if (ret)
5196 goto rollback_upper_mesh;
5197 }
5198
5199 /* add upper_dev to every dev's lower device */
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005200 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5201 pr_debug("linking %s's lower device %s with %s\n", dev->name,
5202 i->dev->name, upper_dev->name);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005203 ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
5204 if (ret)
5205 goto rollback_lower_mesh;
5206 }
5207
Jiri Pirko42e52bf2013-05-25 04:12:10 +00005208 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005209 return 0;
Veaceslav Falico5d261912013-08-28 23:25:05 +02005210
5211rollback_lower_mesh:
5212 to_i = i;
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005213 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
Veaceslav Falico5d261912013-08-28 23:25:05 +02005214 if (i == to_i)
5215 break;
5216 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
5217 }
5218
5219 i = NULL;
5220
5221rollback_upper_mesh:
5222 to_i = i;
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005223 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
Veaceslav Falico5d261912013-08-28 23:25:05 +02005224 if (i == to_i)
5225 break;
5226 __netdev_adjacent_dev_unlink(dev, i->dev);
5227 }
5228
5229 i = j = NULL;
5230
5231rollback_mesh:
5232 to_i = i;
5233 to_j = j;
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005234 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5235 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
Veaceslav Falico5d261912013-08-28 23:25:05 +02005236 if (i == to_i && j == to_j)
5237 break;
5238 __netdev_adjacent_dev_unlink(i->dev, j->dev);
5239 }
5240 if (i == to_i)
5241 break;
5242 }
5243
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005244 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005245
5246 return ret;
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005247}
5248
5249/**
5250 * netdev_upper_dev_link - Add a link to the upper device
5251 * @dev: device
5252 * @upper_dev: new upper device
5253 *
 * Adds a link to a device which is upper to this one. The caller must hold
5255 * the RTNL lock. On a failure a negative errno code is returned.
5256 * On success the reference counts are adjusted and the function
5257 * returns zero.
5258 */
5259int netdev_upper_dev_link(struct net_device *dev,
5260 struct net_device *upper_dev)
5261{
Veaceslav Falico402dae92013-09-25 09:20:09 +02005262 return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005263}
5264EXPORT_SYMBOL(netdev_upper_dev_link);
5265
5266/**
5267 * netdev_master_upper_dev_link - Add a master link to the upper device
5268 * @dev: device
5269 * @upper_dev: new upper device
5270 *
 * Adds a link to a device which is upper to this one. In this case, only
5272 * one master upper device can be linked, although other non-master devices
5273 * might be linked as well. The caller must hold the RTNL lock.
5274 * On a failure a negative errno code is returned. On success the reference
5275 * counts are adjusted and the function returns zero.
5276 */
5277int netdev_master_upper_dev_link(struct net_device *dev,
5278 struct net_device *upper_dev)
5279{
Veaceslav Falico402dae92013-09-25 09:20:09 +02005280 return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005281}
5282EXPORT_SYMBOL(netdev_master_upper_dev_link);
5283
Veaceslav Falico402dae92013-09-25 09:20:09 +02005284int netdev_master_upper_dev_link_private(struct net_device *dev,
5285 struct net_device *upper_dev,
5286 void *private)
5287{
5288 return __netdev_upper_dev_link(dev, upper_dev, true, private);
5289}
5290EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
5291
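/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): how a bonding-like master driver might link a lower device under
 * RTNL. The example_* names and the private structure are assumptions,
 * not existing kernel symbols.
 */
#if 0
struct example_slave_info {
	int id;
};

static int example_enslave(struct net_device *master_dev,
			   struct net_device *slave_dev,
			   struct example_slave_info *info)
{
	ASSERT_RTNL();

	/* record master_dev as the single master upper device of the
	 * slave and attach driver-private data to the adjacency
	 */
	return netdev_master_upper_dev_link_private(slave_dev, master_dev,
						    info);
}
#endif
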
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005292/**
5293 * netdev_upper_dev_unlink - Removes a link to upper device
5294 * @dev: device
 * @upper_dev: upper device to remove the link to
 *
 * Removes the link to a device which is upper to this one. The caller must
 * hold the RTNL lock.
5299 */
5300void netdev_upper_dev_unlink(struct net_device *dev,
5301 struct net_device *upper_dev)
5302{
Veaceslav Falico5d261912013-08-28 23:25:05 +02005303 struct netdev_adjacent *i, *j;
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005304 ASSERT_RTNL();
5305
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005306 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
Veaceslav Falico5d261912013-08-28 23:25:05 +02005307
	/* Here is the tricky part. We must unlink all of dev's lower
	 * devices from all of upper_dev's upper devices and vice
	 * versa, to maintain the graph relationship.
	 */
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005312 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5313 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
Veaceslav Falico5d261912013-08-28 23:25:05 +02005314 __netdev_adjacent_dev_unlink(i->dev, j->dev);
5315
	/* also remove dev and upper_dev themselves from the
	 * lower/upper device lists
	 */
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005319 list_for_each_entry(i, &dev->all_adj_list.lower, list)
Veaceslav Falico5d261912013-08-28 23:25:05 +02005320 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
5321
Veaceslav Falico2f268f12013-09-25 09:20:07 +02005322 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
Veaceslav Falico5d261912013-08-28 23:25:05 +02005323 __netdev_adjacent_dev_unlink(dev, i->dev);
5324
Jiri Pirko42e52bf2013-05-25 04:12:10 +00005325 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
Jiri Pirko9ff162a2013-01-03 22:48:49 +00005326}
5327EXPORT_SYMBOL(netdev_upper_dev_unlink);
5328
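/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): undoing the link above when a hypothetical master releases a
 * slave. example_release() is not an existing kernel symbol.
 */
#if 0
static void example_release(struct net_device *master_dev,
			    struct net_device *slave_dev)
{
	ASSERT_RTNL();

	/* drops the adjacency references taken at link time */
	netdev_upper_dev_unlink(slave_dev, master_dev);
}
#endif
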
Moni Shoua61bd3852015-02-03 16:48:29 +02005329/**
5330 * netdev_bonding_info_change - Dispatch event about slave change
5331 * @dev: device
 * @bonding_info: info to dispatch
5333 *
5334 * Send NETDEV_BONDING_INFO to netdev notifiers with info.
5335 * The caller must hold the RTNL lock.
5336 */
5337void netdev_bonding_info_change(struct net_device *dev,
5338 struct netdev_bonding_info *bonding_info)
5339{
5340 struct netdev_notifier_bonding_info info;
5341
5342 memcpy(&info.bonding_info, bonding_info,
5343 sizeof(struct netdev_bonding_info));
5344 call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
5345 &info.info);
5346}
5347EXPORT_SYMBOL(netdev_bonding_info_change);
5348
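/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): a bonding-like driver telling listeners that a slave's state
 * changed. The helper name and the way the ifbond/ifslave members would
 * be filled in are assumptions for illustration only.
 */
#if 0
static void example_slave_state_changed(struct net_device *slave_dev)
{
	struct netdev_bonding_info info;

	memset(&info, 0, sizeof(info));
	/* a real driver would fill in info.master and info.slave here */

	netdev_bonding_info_change(slave_dev, &info);
}
#endif
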
Eric Dumazet2ce1ee12015-02-04 13:37:44 -08005349static void netdev_adjacent_add_links(struct net_device *dev)
Alexander Y. Fomichev4c754312014-08-25 16:26:45 +04005350{
5351 struct netdev_adjacent *iter;
5352
5353 struct net *net = dev_net(dev);
5354
5355 list_for_each_entry(iter, &dev->adj_list.upper, list) {
		if (!net_eq(net, dev_net(iter->dev)))
5357 continue;
5358 netdev_adjacent_sysfs_add(iter->dev, dev,
5359 &iter->dev->adj_list.lower);
5360 netdev_adjacent_sysfs_add(dev, iter->dev,
5361 &dev->adj_list.upper);
5362 }
5363
5364 list_for_each_entry(iter, &dev->adj_list.lower, list) {
		if (!net_eq(net, dev_net(iter->dev)))
5366 continue;
5367 netdev_adjacent_sysfs_add(iter->dev, dev,
5368 &iter->dev->adj_list.upper);
5369 netdev_adjacent_sysfs_add(dev, iter->dev,
5370 &dev->adj_list.lower);
5371 }
5372}
5373
Eric Dumazet2ce1ee12015-02-04 13:37:44 -08005374static void netdev_adjacent_del_links(struct net_device *dev)
Alexander Y. Fomichev4c754312014-08-25 16:26:45 +04005375{
5376 struct netdev_adjacent *iter;
5377
5378 struct net *net = dev_net(dev);
5379
5380 list_for_each_entry(iter, &dev->adj_list.upper, list) {
		if (!net_eq(net, dev_net(iter->dev)))
5382 continue;
5383 netdev_adjacent_sysfs_del(iter->dev, dev->name,
5384 &iter->dev->adj_list.lower);
5385 netdev_adjacent_sysfs_del(dev, iter->dev->name,
5386 &dev->adj_list.upper);
5387 }
5388
5389 list_for_each_entry(iter, &dev->adj_list.lower, list) {
		if (!net_eq(net, dev_net(iter->dev)))
5391 continue;
5392 netdev_adjacent_sysfs_del(iter->dev, dev->name,
5393 &iter->dev->adj_list.upper);
5394 netdev_adjacent_sysfs_del(dev, iter->dev->name,
5395 &dev->adj_list.lower);
5396 }
5397}
5398
Veaceslav Falico5bb025f2014-01-14 21:58:51 +01005399void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
Veaceslav Falico402dae92013-09-25 09:20:09 +02005400{
Veaceslav Falico5bb025f2014-01-14 21:58:51 +01005401 struct netdev_adjacent *iter;
Veaceslav Falico402dae92013-09-25 09:20:09 +02005402
Alexander Y. Fomichev4c754312014-08-25 16:26:45 +04005403 struct net *net = dev_net(dev);
5404
Veaceslav Falico5bb025f2014-01-14 21:58:51 +01005405 list_for_each_entry(iter, &dev->adj_list.upper, list) {
		if (!net_eq(net, dev_net(iter->dev)))
5407 continue;
Veaceslav Falico5bb025f2014-01-14 21:58:51 +01005408 netdev_adjacent_sysfs_del(iter->dev, oldname,
5409 &iter->dev->adj_list.lower);
5410 netdev_adjacent_sysfs_add(iter->dev, dev,
5411 &iter->dev->adj_list.lower);
5412 }
Veaceslav Falico402dae92013-09-25 09:20:09 +02005413
Veaceslav Falico5bb025f2014-01-14 21:58:51 +01005414 list_for_each_entry(iter, &dev->adj_list.lower, list) {
		if (!net_eq(net, dev_net(iter->dev)))
5416 continue;
Veaceslav Falico5bb025f2014-01-14 21:58:51 +01005417 netdev_adjacent_sysfs_del(iter->dev, oldname,
5418 &iter->dev->adj_list.upper);
5419 netdev_adjacent_sysfs_add(iter->dev, dev,
5420 &iter->dev->adj_list.upper);
5421 }
Veaceslav Falico402dae92013-09-25 09:20:09 +02005422}
Veaceslav Falico402dae92013-09-25 09:20:09 +02005423
5424void *netdev_lower_dev_get_private(struct net_device *dev,
5425 struct net_device *lower_dev)
5426{
5427 struct netdev_adjacent *lower;
5428
5429 if (!lower_dev)
5430 return NULL;
5431 lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
5432 if (!lower)
5433 return NULL;
5434
5435 return lower->private;
5436}
5437EXPORT_SYMBOL(netdev_lower_dev_get_private);
5438
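/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): retrieving the private pointer a master attached when it linked a
 * lower device, e.g. via netdev_master_upper_dev_link_private(). The
 * example_* names are assumptions.
 */
#if 0
struct example_slave_priv {
	int queue_id;
};

static int example_slave_queue_id(struct net_device *master_dev,
				  struct net_device *slave_dev)
{
	struct example_slave_priv *priv;

	priv = netdev_lower_dev_get_private(master_dev, slave_dev);
	return priv ? priv->queue_id : -ENOENT;
}
#endif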
Vlad Yasevich4085ebe2014-05-16 17:04:53 -04005439
5440int dev_get_nest_level(struct net_device *dev,
5441 bool (*type_check)(struct net_device *dev))
5442{
5443 struct net_device *lower = NULL;
5444 struct list_head *iter;
5445 int max_nest = -1;
5446 int nest;
5447
5448 ASSERT_RTNL();
5449
5450 netdev_for_each_lower_dev(dev, lower, iter) {
5451 nest = dev_get_nest_level(lower, type_check);
5452 if (max_nest < nest)
5453 max_nest = nest;
5454 }
5455
5456 if (type_check(dev))
5457 max_nest++;
5458
5459 return max_nest;
5460}
5461EXPORT_SYMBOL(dev_get_nest_level);
5462
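/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): counting how deeply a device is nested in devices of one type,
 * e.g. for picking a lockdep subclass. example_is_vlan() is an assumption
 * standing in for a real type check such as is_vlan_dev().
 */
#if 0
static bool example_is_vlan(struct net_device *dev)
{
	return dev->priv_flags & IFF_802_1Q_VLAN;
}

static int example_vlan_nest_level(struct net_device *dev)
{
	/* caller must hold RTNL */
	return dev_get_nest_level(dev, example_is_vlan);
}
#endif
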
Patrick McHardyb6c40d62008-10-07 15:26:48 -07005463static void dev_change_rx_flags(struct net_device *dev, int flags)
5464{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005465 const struct net_device_ops *ops = dev->netdev_ops;
5466
Vlad Yasevichd2615bf2013-11-19 20:47:15 -05005467 if (ops->ndo_change_rx_flags)
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005468 ops->ndo_change_rx_flags(dev, flags);
Patrick McHardyb6c40d62008-10-07 15:26:48 -07005469}
5470
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005471static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
Patrick McHardy4417da62007-06-27 01:28:10 -07005472{
Eric Dumazetb536db92011-11-30 21:42:26 +00005473 unsigned int old_flags = dev->flags;
Eric W. Biedermand04a48b2012-05-23 17:01:57 -06005474 kuid_t uid;
5475 kgid_t gid;
Patrick McHardy4417da62007-06-27 01:28:10 -07005476
Patrick McHardy24023452007-07-14 18:51:31 -07005477 ASSERT_RTNL();
5478
Wang Chendad9b332008-06-18 01:48:28 -07005479 dev->flags |= IFF_PROMISC;
5480 dev->promiscuity += inc;
5481 if (dev->promiscuity == 0) {
5482 /*
5483 * Avoid overflow.
5484 * If inc causes overflow, untouch promisc and return error.
5485 */
5486 if (inc < 0)
5487 dev->flags &= ~IFF_PROMISC;
5488 else {
5489 dev->promiscuity -= inc;
			pr_warn("%s: promiscuity counter overflowed, promiscuity was not changed; the promiscuity feature of this device may be broken.\n",
				dev->name);
Wang Chendad9b332008-06-18 01:48:28 -07005492 return -EOVERFLOW;
5493 }
5494 }
Patrick McHardy4417da62007-06-27 01:28:10 -07005495 if (dev->flags != old_flags) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00005496 pr_info("device %s %s promiscuous mode\n",
5497 dev->name,
5498 dev->flags & IFF_PROMISC ? "entered" : "left");
David Howells8192b0c2008-11-14 10:39:10 +11005499 if (audit_enabled) {
5500 current_uid_gid(&uid, &gid);
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05005501 audit_log(current->audit_context, GFP_ATOMIC,
5502 AUDIT_ANOM_PROMISCUOUS,
5503 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
5504 dev->name, (dev->flags & IFF_PROMISC),
5505 (old_flags & IFF_PROMISC),
Eric W. Biedermane1760bd2012-09-10 22:39:43 -07005506 from_kuid(&init_user_ns, audit_get_loginuid(current)),
Eric W. Biedermand04a48b2012-05-23 17:01:57 -06005507 from_kuid(&init_user_ns, uid),
5508 from_kgid(&init_user_ns, gid),
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05005509 audit_get_sessionid(current));
David Howells8192b0c2008-11-14 10:39:10 +11005510 }
Patrick McHardy24023452007-07-14 18:51:31 -07005511
Patrick McHardyb6c40d62008-10-07 15:26:48 -07005512 dev_change_rx_flags(dev, IFF_PROMISC);
Patrick McHardy4417da62007-06-27 01:28:10 -07005513 }
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005514 if (notify)
5515 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
Wang Chendad9b332008-06-18 01:48:28 -07005516 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07005517}
5518
Linus Torvalds1da177e2005-04-16 15:20:36 -07005519/**
5520 * dev_set_promiscuity - update promiscuity count on a device
5521 * @dev: device
5522 * @inc: modifier
5523 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07005524 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07005525 * remains above zero the interface remains promiscuous. Once it hits zero
5526 * the device reverts back to normal filtering operation. A negative inc
5527 * value is used to drop promiscuity on the device.
Wang Chendad9b332008-06-18 01:48:28 -07005528 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005529 */
Wang Chendad9b332008-06-18 01:48:28 -07005530int dev_set_promiscuity(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005531{
Eric Dumazetb536db92011-11-30 21:42:26 +00005532 unsigned int old_flags = dev->flags;
Wang Chendad9b332008-06-18 01:48:28 -07005533 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005534
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005535 err = __dev_set_promiscuity(dev, inc, true);
Patrick McHardy4b5a6982008-07-06 15:49:08 -07005536 if (err < 0)
Wang Chendad9b332008-06-18 01:48:28 -07005537 return err;
Patrick McHardy4417da62007-06-27 01:28:10 -07005538 if (dev->flags != old_flags)
5539 dev_set_rx_mode(dev);
Wang Chendad9b332008-06-18 01:48:28 -07005540 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005541}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005542EXPORT_SYMBOL(dev_set_promiscuity);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005543
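/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): a hypothetical capture component bumping and dropping the
 * promiscuity count around a session. The example_* helpers are
 * assumptions.
 */
#if 0
static int example_capture_start(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_promiscuity(dev, 1);	/* counted, not a plain flag */
	rtnl_unlock();

	return err;
}

static void example_capture_stop(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, -1);		/* drop our reference */
	rtnl_unlock();
}
#endif
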
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005544static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005545{
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005546 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005547
Patrick McHardy24023452007-07-14 18:51:31 -07005548 ASSERT_RTNL();
5549
Linus Torvalds1da177e2005-04-16 15:20:36 -07005550 dev->flags |= IFF_ALLMULTI;
Wang Chendad9b332008-06-18 01:48:28 -07005551 dev->allmulti += inc;
5552 if (dev->allmulti == 0) {
5553 /*
5554 * Avoid overflow.
5555 * If inc causes overflow, untouch allmulti and return error.
5556 */
5557 if (inc < 0)
5558 dev->flags &= ~IFF_ALLMULTI;
5559 else {
5560 dev->allmulti -= inc;
			pr_warn("%s: allmulti counter overflowed, allmulti was not changed; the allmulti feature of this device may be broken.\n",
				dev->name);
Wang Chendad9b332008-06-18 01:48:28 -07005563 return -EOVERFLOW;
5564 }
5565 }
Patrick McHardy24023452007-07-14 18:51:31 -07005566 if (dev->flags ^ old_flags) {
Patrick McHardyb6c40d62008-10-07 15:26:48 -07005567 dev_change_rx_flags(dev, IFF_ALLMULTI);
Patrick McHardy4417da62007-06-27 01:28:10 -07005568 dev_set_rx_mode(dev);
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005569 if (notify)
5570 __dev_notify_flags(dev, old_flags,
5571 dev->gflags ^ old_gflags);
Patrick McHardy24023452007-07-14 18:51:31 -07005572 }
Wang Chendad9b332008-06-18 01:48:28 -07005573 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07005574}
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005575
5576/**
5577 * dev_set_allmulti - update allmulti count on a device
5578 * @dev: device
5579 * @inc: modifier
5580 *
5581 * Add or remove reception of all multicast frames to a device. While the
5582 * count in the device remains above zero the interface remains listening
 * to all multicast frames. Once it hits zero the device reverts back to normal
5584 * filtering operation. A negative @inc value is used to drop the counter
5585 * when releasing a resource needing all multicasts.
5586 * Return 0 if successful or a negative errno code on error.
5587 */
5588
5589int dev_set_allmulti(struct net_device *dev, int inc)
5590{
5591 return __dev_set_allmulti(dev, inc, true);
5592}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005593EXPORT_SYMBOL(dev_set_allmulti);
Patrick McHardy4417da62007-06-27 01:28:10 -07005594
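/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): a hypothetical multicast monitor asking for all multicast frames
 * while it is active. The example_* helpers are assumptions; the caller
 * is expected to hold RTNL.
 */
#if 0
static int example_mc_monitor_enable(struct net_device *dev)
{
	ASSERT_RTNL();

	return dev_set_allmulti(dev, 1);
}

static void example_mc_monitor_disable(struct net_device *dev)
{
	ASSERT_RTNL();

	dev_set_allmulti(dev, -1);
}
#endif
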
5595/*
5596 * Upload unicast and multicast address lists to device and
5597 * configure RX filtering. When the device doesn't support unicast
Joe Perches53ccaae2007-12-20 14:02:06 -08005598 * filtering it is put in promiscuous mode while unicast addresses
Patrick McHardy4417da62007-06-27 01:28:10 -07005599 * are present.
5600 */
5601void __dev_set_rx_mode(struct net_device *dev)
5602{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005603 const struct net_device_ops *ops = dev->netdev_ops;
5604
Patrick McHardy4417da62007-06-27 01:28:10 -07005605 /* dev_open will call this function so the list will stay sane. */
5606 if (!(dev->flags&IFF_UP))
5607 return;
5608
5609 if (!netif_device_present(dev))
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +09005610 return;
Patrick McHardy4417da62007-06-27 01:28:10 -07005611
Jiri Pirko01789342011-08-16 06:29:00 +00005612 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
Patrick McHardy4417da62007-06-27 01:28:10 -07005613 /* Unicast addresses changes may only happen under the rtnl,
5614 * therefore calling __dev_set_promiscuity here is safe.
5615 */
Jiri Pirko32e7bfc2010-01-25 13:36:10 -08005616 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005617 __dev_set_promiscuity(dev, 1, false);
Joe Perches2d348d12011-07-25 16:17:35 -07005618 dev->uc_promisc = true;
Jiri Pirko32e7bfc2010-01-25 13:36:10 -08005619 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005620 __dev_set_promiscuity(dev, -1, false);
Joe Perches2d348d12011-07-25 16:17:35 -07005621 dev->uc_promisc = false;
Patrick McHardy4417da62007-06-27 01:28:10 -07005622 }
Patrick McHardy4417da62007-06-27 01:28:10 -07005623 }
Jiri Pirko01789342011-08-16 06:29:00 +00005624
5625 if (ops->ndo_set_rx_mode)
5626 ops->ndo_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07005627}
5628
5629void dev_set_rx_mode(struct net_device *dev)
5630{
David S. Millerb9e40852008-07-15 00:15:08 -07005631 netif_addr_lock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07005632 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07005633 netif_addr_unlock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005634}
5635
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005636/**
5637 * dev_get_flags - get flags reported to userspace
5638 * @dev: device
5639 *
5640 * Get the combination of flag bits exported through APIs to userspace.
5641 */
Eric Dumazet95c96172012-04-15 05:58:06 +00005642unsigned int dev_get_flags(const struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005643{
Eric Dumazet95c96172012-04-15 05:58:06 +00005644 unsigned int flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005645
5646 flags = (dev->flags & ~(IFF_PROMISC |
5647 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08005648 IFF_RUNNING |
5649 IFF_LOWER_UP |
5650 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07005651 (dev->gflags & (IFF_PROMISC |
5652 IFF_ALLMULTI));
5653
Stefan Rompfb00055a2006-03-20 17:09:11 -08005654 if (netif_running(dev)) {
5655 if (netif_oper_up(dev))
5656 flags |= IFF_RUNNING;
5657 if (netif_carrier_ok(dev))
5658 flags |= IFF_LOWER_UP;
5659 if (netif_dormant(dev))
5660 flags |= IFF_DORMANT;
5661 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005662
5663 return flags;
5664}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005665EXPORT_SYMBOL(dev_get_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005666
Patrick McHardybd380812010-02-26 06:34:53 +00005667int __dev_change_flags(struct net_device *dev, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005668{
Eric Dumazetb536db92011-11-30 21:42:26 +00005669 unsigned int old_flags = dev->flags;
Patrick McHardybd380812010-02-26 06:34:53 +00005670 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005671
Patrick McHardy24023452007-07-14 18:51:31 -07005672 ASSERT_RTNL();
5673
Linus Torvalds1da177e2005-04-16 15:20:36 -07005674 /*
5675 * Set the flags on our device.
5676 */
5677
5678 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
5679 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
5680 IFF_AUTOMEDIA)) |
5681 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
5682 IFF_ALLMULTI));
5683
5684 /*
5685 * Load in the correct multicast list now the flags have changed.
5686 */
5687
Patrick McHardyb6c40d62008-10-07 15:26:48 -07005688 if ((old_flags ^ flags) & IFF_MULTICAST)
5689 dev_change_rx_flags(dev, IFF_MULTICAST);
Patrick McHardy24023452007-07-14 18:51:31 -07005690
Patrick McHardy4417da62007-06-27 01:28:10 -07005691 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005692
5693 /*
	 * Have we downed the interface? We handle IFF_UP ourselves
5695 * according to user attempts to set it, rather than blindly
5696 * setting it.
5697 */
5698
5699 ret = 0;
Peter Pan(潘卫平)d215d102014-06-16 21:57:22 +08005700 if ((old_flags ^ flags) & IFF_UP)
Patrick McHardybd380812010-02-26 06:34:53 +00005701 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005702
Linus Torvalds1da177e2005-04-16 15:20:36 -07005703 if ((flags ^ dev->gflags) & IFF_PROMISC) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005704 int inc = (flags & IFF_PROMISC) ? 1 : -1;
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005705 unsigned int old_flags = dev->flags;
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005706
Linus Torvalds1da177e2005-04-16 15:20:36 -07005707 dev->gflags ^= IFF_PROMISC;
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005708
5709 if (__dev_set_promiscuity(dev, inc, false) >= 0)
5710 if (dev->flags != old_flags)
5711 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005712 }
5713
	/* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	 * is important. Some (broken) drivers set IFF_PROMISC when
	 * IFF_ALLMULTI is requested, without asking us and without reporting it.
	 */
5718 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005719 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
5720
Linus Torvalds1da177e2005-04-16 15:20:36 -07005721 dev->gflags ^= IFF_ALLMULTI;
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005722 __dev_set_allmulti(dev, inc, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005723 }
5724
Patrick McHardybd380812010-02-26 06:34:53 +00005725 return ret;
5726}
5727
Nicolas Dichtela528c212013-09-25 12:02:44 +02005728void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
5729 unsigned int gchanges)
Patrick McHardybd380812010-02-26 06:34:53 +00005730{
5731 unsigned int changes = dev->flags ^ old_flags;
5732
Nicolas Dichtela528c212013-09-25 12:02:44 +02005733 if (gchanges)
Alexei Starovoitov7f294052013-10-23 16:02:42 -07005734 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
Nicolas Dichtela528c212013-09-25 12:02:44 +02005735
Patrick McHardybd380812010-02-26 06:34:53 +00005736 if (changes & IFF_UP) {
5737 if (dev->flags & IFF_UP)
5738 call_netdevice_notifiers(NETDEV_UP, dev);
5739 else
5740 call_netdevice_notifiers(NETDEV_DOWN, dev);
5741 }
5742
5743 if (dev->flags & IFF_UP &&
Jiri Pirkobe9efd32013-05-28 01:30:22 +00005744 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
5745 struct netdev_notifier_change_info change_info;
5746
5747 change_info.flags_changed = changes;
5748 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
5749 &change_info.info);
5750 }
Patrick McHardybd380812010-02-26 06:34:53 +00005751}
5752
5753/**
5754 * dev_change_flags - change device settings
5755 * @dev: device
5756 * @flags: device state flags
5757 *
5758 * Change settings on device based state flags. The flags are
5759 * in the userspace exported format.
5760 */
Eric Dumazetb536db92011-11-30 21:42:26 +00005761int dev_change_flags(struct net_device *dev, unsigned int flags)
Patrick McHardybd380812010-02-26 06:34:53 +00005762{
Eric Dumazetb536db92011-11-30 21:42:26 +00005763 int ret;
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005764 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
Patrick McHardybd380812010-02-26 06:34:53 +00005765
5766 ret = __dev_change_flags(dev, flags);
5767 if (ret < 0)
5768 return ret;
5769
Nicolas Dichtel991fb3f2013-09-25 12:02:45 +02005770 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
Nicolas Dichtela528c212013-09-25 12:02:44 +02005771 __dev_notify_flags(dev, old_flags, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005772 return ret;
5773}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005774EXPORT_SYMBOL(dev_change_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005775
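/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): bringing an interface up from kernel context, the way an ioctl or
 * rtnetlink request would. The helper name is an assumption.
 */
#if 0
static int example_bring_up(struct net_device *dev)
{
	unsigned int flags;
	int err;

	rtnl_lock();
	flags = dev_get_flags(dev);
	err = dev_change_flags(dev, flags | IFF_UP);
	rtnl_unlock();

	return err;
}
#endif
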
Veaceslav Falico2315dc92014-01-10 16:56:25 +01005776static int __dev_set_mtu(struct net_device *dev, int new_mtu)
5777{
5778 const struct net_device_ops *ops = dev->netdev_ops;
5779
5780 if (ops->ndo_change_mtu)
5781 return ops->ndo_change_mtu(dev, new_mtu);
5782
5783 dev->mtu = new_mtu;
5784 return 0;
5785}
5786
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005787/**
5788 * dev_set_mtu - Change maximum transfer unit
5789 * @dev: device
5790 * @new_mtu: new transfer unit
5791 *
5792 * Change the maximum transfer size of the network device.
5793 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005794int dev_set_mtu(struct net_device *dev, int new_mtu)
5795{
Veaceslav Falico2315dc92014-01-10 16:56:25 +01005796 int err, orig_mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005797
5798 if (new_mtu == dev->mtu)
5799 return 0;
5800
	/* MTU must not be negative. */
5802 if (new_mtu < 0)
5803 return -EINVAL;
5804
5805 if (!netif_device_present(dev))
5806 return -ENODEV;
5807
Veaceslav Falico1d486bf2014-01-16 00:02:18 +01005808 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
5809 err = notifier_to_errno(err);
5810 if (err)
5811 return err;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005812
Veaceslav Falico2315dc92014-01-10 16:56:25 +01005813 orig_mtu = dev->mtu;
5814 err = __dev_set_mtu(dev, new_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005815
Veaceslav Falico2315dc92014-01-10 16:56:25 +01005816 if (!err) {
5817 err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5818 err = notifier_to_errno(err);
5819 if (err) {
5820 /* setting mtu back and notifying everyone again,
5821 * so that they have a chance to revert changes.
5822 */
5823 __dev_set_mtu(dev, orig_mtu);
5824 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5825 }
5826 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005827 return err;
5828}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005829EXPORT_SYMBOL(dev_set_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005830
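/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): switching a device to a jumbo MTU and tolerating drivers that
 * reject it. The helper name and the 9000-byte value are assumptions.
 */
#if 0
static int example_enable_jumbo(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_mtu(dev, 9000);
	rtnl_unlock();

	if (err)
		netdev_warn(dev, "could not switch to jumbo MTU: %d\n", err);

	return err;
}
#endif
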
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005831/**
Vlad Dogarucbda10f2011-01-13 23:38:30 +00005832 * dev_set_group - Change group this device belongs to
5833 * @dev: device
5834 * @new_group: group this device should belong to
5835 */
5836void dev_set_group(struct net_device *dev, int new_group)
5837{
5838 dev->group = new_group;
5839}
5840EXPORT_SYMBOL(dev_set_group);
5841
5842/**
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005843 * dev_set_mac_address - Change Media Access Control Address
5844 * @dev: device
5845 * @sa: new address
5846 *
5847 * Change the hardware (MAC) address of the device
5848 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005849int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
5850{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005851 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005852 int err;
5853
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005854 if (!ops->ndo_set_mac_address)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005855 return -EOPNOTSUPP;
5856 if (sa->sa_family != dev->type)
5857 return -EINVAL;
5858 if (!netif_device_present(dev))
5859 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005860 err = ops->ndo_set_mac_address(dev, sa);
Jiri Pirkof6521512013-01-01 03:30:14 +00005861 if (err)
5862 return err;
Jiri Pirkofbdeca22013-01-01 03:30:16 +00005863 dev->addr_assign_type = NET_ADDR_SET;
Jiri Pirkof6521512013-01-01 03:30:14 +00005864 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Theodore Ts'o7bf23572012-07-04 21:23:25 -04005865 add_device_randomness(dev->dev_addr, dev->addr_len);
Jiri Pirkof6521512013-01-01 03:30:14 +00005866 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005867}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005868EXPORT_SYMBOL(dev_set_mac_address);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005869
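/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): programming a new hardware address from a raw byte buffer. The
 * helper name is an assumption; the caller is expected to hold RTNL, as
 * the ioctl and rtnetlink paths do.
 */
#if 0
static int example_set_hw_addr(struct net_device *dev, const u8 *addr)
{
	struct sockaddr sa;

	if (dev->addr_len > sizeof(sa.sa_data))
		return -EINVAL;

	sa.sa_family = dev->type;
	memcpy(sa.sa_data, addr, dev->addr_len);

	return dev_set_mac_address(dev, &sa);
}
#endif
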
Jiri Pirko4bf84c32012-12-27 23:49:37 +00005870/**
5871 * dev_change_carrier - Change device carrier
5872 * @dev: device
Randy Dunlap691b3b72013-03-04 12:32:43 +00005873 * @new_carrier: new value
Jiri Pirko4bf84c32012-12-27 23:49:37 +00005874 *
5875 * Change device carrier
5876 */
5877int dev_change_carrier(struct net_device *dev, bool new_carrier)
5878{
5879 const struct net_device_ops *ops = dev->netdev_ops;
5880
5881 if (!ops->ndo_change_carrier)
5882 return -EOPNOTSUPP;
5883 if (!netif_device_present(dev))
5884 return -ENODEV;
5885 return ops->ndo_change_carrier(dev, new_carrier);
5886}
5887EXPORT_SYMBOL(dev_change_carrier);
5888
Linus Torvalds1da177e2005-04-16 15:20:36 -07005889/**
Jiri Pirko66b52b02013-07-29 18:16:49 +02005890 * dev_get_phys_port_id - Get device physical port ID
5891 * @dev: device
5892 * @ppid: port ID
5893 *
5894 * Get device physical port ID
5895 */
5896int dev_get_phys_port_id(struct net_device *dev,
Jiri Pirko02637fc2014-11-28 14:34:16 +01005897 struct netdev_phys_item_id *ppid)
Jiri Pirko66b52b02013-07-29 18:16:49 +02005898{
5899 const struct net_device_ops *ops = dev->netdev_ops;
5900
5901 if (!ops->ndo_get_phys_port_id)
5902 return -EOPNOTSUPP;
5903 return ops->ndo_get_phys_port_id(dev, ppid);
5904}
5905EXPORT_SYMBOL(dev_get_phys_port_id);
5906
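/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): logging a device's physical port ID when the driver exposes one;
 * -EOPNOTSUPP simply means it does not. The helper name is an assumption.
 */
#if 0
static void example_log_phys_port_id(struct net_device *dev)
{
	struct netdev_phys_item_id ppid;

	if (dev_get_phys_port_id(dev, &ppid) == 0)
		netdev_info(dev, "phys port id %*phN\n", ppid.id_len, ppid.id);
}
#endif
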
5907/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07005908 * dev_new_index - allocate an ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07005909 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07005910 *
5911 * Returns a suitable unique value for a new device interface
5912 * number. The caller must hold the rtnl semaphore or the
5913 * dev_base_lock to be sure it remains unique.
5914 */
Eric W. Biederman881d9662007-09-17 11:56:21 -07005915static int dev_new_index(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005916{
Pavel Emelyanovaa79e662012-08-08 21:53:19 +00005917 int ifindex = net->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005918 for (;;) {
5919 if (++ifindex <= 0)
5920 ifindex = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005921 if (!__dev_get_by_index(net, ifindex))
Pavel Emelyanovaa79e662012-08-08 21:53:19 +00005922 return net->ifindex = ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005923 }
5924}
5925
Linus Torvalds1da177e2005-04-16 15:20:36 -07005926/* Delayed registration/unregisteration */
Denis Cheng3b5b34f2007-12-07 00:49:17 -08005927static LIST_HEAD(net_todo_list);
Cong Wang200b9162014-05-12 15:11:20 -07005928DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005929
Stephen Hemminger6f05f622007-03-08 20:46:03 -08005930static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005931{
Linus Torvalds1da177e2005-04-16 15:20:36 -07005932 list_add_tail(&dev->todo_list, &net_todo_list);
Eric W. Biederman50624c92013-09-23 21:19:49 -07005933 dev_net(dev)->dev_unreg_count++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005934}
5935
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005936static void rollback_registered_many(struct list_head *head)
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005937{
Krishna Kumare93737b2009-12-08 22:26:02 +00005938 struct net_device *dev, *tmp;
Eric W. Biederman5cde2822013-10-05 19:26:05 -07005939 LIST_HEAD(close_head);
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005940
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005941 BUG_ON(dev_boot_phase);
5942 ASSERT_RTNL();
5943
Krishna Kumare93737b2009-12-08 22:26:02 +00005944 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
		/* Some devices call this without ever having been
		 * registered, to unwind a failed initialization. Remove
		 * those devices and proceed with the remaining ones.
		 */
5949 if (dev->reg_state == NETREG_UNINITIALIZED) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00005950 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5951 dev->name, dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005952
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005953 WARN_ON(1);
Krishna Kumare93737b2009-12-08 22:26:02 +00005954 list_del(&dev->unreg_list);
5955 continue;
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005956 }
Eric Dumazet449f4542011-05-19 12:24:16 +00005957 dev->dismantle = true;
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005958 BUG_ON(dev->reg_state != NETREG_REGISTERED);
Octavian Purdila44345722010-12-13 12:44:07 +00005959 }
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005960
Octavian Purdila44345722010-12-13 12:44:07 +00005961 /* If device is running, close it first. */
Eric W. Biederman5cde2822013-10-05 19:26:05 -07005962 list_for_each_entry(dev, head, unreg_list)
5963 list_add_tail(&dev->close_list, &close_head);
5964 dev_close_many(&close_head);
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005965
Octavian Purdila44345722010-12-13 12:44:07 +00005966 list_for_each_entry(dev, head, unreg_list) {
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005967 /* And unlink it from device chain. */
5968 unlist_netdevice(dev);
5969
5970 dev->reg_state = NETREG_UNREGISTERING;
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005971 }
5972
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005973 synchronize_net();
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005974
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005975 list_for_each_entry(dev, head, unreg_list) {
Mahesh Bandewar395eea62014-12-03 13:46:24 -08005976 struct sk_buff *skb = NULL;
5977
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005978 /* Shutdown queueing discipline. */
5979 dev_shutdown(dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005980
		/* Notify protocols that we are about to destroy this device.
		 * They should clean up all of their state.
		 */
5985 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5986
Mahesh Bandewar395eea62014-12-03 13:46:24 -08005987 if (!dev->rtnl_link_ops ||
5988 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5989 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
5990 GFP_KERNEL);
5991
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005992 /*
5993 * Flush the unicast and multicast chains
5994 */
Jiri Pirkoa748ee22010-04-01 21:22:09 +00005995 dev_uc_flush(dev);
Jiri Pirko22bedad32010-04-01 21:22:57 +00005996 dev_mc_flush(dev);
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005997
5998 if (dev->netdev_ops->ndo_uninit)
5999 dev->netdev_ops->ndo_uninit(dev);
6000
Mahesh Bandewar395eea62014-12-03 13:46:24 -08006001 if (skb)
6002 rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
Roopa Prabhu56bfa7e2014-05-01 11:40:30 -07006003
Jiri Pirko9ff162a2013-01-03 22:48:49 +00006004 /* Notifier chain MUST detach us all upper devices. */
6005 WARN_ON(netdev_has_any_upper_dev(dev));
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006006
6007 /* Remove entries from kobject tree */
6008 netdev_unregister_kobject(dev);
Alexander Duyck024e9672013-01-10 08:57:46 +00006009#ifdef CONFIG_XPS
6010 /* Remove XPS queueing entries */
6011 netif_reset_xps_queues_gt(dev, 0);
6012#endif
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006013 }
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07006014
Eric W. Biederman850a5452011-10-13 22:25:23 +00006015 synchronize_net();
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07006016
Eric W. Biedermana5ee1552009-11-29 15:45:58 +00006017 list_for_each_entry(dev, head, unreg_list)
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006018 dev_put(dev);
6019}
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07006020
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006021static void rollback_registered(struct net_device *dev)
6022{
6023 LIST_HEAD(single);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07006024
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006025 list_add(&dev->unreg_list, &single);
6026 rollback_registered_many(&single);
Eric Dumazetceaaec92011-02-17 22:59:19 +00006027 list_del(&single);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07006028}
6029
Michał Mirosławc8f44af2011-11-15 15:29:55 +00006030static netdev_features_t netdev_fix_features(struct net_device *dev,
6031 netdev_features_t features)
Herbert Xub63365a2008-10-23 01:11:29 -07006032{
Michał Mirosław57422dc2011-01-22 12:14:12 +00006033 /* Fix illegal checksum combinations */
6034 if ((features & NETIF_F_HW_CSUM) &&
6035 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
Michał Mirosław6f404e42011-05-16 15:14:21 -04006036 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
Michał Mirosław57422dc2011-01-22 12:14:12 +00006037 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
6038 }
6039
Herbert Xub63365a2008-10-23 01:11:29 -07006040 /* TSO requires that SG is present as well. */
Ben Hutchingsea2d3682011-04-12 14:38:37 +00006041 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
Michał Mirosław6f404e42011-05-16 15:14:21 -04006042 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
Ben Hutchingsea2d3682011-04-12 14:38:37 +00006043 features &= ~NETIF_F_ALL_TSO;
Herbert Xub63365a2008-10-23 01:11:29 -07006044 }
6045
Pravin B Shelarec5f0612013-03-07 09:28:01 +00006046 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
6047 !(features & NETIF_F_IP_CSUM)) {
6048 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
6049 features &= ~NETIF_F_TSO;
6050 features &= ~NETIF_F_TSO_ECN;
6051 }
6052
6053 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
6054 !(features & NETIF_F_IPV6_CSUM)) {
6055 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
6056 features &= ~NETIF_F_TSO6;
6057 }
6058
Ben Hutchings31d8b9e2011-04-12 14:47:15 +00006059 /* TSO ECN requires that TSO is present as well. */
6060 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
6061 features &= ~NETIF_F_TSO_ECN;
6062
Michał Mirosław212b5732011-02-15 16:59:16 +00006063 /* Software GSO depends on SG. */
6064 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
Michał Mirosław6f404e42011-05-16 15:14:21 -04006065 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
Michał Mirosław212b5732011-02-15 16:59:16 +00006066 features &= ~NETIF_F_GSO;
6067 }
6068
Michał Mirosławacd11302011-01-24 15:45:15 -08006069 /* UFO needs SG and checksumming */
Herbert Xub63365a2008-10-23 01:11:29 -07006070 if (features & NETIF_F_UFO) {
Michał Mirosław79032642010-11-30 06:38:00 +00006071 /* maybe split UFO into V4 and V6? */
6072 if (!((features & NETIF_F_GEN_CSUM) ||
6073 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
6074 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
Michał Mirosław6f404e42011-05-16 15:14:21 -04006075 netdev_dbg(dev,
Michał Mirosławacd11302011-01-24 15:45:15 -08006076 "Dropping NETIF_F_UFO since no checksum offload features.\n");
Herbert Xub63365a2008-10-23 01:11:29 -07006077 features &= ~NETIF_F_UFO;
6078 }
6079
6080 if (!(features & NETIF_F_SG)) {
Michał Mirosław6f404e42011-05-16 15:14:21 -04006081 netdev_dbg(dev,
Michał Mirosławacd11302011-01-24 15:45:15 -08006082 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
Herbert Xub63365a2008-10-23 01:11:29 -07006083 features &= ~NETIF_F_UFO;
6084 }
6085 }
6086
Jiri Pirkod0290212014-04-02 23:09:31 +02006087#ifdef CONFIG_NET_RX_BUSY_POLL
6088 if (dev->netdev_ops->ndo_busy_poll)
6089 features |= NETIF_F_BUSY_POLL;
6090 else
6091#endif
6092 features &= ~NETIF_F_BUSY_POLL;
6093
Herbert Xub63365a2008-10-23 01:11:29 -07006094 return features;
6095}
Herbert Xub63365a2008-10-23 01:11:29 -07006096
Michał Mirosław6cb6a272011-04-02 22:48:47 -07006097int __netdev_update_features(struct net_device *dev)
Michał Mirosław5455c692011-02-15 16:59:17 +00006098{
Michał Mirosławc8f44af2011-11-15 15:29:55 +00006099 netdev_features_t features;
Michał Mirosław5455c692011-02-15 16:59:17 +00006100 int err = 0;
6101
Michał Mirosław87267482011-04-12 09:56:38 +00006102 ASSERT_RTNL();
6103
Michał Mirosław5455c692011-02-15 16:59:17 +00006104 features = netdev_get_wanted_features(dev);
6105
6106 if (dev->netdev_ops->ndo_fix_features)
6107 features = dev->netdev_ops->ndo_fix_features(dev, features);
6108
6109 /* driver might be less strict about feature dependencies */
6110 features = netdev_fix_features(dev, features);
6111
6112 if (dev->features == features)
Michał Mirosław6cb6a272011-04-02 22:48:47 -07006113 return 0;
Michał Mirosław5455c692011-02-15 16:59:17 +00006114
Michał Mirosławc8f44af2011-11-15 15:29:55 +00006115 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
6116 &dev->features, &features);
Michał Mirosław5455c692011-02-15 16:59:17 +00006117
6118 if (dev->netdev_ops->ndo_set_features)
6119 err = dev->netdev_ops->ndo_set_features(dev, features);
6120
Michał Mirosław6cb6a272011-04-02 22:48:47 -07006121 if (unlikely(err < 0)) {
Michał Mirosław5455c692011-02-15 16:59:17 +00006122 netdev_err(dev,
Michał Mirosławc8f44af2011-11-15 15:29:55 +00006123 "set_features() failed (%d); wanted %pNF, left %pNF\n",
6124 err, &features, &dev->features);
Michał Mirosław6cb6a272011-04-02 22:48:47 -07006125 return -1;
6126 }
6127
6128 if (!err)
6129 dev->features = features;
6130
6131 return 1;
6132}
6133
Michał Mirosławafe12cc2011-05-07 03:22:17 +00006134/**
6135 * netdev_update_features - recalculate device features
6136 * @dev: the device to check
6137 *
6138 * Recalculate dev->features set and send notifications if it
6139 * has changed. Should be called after driver or hardware dependent
6140 * conditions might have changed that influence the features.
6141 */
Michał Mirosław6cb6a272011-04-02 22:48:47 -07006142void netdev_update_features(struct net_device *dev)
6143{
6144 if (__netdev_update_features(dev))
6145 netdev_features_change(dev);
Michał Mirosław5455c692011-02-15 16:59:17 +00006146}
6147EXPORT_SYMBOL(netdev_update_features);
6148
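/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): a driver re-advertising an offload after a firmware event and
 * letting the core recompute dev->features. The helper name is an
 * assumption; RTNL must be held.
 */
#if 0
static void example_refresh_tso(struct net_device *dev, bool fw_can_tso)
{
	ASSERT_RTNL();

	if (fw_can_tso)
		dev->hw_features |= NETIF_F_TSO;
	else
		dev->hw_features &= ~NETIF_F_TSO;

	/* re-runs ndo_fix_features()/ndo_set_features() and notifies */
	netdev_update_features(dev);
}
#endif
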
Linus Torvalds1da177e2005-04-16 15:20:36 -07006149/**
Michał Mirosławafe12cc2011-05-07 03:22:17 +00006150 * netdev_change_features - recalculate device features
6151 * @dev: the device to check
6152 *
6153 * Recalculate dev->features set and send notifications even
6154 * if they have not changed. Should be called instead of
6155 * netdev_update_features() if also dev->vlan_features might
6156 * have changed to allow the changes to be propagated to stacked
6157 * VLAN devices.
6158 */
6159void netdev_change_features(struct net_device *dev)
6160{
6161 __netdev_update_features(dev);
6162 netdev_features_change(dev);
6163}
6164EXPORT_SYMBOL(netdev_change_features);
6165
6166/**
Patrick Mullaneyfc4a7482009-12-03 15:59:22 -08006167 * netif_stacked_transfer_operstate - transfer operstate
6168 * @rootdev: the root or lower level device to transfer state from
6169 * @dev: the device to transfer operstate to
6170 *
6171 * Transfer operational state from root to device. This is normally
6172 * called when a stacking relationship exists between the root
 * device and the device (a leaf device).
6174 */
6175void netif_stacked_transfer_operstate(const struct net_device *rootdev,
6176 struct net_device *dev)
6177{
6178 if (rootdev->operstate == IF_OPER_DORMANT)
6179 netif_dormant_on(dev);
6180 else
6181 netif_dormant_off(dev);
6182
6183 if (netif_carrier_ok(rootdev)) {
6184 if (!netif_carrier_ok(dev))
6185 netif_carrier_on(dev);
6186 } else {
6187 if (netif_carrier_ok(dev))
6188 netif_carrier_off(dev);
6189 }
6190}
6191EXPORT_SYMBOL(netif_stacked_transfer_operstate);
6192
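/*
 * Editor's illustrative sketch (not part of the original file, compiled
 * out): a stacking driver mirroring its lower device's operstate from a
 * netdevice notifier. example_get_stacked_dev() is an assumption for
 * whatever lookup such a driver would use.
 */
#if 0
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *lower = netdev_notifier_info_to_dev(ptr);
	struct net_device *stacked = example_get_stacked_dev(lower); /* hypothetical */

	if (stacked && event == NETDEV_CHANGE)
		netif_stacked_transfer_operstate(lower, stacked);

	return NOTIFY_DONE;
}
#endif
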
Michael Daltona953be52014-01-16 22:23:28 -08006193#ifdef CONFIG_SYSFS
Eric Dumazet1b4bf462010-09-23 17:26:35 +00006194static int netif_alloc_rx_queues(struct net_device *dev)
6195{
Eric Dumazet1b4bf462010-09-23 17:26:35 +00006196 unsigned int i, count = dev->num_rx_queues;
Tom Herbertbd25fa72010-10-18 18:00:16 +00006197 struct netdev_rx_queue *rx;
Pankaj Gupta10595902015-01-12 11:41:28 +05306198 size_t sz = count * sizeof(*rx);
Eric Dumazet1b4bf462010-09-23 17:26:35 +00006199
Tom Herbertbd25fa72010-10-18 18:00:16 +00006200 BUG_ON(count < 1);
Eric Dumazet1b4bf462010-09-23 17:26:35 +00006201
Pankaj Gupta10595902015-01-12 11:41:28 +05306202 rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6203 if (!rx) {
6204 rx = vzalloc(sz);
6205 if (!rx)
6206 return -ENOMEM;
6207 }
Tom Herbertbd25fa72010-10-18 18:00:16 +00006208 dev->_rx = rx;
6209
Tom Herbertbd25fa72010-10-18 18:00:16 +00006210 for (i = 0; i < count; i++)
Tom Herbertfe822242010-11-09 10:47:38 +00006211 rx[i].dev = dev;
Eric Dumazet1b4bf462010-09-23 17:26:35 +00006212 return 0;
6213}
Tom Herbertbf264142010-11-26 08:36:09 +00006214#endif
Eric Dumazet1b4bf462010-09-23 17:26:35 +00006215
Changli Gaoaa942102010-12-04 02:31:41 +00006216static void netdev_init_one_queue(struct net_device *dev,
6217 struct netdev_queue *queue, void *_unused)
6218{
6219 /* Initialize queue lock */
6220 spin_lock_init(&queue->_xmit_lock);
6221 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
6222 queue->xmit_lock_owner = -1;
Changli Gaob236da62010-12-14 03:09:15 +00006223 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
Changli Gaoaa942102010-12-04 02:31:41 +00006224 queue->dev = dev;
Tom Herbert114cf582011-11-28 16:33:09 +00006225#ifdef CONFIG_BQL
6226 dql_init(&queue->dql, HZ);
6227#endif
Changli Gaoaa942102010-12-04 02:31:41 +00006228}
6229
Eric Dumazet60877a32013-06-20 01:15:51 -07006230static void netif_free_tx_queues(struct net_device *dev)
6231{
WANG Cong4cb28972014-06-02 15:55:22 -07006232 kvfree(dev->_tx);
Eric Dumazet60877a32013-06-20 01:15:51 -07006233}
6234
Tom Herberte6484932010-10-18 18:04:39 +00006235static int netif_alloc_netdev_queues(struct net_device *dev)
6236{
6237 unsigned int count = dev->num_tx_queues;
6238 struct netdev_queue *tx;
Eric Dumazet60877a32013-06-20 01:15:51 -07006239 size_t sz = count * sizeof(*tx);
Tom Herberte6484932010-10-18 18:04:39 +00006240
Eric Dumazet60877a32013-06-20 01:15:51 -07006241 BUG_ON(count < 1 || count > 0xffff);
Tom Herberte6484932010-10-18 18:04:39 +00006242
Eric Dumazet60877a32013-06-20 01:15:51 -07006243 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6244 if (!tx) {
6245 tx = vzalloc(sz);
6246 if (!tx)
6247 return -ENOMEM;
6248 }
Tom Herberte6484932010-10-18 18:04:39 +00006249 dev->_tx = tx;
Tom Herbert1d24eb42010-11-21 13:17:27 +00006250
Tom Herberte6484932010-10-18 18:04:39 +00006251 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
6252 spin_lock_init(&dev->tx_global_lock);
Changli Gaoaa942102010-12-04 02:31:41 +00006253
6254 return 0;
Tom Herberte6484932010-10-18 18:04:39 +00006255}
6256
Patrick Mullaneyfc4a7482009-12-03 15:59:22 -08006257/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07006258 * register_netdevice - register a network device
6259 * @dev: device to register
6260 *
6261 * Take a completed network device structure and add it to the kernel
6262 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
6263 * chain. 0 is returned on success. A negative errno code is returned
6264 * on a failure to set up the device, or if the name is a duplicate.
6265 *
6266 * Callers must hold the rtnl semaphore. You may want
6267 * register_netdev() instead of this.
6268 *
6269 * BUGS:
6270 * The locking appears insufficient to guarantee two parallel registers
6271 * will not get the same name.
6272 */
6273
6274int register_netdevice(struct net_device *dev)
6275{
Linus Torvalds1da177e2005-04-16 15:20:36 -07006276 int ret;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08006277 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006278
6279 BUG_ON(dev_boot_phase);
6280 ASSERT_RTNL();
6281
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07006282 might_sleep();
6283
Linus Torvalds1da177e2005-04-16 15:20:36 -07006284 /* When net_device's are persistent, this will be fatal. */
6285 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
Stephen Hemmingerd3147742008-11-19 21:32:24 -08006286 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006287
David S. Millerf1f28aa2008-07-15 00:08:33 -07006288 spin_lock_init(&dev->addr_list_lock);
David S. Millercf508b12008-07-22 14:16:42 -07006289 netdev_set_addr_lockdep_class(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006290
Linus Torvalds1da177e2005-04-16 15:20:36 -07006291 dev->iflink = -1;
6292
Gao feng828de4f2012-09-13 20:58:27 +00006293 ret = dev_get_valid_name(net, dev, dev->name);
Peter Pan(潘卫平)0696c3a2011-05-12 15:46:56 +00006294 if (ret < 0)
6295 goto out;
6296
Linus Torvalds1da177e2005-04-16 15:20:36 -07006297 /* Init, if this function is available */
Stephen Hemmingerd3147742008-11-19 21:32:24 -08006298 if (dev->netdev_ops->ndo_init) {
6299 ret = dev->netdev_ops->ndo_init(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006300 if (ret) {
6301 if (ret > 0)
6302 ret = -EIO;
Adrian Bunk90833aa2006-11-13 16:02:22 -08006303 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006304 }
6305 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09006306
Patrick McHardyf6469682013-04-19 02:04:27 +00006307 if (((dev->hw_features | dev->features) &
6308 NETIF_F_HW_VLAN_CTAG_FILTER) &&
Michał Mirosławd2ed2732013-01-29 15:14:16 +00006309 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
6310 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
6311 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
6312 ret = -EINVAL;
6313 goto err_uninit;
6314 }
6315
Pavel Emelyanov9c7dafb2012-08-08 21:52:46 +00006316 ret = -EBUSY;
6317 if (!dev->ifindex)
6318 dev->ifindex = dev_new_index(net);
6319 else if (__dev_get_by_index(net, dev->ifindex))
6320 goto err_uninit;
6321
Linus Torvalds1da177e2005-04-16 15:20:36 -07006322 if (dev->iflink == -1)
6323 dev->iflink = dev->ifindex;
6324
Michał Mirosław5455c692011-02-15 16:59:17 +00006325 /* Transfer changeable features to wanted_features and enable
6326 * software offloads (GSO and GRO).
6327 */
6328 dev->hw_features |= NETIF_F_SOFT_FEATURES;
Michał Mirosław14d12322011-02-22 16:52:28 +00006329 dev->features |= NETIF_F_SOFT_FEATURES;
6330 dev->wanted_features = dev->features & dev->hw_features;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006331
Michał Mirosław34324dc2011-11-15 15:29:55 +00006332 if (!(dev->flags & IFF_LOOPBACK)) {
6333 dev->hw_features |= NETIF_F_NOCACHE_COPY;
Tom Herbertc6e1a0d2011-04-04 22:30:30 -07006334 }
6335
Michał Mirosław1180e7d2011-07-14 14:41:11 -07006336 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
Brandon Philips16c3ea72010-09-15 09:24:24 +00006337 */
Michał Mirosław1180e7d2011-07-14 14:41:11 -07006338 dev->vlan_features |= NETIF_F_HIGHDMA;
Brandon Philips16c3ea72010-09-15 09:24:24 +00006339
Pravin B Shelaree579672013-03-07 09:28:08 +00006340 /* Make NETIF_F_SG inheritable to tunnel devices.
6341 */
6342 dev->hw_enc_features |= NETIF_F_SG;
6343
Simon Horman0d89d202013-05-23 21:02:52 +00006344 /* Make NETIF_F_SG inheritable to MPLS.
6345 */
6346 dev->mpls_features |= NETIF_F_SG;
6347
Johannes Berg7ffbe3f2009-10-02 05:15:27 +00006348 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
6349 ret = notifier_to_errno(ret);
6350 if (ret)
6351 goto err_uninit;
6352
Eric W. Biederman8b41d182007-09-26 22:02:53 -07006353 ret = netdev_register_kobject(dev);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07006354 if (ret)
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07006355 goto err_uninit;
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07006356 dev->reg_state = NETREG_REGISTERED;
6357
Michał Mirosław6cb6a272011-04-02 22:48:47 -07006358 __netdev_update_features(dev);
Michał Mirosław8e9b59b2011-02-22 16:52:28 +00006359
Linus Torvalds1da177e2005-04-16 15:20:36 -07006360 /*
6361	 * Default initial state at registration is that the
6362 * device is present.
6363 */
6364
6365 set_bit(__LINK_STATE_PRESENT, &dev->state);
6366
Ben Hutchings8f4cccb2012-08-20 22:16:51 +01006367 linkwatch_init_dev(dev);
6368
Linus Torvalds1da177e2005-04-16 15:20:36 -07006369 dev_init_scheduler(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006370 dev_hold(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02006371 list_netdevice(dev);
Theodore Ts'o7bf23572012-07-04 21:23:25 -04006372 add_device_randomness(dev->dev_addr, dev->addr_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006373
Jiri Pirko948b3372013-01-08 01:38:25 +00006374	/* If the device has a permanent device address, the driver should
6375	 * set dev_addr, and addr_assign_type should be set to
6376	 * NET_ADDR_PERM (the default value).
6377 */
6378 if (dev->addr_assign_type == NET_ADDR_PERM)
6379 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
6380
Linus Torvalds1da177e2005-04-16 15:20:36 -07006381	/* Notify protocols that a new device appeared. */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07006382 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07006383 ret = notifier_to_errno(ret);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07006384 if (ret) {
6385 rollback_registered(dev);
6386 dev->reg_state = NETREG_UNREGISTERED;
6387 }
Eric W. Biedermand90a9092009-12-12 22:11:15 +00006388 /*
6389 * Prevent userspace races by waiting until the network
6390	 * device is fully set up before sending notifications.
6391 */
Patrick McHardya2835762010-02-26 06:34:51 +00006392 if (!dev->rtnl_link_ops ||
6393 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
Alexei Starovoitov7f294052013-10-23 16:02:42 -07006394 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006395
6396out:
6397 return ret;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07006398
6399err_uninit:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08006400 if (dev->netdev_ops->ndo_uninit)
6401 dev->netdev_ops->ndo_uninit(dev);
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07006402 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006403}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07006404EXPORT_SYMBOL(register_netdevice);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006405
6406/**
Benjamin Herrenschmidt937f1ba2009-01-14 21:05:05 -08006407 * init_dummy_netdev - init a dummy network device for NAPI
6408 * @dev: device to init
6409 *
6410 * This takes a network device structure and initializes the minimum
6411 * number of fields so it can be used to schedule NAPI polls without
6412 * registering a full-blown interface. This is to be used by drivers
6413 * that need to tie several hardware interfaces to a single NAPI
6414 * poll scheduler due to HW limitations.
6415 */
6416int init_dummy_netdev(struct net_device *dev)
6417{
6418 /* Clear everything. Note we don't initialize spinlocks
6419	 * as they aren't supposed to be taken by any of the
6420 * NAPI code and this dummy netdev is supposed to be
6421 * only ever used for NAPI polls
6422 */
6423 memset(dev, 0, sizeof(struct net_device));
6424
6425 /* make sure we BUG if trying to hit standard
6426 * register/unregister code path
6427 */
6428 dev->reg_state = NETREG_DUMMY;
6429
Benjamin Herrenschmidt937f1ba2009-01-14 21:05:05 -08006430 /* NAPI wants this */
6431 INIT_LIST_HEAD(&dev->napi_list);
6432
6433 /* a dummy interface is started by default */
6434 set_bit(__LINK_STATE_PRESENT, &dev->state);
6435 set_bit(__LINK_STATE_START, &dev->state);
6436
Eric Dumazet29b44332010-10-11 10:22:12 +00006437	/* Note: We don't allocate pcpu_refcnt for dummy devices,
6438	 * because users of this 'device' don't need to change
6439 * its refcount.
6440 */
6441
Benjamin Herrenschmidt937f1ba2009-01-14 21:05:05 -08006442 return 0;
6443}
6444EXPORT_SYMBOL_GPL(init_dummy_netdev);
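/*
 * Illustrative sketch (not part of dev.c, added for clarity): a driver whose
 * hardware exposes several channels behind one interrupt can host its NAPI
 * context on a dummy netdev prepared by init_dummy_netdev().  The foo_*
 * names and the adapter layout are hypothetical.
 */
#include <linux/netdevice.h>

struct foo_adapter {
	struct net_device napi_dev;	/* never registered, NAPI host only */
	struct napi_struct napi;
};

static int foo_poll(struct napi_struct *napi, int budget)
{
	/* ... process up to @budget received packets here ... */
	napi_complete(napi);
	return 0;
}

static void foo_setup_napi(struct foo_adapter *ad)
{
	init_dummy_netdev(&ad->napi_dev);
	netif_napi_add(&ad->napi_dev, &ad->napi, foo_poll, NAPI_POLL_WEIGHT);
	napi_enable(&ad->napi);
}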
6445
6446
6447/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07006448 * register_netdev - register a network device
6449 * @dev: device to register
6450 *
6451 * Take a completed network device structure and add it to the kernel
6452 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
6453 * chain. 0 is returned on success. A negative errno code is returned
6454 * on a failure to set up the device, or if the name is a duplicate.
6455 *
Borislav Petkov38b4da32007-04-20 22:14:10 -07006456 * This is a wrapper around register_netdevice that takes the rtnl semaphore
Linus Torvalds1da177e2005-04-16 15:20:36 -07006457 * and expands the device name if you passed a format string to
6458 * alloc_netdev.
6459 */
6460int register_netdev(struct net_device *dev)
6461{
6462 int err;
6463
6464 rtnl_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07006465 err = register_netdevice(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006466 rtnl_unlock();
6467 return err;
6468}
6469EXPORT_SYMBOL(register_netdev);
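/*
 * Illustrative sketch (not part of dev.c, added for clarity): the usual
 * driver-side pairing of alloc_netdev()/register_netdev(), with the "eth%d"
 * format string expanded during registration.  The foo_* names are
 * hypothetical; error handling is reduced to the essentials.
 */
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>

static struct net_device *foo_dev;

static int __init foo_init(void)
{
	int err;

	foo_dev = alloc_netdev(0, "eth%d", NET_NAME_UNKNOWN, ether_setup);
	if (!foo_dev)
		return -ENOMEM;

	err = register_netdev(foo_dev);	/* takes rtnl_lock() internally */
	if (err)
		free_netdev(foo_dev);
	return err;
}

static void __exit foo_exit(void)
{
	unregister_netdev(foo_dev);
	free_netdev(foo_dev);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");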
6470
Eric Dumazet29b44332010-10-11 10:22:12 +00006471int netdev_refcnt_read(const struct net_device *dev)
6472{
6473 int i, refcnt = 0;
6474
6475 for_each_possible_cpu(i)
6476 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
6477 return refcnt;
6478}
6479EXPORT_SYMBOL(netdev_refcnt_read);
6480
Ben Hutchings2c530402012-07-10 10:55:09 +00006481/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07006482 * netdev_wait_allrefs - wait until all references are gone.
Randy Dunlap3de7a372012-08-18 14:36:44 +00006483 * @dev: target net_device
Linus Torvalds1da177e2005-04-16 15:20:36 -07006484 *
6485 * This is called when unregistering network devices.
6486 *
6487 * Any protocol or device that holds a reference should register
6488 * for netdevice notification, and clean up and put back the
6489 * reference if they receive an UNREGISTER event.
6490 * We can get stuck here if buggy protocols don't correctly
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09006491 * call dev_put.
Linus Torvalds1da177e2005-04-16 15:20:36 -07006492 */
6493static void netdev_wait_allrefs(struct net_device *dev)
6494{
6495 unsigned long rebroadcast_time, warning_time;
Eric Dumazet29b44332010-10-11 10:22:12 +00006496 int refcnt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006497
Eric Dumazete014deb2009-11-17 05:59:21 +00006498 linkwatch_forget_dev(dev);
6499
Linus Torvalds1da177e2005-04-16 15:20:36 -07006500 rebroadcast_time = warning_time = jiffies;
Eric Dumazet29b44332010-10-11 10:22:12 +00006501 refcnt = netdev_refcnt_read(dev);
6502
6503 while (refcnt != 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07006504 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08006505 rtnl_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07006506
6507 /* Rebroadcast unregister notification */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07006508 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006509
Eric Dumazet748e2d92012-08-22 21:50:59 +00006510 __rtnl_unlock();
Eric Dumazet0115e8e2012-08-22 17:19:46 +00006511 rcu_barrier();
Eric Dumazet748e2d92012-08-22 21:50:59 +00006512 rtnl_lock();
6513
Eric Dumazet0115e8e2012-08-22 17:19:46 +00006514 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006515 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
6516 &dev->state)) {
6517 /* We must not have linkwatch events
6518 * pending on unregister. If this
6519 * happens, we simply run the queue
6520 * unscheduled, resulting in a noop
6521 * for this device.
6522 */
6523 linkwatch_run_queue();
6524 }
6525
Stephen Hemminger6756ae42006-03-20 22:23:58 -08006526 __rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07006527
6528 rebroadcast_time = jiffies;
6529 }
6530
6531 msleep(250);
6532
Eric Dumazet29b44332010-10-11 10:22:12 +00006533 refcnt = netdev_refcnt_read(dev);
6534
Linus Torvalds1da177e2005-04-16 15:20:36 -07006535 if (time_after(jiffies, warning_time + 10 * HZ)) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00006536 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
6537 dev->name, refcnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006538 warning_time = jiffies;
6539 }
6540 }
6541}
6542
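/*
 * Illustrative sketch (not part of dev.c, added for clarity): the notifier
 * pattern netdev_wait_allrefs() relies on.  A subsystem that caches a
 * dev_hold() reference drops it on NETDEV_UNREGISTER so the refcount can
 * reach zero.  foo_cached_dev and the foo_* names are hypothetical; the
 * block would be registered with register_netdevice_notifier().
 */
#include <linux/netdevice.h>
#include <linux/notifier.h>

static struct net_device *foo_cached_dev;	/* holds a dev_hold() reference */

static int foo_netdev_event(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_UNREGISTER && dev == foo_cached_dev) {
		dev_put(foo_cached_dev);
		foo_cached_dev = NULL;
	}
	return NOTIFY_DONE;
}

static struct notifier_block foo_netdev_notifier = {
	.notifier_call = foo_netdev_event,
};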
6543/* The sequence is:
6544 *
6545 * rtnl_lock();
6546 * ...
6547 * register_netdevice(x1);
6548 * register_netdevice(x2);
6549 * ...
6550 * unregister_netdevice(y1);
6551 * unregister_netdevice(y2);
6552 * ...
6553 * rtnl_unlock();
6554 * free_netdev(y1);
6555 * free_netdev(y2);
6556 *
Herbert Xu58ec3b42008-10-07 15:50:03 -07006557 * We are invoked by rtnl_unlock().
Linus Torvalds1da177e2005-04-16 15:20:36 -07006558 * This allows us to deal with problems:
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07006559 * 1) We can delete sysfs objects which invoke hotplug
Linus Torvalds1da177e2005-04-16 15:20:36 -07006560 * without deadlocking with linkwatch via keventd.
6561 * 2) Since we run with the RTNL semaphore not held, we can sleep
6562 * safely in order to wait for the netdev refcnt to drop to zero.
Herbert Xu58ec3b42008-10-07 15:50:03 -07006563 *
6564 * We must not return until all unregister events added during
6565 * the interval the lock was held have been completed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07006566 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07006567void netdev_run_todo(void)
6568{
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07006569 struct list_head list;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006570
Linus Torvalds1da177e2005-04-16 15:20:36 -07006571 /* Snapshot list, allow later requests */
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07006572 list_replace_init(&net_todo_list, &list);
Herbert Xu58ec3b42008-10-07 15:50:03 -07006573
6574 __rtnl_unlock();
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07006575
Eric Dumazet0115e8e2012-08-22 17:19:46 +00006576
6577 /* Wait for rcu callbacks to finish before next phase */
Eric W. Biederman850a5452011-10-13 22:25:23 +00006578 if (!list_empty(&list))
6579 rcu_barrier();
6580
Linus Torvalds1da177e2005-04-16 15:20:36 -07006581 while (!list_empty(&list)) {
6582 struct net_device *dev
stephen hemmingere5e26d72010-02-24 14:01:38 +00006583 = list_first_entry(&list, struct net_device, todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006584 list_del(&dev->todo_list);
6585
Eric Dumazet748e2d92012-08-22 21:50:59 +00006586 rtnl_lock();
Eric Dumazet0115e8e2012-08-22 17:19:46 +00006587 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
Eric Dumazet748e2d92012-08-22 21:50:59 +00006588 __rtnl_unlock();
Eric Dumazet0115e8e2012-08-22 17:19:46 +00006589
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07006590 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00006591 pr_err("network todo '%s' but state %d\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07006592 dev->name, dev->reg_state);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07006593 dump_stack();
6594 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006595 }
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07006596
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07006597 dev->reg_state = NETREG_UNREGISTERED;
6598
Changli Gao152102c2010-03-30 20:16:22 +00006599 on_each_cpu(flush_backlog, dev, 1);
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07006600
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07006601 netdev_wait_allrefs(dev);
6602
6603 /* paranoia */
Eric Dumazet29b44332010-10-11 10:22:12 +00006604 BUG_ON(netdev_refcnt_read(dev));
Salam Noureddine7866a622015-01-27 11:35:48 -08006605 BUG_ON(!list_empty(&dev->ptype_all));
6606 BUG_ON(!list_empty(&dev->ptype_specific));
Eric Dumazet33d480c2011-08-11 19:30:52 +00006607 WARN_ON(rcu_access_pointer(dev->ip_ptr));
6608 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
Ilpo Järvinen547b7922008-07-25 21:43:18 -07006609 WARN_ON(dev->dn_ptr);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07006610
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07006611 if (dev->destructor)
6612 dev->destructor(dev);
Stephen Hemminger9093bbb2007-05-19 15:39:25 -07006613
Eric W. Biederman50624c92013-09-23 21:19:49 -07006614 /* Report a network device has been unregistered */
6615 rtnl_lock();
6616 dev_net(dev)->dev_unreg_count--;
6617 __rtnl_unlock();
6618 wake_up(&netdev_unregistering_wq);
6619
Stephen Hemminger9093bbb2007-05-19 15:39:25 -07006620 /* Free network device */
6621 kobject_put(&dev->dev.kobj);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006622 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07006623}
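/*
 * Illustrative sketch (not part of dev.c, added for clarity): the caller-side
 * shape of the sequence described above.  netdev_run_todo() runs from
 * rtnl_unlock(), so free_netdev() may only be called after that point.
 * foo_remove_pair() is hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static void foo_remove_pair(struct net_device *a, struct net_device *b)
{
	rtnl_lock();
	unregister_netdevice(a);
	unregister_netdevice(b);
	rtnl_unlock();			/* runs netdev_run_todo() */

	free_netdev(a);
	free_netdev(b);
}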
6624
Ben Hutchings3cfde792010-07-09 09:11:52 +00006625/* Convert net_device_stats to rtnl_link_stats64. They have the same
6626 * fields in the same order, with only the type differing.
6627 */
Eric Dumazet77a1abf2012-03-05 04:50:09 +00006628void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
6629 const struct net_device_stats *netdev_stats)
Ben Hutchings3cfde792010-07-09 09:11:52 +00006630{
6631#if BITS_PER_LONG == 64
Eric Dumazet77a1abf2012-03-05 04:50:09 +00006632 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
6633 memcpy(stats64, netdev_stats, sizeof(*stats64));
Ben Hutchings3cfde792010-07-09 09:11:52 +00006634#else
6635 size_t i, n = sizeof(*stats64) / sizeof(u64);
6636 const unsigned long *src = (const unsigned long *)netdev_stats;
6637 u64 *dst = (u64 *)stats64;
6638
6639 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
6640 sizeof(*stats64) / sizeof(u64));
6641 for (i = 0; i < n; i++)
6642 dst[i] = src[i];
6643#endif
6644}
Eric Dumazet77a1abf2012-03-05 04:50:09 +00006645EXPORT_SYMBOL(netdev_stats_to_stats64);
Ben Hutchings3cfde792010-07-09 09:11:52 +00006646
Eric Dumazetd83345a2009-11-16 03:36:51 +00006647/**
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08006648 * dev_get_stats - get network device statistics
6649 * @dev: device to get statistics from
Eric Dumazet28172732010-07-07 14:58:56 -07006650 * @storage: place to store stats
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08006651 *
Ben Hutchingsd7753512010-07-09 09:12:41 +00006652 * Get network statistics from device. Return @storage.
6653 * The device driver may provide its own method by setting
6654 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
6655 * otherwise the internal statistics structure is used.
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08006656 */
Ben Hutchingsd7753512010-07-09 09:12:41 +00006657struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
6658 struct rtnl_link_stats64 *storage)
Eric Dumazet7004bf22009-05-18 00:34:33 +00006659{
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08006660 const struct net_device_ops *ops = dev->netdev_ops;
6661
Eric Dumazet28172732010-07-07 14:58:56 -07006662 if (ops->ndo_get_stats64) {
6663 memset(storage, 0, sizeof(*storage));
Eric Dumazetcaf586e2010-09-30 21:06:55 +00006664 ops->ndo_get_stats64(dev, storage);
6665 } else if (ops->ndo_get_stats) {
Ben Hutchings3cfde792010-07-09 09:11:52 +00006666 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
Eric Dumazetcaf586e2010-09-30 21:06:55 +00006667 } else {
6668 netdev_stats_to_stats64(storage, &dev->stats);
Eric Dumazet28172732010-07-07 14:58:56 -07006669 }
Eric Dumazetcaf586e2010-09-30 21:06:55 +00006670 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
Eric Dumazet015f0682014-03-27 08:45:56 -07006671 storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
Eric Dumazet28172732010-07-07 14:58:56 -07006672 return storage;
Rusty Russellc45d2862007-03-28 14:29:08 -07006673}
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08006674EXPORT_SYMBOL(dev_get_stats);
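/*
 * Illustrative sketch (not part of dev.c, added for clarity): a driver-side
 * ndo_get_stats64() as dev_get_stats() calls it -- @storage arrives zeroed,
 * the driver fills in what it tracks and returns the same pointer.  The
 * foo_stats_priv layout is hypothetical.
 */
#include <linux/netdevice.h>

struct foo_stats_priv {
	u64 rx_packets;
	u64 tx_packets;
};

static struct rtnl_link_stats64 *foo_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *storage)
{
	struct foo_stats_priv *priv = netdev_priv(dev);

	storage->rx_packets = priv->rx_packets;
	storage->tx_packets = priv->tx_packets;
	return storage;
}
/* Wired up via a driver's struct net_device_ops .ndo_get_stats64 member. */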
Rusty Russellc45d2862007-03-28 14:29:08 -07006675
Eric Dumazet24824a02010-10-02 06:11:55 +00006676struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
David S. Millerdc2b4842008-07-08 17:18:23 -07006677{
Eric Dumazet24824a02010-10-02 06:11:55 +00006678 struct netdev_queue *queue = dev_ingress_queue(dev);
David S. Millerdc2b4842008-07-08 17:18:23 -07006679
Eric Dumazet24824a02010-10-02 06:11:55 +00006680#ifdef CONFIG_NET_CLS_ACT
6681 if (queue)
6682 return queue;
6683 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
6684 if (!queue)
6685 return NULL;
6686 netdev_init_one_queue(dev, queue, NULL);
Eric Dumazet2ce1ee12015-02-04 13:37:44 -08006687 RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
Eric Dumazet24824a02010-10-02 06:11:55 +00006688 queue->qdisc_sleeping = &noop_qdisc;
6689 rcu_assign_pointer(dev->ingress_queue, queue);
6690#endif
6691 return queue;
David S. Millerbb949fb2008-07-08 16:55:56 -07006692}
6693
Eric Dumazet2c60db02012-09-16 09:17:26 +00006694static const struct ethtool_ops default_ethtool_ops;
6695
Stanislaw Gruszkad07d7502013-01-10 23:19:10 +00006696void netdev_set_default_ethtool_ops(struct net_device *dev,
6697 const struct ethtool_ops *ops)
6698{
6699 if (dev->ethtool_ops == &default_ethtool_ops)
6700 dev->ethtool_ops = ops;
6701}
6702EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
6703
Eric Dumazet74d332c2013-10-30 13:10:44 -07006704void netdev_freemem(struct net_device *dev)
6705{
6706 char *addr = (char *)dev - dev->padded;
6707
WANG Cong4cb28972014-06-02 15:55:22 -07006708 kvfree(addr);
Eric Dumazet74d332c2013-10-30 13:10:44 -07006709}
6710
Linus Torvalds1da177e2005-04-16 15:20:36 -07006711/**
Tom Herbert36909ea2011-01-09 19:36:31 +00006712 * alloc_netdev_mqs - allocate network device
Tom Gundersenc835a672014-07-14 16:37:24 +02006713 * @sizeof_priv: size of private data to allocate space for
6714 * @name: device name format string
6715 * @name_assign_type: origin of device name
6716 * @setup: callback to initialize device
6717 * @txqs: the number of TX subqueues to allocate
6718 * @rxqs: the number of RX subqueues to allocate
Linus Torvalds1da177e2005-04-16 15:20:36 -07006719 *
6720 * Allocates a struct net_device with private data area for driver use
Li Zhong90e51ad2013-11-22 15:04:46 +08006721 * and performs basic initialization. Also allocates subqueue structs
Tom Herbert36909ea2011-01-09 19:36:31 +00006722 * for each queue on the device.
Linus Torvalds1da177e2005-04-16 15:20:36 -07006723 */
Tom Herbert36909ea2011-01-09 19:36:31 +00006724struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
Tom Gundersenc835a672014-07-14 16:37:24 +02006725 unsigned char name_assign_type,
Tom Herbert36909ea2011-01-09 19:36:31 +00006726 void (*setup)(struct net_device *),
6727 unsigned int txqs, unsigned int rxqs)
Linus Torvalds1da177e2005-04-16 15:20:36 -07006728{
Linus Torvalds1da177e2005-04-16 15:20:36 -07006729 struct net_device *dev;
Stephen Hemminger79439862008-07-21 13:28:44 -07006730 size_t alloc_size;
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00006731 struct net_device *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006732
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07006733 BUG_ON(strlen(name) >= sizeof(dev->name));
6734
Tom Herbert36909ea2011-01-09 19:36:31 +00006735 if (txqs < 1) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00006736 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
Tom Herbert55513fb2010-10-18 17:55:58 +00006737 return NULL;
6738 }
6739
Michael Daltona953be52014-01-16 22:23:28 -08006740#ifdef CONFIG_SYSFS
Tom Herbert36909ea2011-01-09 19:36:31 +00006741 if (rxqs < 1) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00006742 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
Tom Herbert36909ea2011-01-09 19:36:31 +00006743 return NULL;
6744 }
6745#endif
6746
David S. Millerfd2ea0a2008-07-17 01:56:23 -07006747 alloc_size = sizeof(struct net_device);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07006748 if (sizeof_priv) {
6749 /* ensure 32-byte alignment of private area */
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00006750 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07006751 alloc_size += sizeof_priv;
6752 }
6753 /* ensure 32-byte alignment of whole construct */
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00006754 alloc_size += NETDEV_ALIGN - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006755
Eric Dumazet74d332c2013-10-30 13:10:44 -07006756 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6757 if (!p)
6758 p = vzalloc(alloc_size);
Joe Perches62b59422013-02-04 16:48:16 +00006759 if (!p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07006760 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006761
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00006762 dev = PTR_ALIGN(p, NETDEV_ALIGN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006763 dev->padded = (char *)dev - (char *)p;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00006764
Eric Dumazet29b44332010-10-11 10:22:12 +00006765 dev->pcpu_refcnt = alloc_percpu(int);
6766 if (!dev->pcpu_refcnt)
Eric Dumazet74d332c2013-10-30 13:10:44 -07006767 goto free_dev;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00006768
Linus Torvalds1da177e2005-04-16 15:20:36 -07006769 if (dev_addr_init(dev))
Eric Dumazet29b44332010-10-11 10:22:12 +00006770 goto free_pcpu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006771
Jiri Pirko22bedad32010-04-01 21:22:57 +00006772 dev_mc_init(dev);
Jiri Pirkoa748ee22010-04-01 21:22:09 +00006773 dev_uc_init(dev);
Jiri Pirkoccffad252009-05-22 23:22:17 +00006774
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09006775 dev_net_set(dev, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006776
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07006777 dev->gso_max_size = GSO_MAX_SIZE;
Ben Hutchings30b678d2012-07-30 15:57:00 +00006778 dev->gso_max_segs = GSO_MAX_SEGS;
Eric Dumazetfcbeb972014-10-05 10:11:27 -07006779 dev->gso_min_segs = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006780
Herbert Xud565b0a2008-12-15 23:38:52 -08006781 INIT_LIST_HEAD(&dev->napi_list);
Eric W. Biederman9fdce092009-10-30 14:51:13 +00006782 INIT_LIST_HEAD(&dev->unreg_list);
Eric W. Biederman5cde2822013-10-05 19:26:05 -07006783 INIT_LIST_HEAD(&dev->close_list);
Eric Dumazete014deb2009-11-17 05:59:21 +00006784 INIT_LIST_HEAD(&dev->link_watch_list);
Veaceslav Falico2f268f12013-09-25 09:20:07 +02006785 INIT_LIST_HEAD(&dev->adj_list.upper);
6786 INIT_LIST_HEAD(&dev->adj_list.lower);
6787 INIT_LIST_HEAD(&dev->all_adj_list.upper);
6788 INIT_LIST_HEAD(&dev->all_adj_list.lower);
Salam Noureddine7866a622015-01-27 11:35:48 -08006789 INIT_LIST_HEAD(&dev->ptype_all);
6790 INIT_LIST_HEAD(&dev->ptype_specific);
Eric Dumazet02875872014-10-05 18:38:35 -07006791 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006792 setup(dev);
David S. Miller8d3bdbd2011-02-08 15:02:50 -08006793
6794 dev->num_tx_queues = txqs;
6795 dev->real_num_tx_queues = txqs;
6796 if (netif_alloc_netdev_queues(dev))
6797 goto free_all;
6798
Michael Daltona953be52014-01-16 22:23:28 -08006799#ifdef CONFIG_SYSFS
David S. Miller8d3bdbd2011-02-08 15:02:50 -08006800 dev->num_rx_queues = rxqs;
6801 dev->real_num_rx_queues = rxqs;
6802 if (netif_alloc_rx_queues(dev))
6803 goto free_all;
6804#endif
6805
Linus Torvalds1da177e2005-04-16 15:20:36 -07006806 strcpy(dev->name, name);
Tom Gundersenc835a672014-07-14 16:37:24 +02006807 dev->name_assign_type = name_assign_type;
Vlad Dogarucbda10f2011-01-13 23:38:30 +00006808 dev->group = INIT_NETDEV_GROUP;
Eric Dumazet2c60db02012-09-16 09:17:26 +00006809 if (!dev->ethtool_ops)
6810 dev->ethtool_ops = &default_ethtool_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006811 return dev;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00006812
David S. Miller8d3bdbd2011-02-08 15:02:50 -08006813free_all:
6814 free_netdev(dev);
6815 return NULL;
6816
Eric Dumazet29b44332010-10-11 10:22:12 +00006817free_pcpu:
6818 free_percpu(dev->pcpu_refcnt);
Eric Dumazet74d332c2013-10-30 13:10:44 -07006819free_dev:
6820 netdev_freemem(dev);
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00006821 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006822}
Tom Herbert36909ea2011-01-09 19:36:31 +00006823EXPORT_SYMBOL(alloc_netdev_mqs);
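/*
 * Illustrative sketch (not part of dev.c, added for clarity): allocating a
 * multiqueue Ethernet-style device and reaching the private area with
 * netdev_priv().  foo_mq_priv, foo_setup() and the queue count are
 * hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/etherdevice.h>

struct foo_mq_priv {
	void __iomem *regs;
};

static void foo_setup(struct net_device *dev)
{
	ether_setup(dev);
}

static struct net_device *foo_alloc(unsigned int nqueues)
{
	struct net_device *dev;
	struct foo_mq_priv *priv;

	dev = alloc_netdev_mqs(sizeof(struct foo_mq_priv), "foo%d",
			       NET_NAME_UNKNOWN, foo_setup, nqueues, nqueues);
	if (!dev)
		return NULL;

	priv = netdev_priv(dev);	/* already zeroed by the allocator */
	priv->regs = NULL;
	return dev;
}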
Linus Torvalds1da177e2005-04-16 15:20:36 -07006824
6825/**
6826 * free_netdev - free network device
6827 * @dev: device
6828 *
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09006829 * This function does the last stage of destroying an allocated device
6830 * interface. The reference to the device object is released.
Linus Torvalds1da177e2005-04-16 15:20:36 -07006831 * If this is the last reference then it will be freed.
6832 */
6833void free_netdev(struct net_device *dev)
6834{
Herbert Xud565b0a2008-12-15 23:38:52 -08006835 struct napi_struct *p, *n;
6836
Denis V. Lunevf3005d72008-04-16 02:02:18 -07006837 release_net(dev_net(dev));
6838
Eric Dumazet60877a32013-06-20 01:15:51 -07006839 netif_free_tx_queues(dev);
Michael Daltona953be52014-01-16 22:23:28 -08006840#ifdef CONFIG_SYSFS
Pankaj Gupta10595902015-01-12 11:41:28 +05306841 kvfree(dev->_rx);
Tom Herbertfe822242010-11-09 10:47:38 +00006842#endif
David S. Millere8a04642008-07-17 00:34:19 -07006843
Eric Dumazet33d480c2011-08-11 19:30:52 +00006844 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
Eric Dumazet24824a02010-10-02 06:11:55 +00006845
Jiri Pirkof001fde2009-05-05 02:48:28 +00006846 /* Flush device addresses */
6847 dev_addr_flush(dev);
6848
Herbert Xud565b0a2008-12-15 23:38:52 -08006849 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
6850 netif_napi_del(p);
6851
Eric Dumazet29b44332010-10-11 10:22:12 +00006852 free_percpu(dev->pcpu_refcnt);
6853 dev->pcpu_refcnt = NULL;
6854
Stephen Hemminger3041a062006-05-26 13:25:24 -07006855 /* Compatibility with error handling in drivers */
Linus Torvalds1da177e2005-04-16 15:20:36 -07006856 if (dev->reg_state == NETREG_UNINITIALIZED) {
Eric Dumazet74d332c2013-10-30 13:10:44 -07006857 netdev_freemem(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006858 return;
6859 }
6860
6861 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
6862 dev->reg_state = NETREG_RELEASED;
6863
Greg Kroah-Hartman43cb76d2002-04-09 12:14:34 -07006864 /* will free via device release */
6865 put_device(&dev->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006866}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07006867EXPORT_SYMBOL(free_netdev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09006868
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07006869/**
6870 * synchronize_net - Synchronize with packet receive processing
6871 *
6872 * Wait for packets currently being received to be done.
6873 * Does not block later packets from starting.
6874 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09006875void synchronize_net(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07006876{
6877 might_sleep();
Eric Dumazetbe3fc412011-05-23 23:07:32 +00006878 if (rtnl_is_locked())
6879 synchronize_rcu_expedited();
6880 else
6881 synchronize_rcu();
Linus Torvalds1da177e2005-04-16 15:20:36 -07006882}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07006883EXPORT_SYMBOL(synchronize_net);
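/*
 * Illustrative sketch (not part of dev.c, added for clarity): the classic
 * use of synchronize_net() -- unhook a packet handler, wait out CPUs that
 * may still be running it, then free its state.  foo_rcv()/foo_pt are
 * hypothetical; dev_add_pack(&foo_pt) is assumed to have happened earlier.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

static int foo_rcv(struct sk_buff *skb, struct net_device *dev,
		   struct packet_type *pt, struct net_device *orig_dev)
{
	kfree_skb(skb);
	return NET_RX_SUCCESS;
}

static struct packet_type foo_pt = {
	.type = htons(ETH_P_IP),
	.func = foo_rcv,
};

static void foo_unhook(void)
{
	__dev_remove_pack(&foo_pt);
	synchronize_net();	/* no CPU can still be inside foo_rcv() */
	/* now safe to free anything foo_rcv() dereferenced */
}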
Linus Torvalds1da177e2005-04-16 15:20:36 -07006884
6885/**
Eric Dumazet44a08732009-10-27 07:03:04 +00006886 * unregister_netdevice_queue - remove device from the kernel
Linus Torvalds1da177e2005-04-16 15:20:36 -07006887 * @dev: device
Eric Dumazet44a08732009-10-27 07:03:04 +00006888 * @head: list
Jaswinder Singh Rajput6ebfbc02009-11-22 20:43:13 -08006889 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07006890 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08006891 * from the kernel tables.
Eric Dumazet44a08732009-10-27 07:03:04 +00006892 * If @head is not NULL, the device is queued to be unregistered later.
Linus Torvalds1da177e2005-04-16 15:20:36 -07006893 *
6894 * Callers must hold the rtnl semaphore. You may want
6895 * unregister_netdev() instead of this.
6896 */
6897
Eric Dumazet44a08732009-10-27 07:03:04 +00006898void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07006899{
Herbert Xua6620712007-12-12 19:21:56 -08006900 ASSERT_RTNL();
6901
Eric Dumazet44a08732009-10-27 07:03:04 +00006902 if (head) {
Eric W. Biederman9fdce092009-10-30 14:51:13 +00006903 list_move_tail(&dev->unreg_list, head);
Eric Dumazet44a08732009-10-27 07:03:04 +00006904 } else {
6905 rollback_registered(dev);
6906 /* Finish processing unregister after unlock */
6907 net_set_todo(dev);
6908 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07006909}
Eric Dumazet44a08732009-10-27 07:03:04 +00006910EXPORT_SYMBOL(unregister_netdevice_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006911
6912/**
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006913 * unregister_netdevice_many - unregister many devices
6914 * @head: list of devices
Eric Dumazet87757a92014-06-06 06:44:03 -07006915 *
6916 * Note: As most callers use a stack-allocated list_head,
6917 * we force a list_del() to make sure the stack won't be corrupted later.
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006918 */
6919void unregister_netdevice_many(struct list_head *head)
6920{
6921 struct net_device *dev;
6922
6923 if (!list_empty(head)) {
6924 rollback_registered_many(head);
6925 list_for_each_entry(dev, head, unreg_list)
6926 net_set_todo(dev);
Eric Dumazet87757a92014-06-06 06:44:03 -07006927 list_del(head);
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006928 }
6929}
Eric Dumazet63c80992009-10-27 07:06:49 +00006930EXPORT_SYMBOL(unregister_netdevice_many);
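/*
 * Illustrative sketch (not part of dev.c, added for clarity): queueing
 * several devices on a stack-allocated list and tearing them down in one
 * batch under rtnl, as rtnl_link_ops->dellink() callers commonly do.
 * foo_destroy_all() is hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static void foo_destroy_all(struct net_device *devs[], int n)
{
	LIST_HEAD(kill_list);
	int i;

	rtnl_lock();
	for (i = 0; i < n; i++)
		unregister_netdevice_queue(devs[i], &kill_list);
	unregister_netdevice_many(&kill_list);
	rtnl_unlock();
}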
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006931
6932/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07006933 * unregister_netdev - remove device from the kernel
6934 * @dev: device
6935 *
6936 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08006937 * from the kernel tables.
Linus Torvalds1da177e2005-04-16 15:20:36 -07006938 *
6939 * This is just a wrapper for unregister_netdevice that takes
6940 * the rtnl semaphore. In general you want to use this and not
6941 * unregister_netdevice.
6942 */
6943void unregister_netdev(struct net_device *dev)
6944{
6945 rtnl_lock();
6946 unregister_netdevice(dev);
6947 rtnl_unlock();
6948}
Linus Torvalds1da177e2005-04-16 15:20:36 -07006949EXPORT_SYMBOL(unregister_netdev);
6950
Eric W. Biedermance286d32007-09-12 13:53:49 +02006951/**
6952 * dev_change_net_namespace - move device to a different network namespace
6953 * @dev: device
6954 * @net: network namespace
6955 * @pat: If not NULL, name pattern to try if the current device name
6956 * is already taken in the destination network namespace.
6957 *
6958 * This function shuts down a device interface and moves it
6959 * to a new network namespace. On success 0 is returned, on
6960 * a failure a negative errno code is returned.
6961 *
6962 * Callers must hold the rtnl semaphore.
6963 */
6964
6965int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
6966{
Eric W. Biedermance286d32007-09-12 13:53:49 +02006967 int err;
6968
6969 ASSERT_RTNL();
6970
6971 /* Don't allow namespace local devices to be moved. */
6972 err = -EINVAL;
6973 if (dev->features & NETIF_F_NETNS_LOCAL)
6974 goto out;
6975
6976	/* Ensure the device has been registered */
Eric W. Biedermance286d32007-09-12 13:53:49 +02006977 if (dev->reg_state != NETREG_REGISTERED)
6978 goto out;
6979
6980	/* Get out if there is nothing to do */
6981 err = 0;
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09006982 if (net_eq(dev_net(dev), net))
Eric W. Biedermance286d32007-09-12 13:53:49 +02006983 goto out;
6984
6985 /* Pick the destination device name, and ensure
6986 * we can use it in the destination network namespace.
6987 */
6988 err = -EEXIST;
Octavian Purdilad9031022009-11-18 02:36:59 +00006989 if (__dev_get_by_name(net, dev->name)) {
Eric W. Biedermance286d32007-09-12 13:53:49 +02006990 /* We get here if we can't use the current device name */
6991 if (!pat)
6992 goto out;
Gao feng828de4f2012-09-13 20:58:27 +00006993 if (dev_get_valid_name(net, dev, pat) < 0)
Eric W. Biedermance286d32007-09-12 13:53:49 +02006994 goto out;
6995 }
6996
6997 /*
6998 * And now a mini version of register_netdevice unregister_netdevice.
6999 */
7000
7001 /* If device is running close it first. */
Pavel Emelyanov9b772652007-10-10 02:49:09 -07007002 dev_close(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02007003
7004 /* And unlink it from device chain */
7005 err = -ENODEV;
7006 unlist_netdevice(dev);
7007
7008 synchronize_net();
7009
7010 /* Shutdown queueing discipline. */
7011 dev_shutdown(dev);
7012
7013	/* Notify protocols that we are about to destroy
7014	   this device. They should clean up all their state.
David Lamparter3b27e102010-09-17 03:22:19 +00007015
7016 Note that dev->reg_state stays at NETREG_REGISTERED.
7017 This is wanted because this way 8021q and macvlan know
7018 the device is just moving and can keep their slaves up.
Eric W. Biedermance286d32007-09-12 13:53:49 +02007019 */
7020 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
Gao feng6549dd42012-08-23 15:36:55 +00007021 rcu_barrier();
7022 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
Alexei Starovoitov7f294052013-10-23 16:02:42 -07007023 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
Eric W. Biedermance286d32007-09-12 13:53:49 +02007024
7025 /*
7026 * Flush the unicast and multicast chains
7027 */
Jiri Pirkoa748ee22010-04-01 21:22:09 +00007028 dev_uc_flush(dev);
Jiri Pirko22bedad32010-04-01 21:22:57 +00007029 dev_mc_flush(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02007030
Serge Hallyn4e66ae22012-12-03 16:17:12 +00007031 /* Send a netdev-removed uevent to the old namespace */
7032 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
Alexander Y. Fomichev4c754312014-08-25 16:26:45 +04007033 netdev_adjacent_del_links(dev);
Serge Hallyn4e66ae22012-12-03 16:17:12 +00007034
Eric W. Biedermance286d32007-09-12 13:53:49 +02007035 /* Actually switch the network namespace */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09007036 dev_net_set(dev, net);
Eric W. Biedermance286d32007-09-12 13:53:49 +02007037
Eric W. Biedermance286d32007-09-12 13:53:49 +02007038 /* If there is an ifindex conflict assign a new one */
7039 if (__dev_get_by_index(net, dev->ifindex)) {
7040 int iflink = (dev->iflink == dev->ifindex);
7041 dev->ifindex = dev_new_index(net);
7042 if (iflink)
7043 dev->iflink = dev->ifindex;
7044 }
7045
Serge Hallyn4e66ae22012-12-03 16:17:12 +00007046 /* Send a netdev-add uevent to the new namespace */
7047 kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
Alexander Y. Fomichev4c754312014-08-25 16:26:45 +04007048 netdev_adjacent_add_links(dev);
Serge Hallyn4e66ae22012-12-03 16:17:12 +00007049
Eric W. Biederman8b41d182007-09-26 22:02:53 -07007050 /* Fixup kobjects */
Eric W. Biedermana1b3f592010-05-04 17:36:49 -07007051 err = device_rename(&dev->dev, dev->name);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07007052 WARN_ON(err);
Eric W. Biedermance286d32007-09-12 13:53:49 +02007053
7054 /* Add the device back in the hashes */
7055 list_netdevice(dev);
7056
7057	/* Notify protocols that a new device appeared. */
7058 call_netdevice_notifiers(NETDEV_REGISTER, dev);
7059
Eric W. Biedermand90a9092009-12-12 22:11:15 +00007060 /*
7061 * Prevent userspace races by waiting until the network
7062	 * device is fully set up before sending notifications.
7063 */
Alexei Starovoitov7f294052013-10-23 16:02:42 -07007064 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
Eric W. Biedermand90a9092009-12-12 22:11:15 +00007065
Eric W. Biedermance286d32007-09-12 13:53:49 +02007066 synchronize_net();
7067 err = 0;
7068out:
7069 return err;
7070}
Johannes Berg463d0182009-07-14 00:33:35 +02007071EXPORT_SYMBOL_GPL(dev_change_net_namespace);
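/*
 * Illustrative sketch (not part of dev.c, added for clarity): moving a
 * device into another namespace under rtnl, falling back to a "dev%d"
 * pattern on a name clash, much like default_device_exit() below does.
 * foo_move() is hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int foo_move(struct net_device *dev, struct net *net)
{
	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, net, "dev%d");
	rtnl_unlock();
	return err;
}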
Eric W. Biedermance286d32007-09-12 13:53:49 +02007072
Linus Torvalds1da177e2005-04-16 15:20:36 -07007073static int dev_cpu_callback(struct notifier_block *nfb,
7074 unsigned long action,
7075 void *ocpu)
7076{
7077 struct sk_buff **list_skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07007078 struct sk_buff *skb;
7079 unsigned int cpu, oldcpu = (unsigned long)ocpu;
7080 struct softnet_data *sd, *oldsd;
7081
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07007082 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -07007083 return NOTIFY_OK;
7084
7085 local_irq_disable();
7086 cpu = smp_processor_id();
7087 sd = &per_cpu(softnet_data, cpu);
7088 oldsd = &per_cpu(softnet_data, oldcpu);
7089
7090 /* Find end of our completion_queue. */
7091 list_skb = &sd->completion_queue;
7092 while (*list_skb)
7093 list_skb = &(*list_skb)->next;
7094 /* Append completion queue from offline CPU. */
7095 *list_skb = oldsd->completion_queue;
7096 oldsd->completion_queue = NULL;
7097
Linus Torvalds1da177e2005-04-16 15:20:36 -07007098 /* Append output queue from offline CPU. */
Changli Gaoa9cbd582010-04-26 23:06:24 +00007099 if (oldsd->output_queue) {
7100 *sd->output_queue_tailp = oldsd->output_queue;
7101 sd->output_queue_tailp = oldsd->output_queue_tailp;
7102 oldsd->output_queue = NULL;
7103 oldsd->output_queue_tailp = &oldsd->output_queue;
7104 }
Eric Dumazetac64da02015-01-15 17:04:22 -08007105	/* Append NAPI poll list from offline CPU, with one exception:
7106	 * process_backlog() must be called by the CPU owning the per-CPU backlog.
7107 * We properly handle process_queue & input_pkt_queue later.
7108 */
7109 while (!list_empty(&oldsd->poll_list)) {
7110 struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
7111 struct napi_struct,
7112 poll_list);
7113
7114 list_del_init(&napi->poll_list);
7115 if (napi->poll == process_backlog)
7116 napi->state = 0;
7117 else
7118 ____napi_schedule(sd, napi);
Heiko Carstens264524d2011-06-06 20:50:03 +00007119 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07007120
7121 raise_softirq_irqoff(NET_TX_SOFTIRQ);
7122 local_irq_enable();
7123
7124 /* Process offline CPU's input_pkt_queue */
Tom Herbert76cc8b12010-05-20 18:37:59 +00007125 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00007126 netif_rx_internal(skb);
Tom Herbert76cc8b12010-05-20 18:37:59 +00007127 input_queue_head_incr(oldsd);
7128 }
Eric Dumazetac64da02015-01-15 17:04:22 -08007129 while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
Ben Hutchingsae78dbf2014-01-10 22:17:24 +00007130 netif_rx_internal(skb);
Tom Herbert76cc8b12010-05-20 18:37:59 +00007131 input_queue_head_incr(oldsd);
Tom Herbertfec5e652010-04-16 16:01:27 -07007132 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07007133
7134 return NOTIFY_OK;
7135}
Linus Torvalds1da177e2005-04-16 15:20:36 -07007136
7137
Herbert Xu7f353bf2007-08-10 15:47:58 -07007138/**
Herbert Xub63365a2008-10-23 01:11:29 -07007139 * netdev_increment_features - increment feature set by one
7140 * @all: current feature set
7141 * @one: new feature set
7142 * @mask: mask feature set
Herbert Xu7f353bf2007-08-10 15:47:58 -07007143 *
7144 * Computes a new feature set after adding a device with feature set
Herbert Xub63365a2008-10-23 01:11:29 -07007145 * @one to the master device with current feature set @all. Will not
7146 * enable anything that is off in @mask. Returns the new feature set.
Herbert Xu7f353bf2007-08-10 15:47:58 -07007147 */
Michał Mirosławc8f44af2011-11-15 15:29:55 +00007148netdev_features_t netdev_increment_features(netdev_features_t all,
7149 netdev_features_t one, netdev_features_t mask)
Herbert Xu7f353bf2007-08-10 15:47:58 -07007150{
Michał Mirosław1742f182011-04-22 06:31:16 +00007151 if (mask & NETIF_F_GEN_CSUM)
7152 mask |= NETIF_F_ALL_CSUM;
7153 mask |= NETIF_F_VLAN_CHALLENGED;
7154
7155 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
7156 all &= one | ~NETIF_F_ALL_FOR_ALL;
7157
Michał Mirosław1742f182011-04-22 06:31:16 +00007158 /* If one device supports hw checksumming, set for all. */
7159 if (all & NETIF_F_GEN_CSUM)
7160 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
Herbert Xu7f353bf2007-08-10 15:47:58 -07007161
7162 return all;
7163}
Herbert Xub63365a2008-10-23 01:11:29 -07007164EXPORT_SYMBOL(netdev_increment_features);
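/*
 * Illustrative sketch (not part of dev.c, added for clarity): folding slave
 * feature sets into a master device, the way bonding/team-style drivers use
 * this helper.  FOO_MASTER_FEATURES and foo_compute_features() are
 * hypothetical; real drivers pick their own feature mask.
 */
#include <linux/netdevice.h>

#define FOO_MASTER_FEATURES	(NETIF_F_ALL_CSUM | NETIF_F_SG | \
				 NETIF_F_FRAGLIST | NETIF_F_ALL_TSO)

static netdev_features_t foo_compute_features(struct net_device *slaves[],
					      int n)
{
	netdev_features_t features = FOO_MASTER_FEATURES;
	int i;

	for (i = 0; i < n; i++)
		features = netdev_increment_features(features,
						     slaves[i]->features,
						     FOO_MASTER_FEATURES);
	return features;
}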
Herbert Xu7f353bf2007-08-10 15:47:58 -07007165
Baruch Siach430f03c2013-06-02 20:43:55 +00007166static struct hlist_head * __net_init netdev_create_hash(void)
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07007167{
7168 int i;
7169 struct hlist_head *hash;
7170
7171 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
7172 if (hash != NULL)
7173 for (i = 0; i < NETDEV_HASHENTRIES; i++)
7174 INIT_HLIST_HEAD(&hash[i]);
7175
7176 return hash;
7177}
7178
Eric W. Biederman881d9662007-09-17 11:56:21 -07007179/* Initialize per network namespace state */
Pavel Emelyanov46650792007-10-08 20:38:39 -07007180static int __net_init netdev_init(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07007181{
Rustad, Mark D734b6542012-07-18 09:06:07 +00007182 if (net != &init_net)
7183 INIT_LIST_HEAD(&net->dev_base_head);
Eric W. Biederman881d9662007-09-17 11:56:21 -07007184
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07007185 net->dev_name_head = netdev_create_hash();
7186 if (net->dev_name_head == NULL)
7187 goto err_name;
Eric W. Biederman881d9662007-09-17 11:56:21 -07007188
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07007189 net->dev_index_head = netdev_create_hash();
7190 if (net->dev_index_head == NULL)
7191 goto err_idx;
Eric W. Biederman881d9662007-09-17 11:56:21 -07007192
7193 return 0;
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07007194
7195err_idx:
7196 kfree(net->dev_name_head);
7197err_name:
7198 return -ENOMEM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07007199}
7200
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07007201/**
7202 * netdev_drivername - network driver for the device
7203 * @dev: network device
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07007204 *
7205 * Determine network driver for device.
7206 */
David S. Miller3019de12011-06-06 16:41:33 -07007207const char *netdev_drivername(const struct net_device *dev)
Arjan van de Ven6579e572008-07-21 13:31:48 -07007208{
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07007209 const struct device_driver *driver;
7210 const struct device *parent;
David S. Miller3019de12011-06-06 16:41:33 -07007211 const char *empty = "";
Arjan van de Ven6579e572008-07-21 13:31:48 -07007212
7213 parent = dev->dev.parent;
Arjan van de Ven6579e572008-07-21 13:31:48 -07007214 if (!parent)
David S. Miller3019de12011-06-06 16:41:33 -07007215 return empty;
Arjan van de Ven6579e572008-07-21 13:31:48 -07007216
7217 driver = parent->driver;
7218 if (driver && driver->name)
David S. Miller3019de12011-06-06 16:41:33 -07007219 return driver->name;
7220 return empty;
Arjan van de Ven6579e572008-07-21 13:31:48 -07007221}
7222
Joe Perches6ea754e2014-09-22 11:10:50 -07007223static void __netdev_printk(const char *level, const struct net_device *dev,
7224 struct va_format *vaf)
Joe Perches256df2f2010-06-27 01:02:35 +00007225{
Joe Perchesb004ff42012-09-12 20:12:19 -07007226 if (dev && dev->dev.parent) {
Joe Perches6ea754e2014-09-22 11:10:50 -07007227 dev_printk_emit(level[1] - '0',
7228 dev->dev.parent,
7229 "%s %s %s%s: %pV",
7230 dev_driver_string(dev->dev.parent),
7231 dev_name(dev->dev.parent),
7232 netdev_name(dev), netdev_reg_state(dev),
7233 vaf);
Joe Perchesb004ff42012-09-12 20:12:19 -07007234 } else if (dev) {
Joe Perches6ea754e2014-09-22 11:10:50 -07007235 printk("%s%s%s: %pV",
7236 level, netdev_name(dev), netdev_reg_state(dev), vaf);
Joe Perchesb004ff42012-09-12 20:12:19 -07007237 } else {
Joe Perches6ea754e2014-09-22 11:10:50 -07007238 printk("%s(NULL net_device): %pV", level, vaf);
Joe Perchesb004ff42012-09-12 20:12:19 -07007239 }
Joe Perches256df2f2010-06-27 01:02:35 +00007240}
7241
Joe Perches6ea754e2014-09-22 11:10:50 -07007242void netdev_printk(const char *level, const struct net_device *dev,
7243 const char *format, ...)
Joe Perches256df2f2010-06-27 01:02:35 +00007244{
7245 struct va_format vaf;
7246 va_list args;
Joe Perches256df2f2010-06-27 01:02:35 +00007247
7248 va_start(args, format);
7249
7250 vaf.fmt = format;
7251 vaf.va = &args;
7252
Joe Perches6ea754e2014-09-22 11:10:50 -07007253 __netdev_printk(level, dev, &vaf);
Joe Perchesb004ff42012-09-12 20:12:19 -07007254
Joe Perches256df2f2010-06-27 01:02:35 +00007255 va_end(args);
Joe Perches256df2f2010-06-27 01:02:35 +00007256}
7257EXPORT_SYMBOL(netdev_printk);
7258
7259#define define_netdev_printk_level(func, level) \
Joe Perches6ea754e2014-09-22 11:10:50 -07007260void func(const struct net_device *dev, const char *fmt, ...) \
Joe Perches256df2f2010-06-27 01:02:35 +00007261{ \
Joe Perches256df2f2010-06-27 01:02:35 +00007262 struct va_format vaf; \
7263 va_list args; \
7264 \
7265 va_start(args, fmt); \
7266 \
7267 vaf.fmt = fmt; \
7268 vaf.va = &args; \
7269 \
Joe Perches6ea754e2014-09-22 11:10:50 -07007270 __netdev_printk(level, dev, &vaf); \
Joe Perchesb004ff42012-09-12 20:12:19 -07007271 \
Joe Perches256df2f2010-06-27 01:02:35 +00007272 va_end(args); \
Joe Perches256df2f2010-06-27 01:02:35 +00007273} \
7274EXPORT_SYMBOL(func);
7275
7276define_netdev_printk_level(netdev_emerg, KERN_EMERG);
7277define_netdev_printk_level(netdev_alert, KERN_ALERT);
7278define_netdev_printk_level(netdev_crit, KERN_CRIT);
7279define_netdev_printk_level(netdev_err, KERN_ERR);
7280define_netdev_printk_level(netdev_warn, KERN_WARNING);
7281define_netdev_printk_level(netdev_notice, KERN_NOTICE);
7282define_netdev_printk_level(netdev_info, KERN_INFO);
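/*
 * Illustrative sketch (not part of dev.c, added for clarity): the level
 * wrappers generated above are used like printk(), with driver and device
 * names prefixed automatically.  foo_link_change() is hypothetical.
 */
#include <linux/netdevice.h>

static void foo_link_change(struct net_device *dev, bool up)
{
	if (up)
		netdev_info(dev, "link is up\n");
	else
		netdev_warn(dev, "link is down\n");
}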
7283
Pavel Emelyanov46650792007-10-08 20:38:39 -07007284static void __net_exit netdev_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07007285{
7286 kfree(net->dev_name_head);
7287 kfree(net->dev_index_head);
7288}
7289
Denis V. Lunev022cbae2007-11-13 03:23:50 -08007290static struct pernet_operations __net_initdata netdev_net_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07007291 .init = netdev_init,
7292 .exit = netdev_exit,
7293};
7294
Pavel Emelyanov46650792007-10-08 20:38:39 -07007295static void __net_exit default_device_exit(struct net *net)
Eric W. Biedermance286d32007-09-12 13:53:49 +02007296{
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00007297 struct net_device *dev, *aux;
Eric W. Biedermance286d32007-09-12 13:53:49 +02007298 /*
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00007299 * Push all migratable network devices back to the
Eric W. Biedermance286d32007-09-12 13:53:49 +02007300 * initial network namespace
7301 */
7302 rtnl_lock();
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00007303 for_each_netdev_safe(net, dev, aux) {
Eric W. Biedermance286d32007-09-12 13:53:49 +02007304 int err;
Pavel Emelyanovaca51392008-05-08 01:24:25 -07007305 char fb_name[IFNAMSIZ];
Eric W. Biedermance286d32007-09-12 13:53:49 +02007306
7307 /* Ignore unmoveable devices (i.e. loopback) */
7308 if (dev->features & NETIF_F_NETNS_LOCAL)
7309 continue;
7310
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00007311 /* Leave virtual devices for the generic cleanup */
7312 if (dev->rtnl_link_ops)
7313 continue;
Eric W. Biedermand0c082c2008-11-05 15:59:38 -08007314
Lucas De Marchi25985ed2011-03-30 22:57:33 -03007315 /* Push remaining network devices to init_net */
Pavel Emelyanovaca51392008-05-08 01:24:25 -07007316 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
7317 err = dev_change_net_namespace(dev, &init_net, fb_name);
Eric W. Biedermance286d32007-09-12 13:53:49 +02007318 if (err) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00007319 pr_emerg("%s: failed to move %s to init_net: %d\n",
7320 __func__, dev->name, err);
Pavel Emelyanovaca51392008-05-08 01:24:25 -07007321 BUG();
Eric W. Biedermance286d32007-09-12 13:53:49 +02007322 }
7323 }
7324 rtnl_unlock();
7325}
7326
Eric W. Biederman50624c92013-09-23 21:19:49 -07007327static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
7328{
7329 /* Return with the rtnl_lock held when there are no network
7330 * devices unregistering in any network namespace in net_list.
7331 */
7332 struct net *net;
7333 bool unregistering;
Peter Zijlstraff960a72014-10-29 17:04:56 +01007334 DEFINE_WAIT_FUNC(wait, woken_wake_function);
Eric W. Biederman50624c92013-09-23 21:19:49 -07007335
Peter Zijlstraff960a72014-10-29 17:04:56 +01007336 add_wait_queue(&netdev_unregistering_wq, &wait);
Eric W. Biederman50624c92013-09-23 21:19:49 -07007337 for (;;) {
Eric W. Biederman50624c92013-09-23 21:19:49 -07007338 unregistering = false;
7339 rtnl_lock();
7340 list_for_each_entry(net, net_list, exit_list) {
7341 if (net->dev_unreg_count > 0) {
7342 unregistering = true;
7343 break;
7344 }
7345 }
7346 if (!unregistering)
7347 break;
7348 __rtnl_unlock();
Peter Zijlstraff960a72014-10-29 17:04:56 +01007349
7350 wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
Eric W. Biederman50624c92013-09-23 21:19:49 -07007351 }
Peter Zijlstraff960a72014-10-29 17:04:56 +01007352 remove_wait_queue(&netdev_unregistering_wq, &wait);
Eric W. Biederman50624c92013-09-23 21:19:49 -07007353}
7354
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00007355static void __net_exit default_device_exit_batch(struct list_head *net_list)
7356{
7357	/* At exit all network devices must be removed from a network
Uwe Kleine-Königb5950762010-11-01 15:38:34 -04007358 * namespace. Do this in the reverse order of registration.
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00007359 * Do this across as many network namespaces as possible to
7360 * improve batching efficiency.
7361 */
7362 struct net_device *dev;
7363 struct net *net;
7364 LIST_HEAD(dev_kill_list);
7365
Eric W. Biederman50624c92013-09-23 21:19:49 -07007366 /* To prevent network device cleanup code from dereferencing
7367	 * loopback devices or network devices that have been freed,
7368	 * wait here for all pending unregistrations to complete
7369	 * before unregistering the loopback device and allowing the
7370	 * network namespace to be freed.
7371 *
7372	 * The netdev todo list containing all network device
7373 * unregistrations that happen in default_device_exit_batch
7374 * will run in the rtnl_unlock() at the end of
7375 * default_device_exit_batch.
7376 */
7377 rtnl_lock_unregistering(net_list);
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00007378 list_for_each_entry(net, net_list, exit_list) {
7379 for_each_netdev_reverse(net, dev) {
Jiri Pirkob0ab2fa2014-06-26 09:58:25 +02007380 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00007381 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
7382 else
7383 unregister_netdevice_queue(dev, &dev_kill_list);
7384 }
7385 }
7386 unregister_netdevice_many(&dev_kill_list);
7387 rtnl_unlock();
7388}
7389
Denis V. Lunev022cbae2007-11-13 03:23:50 -08007390static struct pernet_operations __net_initdata default_device_ops = {
Eric W. Biedermance286d32007-09-12 13:53:49 +02007391 .exit = default_device_exit,
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00007392 .exit_batch = default_device_exit_batch,
Eric W. Biedermance286d32007-09-12 13:53:49 +02007393};
7394
Linus Torvalds1da177e2005-04-16 15:20:36 -07007395/*
7396 * Initialize the DEV module. At boot time this walks the device list and
7397 * unhooks any devices that fail to initialise (normally hardware not
7398 * present) and leaves us with a valid list of present and active devices.
7399 *
7400 */
7401
7402/*
7403 * This is called single threaded during boot, so no need
7404 * to take the rtnl semaphore.
7405 */
7406static int __init net_dev_init(void)
7407{
7408 int i, rc = -ENOMEM;
7409
7410 BUG_ON(!dev_boot_phase);
7411
Linus Torvalds1da177e2005-04-16 15:20:36 -07007412 if (dev_proc_init())
7413 goto out;
7414
Eric W. Biederman8b41d182007-09-26 22:02:53 -07007415 if (netdev_kobject_init())
Linus Torvalds1da177e2005-04-16 15:20:36 -07007416 goto out;
7417
7418 INIT_LIST_HEAD(&ptype_all);
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08007419 for (i = 0; i < PTYPE_HASH_SIZE; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -07007420 INIT_LIST_HEAD(&ptype_base[i]);
7421
Vlad Yasevich62532da2012-11-15 08:49:10 +00007422 INIT_LIST_HEAD(&offload_base);
7423
Eric W. Biederman881d9662007-09-17 11:56:21 -07007424 if (register_pernet_subsys(&netdev_net_ops))
7425 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07007426
7427 /*
7428 * Initialise the packet receive queues.
7429 */
7430
KAMEZAWA Hiroyuki6f912042006-04-10 22:52:50 -07007431 for_each_possible_cpu(i) {
Eric Dumazete36fa2f2010-04-19 21:17:14 +00007432 struct softnet_data *sd = &per_cpu(softnet_data, i);
Linus Torvalds1da177e2005-04-16 15:20:36 -07007433
Eric Dumazete36fa2f2010-04-19 21:17:14 +00007434 skb_queue_head_init(&sd->input_pkt_queue);
Changli Gao6e7676c2010-04-27 15:07:33 -07007435 skb_queue_head_init(&sd->process_queue);
Eric Dumazete36fa2f2010-04-19 21:17:14 +00007436 INIT_LIST_HEAD(&sd->poll_list);
Changli Gaoa9cbd582010-04-26 23:06:24 +00007437 sd->output_queue_tailp = &sd->output_queue;
Eric Dumazetdf334542010-03-24 19:13:54 +00007438#ifdef CONFIG_RPS
Eric Dumazete36fa2f2010-04-19 21:17:14 +00007439 sd->csd.func = rps_trigger_softirq;
7440 sd->csd.info = sd;
Eric Dumazete36fa2f2010-04-19 21:17:14 +00007441 sd->cpu = i;
Tom Herbert1e94d722010-03-18 17:45:44 -07007442#endif
Tom Herbert0a9627f2010-03-16 08:03:29 +00007443
Eric Dumazete36fa2f2010-04-19 21:17:14 +00007444 sd->backlog.poll = process_backlog;
7445 sd->backlog.weight = weight_p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07007446 }
7447
Linus Torvalds1da177e2005-04-16 15:20:36 -07007448 dev_boot_phase = 0;
7449
Eric W. Biederman505d4f72008-11-07 22:54:20 -08007450	/* The loopback device is special: if any other network device
7451	 * is present in a network namespace, the loopback device must
7452	 * be present too. Since we now dynamically allocate and free the
7453	 * loopback device, ensure this invariant is maintained by
7454	 * keeping the loopback device as the first device on the
7455	 * list of network devices, ensuring the loopback device
7456 * is the first device that appears and the last network device
7457 * that disappears.
7458 */
7459 if (register_pernet_device(&loopback_net_ops))
7460 goto out;
7461
7462 if (register_pernet_device(&default_device_ops))
7463 goto out;
7464
Carlos R. Mafra962cf362008-05-15 11:15:37 -03007465 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
7466 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -07007467
7468 hotcpu_notifier(dev_cpu_callback, 0);
7469 dst_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07007470 rc = 0;
7471out:
7472 return rc;
7473}
7474
7475subsys_initcall(net_dev_init);