/*
 *	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *	Authors:	Ross Biro
 *			Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *			Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain:	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *	Paul Rusty Russell	:	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/net_tstamp.h>
#include <linux/static_key.h>
#include <net/flow_keys.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *	       sure which should go first, but I bet it won't make much
 *	       difference if we are running VLANs.  The good news is that
 *	       this protocol won't be in the list unless compiled in, so
 *	       the average user (w/out VLANs) will not be adversely affected.
 *	       --BLG
 *
 *		0800	IP
 *		8100	802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;	/* Taps */

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

static inline void dev_base_seq_inc(struct net *net)
{
	while (++net->dev_base_seq == 0);
}

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));

	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_lock(&sd->input_pkt_queue.lock);
#endif
}

static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_unlock(&sd->input_pkt_queue.lock);
#endif
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(net);

	return 0;
}

/* Device list removal
 * caller must respect a RCU grace period before freeing/reusing dev
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	hlist_del_rcu(&dev->name_hlist);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(dev_net(dev));
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
	 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
	 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
	 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
	 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))
		return &ptype_all;
	else
		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep, therefore it cannot guarantee that
 *	all CPUs that are in the middle of receiving packets will see
 *	the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);

	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, head);
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
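
/*
 * Usage sketch (illustrative only; not used elsewhere in this file): a
 * protocol module usually embeds a packet_type and registers it once at
 * init time.  my_proto_rcv() is a made-up handler with the standard
 * packet_type receive signature.
 *
 *	static struct packet_type my_proto_pt __read_mostly = {
 *		.type	= cpu_to_be16(ETH_P_IP),
 *		.func	= my_proto_rcv,
 *	};
 *
 *	dev_add_pack(&my_proto_pt);
 *
 * The matching dev_remove_pack(&my_proto_pt) at exit time sleeps, so the
 * packet_type and its handler stay valid until all readers are done.
 */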

/**
 *	__dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);
	struct packet_type *pt1;

	spin_lock(&ptype_lock);

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	pr_warn("dev_remove_pack: %p not found\n", pt);
out:
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);

/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine to
 *	all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq	= s[i].map.irq;
			dev->base_addr	= s[i].map.base_addr;
			dev->mem_start	= s[i].map.mem_start;
			dev->mem_end	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);


/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

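/*
 * For reference, the boot line parsed above has the form
 *
 *	netdev=<irq>,<base_addr>,<mem_start>,<mem_end>,<name>
 *
 * e.g. "netdev=5,0x340,0,0,eth0" (illustrative values): up to four
 * integers are consumed left to right by get_options() and the trailing
 * string is the interface name handed to netdev_boot_setup_add().
 */
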
/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

/**
 *	dev_get_by_name_rcu	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.
 *	If the name is found a pointer to the device is returned.
 *	If the name is not found then %NULL is returned.
 *	The reference counters are not incremented so the caller must be
 *	careful with locks. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);

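/*
 * Usage sketch (illustrative): an RCU lookup is only valid inside the
 * read-side critical section, so the result must not be used after
 * rcu_read_unlock() unless a reference was taken first.
 *
 *	rcu_read_lock();
 *	dev = dev_get_by_name_rcu(net, "eth0");
 *	if (dev)
 *		ifindex = dev->ifindex;
 *	rcu_read_unlock();
 */
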
/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);

/**
 *	dev_get_by_index_rcu - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);
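
/*
 * Usage sketch (illustrative): unlike the _rcu variants above, this
 * helper returns a referenced device that the caller owns and must
 * release with dev_put() when finished.
 *
 *	dev = dev_get_by_index(net, ifindex);
 *	if (!dev)
 *		return -ENODEV;
 *	...
 *	dev_put(dev);
 */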

/**
 *	dev_getbyhwaddr_rcu - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device.
 *	The caller must hold RCU or RTNL.
 *	The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking
 *
 */

struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
				       const char *ha)
{
	struct net_device *dev;

	for_each_netdev_rcu(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
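
/*
 * Usage sketch (illustrative): no reference is taken, so the lookup and
 * any use of the result must stay inside a single RCU read-side section;
 * the buffer simply holds the raw hardware address bytes to match.
 *
 *	const char ha[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
 *
 *	rcu_read_lock();
 *	dev = dev_getbyhwaddr_rcu(net, ARPHRD_ETHER, ha);
 *	if (dev)
 *		netdev_info(dev, "found by MAC\n");
 *	rcu_read_unlock();
 */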

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev, *ret = NULL;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev)
		if (dev->type == type) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags_rcu - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. Must be called inside
 *	rcu_read_lock(), and result refcount is unchanged.
 */

struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
					unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	for_each_netdev_rcu(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			ret = dev;
			break;
		}
	}
	return ret;
}
EXPORT_SYMBOL(dev_get_by_flags_rcu);

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names
 *	to allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
bool dev_valid_name(const char *name)
{
	if (*name == '\0')
		return false;
	if (strlen(name) >= IFNAMSIZ)
		return false;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return false;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return false;
		name++;
	}
	return true;
}
EXPORT_SYMBOL(dev_valid_name);

/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	if (buf != name)
		snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
EXPORT_SYMBOL(dev_alloc_name);
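
/*
 * Usage sketch (illustrative): a driver that wants the usual numbered
 * naming passes a format string containing exactly one "%d"; the core
 * then picks the first free unit, e.g. "eth%d" may become "eth2".
 * "mydev%d" below is a made-up name.
 *
 *	err = dev_alloc_name(dev, "mydev%d");
 *	if (err < 0)
 *		goto out_free;
 */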

static int dev_get_valid_name(struct net_device *dev, const char *name)
{
	struct net *net;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);

	if (!dev_valid_name(name))
		return -EINVAL;

	if (strchr(name, '%'))
		return dev_alloc_name(dev, name);
	else if (__dev_get_by_name(net, name))
		return -EEXIST;
	else if (dev->name != name)
		strlcpy(dev->name, name, IFNAMSIZ);

	return 0;
}

/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d"
 *	for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;

	memcpy(oldname, dev->name, IFNAMSIZ);

	err = dev_get_valid_name(dev, newname);
	if (err < 0)
		return err;

rollback:
	ret = device_rename(&dev->dev, dev->name);
	if (ret) {
		memcpy(dev->name, oldname, IFNAMSIZ);
		return ret;
	}

	write_lock_bh(&dev_base_lock);
	hlist_del_rcu(&dev->name_hlist);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			pr_err("%s: name change rollback failed: %d\n",
			       dev->name, ret);
		}
	}

	return err;
}

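/*
 * Usage sketch (illustrative): renames run under RTNL and are refused
 * with -EBUSY while the interface is up, so callers typically bring the
 * device down first.  A format string is accepted here as well.
 *
 *	rtnl_lock();
 *	err = dev_change_name(dev, "wan%d");
 *	rtnl_unlock();
 */
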
/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from info
 *
 *	Set ifalias for a device,
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	char *new_ifalias;

	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		if (dev->ifalias) {
			kfree(dev->ifalias);
			dev->ifalias = NULL;
		}
		return 0;
	}

	new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!new_ifalias)
		return -ENOMEM;
	dev->ifalias = new_ifalias;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}


/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
EXPORT_SYMBOL(netdev_state_change);

/**
 *	netdev_notify_peers - notify network peers about existence of @dev
 *	@dev: network device
 *
 *	Generate traffic such that interested network peers are aware of
 *	@dev, such as by generating a gratuitous ARP. This may be used when
 *	a device wants to inform the rest of the network about some sort of
 *	reconfiguration such as a failover event or virtual machine
 *	migration.
 */
void netdev_notify_peers(struct net_device *dev)
{
	rtnl_lock();
	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(netdev_notify_peers);

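/*
 * Usage sketch (illustrative): a failover or migration path calls this
 * once the new link is active so peers relearn where the addresses live.
 * Note that the function takes the rtnl lock itself, so the caller must
 * not already hold it.  new_active_dev below is a made-up name.
 *
 *	netdev_notify_peers(new_active_dev);
 */
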
/**
 *	dev_load 	- load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;
	int no_module;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	rcu_read_unlock();

	no_module = !dev;
	if (no_module && capable(CAP_NET_ADMIN))
		no_module = request_module("netdev-%s", name);
	if (no_module && capable(CAP_SYS_MODULE)) {
		if (!request_module("%s", name))
			pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
				name);
	}
}
EXPORT_SYMBOL(dev_load);
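
/*
 * Usage sketch (illustrative): the privileged path above resolves to a
 * module alias of the form "netdev-<name>", so a module that should be
 * auto-loaded for a fixed interface name can advertise it, e.g. via the
 * MODULE_ALIAS_NETDEV() helper (assuming that helper is available):
 *
 *	MODULE_ALIAS_NETDEV("mydev0");
 *
 * A later dev_load(net, "mydev0") from an ioctl path can then pull the
 * module in before the device lookup is retried.  "mydev0" is made up.
 */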

static int __dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	if (!netif_device_present(dev))
		return -ENODEV;

	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		return ret;

	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		dev->flags |= IFF_UP;
		net_dmaengine_get();
		dev_set_rx_mode(dev);
		dev_activate(dev);
		add_device_randomness(dev->dev_addr, dev->addr_len);
	}

	return ret;
}

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	int ret;

	if (dev->flags & IFF_UP)
		return 0;

	ret = __dev_open(dev);
	if (ret < 0)
		return ret;

	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
	call_netdevice_notifiers(NETDEV_UP, dev);

	return ret;
}
EXPORT_SYMBOL(dev_open);
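
/*
 * Usage sketch (illustrative): like most control-path entry points in
 * this file, dev_open() expects the caller to hold RTNL, matching the
 * ASSERT_RTNL() in __dev_open() above.
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	rtnl_unlock();
 */
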
Octavian Purdila44345722010-12-13 12:44:07 +00001226static int __dev_close_many(struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227{
Octavian Purdila44345722010-12-13 12:44:07 +00001228 struct net_device *dev;
Patrick McHardybd380812010-02-26 06:34:53 +00001229
Ben Hutchingse46b66b2008-05-08 02:53:17 -07001230 ASSERT_RTNL();
David S. Miller9d5010d2007-09-12 14:33:25 +02001231 might_sleep();
1232
Octavian Purdila44345722010-12-13 12:44:07 +00001233 list_for_each_entry(dev, head, unreg_list) {
Octavian Purdila44345722010-12-13 12:44:07 +00001234 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235
Octavian Purdila44345722010-12-13 12:44:07 +00001236 clear_bit(__LINK_STATE_START, &dev->state);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237
Octavian Purdila44345722010-12-13 12:44:07 +00001238 /* Synchronize to scheduled poll. We cannot touch poll list, it
1239 * can be even on different cpu. So just clear netif_running().
1240 *
1241 * dev->stop() will invoke napi_disable() on all of it's
1242 * napi_struct instances on this device.
1243 */
1244 smp_mb__after_clear_bit(); /* Commit netif_running(). */
1245 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246
Octavian Purdila44345722010-12-13 12:44:07 +00001247 dev_deactivate_many(head);
1248
1249 list_for_each_entry(dev, head, unreg_list) {
1250 const struct net_device_ops *ops = dev->netdev_ops;
1251
1252 /*
		1253		 * Call the device-specific close. This cannot fail and is
		1254		 * only done if the device is UP.
1255 *
1256 * We allow it to be called even after a DETACH hot-plug
1257 * event.
1258 */
1259 if (ops->ndo_stop)
1260 ops->ndo_stop(dev);
1261
Octavian Purdila44345722010-12-13 12:44:07 +00001262 dev->flags &= ~IFF_UP;
Octavian Purdila44345722010-12-13 12:44:07 +00001263 net_dmaengine_put();
1264 }
1265
1266 return 0;
1267}
1268
1269static int __dev_close(struct net_device *dev)
1270{
Linus Torvaldsf87e6f42011-02-17 22:54:38 +00001271 int retval;
Octavian Purdila44345722010-12-13 12:44:07 +00001272 LIST_HEAD(single);
1273
1274 list_add(&dev->unreg_list, &single);
Linus Torvaldsf87e6f42011-02-17 22:54:38 +00001275 retval = __dev_close_many(&single);
1276 list_del(&single);
1277 return retval;
Octavian Purdila44345722010-12-13 12:44:07 +00001278}
1279
Eric Dumazet3fbd8752011-01-19 21:23:22 +00001280static int dev_close_many(struct list_head *head)
Octavian Purdila44345722010-12-13 12:44:07 +00001281{
1282 struct net_device *dev, *tmp;
1283 LIST_HEAD(tmp_list);
1284
1285 list_for_each_entry_safe(dev, tmp, head, unreg_list)
1286 if (!(dev->flags & IFF_UP))
1287 list_move(&dev->unreg_list, &tmp_list);
1288
1289 __dev_close_many(head);
Matti Linnanvuorid8b2a4d2008-02-12 23:10:11 -08001290
Octavian Purdila44345722010-12-13 12:44:07 +00001291 list_for_each_entry(dev, head, unreg_list) {
1292 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1293 call_netdevice_notifiers(NETDEV_DOWN, dev);
1294 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295
Octavian Purdila44345722010-12-13 12:44:07 +00001296 /* rollback_registered_many needs the complete original list */
1297 list_splice(&tmp_list, head);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298 return 0;
1299}
Patrick McHardybd380812010-02-26 06:34:53 +00001300
1301/**
1302 * dev_close - shutdown an interface.
1303 * @dev: device to shutdown
1304 *
1305 * This function moves an active device into down state. A
1306 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1307 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1308 * chain.
1309 */
1310int dev_close(struct net_device *dev)
1311{
Eric Dumazete14a5992011-05-10 12:26:06 -07001312 if (dev->flags & IFF_UP) {
1313 LIST_HEAD(single);
Patrick McHardybd380812010-02-26 06:34:53 +00001314
Eric Dumazete14a5992011-05-10 12:26:06 -07001315 list_add(&dev->unreg_list, &single);
1316 dev_close_many(&single);
1317 list_del(&single);
1318 }
Patrick McHardybd380812010-02-26 06:34:53 +00001319 return 0;
1320}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001321EXPORT_SYMBOL(dev_close);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322
1323
Ben Hutchings0187bdf2008-06-19 16:15:47 -07001324/**
1325 * dev_disable_lro - disable Large Receive Offload on a device
1326 * @dev: device
1327 *
1328 * Disable Large Receive Offload (LRO) on a net device. Must be
1329 * called under RTNL. This is needed if received packets may be
1330 * forwarded to another interface.
1331 */
1332void dev_disable_lro(struct net_device *dev)
1333{
Neil Hormanf11970e2011-05-24 08:31:09 +00001334 /*
	1335	 * If we're trying to disable LRO on a vlan device,
	1336	 * use the underlying physical device instead.
1337 */
1338 if (is_vlan_dev(dev))
1339 dev = vlan_dev_real_dev(dev);
1340
Michał Mirosławbc5787c62011-11-15 15:29:55 +00001341 dev->wanted_features &= ~NETIF_F_LRO;
1342 netdev_update_features(dev);
Michał Mirosław27660512011-03-18 16:56:34 +00001343
Michał Mirosław22d59692011-04-21 12:42:15 +00001344 if (unlikely(dev->features & NETIF_F_LRO))
1345 netdev_WARN(dev, "failed to disable LRO!\n");
Ben Hutchings0187bdf2008-06-19 16:15:47 -07001346}
1347EXPORT_SYMBOL(dev_disable_lro);
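/*
 * Hedged example (an assumption, not taken from this file): a forwarding
 * setup path, already holding RTNL, might disable LRO on every device in a
 * namespace before packets can be forwarded between them.
 *
 *	struct net_device *dev;
 *
 *	ASSERT_RTNL();
 *	for_each_netdev(net, dev)
 *		dev_disable_lro(dev);
 */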
1348
1349
Eric W. Biederman881d9662007-09-17 11:56:21 -07001350static int dev_boot_phase = 1;
1351
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352/**
1353 * register_netdevice_notifier - register a network notifier block
1354 * @nb: notifier
1355 *
1356 * Register a notifier to be called when network device events occur.
1357 * The notifier passed is linked into the kernel structures and must
1358 * not be reused until it has been unregistered. A negative errno code
1359 * is returned on a failure.
1360 *
 1361 *	When registered, all registration and up events are replayed
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001362 *	to the new notifier to allow the device to have a race-free
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363 * view of the network device list.
1364 */
1365
1366int register_netdevice_notifier(struct notifier_block *nb)
1367{
1368 struct net_device *dev;
Herbert Xufcc5a032007-07-30 17:03:38 -07001369 struct net_device *last;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001370 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 int err;
1372
1373 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001374 err = raw_notifier_chain_register(&netdev_chain, nb);
Herbert Xufcc5a032007-07-30 17:03:38 -07001375 if (err)
1376 goto unlock;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001377 if (dev_boot_phase)
1378 goto unlock;
1379 for_each_net(net) {
1380 for_each_netdev(net, dev) {
1381 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1382 err = notifier_to_errno(err);
1383 if (err)
1384 goto rollback;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385
Eric W. Biederman881d9662007-09-17 11:56:21 -07001386 if (!(dev->flags & IFF_UP))
1387 continue;
Herbert Xufcc5a032007-07-30 17:03:38 -07001388
Eric W. Biederman881d9662007-09-17 11:56:21 -07001389 nb->notifier_call(nb, NETDEV_UP, dev);
1390 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391 }
Herbert Xufcc5a032007-07-30 17:03:38 -07001392
1393unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 rtnl_unlock();
1395 return err;
Herbert Xufcc5a032007-07-30 17:03:38 -07001396
1397rollback:
1398 last = dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001399 for_each_net(net) {
1400 for_each_netdev(net, dev) {
1401 if (dev == last)
RongQing.Li8f891482011-11-30 23:43:07 -05001402 goto outroll;
Herbert Xufcc5a032007-07-30 17:03:38 -07001403
Eric W. Biederman881d9662007-09-17 11:56:21 -07001404 if (dev->flags & IFF_UP) {
1405 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1406 nb->notifier_call(nb, NETDEV_DOWN, dev);
1407 }
1408 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07001409 }
Herbert Xufcc5a032007-07-30 17:03:38 -07001410 }
Pavel Emelyanovc67625a2007-11-14 15:53:16 -08001411
RongQing.Li8f891482011-11-30 23:43:07 -05001412outroll:
Pavel Emelyanovc67625a2007-11-14 15:53:16 -08001413 raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xufcc5a032007-07-30 17:03:38 -07001414 goto unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001416EXPORT_SYMBOL(register_netdevice_notifier);
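/*
 * Minimal usage sketch (assumed names, not part of dev.c): a module that
 * wants to track interface state registers a notifier_block. As the loop
 * above shows, at this point in the kernel's history the notifier's third
 * argument is the struct net_device pointer itself.
 *
 *	static int example_netdev_event(struct notifier_block *nb,
 *					unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			pr_info("%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_nb = {
 *		.notifier_call = example_netdev_event,
 *	};
 *
 *	// in module init / exit:
 *	register_netdevice_notifier(&example_nb);
 *	...
 *	unregister_netdevice_notifier(&example_nb);
 */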
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417
1418/**
1419 * unregister_netdevice_notifier - unregister a network notifier block
1420 * @nb: notifier
1421 *
1422 * Unregister a notifier previously registered by
 1423 *	register_netdevice_notifier(). The notifier is unlinked from the
 1424 *	kernel structures and may then be reused. A negative errno code
1425 * is returned on a failure.
Eric W. Biederman7d3d43d2012-04-06 15:33:35 +00001426 *
 1427 *	After unregistering, unregister and down device events are synthesized
 1428 *	for all devices on the device list and sent to the removed notifier,
 1429 *	removing the need for special-case cleanup code.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 */
1431
1432int unregister_netdevice_notifier(struct notifier_block *nb)
1433{
Eric W. Biederman7d3d43d2012-04-06 15:33:35 +00001434 struct net_device *dev;
1435 struct net *net;
Herbert Xu9f514952006-03-25 01:24:25 -08001436 int err;
1437
1438 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001439 err = raw_notifier_chain_unregister(&netdev_chain, nb);
Eric W. Biederman7d3d43d2012-04-06 15:33:35 +00001440 if (err)
1441 goto unlock;
1442
1443 for_each_net(net) {
1444 for_each_netdev(net, dev) {
1445 if (dev->flags & IFF_UP) {
1446 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1447 nb->notifier_call(nb, NETDEV_DOWN, dev);
1448 }
1449 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
Eric W. Biederman7d3d43d2012-04-06 15:33:35 +00001450 }
1451 }
1452unlock:
Herbert Xu9f514952006-03-25 01:24:25 -08001453 rtnl_unlock();
1454 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001456EXPORT_SYMBOL(unregister_netdevice_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457
1458/**
1459 * call_netdevice_notifiers - call all network notifier blocks
1460 * @val: value passed unmodified to notifier function
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07001461 * @dev: net_device pointer passed unmodified to notifier function
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462 *
1463 * Call all network notifier blocks. Parameters and return value
Alan Sternf07d5b92006-05-09 15:23:03 -07001464 * are as for raw_notifier_call_chain().
Linus Torvalds1da177e2005-04-16 15:20:36 -07001465 */
1466
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001467int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468{
Eric Dumazet748e2d92012-08-22 21:50:59 +00001469 ASSERT_RTNL();
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001470 return raw_notifier_call_chain(&netdev_chain, val, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471}
stephen hemmingeredf947f2011-03-24 13:24:01 +00001472EXPORT_SYMBOL(call_netdevice_notifiers);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473
Ingo Molnarc5905af2012-02-24 08:31:31 +01001474static struct static_key netstamp_needed __read_mostly;
Eric Dumazetb90e5792011-11-28 11:16:50 +00001475#ifdef HAVE_JUMP_LABEL
Ingo Molnarc5905af2012-02-24 08:31:31 +01001476/* We are not allowed to call static_key_slow_dec() from irq context
Eric Dumazetb90e5792011-11-28 11:16:50 +00001477 * If net_disable_timestamp() is called from irq context, defer the
Ingo Molnarc5905af2012-02-24 08:31:31 +01001478 * static_key_slow_dec() calls.
Eric Dumazetb90e5792011-11-28 11:16:50 +00001479 */
1480static atomic_t netstamp_needed_deferred;
1481#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482
1483void net_enable_timestamp(void)
1484{
Eric Dumazetb90e5792011-11-28 11:16:50 +00001485#ifdef HAVE_JUMP_LABEL
1486 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1487
1488 if (deferred) {
1489 while (--deferred)
Ingo Molnarc5905af2012-02-24 08:31:31 +01001490 static_key_slow_dec(&netstamp_needed);
Eric Dumazetb90e5792011-11-28 11:16:50 +00001491 return;
1492 }
1493#endif
1494 WARN_ON(in_interrupt());
Ingo Molnarc5905af2012-02-24 08:31:31 +01001495 static_key_slow_inc(&netstamp_needed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001497EXPORT_SYMBOL(net_enable_timestamp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498
1499void net_disable_timestamp(void)
1500{
Eric Dumazetb90e5792011-11-28 11:16:50 +00001501#ifdef HAVE_JUMP_LABEL
1502 if (in_interrupt()) {
1503 atomic_inc(&netstamp_needed_deferred);
1504 return;
1505 }
1506#endif
Ingo Molnarc5905af2012-02-24 08:31:31 +01001507 static_key_slow_dec(&netstamp_needed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001509EXPORT_SYMBOL(net_disable_timestamp);
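/*
 * Sketch of the expected pairing (illustrative assumption): a feature that
 * needs packet timestamps takes a reference with net_enable_timestamp() and
 * drops it with net_disable_timestamp() when done, so the static key above
 * only patches in the timestamping code while someone actually needs it.
 *
 *	// e.g. when a socket asks for timestamps
 *	net_enable_timestamp();
 *	...
 *	// and when that socket is torn down
 *	net_disable_timestamp();
 */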
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510
Eric Dumazet3b098e22010-05-15 23:57:10 -07001511static inline void net_timestamp_set(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512{
Eric Dumazet588f0332011-11-15 04:12:55 +00001513 skb->tstamp.tv64 = 0;
Ingo Molnarc5905af2012-02-24 08:31:31 +01001514 if (static_key_false(&netstamp_needed))
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001515 __net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516}
1517
Eric Dumazet588f0332011-11-15 04:12:55 +00001518#define net_timestamp_check(COND, SKB) \
Ingo Molnarc5905af2012-02-24 08:31:31 +01001519 if (static_key_false(&netstamp_needed)) { \
Eric Dumazet588f0332011-11-15 04:12:55 +00001520 if ((COND) && !(SKB)->tstamp.tv64) \
1521 __net_timestamp(SKB); \
1522 } \
Eric Dumazet3b098e22010-05-15 23:57:10 -07001523
Richard Cochran4dc360c2011-10-19 17:00:35 -04001524static int net_hwtstamp_validate(struct ifreq *ifr)
1525{
1526 struct hwtstamp_config cfg;
1527 enum hwtstamp_tx_types tx_type;
1528 enum hwtstamp_rx_filters rx_filter;
1529 int tx_type_valid = 0;
1530 int rx_filter_valid = 0;
1531
1532 if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
1533 return -EFAULT;
1534
1535 if (cfg.flags) /* reserved for future extensions */
1536 return -EINVAL;
1537
1538 tx_type = cfg.tx_type;
1539 rx_filter = cfg.rx_filter;
1540
1541 switch (tx_type) {
1542 case HWTSTAMP_TX_OFF:
1543 case HWTSTAMP_TX_ON:
1544 case HWTSTAMP_TX_ONESTEP_SYNC:
1545 tx_type_valid = 1;
1546 break;
1547 }
1548
1549 switch (rx_filter) {
1550 case HWTSTAMP_FILTER_NONE:
1551 case HWTSTAMP_FILTER_ALL:
1552 case HWTSTAMP_FILTER_SOME:
1553 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
1554 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
1555 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
1556 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
1557 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
1558 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
1559 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
1560 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
1561 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
1562 case HWTSTAMP_FILTER_PTP_V2_EVENT:
1563 case HWTSTAMP_FILTER_PTP_V2_SYNC:
1564 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
1565 rx_filter_valid = 1;
1566 break;
1567 }
1568
1569 if (!tx_type_valid || !rx_filter_valid)
1570 return -ERANGE;
1571
1572 return 0;
1573}
1574
Daniel Lezcano79b569f2011-03-30 02:42:17 -07001575static inline bool is_skb_forwardable(struct net_device *dev,
1576 struct sk_buff *skb)
1577{
1578 unsigned int len;
1579
1580 if (!(dev->flags & IFF_UP))
1581 return false;
1582
1583 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1584 if (skb->len <= len)
1585 return true;
1586
	1587	 /* if TSO is enabled, we don't care about the length, as the packet
	1588	 * could be forwarded without being segmented beforehand
1589 */
1590 if (skb_is_gso(skb))
1591 return true;
1592
1593 return false;
1594}
1595
Arnd Bergmann44540962009-11-26 06:07:08 +00001596/**
1597 * dev_forward_skb - loopback an skb to another netif
1598 *
1599 * @dev: destination network device
1600 * @skb: buffer to forward
1601 *
1602 * return values:
1603 * NET_RX_SUCCESS (no congestion)
Eric Dumazet6ec82562010-05-06 00:53:53 -07001604 * NET_RX_DROP (packet was dropped, but freed)
Arnd Bergmann44540962009-11-26 06:07:08 +00001605 *
1606 * dev_forward_skb can be used for injecting an skb from the
1607 * start_xmit function of one device into the receive queue
1608 * of another device.
1609 *
1610 * The receiving device may be in another namespace, so
1611 * we have to clear all information in the skb that could
1612 * impact namespace isolation.
1613 */
1614int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1615{
Michael S. Tsirkin48c83012011-08-31 08:03:29 +00001616 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1617 if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1618 atomic_long_inc(&dev->rx_dropped);
1619 kfree_skb(skb);
1620 return NET_RX_DROP;
1621 }
1622 }
1623
Arnd Bergmann44540962009-11-26 06:07:08 +00001624 skb_orphan(skb);
Ben Greearc736eef2010-07-22 09:54:47 +00001625 nf_reset(skb);
Arnd Bergmann44540962009-11-26 06:07:08 +00001626
Daniel Lezcano79b569f2011-03-30 02:42:17 -07001627 if (unlikely(!is_skb_forwardable(dev, skb))) {
Eric Dumazetcaf586e2010-09-30 21:06:55 +00001628 atomic_long_inc(&dev->rx_dropped);
Eric Dumazet6ec82562010-05-06 00:53:53 -07001629 kfree_skb(skb);
Arnd Bergmann44540962009-11-26 06:07:08 +00001630 return NET_RX_DROP;
Eric Dumazet6ec82562010-05-06 00:53:53 -07001631 }
Benjamin LaHaise3b9785c2012-03-27 15:55:44 +00001632 skb->skb_iif = 0;
David S. Miller59b99972012-05-10 23:03:34 -04001633 skb->dev = dev;
1634 skb_dst_drop(skb);
Arnd Bergmann44540962009-11-26 06:07:08 +00001635 skb->tstamp.tv64 = 0;
1636 skb->pkt_type = PACKET_HOST;
1637 skb->protocol = eth_type_trans(skb, dev);
David S. Miller59b99972012-05-10 23:03:34 -04001638 skb->mark = 0;
1639 secpath_reset(skb);
1640 nf_reset(skb);
Arnd Bergmann44540962009-11-26 06:07:08 +00001641 return netif_rx(skb);
1642}
1643EXPORT_SYMBOL_GPL(dev_forward_skb);
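/*
 * Illustrative sketch (assumed driver names): a software device pair such
 * as veth can implement its ndo_start_xmit() by handing the skb to the
 * peer's receive path with dev_forward_skb(); example_get_peer() is an
 * assumed helper, not a real API.
 *
 *	static netdev_tx_t example_xmit(struct sk_buff *skb,
 *					struct net_device *dev)
 *	{
 *		struct net_device *peer = example_get_peer(dev);
 *
 *		if (dev_forward_skb(peer, skb) != NET_RX_SUCCESS)
 *			dev->stats.tx_dropped++;
 *		return NETDEV_TX_OK;
 *	}
 */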
1644
Changli Gao71d9dec2010-12-15 19:57:25 +00001645static inline int deliver_skb(struct sk_buff *skb,
1646 struct packet_type *pt_prev,
1647 struct net_device *orig_dev)
1648{
Michael S. Tsirkin1080e512012-07-20 09:23:17 +00001649 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1650 return -ENOMEM;
Changli Gao71d9dec2010-12-15 19:57:25 +00001651 atomic_inc(&skb->users);
1652 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1653}
1654
Eric Leblondc0de08d2012-08-16 22:02:58 +00001655static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1656{
1657 if (ptype->af_packet_priv == NULL)
1658 return false;
1659
1660 if (ptype->id_match)
1661 return ptype->id_match(ptype, skb->sk);
1662 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1663 return true;
1664
1665 return false;
1666}
1667
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668/*
1669 * Support routine. Sends outgoing frames to any network
1670 * taps currently in use.
1671 */
1672
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001673static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674{
1675 struct packet_type *ptype;
Changli Gao71d9dec2010-12-15 19:57:25 +00001676 struct sk_buff *skb2 = NULL;
1677 struct packet_type *pt_prev = NULL;
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001678
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 rcu_read_lock();
1680 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1681 /* Never send packets back to the socket
1682 * they originated from - MvS (miquels@drinkel.ow.org)
1683 */
1684 if ((ptype->dev == dev || !ptype->dev) &&
Eric Leblondc0de08d2012-08-16 22:02:58 +00001685 (!skb_loop_sk(ptype, skb))) {
Changli Gao71d9dec2010-12-15 19:57:25 +00001686 if (pt_prev) {
1687 deliver_skb(skb2, pt_prev, skb->dev);
1688 pt_prev = ptype;
1689 continue;
1690 }
1691
1692 skb2 = skb_clone(skb, GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693 if (!skb2)
1694 break;
1695
Eric Dumazet70978182010-12-20 21:22:51 +00001696 net_timestamp_set(skb2);
1697
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698			/* skb->network_header should be correctly
			1699			   set by the sender, so that the second statement is
			1700			   just protection against buggy protocols.
1701 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001702 skb_reset_mac_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001704 if (skb_network_header(skb2) < skb2->data ||
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001705 skb2->network_header > skb2->tail) {
Joe Perchese87cc472012-05-13 21:56:26 +00001706 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1707 ntohs(skb2->protocol),
1708 dev->name);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07001709 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 }
1711
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001712 skb2->transport_header = skb2->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713 skb2->pkt_type = PACKET_OUTGOING;
Changli Gao71d9dec2010-12-15 19:57:25 +00001714 pt_prev = ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715 }
1716 }
Changli Gao71d9dec2010-12-15 19:57:25 +00001717 if (pt_prev)
1718 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719 rcu_read_unlock();
1720}
1721
Ben Hutchings2c530402012-07-10 10:55:09 +00001722/**
1723 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
John Fastabend4f57c082011-01-17 08:06:04 +00001724 * @dev: Network device
1725 * @txq: number of queues available
1726 *
 1727 * If real_num_tx_queues is changed the tc mappings may no longer be
 1728 * valid. To resolve this verify the tc mapping remains valid and if
 1729 * not, NULL the mapping. With no priorities mapping to this
 1730 * offset/count pair it will no longer be used. In the worst case, if
 1731 * TC0 is invalid, nothing can be done, so priority mappings are disabled.
 1732 * It is expected that drivers will fix this mapping if they can before
 1733 * calling netif_set_real_num_tx_queues.
1734 */
Eric Dumazetbb134d22011-01-20 19:18:08 +00001735static void netif_setup_tc(struct net_device *dev, unsigned int txq)
John Fastabend4f57c082011-01-17 08:06:04 +00001736{
1737 int i;
1738 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1739
1740 /* If TC0 is invalidated disable TC mapping */
1741 if (tc->offset + tc->count > txq) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00001742 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
John Fastabend4f57c082011-01-17 08:06:04 +00001743 dev->num_tc = 0;
1744 return;
1745 }
1746
1747 /* Invalidated prio to tc mappings set to TC0 */
1748 for (i = 1; i < TC_BITMASK + 1; i++) {
1749 int q = netdev_get_prio_tc_map(dev, i);
1750
1751 tc = &dev->tc_to_txq[q];
1752 if (tc->offset + tc->count > txq) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00001753 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1754 i, q);
John Fastabend4f57c082011-01-17 08:06:04 +00001755 netdev_set_prio_tc_map(dev, i, 0);
1756 }
1757 }
1758}
1759
John Fastabendf0796d52010-07-01 13:21:57 +00001760/*
1761 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
 1762 * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
1763 */
Tom Herberte6484932010-10-18 18:04:39 +00001764int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
John Fastabendf0796d52010-07-01 13:21:57 +00001765{
Tom Herbert1d24eb42010-11-21 13:17:27 +00001766 int rc;
1767
Tom Herberte6484932010-10-18 18:04:39 +00001768 if (txq < 1 || txq > dev->num_tx_queues)
1769 return -EINVAL;
John Fastabendf0796d52010-07-01 13:21:57 +00001770
Ben Hutchings5c565802011-02-15 19:39:21 +00001771 if (dev->reg_state == NETREG_REGISTERED ||
1772 dev->reg_state == NETREG_UNREGISTERING) {
Tom Herberte6484932010-10-18 18:04:39 +00001773 ASSERT_RTNL();
1774
Tom Herbert1d24eb42010-11-21 13:17:27 +00001775 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
1776 txq);
Tom Herbertbf264142010-11-26 08:36:09 +00001777 if (rc)
1778 return rc;
1779
John Fastabend4f57c082011-01-17 08:06:04 +00001780 if (dev->num_tc)
1781 netif_setup_tc(dev, txq);
1782
Tom Herberte6484932010-10-18 18:04:39 +00001783 if (txq < dev->real_num_tx_queues)
1784 qdisc_reset_all_tx_gt(dev, txq);
John Fastabendf0796d52010-07-01 13:21:57 +00001785 }
Tom Herberte6484932010-10-18 18:04:39 +00001786
1787 dev->real_num_tx_queues = txq;
1788 return 0;
John Fastabendf0796d52010-07-01 13:21:57 +00001789}
1790EXPORT_SYMBOL(netif_set_real_num_tx_queues);
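/*
 * Hedged example (assumed driver context): a multiqueue driver allocated
 * with a maximum queue count can shrink the number of queues actually used
 * after probing the hardware; hw_tx_queues is an assumed per-driver value,
 * and once the device is registered this must run under RTNL.
 *
 *	err = netif_set_real_num_tx_queues(dev, hw_tx_queues);
 *	if (err)
 *		return err;
 */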
Denis Vlasenko56079432006-03-29 15:57:29 -08001791
Ben Hutchings62fe0b42010-09-27 08:24:33 +00001792#ifdef CONFIG_RPS
1793/**
1794 * netif_set_real_num_rx_queues - set actual number of RX queues used
1795 * @dev: Network device
1796 * @rxq: Actual number of RX queues
1797 *
1798 * This must be called either with the rtnl_lock held or before
1799 * registration of the net device. Returns 0 on success, or a
Ben Hutchings4e7f7952010-10-08 10:33:39 -07001800 * negative error code. If called before registration, it always
1801 * succeeds.
Ben Hutchings62fe0b42010-09-27 08:24:33 +00001802 */
1803int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
1804{
1805 int rc;
1806
Tom Herbertbd25fa72010-10-18 18:00:16 +00001807 if (rxq < 1 || rxq > dev->num_rx_queues)
1808 return -EINVAL;
1809
Ben Hutchings62fe0b42010-09-27 08:24:33 +00001810 if (dev->reg_state == NETREG_REGISTERED) {
1811 ASSERT_RTNL();
1812
Ben Hutchings62fe0b42010-09-27 08:24:33 +00001813 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
1814 rxq);
1815 if (rc)
1816 return rc;
Ben Hutchings62fe0b42010-09-27 08:24:33 +00001817 }
1818
1819 dev->real_num_rx_queues = rxq;
1820 return 0;
1821}
1822EXPORT_SYMBOL(netif_set_real_num_rx_queues);
1823#endif
1824
Ben Hutchings2c530402012-07-10 10:55:09 +00001825/**
1826 * netif_get_num_default_rss_queues - default number of RSS queues
Yuval Mintz16917b82012-07-01 03:18:50 +00001827 *
1828 * This routine should set an upper limit on the number of RSS queues
1829 * used by default by multiqueue devices.
1830 */
Ben Hutchingsa55b1382012-07-10 10:54:38 +00001831int netif_get_num_default_rss_queues(void)
Yuval Mintz16917b82012-07-01 03:18:50 +00001832{
1833 return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
1834}
1835EXPORT_SYMBOL(netif_get_num_default_rss_queues);
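/*
 * Illustrative use (an assumption, not from this file): a driver sizing its
 * RX side can cap its queue count with the helper above and then report the
 * result to the stack; hw_max_rx_queues is an assumed hardware limit.
 *
 *	unsigned int rxqs = min_t(unsigned int, hw_max_rx_queues,
 *				  netif_get_num_default_rss_queues());
 *
 *	err = netif_set_real_num_rx_queues(dev, rxqs);
 */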
1836
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001837static inline void __netif_reschedule(struct Qdisc *q)
1838{
1839 struct softnet_data *sd;
1840 unsigned long flags;
1841
1842 local_irq_save(flags);
1843 sd = &__get_cpu_var(softnet_data);
Changli Gaoa9cbd582010-04-26 23:06:24 +00001844 q->next_sched = NULL;
1845 *sd->output_queue_tailp = q;
1846 sd->output_queue_tailp = &q->next_sched;
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001847 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1848 local_irq_restore(flags);
1849}
1850
David S. Miller37437bb2008-07-16 02:15:04 -07001851void __netif_schedule(struct Qdisc *q)
Denis Vlasenko56079432006-03-29 15:57:29 -08001852{
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001853 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1854 __netif_reschedule(q);
Denis Vlasenko56079432006-03-29 15:57:29 -08001855}
1856EXPORT_SYMBOL(__netif_schedule);
1857
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001858void dev_kfree_skb_irq(struct sk_buff *skb)
Denis Vlasenko56079432006-03-29 15:57:29 -08001859{
David S. Miller3578b0c2010-08-03 00:24:04 -07001860 if (atomic_dec_and_test(&skb->users)) {
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001861 struct softnet_data *sd;
1862 unsigned long flags;
Denis Vlasenko56079432006-03-29 15:57:29 -08001863
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001864 local_irq_save(flags);
1865 sd = &__get_cpu_var(softnet_data);
1866 skb->next = sd->completion_queue;
1867 sd->completion_queue = skb;
1868 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1869 local_irq_restore(flags);
1870 }
Denis Vlasenko56079432006-03-29 15:57:29 -08001871}
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001872EXPORT_SYMBOL(dev_kfree_skb_irq);
Denis Vlasenko56079432006-03-29 15:57:29 -08001873
1874void dev_kfree_skb_any(struct sk_buff *skb)
1875{
1876 if (in_irq() || irqs_disabled())
1877 dev_kfree_skb_irq(skb);
1878 else
1879 dev_kfree_skb(skb);
1880}
1881EXPORT_SYMBOL(dev_kfree_skb_any);
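/*
 * Sketch (assumed handler name): dev_kfree_skb_any() is the safe choice in
 * a TX-completion path that may run in hardirq context, where a plain
 * dev_kfree_skb() would not be allowed.
 *
 *	static void example_tx_complete(struct net_device *dev,
 *					struct sk_buff *skb)
 *	{
 *		dev->stats.tx_packets++;
 *		dev->stats.tx_bytes += skb->len;
 *		dev_kfree_skb_any(skb);	// works in irq and non-irq context
 *	}
 */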
1882
1883
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001884/**
1885 * netif_device_detach - mark device as removed
1886 * @dev: network device
1887 *
1888 * Mark device as removed from system and therefore no longer available.
1889 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001890void netif_device_detach(struct net_device *dev)
1891{
1892 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1893 netif_running(dev)) {
Alexander Duyckd5431032009-04-08 13:15:22 +00001894 netif_tx_stop_all_queues(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001895 }
1896}
1897EXPORT_SYMBOL(netif_device_detach);
1898
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001899/**
1900 * netif_device_attach - mark device as attached
1901 * @dev: network device
1902 *
1903 * Mark device as attached from system and restart if needed.
1904 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001905void netif_device_attach(struct net_device *dev)
1906{
1907 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1908 netif_running(dev)) {
Alexander Duyckd5431032009-04-08 13:15:22 +00001909 netif_tx_wake_all_queues(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001910 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001911 }
1912}
1913EXPORT_SYMBOL(netif_device_attach);
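/*
 * Hedged example (assumed PM callbacks): detach/attach are typically paired
 * in a driver's suspend and resume handlers so the stack stops queueing
 * packets while the hardware is powered down.
 *
 *	static int example_suspend(struct device *d)
 *	{
 *		struct net_device *dev = dev_get_drvdata(d);
 *
 *		netif_device_detach(dev);
 *		return 0;
 *	}
 *
 *	static int example_resume(struct device *d)
 *	{
 *		struct net_device *dev = dev_get_drvdata(d);
 *
 *		netif_device_attach(dev);
 *		return 0;
 *	}
 */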
1914
Ben Hutchings36c92472012-01-17 07:57:56 +00001915static void skb_warn_bad_offload(const struct sk_buff *skb)
1916{
Michał Mirosław65e9d2f2012-01-17 10:00:40 +00001917 static const netdev_features_t null_features = 0;
Ben Hutchings36c92472012-01-17 07:57:56 +00001918 struct net_device *dev = skb->dev;
1919 const char *driver = "";
1920
1921 if (dev && dev->dev.parent)
1922 driver = dev_driver_string(dev->dev.parent);
1923
1924 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
1925 "gso_type=%d ip_summed=%d\n",
Michał Mirosław65e9d2f2012-01-17 10:00:40 +00001926 driver, dev ? &dev->features : &null_features,
1927 skb->sk ? &skb->sk->sk_route_caps : &null_features,
Ben Hutchings36c92472012-01-17 07:57:56 +00001928 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
1929 skb_shinfo(skb)->gso_type, skb->ip_summed);
1930}
1931
Linus Torvalds1da177e2005-04-16 15:20:36 -07001932/*
1933 * Invalidate hardware checksum when packet is to be mangled, and
1934 * complete checksum manually on outgoing path.
1935 */
Patrick McHardy84fa7932006-08-29 16:44:56 -07001936int skb_checksum_help(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937{
Al Virod3bc23e2006-11-14 21:24:49 -08001938 __wsum csum;
Herbert Xu663ead32007-04-09 11:59:07 -07001939 int ret = 0, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940
Patrick McHardy84fa7932006-08-29 16:44:56 -07001941 if (skb->ip_summed == CHECKSUM_COMPLETE)
Herbert Xua430a432006-07-08 13:34:56 -07001942 goto out_set_summed;
1943
1944 if (unlikely(skb_shinfo(skb)->gso_size)) {
Ben Hutchings36c92472012-01-17 07:57:56 +00001945 skb_warn_bad_offload(skb);
1946 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947 }
1948
Michał Mirosław55508d62010-12-14 15:24:08 +00001949 offset = skb_checksum_start_offset(skb);
Herbert Xua0308472007-10-15 01:47:15 -07001950 BUG_ON(offset >= skb_headlen(skb));
1951 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1952
1953 offset += skb->csum_offset;
1954 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1955
1956 if (skb_cloned(skb) &&
1957 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1959 if (ret)
1960 goto out;
1961 }
1962
Herbert Xua0308472007-10-15 01:47:15 -07001963 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
Herbert Xua430a432006-07-08 13:34:56 -07001964out_set_summed:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965 skb->ip_summed = CHECKSUM_NONE;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001966out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967 return ret;
1968}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001969EXPORT_SYMBOL(skb_checksum_help);
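/*
 * Illustrative fallback (an assumption about a hypothetical driver): a
 * device that cannot checksum a particular packet can resolve the pending
 * CHECKSUM_PARTIAL in software before handing the frame to the hardware;
 * example_hw_can_csum() is an assumed capability check.
 *
 *	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *	    !example_hw_can_csum(skb) &&
 *	    skb_checksum_help(skb))
 *		goto drop;
 */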
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001971/**
1972 * skb_gso_segment - Perform segmentation on skb.
1973 * @skb: buffer to segment
Herbert Xu576a30e2006-06-27 13:22:38 -07001974 * @features: features for the output path (see dev->features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001975 *
1976 * This function segments the given skb and returns a list of segments.
Herbert Xu576a30e2006-06-27 13:22:38 -07001977 *
1978 * It may return NULL if the skb requires no segmentation. This is
1979 * only possible when GSO is used for verifying header integrity.
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001980 */
Michał Mirosławc8f44af2011-11-15 15:29:55 +00001981struct sk_buff *skb_gso_segment(struct sk_buff *skb,
1982 netdev_features_t features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001983{
1984 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1985 struct packet_type *ptype;
Al Viro252e3342006-11-14 20:48:11 -08001986 __be16 type = skb->protocol;
Jesse Grossc8d5bcd2010-10-29 12:14:54 +00001987 int vlan_depth = ETH_HLEN;
Herbert Xua430a432006-07-08 13:34:56 -07001988 int err;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001989
Jesse Grossc8d5bcd2010-10-29 12:14:54 +00001990 while (type == htons(ETH_P_8021Q)) {
1991 struct vlan_hdr *vh;
Jesse Gross7b9c6092010-10-20 13:56:04 +00001992
Jesse Grossc8d5bcd2010-10-29 12:14:54 +00001993 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
Jesse Gross7b9c6092010-10-20 13:56:04 +00001994 return ERR_PTR(-EINVAL);
1995
Jesse Grossc8d5bcd2010-10-29 12:14:54 +00001996 vh = (struct vlan_hdr *)(skb->data + vlan_depth);
1997 type = vh->h_vlan_encapsulated_proto;
1998 vlan_depth += VLAN_HLEN;
Jesse Gross7b9c6092010-10-20 13:56:04 +00001999 }
2000
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002001 skb_reset_mac_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07002002 skb->mac_len = skb->network_header - skb->mac_header;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002003 __skb_pull(skb, skb->mac_len);
2004
Herbert Xu67fd1a72009-01-19 16:26:44 -08002005 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
Ben Hutchings36c92472012-01-17 07:57:56 +00002006 skb_warn_bad_offload(skb);
Herbert Xu67fd1a72009-01-19 16:26:44 -08002007
Herbert Xua430a432006-07-08 13:34:56 -07002008 if (skb_header_cloned(skb) &&
2009 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
2010 return ERR_PTR(err);
2011 }
2012
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002013 rcu_read_lock();
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002014 list_for_each_entry_rcu(ptype,
2015 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002016 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
Patrick McHardy84fa7932006-08-29 16:44:56 -07002017 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07002018 err = ptype->gso_send_check(skb);
2019 segs = ERR_PTR(err);
2020 if (err || skb_gso_ok(skb, features))
2021 break;
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07002022 __skb_push(skb, (skb->data -
2023 skb_network_header(skb)));
Herbert Xua430a432006-07-08 13:34:56 -07002024 }
Herbert Xu576a30e2006-06-27 13:22:38 -07002025 segs = ptype->gso_segment(skb, features);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002026 break;
2027 }
2028 }
2029 rcu_read_unlock();
2030
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07002031 __skb_push(skb, skb->data - skb_mac_header(skb));
Herbert Xu576a30e2006-06-27 13:22:38 -07002032
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002033 return segs;
2034}
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002035EXPORT_SYMBOL(skb_gso_segment);
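/*
 * Minimal sketch of walking the result (not from this file): callers get a
 * NULL-terminated list linked through skb->next and typically transmit or
 * free each segment individually; example_xmit_one() is an assumed
 * single-skb transmit path.
 *
 *	struct sk_buff *segs = skb_gso_segment(skb, features);
 *
 *	if (IS_ERR(segs))
 *		goto drop;
 *	if (!segs)
 *		return example_xmit_one(skb);	// no segmentation was needed
 *	consume_skb(skb);
 *	while (segs) {
 *		struct sk_buff *next = segs->next;
 *
 *		segs->next = NULL;
 *		example_xmit_one(segs);
 *		segs = next;
 *	}
 */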
2036
Herbert Xufb286bb2005-11-10 13:01:24 -08002037/* Take action when hardware reception checksum errors are detected. */
2038#ifdef CONFIG_BUG
2039void netdev_rx_csum_fault(struct net_device *dev)
2040{
2041 if (net_ratelimit()) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00002042 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
Herbert Xufb286bb2005-11-10 13:01:24 -08002043 dump_stack();
2044 }
2045}
2046EXPORT_SYMBOL(netdev_rx_csum_fault);
2047#endif
2048
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049/* Actually, we should eliminate this check as soon as we know that:
 2050 * 1. An IOMMU is present and allows all of the memory to be mapped.
2051 * 2. No high memory really exists on this machine.
2052 */
2053
Eric Dumazet9092c652010-04-02 13:34:49 -07002054static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055{
Herbert Xu3d3a8532006-06-27 13:33:10 -07002056#ifdef CONFIG_HIGHMEM
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057 int i;
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002058 if (!(dev->features & NETIF_F_HIGHDMA)) {
Ian Campbellea2ab692011-08-22 23:44:58 +00002059 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2060 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2061 if (PageHighMem(skb_frag_page(frag)))
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002062 return 1;
Ian Campbellea2ab692011-08-22 23:44:58 +00002063 }
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002064 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002066 if (PCI_DMA_BUS_IS_PHYS) {
2067 struct device *pdev = dev->dev.parent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068
Eric Dumazet9092c652010-04-02 13:34:49 -07002069 if (!pdev)
2070 return 0;
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002071 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
Ian Campbellea2ab692011-08-22 23:44:58 +00002072 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2073 dma_addr_t addr = page_to_phys(skb_frag_page(frag));
FUJITA Tomonori5acbbd42010-03-30 22:35:50 +00002074 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2075 return 1;
2076 }
2077 }
Herbert Xu3d3a8532006-06-27 13:33:10 -07002078#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079 return 0;
2080}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002082struct dev_gso_cb {
2083 void (*destructor)(struct sk_buff *skb);
2084};
2085
2086#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
2087
2088static void dev_gso_skb_destructor(struct sk_buff *skb)
2089{
2090 struct dev_gso_cb *cb;
2091
2092 do {
2093 struct sk_buff *nskb = skb->next;
2094
2095 skb->next = nskb->next;
2096 nskb->next = NULL;
2097 kfree_skb(nskb);
2098 } while (skb->next);
2099
2100 cb = DEV_GSO_CB(skb);
2101 if (cb->destructor)
2102 cb->destructor(skb);
2103}
2104
2105/**
2106 * dev_gso_segment - Perform emulated hardware segmentation on skb.
2107 * @skb: buffer to segment
Jesse Gross91ecb632011-01-09 06:23:33 +00002108 * @features: device features as applicable to this skb
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002109 *
2110 * This function segments the given skb and stores the list of segments
2111 * in skb->next.
2112 */
Michał Mirosławc8f44af2011-11-15 15:29:55 +00002113static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002114{
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002115 struct sk_buff *segs;
2116
Herbert Xu576a30e2006-06-27 13:22:38 -07002117 segs = skb_gso_segment(skb, features);
2118
2119 /* Verifying header integrity only. */
2120 if (!segs)
2121 return 0;
2122
Hirofumi Nakagawa801678c2008-04-29 01:03:09 -07002123 if (IS_ERR(segs))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002124 return PTR_ERR(segs);
2125
2126 skb->next = segs;
2127 DEV_GSO_CB(skb)->destructor = skb->destructor;
2128 skb->destructor = dev_gso_skb_destructor;
2129
2130 return 0;
2131}
2132
Michał Mirosławc8f44af2011-11-15 15:29:55 +00002133static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
Jesse Gross03634662011-01-09 06:23:35 +00002134{
2135 return ((features & NETIF_F_GEN_CSUM) ||
2136 ((features & NETIF_F_V4_CSUM) &&
2137 protocol == htons(ETH_P_IP)) ||
2138 ((features & NETIF_F_V6_CSUM) &&
2139 protocol == htons(ETH_P_IPV6)) ||
2140 ((features & NETIF_F_FCOE_CRC) &&
2141 protocol == htons(ETH_P_FCOE)));
2142}
2143
Michał Mirosławc8f44af2011-11-15 15:29:55 +00002144static netdev_features_t harmonize_features(struct sk_buff *skb,
2145 __be16 protocol, netdev_features_t features)
Jesse Grossf01a5232011-01-09 06:23:31 +00002146{
Eric Dumazetd4027862011-01-19 00:51:36 +00002147 if (!can_checksum_protocol(features, protocol)) {
Jesse Grossf01a5232011-01-09 06:23:31 +00002148 features &= ~NETIF_F_ALL_CSUM;
2149 features &= ~NETIF_F_SG;
2150 } else if (illegal_highdma(skb->dev, skb)) {
2151 features &= ~NETIF_F_SG;
2152 }
2153
2154 return features;
2155}
2156
Michał Mirosławc8f44af2011-11-15 15:29:55 +00002157netdev_features_t netif_skb_features(struct sk_buff *skb)
Jesse Gross58e998c2010-10-29 12:14:55 +00002158{
2159 __be16 protocol = skb->protocol;
Michał Mirosławc8f44af2011-11-15 15:29:55 +00002160 netdev_features_t features = skb->dev->features;
Jesse Gross58e998c2010-10-29 12:14:55 +00002161
Ben Hutchings30b678d2012-07-30 15:57:00 +00002162 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
2163 features &= ~NETIF_F_GSO_MASK;
2164
Jesse Gross58e998c2010-10-29 12:14:55 +00002165 if (protocol == htons(ETH_P_8021Q)) {
2166 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2167 protocol = veh->h_vlan_encapsulated_proto;
Jesse Grossf01a5232011-01-09 06:23:31 +00002168 } else if (!vlan_tx_tag_present(skb)) {
2169 return harmonize_features(skb, protocol, features);
2170 }
Jesse Gross58e998c2010-10-29 12:14:55 +00002171
Jesse Gross6ee400a2011-01-17 20:46:00 +00002172 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
Jesse Grossf01a5232011-01-09 06:23:31 +00002173
2174 if (protocol != htons(ETH_P_8021Q)) {
2175 return harmonize_features(skb, protocol, features);
2176 } else {
2177 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
Jesse Gross6ee400a2011-01-17 20:46:00 +00002178 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
Jesse Grossf01a5232011-01-09 06:23:31 +00002179 return harmonize_features(skb, protocol, features);
2180 }
Jesse Gross58e998c2010-10-29 12:14:55 +00002181}
Jesse Grossf01a5232011-01-09 06:23:31 +00002182EXPORT_SYMBOL(netif_skb_features);
Jesse Gross58e998c2010-10-29 12:14:55 +00002183
John Fastabend6afff0c2010-06-16 14:18:12 +00002184/*
2185 * Returns true if either:
2186 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
Rami Rosend1a53df2012-08-27 23:39:24 +00002187 * 2. skb is fragmented and the device does not support SG.
John Fastabend6afff0c2010-06-16 14:18:12 +00002188 */
2189static inline int skb_needs_linearize(struct sk_buff *skb,
Jesse Gross02932ce2011-01-09 06:23:34 +00002190 int features)
John Fastabend6afff0c2010-06-16 14:18:12 +00002191{
Jesse Gross02932ce2011-01-09 06:23:34 +00002192 return skb_is_nonlinear(skb) &&
2193 ((skb_has_frag_list(skb) &&
2194 !(features & NETIF_F_FRAGLIST)) ||
Jesse Grosse1e78db2010-10-29 12:14:53 +00002195 (skb_shinfo(skb)->nr_frags &&
Jesse Gross02932ce2011-01-09 06:23:34 +00002196 !(features & NETIF_F_SG)));
John Fastabend6afff0c2010-06-16 14:18:12 +00002197}
2198
David S. Millerfd2ea0a2008-07-17 01:56:23 -07002199int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2200 struct netdev_queue *txq)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002201{
Stephen Hemminger00829822008-11-20 20:14:53 -08002202 const struct net_device_ops *ops = dev->netdev_ops;
Patrick McHardy572a9d72009-11-10 06:14:14 +00002203 int rc = NETDEV_TX_OK;
Koki Sanagiec764bf2011-05-30 21:48:34 +00002204 unsigned int skb_len;
Stephen Hemminger00829822008-11-20 20:14:53 -08002205
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002206 if (likely(!skb->next)) {
Michał Mirosławc8f44af2011-11-15 15:29:55 +00002207 netdev_features_t features;
Jesse Grossfc741212011-01-09 06:23:32 +00002208
Eric Dumazet93f154b2009-05-18 22:19:19 -07002209 /*
Lucas De Marchi25985ed2011-03-30 22:57:33 -03002210		 * If the device doesn't need skb->dst, release it right now while
Eric Dumazet93f154b2009-05-18 22:19:19 -07002211		 * it's hot in this cpu's cache
2212 */
Eric Dumazetadf30902009-06-02 05:19:30 +00002213 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2214 skb_dst_drop(skb);
2215
Jesse Grossfc741212011-01-09 06:23:32 +00002216 features = netif_skb_features(skb);
2217
Jesse Gross7b9c6092010-10-20 13:56:04 +00002218 if (vlan_tx_tag_present(skb) &&
Jesse Grossfc741212011-01-09 06:23:32 +00002219 !(features & NETIF_F_HW_VLAN_TX)) {
Jesse Gross7b9c6092010-10-20 13:56:04 +00002220 skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
2221 if (unlikely(!skb))
2222 goto out;
2223
2224 skb->vlan_tci = 0;
2225 }
2226
Jesse Grossfc741212011-01-09 06:23:32 +00002227 if (netif_needs_gso(skb, features)) {
Jesse Gross91ecb632011-01-09 06:23:33 +00002228 if (unlikely(dev_gso_segment(skb, features)))
David S. Miller9ccb8972010-04-22 01:02:07 -07002229 goto out_kfree_skb;
2230 if (skb->next)
2231 goto gso;
John Fastabend6afff0c2010-06-16 14:18:12 +00002232 } else {
Jesse Gross02932ce2011-01-09 06:23:34 +00002233 if (skb_needs_linearize(skb, features) &&
John Fastabend6afff0c2010-06-16 14:18:12 +00002234 __skb_linearize(skb))
2235 goto out_kfree_skb;
2236
2237 /* If packet is not checksummed and device does not
2238 * support checksumming for this protocol, complete
2239 * checksumming here.
2240 */
2241 if (skb->ip_summed == CHECKSUM_PARTIAL) {
Michał Mirosław55508d62010-12-14 15:24:08 +00002242 skb_set_transport_header(skb,
2243 skb_checksum_start_offset(skb));
Jesse Gross03634662011-01-09 06:23:35 +00002244 if (!(features & NETIF_F_ALL_CSUM) &&
John Fastabend6afff0c2010-06-16 14:18:12 +00002245 skb_checksum_help(skb))
2246 goto out_kfree_skb;
2247 }
David S. Miller9ccb8972010-04-22 01:02:07 -07002248 }
2249
Eric Dumazetb40863c2012-09-18 20:44:49 +00002250 if (!list_empty(&ptype_all))
2251 dev_queue_xmit_nit(skb, dev);
2252
Koki Sanagiec764bf2011-05-30 21:48:34 +00002253 skb_len = skb->len;
Patrick Ohlyac45f602009-02-12 05:03:37 +00002254 rc = ops->ndo_start_xmit(skb, dev);
Koki Sanagiec764bf2011-05-30 21:48:34 +00002255 trace_net_dev_xmit(skb, rc, dev, skb_len);
Patrick McHardyec634fe2009-07-05 19:23:38 -07002256 if (rc == NETDEV_TX_OK)
Eric Dumazet08baf562009-05-25 22:58:01 -07002257 txq_trans_update(txq);
Patrick Ohlyac45f602009-02-12 05:03:37 +00002258 return rc;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002259 }
2260
Herbert Xu576a30e2006-06-27 13:22:38 -07002261gso:
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002262 do {
2263 struct sk_buff *nskb = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002264
2265 skb->next = nskb->next;
2266 nskb->next = NULL;
Krishna Kumar068a2de2009-12-09 20:59:58 +00002267
2268 /*
Lucas De Marchi25985ed2011-03-30 22:57:33 -03002269		 * If the device doesn't need nskb->dst, release it right now while
Krishna Kumar068a2de2009-12-09 20:59:58 +00002270		 * it's hot in this cpu's cache
2271 */
2272 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2273 skb_dst_drop(nskb);
2274
Eric Dumazetb40863c2012-09-18 20:44:49 +00002275 if (!list_empty(&ptype_all))
2276 dev_queue_xmit_nit(nskb, dev);
2277
Koki Sanagiec764bf2011-05-30 21:48:34 +00002278 skb_len = nskb->len;
Stephen Hemminger00829822008-11-20 20:14:53 -08002279 rc = ops->ndo_start_xmit(nskb, dev);
Koki Sanagiec764bf2011-05-30 21:48:34 +00002280 trace_net_dev_xmit(nskb, rc, dev, skb_len);
Patrick McHardyec634fe2009-07-05 19:23:38 -07002281 if (unlikely(rc != NETDEV_TX_OK)) {
Patrick McHardy572a9d72009-11-10 06:14:14 +00002282 if (rc & ~NETDEV_TX_MASK)
2283 goto out_kfree_gso_skb;
Michael Chanf54d9e82006-06-25 23:57:04 -07002284 nskb->next = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002285 skb->next = nskb;
2286 return rc;
2287 }
Eric Dumazet08baf562009-05-25 22:58:01 -07002288 txq_trans_update(txq);
Tom Herbert734664982011-11-28 16:32:44 +00002289 if (unlikely(netif_xmit_stopped(txq) && skb->next))
Michael Chanf54d9e82006-06-25 23:57:04 -07002290 return NETDEV_TX_BUSY;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002291 } while (skb->next);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002292
Patrick McHardy572a9d72009-11-10 06:14:14 +00002293out_kfree_gso_skb:
2294 if (likely(skb->next == NULL))
2295 skb->destructor = DEV_GSO_CB(skb)->destructor;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002296out_kfree_skb:
2297 kfree_skb(skb);
Jesse Gross7b9c6092010-10-20 13:56:04 +00002298out:
Patrick McHardy572a9d72009-11-10 06:14:14 +00002299 return rc;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002300}
2301
Tom Herbert0a9627f2010-03-16 08:03:29 +00002302static u32 hashrnd __read_mostly;
David S. Millerb6b2fed2008-07-21 09:48:06 -07002303
Vladislav Zolotarova3d22a62010-12-13 06:27:10 +00002304/*
 2305 * Returns a Tx hash based on the given packet descriptor and the number
 2306 * of Tx queues to be used as a distribution range.
2307 */
2308u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2309 unsigned int num_tx_queues)
David S. Miller8f0f2222008-07-15 03:47:03 -07002310{
David S. Miller70192982009-01-27 16:34:47 -08002311 u32 hash;
John Fastabend4f57c082011-01-17 08:06:04 +00002312 u16 qoffset = 0;
2313 u16 qcount = num_tx_queues;
David S. Millerb6b2fed2008-07-21 09:48:06 -07002314
David S. Miller513de112009-05-03 14:43:10 -07002315 if (skb_rx_queue_recorded(skb)) {
2316 hash = skb_get_rx_queue(skb);
Vladislav Zolotarova3d22a62010-12-13 06:27:10 +00002317 while (unlikely(hash >= num_tx_queues))
2318 hash -= num_tx_queues;
David S. Miller513de112009-05-03 14:43:10 -07002319 return hash;
2320 }
Eric Dumazetec581f62009-05-01 09:05:06 -07002321
John Fastabend4f57c082011-01-17 08:06:04 +00002322 if (dev->num_tc) {
2323 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2324 qoffset = dev->tc_to_txq[tc].offset;
2325 qcount = dev->tc_to_txq[tc].count;
2326 }
2327
Eric Dumazetec581f62009-05-01 09:05:06 -07002328 if (skb->sk && skb->sk->sk_hash)
David S. Miller70192982009-01-27 16:34:47 -08002329 hash = skb->sk->sk_hash;
Eric Dumazetec581f62009-05-01 09:05:06 -07002330 else
Eric Dumazet62b1a8a2012-06-14 06:42:44 +00002331 hash = (__force u16) skb->protocol;
Tom Herbert0a9627f2010-03-16 08:03:29 +00002332 hash = jhash_1word(hash, hashrnd);
David S. Millerd5a9e242009-01-27 16:22:11 -08002333
John Fastabend4f57c082011-01-17 08:06:04 +00002334 return (u16) (((u64) hash * qcount) >> 32) + qoffset;
David S. Miller8f0f2222008-07-15 03:47:03 -07002335}
Vladislav Zolotarova3d22a62010-12-13 06:27:10 +00002336EXPORT_SYMBOL(__skb_tx_hash);
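/*
 * Hedged example (assumed driver policy): a driver that implements
 * ndo_select_queue() but has no special steering needs can simply fall back
 * to the stack's hash over its real queue count.
 *
 *	static u16 example_select_queue(struct net_device *dev,
 *					struct sk_buff *skb)
 *	{
 *		return __skb_tx_hash(dev, skb, dev->real_num_tx_queues);
 *	}
 */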
David S. Miller8f0f2222008-07-15 03:47:03 -07002337
Eric Dumazeted046422009-11-13 21:54:04 +00002338static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
2339{
2340 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
Joe Perchese87cc472012-05-13 21:56:26 +00002341 net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
2342 dev->name, queue_index,
2343 dev->real_num_tx_queues);
Eric Dumazeted046422009-11-13 21:54:04 +00002344 return 0;
2345 }
2346 return queue_index;
2347}
2348
Tom Herbert1d24eb42010-11-21 13:17:27 +00002349static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
2350{
Tom Herbertbf264142010-11-26 08:36:09 +00002351#ifdef CONFIG_XPS
Tom Herbert1d24eb42010-11-21 13:17:27 +00002352 struct xps_dev_maps *dev_maps;
2353 struct xps_map *map;
2354 int queue_index = -1;
2355
2356 rcu_read_lock();
2357 dev_maps = rcu_dereference(dev->xps_maps);
2358 if (dev_maps) {
2359 map = rcu_dereference(
2360 dev_maps->cpu_map[raw_smp_processor_id()]);
2361 if (map) {
2362 if (map->len == 1)
2363 queue_index = map->queues[0];
2364 else {
2365 u32 hash;
2366 if (skb->sk && skb->sk->sk_hash)
2367 hash = skb->sk->sk_hash;
2368 else
2369 hash = (__force u16) skb->protocol ^
2370 skb->rxhash;
2371 hash = jhash_1word(hash, hashrnd);
2372 queue_index = map->queues[
2373 ((u64)hash * map->len) >> 32];
2374 }
2375 if (unlikely(queue_index >= dev->real_num_tx_queues))
2376 queue_index = -1;
2377 }
2378 }
2379 rcu_read_unlock();
2380
2381 return queue_index;
2382#else
2383 return -1;
2384#endif
2385}
2386
David S. Millere8a04642008-07-17 00:34:19 -07002387static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2388 struct sk_buff *skb)
2389{
Tom Herbertb0f77d02010-07-14 20:50:29 -07002390 int queue_index;
Helmut Schaadeabc772010-09-03 02:39:56 +00002391 const struct net_device_ops *ops = dev->netdev_ops;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07002392
Tom Herbert3853b582010-11-21 13:17:29 +00002393 if (dev->real_num_tx_queues == 1)
2394 queue_index = 0;
2395 else if (ops->ndo_select_queue) {
Helmut Schaadeabc772010-09-03 02:39:56 +00002396 queue_index = ops->ndo_select_queue(dev, skb);
2397 queue_index = dev_cap_txqueue(dev, queue_index);
2398 } else {
2399 struct sock *sk = skb->sk;
2400 queue_index = sk_tx_queue_get(sk);
Krishna Kumara4ee3ce2009-10-19 23:50:07 +00002401
Tom Herbert3853b582010-11-21 13:17:29 +00002402 if (queue_index < 0 || skb->ooo_okay ||
2403 queue_index >= dev->real_num_tx_queues) {
2404 int old_index = queue_index;
Krishna Kumara4ee3ce2009-10-19 23:50:07 +00002405
Tom Herbert1d24eb42010-11-21 13:17:27 +00002406 queue_index = get_xps_queue(dev, skb);
2407 if (queue_index < 0)
2408 queue_index = skb_tx_hash(dev, skb);
Tom Herbert3853b582010-11-21 13:17:29 +00002409
2410 if (queue_index != old_index && sk) {
2411 struct dst_entry *dst =
2412 rcu_dereference_check(sk->sk_dst_cache, 1);
Eric Dumazet8728c542010-04-11 21:18:17 +00002413
2414 if (dst && skb_dst(skb) == dst)
2415 sk_tx_queue_set(sk, queue_index);
2416 }
Krishna Kumara4ee3ce2009-10-19 23:50:07 +00002417 }
2418 }
David S. Millereae792b2008-07-15 03:03:33 -07002419
David S. Millerfd2ea0a2008-07-17 01:56:23 -07002420 skb_set_queue_mapping(skb, queue_index);
2421 return netdev_get_tx_queue(dev, queue_index);
David S. Millere8a04642008-07-17 00:34:19 -07002422}
2423
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002424static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2425 struct net_device *dev,
2426 struct netdev_queue *txq)
2427{
2428 spinlock_t *root_lock = qdisc_lock(q);
Eric Dumazeta2da5702011-01-20 03:48:19 +00002429 bool contended;
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002430 int rc;
2431
Eric Dumazeta2da5702011-01-20 03:48:19 +00002432 qdisc_skb_cb(skb)->pkt_len = skb->len;
2433 qdisc_calculate_pkt_len(skb, q);
Eric Dumazet79640a42010-06-02 05:09:29 -07002434 /*
2435 * Heuristic to force contended enqueues to serialize on a
	2436	 * separate lock before trying to get the qdisc main lock.
	2437	 * This permits the __QDISC_STATE_RUNNING owner to get the lock more often
2438 * and dequeue packets faster.
2439 */
Eric Dumazeta2da5702011-01-20 03:48:19 +00002440 contended = qdisc_is_running(q);
Eric Dumazet79640a42010-06-02 05:09:29 -07002441 if (unlikely(contended))
2442 spin_lock(&q->busylock);
2443
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002444 spin_lock(root_lock);
2445 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2446 kfree_skb(skb);
2447 rc = NET_XMIT_DROP;
2448 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
Eric Dumazetbc135b22010-06-02 03:23:51 -07002449 qdisc_run_begin(q)) {
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002450 /*
2451 * This is a work-conserving queue; there are no old skbs
2452 * waiting to be sent out; and the qdisc is not running -
2453 * xmit the skb directly.
2454 */
Eric Dumazet7fee2262010-05-11 23:19:48 +00002455 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2456 skb_dst_force(skb);
Eric Dumazetbfe0d022011-01-09 08:30:54 +00002457
Eric Dumazetbfe0d022011-01-09 08:30:54 +00002458 qdisc_bstats_update(q, skb);
2459
Eric Dumazet79640a42010-06-02 05:09:29 -07002460 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2461 if (unlikely(contended)) {
2462 spin_unlock(&q->busylock);
2463 contended = false;
2464 }
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002465 __qdisc_run(q);
Eric Dumazet79640a42010-06-02 05:09:29 -07002466 } else
Eric Dumazetbc135b22010-06-02 03:23:51 -07002467 qdisc_run_end(q);
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002468
2469 rc = NET_XMIT_SUCCESS;
2470 } else {
Eric Dumazet7fee2262010-05-11 23:19:48 +00002471 skb_dst_force(skb);
Eric Dumazeta2da5702011-01-20 03:48:19 +00002472 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
Eric Dumazet79640a42010-06-02 05:09:29 -07002473 if (qdisc_run_begin(q)) {
2474 if (unlikely(contended)) {
2475 spin_unlock(&q->busylock);
2476 contended = false;
2477 }
2478 __qdisc_run(q);
2479 }
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002480 }
2481 spin_unlock(root_lock);
Eric Dumazet79640a42010-06-02 05:09:29 -07002482 if (unlikely(contended))
2483 spin_unlock(&q->busylock);
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002484 return rc;
2485}
2486
Neil Horman5bc14212011-11-22 05:10:51 +00002487#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
2488static void skb_update_prio(struct sk_buff *skb)
2489{
Igor Maravic6977a792011-11-25 07:44:54 +00002490 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
Neil Horman5bc14212011-11-22 05:10:51 +00002491
Eric Dumazet91c68ce2012-07-08 21:45:10 +00002492 if (!skb->priority && skb->sk && map) {
2493 unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
2494
2495 if (prioidx < map->priomap_len)
2496 skb->priority = map->priomap[prioidx];
2497 }
Neil Horman5bc14212011-11-22 05:10:51 +00002498}
2499#else
2500#define skb_update_prio(skb)
2501#endif
2502
Eric Dumazet745e20f2010-09-29 13:23:09 -07002503static DEFINE_PER_CPU(int, xmit_recursion);
David S. Miller11a766c2010-10-25 12:51:55 -07002504#define RECURSION_LIMIT 10
Eric Dumazet745e20f2010-09-29 13:23:09 -07002505
Dave Jonesd29f7492008-07-22 14:09:06 -07002506/**
Michel Machado95603e22012-06-12 10:16:35 +00002507 * dev_loopback_xmit - loop back @skb
2508 * @skb: buffer to transmit
2509 */
2510int dev_loopback_xmit(struct sk_buff *skb)
2511{
2512 skb_reset_mac_header(skb);
2513 __skb_pull(skb, skb_network_offset(skb));
2514 skb->pkt_type = PACKET_LOOPBACK;
2515 skb->ip_summed = CHECKSUM_UNNECESSARY;
2516 WARN_ON(!skb_dst(skb));
2517 skb_dst_force(skb);
2518 netif_rx_ni(skb);
2519 return 0;
2520}
2521EXPORT_SYMBOL(dev_loopback_xmit);
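/*
 * Illustrative usage sketch (not part of this file): callers such as the
 * IPv4/IPv6 multicast output paths hand a cloned, already-routed skb back
 * to the local stack by using dev_loopback_xmit() as the okfn of an
 * NF_HOOK invocation, roughly like this (newskb is assumed to be a clone
 * that already carries a dst):
 *
 *	NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, NULL,
 *		newskb->dev, dev_loopback_xmit);
 */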
2522
2523/**
Dave Jonesd29f7492008-07-22 14:09:06 -07002524 * dev_queue_xmit - transmit a buffer
2525 * @skb: buffer to transmit
2526 *
2527 * Queue a buffer for transmission to a network device. The caller must
2528 * have set the device and priority and built the buffer before calling
2529 * this function. The function can be called from an interrupt.
2530 *
2531 * A negative errno code is returned on a failure. A success does not
2532 * guarantee the frame will be transmitted as it may be dropped due
2533 * to congestion or traffic shaping.
2534 *
2535 * -----------------------------------------------------------------------------------
2536 * I notice this method can also return errors from the queue disciplines,
2537 * including NET_XMIT_DROP, which is a positive value. So, errors can also
2538 * be positive.
2539 *
2540 * Regardless of the return value, the skb is consumed, so it is currently
2541 * difficult to retry a send to this method. (You can bump the ref count
2542 * before sending to hold a reference for retry if you are careful.)
2543 *
2544 * When calling this method, interrupts MUST be enabled. This is because
2545 * the BH enable code must have IRQs enabled so that it will not deadlock.
2546 * --BLG
2547 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548int dev_queue_xmit(struct sk_buff *skb)
2549{
2550 struct net_device *dev = skb->dev;
David S. Millerdc2b4842008-07-08 17:18:23 -07002551 struct netdev_queue *txq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552 struct Qdisc *q;
2553 int rc = -ENOMEM;
2554
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002555 /* Disable soft irqs for various locks below. Also
2556 * stops preemption for RCU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002558 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559
Neil Horman5bc14212011-11-22 05:10:51 +00002560 skb_update_prio(skb);
2561
David S. Millereae792b2008-07-15 03:03:33 -07002562 txq = dev_pick_tx(dev, skb);
Paul E. McKenneya898def2010-02-22 17:04:49 -08002563 q = rcu_dereference_bh(txq->qdisc);
David S. Miller37437bb2008-07-16 02:15:04 -07002564
Linus Torvalds1da177e2005-04-16 15:20:36 -07002565#ifdef CONFIG_NET_CLS_ACT
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002566 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567#endif
Koki Sanagicf66ba52010-08-23 18:45:02 +09002568 trace_net_dev_queue(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569 if (q->enqueue) {
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002570 rc = __dev_xmit_skb(skb, q, dev, txq);
David S. Miller37437bb2008-07-16 02:15:04 -07002571 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002572 }
2573
2574 /* The device has no queue. Common case for software devices:
2575      loopback, all sorts of tunnels...
2576
Herbert Xu932ff272006-06-09 12:20:56 -07002577      Really, it is unlikely that netif_tx_lock protection is necessary
2578      here (e.g. loopback and IP tunnels are clean, ignoring statistics
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579      counters).
2580      However, it is possible that they rely on the protection
2581      taken by us here.
2582
2583      Check this and shoot the lock; it is not prone to deadlocks.
2584      Or shoot the noqueue qdisc, which is even simpler 8)
2585 */
2586 if (dev->flags & IFF_UP) {
2587 int cpu = smp_processor_id(); /* ok because BHs are off */
2588
David S. Millerc773e842008-07-08 23:13:53 -07002589 if (txq->xmit_lock_owner != cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002590
Eric Dumazet745e20f2010-09-29 13:23:09 -07002591 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2592 goto recursion_alert;
2593
David S. Millerc773e842008-07-08 23:13:53 -07002594 HARD_TX_LOCK(dev, txq, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595
Tom Herbert734664982011-11-28 16:32:44 +00002596 if (!netif_xmit_stopped(txq)) {
Eric Dumazet745e20f2010-09-29 13:23:09 -07002597 __this_cpu_inc(xmit_recursion);
Patrick McHardy572a9d72009-11-10 06:14:14 +00002598 rc = dev_hard_start_xmit(skb, dev, txq);
Eric Dumazet745e20f2010-09-29 13:23:09 -07002599 __this_cpu_dec(xmit_recursion);
Patrick McHardy572a9d72009-11-10 06:14:14 +00002600 if (dev_xmit_complete(rc)) {
David S. Millerc773e842008-07-08 23:13:53 -07002601 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602 goto out;
2603 }
2604 }
David S. Millerc773e842008-07-08 23:13:53 -07002605 HARD_TX_UNLOCK(dev, txq);
Joe Perchese87cc472012-05-13 21:56:26 +00002606 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
2607 dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608 } else {
2609 /* Recursion detected! It can happen,
Eric Dumazet745e20f2010-09-29 13:23:09 -07002610 * unfortunately.
2611 */
2612recursion_alert:
Joe Perchese87cc472012-05-13 21:56:26 +00002613 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
2614 dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002615 }
2616 }
2617
2618 rc = -ENETDOWN;
Herbert Xud4828d82006-06-22 02:28:18 -07002619 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002620
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621 kfree_skb(skb);
2622 return rc;
2623out:
Herbert Xud4828d82006-06-22 02:28:18 -07002624 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002625 return rc;
2626}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002627EXPORT_SYMBOL(dev_queue_xmit);
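/*
 * Illustrative usage sketch (not part of this file): a sender that has
 * already built an skb points it at the output device, sets a priority,
 * and queues it; the skb is consumed regardless of the return value.
 * build_tx_skb() and out_dev below are hypothetical names.
 *
 *	struct sk_buff *skb = build_tx_skb(out_dev, payload, len);
 *
 *	if (skb) {
 *		skb->dev = out_dev;
 *		skb->priority = TC_PRIO_CONTROL;
 *		if (dev_queue_xmit(skb) != NET_XMIT_SUCCESS)
 *			out_dev->stats.tx_dropped++;
 *	}
 */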
Linus Torvalds1da177e2005-04-16 15:20:36 -07002628
2629
2630/*=======================================================================
2631 Receiver routines
2632 =======================================================================*/
2633
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07002634int netdev_max_backlog __read_mostly = 1000;
Eric Dumazet3b098e22010-05-15 23:57:10 -07002635int netdev_tstamp_prequeue __read_mostly = 1;
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07002636int netdev_budget __read_mostly = 300;
2637int weight_p __read_mostly = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002638
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07002639/* Called with irq disabled */
2640static inline void ____napi_schedule(struct softnet_data *sd,
2641 struct napi_struct *napi)
2642{
2643 list_add_tail(&napi->poll_list, &sd->poll_list);
2644 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2645}
2646
Krishna Kumarbfb564e2010-08-04 06:15:52 +00002647/*
2648 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
Tom Herbertbdeab992011-08-14 19:45:55 +00002649 * and src/dst port numbers. Sets rxhash in skb to a non-zero hash value
2650 * on success; zero indicates no valid hash. Also sets l4_rxhash in skb
2651 * if the hash is a canonical 4-tuple hash over transport ports.
Krishna Kumarbfb564e2010-08-04 06:15:52 +00002652 */
Tom Herbertbdeab992011-08-14 19:45:55 +00002653void __skb_get_rxhash(struct sk_buff *skb)
Krishna Kumarbfb564e2010-08-04 06:15:52 +00002654{
Eric Dumazet4504b862011-11-28 05:23:23 +00002655 struct flow_keys keys;
2656 u32 hash;
Krishna Kumarbfb564e2010-08-04 06:15:52 +00002657
Eric Dumazet4504b862011-11-28 05:23:23 +00002658 if (!skb_flow_dissect(skb, &keys))
2659 return;
Krishna Kumarbfb564e2010-08-04 06:15:52 +00002660
Chema Gonzalez68622342012-09-07 13:40:50 +00002661 if (keys.ports)
Eric Dumazet4504b862011-11-28 05:23:23 +00002662 skb->l4_rxhash = 1;
Krishna Kumarbfb564e2010-08-04 06:15:52 +00002663
2664 /* get a consistent hash (same value on both flow directions) */
Chema Gonzalez68622342012-09-07 13:40:50 +00002665 if (((__force u32)keys.dst < (__force u32)keys.src) ||
2666 (((__force u32)keys.dst == (__force u32)keys.src) &&
2667 ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
Eric Dumazet4504b862011-11-28 05:23:23 +00002668 swap(keys.dst, keys.src);
Chema Gonzalez68622342012-09-07 13:40:50 +00002669 swap(keys.port16[0], keys.port16[1]);
2670 }
Krishna Kumarbfb564e2010-08-04 06:15:52 +00002671
Eric Dumazet4504b862011-11-28 05:23:23 +00002672 hash = jhash_3words((__force u32)keys.dst,
2673 (__force u32)keys.src,
2674 (__force u32)keys.ports, hashrnd);
Krishna Kumarbfb564e2010-08-04 06:15:52 +00002675 if (!hash)
2676 hash = 1;
2677
Tom Herbertbdeab992011-08-14 19:45:55 +00002678 skb->rxhash = hash;
Krishna Kumarbfb564e2010-08-04 06:15:52 +00002679}
2680EXPORT_SYMBOL(__skb_get_rxhash);
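/*
 * Illustrative sketch (not part of this file): most callers do not invoke
 * __skb_get_rxhash() directly but go through the skb_get_rxhash() wrapper,
 * which computes the hash once and then reuses the cached skb->rxhash:
 *
 *	u32 hash = skb_get_rxhash(skb);
 *
 *	if (hash)
 *		cpu = select_cpu_for_flow(hash);
 *
 * select_cpu_for_flow() is a hypothetical consumer; get_rps_cpu() below is
 * the real in-tree user of this hash.
 */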
2681
Eric Dumazetdf334542010-03-24 19:13:54 +00002682#ifdef CONFIG_RPS
Tom Herbertfec5e652010-04-16 16:01:27 -07002683
2684/* One global table that all flow-based protocols share. */
Eric Dumazet6e3f7fa2010-10-25 03:02:02 +00002685struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
Tom Herbertfec5e652010-04-16 16:01:27 -07002686EXPORT_SYMBOL(rps_sock_flow_table);
2687
Ingo Molnarc5905af2012-02-24 08:31:31 +01002688struct static_key rps_needed __read_mostly;
Eric Dumazetadc93002011-11-17 03:13:26 +00002689
Ben Hutchingsc4454772011-01-19 11:03:53 +00002690static struct rps_dev_flow *
2691set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2692 struct rps_dev_flow *rflow, u16 next_cpu)
2693{
Ben Hutchings09994d12011-10-03 04:42:46 +00002694 if (next_cpu != RPS_NO_CPU) {
Ben Hutchingsc4454772011-01-19 11:03:53 +00002695#ifdef CONFIG_RFS_ACCEL
2696 struct netdev_rx_queue *rxqueue;
2697 struct rps_dev_flow_table *flow_table;
2698 struct rps_dev_flow *old_rflow;
2699 u32 flow_id;
2700 u16 rxq_index;
2701 int rc;
2702
2703 /* Should we steer this flow to a different hardware queue? */
Ben Hutchings69a19ee2011-02-15 20:32:04 +00002704 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2705 !(dev->features & NETIF_F_NTUPLE))
Ben Hutchingsc4454772011-01-19 11:03:53 +00002706 goto out;
2707 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2708 if (rxq_index == skb_get_rx_queue(skb))
2709 goto out;
2710
2711 rxqueue = dev->_rx + rxq_index;
2712 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2713 if (!flow_table)
2714 goto out;
2715 flow_id = skb->rxhash & flow_table->mask;
2716 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2717 rxq_index, flow_id);
2718 if (rc < 0)
2719 goto out;
2720 old_rflow = rflow;
2721 rflow = &flow_table->flows[flow_id];
Ben Hutchingsc4454772011-01-19 11:03:53 +00002722 rflow->filter = rc;
2723 if (old_rflow->filter == rflow->filter)
2724 old_rflow->filter = RPS_NO_FILTER;
2725 out:
2726#endif
2727 rflow->last_qtail =
Ben Hutchings09994d12011-10-03 04:42:46 +00002728 per_cpu(softnet_data, next_cpu).input_queue_head;
Ben Hutchingsc4454772011-01-19 11:03:53 +00002729 }
2730
Ben Hutchings09994d12011-10-03 04:42:46 +00002731 rflow->cpu = next_cpu;
Ben Hutchingsc4454772011-01-19 11:03:53 +00002732 return rflow;
2733}
2734
Tom Herbert0a9627f2010-03-16 08:03:29 +00002735/*
2736 * get_rps_cpu is called from netif_receive_skb and returns the target
2737 * CPU from the RPS map of the receiving queue for a given skb.
Eric Dumazetb0e28f12010-04-15 00:14:07 -07002738 * rcu_read_lock must be held on entry.
Tom Herbert0a9627f2010-03-16 08:03:29 +00002739 */
Tom Herbertfec5e652010-04-16 16:01:27 -07002740static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2741 struct rps_dev_flow **rflowp)
Tom Herbert0a9627f2010-03-16 08:03:29 +00002742{
Tom Herbert0a9627f2010-03-16 08:03:29 +00002743 struct netdev_rx_queue *rxqueue;
Eric Dumazet6e3f7fa2010-10-25 03:02:02 +00002744 struct rps_map *map;
Tom Herbertfec5e652010-04-16 16:01:27 -07002745 struct rps_dev_flow_table *flow_table;
2746 struct rps_sock_flow_table *sock_flow_table;
Tom Herbert0a9627f2010-03-16 08:03:29 +00002747 int cpu = -1;
Tom Herbertfec5e652010-04-16 16:01:27 -07002748 u16 tcpu;
Tom Herbert0a9627f2010-03-16 08:03:29 +00002749
Tom Herbert0a9627f2010-03-16 08:03:29 +00002750 if (skb_rx_queue_recorded(skb)) {
2751 u16 index = skb_get_rx_queue(skb);
Ben Hutchings62fe0b42010-09-27 08:24:33 +00002752 if (unlikely(index >= dev->real_num_rx_queues)) {
2753 WARN_ONCE(dev->real_num_rx_queues > 1,
2754 "%s received packet on queue %u, but number "
2755 "of RX queues is %u\n",
2756 dev->name, index, dev->real_num_rx_queues);
Tom Herbert0a9627f2010-03-16 08:03:29 +00002757 goto done;
2758 }
2759 rxqueue = dev->_rx + index;
2760 } else
2761 rxqueue = dev->_rx;
2762
Eric Dumazet6e3f7fa2010-10-25 03:02:02 +00002763 map = rcu_dereference(rxqueue->rps_map);
2764 if (map) {
Tom Herbert85875232011-01-31 16:23:42 -08002765 if (map->len == 1 &&
Eric Dumazet33d480c2011-08-11 19:30:52 +00002766 !rcu_access_pointer(rxqueue->rps_flow_table)) {
Changli Gao6febfca2010-09-03 23:12:37 +00002767 tcpu = map->cpus[0];
2768 if (cpu_online(tcpu))
2769 cpu = tcpu;
Tom Herbert0a9627f2010-03-16 08:03:29 +00002770 goto done;
Eric Dumazetb249dcb2010-04-19 21:56:38 +00002771 }
Eric Dumazet33d480c2011-08-11 19:30:52 +00002772 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
Tom Herbert0a9627f2010-03-16 08:03:29 +00002773 goto done;
Tom Herbert0a9627f2010-03-16 08:03:29 +00002774 }
2775
Changli Gao2d47b452010-08-17 19:00:56 +00002776 skb_reset_network_header(skb);
Krishna Kumarbfb564e2010-08-04 06:15:52 +00002777 if (!skb_get_rxhash(skb))
Tom Herbert0a9627f2010-03-16 08:03:29 +00002778 goto done;
Tom Herbert0a9627f2010-03-16 08:03:29 +00002779
Tom Herbertfec5e652010-04-16 16:01:27 -07002780 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2781 sock_flow_table = rcu_dereference(rps_sock_flow_table);
2782 if (flow_table && sock_flow_table) {
2783 u16 next_cpu;
2784 struct rps_dev_flow *rflow;
2785
2786 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2787 tcpu = rflow->cpu;
2788
2789 next_cpu = sock_flow_table->ents[skb->rxhash &
2790 sock_flow_table->mask];
2791
2792 /*
2793 * If the desired CPU (where last recvmsg was done) is
2794 * different from current CPU (one in the rx-queue flow
2795 * table entry), switch if one of the following holds:
2796 * - Current CPU is unset (equal to RPS_NO_CPU).
2797 * - Current CPU is offline.
2798 * - The current CPU's queue tail has advanced beyond the
2799 * last packet that was enqueued using this table entry.
2800 * This guarantees that all previous packets for the flow
2801 * have been dequeued, thus preserving in order delivery.
2802 */
2803 if (unlikely(tcpu != next_cpu) &&
2804 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2805 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
Ben Hutchingsc4454772011-01-19 11:03:53 +00002806 rflow->last_qtail)) >= 0))
2807 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
2808
Tom Herbertfec5e652010-04-16 16:01:27 -07002809 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2810 *rflowp = rflow;
2811 cpu = tcpu;
2812 goto done;
2813 }
2814 }
2815
Tom Herbert0a9627f2010-03-16 08:03:29 +00002816 if (map) {
Tom Herbertfec5e652010-04-16 16:01:27 -07002817 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
Tom Herbert0a9627f2010-03-16 08:03:29 +00002818
2819 if (cpu_online(tcpu)) {
2820 cpu = tcpu;
2821 goto done;
2822 }
2823 }
2824
2825done:
Tom Herbert0a9627f2010-03-16 08:03:29 +00002826 return cpu;
2827}
2828
Ben Hutchingsc4454772011-01-19 11:03:53 +00002829#ifdef CONFIG_RFS_ACCEL
2830
2831/**
2832 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
2833 * @dev: Device on which the filter was set
2834 * @rxq_index: RX queue index
2835 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
2836 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
2837 *
2838 * Drivers that implement ndo_rx_flow_steer() should periodically call
2839 * this function for each installed filter and remove the filters for
2840 * which it returns %true.
2841 */
2842bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
2843 u32 flow_id, u16 filter_id)
2844{
2845 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
2846 struct rps_dev_flow_table *flow_table;
2847 struct rps_dev_flow *rflow;
2848 bool expire = true;
2849 int cpu;
2850
2851 rcu_read_lock();
2852 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2853 if (flow_table && flow_id <= flow_table->mask) {
2854 rflow = &flow_table->flows[flow_id];
2855 cpu = ACCESS_ONCE(rflow->cpu);
2856 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
2857 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
2858 rflow->last_qtail) <
2859 (int)(10 * flow_table->mask)))
2860 expire = false;
2861 }
2862 rcu_read_unlock();
2863 return expire;
2864}
2865EXPORT_SYMBOL(rps_may_expire_flow);
2866
2867#endif /* CONFIG_RFS_ACCEL */
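/*
 * Illustrative sketch (not part of this file): a driver that programs
 * hardware flow-steering filters from ndo_rx_flow_steer() typically scans
 * its filter table from a periodic work item and asks the stack whether
 * each filter may be removed. my_priv, my_filters and their fields are
 * hypothetical driver state; the filter index doubles as filter_id here.
 *
 *	for (i = 0; i < my_priv->num_filters; i++) {
 *		struct my_filter *f = &my_priv->my_filters[i];
 *
 *		if (f->in_use &&
 *		    rps_may_expire_flow(my_priv->netdev, f->rxq_index,
 *					f->flow_id, i))
 *			my_remove_hw_filter(my_priv, f);
 *	}
 */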
2868
Tom Herbert0a9627f2010-03-16 08:03:29 +00002869/* Called from hardirq (IPI) context */
Eric Dumazete36fa2f2010-04-19 21:17:14 +00002870static void rps_trigger_softirq(void *data)
Tom Herbert0a9627f2010-03-16 08:03:29 +00002871{
Eric Dumazete36fa2f2010-04-19 21:17:14 +00002872 struct softnet_data *sd = data;
2873
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07002874 ____napi_schedule(sd, &sd->backlog);
Changli Gaodee42872010-05-02 05:42:16 +00002875 sd->received_rps++;
Tom Herbert0a9627f2010-03-16 08:03:29 +00002876}
Eric Dumazete36fa2f2010-04-19 21:17:14 +00002877
Tom Herbertfec5e652010-04-16 16:01:27 -07002878#endif /* CONFIG_RPS */
Tom Herbert0a9627f2010-03-16 08:03:29 +00002879
2880/*
Eric Dumazete36fa2f2010-04-19 21:17:14 +00002881 * Check if this softnet_data structure belongs to another CPU.
2882 * If yes, queue it to our IPI list and return 1;
2883 * if no, return 0.
2884 */
2885static int rps_ipi_queued(struct softnet_data *sd)
2886{
2887#ifdef CONFIG_RPS
2888 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2889
2890 if (sd != mysd) {
2891 sd->rps_ipi_next = mysd->rps_ipi_list;
2892 mysd->rps_ipi_list = sd;
2893
2894 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2895 return 1;
2896 }
2897#endif /* CONFIG_RPS */
2898 return 0;
2899}
2900
2901/*
Tom Herbert0a9627f2010-03-16 08:03:29 +00002902 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
2903 * queue (may be a remote CPU queue).
2904 */
Tom Herbertfec5e652010-04-16 16:01:27 -07002905static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2906 unsigned int *qtail)
Tom Herbert0a9627f2010-03-16 08:03:29 +00002907{
Eric Dumazete36fa2f2010-04-19 21:17:14 +00002908 struct softnet_data *sd;
Tom Herbert0a9627f2010-03-16 08:03:29 +00002909 unsigned long flags;
2910
Eric Dumazete36fa2f2010-04-19 21:17:14 +00002911 sd = &per_cpu(softnet_data, cpu);
Tom Herbert0a9627f2010-03-16 08:03:29 +00002912
2913 local_irq_save(flags);
Tom Herbert0a9627f2010-03-16 08:03:29 +00002914
Eric Dumazete36fa2f2010-04-19 21:17:14 +00002915 rps_lock(sd);
Changli Gao6e7676c2010-04-27 15:07:33 -07002916 if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2917 if (skb_queue_len(&sd->input_pkt_queue)) {
Tom Herbert0a9627f2010-03-16 08:03:29 +00002918enqueue:
Eric Dumazete36fa2f2010-04-19 21:17:14 +00002919 __skb_queue_tail(&sd->input_pkt_queue, skb);
Tom Herbert76cc8b12010-05-20 18:37:59 +00002920 input_queue_tail_incr_save(sd, qtail);
Eric Dumazete36fa2f2010-04-19 21:17:14 +00002921 rps_unlock(sd);
Changli Gao152102c2010-03-30 20:16:22 +00002922 local_irq_restore(flags);
Tom Herbert0a9627f2010-03-16 08:03:29 +00002923 return NET_RX_SUCCESS;
2924 }
2925
Eric Dumazetebda37c22010-05-06 23:51:21 +00002926 /* Schedule NAPI for backlog device
2927 * We can use a non-atomic operation since we own the queue lock.
2928 */
2929 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
Eric Dumazete36fa2f2010-04-19 21:17:14 +00002930 if (!rps_ipi_queued(sd))
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07002931 ____napi_schedule(sd, &sd->backlog);
Tom Herbert0a9627f2010-03-16 08:03:29 +00002932 }
2933 goto enqueue;
2934 }
2935
Changli Gaodee42872010-05-02 05:42:16 +00002936 sd->dropped++;
Eric Dumazete36fa2f2010-04-19 21:17:14 +00002937 rps_unlock(sd);
Tom Herbert0a9627f2010-03-16 08:03:29 +00002938
Tom Herbert0a9627f2010-03-16 08:03:29 +00002939 local_irq_restore(flags);
2940
Eric Dumazetcaf586e2010-09-30 21:06:55 +00002941 atomic_long_inc(&skb->dev->rx_dropped);
Tom Herbert0a9627f2010-03-16 08:03:29 +00002942 kfree_skb(skb);
2943 return NET_RX_DROP;
2944}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002945
Linus Torvalds1da177e2005-04-16 15:20:36 -07002946/**
2947 * netif_rx - post buffer to the network code
2948 * @skb: buffer to post
2949 *
2950 * This function receives a packet from a device driver and queues it for
2951 * the upper (protocol) levels to process. It always succeeds. The buffer
2952 * may be dropped during processing for congestion control or by the
2953 * protocol layers.
2954 *
2955 * return values:
2956 * NET_RX_SUCCESS (no congestion)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002957 * NET_RX_DROP (packet was dropped)
2958 *
2959 */
2960
2961int netif_rx(struct sk_buff *skb)
2962{
Eric Dumazetb0e28f12010-04-15 00:14:07 -07002963 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002964
2965 /* if netpoll wants it, pretend we never saw it */
2966 if (netpoll_rx(skb))
2967 return NET_RX_DROP;
2968
Eric Dumazet588f0332011-11-15 04:12:55 +00002969 net_timestamp_check(netdev_tstamp_prequeue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002970
Koki Sanagicf66ba52010-08-23 18:45:02 +09002971 trace_netif_rx(skb);
Eric Dumazetdf334542010-03-24 19:13:54 +00002972#ifdef CONFIG_RPS
Ingo Molnarc5905af2012-02-24 08:31:31 +01002973 if (static_key_false(&rps_needed)) {
Tom Herbertfec5e652010-04-16 16:01:27 -07002974 struct rps_dev_flow voidflow, *rflow = &voidflow;
Eric Dumazetb0e28f12010-04-15 00:14:07 -07002975 int cpu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002976
Changli Gaocece1942010-08-07 20:35:43 -07002977 preempt_disable();
Eric Dumazetb0e28f12010-04-15 00:14:07 -07002978 rcu_read_lock();
Tom Herbertfec5e652010-04-16 16:01:27 -07002979
2980 cpu = get_rps_cpu(skb->dev, skb, &rflow);
Eric Dumazetb0e28f12010-04-15 00:14:07 -07002981 if (cpu < 0)
2982 cpu = smp_processor_id();
Tom Herbertfec5e652010-04-16 16:01:27 -07002983
2984 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2985
Eric Dumazetb0e28f12010-04-15 00:14:07 -07002986 rcu_read_unlock();
Changli Gaocece1942010-08-07 20:35:43 -07002987 preempt_enable();
Eric Dumazetadc93002011-11-17 03:13:26 +00002988 } else
2989#endif
Tom Herbertfec5e652010-04-16 16:01:27 -07002990 {
2991 unsigned int qtail;
2992 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
2993 put_cpu();
2994 }
Eric Dumazetb0e28f12010-04-15 00:14:07 -07002995 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002996}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002997EXPORT_SYMBOL(netif_rx);
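/*
 * Illustrative sketch (not part of this file): a non-NAPI driver's receive
 * interrupt handler usually copies the frame into a fresh skb and hands it
 * to netif_rx(). my_dev, FRAME_LEN and hw_copy_frame() are hypothetical.
 *
 *	struct sk_buff *skb = netdev_alloc_skb_ip_align(my_dev, FRAME_LEN);
 *
 *	if (!skb) {
 *		my_dev->stats.rx_dropped++;
 *		return;
 *	}
 *	hw_copy_frame(my_dev, skb_put(skb, FRAME_LEN));
 *	skb->protocol = eth_type_trans(skb, my_dev);
 *	netif_rx(skb);
 */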
Linus Torvalds1da177e2005-04-16 15:20:36 -07002998
2999int netif_rx_ni(struct sk_buff *skb)
3000{
3001 int err;
3002
3003 preempt_disable();
3004 err = netif_rx(skb);
3005 if (local_softirq_pending())
3006 do_softirq();
3007 preempt_enable();
3008
3009 return err;
3010}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003011EXPORT_SYMBOL(netif_rx_ni);
3012
Linus Torvalds1da177e2005-04-16 15:20:36 -07003013static void net_tx_action(struct softirq_action *h)
3014{
3015 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3016
3017 if (sd->completion_queue) {
3018 struct sk_buff *clist;
3019
3020 local_irq_disable();
3021 clist = sd->completion_queue;
3022 sd->completion_queue = NULL;
3023 local_irq_enable();
3024
3025 while (clist) {
3026 struct sk_buff *skb = clist;
3027 clist = clist->next;
3028
Ilpo Järvinen547b7922008-07-25 21:43:18 -07003029 WARN_ON(atomic_read(&skb->users));
Koki Sanagi07dc22e2010-08-23 18:46:12 +09003030 trace_kfree_skb(skb, net_tx_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003031 __kfree_skb(skb);
3032 }
3033 }
3034
3035 if (sd->output_queue) {
David S. Miller37437bb2008-07-16 02:15:04 -07003036 struct Qdisc *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003037
3038 local_irq_disable();
3039 head = sd->output_queue;
3040 sd->output_queue = NULL;
Changli Gaoa9cbd582010-04-26 23:06:24 +00003041 sd->output_queue_tailp = &sd->output_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003042 local_irq_enable();
3043
3044 while (head) {
David S. Miller37437bb2008-07-16 02:15:04 -07003045 struct Qdisc *q = head;
3046 spinlock_t *root_lock;
3047
Linus Torvalds1da177e2005-04-16 15:20:36 -07003048 head = head->next_sched;
3049
David S. Miller5fb66222008-08-02 20:02:43 -07003050 root_lock = qdisc_lock(q);
David S. Miller37437bb2008-07-16 02:15:04 -07003051 if (spin_trylock(root_lock)) {
Jarek Poplawskidef82a12008-08-17 21:54:43 -07003052 smp_mb__before_clear_bit();
3053 clear_bit(__QDISC_STATE_SCHED,
3054 &q->state);
David S. Miller37437bb2008-07-16 02:15:04 -07003055 qdisc_run(q);
3056 spin_unlock(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003057 } else {
David S. Miller195648b2008-08-19 04:00:36 -07003058 if (!test_bit(__QDISC_STATE_DEACTIVATED,
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07003059 &q->state)) {
David S. Miller195648b2008-08-19 04:00:36 -07003060 __netif_reschedule(q);
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07003061 } else {
3062 smp_mb__before_clear_bit();
3063 clear_bit(__QDISC_STATE_SCHED,
3064 &q->state);
3065 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003066 }
3067 }
3068 }
3069}
3070
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003071#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3072 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
Michał Mirosławda678292009-06-05 05:35:28 +00003073/* This hook is defined here for ATM LANE */
3074int (*br_fdb_test_addr_hook)(struct net_device *dev,
3075 unsigned char *addr) __read_mostly;
Stephen Hemminger4fb019a2009-09-11 11:50:08 -07003076EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
Michał Mirosławda678292009-06-05 05:35:28 +00003077#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07003078
Linus Torvalds1da177e2005-04-16 15:20:36 -07003079#ifdef CONFIG_NET_CLS_ACT
3080/* TODO: Maybe we should just force sch_ingress to be compiled in
3081 * whenever CONFIG_NET_CLS_ACT is. Otherwise we currently pay a few
3082 * useless instructions (a compare and two extra stores) when sch_ingress
3083 * is not enabled but CONFIG_NET_CLS_ACT is.
Lucas De Marchi25985ed2011-03-30 22:57:33 -03003084 * NOTE: This doesn't stop any functionality; if you don't have
3085 * the ingress scheduler, you just can't add policies on ingress.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003086 *
3087 */
Eric Dumazet24824a02010-10-02 06:11:55 +00003088static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003089{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003090 struct net_device *dev = skb->dev;
Herbert Xuf697c3e2007-10-14 00:38:47 -07003091 u32 ttl = G_TC_RTTL(skb->tc_verd);
David S. Miller555353c2008-07-08 17:33:13 -07003092 int result = TC_ACT_OK;
3093 struct Qdisc *q;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003094
Stephen Hemmingerde384832010-08-01 00:33:23 -07003095 if (unlikely(MAX_RED_LOOP < ttl++)) {
Joe Perchese87cc472012-05-13 21:56:26 +00003096 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3097 skb->skb_iif, dev->ifindex);
Herbert Xuf697c3e2007-10-14 00:38:47 -07003098 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003099 }
3100
Herbert Xuf697c3e2007-10-14 00:38:47 -07003101 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3102 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3103
David S. Miller83874002008-07-17 00:53:03 -07003104 q = rxq->qdisc;
David S. Miller8d50b532008-07-30 02:37:46 -07003105 if (q != &noop_qdisc) {
David S. Miller83874002008-07-17 00:53:03 -07003106 spin_lock(qdisc_lock(q));
David S. Millera9312ae2008-08-17 21:51:03 -07003107 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3108 result = qdisc_enqueue_root(skb, q);
David S. Miller83874002008-07-17 00:53:03 -07003109 spin_unlock(qdisc_lock(q));
3110 }
Herbert Xuf697c3e2007-10-14 00:38:47 -07003111
Linus Torvalds1da177e2005-04-16 15:20:36 -07003112 return result;
3113}
Herbert Xuf697c3e2007-10-14 00:38:47 -07003114
3115static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3116 struct packet_type **pt_prev,
3117 int *ret, struct net_device *orig_dev)
3118{
Eric Dumazet24824a02010-10-02 06:11:55 +00003119 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3120
3121 if (!rxq || rxq->qdisc == &noop_qdisc)
Herbert Xuf697c3e2007-10-14 00:38:47 -07003122 goto out;
3123
3124 if (*pt_prev) {
3125 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3126 *pt_prev = NULL;
Herbert Xuf697c3e2007-10-14 00:38:47 -07003127 }
3128
Eric Dumazet24824a02010-10-02 06:11:55 +00003129 switch (ing_filter(skb, rxq)) {
Herbert Xuf697c3e2007-10-14 00:38:47 -07003130 case TC_ACT_SHOT:
3131 case TC_ACT_STOLEN:
3132 kfree_skb(skb);
3133 return NULL;
3134 }
3135
3136out:
3137 skb->tc_verd = 0;
3138 return skb;
3139}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003140#endif
3141
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003142/**
3143 * netdev_rx_handler_register - register receive handler
3144 * @dev: device to register a handler for
3145 * @rx_handler: receive handler to register
Jiri Pirko93e2c322010-06-10 03:34:59 +00003146 * @rx_handler_data: data pointer that is used by rx handler
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003147 *
3148 * Register a receive handler for a device. This handler will then be
3149 * called from __netif_receive_skb. A negative errno code is returned
3150 * on a failure.
3151 *
3152 * The caller must hold the rtnl_mutex.
Jiri Pirko8a4eb572011-03-12 03:14:39 +00003153 *
3154 * For a general description of rx_handler, see enum rx_handler_result.
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003155 */
3156int netdev_rx_handler_register(struct net_device *dev,
Jiri Pirko93e2c322010-06-10 03:34:59 +00003157 rx_handler_func_t *rx_handler,
3158 void *rx_handler_data)
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003159{
3160 ASSERT_RTNL();
3161
3162 if (dev->rx_handler)
3163 return -EBUSY;
3164
Jiri Pirko93e2c322010-06-10 03:34:59 +00003165 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003166 rcu_assign_pointer(dev->rx_handler, rx_handler);
3167
3168 return 0;
3169}
3170EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
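/*
 * Illustrative sketch (not part of this file): virtual devices such as the
 * bridge or macvlan claim a lower device's traffic by attaching an
 * rx_handler. my_upper_rx(), my_port and my_port_wants() are hypothetical;
 * registration runs under RTNL.
 *
 *	static rx_handler_result_t my_upper_rx(struct sk_buff **pskb)
 *	{
 *		struct sk_buff *skb = *pskb;
 *		struct my_port *port = rcu_dereference(skb->dev->rx_handler_data);
 *
 *		if (!my_port_wants(port, skb))
 *			return RX_HANDLER_PASS;
 *		skb->dev = port->upper_dev;
 *		*pskb = skb;
 *		return RX_HANDLER_ANOTHER;
 *	}
 *
 *	rtnl_lock();
 *	err = netdev_rx_handler_register(lower_dev, my_upper_rx, my_port);
 *	rtnl_unlock();
 */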
3171
3172/**
3173 * netdev_rx_handler_unregister - unregister receive handler
3174 * @dev: device to unregister a handler from
3175 *
3176 * Unregister a receive handler from a device.
3177 *
3178 * The caller must hold the rtnl_mutex.
3179 */
3180void netdev_rx_handler_unregister(struct net_device *dev)
3181{
3182
3183 ASSERT_RTNL();
Stephen Hemmingera9b3cd72011-08-01 16:19:00 +00003184 RCU_INIT_POINTER(dev->rx_handler, NULL);
3185 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003186}
3187EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3188
Mel Gormanb4b9e352012-07-31 16:44:26 -07003189/*
3190 * Limit the use of PFMEMALLOC reserves to those protocols that implement
3191 * the special handling of PFMEMALLOC skbs.
3192 */
3193static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3194{
3195 switch (skb->protocol) {
3196 case __constant_htons(ETH_P_ARP):
3197 case __constant_htons(ETH_P_IP):
3198 case __constant_htons(ETH_P_IPV6):
3199 case __constant_htons(ETH_P_8021Q):
3200 return true;
3201 default:
3202 return false;
3203 }
3204}
3205
Eric Dumazet10f744d2010-03-28 23:07:20 -07003206static int __netif_receive_skb(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003207{
3208 struct packet_type *ptype, *pt_prev;
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003209 rx_handler_func_t *rx_handler;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07003210 struct net_device *orig_dev;
David S. Miller63d8ea72011-02-28 10:48:59 -08003211 struct net_device *null_or_dev;
Jiri Pirko8a4eb572011-03-12 03:14:39 +00003212 bool deliver_exact = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003213 int ret = NET_RX_DROP;
Al Viro252e3342006-11-14 20:48:11 -08003214 __be16 type;
Mel Gormanb4b9e352012-07-31 16:44:26 -07003215 unsigned long pflags = current->flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003216
Eric Dumazet588f0332011-11-15 04:12:55 +00003217 net_timestamp_check(!netdev_tstamp_prequeue, skb);
Eric Dumazet81bbb3d2009-09-30 16:42:42 -07003218
Koki Sanagicf66ba52010-08-23 18:45:02 +09003219 trace_netif_receive_skb(skb);
Patrick McHardy9b22ea52008-11-04 14:49:57 -08003220
Mel Gormanb4b9e352012-07-31 16:44:26 -07003221 /*
3222 * PFMEMALLOC skbs are special, they should
3223 * - be delivered to SOCK_MEMALLOC sockets only
3224 * - stay away from userspace
3225 * - have bounded memory usage
3226 *
3227 * Use PF_MEMALLOC as this saves us from propagating the allocation
3228 * context down to all allocation sites.
3229 */
3230 if (sk_memalloc_socks() && skb_pfmemalloc(skb))
3231 current->flags |= PF_MEMALLOC;
3232
Linus Torvalds1da177e2005-04-16 15:20:36 -07003233 /* if we've gotten here through NAPI, check netpoll */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003234 if (netpoll_receive_skb(skb))
Mel Gormanb4b9e352012-07-31 16:44:26 -07003235 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003236
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07003237 orig_dev = skb->dev;
Jiri Pirko1765a572011-02-12 06:48:36 +00003238
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07003239 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03003240 skb_reset_transport_header(skb);
Jiri Pirko0b5c9db2011-06-10 06:56:58 +00003241 skb_reset_mac_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003242
3243 pt_prev = NULL;
3244
3245 rcu_read_lock();
3246
David S. Miller63d8ea72011-02-28 10:48:59 -08003247another_round:
David S. Millerb6858172012-07-23 16:27:54 -07003248 skb->skb_iif = skb->dev->ifindex;
David S. Miller63d8ea72011-02-28 10:48:59 -08003249
3250 __this_cpu_inc(softnet_data.processed);
3251
Jiri Pirkobcc6d472011-04-07 19:48:33 +00003252 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3253 skb = vlan_untag(skb);
3254 if (unlikely(!skb))
Mel Gormanb4b9e352012-07-31 16:44:26 -07003255 goto unlock;
Jiri Pirkobcc6d472011-04-07 19:48:33 +00003256 }
3257
Linus Torvalds1da177e2005-04-16 15:20:36 -07003258#ifdef CONFIG_NET_CLS_ACT
3259 if (skb->tc_verd & TC_NCLS) {
3260 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3261 goto ncls;
3262 }
3263#endif
3264
Mel Gormanb4b9e352012-07-31 16:44:26 -07003265 if (sk_memalloc_socks() && skb_pfmemalloc(skb))
3266 goto skip_taps;
3267
Linus Torvalds1da177e2005-04-16 15:20:36 -07003268 list_for_each_entry_rcu(ptype, &ptype_all, list) {
David S. Miller63d8ea72011-02-28 10:48:59 -08003269 if (!ptype->dev || ptype->dev == skb->dev) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003270 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07003271 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003272 pt_prev = ptype;
3273 }
3274 }
3275
Mel Gormanb4b9e352012-07-31 16:44:26 -07003276skip_taps:
Linus Torvalds1da177e2005-04-16 15:20:36 -07003277#ifdef CONFIG_NET_CLS_ACT
Herbert Xuf697c3e2007-10-14 00:38:47 -07003278 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3279 if (!skb)
Mel Gormanb4b9e352012-07-31 16:44:26 -07003280 goto unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003281ncls:
3282#endif
3283
Mel Gormanb4b9e352012-07-31 16:44:26 -07003284 if (sk_memalloc_socks() && skb_pfmemalloc(skb)
3285 && !skb_pfmemalloc_protocol(skb))
3286 goto drop;
3287
Eric Dumazet6a32e4f2011-10-29 06:13:39 +00003288 rx_handler = rcu_dereference(skb->dev->rx_handler);
John Fastabend24257172011-10-10 09:16:41 +00003289 if (vlan_tx_tag_present(skb)) {
3290 if (pt_prev) {
3291 ret = deliver_skb(skb, pt_prev, orig_dev);
3292 pt_prev = NULL;
3293 }
Eric Dumazet6a32e4f2011-10-29 06:13:39 +00003294 if (vlan_do_receive(&skb, !rx_handler))
John Fastabend24257172011-10-10 09:16:41 +00003295 goto another_round;
3296 else if (unlikely(!skb))
Mel Gormanb4b9e352012-07-31 16:44:26 -07003297 goto unlock;
John Fastabend24257172011-10-10 09:16:41 +00003298 }
3299
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003300 if (rx_handler) {
3301 if (pt_prev) {
3302 ret = deliver_skb(skb, pt_prev, orig_dev);
3303 pt_prev = NULL;
3304 }
Jiri Pirko8a4eb572011-03-12 03:14:39 +00003305 switch (rx_handler(&skb)) {
3306 case RX_HANDLER_CONSUMED:
Mel Gormanb4b9e352012-07-31 16:44:26 -07003307 goto unlock;
Jiri Pirko8a4eb572011-03-12 03:14:39 +00003308 case RX_HANDLER_ANOTHER:
David S. Miller63d8ea72011-02-28 10:48:59 -08003309 goto another_round;
Jiri Pirko8a4eb572011-03-12 03:14:39 +00003310 case RX_HANDLER_EXACT:
3311 deliver_exact = true;
3312 case RX_HANDLER_PASS:
3313 break;
3314 default:
3315 BUG();
3316 }
Jiri Pirkoab95bfe2010-06-01 21:52:08 +00003317 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003318
David S. Miller63d8ea72011-02-28 10:48:59 -08003319 /* deliver only exact match when indicated */
Jiri Pirko8a4eb572011-03-12 03:14:39 +00003320 null_or_dev = deliver_exact ? skb->dev : NULL;
Andy Gospodarek1f3c8802009-12-14 10:48:58 +00003321
Linus Torvalds1da177e2005-04-16 15:20:36 -07003322 type = skb->protocol;
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003323 list_for_each_entry_rcu(ptype,
3324 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
David S. Miller63d8ea72011-02-28 10:48:59 -08003325 if (ptype->type == type &&
Jiri Pirkoe3f48d32011-02-28 20:26:31 +00003326 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3327 ptype->dev == orig_dev)) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003328 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07003329 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003330 pt_prev = ptype;
3331 }
3332 }
3333
3334 if (pt_prev) {
Michael S. Tsirkin1080e512012-07-20 09:23:17 +00003335 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
3336 ret = -ENOMEM;
3337 else
3338 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003339 } else {
Mel Gormanb4b9e352012-07-31 16:44:26 -07003340drop:
Eric Dumazetcaf586e2010-09-30 21:06:55 +00003341 atomic_long_inc(&skb->dev->rx_dropped);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003342 kfree_skb(skb);
3343 /* Jamal, now you will not be able to escape explaining
3344 * to me how you were going to use this. :-)
3345 */
3346 ret = NET_RX_DROP;
3347 }
3348
Mel Gormanb4b9e352012-07-31 16:44:26 -07003349unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07003350 rcu_read_unlock();
Mel Gormanb4b9e352012-07-31 16:44:26 -07003351out:
3352 tsk_restore_flags(current, pflags, PF_MEMALLOC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003353 return ret;
3354}
Tom Herbert0a9627f2010-03-16 08:03:29 +00003355
3356/**
3357 * netif_receive_skb - process receive buffer from network
3358 * @skb: buffer to process
3359 *
3360 * netif_receive_skb() is the main receive data processing function.
3361 * It always succeeds. The buffer may be dropped during processing
3362 * for congestion control or by the protocol layers.
3363 *
3364 * This function may only be called from softirq context and interrupts
3365 * should be enabled.
3366 *
3367 * Return values (usually ignored):
3368 * NET_RX_SUCCESS: no congestion
3369 * NET_RX_DROP: packet was dropped
3370 */
3371int netif_receive_skb(struct sk_buff *skb)
3372{
Eric Dumazet588f0332011-11-15 04:12:55 +00003373 net_timestamp_check(netdev_tstamp_prequeue, skb);
Eric Dumazet3b098e22010-05-15 23:57:10 -07003374
Richard Cochranc1f19b52010-07-17 08:49:36 +00003375 if (skb_defer_rx_timestamp(skb))
3376 return NET_RX_SUCCESS;
3377
Eric Dumazetdf334542010-03-24 19:13:54 +00003378#ifdef CONFIG_RPS
Ingo Molnarc5905af2012-02-24 08:31:31 +01003379 if (static_key_false(&rps_needed)) {
Eric Dumazet3b098e22010-05-15 23:57:10 -07003380 struct rps_dev_flow voidflow, *rflow = &voidflow;
3381 int cpu, ret;
Tom Herbert0a9627f2010-03-16 08:03:29 +00003382
Eric Dumazet3b098e22010-05-15 23:57:10 -07003383 rcu_read_lock();
Tom Herbert0a9627f2010-03-16 08:03:29 +00003384
Eric Dumazet3b098e22010-05-15 23:57:10 -07003385 cpu = get_rps_cpu(skb->dev, skb, &rflow);
Tom Herbertfec5e652010-04-16 16:01:27 -07003386
Eric Dumazet3b098e22010-05-15 23:57:10 -07003387 if (cpu >= 0) {
3388 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3389 rcu_read_unlock();
Eric Dumazetadc93002011-11-17 03:13:26 +00003390 return ret;
Eric Dumazet3b098e22010-05-15 23:57:10 -07003391 }
Eric Dumazetadc93002011-11-17 03:13:26 +00003392 rcu_read_unlock();
Tom Herbertfec5e652010-04-16 16:01:27 -07003393 }
Tom Herbert1e94d722010-03-18 17:45:44 -07003394#endif
Eric Dumazetadc93002011-11-17 03:13:26 +00003395 return __netif_receive_skb(skb);
Tom Herbert0a9627f2010-03-16 08:03:29 +00003396}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003397EXPORT_SYMBOL(netif_receive_skb);
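/*
 * Illustrative sketch (not part of this file): a NAPI driver feeds received
 * frames to the stack from its poll callback and re-enables interrupts once
 * it has done less work than the budget allowed. my_adapter, my_rx_clean()
 * and my_enable_irq() are hypothetical; my_rx_clean() is assumed to pass
 * each completed skb to netif_receive_skb().
 *
 *	static int my_poll(struct napi_struct *napi, int budget)
 *	{
 *		struct my_adapter *ad = container_of(napi, struct my_adapter, napi);
 *		int done = my_rx_clean(ad, budget);
 *
 *		if (done < budget) {
 *			napi_complete(napi);
 *			my_enable_irq(ad);
 *		}
 *		return done;
 *	}
 */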
Linus Torvalds1da177e2005-04-16 15:20:36 -07003398
Eric Dumazet88751272010-04-19 05:07:33 +00003399/* Network device is going away; flush any packets still pending.
3400 * Called with irqs disabled.
3401 */
Changli Gao152102c2010-03-30 20:16:22 +00003402static void flush_backlog(void *arg)
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003403{
Changli Gao152102c2010-03-30 20:16:22 +00003404 struct net_device *dev = arg;
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003405 struct softnet_data *sd = &__get_cpu_var(softnet_data);
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003406 struct sk_buff *skb, *tmp;
3407
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003408 rps_lock(sd);
Changli Gao6e7676c2010-04-27 15:07:33 -07003409 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003410 if (skb->dev == dev) {
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003411 __skb_unlink(skb, &sd->input_pkt_queue);
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003412 kfree_skb(skb);
Tom Herbert76cc8b12010-05-20 18:37:59 +00003413 input_queue_head_incr(sd);
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003414 }
Changli Gao6e7676c2010-04-27 15:07:33 -07003415 }
Eric Dumazete36fa2f2010-04-19 21:17:14 +00003416 rps_unlock(sd);
Changli Gao6e7676c2010-04-27 15:07:33 -07003417
3418 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3419 if (skb->dev == dev) {
3420 __skb_unlink(skb, &sd->process_queue);
3421 kfree_skb(skb);
Tom Herbert76cc8b12010-05-20 18:37:59 +00003422 input_queue_head_incr(sd);
Changli Gao6e7676c2010-04-27 15:07:33 -07003423 }
3424 }
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07003425}
3426
Herbert Xud565b0a2008-12-15 23:38:52 -08003427static int napi_gro_complete(struct sk_buff *skb)
3428{
3429 struct packet_type *ptype;
3430 __be16 type = skb->protocol;
3431 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3432 int err = -ENOENT;
3433
Herbert Xufc59f9a2009-04-14 15:11:06 -07003434 if (NAPI_GRO_CB(skb)->count == 1) {
3435 skb_shinfo(skb)->gso_size = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08003436 goto out;
Herbert Xufc59f9a2009-04-14 15:11:06 -07003437 }
Herbert Xud565b0a2008-12-15 23:38:52 -08003438
3439 rcu_read_lock();
3440 list_for_each_entry_rcu(ptype, head, list) {
3441 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
3442 continue;
3443
3444 err = ptype->gro_complete(skb);
3445 break;
3446 }
3447 rcu_read_unlock();
3448
3449 if (err) {
3450 WARN_ON(&ptype->list == head);
3451 kfree_skb(skb);
3452 return NET_RX_SUCCESS;
3453 }
3454
3455out:
Herbert Xud565b0a2008-12-15 23:38:52 -08003456 return netif_receive_skb(skb);
3457}
3458
Eric Dumazet86cac582010-08-31 18:25:32 +00003459inline void napi_gro_flush(struct napi_struct *napi)
Herbert Xud565b0a2008-12-15 23:38:52 -08003460{
3461 struct sk_buff *skb, *next;
3462
3463 for (skb = napi->gro_list; skb; skb = next) {
3464 next = skb->next;
3465 skb->next = NULL;
3466 napi_gro_complete(skb);
3467 }
3468
Herbert Xu4ae55442009-02-08 18:00:36 +00003469 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08003470 napi->gro_list = NULL;
3471}
Eric Dumazet86cac582010-08-31 18:25:32 +00003472EXPORT_SYMBOL(napi_gro_flush);
Herbert Xud565b0a2008-12-15 23:38:52 -08003473
Ben Hutchings5b252f02009-10-29 07:17:09 +00003474enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xud565b0a2008-12-15 23:38:52 -08003475{
3476 struct sk_buff **pp = NULL;
3477 struct packet_type *ptype;
3478 __be16 type = skb->protocol;
3479 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
Herbert Xu0da2afd52008-12-26 14:57:42 -08003480 int same_flow;
Herbert Xud565b0a2008-12-15 23:38:52 -08003481 int mac_len;
Ben Hutchings5b252f02009-10-29 07:17:09 +00003482 enum gro_result ret;
Herbert Xud565b0a2008-12-15 23:38:52 -08003483
Jarek Poplawskice9e76c2010-08-05 01:19:11 +00003484 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
Herbert Xud565b0a2008-12-15 23:38:52 -08003485 goto normal;
3486
David S. Miller21dc3302010-08-23 00:13:46 -07003487 if (skb_is_gso(skb) || skb_has_frag_list(skb))
Herbert Xuf17f5c92009-01-14 14:36:12 -08003488 goto normal;
3489
Herbert Xud565b0a2008-12-15 23:38:52 -08003490 rcu_read_lock();
3491 list_for_each_entry_rcu(ptype, head, list) {
Herbert Xud565b0a2008-12-15 23:38:52 -08003492 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
3493 continue;
3494
Herbert Xu86911732009-01-29 14:19:50 +00003495 skb_set_network_header(skb, skb_gro_offset(skb));
Herbert Xud565b0a2008-12-15 23:38:52 -08003496 mac_len = skb->network_header - skb->mac_header;
3497 skb->mac_len = mac_len;
3498 NAPI_GRO_CB(skb)->same_flow = 0;
3499 NAPI_GRO_CB(skb)->flush = 0;
Herbert Xu5d38a072009-01-04 16:13:40 -08003500 NAPI_GRO_CB(skb)->free = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08003501
Herbert Xud565b0a2008-12-15 23:38:52 -08003502 pp = ptype->gro_receive(&napi->gro_list, skb);
3503 break;
3504 }
3505 rcu_read_unlock();
3506
3507 if (&ptype->list == head)
3508 goto normal;
3509
Herbert Xu0da2afd52008-12-26 14:57:42 -08003510 same_flow = NAPI_GRO_CB(skb)->same_flow;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003511 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
Herbert Xu0da2afd52008-12-26 14:57:42 -08003512
Herbert Xud565b0a2008-12-15 23:38:52 -08003513 if (pp) {
3514 struct sk_buff *nskb = *pp;
3515
3516 *pp = nskb->next;
3517 nskb->next = NULL;
3518 napi_gro_complete(nskb);
Herbert Xu4ae55442009-02-08 18:00:36 +00003519 napi->gro_count--;
Herbert Xud565b0a2008-12-15 23:38:52 -08003520 }
3521
Herbert Xu0da2afd52008-12-26 14:57:42 -08003522 if (same_flow)
Herbert Xud565b0a2008-12-15 23:38:52 -08003523 goto ok;
3524
Herbert Xu4ae55442009-02-08 18:00:36 +00003525 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
Herbert Xud565b0a2008-12-15 23:38:52 -08003526 goto normal;
Herbert Xud565b0a2008-12-15 23:38:52 -08003527
Herbert Xu4ae55442009-02-08 18:00:36 +00003528 napi->gro_count++;
Herbert Xud565b0a2008-12-15 23:38:52 -08003529 NAPI_GRO_CB(skb)->count = 1;
Herbert Xu86911732009-01-29 14:19:50 +00003530 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08003531 skb->next = napi->gro_list;
3532 napi->gro_list = skb;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003533 ret = GRO_HELD;
Herbert Xud565b0a2008-12-15 23:38:52 -08003534
Herbert Xuad0f9902009-02-01 01:24:55 -08003535pull:
Herbert Xucb189782009-05-26 18:50:31 +00003536 if (skb_headlen(skb) < skb_gro_offset(skb)) {
3537 int grow = skb_gro_offset(skb) - skb_headlen(skb);
3538
3539 BUG_ON(skb->end - skb->tail < grow);
3540
3541 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3542
3543 skb->tail += grow;
3544 skb->data_len -= grow;
3545
3546 skb_shinfo(skb)->frags[0].page_offset += grow;
Eric Dumazet9e903e02011-10-18 21:00:24 +00003547 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
Herbert Xucb189782009-05-26 18:50:31 +00003548
Eric Dumazet9e903e02011-10-18 21:00:24 +00003549 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
Ian Campbellea2ab692011-08-22 23:44:58 +00003550 skb_frag_unref(skb, 0);
Herbert Xucb189782009-05-26 18:50:31 +00003551 memmove(skb_shinfo(skb)->frags,
3552 skb_shinfo(skb)->frags + 1,
Jarek Poplawskie5093ae2010-08-11 02:02:10 +00003553 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
Herbert Xucb189782009-05-26 18:50:31 +00003554 }
Herbert Xuad0f9902009-02-01 01:24:55 -08003555 }
3556
Herbert Xud565b0a2008-12-15 23:38:52 -08003557ok:
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003558 return ret;
Herbert Xud565b0a2008-12-15 23:38:52 -08003559
3560normal:
Herbert Xuad0f9902009-02-01 01:24:55 -08003561 ret = GRO_NORMAL;
3562 goto pull;
Herbert Xu5d38a072009-01-04 16:13:40 -08003563}
Herbert Xu96e93ea2009-01-06 10:49:34 -08003564EXPORT_SYMBOL(dev_gro_receive);
3565
Eric Dumazet40d08022010-08-26 22:03:08 -07003566static inline gro_result_t
Ben Hutchings5b252f02009-10-29 07:17:09 +00003567__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xu96e93ea2009-01-06 10:49:34 -08003568{
3569 struct sk_buff *p;
Eric Dumazet5ca3b72c2012-02-08 08:51:50 +00003570 unsigned int maclen = skb->dev->hard_header_len;
Herbert Xu96e93ea2009-01-06 10:49:34 -08003571
3572 for (p = napi->gro_list; p; p = p->next) {
Eric Dumazet40d08022010-08-26 22:03:08 -07003573 unsigned long diffs;
3574
3575 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
Jesse Gross3701e512010-10-20 13:56:06 +00003576 diffs |= p->vlan_tci ^ skb->vlan_tci;
Eric Dumazet5ca3b72c2012-02-08 08:51:50 +00003577 if (maclen == ETH_HLEN)
3578 diffs |= compare_ether_header(skb_mac_header(p),
3579 skb_gro_mac_header(skb));
3580 else if (!diffs)
3581 diffs = memcmp(skb_mac_header(p),
3582 skb_gro_mac_header(skb),
3583 maclen);
Eric Dumazet40d08022010-08-26 22:03:08 -07003584 NAPI_GRO_CB(p)->same_flow = !diffs;
Herbert Xu96e93ea2009-01-06 10:49:34 -08003585 NAPI_GRO_CB(p)->flush = 0;
3586 }
3587
3588 return dev_gro_receive(napi, skb);
3589}
Herbert Xu5d38a072009-01-04 16:13:40 -08003590
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07003591gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
Herbert Xu5d38a072009-01-04 16:13:40 -08003592{
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003593 switch (ret) {
3594 case GRO_NORMAL:
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07003595 if (netif_receive_skb(skb))
3596 ret = GRO_DROP;
3597 break;
Herbert Xu5d38a072009-01-04 16:13:40 -08003598
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003599 case GRO_DROP:
Herbert Xu5d38a072009-01-04 16:13:40 -08003600 kfree_skb(skb);
3601 break;
Ben Hutchings5b252f02009-10-29 07:17:09 +00003602
Eric Dumazetdaa86542012-04-19 07:07:40 +00003603 case GRO_MERGED_FREE:
Eric Dumazetd7e88832012-04-30 08:10:34 +00003604 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
3605 kmem_cache_free(skbuff_head_cache, skb);
3606 else
3607 __kfree_skb(skb);
Eric Dumazetdaa86542012-04-19 07:07:40 +00003608 break;
3609
Ben Hutchings5b252f02009-10-29 07:17:09 +00003610 case GRO_HELD:
3611 case GRO_MERGED:
3612 break;
Herbert Xu5d38a072009-01-04 16:13:40 -08003613 }
3614
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07003615 return ret;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003616}
3617EXPORT_SYMBOL(napi_skb_finish);
3618
Herbert Xu78a478d2009-05-26 18:50:21 +00003619void skb_gro_reset_offset(struct sk_buff *skb)
3620{
3621 NAPI_GRO_CB(skb)->data_offset = 0;
3622 NAPI_GRO_CB(skb)->frag0 = NULL;
Herbert Xu74895942009-05-26 18:50:27 +00003623 NAPI_GRO_CB(skb)->frag0_len = 0;
Herbert Xu78a478d2009-05-26 18:50:21 +00003624
Herbert Xu78d3fd02009-05-26 18:50:23 +00003625 if (skb->mac_header == skb->tail &&
Ian Campbellea2ab692011-08-22 23:44:58 +00003626 !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
Herbert Xu78a478d2009-05-26 18:50:21 +00003627 NAPI_GRO_CB(skb)->frag0 =
Ian Campbellea2ab692011-08-22 23:44:58 +00003628 skb_frag_address(&skb_shinfo(skb)->frags[0]);
Eric Dumazet9e903e02011-10-18 21:00:24 +00003629 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
Herbert Xu74895942009-05-26 18:50:27 +00003630 }
Herbert Xu78a478d2009-05-26 18:50:21 +00003631}
3632EXPORT_SYMBOL(skb_gro_reset_offset);
3633
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07003634gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003635{
Herbert Xu86911732009-01-29 14:19:50 +00003636 skb_gro_reset_offset(skb);
3637
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003638 return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08003639}
3640EXPORT_SYMBOL(napi_gro_receive);
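/*
 * Illustrative sketch (not part of this file): a GRO-capable driver's poll
 * loop differs from the plain netif_receive_skb() case only in the final
 * call. my_napi and my_dev are hypothetical.
 *
 *	skb->protocol = eth_type_trans(skb, my_dev);
 *	napi_gro_receive(my_napi, skb);
 */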
3641
stephen hemmingerd0c2b0d2010-10-19 07:12:10 +00003642static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xu96e93ea2009-01-06 10:49:34 -08003643{
Herbert Xu96e93ea2009-01-06 10:49:34 -08003644 __skb_pull(skb, skb_headlen(skb));
Eric Dumazet2a2a4592012-03-21 06:58:03 +00003645 /* restore the reserve we had after netdev_alloc_skb_ip_align() */
3646 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
Jesse Gross3701e512010-10-20 13:56:06 +00003647 skb->vlan_tci = 0;
Herbert Xu66c46d72011-01-29 20:44:54 -08003648 skb->dev = napi->dev;
Andy Gospodarek6d152e22011-02-02 14:53:25 -08003649 skb->skb_iif = 0;
Herbert Xu96e93ea2009-01-06 10:49:34 -08003650
3651 napi->skb = skb;
3652}
Herbert Xu96e93ea2009-01-06 10:49:34 -08003653
Herbert Xu76620aa2009-04-16 02:02:07 -07003654struct sk_buff *napi_get_frags(struct napi_struct *napi)
Herbert Xu5d38a072009-01-04 16:13:40 -08003655{
Herbert Xu5d38a072009-01-04 16:13:40 -08003656 struct sk_buff *skb = napi->skb;
Herbert Xu5d38a072009-01-04 16:13:40 -08003657
3658 if (!skb) {
Eric Dumazet89d71a62009-10-13 05:34:20 +00003659 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3660 if (skb)
3661 napi->skb = skb;
Herbert Xu5d38a072009-01-04 16:13:40 -08003662 }
Herbert Xu96e93ea2009-01-06 10:49:34 -08003663 return skb;
3664}
Herbert Xu76620aa2009-04-16 02:02:07 -07003665EXPORT_SYMBOL(napi_get_frags);
Herbert Xu96e93ea2009-01-06 10:49:34 -08003666
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07003667gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3668 gro_result_t ret)
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003669{
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003670 switch (ret) {
3671 case GRO_NORMAL:
Herbert Xu86911732009-01-29 14:19:50 +00003672 case GRO_HELD:
Ajit Khapardee76b69c2010-02-16 20:25:43 +00003673 skb->protocol = eth_type_trans(skb, skb->dev);
Herbert Xu86911732009-01-29 14:19:50 +00003674
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07003675 if (ret == GRO_HELD)
3676 skb_gro_pull(skb, -ETH_HLEN);
3677 else if (netif_receive_skb(skb))
3678 ret = GRO_DROP;
Herbert Xu86911732009-01-29 14:19:50 +00003679 break;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003680
3681 case GRO_DROP:
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003682 case GRO_MERGED_FREE:
3683 napi_reuse_skb(napi, skb);
3684 break;
Ben Hutchings5b252f02009-10-29 07:17:09 +00003685
3686 case GRO_MERGED:
3687 break;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003688 }
3689
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07003690 return ret;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003691}
3692EXPORT_SYMBOL(napi_frags_finish);
3693
Eric Dumazet4adb9c42012-05-18 20:49:06 +00003694static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
Herbert Xu96e93ea2009-01-06 10:49:34 -08003695{
Herbert Xu76620aa2009-04-16 02:02:07 -07003696 struct sk_buff *skb = napi->skb;
3697 struct ethhdr *eth;
Herbert Xua5b1cf22009-05-26 18:50:28 +00003698 unsigned int hlen;
3699 unsigned int off;
Herbert Xu76620aa2009-04-16 02:02:07 -07003700
3701 napi->skb = NULL;
3702
3703 skb_reset_mac_header(skb);
3704 skb_gro_reset_offset(skb);
3705
Herbert Xua5b1cf22009-05-26 18:50:28 +00003706 off = skb_gro_offset(skb);
3707 hlen = off + sizeof(*eth);
3708 eth = skb_gro_header_fast(skb, off);
3709 if (skb_gro_header_hard(skb, hlen)) {
3710 eth = skb_gro_header_slow(skb, hlen, off);
3711 if (unlikely(!eth)) {
3712 napi_reuse_skb(napi, skb);
3713 skb = NULL;
3714 goto out;
3715 }
Herbert Xu76620aa2009-04-16 02:02:07 -07003716 }
3717
3718 skb_gro_pull(skb, sizeof(*eth));
3719
3720 /*
3721 * This works because the only protocols we care about don't require
3722 * special handling. We'll fix it up properly at the end.
3723 */
3724 skb->protocol = eth->h_proto;
3725
3726out:
3727 return skb;
3728}
Herbert Xu76620aa2009-04-16 02:02:07 -07003729
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07003730gro_result_t napi_gro_frags(struct napi_struct *napi)
Herbert Xu76620aa2009-04-16 02:02:07 -07003731{
3732 struct sk_buff *skb = napi_frags_skb(napi);
Herbert Xu96e93ea2009-01-06 10:49:34 -08003733
3734 if (!skb)
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07003735 return GRO_DROP;
Herbert Xu96e93ea2009-01-06 10:49:34 -08003736
Herbert Xu5d0d9be2009-01-29 14:19:48 +00003737 return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
Herbert Xu5d38a072009-01-04 16:13:40 -08003738}
3739EXPORT_SYMBOL(napi_gro_frags);
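/*
 * Illustrative sketch (not part of this file): drivers that receive purely
 * into pages (no linear copy) use the napi_get_frags()/napi_gro_frags()
 * pair; the Ethernet header is then parsed by napi_frags_skb() above.
 * my_napi, rx_page, rx_off and rx_len are hypothetical, and the truesize
 * accounting below is only an approximation.
 *
 *	struct sk_buff *skb = napi_get_frags(my_napi);
 *
 *	if (!skb)
 *		return;
 *	skb_fill_page_desc(skb, 0, rx_page, rx_off, rx_len);
 *	skb->len += rx_len;
 *	skb->data_len += rx_len;
 *	skb->truesize += PAGE_SIZE;
 *	napi_gro_frags(my_napi);
 */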
3740
Eric Dumazete326bed2010-04-22 00:22:45 -07003741/*
3742 * net_rps_action sends any pending IPIs for RPS.
3743 * Note: called with local irq disabled, but exits with local irq enabled.
3744 */
3745static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3746{
3747#ifdef CONFIG_RPS
3748 struct softnet_data *remsd = sd->rps_ipi_list;
3749
3750 if (remsd) {
3751 sd->rps_ipi_list = NULL;
3752
3753 local_irq_enable();
3754
 3755 /* Send pending IPIs to kick RPS processing on remote CPUs. */
3756 while (remsd) {
3757 struct softnet_data *next = remsd->rps_ipi_next;
3758
3759 if (cpu_online(remsd->cpu))
3760 __smp_call_function_single(remsd->cpu,
3761 &remsd->csd, 0);
3762 remsd = next;
3763 }
3764 } else
3765#endif
3766 local_irq_enable();
3767}
3768
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003769static int process_backlog(struct napi_struct *napi, int quota)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003770{
3771 int work = 0;
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07003772 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003773
Eric Dumazete326bed2010-04-22 00:22:45 -07003774#ifdef CONFIG_RPS
 3775 /* Check if we have pending IPIs; it's better to send them now,
 3776 * rather than waiting for net_rx_action() to end.
3777 */
3778 if (sd->rps_ipi_list) {
3779 local_irq_disable();
3780 net_rps_action_and_irq_enable(sd);
3781 }
3782#endif
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003783 napi->weight = weight_p;
Changli Gao6e7676c2010-04-27 15:07:33 -07003784 local_irq_disable();
3785 while (work < quota) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003786 struct sk_buff *skb;
Changli Gao6e7676c2010-04-27 15:07:33 -07003787 unsigned int qlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003788
Changli Gao6e7676c2010-04-27 15:07:33 -07003789 while ((skb = __skb_dequeue(&sd->process_queue))) {
Eric Dumazete4008272010-04-05 15:42:39 -07003790 local_irq_enable();
Changli Gao6e7676c2010-04-27 15:07:33 -07003791 __netif_receive_skb(skb);
Changli Gao6e7676c2010-04-27 15:07:33 -07003792 local_irq_disable();
Tom Herbert76cc8b12010-05-20 18:37:59 +00003793 input_queue_head_incr(sd);
3794 if (++work >= quota) {
3795 local_irq_enable();
3796 return work;
3797 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003798 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003799
Changli Gao6e7676c2010-04-27 15:07:33 -07003800 rps_lock(sd);
3801 qlen = skb_queue_len(&sd->input_pkt_queue);
Tom Herbert76cc8b12010-05-20 18:37:59 +00003802 if (qlen)
Changli Gao6e7676c2010-04-27 15:07:33 -07003803 skb_queue_splice_tail_init(&sd->input_pkt_queue,
3804 &sd->process_queue);
Tom Herbert76cc8b12010-05-20 18:37:59 +00003805
Changli Gao6e7676c2010-04-27 15:07:33 -07003806 if (qlen < quota - work) {
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07003807 /*
3808 * Inline a custom version of __napi_complete().
 3809 * Only the current cpu owns and manipulates this napi,
3810 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
3811 * we can use a plain write instead of clear_bit(),
 3812 * and we don't need an smp_mb() memory barrier.
3813 */
3814 list_del(&napi->poll_list);
3815 napi->state = 0;
3816
Changli Gao6e7676c2010-04-27 15:07:33 -07003817 quota = work + qlen;
3818 }
3819 rps_unlock(sd);
3820 }
3821 local_irq_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003822
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003823 return work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003824}
3825
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003826/**
3827 * __napi_schedule - schedule for receive
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07003828 * @n: entry to schedule
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003829 *
3830 * The entry's receive function will be scheduled to run
3831 */
Harvey Harrisonb5606c22008-02-13 15:03:16 -08003832void __napi_schedule(struct napi_struct *n)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003833{
3834 unsigned long flags;
3835
3836 local_irq_save(flags);
Eric Dumazeteecfd7c2010-05-06 22:07:48 -07003837 ____napi_schedule(&__get_cpu_var(softnet_data), n);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003838 local_irq_restore(flags);
3839}
3840EXPORT_SYMBOL(__napi_schedule);
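/*
 * Illustrative sketch (not part of the original file): the canonical way a
 * NIC interrupt handler hands RX work to NAPI, assuming the usual driver
 * includes (<linux/interrupt.h>, <linux/netdevice.h>). struct example_nic
 * and the example_nic_* helpers are hypothetical stand-ins for a real
 * driver's private data and hardware accessors.
 */
struct example_nic {
	struct napi_struct napi;
	/* device registers, ring state, locks, ... */
};

/* hypothetical device-specific helpers; a real driver talks to hardware here */
static inline void example_nic_disable_rx_irq(struct example_nic *priv) { }
static inline void example_nic_enable_rx_irq(struct example_nic *priv) { }
static inline int example_nic_clean_rx(struct example_nic *priv, int budget)
{
	return 0;	/* pretend no packets were processed */
}

static irqreturn_t example_nic_interrupt(int irq, void *dev_id)
{
	struct example_nic *priv = dev_id;

	/* mask further RX interrupts before switching to polled mode */
	example_nic_disable_rx_irq(priv);

	/* take NAPI_STATE_SCHED, then queue ourselves on this CPU's poll
	 * list and raise NET_RX_SOFTIRQ for net_rx_action() below */
	if (napi_schedule_prep(&priv->napi))
		__napi_schedule(&priv->napi);

	return IRQ_HANDLED;
}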
3841
Herbert Xud565b0a2008-12-15 23:38:52 -08003842void __napi_complete(struct napi_struct *n)
3843{
3844 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
3845 BUG_ON(n->gro_list);
3846
3847 list_del(&n->poll_list);
3848 smp_mb__before_clear_bit();
3849 clear_bit(NAPI_STATE_SCHED, &n->state);
3850}
3851EXPORT_SYMBOL(__napi_complete);
3852
3853void napi_complete(struct napi_struct *n)
3854{
3855 unsigned long flags;
3856
3857 /*
3858 * don't let napi dequeue from the cpu poll list
 3859 * just in case it's running on a different cpu
3860 */
3861 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
3862 return;
3863
3864 napi_gro_flush(n);
3865 local_irq_save(flags);
3866 __napi_complete(n);
3867 local_irq_restore(flags);
3868}
3869EXPORT_SYMBOL(napi_complete);
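/*
 * Illustrative sketch, continuing the hypothetical example_nic driver above:
 * a poll callback completes NAPI only when it did not exhaust its budget,
 * and only then re-enables its RX interrupt.
 */
static int example_nic_poll(struct napi_struct *napi, int budget)
{
	struct example_nic *priv = container_of(napi, struct example_nic, napi);
	int work_done;

	work_done = example_nic_clean_rx(priv, budget);

	if (work_done < budget) {
		/* all pending work done: leave polled mode ... */
		napi_complete(napi);
		/* ... and let the next packet raise an interrupt again */
		example_nic_enable_rx_irq(priv);
	}

	return work_done;
}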
3870
3871void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
3872 int (*poll)(struct napi_struct *, int), int weight)
3873{
3874 INIT_LIST_HEAD(&napi->poll_list);
Herbert Xu4ae55442009-02-08 18:00:36 +00003875 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08003876 napi->gro_list = NULL;
Herbert Xu5d38a072009-01-04 16:13:40 -08003877 napi->skb = NULL;
Herbert Xud565b0a2008-12-15 23:38:52 -08003878 napi->poll = poll;
3879 napi->weight = weight;
3880 list_add(&napi->dev_list, &dev->napi_list);
Herbert Xud565b0a2008-12-15 23:38:52 -08003881 napi->dev = dev;
Herbert Xu5d38a072009-01-04 16:13:40 -08003882#ifdef CONFIG_NETPOLL
Herbert Xud565b0a2008-12-15 23:38:52 -08003883 spin_lock_init(&napi->poll_lock);
3884 napi->poll_owner = -1;
3885#endif
3886 set_bit(NAPI_STATE_SCHED, &napi->state);
3887}
3888EXPORT_SYMBOL(netif_napi_add);
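/*
 * Illustrative sketch, still using the hypothetical example_nic driver:
 * registration in the probe path and the matching teardown. 64 is the
 * weight most Ethernet drivers pass; napi_enable() is then called from the
 * open path before RX interrupts are allowed.
 */
static void example_nic_setup_napi(struct net_device *netdev,
				   struct example_nic *priv)
{
	netif_napi_add(netdev, &priv->napi, example_nic_poll, 64);
}

static void example_nic_teardown_napi(struct example_nic *priv)
{
	napi_disable(&priv->napi);	/* quiesce any poll in progress */
	netif_napi_del(&priv->napi);	/* drop GRO state and unlink */
}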
3889
3890void netif_napi_del(struct napi_struct *napi)
3891{
3892 struct sk_buff *skb, *next;
3893
Peter P Waskiewicz Jrd7b06632008-12-26 01:35:35 -08003894 list_del_init(&napi->dev_list);
Herbert Xu76620aa2009-04-16 02:02:07 -07003895 napi_free_frags(napi);
Herbert Xud565b0a2008-12-15 23:38:52 -08003896
3897 for (skb = napi->gro_list; skb; skb = next) {
3898 next = skb->next;
3899 skb->next = NULL;
3900 kfree_skb(skb);
3901 }
3902
3903 napi->gro_list = NULL;
Herbert Xu4ae55442009-02-08 18:00:36 +00003904 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08003905}
3906EXPORT_SYMBOL(netif_napi_del);
3907
Linus Torvalds1da177e2005-04-16 15:20:36 -07003908static void net_rx_action(struct softirq_action *h)
3909{
Eric Dumazete326bed2010-04-22 00:22:45 -07003910 struct softnet_data *sd = &__get_cpu_var(softnet_data);
Stephen Hemminger24f8b232008-11-03 17:14:38 -08003911 unsigned long time_limit = jiffies + 2;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07003912 int budget = netdev_budget;
Matt Mackall53fb95d2005-08-11 19:27:43 -07003913 void *have;
3914
Linus Torvalds1da177e2005-04-16 15:20:36 -07003915 local_irq_disable();
3916
Eric Dumazete326bed2010-04-22 00:22:45 -07003917 while (!list_empty(&sd->poll_list)) {
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003918 struct napi_struct *n;
3919 int work, weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003920
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003921 /* If softirq window is exhausted then punt.
Stephen Hemminger24f8b232008-11-03 17:14:38 -08003922 * Allow this to run for 2 jiffies, which allows
 3923 * an average latency of 1.5/HZ.
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003924 */
Stephen Hemminger24f8b232008-11-03 17:14:38 -08003925 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003926 goto softnet_break;
3927
3928 local_irq_enable();
3929
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003930 /* Even though interrupts have been re-enabled, this
3931 * access is safe because interrupts can only add new
3932 * entries to the tail of this list, and only ->poll()
3933 * calls can remove this head entry from the list.
3934 */
Eric Dumazete326bed2010-04-22 00:22:45 -07003935 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003936
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003937 have = netpoll_poll_lock(n);
3938
3939 weight = n->weight;
3940
David S. Miller0a7606c2007-10-29 21:28:47 -07003941 /* This NAPI_STATE_SCHED test is for avoiding a race
3942 * with netpoll's poll_napi(). Only the entity which
3943 * obtains the lock and sees NAPI_STATE_SCHED set will
3944 * actually make the ->poll() call. Therefore we avoid
Lucas De Marchi25985ed2011-03-30 22:57:33 -03003945 * accidentally calling ->poll() when NAPI is not scheduled.
David S. Miller0a7606c2007-10-29 21:28:47 -07003946 */
3947 work = 0;
Neil Horman4ea7e382009-05-21 07:36:08 +00003948 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
David S. Miller0a7606c2007-10-29 21:28:47 -07003949 work = n->poll(n, weight);
Neil Horman4ea7e382009-05-21 07:36:08 +00003950 trace_napi_poll(n);
3951 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003952
3953 WARN_ON_ONCE(work > weight);
3954
3955 budget -= work;
3956
3957 local_irq_disable();
3958
3959 /* Drivers must not modify the NAPI state if they
3960 * consume the entire weight. In such cases this code
3961 * still "owns" the NAPI instance and therefore can
3962 * move the instance around on the list at-will.
3963 */
David S. Millerfed17f32008-01-07 21:00:40 -08003964 if (unlikely(work == weight)) {
Herbert Xuff780cd2009-06-26 19:27:04 -07003965 if (unlikely(napi_disable_pending(n))) {
3966 local_irq_enable();
3967 napi_complete(n);
3968 local_irq_disable();
3969 } else
Eric Dumazete326bed2010-04-22 00:22:45 -07003970 list_move_tail(&n->poll_list, &sd->poll_list);
David S. Millerfed17f32008-01-07 21:00:40 -08003971 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003972
3973 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003974 }
3975out:
Eric Dumazete326bed2010-04-22 00:22:45 -07003976 net_rps_action_and_irq_enable(sd);
Tom Herbert0a9627f2010-03-16 08:03:29 +00003977
Chris Leechdb217332006-06-17 21:24:58 -07003978#ifdef CONFIG_NET_DMA
3979 /*
3980 * There may not be any more sk_buffs coming right now, so push
3981 * any pending DMA copies to hardware
3982 */
Dan Williams2ba05622009-01-06 11:38:14 -07003983 dma_issue_pending_all();
Chris Leechdb217332006-06-17 21:24:58 -07003984#endif
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003985
Linus Torvalds1da177e2005-04-16 15:20:36 -07003986 return;
3987
3988softnet_break:
Changli Gaodee42872010-05-02 05:42:16 +00003989 sd->time_squeeze++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003990 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3991 goto out;
3992}
3993
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003994static gifconf_func_t *gifconf_list[NPROTO];
Linus Torvalds1da177e2005-04-16 15:20:36 -07003995
3996/**
3997 * register_gifconf - register a SIOCGIF handler
3998 * @family: Address family
3999 * @gifconf: Function handler
4000 *
4001 * Register protocol dependent address dumping routines. The handler
4002 * that is passed must not be freed or reused until it has been replaced
4003 * by another handler.
4004 */
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004005int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004006{
4007 if (family >= NPROTO)
4008 return -EINVAL;
4009 gifconf_list[family] = gifconf;
4010 return 0;
4011}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004012EXPORT_SYMBOL(register_gifconf);
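/*
 * Illustrative sketch: how an address family hooks SIOCGIFCONF. The handler
 * and PF_EXAMPLE are hypothetical; IPv4 registers its real handler
 * (inet_gifconf) the same way from its init code. When called with a NULL
 * buffer the handler only reports how much space it would need, matching
 * the two call sites in dev_ifconf() below.
 */
static int example_gifconf(struct net_device *dev, char __user *buf, int len)
{
	/* write one struct ifreq per address configured on dev into buf,
	 * or return the space required when buf is NULL */
	return 0;
}

static int __init example_family_init(void)
{
	return register_gifconf(PF_EXAMPLE, example_gifconf);	/* PF_EXAMPLE: hypothetical family id */
}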
Linus Torvalds1da177e2005-04-16 15:20:36 -07004013
4014
4015/*
4016 * Map an interface index to its name (SIOCGIFNAME)
4017 */
4018
4019/*
4020 * We need this ioctl for efficient implementation of the
4021 * if_indextoname() function required by the IPv6 API. Without
4022 * it, we would have to search all the interfaces to find a
4023 * match. --pb
4024 */
4025
Eric W. Biederman881d9662007-09-17 11:56:21 -07004026static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004027{
4028 struct net_device *dev;
4029 struct ifreq ifr;
4030
4031 /*
4032 * Fetch the caller's info block.
4033 */
4034
4035 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4036 return -EFAULT;
4037
Eric Dumazetfb699dfd2009-10-19 19:18:49 +00004038 rcu_read_lock();
4039 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004040 if (!dev) {
Eric Dumazetfb699dfd2009-10-19 19:18:49 +00004041 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004042 return -ENODEV;
4043 }
4044
4045 strcpy(ifr.ifr_name, dev->name);
Eric Dumazetfb699dfd2009-10-19 19:18:49 +00004046 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004047
4048 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
4049 return -EFAULT;
4050 return 0;
4051}
4052
4053/*
4054 * Perform a SIOCGIFCONF call. This structure will change
4055 * size eventually, and there is nothing I can do about it.
4056 * Thus we will need a 'compatibility mode'.
4057 */
4058
Eric W. Biederman881d9662007-09-17 11:56:21 -07004059static int dev_ifconf(struct net *net, char __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004060{
4061 struct ifconf ifc;
4062 struct net_device *dev;
4063 char __user *pos;
4064 int len;
4065 int total;
4066 int i;
4067
4068 /*
4069 * Fetch the caller's info block.
4070 */
4071
4072 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
4073 return -EFAULT;
4074
4075 pos = ifc.ifc_buf;
4076 len = ifc.ifc_len;
4077
4078 /*
4079 * Loop over the interfaces, and write an info block for each.
4080 */
4081
4082 total = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004083 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004084 for (i = 0; i < NPROTO; i++) {
4085 if (gifconf_list[i]) {
4086 int done;
4087 if (!pos)
4088 done = gifconf_list[i](dev, NULL, 0);
4089 else
4090 done = gifconf_list[i](dev, pos + total,
4091 len - total);
4092 if (done < 0)
4093 return -EFAULT;
4094 total += done;
4095 }
4096 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004097 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004098
4099 /*
4100 * All done. Write the updated control block back to the caller.
4101 */
4102 ifc.ifc_len = total;
4103
4104 /*
4105 * Both BSD and Solaris return 0 here, so we do too.
4106 */
4107 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
4108}
4109
4110#ifdef CONFIG_PROC_FS
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004111
Eric Dumazet2def16a2012-04-02 22:33:02 +00004112#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004113
4114#define get_bucket(x) ((x) >> BUCKET_SPACE)
4115#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
4116#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
4117
Eric Dumazet2def16a2012-04-02 22:33:02 +00004118static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004119{
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004120 struct net *net = seq_file_net(seq);
4121 struct net_device *dev;
4122 struct hlist_node *p;
4123 struct hlist_head *h;
Eric Dumazet2def16a2012-04-02 22:33:02 +00004124 unsigned int count = 0, offset = get_offset(*pos);
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004125
Eric Dumazet2def16a2012-04-02 22:33:02 +00004126 h = &net->dev_name_head[get_bucket(*pos)];
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004127 hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
Eric Dumazet2def16a2012-04-02 22:33:02 +00004128 if (++count == offset)
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004129 return dev;
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004130 }
4131
4132 return NULL;
4133}
4134
Eric Dumazet2def16a2012-04-02 22:33:02 +00004135static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004136{
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004137 struct net_device *dev;
4138 unsigned int bucket;
4139
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004140 do {
Eric Dumazet2def16a2012-04-02 22:33:02 +00004141 dev = dev_from_same_bucket(seq, pos);
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004142 if (dev)
4143 return dev;
4144
Eric Dumazet2def16a2012-04-02 22:33:02 +00004145 bucket = get_bucket(*pos) + 1;
4146 *pos = set_bucket_offset(bucket, 1);
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004147 } while (bucket < NETDEV_HASHENTRIES);
4148
4149 return NULL;
4150}
4151
Linus Torvalds1da177e2005-04-16 15:20:36 -07004152/*
4153 * This is invoked by the /proc filesystem handler to display a device
4154 * in detail.
4155 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004156void *dev_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazetc6d14c82009-11-04 05:43:23 -08004157 __acquires(RCU)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004158{
Eric Dumazetc6d14c82009-11-04 05:43:23 -08004159 rcu_read_lock();
Pavel Emelianov7562f872007-05-03 15:13:45 -07004160 if (!*pos)
4161 return SEQ_START_TOKEN;
4162
Eric Dumazet2def16a2012-04-02 22:33:02 +00004163 if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
Mihai Maruseacf04565d2011-10-20 20:45:10 +00004164 return NULL;
Pavel Emelianov7562f872007-05-03 15:13:45 -07004165
Eric Dumazet2def16a2012-04-02 22:33:02 +00004166 return dev_from_bucket(seq, pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004167}
4168
4169void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4170{
4171 ++*pos;
Eric Dumazet2def16a2012-04-02 22:33:02 +00004172 return dev_from_bucket(seq, pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004173}
4174
4175void dev_seq_stop(struct seq_file *seq, void *v)
Eric Dumazetc6d14c82009-11-04 05:43:23 -08004176 __releases(RCU)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004177{
Eric Dumazetc6d14c82009-11-04 05:43:23 -08004178 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004179}
4180
4181static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
4182{
Eric Dumazet28172732010-07-07 14:58:56 -07004183 struct rtnl_link_stats64 temp;
4184 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004185
Ben Hutchingsbe1f3c22010-06-08 07:19:54 +00004186 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
4187 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
Rusty Russell5a1b5892007-04-28 21:04:03 -07004188 dev->name, stats->rx_bytes, stats->rx_packets,
4189 stats->rx_errors,
4190 stats->rx_dropped + stats->rx_missed_errors,
4191 stats->rx_fifo_errors,
4192 stats->rx_length_errors + stats->rx_over_errors +
4193 stats->rx_crc_errors + stats->rx_frame_errors,
4194 stats->rx_compressed, stats->multicast,
4195 stats->tx_bytes, stats->tx_packets,
4196 stats->tx_errors, stats->tx_dropped,
4197 stats->tx_fifo_errors, stats->collisions,
4198 stats->tx_carrier_errors +
4199 stats->tx_aborted_errors +
4200 stats->tx_window_errors +
4201 stats->tx_heartbeat_errors,
4202 stats->tx_compressed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004203}
4204
4205/*
 4206 * Called from the PROCfs module. This now uses the new arbitrarily sized
4207 * /proc/net interface to create /proc/net/dev
4208 */
4209static int dev_seq_show(struct seq_file *seq, void *v)
4210{
4211 if (v == SEQ_START_TOKEN)
4212 seq_puts(seq, "Inter-| Receive "
4213 " | Transmit\n"
4214 " face |bytes packets errs drop fifo frame "
4215 "compressed multicast|bytes packets errs "
4216 "drop fifo colls carrier compressed\n");
4217 else
4218 dev_seq_printf_stats(seq, v);
4219 return 0;
4220}
4221
Changli Gaodee42872010-05-02 05:42:16 +00004222static struct softnet_data *softnet_get_online(loff_t *pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004223{
Changli Gaodee42872010-05-02 05:42:16 +00004224 struct softnet_data *sd = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004225
Mike Travis0c0b0ac2008-05-02 16:43:08 -07004226 while (*pos < nr_cpu_ids)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004227 if (cpu_online(*pos)) {
Changli Gaodee42872010-05-02 05:42:16 +00004228 sd = &per_cpu(softnet_data, *pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004229 break;
4230 } else
4231 ++*pos;
Changli Gaodee42872010-05-02 05:42:16 +00004232 return sd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004233}
4234
4235static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
4236{
4237 return softnet_get_online(pos);
4238}
4239
4240static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4241{
4242 ++*pos;
4243 return softnet_get_online(pos);
4244}
4245
4246static void softnet_seq_stop(struct seq_file *seq, void *v)
4247{
4248}
4249
4250static int softnet_seq_show(struct seq_file *seq, void *v)
4251{
Changli Gaodee42872010-05-02 05:42:16 +00004252 struct softnet_data *sd = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004253
Tom Herbert0a9627f2010-03-16 08:03:29 +00004254 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
Changli Gaodee42872010-05-02 05:42:16 +00004255 sd->processed, sd->dropped, sd->time_squeeze, 0,
Stephen Hemmingerc1ebcdb2005-06-23 20:08:59 -07004256 0, 0, 0, 0, /* was fastroute */
Changli Gaodee42872010-05-02 05:42:16 +00004257 sd->cpu_collision, sd->received_rps);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004258 return 0;
4259}
4260
Stephen Hemmingerf6908082007-03-12 14:34:29 -07004261static const struct seq_operations dev_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004262 .start = dev_seq_start,
4263 .next = dev_seq_next,
4264 .stop = dev_seq_stop,
4265 .show = dev_seq_show,
4266};
4267
4268static int dev_seq_open(struct inode *inode, struct file *file)
4269{
Denis V. Luneve372c412007-11-19 22:31:54 -08004270 return seq_open_net(inode, file, &dev_seq_ops,
Eric Dumazet2def16a2012-04-02 22:33:02 +00004271 sizeof(struct seq_net_private));
Anton Blanchard5cac98d2011-11-27 21:14:46 +00004272}
4273
Arjan van de Ven9a321442007-02-12 00:55:35 -08004274static const struct file_operations dev_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004275 .owner = THIS_MODULE,
4276 .open = dev_seq_open,
4277 .read = seq_read,
4278 .llseek = seq_lseek,
Denis V. Luneve372c412007-11-19 22:31:54 -08004279 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004280};
4281
Stephen Hemmingerf6908082007-03-12 14:34:29 -07004282static const struct seq_operations softnet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004283 .start = softnet_seq_start,
4284 .next = softnet_seq_next,
4285 .stop = softnet_seq_stop,
4286 .show = softnet_seq_show,
4287};
4288
4289static int softnet_seq_open(struct inode *inode, struct file *file)
4290{
4291 return seq_open(file, &softnet_seq_ops);
4292}
4293
Arjan van de Ven9a321442007-02-12 00:55:35 -08004294static const struct file_operations softnet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004295 .owner = THIS_MODULE,
4296 .open = softnet_seq_open,
4297 .read = seq_read,
4298 .llseek = seq_lseek,
4299 .release = seq_release,
4300};
4301
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004302static void *ptype_get_idx(loff_t pos)
4303{
4304 struct packet_type *pt = NULL;
4305 loff_t i = 0;
4306 int t;
4307
4308 list_for_each_entry_rcu(pt, &ptype_all, list) {
4309 if (i == pos)
4310 return pt;
4311 ++i;
4312 }
4313
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08004314 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004315 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
4316 if (i == pos)
4317 return pt;
4318 ++i;
4319 }
4320 }
4321 return NULL;
4322}
4323
4324static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemminger72348a42008-01-21 02:27:29 -08004325 __acquires(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004326{
4327 rcu_read_lock();
4328 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
4329}
4330
4331static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4332{
4333 struct packet_type *pt;
4334 struct list_head *nxt;
4335 int hash;
4336
4337 ++*pos;
4338 if (v == SEQ_START_TOKEN)
4339 return ptype_get_idx(0);
4340
4341 pt = v;
4342 nxt = pt->list.next;
4343 if (pt->type == htons(ETH_P_ALL)) {
4344 if (nxt != &ptype_all)
4345 goto found;
4346 hash = 0;
4347 nxt = ptype_base[0].next;
4348 } else
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08004349 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004350
4351 while (nxt == &ptype_base[hash]) {
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08004352 if (++hash >= PTYPE_HASH_SIZE)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004353 return NULL;
4354 nxt = ptype_base[hash].next;
4355 }
4356found:
4357 return list_entry(nxt, struct packet_type, list);
4358}
4359
4360static void ptype_seq_stop(struct seq_file *seq, void *v)
Stephen Hemminger72348a42008-01-21 02:27:29 -08004361 __releases(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004362{
4363 rcu_read_unlock();
4364}
4365
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004366static int ptype_seq_show(struct seq_file *seq, void *v)
4367{
4368 struct packet_type *pt = v;
4369
4370 if (v == SEQ_START_TOKEN)
4371 seq_puts(seq, "Type Device Function\n");
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09004372 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004373 if (pt->type == htons(ETH_P_ALL))
4374 seq_puts(seq, "ALL ");
4375 else
4376 seq_printf(seq, "%04x", ntohs(pt->type));
4377
Alexey Dobriyan908cd2d2008-11-16 19:50:35 -08004378 seq_printf(seq, " %-8s %pF\n",
4379 pt->dev ? pt->dev->name : "", pt->func);
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004380 }
4381
4382 return 0;
4383}
4384
4385static const struct seq_operations ptype_seq_ops = {
4386 .start = ptype_seq_start,
4387 .next = ptype_seq_next,
4388 .stop = ptype_seq_stop,
4389 .show = ptype_seq_show,
4390};
4391
4392static int ptype_seq_open(struct inode *inode, struct file *file)
4393{
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07004394 return seq_open_net(inode, file, &ptype_seq_ops,
4395 sizeof(struct seq_net_private));
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004396}
4397
4398static const struct file_operations ptype_seq_fops = {
4399 .owner = THIS_MODULE,
4400 .open = ptype_seq_open,
4401 .read = seq_read,
4402 .llseek = seq_lseek,
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07004403 .release = seq_release_net,
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004404};
4405
4406
Pavel Emelyanov46650792007-10-08 20:38:39 -07004407static int __net_init dev_proc_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004408{
4409 int rc = -ENOMEM;
4410
Eric W. Biederman881d9662007-09-17 11:56:21 -07004411 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004412 goto out;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004413 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004414 goto out_dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004415 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02004416 goto out_softnet;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07004417
Eric W. Biederman881d9662007-09-17 11:56:21 -07004418 if (wext_proc_init(net))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02004419 goto out_ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004420 rc = 0;
4421out:
4422 return rc;
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02004423out_ptype:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004424 proc_net_remove(net, "ptype");
Linus Torvalds1da177e2005-04-16 15:20:36 -07004425out_softnet:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004426 proc_net_remove(net, "softnet_stat");
Linus Torvalds1da177e2005-04-16 15:20:36 -07004427out_dev:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004428 proc_net_remove(net, "dev");
Linus Torvalds1da177e2005-04-16 15:20:36 -07004429 goto out;
4430}
Eric W. Biederman881d9662007-09-17 11:56:21 -07004431
Pavel Emelyanov46650792007-10-08 20:38:39 -07004432static void __net_exit dev_proc_net_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004433{
4434 wext_proc_exit(net);
4435
4436 proc_net_remove(net, "ptype");
4437 proc_net_remove(net, "softnet_stat");
4438 proc_net_remove(net, "dev");
4439}
4440
Denis V. Lunev022cbae2007-11-13 03:23:50 -08004441static struct pernet_operations __net_initdata dev_proc_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07004442 .init = dev_proc_net_init,
4443 .exit = dev_proc_net_exit,
4444};
4445
4446static int __init dev_proc_init(void)
4447{
4448 return register_pernet_subsys(&dev_proc_ops);
4449}
Linus Torvalds1da177e2005-04-16 15:20:36 -07004450#else
4451#define dev_proc_init() 0
4452#endif /* CONFIG_PROC_FS */
4453
4454
4455/**
Jiri Pirko1765a572011-02-12 06:48:36 +00004456 * netdev_set_master - set up master pointer
Linus Torvalds1da177e2005-04-16 15:20:36 -07004457 * @slave: slave device
4458 * @master: new master device
4459 *
4460 * Changes the master device of the slave. Pass %NULL to break the
4461 * bonding. The caller must hold the RTNL semaphore. On a failure
4462 * a negative errno code is returned. On success the reference counts
Jiri Pirko1765a572011-02-12 06:48:36 +00004463 * are adjusted and the function returns zero.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004464 */
4465int netdev_set_master(struct net_device *slave, struct net_device *master)
4466{
4467 struct net_device *old = slave->master;
4468
4469 ASSERT_RTNL();
4470
4471 if (master) {
4472 if (old)
4473 return -EBUSY;
4474 dev_hold(master);
4475 }
4476
4477 slave->master = master;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004478
Eric Dumazet6df427f2011-05-19 19:37:40 +00004479 if (old)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004480 dev_put(old);
Jiri Pirko1765a572011-02-12 06:48:36 +00004481 return 0;
4482}
4483EXPORT_SYMBOL(netdev_set_master);
4484
4485/**
4486 * netdev_set_bond_master - set up bonding master/slave pair
4487 * @slave: slave device
4488 * @master: new master device
4489 *
4490 * Changes the master device of the slave. Pass %NULL to break the
4491 * bonding. The caller must hold the RTNL semaphore. On a failure
4492 * a negative errno code is returned. On success %RTM_NEWLINK is sent
4493 * to the routing socket and the function returns zero.
4494 */
4495int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
4496{
4497 int err;
4498
4499 ASSERT_RTNL();
4500
4501 err = netdev_set_master(slave, master);
4502 if (err)
4503 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004504 if (master)
4505 slave->flags |= IFF_SLAVE;
4506 else
4507 slave->flags &= ~IFF_SLAVE;
4508
4509 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4510 return 0;
4511}
Jiri Pirko1765a572011-02-12 06:48:36 +00004512EXPORT_SYMBOL(netdev_set_bond_master);
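/*
 * Illustrative sketch: how the bonding driver pairs and unpairs a slave with
 * its master (helper names, bond_dev and slave_dev are hypothetical). Both
 * helpers are assumed to run under RTNL, which netdev_set_master() asserts;
 * passing NULL breaks the pairing and clears IFF_SLAVE again.
 */
static int example_bond_enslave(struct net_device *bond_dev,
				struct net_device *slave_dev)
{
	return netdev_set_bond_master(slave_dev, bond_dev);
}

static void example_bond_release(struct net_device *slave_dev)
{
	netdev_set_bond_master(slave_dev, NULL);
}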
Linus Torvalds1da177e2005-04-16 15:20:36 -07004513
Patrick McHardyb6c40d62008-10-07 15:26:48 -07004514static void dev_change_rx_flags(struct net_device *dev, int flags)
4515{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004516 const struct net_device_ops *ops = dev->netdev_ops;
4517
4518 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4519 ops->ndo_change_rx_flags(dev, flags);
Patrick McHardyb6c40d62008-10-07 15:26:48 -07004520}
4521
Wang Chendad9b332008-06-18 01:48:28 -07004522static int __dev_set_promiscuity(struct net_device *dev, int inc)
Patrick McHardy4417da62007-06-27 01:28:10 -07004523{
Eric Dumazetb536db92011-11-30 21:42:26 +00004524 unsigned int old_flags = dev->flags;
Eric W. Biedermand04a48b2012-05-23 17:01:57 -06004525 kuid_t uid;
4526 kgid_t gid;
Patrick McHardy4417da62007-06-27 01:28:10 -07004527
Patrick McHardy24023452007-07-14 18:51:31 -07004528 ASSERT_RTNL();
4529
Wang Chendad9b332008-06-18 01:48:28 -07004530 dev->flags |= IFF_PROMISC;
4531 dev->promiscuity += inc;
4532 if (dev->promiscuity == 0) {
4533 /*
4534 * Avoid overflow.
 4535 * If inc causes an overflow, leave promiscuity untouched and return an error.
4536 */
4537 if (inc < 0)
4538 dev->flags &= ~IFF_PROMISC;
4539 else {
4540 dev->promiscuity -= inc;
Joe Perches7b6cd1c2012-02-01 10:54:43 +00004541 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
4542 dev->name);
Wang Chendad9b332008-06-18 01:48:28 -07004543 return -EOVERFLOW;
4544 }
4545 }
Patrick McHardy4417da62007-06-27 01:28:10 -07004546 if (dev->flags != old_flags) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00004547 pr_info("device %s %s promiscuous mode\n",
4548 dev->name,
4549 dev->flags & IFF_PROMISC ? "entered" : "left");
David Howells8192b0c2008-11-14 10:39:10 +11004550 if (audit_enabled) {
4551 current_uid_gid(&uid, &gid);
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05004552 audit_log(current->audit_context, GFP_ATOMIC,
4553 AUDIT_ANOM_PROMISCUOUS,
4554 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4555 dev->name, (dev->flags & IFF_PROMISC),
4556 (old_flags & IFF_PROMISC),
4557 audit_get_loginuid(current),
Eric W. Biedermand04a48b2012-05-23 17:01:57 -06004558 from_kuid(&init_user_ns, uid),
4559 from_kgid(&init_user_ns, gid),
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05004560 audit_get_sessionid(current));
David Howells8192b0c2008-11-14 10:39:10 +11004561 }
Patrick McHardy24023452007-07-14 18:51:31 -07004562
Patrick McHardyb6c40d62008-10-07 15:26:48 -07004563 dev_change_rx_flags(dev, IFF_PROMISC);
Patrick McHardy4417da62007-06-27 01:28:10 -07004564 }
Wang Chendad9b332008-06-18 01:48:28 -07004565 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07004566}
4567
Linus Torvalds1da177e2005-04-16 15:20:36 -07004568/**
4569 * dev_set_promiscuity - update promiscuity count on a device
4570 * @dev: device
4571 * @inc: modifier
4572 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07004573 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07004574 * remains above zero the interface remains promiscuous. Once it hits zero
4575 * the device reverts back to normal filtering operation. A negative inc
4576 * value is used to drop promiscuity on the device.
Wang Chendad9b332008-06-18 01:48:28 -07004577 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004578 */
Wang Chendad9b332008-06-18 01:48:28 -07004579int dev_set_promiscuity(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004580{
Eric Dumazetb536db92011-11-30 21:42:26 +00004581 unsigned int old_flags = dev->flags;
Wang Chendad9b332008-06-18 01:48:28 -07004582 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004583
Wang Chendad9b332008-06-18 01:48:28 -07004584 err = __dev_set_promiscuity(dev, inc);
Patrick McHardy4b5a6982008-07-06 15:49:08 -07004585 if (err < 0)
Wang Chendad9b332008-06-18 01:48:28 -07004586 return err;
Patrick McHardy4417da62007-06-27 01:28:10 -07004587 if (dev->flags != old_flags)
4588 dev_set_rx_mode(dev);
Wang Chendad9b332008-06-18 01:48:28 -07004589 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004590}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004591EXPORT_SYMBOL(dev_set_promiscuity);
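/*
 * Illustrative sketch: a kernel-side packet tap (or a bridge port, bonding
 * slave, ...) takes one promiscuity reference and drops it again on
 * teardown. RTNL is required because __dev_set_promiscuity() asserts it;
 * the example_tap_* names are hypothetical.
 */
static int example_tap_attach(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_promiscuity(dev, 1);	/* enter promiscuous mode */
	rtnl_unlock();
	return err;
}

static void example_tap_detach(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, -1);		/* drop our reference again */
	rtnl_unlock();
}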
Linus Torvalds1da177e2005-04-16 15:20:36 -07004592
4593/**
4594 * dev_set_allmulti - update allmulti count on a device
4595 * @dev: device
4596 * @inc: modifier
4597 *
4598 * Add or remove reception of all multicast frames to a device. While the
4599 * count in the device remains above zero the interface remains listening
4600 * to all interfaces. Once it hits zero the device reverts back to normal
4601 * filtering operation. A negative @inc value is used to drop the counter
4602 * when releasing a resource needing all multicasts.
Wang Chendad9b332008-06-18 01:48:28 -07004603 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004604 */
4605
Wang Chendad9b332008-06-18 01:48:28 -07004606int dev_set_allmulti(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004607{
Eric Dumazetb536db92011-11-30 21:42:26 +00004608 unsigned int old_flags = dev->flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004609
Patrick McHardy24023452007-07-14 18:51:31 -07004610 ASSERT_RTNL();
4611
Linus Torvalds1da177e2005-04-16 15:20:36 -07004612 dev->flags |= IFF_ALLMULTI;
Wang Chendad9b332008-06-18 01:48:28 -07004613 dev->allmulti += inc;
4614 if (dev->allmulti == 0) {
4615 /*
4616 * Avoid overflow.
4617 * If inc causes overflow, untouch allmulti and return error.
4618 */
4619 if (inc < 0)
4620 dev->flags &= ~IFF_ALLMULTI;
4621 else {
4622 dev->allmulti -= inc;
Joe Perches7b6cd1c2012-02-01 10:54:43 +00004623 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
4624 dev->name);
Wang Chendad9b332008-06-18 01:48:28 -07004625 return -EOVERFLOW;
4626 }
4627 }
Patrick McHardy24023452007-07-14 18:51:31 -07004628 if (dev->flags ^ old_flags) {
Patrick McHardyb6c40d62008-10-07 15:26:48 -07004629 dev_change_rx_flags(dev, IFF_ALLMULTI);
Patrick McHardy4417da62007-06-27 01:28:10 -07004630 dev_set_rx_mode(dev);
Patrick McHardy24023452007-07-14 18:51:31 -07004631 }
Wang Chendad9b332008-06-18 01:48:28 -07004632 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07004633}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004634EXPORT_SYMBOL(dev_set_allmulti);
Patrick McHardy4417da62007-06-27 01:28:10 -07004635
4636/*
4637 * Upload unicast and multicast address lists to device and
4638 * configure RX filtering. When the device doesn't support unicast
Joe Perches53ccaae2007-12-20 14:02:06 -08004639 * filtering it is put in promiscuous mode while unicast addresses
Patrick McHardy4417da62007-06-27 01:28:10 -07004640 * are present.
4641 */
4642void __dev_set_rx_mode(struct net_device *dev)
4643{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004644 const struct net_device_ops *ops = dev->netdev_ops;
4645
Patrick McHardy4417da62007-06-27 01:28:10 -07004646 /* dev_open will call this function so the list will stay sane. */
4647 if (!(dev->flags&IFF_UP))
4648 return;
4649
4650 if (!netif_device_present(dev))
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +09004651 return;
Patrick McHardy4417da62007-06-27 01:28:10 -07004652
Jiri Pirko01789342011-08-16 06:29:00 +00004653 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
Patrick McHardy4417da62007-06-27 01:28:10 -07004654 /* Unicast address changes may only happen under the rtnl,
4655 * therefore calling __dev_set_promiscuity here is safe.
4656 */
Jiri Pirko32e7bfc2010-01-25 13:36:10 -08004657 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
Patrick McHardy4417da62007-06-27 01:28:10 -07004658 __dev_set_promiscuity(dev, 1);
Joe Perches2d348d12011-07-25 16:17:35 -07004659 dev->uc_promisc = true;
Jiri Pirko32e7bfc2010-01-25 13:36:10 -08004660 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
Patrick McHardy4417da62007-06-27 01:28:10 -07004661 __dev_set_promiscuity(dev, -1);
Joe Perches2d348d12011-07-25 16:17:35 -07004662 dev->uc_promisc = false;
Patrick McHardy4417da62007-06-27 01:28:10 -07004663 }
Patrick McHardy4417da62007-06-27 01:28:10 -07004664 }
Jiri Pirko01789342011-08-16 06:29:00 +00004665
4666 if (ops->ndo_set_rx_mode)
4667 ops->ndo_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07004668}
4669
4670void dev_set_rx_mode(struct net_device *dev)
4671{
David S. Millerb9e40852008-07-15 00:15:08 -07004672 netif_addr_lock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07004673 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07004674 netif_addr_unlock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004675}
4676
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004677/**
4678 * dev_get_flags - get flags reported to userspace
4679 * @dev: device
4680 *
4681 * Get the combination of flag bits exported through APIs to userspace.
4682 */
Eric Dumazet95c96172012-04-15 05:58:06 +00004683unsigned int dev_get_flags(const struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004684{
Eric Dumazet95c96172012-04-15 05:58:06 +00004685 unsigned int flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004686
4687 flags = (dev->flags & ~(IFF_PROMISC |
4688 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08004689 IFF_RUNNING |
4690 IFF_LOWER_UP |
4691 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07004692 (dev->gflags & (IFF_PROMISC |
4693 IFF_ALLMULTI));
4694
Stefan Rompfb00055a2006-03-20 17:09:11 -08004695 if (netif_running(dev)) {
4696 if (netif_oper_up(dev))
4697 flags |= IFF_RUNNING;
4698 if (netif_carrier_ok(dev))
4699 flags |= IFF_LOWER_UP;
4700 if (netif_dormant(dev))
4701 flags |= IFF_DORMANT;
4702 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004703
4704 return flags;
4705}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004706EXPORT_SYMBOL(dev_get_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004707
Patrick McHardybd380812010-02-26 06:34:53 +00004708int __dev_change_flags(struct net_device *dev, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004709{
Eric Dumazetb536db92011-11-30 21:42:26 +00004710 unsigned int old_flags = dev->flags;
Patrick McHardybd380812010-02-26 06:34:53 +00004711 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004712
Patrick McHardy24023452007-07-14 18:51:31 -07004713 ASSERT_RTNL();
4714
Linus Torvalds1da177e2005-04-16 15:20:36 -07004715 /*
4716 * Set the flags on our device.
4717 */
4718
4719 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4720 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4721 IFF_AUTOMEDIA)) |
4722 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4723 IFF_ALLMULTI));
4724
4725 /*
4726 * Load in the correct multicast list now the flags have changed.
4727 */
4728
Patrick McHardyb6c40d62008-10-07 15:26:48 -07004729 if ((old_flags ^ flags) & IFF_MULTICAST)
4730 dev_change_rx_flags(dev, IFF_MULTICAST);
Patrick McHardy24023452007-07-14 18:51:31 -07004731
Patrick McHardy4417da62007-06-27 01:28:10 -07004732 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004733
4734 /*
4735 * Have we downed the interface. We handle IFF_UP ourselves
4736 * according to user attempts to set it, rather than blindly
4737 * setting it.
4738 */
4739
4740 ret = 0;
4741 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
Patrick McHardybd380812010-02-26 06:34:53 +00004742 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004743
4744 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07004745 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004746 }
4747
Linus Torvalds1da177e2005-04-16 15:20:36 -07004748 if ((flags ^ dev->gflags) & IFF_PROMISC) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004749 int inc = (flags & IFF_PROMISC) ? 1 : -1;
4750
Linus Torvalds1da177e2005-04-16 15:20:36 -07004751 dev->gflags ^= IFF_PROMISC;
4752 dev_set_promiscuity(dev, inc);
4753 }
4754
4755 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
 4756 is important. Some (broken) drivers set IFF_PROMISC when
 4757 IFF_ALLMULTI is requested, without asking us and without reporting it.
4758 */
4759 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004760 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4761
Linus Torvalds1da177e2005-04-16 15:20:36 -07004762 dev->gflags ^= IFF_ALLMULTI;
4763 dev_set_allmulti(dev, inc);
4764 }
4765
Patrick McHardybd380812010-02-26 06:34:53 +00004766 return ret;
4767}
4768
4769void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4770{
4771 unsigned int changes = dev->flags ^ old_flags;
4772
4773 if (changes & IFF_UP) {
4774 if (dev->flags & IFF_UP)
4775 call_netdevice_notifiers(NETDEV_UP, dev);
4776 else
4777 call_netdevice_notifiers(NETDEV_DOWN, dev);
4778 }
4779
4780 if (dev->flags & IFF_UP &&
4781 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4782 call_netdevice_notifiers(NETDEV_CHANGE, dev);
4783}
4784
4785/**
4786 * dev_change_flags - change device settings
4787 * @dev: device
4788 * @flags: device state flags
4789 *
4790 * Change settings on device based state flags. The flags are
4791 * in the userspace exported format.
4792 */
Eric Dumazetb536db92011-11-30 21:42:26 +00004793int dev_change_flags(struct net_device *dev, unsigned int flags)
Patrick McHardybd380812010-02-26 06:34:53 +00004794{
Eric Dumazetb536db92011-11-30 21:42:26 +00004795 int ret;
4796 unsigned int changes, old_flags = dev->flags;
Patrick McHardybd380812010-02-26 06:34:53 +00004797
4798 ret = __dev_change_flags(dev, flags);
4799 if (ret < 0)
4800 return ret;
4801
4802 changes = old_flags ^ dev->flags;
Thomas Graf7c355f52007-06-05 16:03:03 -07004803 if (changes)
4804 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004805
Patrick McHardybd380812010-02-26 06:34:53 +00004806 __dev_notify_flags(dev, old_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004807 return ret;
4808}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004809EXPORT_SYMBOL(dev_change_flags);
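/*
 * Illustrative sketch (hypothetical helper): bringing an interface
 * administratively up from kernel code. The RTNL lock is mandatory here;
 * dev_change_flags() opens the device and sends RTM_NEWLINK to userspace.
 */
static int example_bring_up(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_change_flags(dev, dev->flags | IFF_UP);
	rtnl_unlock();
	return err;
}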
Linus Torvalds1da177e2005-04-16 15:20:36 -07004810
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004811/**
4812 * dev_set_mtu - Change maximum transfer unit
4813 * @dev: device
4814 * @new_mtu: new transfer unit
4815 *
4816 * Change the maximum transfer size of the network device.
4817 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004818int dev_set_mtu(struct net_device *dev, int new_mtu)
4819{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004820 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004821 int err;
4822
4823 if (new_mtu == dev->mtu)
4824 return 0;
4825
4826 /* MTU must be positive. */
4827 if (new_mtu < 0)
4828 return -EINVAL;
4829
4830 if (!netif_device_present(dev))
4831 return -ENODEV;
4832
4833 err = 0;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004834 if (ops->ndo_change_mtu)
4835 err = ops->ndo_change_mtu(dev, new_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004836 else
4837 dev->mtu = new_mtu;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004838
Linus Torvalds1da177e2005-04-16 15:20:36 -07004839 if (!err && dev->flags & IFF_UP)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004840 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004841 return err;
4842}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004843EXPORT_SYMBOL(dev_set_mtu);
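/*
 * Illustrative sketch (hypothetical helper): switching a device to jumbo
 * frames. Whether 9000 is acceptable is entirely up to the driver's
 * ndo_change_mtu(); callers hold RTNL, as the ioctl and rtnetlink paths in
 * this file do.
 */
static int example_enable_jumbo(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_mtu(dev, 9000);
	rtnl_unlock();
	if (err)
		pr_warn("%s: could not switch to jumbo MTU: %d\n",
			dev->name, err);
	return err;
}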
Linus Torvalds1da177e2005-04-16 15:20:36 -07004844
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004845/**
Vlad Dogarucbda10f2011-01-13 23:38:30 +00004846 * dev_set_group - Change group this device belongs to
4847 * @dev: device
4848 * @new_group: group this device should belong to
4849 */
4850void dev_set_group(struct net_device *dev, int new_group)
4851{
4852 dev->group = new_group;
4853}
4854EXPORT_SYMBOL(dev_set_group);
4855
4856/**
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004857 * dev_set_mac_address - Change Media Access Control Address
4858 * @dev: device
4859 * @sa: new address
4860 *
4861 * Change the hardware (MAC) address of the device
4862 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004863int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4864{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004865 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004866 int err;
4867
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004868 if (!ops->ndo_set_mac_address)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004869 return -EOPNOTSUPP;
4870 if (sa->sa_family != dev->type)
4871 return -EINVAL;
4872 if (!netif_device_present(dev))
4873 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004874 err = ops->ndo_set_mac_address(dev, sa);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004875 if (!err)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004876 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Theodore Ts'o7bf23572012-07-04 21:23:25 -04004877 add_device_randomness(dev->dev_addr, dev->addr_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004878 return err;
4879}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004880EXPORT_SYMBOL(dev_set_mac_address);
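/*
 * Illustrative sketch (hypothetical helper): programming a new hardware
 * address from kernel code. new_mac is assumed to hold dev->addr_len bytes;
 * sa_family must match dev->type or dev_set_mac_address() returns -EINVAL.
 */
static int example_set_mac(struct net_device *dev, const unsigned char *new_mac)
{
	struct sockaddr sa;
	int err;

	sa.sa_family = dev->type;		/* e.g. ARPHRD_ETHER */
	memcpy(sa.sa_data, new_mac, dev->addr_len);

	rtnl_lock();
	err = dev_set_mac_address(dev, &sa);
	rtnl_unlock();
	return err;
}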
Linus Torvalds1da177e2005-04-16 15:20:36 -07004881
4882/*
Eric Dumazet3710bec2009-11-01 19:42:09 +00004883 * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07004884 */
Jeff Garzik14e3e072007-10-08 00:06:32 -07004885static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004886{
4887 int err;
Eric Dumazet3710bec2009-11-01 19:42:09 +00004888 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004889
4890 if (!dev)
4891 return -ENODEV;
4892
4893 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004894 case SIOCGIFFLAGS: /* Get interface flags */
4895 ifr->ifr_flags = (short) dev_get_flags(dev);
4896 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004897
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004898 case SIOCGIFMETRIC: /* Get the metric on the interface
4899 (currently unused) */
4900 ifr->ifr_metric = 0;
4901 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004902
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004903 case SIOCGIFMTU: /* Get the MTU of a device */
4904 ifr->ifr_mtu = dev->mtu;
4905 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004906
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004907 case SIOCGIFHWADDR:
4908 if (!dev->addr_len)
4909 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4910 else
4911 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4912 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4913 ifr->ifr_hwaddr.sa_family = dev->type;
4914 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004915
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004916 case SIOCGIFSLAVE:
4917 err = -EINVAL;
4918 break;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004919
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004920 case SIOCGIFMAP:
4921 ifr->ifr_map.mem_start = dev->mem_start;
4922 ifr->ifr_map.mem_end = dev->mem_end;
4923 ifr->ifr_map.base_addr = dev->base_addr;
4924 ifr->ifr_map.irq = dev->irq;
4925 ifr->ifr_map.dma = dev->dma;
4926 ifr->ifr_map.port = dev->if_port;
4927 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004928
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004929 case SIOCGIFINDEX:
4930 ifr->ifr_ifindex = dev->ifindex;
4931 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004932
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004933 case SIOCGIFTXQLEN:
4934 ifr->ifr_qlen = dev->tx_queue_len;
4935 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004936
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004937 default:
4938 /* dev_ioctl() should ensure this case
4939 * is never reached
4940 */
4941 WARN_ON(1);
Lifeng Sun41c31f32011-04-27 22:04:51 +00004942 err = -ENOTTY;
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004943 break;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004944
4945 }
4946 return err;
4947}
4948
4949/*
4950 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
4951 */
4952static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4953{
4954 int err;
4955 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08004956 const struct net_device_ops *ops;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004957
4958 if (!dev)
4959 return -ENODEV;
4960
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08004961 ops = dev->netdev_ops;
4962
Jeff Garzik14e3e072007-10-08 00:06:32 -07004963 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004964 case SIOCSIFFLAGS: /* Set interface flags */
4965 return dev_change_flags(dev, ifr->ifr_flags);
Jeff Garzik14e3e072007-10-08 00:06:32 -07004966
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004967 case SIOCSIFMETRIC: /* Set the metric on the interface
4968 (currently unused) */
4969 return -EOPNOTSUPP;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004970
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004971 case SIOCSIFMTU: /* Set the MTU of a device */
4972 return dev_set_mtu(dev, ifr->ifr_mtu);
Jeff Garzik14e3e072007-10-08 00:06:32 -07004973
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004974 case SIOCSIFHWADDR:
4975 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004976
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004977 case SIOCSIFHWBROADCAST:
4978 if (ifr->ifr_hwaddr.sa_family != dev->type)
4979 return -EINVAL;
4980 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4981 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4982 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4983 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004984
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004985 case SIOCSIFMAP:
4986 if (ops->ndo_set_config) {
4987 if (!netif_device_present(dev))
4988 return -ENODEV;
4989 return ops->ndo_set_config(dev, &ifr->ifr_map);
4990 }
4991 return -EOPNOTSUPP;
4992
4993 case SIOCADDMULTI:
Jiri Pirkob81693d2011-08-16 06:29:02 +00004994 if (!ops->ndo_set_rx_mode ||
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004995 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4996 return -EINVAL;
4997 if (!netif_device_present(dev))
4998 return -ENODEV;
Jiri Pirko22bedad32010-04-01 21:22:57 +00004999 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005000
5001 case SIOCDELMULTI:
Jiri Pirkob81693d2011-08-16 06:29:02 +00005002 if (!ops->ndo_set_rx_mode ||
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005003 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
5004 return -EINVAL;
5005 if (!netif_device_present(dev))
5006 return -ENODEV;
Jiri Pirko22bedad32010-04-01 21:22:57 +00005007 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005008
5009 case SIOCSIFTXQLEN:
5010 if (ifr->ifr_qlen < 0)
5011 return -EINVAL;
5012 dev->tx_queue_len = ifr->ifr_qlen;
5013 return 0;
5014
5015 case SIOCSIFNAME:
5016 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
5017 return dev_change_name(dev, ifr->ifr_newname);
5018
Richard Cochran4dc360c2011-10-19 17:00:35 -04005019 case SIOCSHWTSTAMP:
5020 err = net_hwtstamp_validate(ifr);
5021 if (err)
5022 return err;
5023 /* fall through */
5024
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005025 /*
5026 * Unknown or private ioctl
5027 */
5028 default:
5029 if ((cmd >= SIOCDEVPRIVATE &&
5030 cmd <= SIOCDEVPRIVATE + 15) ||
5031 cmd == SIOCBONDENSLAVE ||
5032 cmd == SIOCBONDRELEASE ||
5033 cmd == SIOCBONDSETHWADDR ||
5034 cmd == SIOCBONDSLAVEINFOQUERY ||
5035 cmd == SIOCBONDINFOQUERY ||
5036 cmd == SIOCBONDCHANGEACTIVE ||
5037 cmd == SIOCGMIIPHY ||
5038 cmd == SIOCGMIIREG ||
5039 cmd == SIOCSMIIREG ||
5040 cmd == SIOCBRADDIF ||
5041 cmd == SIOCBRDELIF ||
5042 cmd == SIOCSHWTSTAMP ||
5043 cmd == SIOCWANDEV) {
5044 err = -EOPNOTSUPP;
5045 if (ops->ndo_do_ioctl) {
5046 if (netif_device_present(dev))
5047 err = ops->ndo_do_ioctl(dev, ifr, cmd);
5048 else
5049 err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005050 }
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005051 } else
5052 err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005053
5054 }
5055 return err;
5056}
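/*
 * Illustrative userspace sketch of the other end of these handlers: querying
 * an interface's MTU with SIOCGIFMTU through an ordinary datagram socket.
 * "eth0" is just an example interface name.
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *
 *	int main(void)
 *	{
 *		struct ifreq ifr;
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *		if (fd < 0)
 *			return 1;
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *		if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *			printf("mtu = %d\n", ifr.ifr_mtu);
 *		close(fd);
 *		return 0;
 *	}
 */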
5057
5058/*
5059 * This function handles all "interface"-type I/O control requests. The actual
5060 * 'doing' part of this is dev_ifsioc above.
5061 */
5062
5063/**
5064 * dev_ioctl - network device ioctl
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07005065 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07005066 * @cmd: command to issue
5067 * @arg: pointer to a struct ifreq in user space
5068 *
5069 * Issue ioctl functions to devices. This is normally called by the
5070 * user space syscall interfaces but can sometimes be useful for
5071 * other purposes. The return value is the return from the syscall if
5072 * positive or a negative errno code on error.
5073 */
5074
Eric W. Biederman881d9662007-09-17 11:56:21 -07005075int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005076{
5077 struct ifreq ifr;
5078 int ret;
5079 char *colon;
5080
 5081 /* One special case: SIOCGIFCONF takes an ifconf argument
 5082 and requires a shared lock, because it sleeps writing
5083 to user space.
5084 */
5085
5086 if (cmd == SIOCGIFCONF) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08005087 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07005088 ret = dev_ifconf(net, (char __user *) arg);
Stephen Hemminger6756ae42006-03-20 22:23:58 -08005089 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005090 return ret;
5091 }
5092 if (cmd == SIOCGIFNAME)
Eric W. Biederman881d9662007-09-17 11:56:21 -07005093 return dev_ifname(net, (struct ifreq __user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005094
5095 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
5096 return -EFAULT;
5097
5098 ifr.ifr_name[IFNAMSIZ-1] = 0;
5099
5100 colon = strchr(ifr.ifr_name, ':');
5101 if (colon)
5102 *colon = 0;
5103
5104 /*
5105 * See which interface the caller is talking about.
5106 */
5107
5108 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005109 /*
5110 * These ioctl calls:
5111 * - can be done by all.
5112 * - are atomic and do not require locking.
5113 * - return a value
5114 */
5115 case SIOCGIFFLAGS:
5116 case SIOCGIFMETRIC:
5117 case SIOCGIFMTU:
5118 case SIOCGIFHWADDR:
5119 case SIOCGIFSLAVE:
5120 case SIOCGIFMAP:
5121 case SIOCGIFINDEX:
5122 case SIOCGIFTXQLEN:
5123 dev_load(net, ifr.ifr_name);
Eric Dumazet3710bec2009-11-01 19:42:09 +00005124 rcu_read_lock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005125 ret = dev_ifsioc_locked(net, &ifr, cmd);
Eric Dumazet3710bec2009-11-01 19:42:09 +00005126 rcu_read_unlock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005127 if (!ret) {
5128 if (colon)
5129 *colon = ':';
5130 if (copy_to_user(arg, &ifr,
5131 sizeof(struct ifreq)))
5132 ret = -EFAULT;
5133 }
5134 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005135
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005136 case SIOCETHTOOL:
5137 dev_load(net, ifr.ifr_name);
5138 rtnl_lock();
5139 ret = dev_ethtool(net, &ifr);
5140 rtnl_unlock();
5141 if (!ret) {
5142 if (colon)
5143 *colon = ':';
5144 if (copy_to_user(arg, &ifr,
5145 sizeof(struct ifreq)))
5146 ret = -EFAULT;
5147 }
5148 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005149
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005150 /*
5151 * These ioctl calls:
5152 * - require superuser power.
5153 * - require strict serialization.
5154 * - return a value
5155 */
5156 case SIOCGMIIPHY:
5157 case SIOCGMIIREG:
5158 case SIOCSIFNAME:
5159 if (!capable(CAP_NET_ADMIN))
5160 return -EPERM;
5161 dev_load(net, ifr.ifr_name);
5162 rtnl_lock();
5163 ret = dev_ifsioc(net, &ifr, cmd);
5164 rtnl_unlock();
5165 if (!ret) {
5166 if (colon)
5167 *colon = ':';
5168 if (copy_to_user(arg, &ifr,
5169 sizeof(struct ifreq)))
5170 ret = -EFAULT;
5171 }
5172 return ret;
5173
5174 /*
5175 * These ioctl calls:
5176 * - require superuser power.
5177 * - require strict serialization.
5178 * - do not return a value
5179 */
5180 case SIOCSIFFLAGS:
5181 case SIOCSIFMETRIC:
5182 case SIOCSIFMTU:
5183 case SIOCSIFMAP:
5184 case SIOCSIFHWADDR:
5185 case SIOCSIFSLAVE:
5186 case SIOCADDMULTI:
5187 case SIOCDELMULTI:
5188 case SIOCSIFHWBROADCAST:
5189 case SIOCSIFTXQLEN:
5190 case SIOCSMIIREG:
5191 case SIOCBONDENSLAVE:
5192 case SIOCBONDRELEASE:
5193 case SIOCBONDSETHWADDR:
5194 case SIOCBONDCHANGEACTIVE:
5195 case SIOCBRADDIF:
5196 case SIOCBRDELIF:
5197 case SIOCSHWTSTAMP:
5198 if (!capable(CAP_NET_ADMIN))
5199 return -EPERM;
5200 /* fall through */
5201 case SIOCBONDSLAVEINFOQUERY:
5202 case SIOCBONDINFOQUERY:
5203 dev_load(net, ifr.ifr_name);
5204 rtnl_lock();
5205 ret = dev_ifsioc(net, &ifr, cmd);
5206 rtnl_unlock();
5207 return ret;
5208
5209 case SIOCGIFMEM:
5210 /* Get the per device memory space. We can add this but
5211 * currently do not support it */
5212 case SIOCSIFMEM:
5213 /* Set the per device memory buffer space.
5214 * Not applicable in our case */
5215 case SIOCSIFLINK:
Lifeng Sun41c31f32011-04-27 22:04:51 +00005216 return -ENOTTY;
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005217
5218 /*
5219 * Unknown or private ioctl.
5220 */
5221 default:
5222 if (cmd == SIOCWANDEV ||
5223 (cmd >= SIOCDEVPRIVATE &&
5224 cmd <= SIOCDEVPRIVATE + 15)) {
Eric W. Biederman881d9662007-09-17 11:56:21 -07005225 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005226 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07005227 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005228 rtnl_unlock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005229 if (!ret && copy_to_user(arg, &ifr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07005230 sizeof(struct ifreq)))
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005231 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005232 return ret;
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005233 }
5234 /* Take care of Wireless Extensions */
5235 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
5236 return wext_handle_ioctl(net, &ifr, cmd, arg);
Lifeng Sun41c31f32011-04-27 22:04:51 +00005237 return -ENOTTY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005238 }
5239}
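/*
 * A minimal userspace sketch of how this entry point is typically reached
 * via the socket ioctl() syscall. Illustrative only and not part of this
 * file; "eth0" is an assumed interface name and error handling is trimmed:
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0)
 *		printf("flags: 0x%x\n", ifr.ifr_flags);
 *	close(fd);
 *
 * SIOCGIFFLAGS takes the read-only dev_ifsioc_locked() path above under
 * rcu_read_lock(); SIOCSIFFLAGS and the other setters additionally require
 * CAP_NET_ADMIN and are serialized under the rtnl lock.
 */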
5240
5241
5242/**
5243 * dev_new_index - allocate an ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07005244 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07005245 *
5246 * Returns a suitable unique value for a new device interface
5247 * number. The caller must hold the rtnl semaphore or the
5248 * dev_base_lock to be sure it remains unique.
5249 */
Eric W. Biederman881d9662007-09-17 11:56:21 -07005250static int dev_new_index(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005251{
Pavel Emelyanovaa79e662012-08-08 21:53:19 +00005252 int ifindex = net->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005253 for (;;) {
5254 if (++ifindex <= 0)
5255 ifindex = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005256 if (!__dev_get_by_index(net, ifindex))
Pavel Emelyanovaa79e662012-08-08 21:53:19 +00005257 return net->ifindex = ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005258 }
5259}
5260
Linus Torvalds1da177e2005-04-16 15:20:36 -07005261/* Delayed registration/unregistration */
Denis Cheng3b5b34f2007-12-07 00:49:17 -08005262static LIST_HEAD(net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005263
Stephen Hemminger6f05f622007-03-08 20:46:03 -08005264static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005265{
Linus Torvalds1da177e2005-04-16 15:20:36 -07005266 list_add_tail(&dev->todo_list, &net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005267}
5268
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005269static void rollback_registered_many(struct list_head *head)
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005270{
Krishna Kumare93737b2009-12-08 22:26:02 +00005271 struct net_device *dev, *tmp;
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005272
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005273 BUG_ON(dev_boot_phase);
5274 ASSERT_RTNL();
5275
Krishna Kumare93737b2009-12-08 22:26:02 +00005276 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005277 /* Some devices call unregister without ever having been
Krishna Kumare93737b2009-12-08 22:26:02 +00005278 * registered, as part of initialization unwind. Remove those
 5279 * devices and proceed with the remaining ones.
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005280 */
5281 if (dev->reg_state == NETREG_UNINITIALIZED) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00005282 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5283 dev->name, dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005284
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005285 WARN_ON(1);
Krishna Kumare93737b2009-12-08 22:26:02 +00005286 list_del(&dev->unreg_list);
5287 continue;
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005288 }
Eric Dumazet449f4542011-05-19 12:24:16 +00005289 dev->dismantle = true;
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005290 BUG_ON(dev->reg_state != NETREG_REGISTERED);
Octavian Purdila44345722010-12-13 12:44:07 +00005291 }
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005292
Octavian Purdila44345722010-12-13 12:44:07 +00005293 /* If device is running, close it first. */
5294 dev_close_many(head);
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005295
Octavian Purdila44345722010-12-13 12:44:07 +00005296 list_for_each_entry(dev, head, unreg_list) {
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005297 /* And unlink it from device chain. */
5298 unlist_netdevice(dev);
5299
5300 dev->reg_state = NETREG_UNREGISTERING;
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005301 }
5302
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005303 synchronize_net();
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005304
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005305 list_for_each_entry(dev, head, unreg_list) {
5306 /* Shutdown queueing discipline. */
5307 dev_shutdown(dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005308
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005309
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005310 /* Notify protocols that we are about to destroy
5311 this device. They should clean up all of their state.
5312 */
5313 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5314
Patrick McHardya2835762010-02-26 06:34:51 +00005315 if (!dev->rtnl_link_ops ||
5316 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5317 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
5318
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005319 /*
5320 * Flush the unicast and multicast chains
5321 */
Jiri Pirkoa748ee22010-04-01 21:22:09 +00005322 dev_uc_flush(dev);
Jiri Pirko22bedad32010-04-01 21:22:57 +00005323 dev_mc_flush(dev);
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005324
5325 if (dev->netdev_ops->ndo_uninit)
5326 dev->netdev_ops->ndo_uninit(dev);
5327
5328 /* Notifier chain MUST detach us from master device. */
5329 WARN_ON(dev->master);
5330
5331 /* Remove entries from kobject tree */
5332 netdev_unregister_kobject(dev);
5333 }
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005334
Eric W. Biederman850a5452011-10-13 22:25:23 +00005335 synchronize_net();
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005336
Eric W. Biedermana5ee1552009-11-29 15:45:58 +00005337 list_for_each_entry(dev, head, unreg_list)
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005338 dev_put(dev);
5339}
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005340
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005341static void rollback_registered(struct net_device *dev)
5342{
5343 LIST_HEAD(single);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005344
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005345 list_add(&dev->unreg_list, &single);
5346 rollback_registered_many(&single);
Eric Dumazetceaaec92011-02-17 22:59:19 +00005347 list_del(&single);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005348}
5349
Michał Mirosławc8f44af2011-11-15 15:29:55 +00005350static netdev_features_t netdev_fix_features(struct net_device *dev,
5351 netdev_features_t features)
Herbert Xub63365a2008-10-23 01:11:29 -07005352{
Michał Mirosław57422dc2011-01-22 12:14:12 +00005353 /* Fix illegal checksum combinations */
5354 if ((features & NETIF_F_HW_CSUM) &&
5355 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
Michał Mirosław6f404e42011-05-16 15:14:21 -04005356 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
Michał Mirosław57422dc2011-01-22 12:14:12 +00005357 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5358 }
5359
Herbert Xub63365a2008-10-23 01:11:29 -07005360 /* Fix illegal SG+CSUM combinations. */
5361 if ((features & NETIF_F_SG) &&
5362 !(features & NETIF_F_ALL_CSUM)) {
Michał Mirosław6f404e42011-05-16 15:14:21 -04005363 netdev_dbg(dev,
5364 "Dropping NETIF_F_SG since no checksum feature.\n");
Herbert Xub63365a2008-10-23 01:11:29 -07005365 features &= ~NETIF_F_SG;
5366 }
5367
5368 /* TSO requires that SG is present as well. */
Ben Hutchingsea2d3682011-04-12 14:38:37 +00005369 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
Michał Mirosław6f404e42011-05-16 15:14:21 -04005370 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
Ben Hutchingsea2d3682011-04-12 14:38:37 +00005371 features &= ~NETIF_F_ALL_TSO;
Herbert Xub63365a2008-10-23 01:11:29 -07005372 }
5373
Ben Hutchings31d8b9e2011-04-12 14:47:15 +00005374 /* TSO ECN requires that TSO is present as well. */
5375 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5376 features &= ~NETIF_F_TSO_ECN;
5377
Michał Mirosław212b5732011-02-15 16:59:16 +00005378 /* Software GSO depends on SG. */
5379 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
Michał Mirosław6f404e42011-05-16 15:14:21 -04005380 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
Michał Mirosław212b5732011-02-15 16:59:16 +00005381 features &= ~NETIF_F_GSO;
5382 }
5383
Michał Mirosławacd11302011-01-24 15:45:15 -08005384 /* UFO needs SG and checksumming */
Herbert Xub63365a2008-10-23 01:11:29 -07005385 if (features & NETIF_F_UFO) {
Michał Mirosław79032642010-11-30 06:38:00 +00005386 /* maybe split UFO into V4 and V6? */
5387 if (!((features & NETIF_F_GEN_CSUM) ||
5388 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5389 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
Michał Mirosław6f404e42011-05-16 15:14:21 -04005390 netdev_dbg(dev,
Michał Mirosławacd11302011-01-24 15:45:15 -08005391 "Dropping NETIF_F_UFO since no checksum offload features.\n");
Herbert Xub63365a2008-10-23 01:11:29 -07005392 features &= ~NETIF_F_UFO;
5393 }
5394
5395 if (!(features & NETIF_F_SG)) {
Michał Mirosław6f404e42011-05-16 15:14:21 -04005396 netdev_dbg(dev,
Michał Mirosławacd11302011-01-24 15:45:15 -08005397 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
Herbert Xub63365a2008-10-23 01:11:29 -07005398 features &= ~NETIF_F_UFO;
5399 }
5400 }
5401
5402 return features;
5403}
Herbert Xub63365a2008-10-23 01:11:29 -07005404
Michał Mirosław6cb6a272011-04-02 22:48:47 -07005405int __netdev_update_features(struct net_device *dev)
Michał Mirosław5455c692011-02-15 16:59:17 +00005406{
Michał Mirosławc8f44af2011-11-15 15:29:55 +00005407 netdev_features_t features;
Michał Mirosław5455c692011-02-15 16:59:17 +00005408 int err = 0;
5409
Michał Mirosław87267482011-04-12 09:56:38 +00005410 ASSERT_RTNL();
5411
Michał Mirosław5455c692011-02-15 16:59:17 +00005412 features = netdev_get_wanted_features(dev);
5413
5414 if (dev->netdev_ops->ndo_fix_features)
5415 features = dev->netdev_ops->ndo_fix_features(dev, features);
5416
5417 /* driver might be less strict about feature dependencies */
5418 features = netdev_fix_features(dev, features);
5419
5420 if (dev->features == features)
Michał Mirosław6cb6a272011-04-02 22:48:47 -07005421 return 0;
Michał Mirosław5455c692011-02-15 16:59:17 +00005422
Michał Mirosławc8f44af2011-11-15 15:29:55 +00005423 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
5424 &dev->features, &features);
Michał Mirosław5455c692011-02-15 16:59:17 +00005425
5426 if (dev->netdev_ops->ndo_set_features)
5427 err = dev->netdev_ops->ndo_set_features(dev, features);
5428
Michał Mirosław6cb6a272011-04-02 22:48:47 -07005429 if (unlikely(err < 0)) {
Michał Mirosław5455c692011-02-15 16:59:17 +00005430 netdev_err(dev,
Michał Mirosławc8f44af2011-11-15 15:29:55 +00005431 "set_features() failed (%d); wanted %pNF, left %pNF\n",
5432 err, &features, &dev->features);
Michał Mirosław6cb6a272011-04-02 22:48:47 -07005433 return -1;
5434 }
5435
5436 if (!err)
5437 dev->features = features;
5438
5439 return 1;
5440}
5441
Michał Mirosławafe12cc2011-05-07 03:22:17 +00005442/**
5443 * netdev_update_features - recalculate device features
5444 * @dev: the device to check
5445 *
5446 * Recalculate the dev->features set and send notifications if it
5447 * has changed. Should be called whenever driver- or hardware-dependent
5448 * conditions that influence the features might have changed.
5449 */
Michał Mirosław6cb6a272011-04-02 22:48:47 -07005450void netdev_update_features(struct net_device *dev)
5451{
5452 if (__netdev_update_features(dev))
5453 netdev_features_change(dev);
Michał Mirosław5455c692011-02-15 16:59:17 +00005454}
5455EXPORT_SYMBOL(netdev_update_features);
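/*
 * A rough driver-side sketch (hypothetical foo_* names, not a real driver
 * API) of when netdev_update_features() is typically called: after some
 * state change that influences what the driver's ndo_fix_features() will
 * report. The caller is expected to hold RTNL:
 *
 *	static int foo_set_loopback(struct net_device *dev, bool enable)
 *	{
 *		struct foo_priv *priv = netdev_priv(dev);
 *
 *		priv->loopback = enable;
 *		netdev_update_features(dev);
 *		return 0;
 *	}
 */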
5456
Linus Torvalds1da177e2005-04-16 15:20:36 -07005457/**
Michał Mirosławafe12cc2011-05-07 03:22:17 +00005458 * netdev_change_features - recalculate device features
5459 * @dev: the device to check
5460 *
5461 * Recalculate the dev->features set and send notifications even
5462 * if it has not changed. Should be called instead of
5463 * netdev_update_features() if dev->vlan_features might also
5464 * have changed, so that the changes are propagated to stacked
5465 * VLAN devices.
5466 */
5467void netdev_change_features(struct net_device *dev)
5468{
5469 __netdev_update_features(dev);
5470 netdev_features_change(dev);
5471}
5472EXPORT_SYMBOL(netdev_change_features);
5473
5474/**
Patrick Mullaneyfc4a7482009-12-03 15:59:22 -08005475 * netif_stacked_transfer_operstate - transfer operstate
5476 * @rootdev: the root or lower level device to transfer state from
5477 * @dev: the device to transfer operstate to
5478 *
5479 * Transfer operational state from root to device. This is normally
5480 * called when a stacking relationship exists between the root
5481 * device and the device (a leaf device).
5482 */
5483void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5484 struct net_device *dev)
5485{
5486 if (rootdev->operstate == IF_OPER_DORMANT)
5487 netif_dormant_on(dev);
5488 else
5489 netif_dormant_off(dev);
5490
5491 if (netif_carrier_ok(rootdev)) {
5492 if (!netif_carrier_ok(dev))
5493 netif_carrier_on(dev);
5494 } else {
5495 if (netif_carrier_ok(dev))
5496 netif_carrier_off(dev);
5497 }
5498}
5499EXPORT_SYMBOL(netif_stacked_transfer_operstate);
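/*
 * A hedged sketch of typical use by a stacking driver: mirror the lower
 * device's operstate onto the upper device from a netdevice notifier.
 * foo_device_event() and foo_get_upper() are hypothetical helpers:
 *
 *	static int foo_device_event(struct notifier_block *unused,
 *				    unsigned long event, void *ptr)
 *	{
 *		struct net_device *lower = ptr;
 *		struct net_device *upper = foo_get_upper(lower);
 *
 *		if (upper && event == NETDEV_CHANGE)
 *			netif_stacked_transfer_operstate(lower, upper);
 *		return NOTIFY_DONE;
 *	}
 */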
5500
Tom Herbertbf264142010-11-26 08:36:09 +00005501#ifdef CONFIG_RPS
Eric Dumazet1b4bf462010-09-23 17:26:35 +00005502static int netif_alloc_rx_queues(struct net_device *dev)
5503{
Eric Dumazet1b4bf462010-09-23 17:26:35 +00005504 unsigned int i, count = dev->num_rx_queues;
Tom Herbertbd25fa72010-10-18 18:00:16 +00005505 struct netdev_rx_queue *rx;
Eric Dumazet1b4bf462010-09-23 17:26:35 +00005506
Tom Herbertbd25fa72010-10-18 18:00:16 +00005507 BUG_ON(count < 1);
Eric Dumazet1b4bf462010-09-23 17:26:35 +00005508
Tom Herbertbd25fa72010-10-18 18:00:16 +00005509 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5510 if (!rx) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00005511 pr_err("netdev: Unable to allocate %u rx queues\n", count);
Tom Herbertbd25fa72010-10-18 18:00:16 +00005512 return -ENOMEM;
Eric Dumazet1b4bf462010-09-23 17:26:35 +00005513 }
Tom Herbertbd25fa72010-10-18 18:00:16 +00005514 dev->_rx = rx;
5515
Tom Herbertbd25fa72010-10-18 18:00:16 +00005516 for (i = 0; i < count; i++)
Tom Herbertfe822242010-11-09 10:47:38 +00005517 rx[i].dev = dev;
Eric Dumazet1b4bf462010-09-23 17:26:35 +00005518 return 0;
5519}
Tom Herbertbf264142010-11-26 08:36:09 +00005520#endif
Eric Dumazet1b4bf462010-09-23 17:26:35 +00005521
Changli Gaoaa942102010-12-04 02:31:41 +00005522static void netdev_init_one_queue(struct net_device *dev,
5523 struct netdev_queue *queue, void *_unused)
5524{
5525 /* Initialize queue lock */
5526 spin_lock_init(&queue->_xmit_lock);
5527 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5528 queue->xmit_lock_owner = -1;
Changli Gaob236da62010-12-14 03:09:15 +00005529 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
Changli Gaoaa942102010-12-04 02:31:41 +00005530 queue->dev = dev;
Tom Herbert114cf582011-11-28 16:33:09 +00005531#ifdef CONFIG_BQL
5532 dql_init(&queue->dql, HZ);
5533#endif
Changli Gaoaa942102010-12-04 02:31:41 +00005534}
5535
Tom Herberte6484932010-10-18 18:04:39 +00005536static int netif_alloc_netdev_queues(struct net_device *dev)
5537{
5538 unsigned int count = dev->num_tx_queues;
5539 struct netdev_queue *tx;
5540
5541 BUG_ON(count < 1);
5542
5543 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5544 if (!tx) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00005545 pr_err("netdev: Unable to allocate %u tx queues\n", count);
Tom Herberte6484932010-10-18 18:04:39 +00005546 return -ENOMEM;
5547 }
5548 dev->_tx = tx;
Tom Herbert1d24eb42010-11-21 13:17:27 +00005549
Tom Herberte6484932010-10-18 18:04:39 +00005550 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5551 spin_lock_init(&dev->tx_global_lock);
Changli Gaoaa942102010-12-04 02:31:41 +00005552
5553 return 0;
Tom Herberte6484932010-10-18 18:04:39 +00005554}
5555
Patrick Mullaneyfc4a7482009-12-03 15:59:22 -08005556/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07005557 * register_netdevice - register a network device
5558 * @dev: device to register
5559 *
5560 * Take a completed network device structure and add it to the kernel
5561 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5562 * chain. 0 is returned on success. A negative errno code is returned
5563 * on a failure to set up the device, or if the name is a duplicate.
5564 *
5565 * Callers must hold the rtnl semaphore. You may want
5566 * register_netdev() instead of this.
5567 *
5568 * BUGS:
5569 * The locking appears insufficient to guarantee two parallel registers
5570 * will not get the same name.
5571 */
5572
5573int register_netdevice(struct net_device *dev)
5574{
Linus Torvalds1da177e2005-04-16 15:20:36 -07005575 int ret;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005576 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005577
5578 BUG_ON(dev_boot_phase);
5579 ASSERT_RTNL();
5580
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005581 might_sleep();
5582
Linus Torvalds1da177e2005-04-16 15:20:36 -07005583 /* When net_device's are persistent, this will be fatal. */
5584 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005585 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005586
David S. Millerf1f28aa2008-07-15 00:08:33 -07005587 spin_lock_init(&dev->addr_list_lock);
David S. Millercf508b12008-07-22 14:16:42 -07005588 netdev_set_addr_lockdep_class(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005589
Linus Torvalds1da177e2005-04-16 15:20:36 -07005590 dev->iflink = -1;
5591
Peter Pan(潘卫平)0696c3a2011-05-12 15:46:56 +00005592 ret = dev_get_valid_name(dev, dev->name);
5593 if (ret < 0)
5594 goto out;
5595
Linus Torvalds1da177e2005-04-16 15:20:36 -07005596 /* Init, if this function is available */
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005597 if (dev->netdev_ops->ndo_init) {
5598 ret = dev->netdev_ops->ndo_init(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005599 if (ret) {
5600 if (ret > 0)
5601 ret = -EIO;
Adrian Bunk90833aa2006-11-13 16:02:22 -08005602 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005603 }
5604 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005605
Pavel Emelyanov9c7dafb2012-08-08 21:52:46 +00005606 ret = -EBUSY;
5607 if (!dev->ifindex)
5608 dev->ifindex = dev_new_index(net);
5609 else if (__dev_get_by_index(net, dev->ifindex))
5610 goto err_uninit;
5611
Linus Torvalds1da177e2005-04-16 15:20:36 -07005612 if (dev->iflink == -1)
5613 dev->iflink = dev->ifindex;
5614
Michał Mirosław5455c692011-02-15 16:59:17 +00005615 /* Transfer changeable features to wanted_features and enable
5616 * software offloads (GSO and GRO).
5617 */
5618 dev->hw_features |= NETIF_F_SOFT_FEATURES;
Michał Mirosław14d12322011-02-22 16:52:28 +00005619 dev->features |= NETIF_F_SOFT_FEATURES;
5620 dev->wanted_features = dev->features & dev->hw_features;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005621
Tom Herbertc6e1a0d2011-04-04 22:30:30 -07005622 /* Turn on no cache copy if HW is doing checksum */
Michał Mirosław34324dc2011-11-15 15:29:55 +00005623 if (!(dev->flags & IFF_LOOPBACK)) {
5624 dev->hw_features |= NETIF_F_NOCACHE_COPY;
5625 if (dev->features & NETIF_F_ALL_CSUM) {
5626 dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5627 dev->features |= NETIF_F_NOCACHE_COPY;
5628 }
Tom Herbertc6e1a0d2011-04-04 22:30:30 -07005629 }
5630
Michał Mirosław1180e7d2011-07-14 14:41:11 -07005631 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
Brandon Philips16c3ea72010-09-15 09:24:24 +00005632 */
Michał Mirosław1180e7d2011-07-14 14:41:11 -07005633 dev->vlan_features |= NETIF_F_HIGHDMA;
Brandon Philips16c3ea72010-09-15 09:24:24 +00005634
Johannes Berg7ffbe3f2009-10-02 05:15:27 +00005635 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5636 ret = notifier_to_errno(ret);
5637 if (ret)
5638 goto err_uninit;
5639
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005640 ret = netdev_register_kobject(dev);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005641 if (ret)
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07005642 goto err_uninit;
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005643 dev->reg_state = NETREG_REGISTERED;
5644
Michał Mirosław6cb6a272011-04-02 22:48:47 -07005645 __netdev_update_features(dev);
Michał Mirosław8e9b59b2011-02-22 16:52:28 +00005646
Linus Torvalds1da177e2005-04-16 15:20:36 -07005647 /*
5648 * Default initial state at registration is that the
5649 * device is present.
5650 */
5651
5652 set_bit(__LINK_STATE_PRESENT, &dev->state);
5653
Ben Hutchings8f4cccb2012-08-20 22:16:51 +01005654 linkwatch_init_dev(dev);
5655
Linus Torvalds1da177e2005-04-16 15:20:36 -07005656 dev_init_scheduler(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005657 dev_hold(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005658 list_netdevice(dev);
Theodore Ts'o7bf23572012-07-04 21:23:25 -04005659 add_device_randomness(dev->dev_addr, dev->addr_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005660
5661 /* Notify protocols, that a new device appeared. */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07005662 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07005663 ret = notifier_to_errno(ret);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005664 if (ret) {
5665 rollback_registered(dev);
5666 dev->reg_state = NETREG_UNREGISTERED;
5667 }
Eric W. Biedermand90a9092009-12-12 22:11:15 +00005668 /*
5669 * Prevent userspace races by waiting until the network
5670 * device is fully set up before sending notifications.
5671 */
Patrick McHardya2835762010-02-26 06:34:51 +00005672 if (!dev->rtnl_link_ops ||
5673 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5674 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005675
5676out:
5677 return ret;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07005678
5679err_uninit:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005680 if (dev->netdev_ops->ndo_uninit)
5681 dev->netdev_ops->ndo_uninit(dev);
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07005682 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005683}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005684EXPORT_SYMBOL(register_netdevice);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005685
5686/**
Benjamin Herrenschmidt937f1ba2009-01-14 21:05:05 -08005687 * init_dummy_netdev - init a dummy network device for NAPI
5688 * @dev: device to init
5689 *
5690 * This takes a network device structure and initializes the minimum
5691 * number of fields so it can be used to schedule NAPI polls without
5692 * registering a full-blown interface. This is to be used by drivers
5693 * that need to tie several hardware interfaces to a single NAPI
5694 * poll scheduler due to HW limitations.
5695 */
5696int init_dummy_netdev(struct net_device *dev)
5697{
5698 /* Clear everything. Note we don't initialize spinlocks
5699 * as they aren't supposed to be taken by any of the
5700 * NAPI code and this dummy netdev is supposed to be
5701 * only ever used for NAPI polls
5702 */
5703 memset(dev, 0, sizeof(struct net_device));
5704
5705 /* make sure we BUG if trying to hit standard
5706 * register/unregister code path
5707 */
5708 dev->reg_state = NETREG_DUMMY;
5709
Benjamin Herrenschmidt937f1ba2009-01-14 21:05:05 -08005710 /* NAPI wants this */
5711 INIT_LIST_HEAD(&dev->napi_list);
5712
5713 /* a dummy interface is started by default */
5714 set_bit(__LINK_STATE_PRESENT, &dev->state);
5715 set_bit(__LINK_STATE_START, &dev->state);
5716
Eric Dumazet29b44332010-10-11 10:22:12 +00005717 /* Note : We don't allocate pcpu_refcnt for dummy devices,
5718 * because users of this 'device' don't need to change
5719 * its refcount.
5720 */
5721
Benjamin Herrenschmidt937f1ba2009-01-14 21:05:05 -08005722 return 0;
5723}
5724EXPORT_SYMBOL_GPL(init_dummy_netdev);
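/*
 * A hedged usage sketch: a driver that must multiplex several hardware
 * channels onto one interrupt can hang its NAPI context off a dummy
 * netdev embedded in its private state (priv and foo_poll are assumed
 * names, not from this file):
 *
 *	init_dummy_netdev(&priv->dummy_dev);
 *	netif_napi_add(&priv->dummy_dev, &priv->napi, foo_poll, 64);
 *	napi_enable(&priv->napi);
 */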
5725
5726
5727/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07005728 * register_netdev - register a network device
5729 * @dev: device to register
5730 *
5731 * Take a completed network device structure and add it to the kernel
5732 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5733 * chain. 0 is returned on success. A negative errno code is returned
5734 * on a failure to set up the device, or if the name is a duplicate.
5735 *
Borislav Petkov38b4da32007-04-20 22:14:10 -07005736 * This is a wrapper around register_netdevice that takes the rtnl semaphore
Linus Torvalds1da177e2005-04-16 15:20:36 -07005737 * and expands the device name if you passed a format string to
5738 * alloc_netdev.
5739 */
5740int register_netdev(struct net_device *dev)
5741{
5742 int err;
5743
5744 rtnl_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005745 err = register_netdevice(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005746 rtnl_unlock();
5747 return err;
5748}
5749EXPORT_SYMBOL(register_netdev);
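/*
 * A condensed, hedged sketch of the usual driver lifecycle around
 * register_netdev()/unregister_netdev(); foo_setup, foo_netdev_ops and
 * struct foo_priv are hypothetical, and error handling is trimmed:
 *
 *	dev = alloc_netdev(sizeof(struct foo_priv), "foo%d", foo_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	dev->netdev_ops = &foo_netdev_ops;
 *
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 *	...
 *	unregister_netdev(dev);
 *	free_netdev(dev);
 */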
5750
Eric Dumazet29b44332010-10-11 10:22:12 +00005751int netdev_refcnt_read(const struct net_device *dev)
5752{
5753 int i, refcnt = 0;
5754
5755 for_each_possible_cpu(i)
5756 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
5757 return refcnt;
5758}
5759EXPORT_SYMBOL(netdev_refcnt_read);
5760
Ben Hutchings2c530402012-07-10 10:55:09 +00005761/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07005762 * netdev_wait_allrefs - wait until all references are gone.
Randy Dunlap3de7a372012-08-18 14:36:44 +00005763 * @dev: target net_device
Linus Torvalds1da177e2005-04-16 15:20:36 -07005764 *
5765 * This is called when unregistering network devices.
5766 *
5767 * Any protocol or device that holds a reference should register
5768 * for netdevice notification, and cleanup and put back the
5769 * reference if they receive an UNREGISTER event.
5770 * We can get stuck here if buggy protocols don't correctly
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005771 * call dev_put.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005772 */
5773static void netdev_wait_allrefs(struct net_device *dev)
5774{
5775 unsigned long rebroadcast_time, warning_time;
Eric Dumazet29b44332010-10-11 10:22:12 +00005776 int refcnt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005777
Eric Dumazete014deb2009-11-17 05:59:21 +00005778 linkwatch_forget_dev(dev);
5779
Linus Torvalds1da177e2005-04-16 15:20:36 -07005780 rebroadcast_time = warning_time = jiffies;
Eric Dumazet29b44332010-10-11 10:22:12 +00005781 refcnt = netdev_refcnt_read(dev);
5782
5783 while (refcnt != 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005784 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08005785 rtnl_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005786
5787 /* Rebroadcast unregister notification */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07005788 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
Eric Dumazet748e2d92012-08-22 21:50:59 +00005789
5790 __rtnl_unlock();
Eric Dumazet0115e8e2012-08-22 17:19:46 +00005791 rcu_barrier();
Eric Dumazet748e2d92012-08-22 21:50:59 +00005792 rtnl_lock();
5793
Eric Dumazet0115e8e2012-08-22 17:19:46 +00005794 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005795 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5796 &dev->state)) {
5797 /* We must not have linkwatch events
5798 * pending on unregister. If this
5799 * happens, we simply run the queue
5800 * unscheduled, resulting in a noop
5801 * for this device.
5802 */
5803 linkwatch_run_queue();
5804 }
5805
Stephen Hemminger6756ae42006-03-20 22:23:58 -08005806 __rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005807
5808 rebroadcast_time = jiffies;
5809 }
5810
5811 msleep(250);
5812
Eric Dumazet29b44332010-10-11 10:22:12 +00005813 refcnt = netdev_refcnt_read(dev);
5814
Linus Torvalds1da177e2005-04-16 15:20:36 -07005815 if (time_after(jiffies, warning_time + 10 * HZ)) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00005816 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
5817 dev->name, refcnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005818 warning_time = jiffies;
5819 }
5820 }
5821}
5822
5823/* The sequence is:
5824 *
5825 * rtnl_lock();
5826 * ...
5827 * register_netdevice(x1);
5828 * register_netdevice(x2);
5829 * ...
5830 * unregister_netdevice(y1);
5831 * unregister_netdevice(y2);
5832 * ...
5833 * rtnl_unlock();
5834 * free_netdev(y1);
5835 * free_netdev(y2);
5836 *
Herbert Xu58ec3b42008-10-07 15:50:03 -07005837 * We are invoked by rtnl_unlock().
Linus Torvalds1da177e2005-04-16 15:20:36 -07005838 * This allows us to deal with problems:
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005839 * 1) We can delete sysfs objects which invoke hotplug
Linus Torvalds1da177e2005-04-16 15:20:36 -07005840 * without deadlocking with linkwatch via keventd.
5841 * 2) Since we run with the RTNL semaphore not held, we can sleep
5842 * safely in order to wait for the netdev refcnt to drop to zero.
Herbert Xu58ec3b42008-10-07 15:50:03 -07005843 *
5844 * We must not return until all unregister events added during
5845 * the interval the lock was held have been completed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005846 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005847void netdev_run_todo(void)
5848{
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005849 struct list_head list;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005850
Linus Torvalds1da177e2005-04-16 15:20:36 -07005851 /* Snapshot list, allow later requests */
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005852 list_replace_init(&net_todo_list, &list);
Herbert Xu58ec3b42008-10-07 15:50:03 -07005853
5854 __rtnl_unlock();
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005855
Eric Dumazet0115e8e2012-08-22 17:19:46 +00005856
5857 /* Wait for rcu callbacks to finish before next phase */
Eric W. Biederman850a5452011-10-13 22:25:23 +00005858 if (!list_empty(&list))
5859 rcu_barrier();
5860
Linus Torvalds1da177e2005-04-16 15:20:36 -07005861 while (!list_empty(&list)) {
5862 struct net_device *dev
stephen hemmingere5e26d72010-02-24 14:01:38 +00005863 = list_first_entry(&list, struct net_device, todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005864 list_del(&dev->todo_list);
5865
Eric Dumazet748e2d92012-08-22 21:50:59 +00005866 rtnl_lock();
Eric Dumazet0115e8e2012-08-22 17:19:46 +00005867 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
Eric Dumazet748e2d92012-08-22 21:50:59 +00005868 __rtnl_unlock();
Eric Dumazet0115e8e2012-08-22 17:19:46 +00005869
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005870 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00005871 pr_err("network todo '%s' but state %d\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07005872 dev->name, dev->reg_state);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005873 dump_stack();
5874 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005875 }
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005876
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005877 dev->reg_state = NETREG_UNREGISTERED;
5878
Changli Gao152102c2010-03-30 20:16:22 +00005879 on_each_cpu(flush_backlog, dev, 1);
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07005880
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005881 netdev_wait_allrefs(dev);
5882
5883 /* paranoia */
Eric Dumazet29b44332010-10-11 10:22:12 +00005884 BUG_ON(netdev_refcnt_read(dev));
Eric Dumazet33d480c2011-08-11 19:30:52 +00005885 WARN_ON(rcu_access_pointer(dev->ip_ptr));
5886 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
Ilpo Järvinen547b7922008-07-25 21:43:18 -07005887 WARN_ON(dev->dn_ptr);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005888
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005889 if (dev->destructor)
5890 dev->destructor(dev);
Stephen Hemminger9093bbb2007-05-19 15:39:25 -07005891
5892 /* Free network device */
5893 kobject_put(&dev->dev.kobj);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005894 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005895}
5896
Ben Hutchings3cfde792010-07-09 09:11:52 +00005897/* Convert net_device_stats to rtnl_link_stats64. They have the same
5898 * fields in the same order, with only the type differing.
5899 */
Eric Dumazet77a1abf2012-03-05 04:50:09 +00005900void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
5901 const struct net_device_stats *netdev_stats)
Ben Hutchings3cfde792010-07-09 09:11:52 +00005902{
5903#if BITS_PER_LONG == 64
Eric Dumazet77a1abf2012-03-05 04:50:09 +00005904 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
5905 memcpy(stats64, netdev_stats, sizeof(*stats64));
Ben Hutchings3cfde792010-07-09 09:11:52 +00005906#else
5907 size_t i, n = sizeof(*stats64) / sizeof(u64);
5908 const unsigned long *src = (const unsigned long *)netdev_stats;
5909 u64 *dst = (u64 *)stats64;
5910
5911 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
5912 sizeof(*stats64) / sizeof(u64));
5913 for (i = 0; i < n; i++)
5914 dst[i] = src[i];
5915#endif
5916}
Eric Dumazet77a1abf2012-03-05 04:50:09 +00005917EXPORT_SYMBOL(netdev_stats_to_stats64);
Ben Hutchings3cfde792010-07-09 09:11:52 +00005918
Eric Dumazetd83345a2009-11-16 03:36:51 +00005919/**
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005920 * dev_get_stats - get network device statistics
5921 * @dev: device to get statistics from
Eric Dumazet28172732010-07-07 14:58:56 -07005922 * @storage: place to store stats
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005923 *
Ben Hutchingsd7753512010-07-09 09:12:41 +00005924 * Get network statistics from device. Return @storage.
5925 * The device driver may provide its own method by setting
5926 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
5927 * otherwise the internal statistics structure is used.
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005928 */
Ben Hutchingsd7753512010-07-09 09:12:41 +00005929struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
5930 struct rtnl_link_stats64 *storage)
Eric Dumazet7004bf22009-05-18 00:34:33 +00005931{
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005932 const struct net_device_ops *ops = dev->netdev_ops;
5933
Eric Dumazet28172732010-07-07 14:58:56 -07005934 if (ops->ndo_get_stats64) {
5935 memset(storage, 0, sizeof(*storage));
Eric Dumazetcaf586e2010-09-30 21:06:55 +00005936 ops->ndo_get_stats64(dev, storage);
5937 } else if (ops->ndo_get_stats) {
Ben Hutchings3cfde792010-07-09 09:11:52 +00005938 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
Eric Dumazetcaf586e2010-09-30 21:06:55 +00005939 } else {
5940 netdev_stats_to_stats64(storage, &dev->stats);
Eric Dumazet28172732010-07-07 14:58:56 -07005941 }
Eric Dumazetcaf586e2010-09-30 21:06:55 +00005942 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
Eric Dumazet28172732010-07-07 14:58:56 -07005943 return storage;
Rusty Russellc45d2862007-03-28 14:29:08 -07005944}
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005945EXPORT_SYMBOL(dev_get_stats);
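/*
 * An illustrative caller sketch, similar to what procfs/sysfs readers do.
 * The assumption is that RCU or RTNL protects the device for the duration,
 * and "temp" provides the on-stack storage dev_get_stats() may fill:
 *
 *	struct rtnl_link_stats64 temp;
 *	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
 *
 *	seq_printf(seq, "rx_bytes: %llu\n", stats->rx_bytes);
 */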
Rusty Russellc45d2862007-03-28 14:29:08 -07005946
Eric Dumazet24824a02010-10-02 06:11:55 +00005947struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
David S. Millerdc2b4842008-07-08 17:18:23 -07005948{
Eric Dumazet24824a02010-10-02 06:11:55 +00005949 struct netdev_queue *queue = dev_ingress_queue(dev);
David S. Millerdc2b4842008-07-08 17:18:23 -07005950
Eric Dumazet24824a02010-10-02 06:11:55 +00005951#ifdef CONFIG_NET_CLS_ACT
5952 if (queue)
5953 return queue;
5954 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
5955 if (!queue)
5956 return NULL;
5957 netdev_init_one_queue(dev, queue, NULL);
Eric Dumazet24824a02010-10-02 06:11:55 +00005958 queue->qdisc = &noop_qdisc;
5959 queue->qdisc_sleeping = &noop_qdisc;
5960 rcu_assign_pointer(dev->ingress_queue, queue);
5961#endif
5962 return queue;
David S. Millerbb949fb2008-07-08 16:55:56 -07005963}
5964
Linus Torvalds1da177e2005-04-16 15:20:36 -07005965/**
Tom Herbert36909ea2011-01-09 19:36:31 +00005966 * alloc_netdev_mqs - allocate network device
Linus Torvalds1da177e2005-04-16 15:20:36 -07005967 * @sizeof_priv: size of private data to allocate space for
5968 * @name: device name format string
5969 * @setup: callback to initialize device
Tom Herbert36909ea2011-01-09 19:36:31 +00005970 * @txqs: the number of TX subqueues to allocate
5971 * @rxqs: the number of RX subqueues to allocate
Linus Torvalds1da177e2005-04-16 15:20:36 -07005972 *
5973 * Allocates a struct net_device with private data area for driver use
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005974 * and performs basic initialization. Also allocates subqueue structs
Tom Herbert36909ea2011-01-09 19:36:31 +00005975 * for each queue on the device.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005976 */
Tom Herbert36909ea2011-01-09 19:36:31 +00005977struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
5978 void (*setup)(struct net_device *),
5979 unsigned int txqs, unsigned int rxqs)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005980{
Linus Torvalds1da177e2005-04-16 15:20:36 -07005981 struct net_device *dev;
Stephen Hemminger79439862008-07-21 13:28:44 -07005982 size_t alloc_size;
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005983 struct net_device *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005984
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07005985 BUG_ON(strlen(name) >= sizeof(dev->name));
5986
Tom Herbert36909ea2011-01-09 19:36:31 +00005987 if (txqs < 1) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00005988 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
Tom Herbert55513fb2010-10-18 17:55:58 +00005989 return NULL;
5990 }
5991
Tom Herbert36909ea2011-01-09 19:36:31 +00005992#ifdef CONFIG_RPS
5993 if (rxqs < 1) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00005994 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
Tom Herbert36909ea2011-01-09 19:36:31 +00005995 return NULL;
5996 }
5997#endif
5998
David S. Millerfd2ea0a2008-07-17 01:56:23 -07005999 alloc_size = sizeof(struct net_device);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07006000 if (sizeof_priv) {
6001 /* ensure 32-byte alignment of private area */
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00006002 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07006003 alloc_size += sizeof_priv;
6004 }
6005 /* ensure 32-byte alignment of whole construct */
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00006006 alloc_size += NETDEV_ALIGN - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006007
Paolo 'Blaisorblade' Giarrusso31380de2006-04-06 22:38:28 -07006008 p = kzalloc(alloc_size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006009 if (!p) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00006010 pr_err("alloc_netdev: Unable to allocate device\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07006011 return NULL;
6012 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07006013
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00006014 dev = PTR_ALIGN(p, NETDEV_ALIGN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006015 dev->padded = (char *)dev - (char *)p;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00006016
Eric Dumazet29b44332010-10-11 10:22:12 +00006017 dev->pcpu_refcnt = alloc_percpu(int);
6018 if (!dev->pcpu_refcnt)
Tom Herberte6484932010-10-18 18:04:39 +00006019 goto free_p;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00006020
Linus Torvalds1da177e2005-04-16 15:20:36 -07006021 if (dev_addr_init(dev))
Eric Dumazet29b44332010-10-11 10:22:12 +00006022 goto free_pcpu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006023
Jiri Pirko22bedad32010-04-01 21:22:57 +00006024 dev_mc_init(dev);
Jiri Pirkoa748ee22010-04-01 21:22:09 +00006025 dev_uc_init(dev);
Jiri Pirkoccffad252009-05-22 23:22:17 +00006026
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09006027 dev_net_set(dev, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006028
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07006029 dev->gso_max_size = GSO_MAX_SIZE;
Ben Hutchings30b678d2012-07-30 15:57:00 +00006030 dev->gso_max_segs = GSO_MAX_SEGS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006031
Herbert Xud565b0a2008-12-15 23:38:52 -08006032 INIT_LIST_HEAD(&dev->napi_list);
Eric W. Biederman9fdce092009-10-30 14:51:13 +00006033 INIT_LIST_HEAD(&dev->unreg_list);
Eric Dumazete014deb2009-11-17 05:59:21 +00006034 INIT_LIST_HEAD(&dev->link_watch_list);
Eric Dumazet93f154b2009-05-18 22:19:19 -07006035 dev->priv_flags = IFF_XMIT_DST_RELEASE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006036 setup(dev);
David S. Miller8d3bdbd2011-02-08 15:02:50 -08006037
6038 dev->num_tx_queues = txqs;
6039 dev->real_num_tx_queues = txqs;
6040 if (netif_alloc_netdev_queues(dev))
6041 goto free_all;
6042
6043#ifdef CONFIG_RPS
6044 dev->num_rx_queues = rxqs;
6045 dev->real_num_rx_queues = rxqs;
6046 if (netif_alloc_rx_queues(dev))
6047 goto free_all;
6048#endif
6049
Linus Torvalds1da177e2005-04-16 15:20:36 -07006050 strcpy(dev->name, name);
Vlad Dogarucbda10f2011-01-13 23:38:30 +00006051 dev->group = INIT_NETDEV_GROUP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006052 return dev;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00006053
David S. Miller8d3bdbd2011-02-08 15:02:50 -08006054free_all:
6055 free_netdev(dev);
6056 return NULL;
6057
Eric Dumazet29b44332010-10-11 10:22:12 +00006058free_pcpu:
6059 free_percpu(dev->pcpu_refcnt);
Tom Herberted9af2e2010-11-09 10:47:30 +00006060 kfree(dev->_tx);
Tom Herbertfe822242010-11-09 10:47:38 +00006061#ifdef CONFIG_RPS
6062 kfree(dev->_rx);
6063#endif
6064
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00006065free_p:
6066 kfree(p);
6067 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006068}
Tom Herbert36909ea2011-01-09 19:36:31 +00006069EXPORT_SYMBOL(alloc_netdev_mqs);
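/*
 * A hedged usage sketch: a multiqueue driver allocating 8 TX and 8 RX
 * queues directly through this function (drivers more commonly go through
 * the alloc_etherdev_mq()/alloc_netdev_mq() wrappers). foo_setup and
 * struct foo_priv are assumed names:
 *
 *	dev = alloc_netdev_mqs(sizeof(struct foo_priv), "foo%d",
 *			       foo_setup, 8, 8);
 *	if (!dev)
 *		return -ENOMEM;
 */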
Linus Torvalds1da177e2005-04-16 15:20:36 -07006070
6071/**
6072 * free_netdev - free network device
6073 * @dev: device
6074 *
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09006075 * This function does the last stage of destroying an allocated device
6076 * interface. The reference to the device object is released.
Linus Torvalds1da177e2005-04-16 15:20:36 -07006077 * If this is the last reference then it will be freed.
6078 */
6079void free_netdev(struct net_device *dev)
6080{
Herbert Xud565b0a2008-12-15 23:38:52 -08006081 struct napi_struct *p, *n;
6082
Denis V. Lunevf3005d72008-04-16 02:02:18 -07006083 release_net(dev_net(dev));
6084
David S. Millere8a04642008-07-17 00:34:19 -07006085 kfree(dev->_tx);
Tom Herbertfe822242010-11-09 10:47:38 +00006086#ifdef CONFIG_RPS
6087 kfree(dev->_rx);
6088#endif
David S. Millere8a04642008-07-17 00:34:19 -07006089
Eric Dumazet33d480c2011-08-11 19:30:52 +00006090 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
Eric Dumazet24824a02010-10-02 06:11:55 +00006091
Jiri Pirkof001fde2009-05-05 02:48:28 +00006092 /* Flush device addresses */
6093 dev_addr_flush(dev);
6094
Herbert Xud565b0a2008-12-15 23:38:52 -08006095 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
6096 netif_napi_del(p);
6097
Eric Dumazet29b44332010-10-11 10:22:12 +00006098 free_percpu(dev->pcpu_refcnt);
6099 dev->pcpu_refcnt = NULL;
6100
Stephen Hemminger3041a062006-05-26 13:25:24 -07006101 /* Compatibility with error handling in drivers */
Linus Torvalds1da177e2005-04-16 15:20:36 -07006102 if (dev->reg_state == NETREG_UNINITIALIZED) {
6103 kfree((char *)dev - dev->padded);
6104 return;
6105 }
6106
6107 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
6108 dev->reg_state = NETREG_RELEASED;
6109
Greg Kroah-Hartman43cb76d2002-04-09 12:14:34 -07006110 /* will free via device release */
6111 put_device(&dev->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006112}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07006113EXPORT_SYMBOL(free_netdev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09006114
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07006115/**
6116 * synchronize_net - Synchronize with packet receive processing
6117 *
6118 * Wait for packets currently being received to be done.
6119 * Does not block later packets from starting.
6120 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09006121void synchronize_net(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07006122{
6123 might_sleep();
Eric Dumazetbe3fc412011-05-23 23:07:32 +00006124 if (rtnl_is_locked())
6125 synchronize_rcu_expedited();
6126 else
6127 synchronize_rcu();
Linus Torvalds1da177e2005-04-16 15:20:36 -07006128}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07006129EXPORT_SYMBOL(synchronize_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006130
6131/**
Eric Dumazet44a08732009-10-27 07:03:04 +00006132 * unregister_netdevice_queue - remove device from the kernel
Linus Torvalds1da177e2005-04-16 15:20:36 -07006133 * @dev: device
Eric Dumazet44a08732009-10-27 07:03:04 +00006134 * @head: list
Jaswinder Singh Rajput6ebfbc02009-11-22 20:43:13 -08006135 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07006136 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08006137 * from the kernel tables.
Eric Dumazet44a08732009-10-27 07:03:04 +00006138 * If @head is not NULL, the device is queued to be unregistered later.
Linus Torvalds1da177e2005-04-16 15:20:36 -07006139 *
6140 * Callers must hold the rtnl semaphore. You may want
6141 * unregister_netdev() instead of this.
6142 */
6143
Eric Dumazet44a08732009-10-27 07:03:04 +00006144void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07006145{
Herbert Xua6620712007-12-12 19:21:56 -08006146 ASSERT_RTNL();
6147
Eric Dumazet44a08732009-10-27 07:03:04 +00006148 if (head) {
Eric W. Biederman9fdce092009-10-30 14:51:13 +00006149 list_move_tail(&dev->unreg_list, head);
Eric Dumazet44a08732009-10-27 07:03:04 +00006150 } else {
6151 rollback_registered(dev);
6152 /* Finish processing unregister after unlock */
6153 net_set_todo(dev);
6154 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07006155}
Eric Dumazet44a08732009-10-27 07:03:04 +00006156EXPORT_SYMBOL(unregister_netdevice_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006157
6158/**
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006159 * unregister_netdevice_many - unregister many devices
6160 * @head: list of devices
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006161 */
6162void unregister_netdevice_many(struct list_head *head)
6163{
6164 struct net_device *dev;
6165
6166 if (!list_empty(head)) {
6167 rollback_registered_many(head);
6168 list_for_each_entry(dev, head, unreg_list)
6169 net_set_todo(dev);
6170 }
6171}
Eric Dumazet63c80992009-10-27 07:06:49 +00006172EXPORT_SYMBOL(unregister_netdevice_many);
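/*
 * A hedged sketch of batched teardown, in the style of rtnl_link ->dellink()
 * paths: queue each device under RTNL, then tear the whole list down in one
 * pass so the expensive synchronize_net() calls run once per batch rather
 * than once per device. for_each_foo_dev() is a hypothetical iterator:
 *
 *	LIST_HEAD(list_kill);
 *
 *	ASSERT_RTNL();
 *	for_each_foo_dev(dev)
 *		unregister_netdevice_queue(dev, &list_kill);
 *	unregister_netdevice_many(&list_kill);
 */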
Eric Dumazet9b5e3832009-10-27 07:04:19 +00006173
6174/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07006175 * unregister_netdev - remove device from the kernel
6176 * @dev: device
6177 *
6178 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08006179 * from the kernel tables.
Linus Torvalds1da177e2005-04-16 15:20:36 -07006180 *
6181 * This is just a wrapper for unregister_netdevice that takes
6182 * the rtnl semaphore. In general you want to use this and not
6183 * unregister_netdevice.
6184 */
6185void unregister_netdev(struct net_device *dev)
6186{
6187 rtnl_lock();
6188 unregister_netdevice(dev);
6189 rtnl_unlock();
6190}
Linus Torvalds1da177e2005-04-16 15:20:36 -07006191EXPORT_SYMBOL(unregister_netdev);
6192
Eric W. Biedermance286d32007-09-12 13:53:49 +02006193/**
6194 * dev_change_net_namespace - move device to a different network namespace
6195 * @dev: device
6196 * @net: network namespace
6197 * @pat: If not NULL name pattern to try if the current device name
6198 * is already taken in the destination network namespace.
6199 *
6200 * This function shuts down a device interface and moves it
6201 * to a new network namespace. On success 0 is returned, on
6202 * a failure a netagive errno code is returned.
6203 *
6204 * Callers must hold the rtnl semaphore.
6205 */
6206
6207int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
6208{
Eric W. Biedermance286d32007-09-12 13:53:49 +02006209 int err;
6210
6211 ASSERT_RTNL();
6212
6213 /* Don't allow namespace local devices to be moved. */
6214 err = -EINVAL;
6215 if (dev->features & NETIF_F_NETNS_LOCAL)
6216 goto out;
6217
6218 /* Ensure the device has been registered */
6219 err = -EINVAL;
6220 if (dev->reg_state != NETREG_REGISTERED)
6221 goto out;
6222
6223 /* Get out if there is nothing to do */
6224 err = 0;
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09006225 if (net_eq(dev_net(dev), net))
Eric W. Biedermance286d32007-09-12 13:53:49 +02006226 goto out;
6227
6228 /* Pick the destination device name, and ensure
6229 * we can use it in the destination network namespace.
6230 */
6231 err = -EEXIST;
Octavian Purdilad9031022009-11-18 02:36:59 +00006232 if (__dev_get_by_name(net, dev->name)) {
Eric W. Biedermance286d32007-09-12 13:53:49 +02006233 /* We get here if we can't use the current device name */
6234 if (!pat)
6235 goto out;
Jiri Pirko1c5cae82011-04-30 01:21:32 +00006236 if (dev_get_valid_name(dev, pat) < 0)
Eric W. Biedermance286d32007-09-12 13:53:49 +02006237 goto out;
6238 }
6239
6240 /*
6241 * And now a mini version of register_netdevice and unregister_netdevice.
6242 */
6243
6244 /* If device is running close it first. */
Pavel Emelyanov9b772652007-10-10 02:49:09 -07006245 dev_close(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02006246
6247 /* And unlink it from device chain */
6248 err = -ENODEV;
6249 unlist_netdevice(dev);
6250
6251 synchronize_net();
6252
6253 /* Shutdown queueing discipline. */
6254 dev_shutdown(dev);
6255
6256 /* Notify protocols that we are about to destroy
6257 this device. They should clean up all of their state.
David Lamparter3b27e102010-09-17 03:22:19 +00006258
6259 Note that dev->reg_state stays at NETREG_REGISTERED.
6260 This is intentional: this way 8021q and macvlan know
6261 the device is just moving and can keep their slaves up.
Eric W. Biedermance286d32007-09-12 13:53:49 +02006262 */
6263 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
Gao feng6549dd42012-08-23 15:36:55 +00006264 rcu_barrier();
6265 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
Eric W. Biedermand2237d32011-10-21 06:24:20 +00006266 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
Eric W. Biedermance286d32007-09-12 13:53:49 +02006267
6268 /*
6269 * Flush the unicast and multicast chains
6270 */
Jiri Pirkoa748ee22010-04-01 21:22:09 +00006271 dev_uc_flush(dev);
Jiri Pirko22bedad32010-04-01 21:22:57 +00006272 dev_mc_flush(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02006273
6274 /* Actually switch the network namespace */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09006275 dev_net_set(dev, net);
Eric W. Biedermance286d32007-09-12 13:53:49 +02006276
Eric W. Biedermance286d32007-09-12 13:53:49 +02006277 /* If there is an ifindex conflict assign a new one */
6278 if (__dev_get_by_index(net, dev->ifindex)) {
6279 int iflink = (dev->iflink == dev->ifindex);
6280 dev->ifindex = dev_new_index(net);
6281 if (iflink)
6282 dev->iflink = dev->ifindex;
6283 }
6284
Eric W. Biederman8b41d182007-09-26 22:02:53 -07006285 /* Fixup kobjects */
Eric W. Biedermana1b3f592010-05-04 17:36:49 -07006286 err = device_rename(&dev->dev, dev->name);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07006287 WARN_ON(err);
Eric W. Biedermance286d32007-09-12 13:53:49 +02006288
6289 /* Add the device back in the hashes */
6290 list_netdevice(dev);
6291
6292 /* Notify protocols, that a new device appeared. */
6293 call_netdevice_notifiers(NETDEV_REGISTER, dev);
6294
Eric W. Biedermand90a9092009-12-12 22:11:15 +00006295 /*
6296 * Prevent userspace races by waiting until the network
6297 * device is fully set up before sending notifications.
6298 */
6299 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
6300
Eric W. Biedermance286d32007-09-12 13:53:49 +02006301 synchronize_net();
6302 err = 0;
6303out:
6304 return err;
6305}
Johannes Berg463d0182009-07-14 00:33:35 +02006306EXPORT_SYMBOL_GPL(dev_change_net_namespace);
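/*
 * A hedged caller sketch, loosely mirroring the rtnetlink IFLA_NET_NS_FD
 * handling: resolve the target namespace, move the device under RTNL, then
 * drop the namespace reference. Error handling is trimmed:
 *
 *	struct net *net = get_net_ns_by_fd(netns_fd);
 *
 *	if (!IS_ERR(net)) {
 *		rtnl_lock();
 *		err = dev_change_net_namespace(dev, net, NULL);
 *		rtnl_unlock();
 *		put_net(net);
 *	}
 */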
Eric W. Biedermance286d32007-09-12 13:53:49 +02006307
Linus Torvalds1da177e2005-04-16 15:20:36 -07006308static int dev_cpu_callback(struct notifier_block *nfb,
6309 unsigned long action,
6310 void *ocpu)
6311{
6312 struct sk_buff **list_skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006313 struct sk_buff *skb;
6314 unsigned int cpu, oldcpu = (unsigned long)ocpu;
6315 struct softnet_data *sd, *oldsd;
6316
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07006317 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -07006318 return NOTIFY_OK;
6319
6320 local_irq_disable();
6321 cpu = smp_processor_id();
6322 sd = &per_cpu(softnet_data, cpu);
6323 oldsd = &per_cpu(softnet_data, oldcpu);
6324
6325 /* Find end of our completion_queue. */
6326 list_skb = &sd->completion_queue;
6327 while (*list_skb)
6328 list_skb = &(*list_skb)->next;
6329 /* Append completion queue from offline CPU. */
6330 *list_skb = oldsd->completion_queue;
6331 oldsd->completion_queue = NULL;
6332
Linus Torvalds1da177e2005-04-16 15:20:36 -07006333 /* Append output queue from offline CPU. */
Changli Gaoa9cbd582010-04-26 23:06:24 +00006334 if (oldsd->output_queue) {
6335 *sd->output_queue_tailp = oldsd->output_queue;
6336 sd->output_queue_tailp = oldsd->output_queue_tailp;
6337 oldsd->output_queue = NULL;
6338 oldsd->output_queue_tailp = &oldsd->output_queue;
6339 }
Heiko Carstens264524d2011-06-06 20:50:03 +00006340 /* Append NAPI poll list from offline CPU. */
6341 if (!list_empty(&oldsd->poll_list)) {
6342 list_splice_init(&oldsd->poll_list, &sd->poll_list);
6343 raise_softirq_irqoff(NET_RX_SOFTIRQ);
6344 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07006345
6346 raise_softirq_irqoff(NET_TX_SOFTIRQ);
6347 local_irq_enable();
6348
6349 /* Process offline CPU's input_pkt_queue */
Tom Herbert76cc8b12010-05-20 18:37:59 +00006350 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
6351 netif_rx(skb);
6352 input_queue_head_incr(oldsd);
6353 }
Tom Herbertfec5e652010-04-16 16:01:27 -07006354 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07006355 netif_rx(skb);
Tom Herbert76cc8b12010-05-20 18:37:59 +00006356 input_queue_head_incr(oldsd);
Tom Herbertfec5e652010-04-16 16:01:27 -07006357 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07006358
6359 return NOTIFY_OK;
6360}
Linus Torvalds1da177e2005-04-16 15:20:36 -07006361
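/*
 * Illustrative sketch, not part of the original file: the pointer-to-pointer
 * walk dev_cpu_callback() uses above to splice the dead CPU's
 * completion_queue.  example_append_skb_chain() is a hypothetical helper
 * showing the same idiom on two plain ->next chained sk_buff lists.
 */
static void __maybe_unused example_append_skb_chain(struct sk_buff **head,
						     struct sk_buff *extra)
{
	/* advance to the terminating NULL next pointer of the first chain */
	while (*head)
		head = &(*head)->next;

	/* hang the second chain there; no locking, the caller must serialize */
	*head = extra;
}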
6362
Herbert Xu7f353bf2007-08-10 15:47:58 -07006363/**
Herbert Xub63365a2008-10-23 01:11:29 -07006364 * netdev_increment_features - increment feature set by one
6365 * @all: current feature set
6366 * @one: new feature set
6367 * @mask: feature set used to mask the result
Herbert Xu7f353bf2007-08-10 15:47:58 -07006368 *
6369 * Computes a new feature set after adding a device with feature set
Herbert Xub63365a2008-10-23 01:11:29 -07006370 * @one to the master device with current feature set @all. Will not
6371 * enable anything that is off in @mask. Returns the new feature set.
Herbert Xu7f353bf2007-08-10 15:47:58 -07006372 */
Michał Mirosławc8f44af2011-11-15 15:29:55 +00006373netdev_features_t netdev_increment_features(netdev_features_t all,
6374 netdev_features_t one, netdev_features_t mask)
Herbert Xu7f353bf2007-08-10 15:47:58 -07006375{
Michał Mirosław1742f182011-04-22 06:31:16 +00006376 if (mask & NETIF_F_GEN_CSUM)
6377 mask |= NETIF_F_ALL_CSUM;
6378 mask |= NETIF_F_VLAN_CHALLENGED;
6379
6380 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
6381 all &= one | ~NETIF_F_ALL_FOR_ALL;
6382
Michał Mirosław1742f182011-04-22 06:31:16 +00006383 /* If one device supports hw checksumming, set for all. */
6384 if (all & NETIF_F_GEN_CSUM)
6385 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
Herbert Xu7f353bf2007-08-10 15:47:58 -07006386
6387 return all;
6388}
Herbert Xub63365a2008-10-23 01:11:29 -07006389EXPORT_SYMBOL(netdev_increment_features);
Herbert Xu7f353bf2007-08-10 15:47:58 -07006390
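/*
 * Illustrative sketch, not part of the original file: how a master device
 * (bridge/bond style) might fold two slaves' feature sets with
 * netdev_increment_features().  example_master_features() and its fixed
 * two-slave signature are assumptions; real users such as
 * br_features_recompute() loop over their port list instead.
 */
static netdev_features_t __maybe_unused
example_master_features(netdev_features_t base,
			const struct net_device *slave_a,
			const struct net_device *slave_b)
{
	netdev_features_t mask = base;
	netdev_features_t all = base & ~NETIF_F_ONE_FOR_ALL;

	all = netdev_increment_features(all, slave_a->features, mask);
	all = netdev_increment_features(all, slave_b->features, mask);

	return all;
}
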
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07006391static struct hlist_head *netdev_create_hash(void)
6392{
6393 int i;
6394 struct hlist_head *hash;
6395
6396 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
6397 if (hash != NULL)
6398 for (i = 0; i < NETDEV_HASHENTRIES; i++)
6399 INIT_HLIST_HEAD(&hash[i]);
6400
6401 return hash;
6402}
6403
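/*
 * Illustrative sketch, not part of the original file: how an interface name
 * is bucketed into one of the NETDEV_HASHENTRIES chains allocated above.
 * example_name_bucket() is hypothetical; the real lookups go through
 * dev_name_hash() and dev_index_hash() defined earlier in this file, which
 * this sketch assumes use the same full_name_hash()/hash_32() scheme.
 */
static __maybe_unused struct hlist_head *
example_name_bucket(struct hlist_head *hash, const char *name)
{
	unsigned int h = full_name_hash(name, strnlen(name, IFNAMSIZ));

	return &hash[hash_32(h, NETDEV_HASHBITS)];
}
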
Eric W. Biederman881d9662007-09-17 11:56:21 -07006404/* Initialize per network namespace state */
Pavel Emelyanov46650792007-10-08 20:38:39 -07006405static int __net_init netdev_init(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07006406{
Rustad, Mark D734b6542012-07-18 09:06:07 +00006407 if (net != &init_net)
6408 INIT_LIST_HEAD(&net->dev_base_head);
Eric W. Biederman881d9662007-09-17 11:56:21 -07006409
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07006410 net->dev_name_head = netdev_create_hash();
6411 if (net->dev_name_head == NULL)
6412 goto err_name;
Eric W. Biederman881d9662007-09-17 11:56:21 -07006413
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07006414 net->dev_index_head = netdev_create_hash();
6415 if (net->dev_index_head == NULL)
6416 goto err_idx;
Eric W. Biederman881d9662007-09-17 11:56:21 -07006417
6418 return 0;
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07006419
6420err_idx:
6421 kfree(net->dev_name_head);
6422err_name:
6423 return -ENOMEM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07006424}
6425
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07006426/**
6427 * netdev_drivername - network driver for the device
6428 * @dev: network device
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07006429 *
6430 * Determine the name of the network driver handling the device.
6431 */
David S. Miller3019de12011-06-06 16:41:33 -07006432const char *netdev_drivername(const struct net_device *dev)
Arjan van de Ven6579e572008-07-21 13:31:48 -07006433{
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07006434 const struct device_driver *driver;
6435 const struct device *parent;
David S. Miller3019de12011-06-06 16:41:33 -07006436 const char *empty = "";
Arjan van de Ven6579e572008-07-21 13:31:48 -07006437
6438 parent = dev->dev.parent;
Arjan van de Ven6579e572008-07-21 13:31:48 -07006439 if (!parent)
David S. Miller3019de12011-06-06 16:41:33 -07006440 return empty;
Arjan van de Ven6579e572008-07-21 13:31:48 -07006441
6442 driver = parent->driver;
6443 if (driver && driver->name)
David S. Miller3019de12011-06-06 16:41:33 -07006444 return driver->name;
6445 return empty;
Arjan van de Ven6579e572008-07-21 13:31:48 -07006446}
6447
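/*
 * Illustrative sketch, not part of the original file: typical use of
 * netdev_drivername() in a diagnostic.  example_report_tx_timeout() is a
 * hypothetical helper loosely modelled on the watchdog warning in
 * net/sched/sch_generic.c.
 */
static void __maybe_unused example_report_tx_timeout(struct net_device *dev)
{
	WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue timed out\n",
		  dev->name, netdev_drivername(dev));
}
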
Jason Baronffa10cb2011-08-11 14:36:48 -04006448int __netdev_printk(const char *level, const struct net_device *dev,
Joe Perches256df2f2010-06-27 01:02:35 +00006449 struct va_format *vaf)
6450{
6451 int r;
6452
6453 if (dev && dev->dev.parent)
6454 r = dev_printk(level, dev->dev.parent, "%s: %pV",
6455 netdev_name(dev), vaf);
6456 else if (dev)
6457 r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
6458 else
6459 r = printk("%s(NULL net_device): %pV", level, vaf);
6460
6461 return r;
6462}
Jason Baronffa10cb2011-08-11 14:36:48 -04006463EXPORT_SYMBOL(__netdev_printk);
Joe Perches256df2f2010-06-27 01:02:35 +00006464
6465int netdev_printk(const char *level, const struct net_device *dev,
6466 const char *format, ...)
6467{
6468 struct va_format vaf;
6469 va_list args;
6470 int r;
6471
6472 va_start(args, format);
6473
6474 vaf.fmt = format;
6475 vaf.va = &args;
6476
6477 r = __netdev_printk(level, dev, &vaf);
6478 va_end(args);
6479
6480 return r;
6481}
6482EXPORT_SYMBOL(netdev_printk);
6483
6484#define define_netdev_printk_level(func, level) \
6485int func(const struct net_device *dev, const char *fmt, ...) \
6486{ \
6487 int r; \
6488 struct va_format vaf; \
6489 va_list args; \
6490 \
6491 va_start(args, fmt); \
6492 \
6493 vaf.fmt = fmt; \
6494 vaf.va = &args; \
6495 \
6496 r = __netdev_printk(level, dev, &vaf); \
6497 va_end(args); \
6498 \
6499 return r; \
6500} \
6501EXPORT_SYMBOL(func);
6502
6503define_netdev_printk_level(netdev_emerg, KERN_EMERG);
6504define_netdev_printk_level(netdev_alert, KERN_ALERT);
6505define_netdev_printk_level(netdev_crit, KERN_CRIT);
6506define_netdev_printk_level(netdev_err, KERN_ERR);
6507define_netdev_printk_level(netdev_warn, KERN_WARNING);
6508define_netdev_printk_level(netdev_notice, KERN_NOTICE);
6509define_netdev_printk_level(netdev_info, KERN_INFO);
6510
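/*
 * Illustrative sketch, not part of the original file: how a driver would use
 * the per-level helpers generated above.  The helper name
 * example_report_link() and the message texts are made up for illustration.
 */
static void __maybe_unused example_report_link(struct net_device *dev,
					       bool up, unsigned int mbps)
{
	if (up)
		netdev_info(dev, "link up, %u Mbps\n", mbps);
	else
		netdev_warn(dev, "link down\n");
}
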
Pavel Emelyanov46650792007-10-08 20:38:39 -07006511static void __net_exit netdev_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07006512{
6513 kfree(net->dev_name_head);
6514 kfree(net->dev_index_head);
6515}
6516
Denis V. Lunev022cbae2007-11-13 03:23:50 -08006517static struct pernet_operations __net_initdata netdev_net_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07006518 .init = netdev_init,
6519 .exit = netdev_exit,
6520};
6521
Pavel Emelyanov46650792007-10-08 20:38:39 -07006522static void __net_exit default_device_exit(struct net *net)
Eric W. Biedermance286d32007-09-12 13:53:49 +02006523{
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00006524 struct net_device *dev, *aux;
Eric W. Biedermance286d32007-09-12 13:53:49 +02006525 /*
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00006526 * Push all migratable network devices back to the
Eric W. Biedermance286d32007-09-12 13:53:49 +02006527 * initial network namespace
6528 */
6529 rtnl_lock();
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00006530 for_each_netdev_safe(net, dev, aux) {
Eric W. Biedermance286d32007-09-12 13:53:49 +02006531 int err;
Pavel Emelyanovaca51392008-05-08 01:24:25 -07006532 char fb_name[IFNAMSIZ];
Eric W. Biedermance286d32007-09-12 13:53:49 +02006533
6534		/* Ignore unmovable devices (e.g. loopback) */
6535 if (dev->features & NETIF_F_NETNS_LOCAL)
6536 continue;
6537
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00006538 /* Leave virtual devices for the generic cleanup */
6539 if (dev->rtnl_link_ops)
6540 continue;
Eric W. Biedermand0c082c2008-11-05 15:59:38 -08006541
Lucas De Marchi25985ed2011-03-30 22:57:33 -03006542 /* Push remaining network devices to init_net */
Pavel Emelyanovaca51392008-05-08 01:24:25 -07006543 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
6544 err = dev_change_net_namespace(dev, &init_net, fb_name);
Eric W. Biedermance286d32007-09-12 13:53:49 +02006545 if (err) {
Joe Perches7b6cd1c2012-02-01 10:54:43 +00006546 pr_emerg("%s: failed to move %s to init_net: %d\n",
6547 __func__, dev->name, err);
Pavel Emelyanovaca51392008-05-08 01:24:25 -07006548 BUG();
Eric W. Biedermance286d32007-09-12 13:53:49 +02006549 }
6550 }
6551 rtnl_unlock();
6552}
6553
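/*
 * Illustrative sketch, not part of the original file: how a driver opts a
 * device out of the migration loop above.  example_netns_local_setup() is a
 * hypothetical ->setup() callback; the loopback driver sets the same flag
 * in drivers/net/loopback.c.
 */
static void __maybe_unused example_netns_local_setup(struct net_device *dev)
{
	/* keep this device in its namespace; never push it back to init_net */
	dev->features |= NETIF_F_NETNS_LOCAL;
}
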
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00006554static void __net_exit default_device_exit_batch(struct list_head *net_list)
6555{
6556 /* At exit all network devices most be removed from a network
Uwe Kleine-Königb5950762010-11-01 15:38:34 -04006557 * namespace. Do this in the reverse order of registration.
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00006558 * Do this across as many network namespaces as possible to
6559 * improve batching efficiency.
6560 */
6561 struct net_device *dev;
6562 struct net *net;
6563 LIST_HEAD(dev_kill_list);
6564
6565 rtnl_lock();
6566 list_for_each_entry(net, net_list, exit_list) {
6567 for_each_netdev_reverse(net, dev) {
6568 if (dev->rtnl_link_ops)
6569 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
6570 else
6571 unregister_netdevice_queue(dev, &dev_kill_list);
6572 }
6573 }
6574 unregister_netdevice_many(&dev_kill_list);
Eric Dumazetceaaec92011-02-17 22:59:19 +00006575 list_del(&dev_kill_list);
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00006576 rtnl_unlock();
6577}
6578
Denis V. Lunev022cbae2007-11-13 03:23:50 -08006579static struct pernet_operations __net_initdata default_device_ops = {
Eric W. Biedermance286d32007-09-12 13:53:49 +02006580 .exit = default_device_exit,
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00006581 .exit_batch = default_device_exit_batch,
Eric W. Biedermance286d32007-09-12 13:53:49 +02006582};
6583
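/*
 * Illustrative sketch, not part of the original file: the queue-then-commit
 * pattern default_device_exit_batch() relies on above.  A hypothetical
 * teardown path queues several devices and unregisters them together so
 * they share one RCU grace period.
 */
static void __maybe_unused example_batched_unregister(struct net_device *a,
						      struct net_device *b)
{
	LIST_HEAD(kill_list);

	rtnl_lock();
	unregister_netdevice_queue(a, &kill_list);
	unregister_netdevice_queue(b, &kill_list);
	unregister_netdevice_many(&kill_list);
	/* detach the on-stack list head, as the batch exit above does */
	list_del(&kill_list);
	rtnl_unlock();
}
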
Linus Torvalds1da177e2005-04-16 15:20:36 -07006584/*
6585 * Initialize the DEV module. At boot time this walks the device list and
6586 * unhooks any devices that fail to initialise (normally hardware not
6587 * present) and leaves us with a valid list of present and active devices.
6588 *
6589 */
6590
6591/*
6592 * This is called single threaded during boot, so no need
6593 * to take the rtnl semaphore.
6594 */
6595static int __init net_dev_init(void)
6596{
6597 int i, rc = -ENOMEM;
6598
6599 BUG_ON(!dev_boot_phase);
6600
Linus Torvalds1da177e2005-04-16 15:20:36 -07006601 if (dev_proc_init())
6602 goto out;
6603
Eric W. Biederman8b41d182007-09-26 22:02:53 -07006604 if (netdev_kobject_init())
Linus Torvalds1da177e2005-04-16 15:20:36 -07006605 goto out;
6606
6607 INIT_LIST_HEAD(&ptype_all);
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08006608 for (i = 0; i < PTYPE_HASH_SIZE; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -07006609 INIT_LIST_HEAD(&ptype_base[i]);
6610
Eric W. Biederman881d9662007-09-17 11:56:21 -07006611 if (register_pernet_subsys(&netdev_net_ops))
6612 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006613
6614 /*
6615 * Initialise the packet receive queues.
6616 */
6617
KAMEZAWA Hiroyuki6f912042006-04-10 22:52:50 -07006618 for_each_possible_cpu(i) {
Eric Dumazete36fa2f2010-04-19 21:17:14 +00006619 struct softnet_data *sd = &per_cpu(softnet_data, i);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006620
Changli Gaodee42872010-05-02 05:42:16 +00006621 memset(sd, 0, sizeof(*sd));
Eric Dumazete36fa2f2010-04-19 21:17:14 +00006622 skb_queue_head_init(&sd->input_pkt_queue);
Changli Gao6e7676c2010-04-27 15:07:33 -07006623 skb_queue_head_init(&sd->process_queue);
Eric Dumazete36fa2f2010-04-19 21:17:14 +00006624 sd->completion_queue = NULL;
6625 INIT_LIST_HEAD(&sd->poll_list);
Changli Gaoa9cbd582010-04-26 23:06:24 +00006626 sd->output_queue = NULL;
6627 sd->output_queue_tailp = &sd->output_queue;
Eric Dumazetdf334542010-03-24 19:13:54 +00006628#ifdef CONFIG_RPS
Eric Dumazete36fa2f2010-04-19 21:17:14 +00006629 sd->csd.func = rps_trigger_softirq;
6630 sd->csd.info = sd;
6631 sd->csd.flags = 0;
6632 sd->cpu = i;
Tom Herbert1e94d722010-03-18 17:45:44 -07006633#endif
Tom Herbert0a9627f2010-03-16 08:03:29 +00006634
Eric Dumazete36fa2f2010-04-19 21:17:14 +00006635 sd->backlog.poll = process_backlog;
6636 sd->backlog.weight = weight_p;
6637 sd->backlog.gro_list = NULL;
6638 sd->backlog.gro_count = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07006639 }
6640
Linus Torvalds1da177e2005-04-16 15:20:36 -07006641 dev_boot_phase = 0;
6642
Eric W. Biederman505d4f72008-11-07 22:54:20 -08006643	/* The loopback device is special: if any other network device
6644	 * is present in a network namespace, the loopback device must
6645	 * be present. Since we now dynamically allocate and free the
6646	 * loopback device, ensure this invariant is maintained by
6647	 * keeping the loopback device as the first device on the
6648	 * list of network devices, ensuring that the loopback device
6649	 * is the first device that appears and the last network device
6650	 * that disappears.
6651	 */
6652 if (register_pernet_device(&loopback_net_ops))
6653 goto out;
6654
6655 if (register_pernet_device(&default_device_ops))
6656 goto out;
6657
Carlos R. Mafra962cf362008-05-15 11:15:37 -03006658 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
6659 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006660
6661 hotcpu_notifier(dev_cpu_callback, 0);
6662 dst_init();
6663 dev_mcast_init();
6664 rc = 0;
6665out:
6666 return rc;
6667}
6668
6669subsys_initcall(net_dev_init);
6670
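/*
 * Illustrative sketch, not part of the original file: how the per-CPU
 * backlog NAPI instance set up in net_dev_init() above gets scheduled.
 * example_kick_backlog() is hypothetical and assumes the caller runs with
 * local interrupts disabled; the real path is enqueue_to_backlog() and
 * ____napi_schedule() earlier in this file.
 */
static void __maybe_unused example_kick_backlog(struct softnet_data *sd)
{
	if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
		list_add_tail(&sd->backlog.poll_list, &sd->poll_list);
		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	}
}
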
Krishna Kumare88721f2009-02-18 17:55:02 -08006671static int __init initialize_hashrnd(void)
6672{
Tom Herbert0a9627f2010-03-16 08:03:29 +00006673 get_random_bytes(&hashrnd, sizeof(hashrnd));
Krishna Kumare88721f2009-02-18 17:55:02 -08006674 return 0;
6675}
6676
6677late_initcall_sync(initialize_hashrnd);
6678
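/*
 * Illustrative sketch, not part of the original file: how the hashrnd seed
 * initialised above is typically consumed.  example_flow_hash() is
 * hypothetical; the real consumer is __skb_get_rxhash() earlier in this
 * file, which mixes addresses and ports with jhash and hashrnd.
 */
static u32 __maybe_unused example_flow_hash(__be32 saddr, __be32 daddr,
					    __be32 ports)
{
	u32 hash = jhash_3words((__force u32)saddr, (__force u32)daddr,
				(__force u32)ports, hashrnd);

	/* 0 is reserved to mean "no hash computed", as in __skb_get_rxhash() */
	return hash ?: 1;
}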