/*
 *	NET3	Protocol independent device support routines.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *	Authors:	Ross Biro
 *			Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *			Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain :	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell :	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

/*
 * The list of packet types we will receive (as opposed to discard)
 * and the routines to invoke.
 *
 * Why 16. Because with 16 the only overlap we get on a hash of the
 * low nibble of the protocol value is RARP/SNAP/X.25.
 *
 * NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *        sure which should go first, but I bet it won't make much
 *        difference if we are running VLANs.  The good news is that
 *        this protocol won't be in the list unless compiled in, so
 *        the average user (w/out VLANs) will not be adversely affected.
 *        --BLG
 *
 *		0800	IP
 *		8100	802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;	/* Taps */

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
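
/*
 * Illustrative sketch (not from the original file): following the
 * locking rules above, a pure reader can walk the device list either
 * under dev_base_lock or under RCU (my_handle_dev() is a hypothetical
 * callback):
 *
 *	read_lock(&dev_base_lock);
 *	for_each_netdev(net, dev)
 *		my_handle_dev(dev);
 *	read_unlock(&dev_base_lock);
 *
 * or, equivalently for read-only access:
 *
 *	rcu_read_lock();
 *	for_each_netdev_rcu(net, dev)
 *		my_handle_dev(dev);
 *	rcu_read_unlock();
 *
 * Writers instead take the rtnl semaphore and additionally hold
 * dev_base_lock for writing around the actual list updates, as
 * register_netdevice() and unregister_netdevice() do.
 */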

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}

/* Device list removal
 * caller must respect a RCU grace period before freeing/reusing dev
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	hlist_del_rcu(&dev->name_hlist);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}
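
/*
 * Illustrative sketch (not from the original file): since the removal
 * above only unlinks the RCU-protected entries, a caller must let an
 * RCU grace period elapse before the struct net_device can be freed
 * or reused, e.g.:
 *
 *	unlist_netdevice(dev);
 *	synchronize_rcu();	(or synchronize_net())
 *	... dev may now be freed ...
 */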

/*
 * Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 * Device drivers call our routines to queue packets here. We empty the
 * queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
	 ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
	 "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 * Add a protocol ID to the list. Now that the input handler is
 * smarter we can dispense with all the messy stuff that used to be
 * here.
 *
 * BEWARE!!! Protocol handlers, mangling input packets,
 * MUST BE last in hash buckets and checking protocol handlers
 * MUST start from promiscuous ptype_all chain in net_bh.
 * It is true now, do not change it.
 * Explanation follows: if protocol handler, mangling packet, will
 * be the first on list, it is not able to sense, that packet
 * is cloned and should be copied-on-write, so that it will
 * change it and subsequent readers will get broken packet.
 * --ANK (980803)
 */

/**
 * dev_add_pack - add packet handler
 * @pt: packet type declaration
 *
 * Add a protocol handler to the networking stack. The passed &packet_type
 * is linked into kernel lists and may not be freed until it has been
 * removed from the kernel lists.
 *
 * This call does not sleep, therefore it cannot guarantee that all
 * CPUs that are in the middle of receiving packets will see the new
 * packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
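
/*
 * Illustrative sketch (not from the original file): a protocol module
 * typically registers its handler with a static packet_type
 * (my_ipv4_rcv() is a hypothetical receive function):
 *
 *	static struct packet_type my_ip_packet_type __read_mostly = {
 *		.type = cpu_to_be16(ETH_P_IP),
 *		.func = my_ipv4_rcv,
 *	};
 *
 *	dev_add_pack(&my_ip_packet_type);
 *
 * and tears it down with dev_remove_pack(&my_ip_packet_type), defined
 * below, which does not return until no CPU can still be looking at
 * the handler.
 */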

/**
 * __dev_remove_pack - remove packet handler
 * @pt: packet type declaration
 *
 * Remove a protocol handler that was previously added to the kernel
 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
 * from the kernel lists and can be freed or reused once this function
 * returns.
 *
 * The packet type might still be in use by receivers
 * and must not be freed until after all the CPUs have gone
 * through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);

/**
 * dev_remove_pack - remove packet handler
 * @pt: packet type declaration
 *
 * Remove a protocol handler that was previously added to the kernel
 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
 * from the kernel lists and can be freed or reused once this function
 * returns.
 *
 * This call sleeps to guarantee that no CPU is looking at the packet
 * type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);

/*******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 * netdev_boot_setup_add - add new setup entry
 * @name: name of the device
 * @map: configured settings for the device
 *
 * Adds new setup entry to the dev_boot_setup list.  The function
 * returns 0 on error and 1 on success.  This is a generic routine for
 * all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 * netdev_boot_setup_check - check boot time settings
 * @dev: the netdevice
 *
 * Check boot time settings for the device.
 * The found settings are set for the device to be used
 * later in the device probing.
 * Returns 0 if no settings are found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq = s[i].map.irq;
			dev->base_addr = s[i].map.base_addr;
			dev->mem_start = s[i].map.mem_start;
			dev->mem_end = s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);


/**
 * netdev_boot_base - get address from boot time settings
 * @prefix: prefix for network device
 * @unit: id for network device
 *
 * Check boot time settings for the base address of device.
 * The found settings are set for the device to be used
 * later in the device probing.
 * Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
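
/*
 * Illustrative sketch (not from the original file): given the parsing
 * above (up to four integers followed by the device name), a kernel
 * command line entry could look like
 *
 *	netdev=9,0x300,0,0,eth0
 *
 * which records irq=9 and base_addr=0x300 for eth0, to be applied
 * later by netdev_boot_setup_check() during device probing.
 */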

/*******************************************************************************

			Device Interface Subroutines

*******************************************************************************/

/**
 * __dev_get_by_name - find a device by its name
 * @net: the applicable net namespace
 * @name: name to find
 *
 * Find an interface by name. Must be called under RTNL semaphore
 * or @dev_base_lock. If the name is found a pointer to the device
 * is returned. If the name is not found then %NULL is returned. The
 * reference counters are not incremented so the caller must be
 * careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

/**
 * dev_get_by_name_rcu - find a device by its name
 * @net: the applicable net namespace
 * @name: name to find
 *
 * Find an interface by name.
 * If the name is found a pointer to the device is returned.
 * If the name is not found then %NULL is returned.
 * The reference counters are not incremented so the caller must be
 * careful with locks. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);

/**
 * dev_get_by_name - find a device by its name
 * @net: the applicable net namespace
 * @name: name to find
 *
 * Find an interface by name. This can be called from any
 * context and does its own locking. The returned handle has
 * the usage count incremented and the caller must use dev_put() to
 * release it when it is no longer needed. %NULL is returned if no
 * matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);
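
/*
 * Illustrative sketch (not from the original file): the refcounted
 * lookup is the variant to use outside RCU/RTNL protected contexts;
 * the reference it takes must be dropped with dev_put():
 *
 *	struct net_device *dev = dev_get_by_name(net, "eth0");
 *	if (dev) {
 *		... use dev ...
 *		dev_put(dev);
 *	}
 */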

/**
 * __dev_get_by_index - find a device by its ifindex
 * @net: the applicable net namespace
 * @ifindex: index of device
 *
 * Search for an interface by index. Returns %NULL if the device
 * is not found or a pointer to the device. The device has not
 * had its reference counter increased so the caller must be careful
 * about locking. The caller must hold either the RTNL semaphore
 * or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);

/**
 * dev_get_by_index_rcu - find a device by its ifindex
 * @net: the applicable net namespace
 * @ifindex: index of device
 *
 * Search for an interface by index. Returns %NULL if the device
 * is not found or a pointer to the device. The device has not
 * had its reference counter increased so the caller must be careful
 * about locking. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);


/**
 * dev_get_by_index - find a device by its ifindex
 * @net: the applicable net namespace
 * @ifindex: index of device
 *
 * Search for an interface by index. Returns NULL if the device
 * is not found or a pointer to the device. The device returned has
 * had a reference added and the pointer is safe until the user calls
 * dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);

/**
 * dev_getbyhwaddr - find a device by its hardware address
 * @net: the applicable net namespace
 * @type: media type of device
 * @ha: hardware address
 *
 * Search for an interface by MAC address. Returns NULL if the device
 * is not found or a pointer to the device. The caller must hold the
 * rtnl semaphore. The returned device has not had its ref count increased
 * and the caller must therefore be careful about locking.
 *
 * BUGS:
 * If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 * dev_get_by_flags - find any device with given flags
 * @net: the applicable net namespace
 * @if_flags: IFF_* values
 * @mask: bitmask of bits in if_flags to check
 *
 * Search for any interface with the given flags. Returns NULL if a device
 * is not found or a pointer to the device. The device returned has
 * had a reference added and the pointer is safe until the user calls
 * dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
				    unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(dev_get_by_flags);

/**
 * dev_valid_name - check if name is okay for network device
 * @name: name string
 *
 * Network device names need to be valid file names to
 * allow sysfs to work. We also disallow any kind of
 * whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}
EXPORT_SYMBOL(dev_valid_name);
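
/*
 * Illustrative examples (not from the original file): by the checks
 * above, "eth0" and "wan%d" are accepted, while "", ".", "..", "a/b",
 * names containing whitespace, and names of IFNAMSIZ or more
 * characters are all rejected.
 */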

/**
 * __dev_alloc_name - allocate a name for a device
 * @net: network namespace to allocate the device name in
 * @name: name format string
 * @buf:  scratch buffer and result name string
 *
 * Passed a format string - eg "lt%d" it will try and find a suitable
 * id. It scans list of devices to build up a free map, then chooses
 * the first empty slot. The caller must hold the dev_base or rtnl lock
 * while allocating the name and adding the device in order to avoid
 * duplicates.
 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 * Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	if (buf != name)
		snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 * dev_alloc_name - allocate a name for a device
 * @dev: device
 * @name: name format string
 *
 * Passed a format string - eg "lt%d" it will try and find a suitable
 * id. It scans list of devices to build up a free map, then chooses
 * the first empty slot. The caller must hold the dev_base or rtnl lock
 * while allocating the name and adding the device in order to avoid
 * duplicates.
 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 * Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
EXPORT_SYMBOL(dev_alloc_name);
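
/*
 * Illustrative sketch (not from the original file): a driver wanting
 * the usual ethN naming can call, under RTNL:
 *
 *	err = dev_alloc_name(dev, "eth%d");
 *	if (err < 0)
 *		goto fail;
 *
 * On success dev->name holds e.g. "eth3" and err is the assigned
 * unit number.
 */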

static int dev_get_valid_name(struct net *net, const char *name, char *buf,
			      bool fmt)
{
	if (!dev_valid_name(name))
		return -EINVAL;

	if (fmt && strchr(name, '%'))
		return __dev_alloc_name(net, name, buf);
	else if (__dev_get_by_name(net, name))
		return -EEXIST;
	else if (buf != name)
		strlcpy(buf, name, IFNAMSIZ);

	return 0;
}

/**
 * dev_change_name - change name of a device
 * @dev: device
 * @newname: name (or format string) must be at least IFNAMSIZ
 *
 * Change name of a device, can pass format strings "eth%d"
 * for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;

	memcpy(oldname, dev->name, IFNAMSIZ);

	err = dev_get_valid_name(net, newname, dev->name, 1);
	if (err < 0)
		return err;

rollback:
	/* For now only devices in the initial network namespace
	 * are in sysfs.
	 */
	if (net_eq(net, &init_net)) {
		ret = device_rename(&dev->dev, dev->name);
		if (ret) {
			memcpy(dev->name, oldname, IFNAMSIZ);
			return ret;
		}
	}

	write_lock_bh(&dev_base_lock);
	hlist_del(&dev->name_hlist);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
		}
	}

	return err;
}

/**
 * dev_set_alias - change ifalias of a device
 * @dev: device
 * @alias: name up to IFALIASZ
 * @len: limit of bytes to copy from info
 *
 * Set ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		if (dev->ifalias) {
			kfree(dev->ifalias);
			dev->ifalias = NULL;
		}
		return 0;
	}

	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!dev->ifalias)
		return -ENOMEM;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}


/**
 * netdev_features_change - device changes features
 * @dev: device to cause notification
 *
 * Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 * netdev_state_change - device changes state
 * @dev: device to cause notification
 *
 * Called to indicate a device has changed state. This function calls
 * the notifier chains for netdev_chain and sends a NEWLINK message
 * to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
EXPORT_SYMBOL(netdev_state_change);

void netdev_bonding_change(struct net_device *dev, unsigned long event)
{
	call_netdevice_notifiers(event, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);

/**
 * dev_load - load a network module
 * @net: the applicable net namespace
 * @name: name of interface
 *
 * If a network interface is not present and the process has suitable
 * privileges this function loads the module. If module loading is not
 * available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	rcu_read_unlock();

	if (!dev && capable(CAP_NET_ADMIN))
		request_module("%s", name);
}
EXPORT_SYMBOL(dev_load);

/**
 * dev_open - prepare an interface for use.
 * @dev: device to open
 *
 * Takes a device from down to up state. The device's private open
 * function is invoked and then the multicast lists are loaded. Finally
 * the device is moved into the up state and a %NETDEV_UP message is
 * sent to the netdev notifier chain.
 *
 * Calling this function on an active interface is a nop. On a failure
 * a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	/*
	 * Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 * Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		return ret;

	/*
	 * Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	/*
	 * If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		/*
		 * Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 * Enable NET_DMA
		 */
		net_dmaengine_get();

		/*
		 * Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 * Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 * ... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return ret;
}
EXPORT_SYMBOL(dev_open);
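
/*
 * Illustrative sketch (not from the original file): callers bring an
 * interface up under RTNL, mirroring what the ioctl path does:
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	rtnl_unlock();
 */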

/**
 * dev_close - shutdown an interface.
 * @dev: device to shutdown
 *
 * This function moves an active device into down state. A
 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 * chain.
 */
int dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 * Tell people we are going down, so that they can
	 * prepare for death while the device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 * Call the device specific close. This cannot fail.
	 * Only if device is UP
	 *
	 * We allow it to be called even after a DETACH hot-plug
	 * event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 * Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 * Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	/*
	 * Shutdown NET_DMA
	 */
	net_dmaengine_put();

	return 0;
}
EXPORT_SYMBOL(dev_close);


/**
 * dev_disable_lro - disable Large Receive Offload on a device
 * @dev: device
 *
 * Disable Large Receive Offload (LRO) on a net device.  Must be
 * called under RTNL.  This is needed if received packets may be
 * forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
	    dev->ethtool_ops->set_flags) {
		u32 flags = dev->ethtool_ops->get_flags(dev);
		if (flags & ETH_FLAG_LRO) {
			flags &= ~ETH_FLAG_LRO;
			dev->ethtool_ops->set_flags(dev, flags);
		}
	}
	WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);


static int dev_boot_phase = 1;

/*
 * Device change register/unregister. These are not inline or static
 * as we export them to the world.
 */

/**
 * register_netdevice_notifier - register a network notifier block
 * @nb: notifier
 *
 * Register a notifier to be called when network device events occur.
 * The notifier passed is linked into the kernel structures and must
 * not be reused until it has been unregistered. A negative errno code
 * is returned on a failure.
 *
 * When registered, all registration and up events are replayed
 * to the new notifier to allow it to have a race-free
 * view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				break;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
		}
	}

	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364
1365/**
1366 * unregister_netdevice_notifier - unregister a network notifier block
1367 * @nb: notifier
1368 *
1369 * Unregister a notifier previously registered by
1370 * register_netdevice_notifier(). The notifier is unlinked from the
1371 * kernel structures and may then be reused. A negative errno code
1372 * is returned on a failure.
1373 */
1374
1375int unregister_netdevice_notifier(struct notifier_block *nb)
1376{
Herbert Xu9f514952006-03-25 01:24:25 -08001377 int err;
1378
1379 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001380 err = raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xu9f514952006-03-25 01:24:25 -08001381 rtnl_unlock();
1382 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001384EXPORT_SYMBOL(unregister_netdevice_notifier);
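/*
 * Example (editor's sketch, not part of this file): a minimal module
 * using the register/unregister pairing above. The sample_* names are
 * assumed. Note that registration replays NETDEV_REGISTER/NETDEV_UP
 * for already-existing devices, so the callback must tolerate events
 * for devices it has never seen before.
 */
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>

static int sample_netdev_event(struct notifier_block *nb,
                               unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;   /* ptr is the net_device here */

        switch (event) {
        case NETDEV_UP:
                printk(KERN_INFO "sample: %s is up\n", dev->name);
                break;
        case NETDEV_GOING_DOWN:
                printk(KERN_INFO "sample: %s is going down\n", dev->name);
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block sample_netdev_notifier = {
        .notifier_call = sample_netdev_event,
};

static int __init sample_init(void)
{
        return register_netdevice_notifier(&sample_netdev_notifier);
}

static void __exit sample_exit(void)
{
        unregister_netdevice_notifier(&sample_netdev_notifier);
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");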
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385
1386/**
1387 * call_netdevice_notifiers - call all network notifier blocks
1388 * @val: value passed unmodified to notifier function
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07001389 * @dev: net_device pointer passed unmodified to notifier function
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390 *
1391 * Call all network notifier blocks. Parameters and return value
Alan Sternf07d5b92006-05-09 15:23:03 -07001392 * are as for raw_notifier_call_chain().
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393 */
1394
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001395int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396{
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001397 return raw_notifier_call_chain(&netdev_chain, val, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398}
1399
1400/* When > 0 there are consumers of rx skb time stamps */
1401static atomic_t netstamp_needed = ATOMIC_INIT(0);
1402
1403void net_enable_timestamp(void)
1404{
1405 atomic_inc(&netstamp_needed);
1406}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001407EXPORT_SYMBOL(net_enable_timestamp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408
1409void net_disable_timestamp(void)
1410{
1411 atomic_dec(&netstamp_needed);
1412}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001413EXPORT_SYMBOL(net_disable_timestamp);
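/*
 * Example (editor's sketch, not part of this file): a hypothetical
 * tap-like module that needs RX timestamps holds a reference on the
 * netstamp counter for its whole lifetime, much as the socket
 * timestamping code does; the sample_* names are assumed.
 */
#include <linux/module.h>
#include <linux/netdevice.h>

static int __init sample_tap_init(void)
{
        net_enable_timestamp();         /* skbs now get timestamped on rx */
        return 0;
}

static void __exit sample_tap_exit(void)
{
        net_disable_timestamp();        /* drop our reference */
}

module_init(sample_tap_init);
module_exit(sample_tap_exit);
MODULE_LICENSE("GPL");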
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001415static inline void net_timestamp(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001416{
1417 if (atomic_read(&netstamp_needed))
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001418 __net_timestamp(skb);
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001419 else
1420 skb->tstamp.tv64 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421}
1422
Arnd Bergmann44540962009-11-26 06:07:08 +00001423/**
1424 * dev_forward_skb - loopback an skb to another netif
1425 *
1426 * @dev: destination network device
1427 * @skb: buffer to forward
1428 *
1429 * return values:
1430 * NET_RX_SUCCESS (no congestion)
1431 * NET_RX_DROP (packet was dropped)
1432 *
1433 * dev_forward_skb can be used for injecting an skb from the
1434 * start_xmit function of one device into the receive queue
1435 * of another device.
1436 *
1437 * The receiving device may be in another namespace, so
1438 * we have to clear all information in the skb that could
1439 * impact namespace isolation.
1440 */
1441int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1442{
1443 skb_orphan(skb);
1444
1445 if (!(dev->flags & IFF_UP))
1446 return NET_RX_DROP;
1447
1448 if (skb->len > (dev->mtu + dev->hard_header_len))
1449 return NET_RX_DROP;
1450
Arnd Bergmann8a83a002010-01-30 12:23:03 +00001451 skb_set_dev(skb, dev);
Arnd Bergmann44540962009-11-26 06:07:08 +00001452 skb->tstamp.tv64 = 0;
1453 skb->pkt_type = PACKET_HOST;
1454 skb->protocol = eth_type_trans(skb, dev);
Arnd Bergmann44540962009-11-26 06:07:08 +00001455 return netif_rx(skb);
1456}
1457EXPORT_SYMBOL_GPL(dev_forward_skb);
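/*
 * Example (editor's sketch, not part of this file): the start_xmit
 * routine of a hypothetical veth-like pair device built on
 * dev_forward_skb(). struct sample_pair_priv and the way its peer
 * pointer is set up are assumptions.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>

struct sample_pair_priv {
        struct net_device *peer;        /* assumed: set when the pair is created */
};

static netdev_tx_t sample_pair_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct sample_pair_priv *priv = netdev_priv(dev);
        unsigned int len = skb->len;    /* skb may be consumed below */

        /* dev_forward_skb() scrubs namespace state and calls netif_rx(). */
        if (dev_forward_skb(priv->peer, skb) == NET_RX_SUCCESS) {
                dev->stats.tx_packets++;
                dev->stats.tx_bytes += len;
        } else {
                dev->stats.tx_dropped++;
        }
        return NETDEV_TX_OK;
}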
1458
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459/*
1460 * Support routine. Sends outgoing frames to any network
1461 * taps currently in use.
1462 */
1463
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001464static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001465{
1466 struct packet_type *ptype;
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001467
Jarek Poplawski8caf1532009-04-17 10:08:49 +00001468#ifdef CONFIG_NET_CLS_ACT
1469 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1470 net_timestamp(skb);
1471#else
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001472 net_timestamp(skb);
Jarek Poplawski8caf1532009-04-17 10:08:49 +00001473#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474
1475 rcu_read_lock();
1476 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1477 /* Never send packets back to the socket
1478 * they originated from - MvS (miquels@drinkel.ow.org)
1479 */
1480 if ((ptype->dev == dev || !ptype->dev) &&
1481 (ptype->af_packet_priv == NULL ||
1482 (struct sock *)ptype->af_packet_priv != skb->sk)) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001483 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 if (!skb2)
1485 break;
1486
1487 /* skb->network_header should be correctly
1488 set by the sender, so the check below is
1489 just protection against buggy protocols.
1490 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001491 skb_reset_mac_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001493 if (skb_network_header(skb2) < skb2->data ||
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001494 skb2->network_header > skb2->tail) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 if (net_ratelimit())
1496 printk(KERN_CRIT "protocol %04x is "
1497 "buggy, dev %s\n",
1498 ntohs(skb2->protocol), dev->name);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07001499 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 }
1501
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001502 skb2->transport_header = skb2->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001503 skb2->pkt_type = PACKET_OUTGOING;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001504 ptype->func(skb2, skb->dev, ptype, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505 }
1506 }
1507 rcu_read_unlock();
1508}
1509
Denis Vlasenko56079432006-03-29 15:57:29 -08001510
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001511static inline void __netif_reschedule(struct Qdisc *q)
1512{
1513 struct softnet_data *sd;
1514 unsigned long flags;
1515
1516 local_irq_save(flags);
1517 sd = &__get_cpu_var(softnet_data);
1518 q->next_sched = sd->output_queue;
1519 sd->output_queue = q;
1520 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1521 local_irq_restore(flags);
1522}
1523
David S. Miller37437bb2008-07-16 02:15:04 -07001524void __netif_schedule(struct Qdisc *q)
Denis Vlasenko56079432006-03-29 15:57:29 -08001525{
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001526 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1527 __netif_reschedule(q);
Denis Vlasenko56079432006-03-29 15:57:29 -08001528}
1529EXPORT_SYMBOL(__netif_schedule);
1530
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001531void dev_kfree_skb_irq(struct sk_buff *skb)
Denis Vlasenko56079432006-03-29 15:57:29 -08001532{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001533 if (atomic_dec_and_test(&skb->users)) {
1534 struct softnet_data *sd;
1535 unsigned long flags;
Denis Vlasenko56079432006-03-29 15:57:29 -08001536
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001537 local_irq_save(flags);
1538 sd = &__get_cpu_var(softnet_data);
1539 skb->next = sd->completion_queue;
1540 sd->completion_queue = skb;
1541 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1542 local_irq_restore(flags);
1543 }
Denis Vlasenko56079432006-03-29 15:57:29 -08001544}
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001545EXPORT_SYMBOL(dev_kfree_skb_irq);
Denis Vlasenko56079432006-03-29 15:57:29 -08001546
1547void dev_kfree_skb_any(struct sk_buff *skb)
1548{
1549 if (in_irq() || irqs_disabled())
1550 dev_kfree_skb_irq(skb);
1551 else
1552 dev_kfree_skb(skb);
1553}
1554EXPORT_SYMBOL(dev_kfree_skb_any);
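/*
 * Example (editor's sketch, not part of this file): releasing completed
 * TX buffers from a cleanup routine that may run in hardirq, softirq or
 * process context; the flat ring layout here is an assumption.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static void sample_free_tx_ring(struct sk_buff **ring, unsigned int entries)
{
        unsigned int i;

        for (i = 0; i < entries; i++) {
                if (!ring[i])
                        continue;
                /* Defers the free to the completion queue when IRQs are off. */
                dev_kfree_skb_any(ring[i]);
                ring[i] = NULL;
        }
}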
1555
1556
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001557/**
1558 * netif_device_detach - mark device as removed
1559 * @dev: network device
1560 *
1561 * Mark device as removed from system and therefore no longer available.
1562 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001563void netif_device_detach(struct net_device *dev)
1564{
1565 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1566 netif_running(dev)) {
Alexander Duyckd5431032009-04-08 13:15:22 +00001567 netif_tx_stop_all_queues(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001568 }
1569}
1570EXPORT_SYMBOL(netif_device_detach);
1571
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001572/**
1573 * netif_device_attach - mark device as attached
1574 * @dev: network device
1575 *
1576 * Mark device as attached to the system and restart its queues if needed.
1577 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001578void netif_device_attach(struct net_device *dev)
1579{
1580 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1581 netif_running(dev)) {
Alexander Duyckd5431032009-04-08 13:15:22 +00001582 netif_tx_wake_all_queues(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001583 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001584 }
1585}
1586EXPORT_SYMBOL(netif_device_attach);
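/*
 * Example (editor's sketch, not part of this file): the usual pairing
 * of netif_device_detach()/netif_device_attach() in a hypothetical PCI
 * driver's legacy suspend/resume handlers; sample_* names are assumed.
 */
#include <linux/pci.h>
#include <linux/netdevice.h>

static int sample_suspend(struct pci_dev *pdev, pm_message_t state)
{
        struct net_device *netdev = pci_get_drvdata(pdev);

        netif_device_detach(netdev);    /* stops all TX queues if running */
        pci_save_state(pdev);
        pci_set_power_state(pdev, pci_choose_state(pdev, state));
        return 0;
}

static int sample_resume(struct pci_dev *pdev)
{
        struct net_device *netdev = pci_get_drvdata(pdev);

        pci_set_power_state(pdev, PCI_D0);
        pci_restore_state(pdev);
        netif_device_attach(netdev);    /* restarts queues and watchdog */
        return 0;
}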
1587
Ben Hutchings6de329e2008-06-16 17:02:28 -07001588static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1589{
1590 return ((features & NETIF_F_GEN_CSUM) ||
1591 ((features & NETIF_F_IP_CSUM) &&
1592 protocol == htons(ETH_P_IP)) ||
1593 ((features & NETIF_F_IPV6_CSUM) &&
Yi Zou1c8dbcf2009-02-27 14:06:54 -08001594 protocol == htons(ETH_P_IPV6)) ||
1595 ((features & NETIF_F_FCOE_CRC) &&
1596 protocol == htons(ETH_P_FCOE)));
Ben Hutchings6de329e2008-06-16 17:02:28 -07001597}
1598
1599static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1600{
1601 if (can_checksum_protocol(dev->features, skb->protocol))
1602 return true;
1603
1604 if (skb->protocol == htons(ETH_P_8021Q)) {
1605 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1606 if (can_checksum_protocol(dev->features & dev->vlan_features,
1607 veh->h_vlan_encapsulated_proto))
1608 return true;
1609 }
1610
1611 return false;
1612}
Denis Vlasenko56079432006-03-29 15:57:29 -08001613
Arnd Bergmann8a83a002010-01-30 12:23:03 +00001614/**
1615 * skb_dev_set -- assign a new device to a buffer
1616 * @skb: buffer for the new device
1617 * @dev: network device
1618 *
1619 * If an skb is owned by a device already, we have to reset
1620 * all data private to the namespace a device belongs to
1621 * before assigning it a new device.
1622 */
1623#ifdef CONFIG_NET_NS
1624void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
1625{
1626 skb_dst_drop(skb);
1627 if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
1628 secpath_reset(skb);
1629 nf_reset(skb);
1630 skb_init_secmark(skb);
1631 skb->mark = 0;
1632 skb->priority = 0;
1633 skb->nf_trace = 0;
1634 skb->ipvs_property = 0;
1635#ifdef CONFIG_NET_SCHED
1636 skb->tc_index = 0;
1637#endif
1638 }
1639 skb->dev = dev;
1640}
1641EXPORT_SYMBOL(skb_set_dev);
1642#endif /* CONFIG_NET_NS */
1643
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644/*
1645 * Invalidate hardware checksum when packet is to be mangled, and
1646 * complete checksum manually on outgoing path.
1647 */
Patrick McHardy84fa7932006-08-29 16:44:56 -07001648int skb_checksum_help(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649{
Al Virod3bc23e2006-11-14 21:24:49 -08001650 __wsum csum;
Herbert Xu663ead32007-04-09 11:59:07 -07001651 int ret = 0, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652
Patrick McHardy84fa7932006-08-29 16:44:56 -07001653 if (skb->ip_summed == CHECKSUM_COMPLETE)
Herbert Xua430a432006-07-08 13:34:56 -07001654 goto out_set_summed;
1655
1656 if (unlikely(skb_shinfo(skb)->gso_size)) {
Herbert Xua430a432006-07-08 13:34:56 -07001657 /* Let GSO fix up the checksum. */
1658 goto out_set_summed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659 }
1660
Herbert Xua0308472007-10-15 01:47:15 -07001661 offset = skb->csum_start - skb_headroom(skb);
1662 BUG_ON(offset >= skb_headlen(skb));
1663 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1664
1665 offset += skb->csum_offset;
1666 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1667
1668 if (skb_cloned(skb) &&
1669 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1671 if (ret)
1672 goto out;
1673 }
1674
Herbert Xua0308472007-10-15 01:47:15 -07001675 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
Herbert Xua430a432006-07-08 13:34:56 -07001676out_set_summed:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677 skb->ip_summed = CHECKSUM_NONE;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001678out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 return ret;
1680}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001681EXPORT_SYMBOL(skb_checksum_help);
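/*
 * Example (editor's sketch, not part of this file): a TX helper for a
 * hypothetical NIC whose hardware can only checksum IPv4 packets and
 * therefore falls back to skb_checksum_help() for everything else.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

static int sample_tx_checksum(struct sk_buff *skb)
{
        if (skb->ip_summed != CHECKSUM_PARTIAL)
                return 0;               /* nothing left to do */

        if (skb->protocol == htons(ETH_P_IP))
                return 0;               /* assumed: hardware handles IPv4 */

        /* Complete the checksum in software; the skb is modified in place. */
        return skb_checksum_help(skb);
}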
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001683/**
1684 * skb_gso_segment - Perform segmentation on skb.
1685 * @skb: buffer to segment
Herbert Xu576a30e2006-06-27 13:22:38 -07001686 * @features: features for the output path (see dev->features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001687 *
1688 * This function segments the given skb and returns a list of segments.
Herbert Xu576a30e2006-06-27 13:22:38 -07001689 *
1690 * It may return NULL if the skb requires no segmentation. This is
1691 * only possible when GSO is used for verifying header integrity.
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001692 */
Herbert Xu576a30e2006-06-27 13:22:38 -07001693struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001694{
1695 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1696 struct packet_type *ptype;
Al Viro252e3342006-11-14 20:48:11 -08001697 __be16 type = skb->protocol;
Herbert Xua430a432006-07-08 13:34:56 -07001698 int err;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001699
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001700 skb_reset_mac_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001701 skb->mac_len = skb->network_header - skb->mac_header;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001702 __skb_pull(skb, skb->mac_len);
1703
Herbert Xu67fd1a72009-01-19 16:26:44 -08001704 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1705 struct net_device *dev = skb->dev;
1706 struct ethtool_drvinfo info = {};
1707
1708 if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1709 dev->ethtool_ops->get_drvinfo(dev, &info);
1710
1711 WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1712 "ip_summed=%d",
1713 info.driver, dev ? dev->features : 0L,
1714 skb->sk ? skb->sk->sk_route_caps : 0L,
1715 skb->len, skb->data_len, skb->ip_summed);
1716
Herbert Xua430a432006-07-08 13:34:56 -07001717 if (skb_header_cloned(skb) &&
1718 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1719 return ERR_PTR(err);
1720 }
1721
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001722 rcu_read_lock();
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08001723 list_for_each_entry_rcu(ptype,
1724 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001725 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
Patrick McHardy84fa7932006-08-29 16:44:56 -07001726 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001727 err = ptype->gso_send_check(skb);
1728 segs = ERR_PTR(err);
1729 if (err || skb_gso_ok(skb, features))
1730 break;
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001731 __skb_push(skb, (skb->data -
1732 skb_network_header(skb)));
Herbert Xua430a432006-07-08 13:34:56 -07001733 }
Herbert Xu576a30e2006-06-27 13:22:38 -07001734 segs = ptype->gso_segment(skb, features);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001735 break;
1736 }
1737 }
1738 rcu_read_unlock();
1739
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001740 __skb_push(skb, skb->data - skb_mac_header(skb));
Herbert Xu576a30e2006-06-27 13:22:38 -07001741
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001742 return segs;
1743}
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001744EXPORT_SYMBOL(skb_gso_segment);
1745
Herbert Xufb286bb2005-11-10 13:01:24 -08001746/* Take action when hardware reception checksum errors are detected. */
1747#ifdef CONFIG_BUG
1748void netdev_rx_csum_fault(struct net_device *dev)
1749{
1750 if (net_ratelimit()) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001751 printk(KERN_ERR "%s: hw csum failure.\n",
Stephen Hemminger246a4212005-12-08 15:21:39 -08001752 dev ? dev->name : "<unknown>");
Herbert Xufb286bb2005-11-10 13:01:24 -08001753 dump_stack();
1754 }
1755}
1756EXPORT_SYMBOL(netdev_rx_csum_fault);
1757#endif
1758
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759/* Actually, we should eliminate this check as soon as we know that:
1760 * 1. An IOMMU is present and can map all of the memory.
1761 * 2. No high memory really exists on this machine.
1762 */
1763
1764static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1765{
Herbert Xu3d3a8532006-06-27 13:33:10 -07001766#ifdef CONFIG_HIGHMEM
Linus Torvalds1da177e2005-04-16 15:20:36 -07001767 int i;
1768
1769 if (dev->features & NETIF_F_HIGHDMA)
1770 return 0;
1771
1772 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1773 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1774 return 1;
1775
Herbert Xu3d3a8532006-06-27 13:33:10 -07001776#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 return 0;
1778}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001780struct dev_gso_cb {
1781 void (*destructor)(struct sk_buff *skb);
1782};
1783
1784#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1785
1786static void dev_gso_skb_destructor(struct sk_buff *skb)
1787{
1788 struct dev_gso_cb *cb;
1789
1790 do {
1791 struct sk_buff *nskb = skb->next;
1792
1793 skb->next = nskb->next;
1794 nskb->next = NULL;
1795 kfree_skb(nskb);
1796 } while (skb->next);
1797
1798 cb = DEV_GSO_CB(skb);
1799 if (cb->destructor)
1800 cb->destructor(skb);
1801}
1802
1803/**
1804 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1805 * @skb: buffer to segment
1806 *
1807 * This function segments the given skb and stores the list of segments
1808 * in skb->next.
1809 */
1810static int dev_gso_segment(struct sk_buff *skb)
1811{
1812 struct net_device *dev = skb->dev;
1813 struct sk_buff *segs;
Herbert Xu576a30e2006-06-27 13:22:38 -07001814 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1815 NETIF_F_SG : 0);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001816
Herbert Xu576a30e2006-06-27 13:22:38 -07001817 segs = skb_gso_segment(skb, features);
1818
1819 /* Verifying header integrity only. */
1820 if (!segs)
1821 return 0;
1822
Hirofumi Nakagawa801678c2008-04-29 01:03:09 -07001823 if (IS_ERR(segs))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001824 return PTR_ERR(segs);
1825
1826 skb->next = segs;
1827 DEV_GSO_CB(skb)->destructor = skb->destructor;
1828 skb->destructor = dev_gso_skb_destructor;
1829
1830 return 0;
1831}
1832
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001833int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1834 struct netdev_queue *txq)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001835{
Stephen Hemminger00829822008-11-20 20:14:53 -08001836 const struct net_device_ops *ops = dev->netdev_ops;
Patrick McHardy572a9d72009-11-10 06:14:14 +00001837 int rc = NETDEV_TX_OK;
Stephen Hemminger00829822008-11-20 20:14:53 -08001838
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001839 if (likely(!skb->next)) {
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -07001840 if (!list_empty(&ptype_all))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001841 dev_queue_xmit_nit(skb, dev);
1842
Herbert Xu576a30e2006-06-27 13:22:38 -07001843 if (netif_needs_gso(dev, skb)) {
1844 if (unlikely(dev_gso_segment(skb)))
1845 goto out_kfree_skb;
1846 if (skb->next)
1847 goto gso;
1848 }
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001849
Eric Dumazet93f154b2009-05-18 22:19:19 -07001850 /*
1851 * If device doesnt need skb->dst, release it right now while
1852 * its hot in this cpu cache
1853 */
Eric Dumazetadf30902009-06-02 05:19:30 +00001854 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1855 skb_dst_drop(skb);
1856
Patrick Ohlyac45f602009-02-12 05:03:37 +00001857 rc = ops->ndo_start_xmit(skb, dev);
Patrick McHardyec634fe2009-07-05 19:23:38 -07001858 if (rc == NETDEV_TX_OK)
Eric Dumazet08baf562009-05-25 22:58:01 -07001859 txq_trans_update(txq);
Patrick Ohlyac45f602009-02-12 05:03:37 +00001860 /*
1861 * TODO: if skb_orphan() was called by
1862 * dev->hard_start_xmit() (for example, the unmodified
1863 * igb driver does that; bnx2 doesn't), then
1864 * skb_tx_software_timestamp() will be unable to send
1865 * back the time stamp.
1866 *
1867 * How can this be prevented? Always create another
1868 * reference to the socket before calling
1869 * dev->hard_start_xmit()? Prevent that skb_orphan()
1870 * does anything in dev->hard_start_xmit() by clearing
1871 * the skb destructor before the call and restoring it
1872 * afterwards, then doing the skb_orphan() ourselves?
1873 */
Patrick Ohlyac45f602009-02-12 05:03:37 +00001874 return rc;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001875 }
1876
Herbert Xu576a30e2006-06-27 13:22:38 -07001877gso:
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001878 do {
1879 struct sk_buff *nskb = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001880
1881 skb->next = nskb->next;
1882 nskb->next = NULL;
Krishna Kumar068a2de2009-12-09 20:59:58 +00001883
1884 /*
1885 * If the device doesn't need nskb->dst, release it right now while
1886 * it's hot in this cpu's cache.
1887 */
1888 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1889 skb_dst_drop(nskb);
1890
Stephen Hemminger00829822008-11-20 20:14:53 -08001891 rc = ops->ndo_start_xmit(nskb, dev);
Patrick McHardyec634fe2009-07-05 19:23:38 -07001892 if (unlikely(rc != NETDEV_TX_OK)) {
Patrick McHardy572a9d72009-11-10 06:14:14 +00001893 if (rc & ~NETDEV_TX_MASK)
1894 goto out_kfree_gso_skb;
Michael Chanf54d9e82006-06-25 23:57:04 -07001895 nskb->next = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001896 skb->next = nskb;
1897 return rc;
1898 }
Eric Dumazet08baf562009-05-25 22:58:01 -07001899 txq_trans_update(txq);
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001900 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
Michael Chanf54d9e82006-06-25 23:57:04 -07001901 return NETDEV_TX_BUSY;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001902 } while (skb->next);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001903
Patrick McHardy572a9d72009-11-10 06:14:14 +00001904out_kfree_gso_skb:
1905 if (likely(skb->next == NULL))
1906 skb->destructor = DEV_GSO_CB(skb)->destructor;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001907out_kfree_skb:
1908 kfree_skb(skb);
Patrick McHardy572a9d72009-11-10 06:14:14 +00001909 return rc;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001910}
1911
David S. Miller70192982009-01-27 16:34:47 -08001912static u32 skb_tx_hashrnd;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001913
Stephen Hemminger92477442009-03-21 13:39:26 -07001914u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
David S. Miller8f0f2222008-07-15 03:47:03 -07001915{
David S. Miller70192982009-01-27 16:34:47 -08001916 u32 hash;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001917
David S. Miller513de112009-05-03 14:43:10 -07001918 if (skb_rx_queue_recorded(skb)) {
1919 hash = skb_get_rx_queue(skb);
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001920 while (unlikely(hash >= dev->real_num_tx_queues))
David S. Miller513de112009-05-03 14:43:10 -07001921 hash -= dev->real_num_tx_queues;
1922 return hash;
1923 }
Eric Dumazetec581f62009-05-01 09:05:06 -07001924
1925 if (skb->sk && skb->sk->sk_hash)
David S. Miller70192982009-01-27 16:34:47 -08001926 hash = skb->sk->sk_hash;
Eric Dumazetec581f62009-05-01 09:05:06 -07001927 else
David S. Miller70192982009-01-27 16:34:47 -08001928 hash = skb->protocol;
David S. Millerd5a9e242009-01-27 16:22:11 -08001929
David S. Miller70192982009-01-27 16:34:47 -08001930 hash = jhash_1word(hash, skb_tx_hashrnd);
David S. Millerd5a9e242009-01-27 16:22:11 -08001931
David S. Millerb6b2fed2008-07-21 09:48:06 -07001932 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
David S. Miller8f0f2222008-07-15 03:47:03 -07001933}
Stephen Hemminger92477442009-03-21 13:39:26 -07001934EXPORT_SYMBOL(skb_tx_hash);
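/*
 * Example (editor's sketch, not part of this file): an
 * ndo_select_queue() implementation that pins control traffic to
 * queue 0 and spreads the rest with skb_tx_hash(); the queueing
 * policy here is purely illustrative.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/pkt_sched.h>

static u16 sample_select_queue(struct net_device *dev, struct sk_buff *skb)
{
        if (skb->priority == TC_PRIO_CONTROL)
                return 0;       /* assumed: queue 0 reserved for control */

        /* Reuses a recorded RX queue when present, else hashes the flow. */
        return skb_tx_hash(dev, skb);
}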
David S. Miller8f0f2222008-07-15 03:47:03 -07001935
Eric Dumazeted046422009-11-13 21:54:04 +00001936static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1937{
1938 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
1939 if (net_ratelimit()) {
1940 WARN(1, "%s selects TX queue %d, but "
1941 "real number of TX queues is %d\n",
1942 dev->name, queue_index,
1943 dev->real_num_tx_queues);
1944 }
1945 return 0;
1946 }
1947 return queue_index;
1948}
1949
David S. Millere8a04642008-07-17 00:34:19 -07001950static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1951 struct sk_buff *skb)
1952{
Krishna Kumara4ee3ce2009-10-19 23:50:07 +00001953 u16 queue_index;
1954 struct sock *sk = skb->sk;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001955
Krishna Kumara4ee3ce2009-10-19 23:50:07 +00001956 if (sk_tx_queue_recorded(sk)) {
1957 queue_index = sk_tx_queue_get(sk);
1958 } else {
1959 const struct net_device_ops *ops = dev->netdev_ops;
1960
1961 if (ops->ndo_select_queue) {
1962 queue_index = ops->ndo_select_queue(dev, skb);
Eric Dumazeted046422009-11-13 21:54:04 +00001963 queue_index = dev_cap_txqueue(dev, queue_index);
Krishna Kumara4ee3ce2009-10-19 23:50:07 +00001964 } else {
1965 queue_index = 0;
1966 if (dev->real_num_tx_queues > 1)
1967 queue_index = skb_tx_hash(dev, skb);
1968
1969 if (sk && sk->sk_dst_cache)
1970 sk_tx_queue_set(sk, queue_index);
1971 }
1972 }
David S. Millereae792b2008-07-15 03:03:33 -07001973
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001974 skb_set_queue_mapping(skb, queue_index);
1975 return netdev_get_tx_queue(dev, queue_index);
David S. Millere8a04642008-07-17 00:34:19 -07001976}
1977
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00001978static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
1979 struct net_device *dev,
1980 struct netdev_queue *txq)
1981{
1982 spinlock_t *root_lock = qdisc_lock(q);
1983 int rc;
1984
1985 spin_lock(root_lock);
1986 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1987 kfree_skb(skb);
1988 rc = NET_XMIT_DROP;
1989 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
1990 !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
1991 /*
1992 * This is a work-conserving queue; there are no old skbs
1993 * waiting to be sent out; and the qdisc is not running -
1994 * xmit the skb directly.
1995 */
1996 __qdisc_update_bstats(q, skb->len);
1997 if (sch_direct_xmit(skb, q, dev, txq, root_lock))
1998 __qdisc_run(q);
1999 else
2000 clear_bit(__QDISC_STATE_RUNNING, &q->state);
2001
2002 rc = NET_XMIT_SUCCESS;
2003 } else {
2004 rc = qdisc_enqueue_root(skb, q);
2005 qdisc_run(q);
2006 }
2007 spin_unlock(root_lock);
2008
2009 return rc;
2010}
2011
Krishna Kumar4b258462010-01-21 01:26:29 -08002012/*
2013 * Returns true if either:
2014 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2015 * 2. skb is fragmented and the device does not support SG, or if
2016 * at least one of the fragments is in highmem and the device does not
2017 * support DMA from it.
2018 */
2019static inline int skb_needs_linearize(struct sk_buff *skb,
2020 struct net_device *dev)
2021{
2022 return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
2023 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
2024 illegal_highdma(dev, skb)));
2025}
2026
Dave Jonesd29f7492008-07-22 14:09:06 -07002027/**
2028 * dev_queue_xmit - transmit a buffer
2029 * @skb: buffer to transmit
2030 *
2031 * Queue a buffer for transmission to a network device. The caller must
2032 * have set the device and priority and built the buffer before calling
2033 * this function. The function can be called from an interrupt.
2034 *
2035 * A negative errno code is returned on a failure. A success does not
2036 * guarantee the frame will be transmitted as it may be dropped due
2037 * to congestion or traffic shaping.
2038 *
2039 * -----------------------------------------------------------------------------------
2040 * I notice this method can also return errors from the queue disciplines,
2041 * including NET_XMIT_DROP, which is a positive value. So, errors can also
2042 * be positive.
2043 *
2044 * Regardless of the return value, the skb is consumed, so it is currently
2045 * difficult to retry a send to this method. (You can bump the ref count
2046 * before sending to hold a reference for retry if you are careful.)
2047 *
2048 * When calling this method, interrupts MUST be enabled. This is because
2049 * the BH enable code must have IRQs enabled so that it will not deadlock.
2050 * --BLG
2051 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052int dev_queue_xmit(struct sk_buff *skb)
2053{
2054 struct net_device *dev = skb->dev;
David S. Millerdc2b4842008-07-08 17:18:23 -07002055 struct netdev_queue *txq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056 struct Qdisc *q;
2057 int rc = -ENOMEM;
2058
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002059 /* GSO will handle the following emulations directly. */
2060 if (netif_needs_gso(dev, skb))
2061 goto gso;
2062
Krishna Kumar4b258462010-01-21 01:26:29 -08002063 /* Convert a paged skb to linear, if required */
2064 if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065 goto out_kfree_skb;
2066
2067 /* If packet is not checksummed and device does not support
2068 * checksumming for this protocol, complete checksumming here.
2069 */
Herbert Xu663ead32007-04-09 11:59:07 -07002070 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2071 skb_set_transport_header(skb, skb->csum_start -
2072 skb_headroom(skb));
Ben Hutchings6de329e2008-06-16 17:02:28 -07002073 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
2074 goto out_kfree_skb;
Herbert Xu663ead32007-04-09 11:59:07 -07002075 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076
Herbert Xuf6a78bf2006-06-22 02:57:17 -07002077gso:
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002078 /* Disable soft irqs for various locks below. Also
2079 * stops preemption for RCU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002081 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082
David S. Millereae792b2008-07-15 03:03:33 -07002083 txq = dev_pick_tx(dev, skb);
David S. Millerb0e1e642008-07-08 17:42:10 -07002084 q = rcu_dereference(txq->qdisc);
David S. Miller37437bb2008-07-16 02:15:04 -07002085
Linus Torvalds1da177e2005-04-16 15:20:36 -07002086#ifdef CONFIG_NET_CLS_ACT
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002087 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088#endif
2089 if (q->enqueue) {
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002090 rc = __dev_xmit_skb(skb, q, dev, txq);
David S. Miller37437bb2008-07-16 02:15:04 -07002091 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002092 }
2093
2094 /* The device has no queue. Common case for software devices:
2095 loopback, all sorts of tunnels...
2096
Herbert Xu932ff272006-06-09 12:20:56 -07002097 Really, it is unlikely that netif_tx_lock protection is necessary
2098 here. (e.g. loopback and IP tunnels are clean, ignoring statistics
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 counters.)
2100 However, it is possible that they rely on the protection
2101 made by us here.
2102
2103 Check this and shoot the lock. It is not prone to deadlocks.
2104 Or shoot the noqueue qdisc instead, it is even simpler 8)
2105 */
2106 if (dev->flags & IFF_UP) {
2107 int cpu = smp_processor_id(); /* ok because BHs are off */
2108
David S. Millerc773e842008-07-08 23:13:53 -07002109 if (txq->xmit_lock_owner != cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110
David S. Millerc773e842008-07-08 23:13:53 -07002111 HARD_TX_LOCK(dev, txq, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002112
David S. Millerfd2ea0a2008-07-17 01:56:23 -07002113 if (!netif_tx_queue_stopped(txq)) {
Patrick McHardy572a9d72009-11-10 06:14:14 +00002114 rc = dev_hard_start_xmit(skb, dev, txq);
2115 if (dev_xmit_complete(rc)) {
David S. Millerc773e842008-07-08 23:13:53 -07002116 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117 goto out;
2118 }
2119 }
David S. Millerc773e842008-07-08 23:13:53 -07002120 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121 if (net_ratelimit())
2122 printk(KERN_CRIT "Virtual device %s asks to "
2123 "queue packet!\n", dev->name);
2124 } else {
2125 /* Recursion is detected! It is possible,
2126 * unfortunately */
2127 if (net_ratelimit())
2128 printk(KERN_CRIT "Dead loop on virtual device "
2129 "%s, fix it urgently!\n", dev->name);
2130 }
2131 }
2132
2133 rc = -ENETDOWN;
Herbert Xud4828d82006-06-22 02:28:18 -07002134 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135
2136out_kfree_skb:
2137 kfree_skb(skb);
2138 return rc;
2139out:
Herbert Xud4828d82006-06-22 02:28:18 -07002140 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141 return rc;
2142}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002143EXPORT_SYMBOL(dev_queue_xmit);
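/*
 * Example (editor's sketch, not part of this file): building and
 * sending a broadcast frame from a module via dev_queue_xmit(). The
 * use of ETH_P_IP as the protocol and the flat payload buffer are
 * assumptions for illustration only.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/string.h>

static int sample_send_frame(struct net_device *dev,
                             const void *payload, unsigned int len)
{
        struct sk_buff *skb;

        skb = alloc_skb(LL_RESERVED_SPACE(dev) + len, GFP_ATOMIC);
        if (!skb)
                return -ENOMEM;

        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb_reset_network_header(skb);
        memcpy(skb_put(skb, len), payload, len);

        skb->dev = dev;
        skb->protocol = htons(ETH_P_IP);        /* assumed: IPv4 payload */

        if (dev_hard_header(skb, dev, ETH_P_IP, dev->broadcast,
                            dev->dev_addr, skb->len) < 0) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /* dev_queue_xmit() consumes the skb whatever the outcome. */
        return dev_queue_xmit(skb);
}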
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144
2145
2146/*=======================================================================
2147 Receiver routines
2148 =======================================================================*/
2149
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07002150int netdev_max_backlog __read_mostly = 1000;
2151int netdev_budget __read_mostly = 300;
2152int weight_p __read_mostly = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153
2154DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
2155
2156
Linus Torvalds1da177e2005-04-16 15:20:36 -07002157/**
2158 * netif_rx - post buffer to the network code
2159 * @skb: buffer to post
2160 *
2161 * This function receives a packet from a device driver and queues it for
2162 * the upper (protocol) levels to process. It always succeeds. The buffer
2163 * may be dropped during processing for congestion control or by the
2164 * protocol layers.
2165 *
2166 * return values:
2167 * NET_RX_SUCCESS (no congestion)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002168 * NET_RX_DROP (packet was dropped)
2169 *
2170 */
2171
2172int netif_rx(struct sk_buff *skb)
2173{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002174 struct softnet_data *queue;
2175 unsigned long flags;
2176
2177 /* if netpoll wants it, pretend we never saw it */
2178 if (netpoll_rx(skb))
2179 return NET_RX_DROP;
2180
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07002181 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07002182 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183
2184 /*
2185 * The code is rearranged so that the path is the shortest
2186 * when the CPU is congested, but it is still operating.
2187 */
2188 local_irq_save(flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002189 queue = &__get_cpu_var(softnet_data);
2190
2191 __get_cpu_var(netdev_rx_stat).total++;
2192 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2193 if (queue->input_pkt_queue.qlen) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194enqueue:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195 __skb_queue_tail(&queue->input_pkt_queue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196 local_irq_restore(flags);
Stephen Hemminger34008d82005-06-23 20:10:00 -07002197 return NET_RX_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002198 }
2199
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002200 napi_schedule(&queue->backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201 goto enqueue;
2202 }
2203
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204 __get_cpu_var(netdev_rx_stat).dropped++;
2205 local_irq_restore(flags);
2206
2207 kfree_skb(skb);
2208 return NET_RX_DROP;
2209}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002210EXPORT_SYMBOL(netif_rx);
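/*
 * Example (editor's sketch, not part of this file): an interrupt-time
 * RX path of a hypothetical ethernet driver handing a frame to the
 * stack with netif_rx(); the copy out of device memory is an
 * assumption, as are the sample_* names.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/etherdevice.h>
#include <linux/string.h>

static void sample_rx_frame(struct net_device *dev,
                            const void *data, unsigned int len)
{
        struct sk_buff *skb;

        skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);
        if (!skb) {
                dev->stats.rx_dropped++;
                return;
        }

        skb_reserve(skb, NET_IP_ALIGN);         /* align the IP header */
        memcpy(skb_put(skb, len), data, len);

        skb->protocol = eth_type_trans(skb, dev);
        dev->stats.rx_packets++;
        dev->stats.rx_bytes += len;

        netif_rx(skb);          /* queue onto the per-cpu backlog */
}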
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211
2212int netif_rx_ni(struct sk_buff *skb)
2213{
2214 int err;
2215
2216 preempt_disable();
2217 err = netif_rx(skb);
2218 if (local_softirq_pending())
2219 do_softirq();
2220 preempt_enable();
2221
2222 return err;
2223}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224EXPORT_SYMBOL(netif_rx_ni);
2225
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226static void net_tx_action(struct softirq_action *h)
2227{
2228 struct softnet_data *sd = &__get_cpu_var(softnet_data);
2229
2230 if (sd->completion_queue) {
2231 struct sk_buff *clist;
2232
2233 local_irq_disable();
2234 clist = sd->completion_queue;
2235 sd->completion_queue = NULL;
2236 local_irq_enable();
2237
2238 while (clist) {
2239 struct sk_buff *skb = clist;
2240 clist = clist->next;
2241
Ilpo Järvinen547b7922008-07-25 21:43:18 -07002242 WARN_ON(atomic_read(&skb->users));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243 __kfree_skb(skb);
2244 }
2245 }
2246
2247 if (sd->output_queue) {
David S. Miller37437bb2008-07-16 02:15:04 -07002248 struct Qdisc *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249
2250 local_irq_disable();
2251 head = sd->output_queue;
2252 sd->output_queue = NULL;
2253 local_irq_enable();
2254
2255 while (head) {
David S. Miller37437bb2008-07-16 02:15:04 -07002256 struct Qdisc *q = head;
2257 spinlock_t *root_lock;
2258
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 head = head->next_sched;
2260
David S. Miller5fb66222008-08-02 20:02:43 -07002261 root_lock = qdisc_lock(q);
David S. Miller37437bb2008-07-16 02:15:04 -07002262 if (spin_trylock(root_lock)) {
Jarek Poplawskidef82a12008-08-17 21:54:43 -07002263 smp_mb__before_clear_bit();
2264 clear_bit(__QDISC_STATE_SCHED,
2265 &q->state);
David S. Miller37437bb2008-07-16 02:15:04 -07002266 qdisc_run(q);
2267 spin_unlock(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 } else {
David S. Miller195648b2008-08-19 04:00:36 -07002269 if (!test_bit(__QDISC_STATE_DEACTIVATED,
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002270 &q->state)) {
David S. Miller195648b2008-08-19 04:00:36 -07002271 __netif_reschedule(q);
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002272 } else {
2273 smp_mb__before_clear_bit();
2274 clear_bit(__QDISC_STATE_SCHED,
2275 &q->state);
2276 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277 }
2278 }
2279 }
2280}
2281
Stephen Hemminger6f05f622007-03-08 20:46:03 -08002282static inline int deliver_skb(struct sk_buff *skb,
2283 struct packet_type *pt_prev,
2284 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285{
2286 atomic_inc(&skb->users);
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002287 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002288}
2289
2290#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
Michał Mirosławda678292009-06-05 05:35:28 +00002291
2292#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2293/* This hook is defined here for ATM LANE */
2294int (*br_fdb_test_addr_hook)(struct net_device *dev,
2295 unsigned char *addr) __read_mostly;
Stephen Hemminger4fb019a2009-09-11 11:50:08 -07002296EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
Michał Mirosławda678292009-06-05 05:35:28 +00002297#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298
Stephen Hemminger6229e362007-03-21 13:38:47 -07002299/*
2300 * If the bridge module is loaded call the bridging hook.
2301 * Returns NULL if the packet was consumed.
2302 */
2303struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2304 struct sk_buff *skb) __read_mostly;
Stephen Hemminger4fb019a2009-09-11 11:50:08 -07002305EXPORT_SYMBOL_GPL(br_handle_frame_hook);
Michał Mirosławda678292009-06-05 05:35:28 +00002306
Stephen Hemminger6229e362007-03-21 13:38:47 -07002307static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2308 struct packet_type **pt_prev, int *ret,
2309 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310{
2311 struct net_bridge_port *port;
2312
Stephen Hemminger6229e362007-03-21 13:38:47 -07002313 if (skb->pkt_type == PACKET_LOOPBACK ||
2314 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2315 return skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316
2317 if (*pt_prev) {
Stephen Hemminger6229e362007-03-21 13:38:47 -07002318 *ret = deliver_skb(skb, *pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319 *pt_prev = NULL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002320 }
2321
Stephen Hemminger6229e362007-03-21 13:38:47 -07002322 return br_handle_frame_hook(port, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323}
2324#else
Stephen Hemminger6229e362007-03-21 13:38:47 -07002325#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326#endif
2327
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002328#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2329struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2330EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2331
2332static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2333 struct packet_type **pt_prev,
2334 int *ret,
2335 struct net_device *orig_dev)
2336{
2337 if (skb->dev->macvlan_port == NULL)
2338 return skb;
2339
2340 if (*pt_prev) {
2341 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2342 *pt_prev = NULL;
2343 }
2344 return macvlan_handle_frame_hook(skb);
2345}
2346#else
2347#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2348#endif
2349
Linus Torvalds1da177e2005-04-16 15:20:36 -07002350#ifdef CONFIG_NET_CLS_ACT
2351/* TODO: Maybe we should just force sch_ingress to be compiled in
2352 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
2353 * instructions (a compare and 2 extra stores) right now if we don't
2354 * have it on but do have CONFIG_NET_CLS_ACT.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002355 * NOTE: This doesn't stop any functionality; if you don't have
Linus Torvalds1da177e2005-04-16 15:20:36 -07002356 * the ingress scheduler, you just can't add policies on ingress.
2357 *
2358 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002359static int ing_filter(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002360{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361 struct net_device *dev = skb->dev;
Herbert Xuf697c3e2007-10-14 00:38:47 -07002362 u32 ttl = G_TC_RTTL(skb->tc_verd);
David S. Miller555353c2008-07-08 17:33:13 -07002363 struct netdev_queue *rxq;
2364 int result = TC_ACT_OK;
2365 struct Qdisc *q;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002366
Herbert Xuf697c3e2007-10-14 00:38:47 -07002367 if (MAX_RED_LOOP < ttl++) {
2368 printk(KERN_WARNING
2369 "Redir loop detected Dropping packet (%d->%d)\n",
Eric Dumazet8964be42009-11-20 15:35:04 -08002370 skb->skb_iif, dev->ifindex);
Herbert Xuf697c3e2007-10-14 00:38:47 -07002371 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002372 }
2373
Herbert Xuf697c3e2007-10-14 00:38:47 -07002374 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2375 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2376
David S. Miller555353c2008-07-08 17:33:13 -07002377 rxq = &dev->rx_queue;
2378
David S. Miller83874002008-07-17 00:53:03 -07002379 q = rxq->qdisc;
David S. Miller8d50b532008-07-30 02:37:46 -07002380 if (q != &noop_qdisc) {
David S. Miller83874002008-07-17 00:53:03 -07002381 spin_lock(qdisc_lock(q));
David S. Millera9312ae2008-08-17 21:51:03 -07002382 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2383 result = qdisc_enqueue_root(skb, q);
David S. Miller83874002008-07-17 00:53:03 -07002384 spin_unlock(qdisc_lock(q));
2385 }
Herbert Xuf697c3e2007-10-14 00:38:47 -07002386
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387 return result;
2388}
Herbert Xuf697c3e2007-10-14 00:38:47 -07002389
2390static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2391 struct packet_type **pt_prev,
2392 int *ret, struct net_device *orig_dev)
2393{
David S. Miller8d50b532008-07-30 02:37:46 -07002394 if (skb->dev->rx_queue.qdisc == &noop_qdisc)
Herbert Xuf697c3e2007-10-14 00:38:47 -07002395 goto out;
2396
2397 if (*pt_prev) {
2398 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2399 *pt_prev = NULL;
2400 } else {
2401 /* Huh? Why does turning on AF_PACKET affect this? */
2402 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2403 }
2404
2405 switch (ing_filter(skb)) {
2406 case TC_ACT_SHOT:
2407 case TC_ACT_STOLEN:
2408 kfree_skb(skb);
2409 return NULL;
2410 }
2411
2412out:
2413 skb->tc_verd = 0;
2414 return skb;
2415}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416#endif
2417
Patrick McHardybc1d0412008-07-14 22:49:30 -07002418/*
2419 * netif_nit_deliver - deliver received packets to network taps
2420 * @skb: buffer
2421 *
2422 * This function is used to deliver incoming packets to network
2423 * taps. It should be used when the normal netif_receive_skb path
2424 * is bypassed, for example because of VLAN acceleration.
2425 */
2426void netif_nit_deliver(struct sk_buff *skb)
2427{
2428 struct packet_type *ptype;
2429
2430 if (list_empty(&ptype_all))
2431 return;
2432
2433 skb_reset_network_header(skb);
2434 skb_reset_transport_header(skb);
2435 skb->mac_len = skb->network_header - skb->mac_header;
2436
2437 rcu_read_lock();
2438 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2439 if (!ptype->dev || ptype->dev == skb->dev)
2440 deliver_skb(skb, ptype, skb->dev);
2441 }
2442 rcu_read_unlock();
2443}
2444
Stephen Hemminger3b582cc2007-11-01 02:21:47 -07002445/**
2446 * netif_receive_skb - process receive buffer from network
2447 * @skb: buffer to process
2448 *
2449 * netif_receive_skb() is the main receive data processing function.
2450 * It always succeeds. The buffer may be dropped during processing
2451 * for congestion control or by the protocol layers.
2452 *
2453 * This function may only be called from softirq context and interrupts
2454 * should be enabled.
2455 *
2456 * Return values (usually ignored):
2457 * NET_RX_SUCCESS: no congestion
2458 * NET_RX_DROP: packet was dropped
2459 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002460int netif_receive_skb(struct sk_buff *skb)
2461{
2462 struct packet_type *ptype, *pt_prev;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002463 struct net_device *orig_dev;
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002464 struct net_device *null_or_orig;
Andy Gospodarekca8d9ea2010-01-06 12:56:37 +00002465 struct net_device *null_or_bond;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466 int ret = NET_RX_DROP;
Al Viro252e3342006-11-14 20:48:11 -08002467 __be16 type;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468
Eric Dumazet81bbb3d2009-09-30 16:42:42 -07002469 if (!skb->tstamp.tv64)
2470 net_timestamp(skb);
2471
Eric Dumazet05423b22009-10-26 18:40:35 -07002472 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
Patrick McHardy9b22ea52008-11-04 14:49:57 -08002473 return NET_RX_SUCCESS;
2474
Linus Torvalds1da177e2005-04-16 15:20:36 -07002475 /* if we've gotten here through NAPI, check netpoll */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002476 if (netpoll_receive_skb(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477 return NET_RX_DROP;
2478
Eric Dumazet8964be42009-11-20 15:35:04 -08002479 if (!skb->skb_iif)
2480 skb->skb_iif = skb->dev->ifindex;
David S. Miller86e65da2005-08-09 19:36:29 -07002481
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002482 null_or_orig = NULL;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002483 orig_dev = skb->dev;
2484 if (orig_dev->master) {
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002485 if (skb_bond_should_drop(skb))
2486 null_or_orig = orig_dev; /* deliver only exact match */
2487 else
2488 skb->dev = orig_dev->master;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002489 }
Jay Vosburgh8f903c72006-02-21 16:36:44 -08002490
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491 __get_cpu_var(netdev_rx_stat).total++;
2492
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002493 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03002494 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07002495 skb->mac_len = skb->network_header - skb->mac_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496
2497 pt_prev = NULL;
2498
2499 rcu_read_lock();
2500
2501#ifdef CONFIG_NET_CLS_ACT
2502 if (skb->tc_verd & TC_NCLS) {
2503 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2504 goto ncls;
2505 }
2506#endif
2507
2508 list_for_each_entry_rcu(ptype, &ptype_all, list) {
Joe Eykholtf9823072008-07-02 18:22:02 -07002509 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2510 ptype->dev == orig_dev) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002511 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002512 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002513 pt_prev = ptype;
2514 }
2515 }
2516
2517#ifdef CONFIG_NET_CLS_ACT
Herbert Xuf697c3e2007-10-14 00:38:47 -07002518 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2519 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002521ncls:
2522#endif
2523
Stephen Hemminger6229e362007-03-21 13:38:47 -07002524 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2525 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526 goto out;
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002527 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2528 if (!skb)
2529 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530
Andy Gospodarek1f3c8802009-12-14 10:48:58 +00002531 /*
2532 * Make sure frames received on VLAN interfaces stacked on
2533 * bonding interfaces still make their way to any base bonding
2534 * device that may have registered for a specific ptype. The
2535 * handler may have to adjust skb->dev and orig_dev.
Andy Gospodarek1f3c8802009-12-14 10:48:58 +00002536 */
Andy Gospodarekca8d9ea2010-01-06 12:56:37 +00002537 null_or_bond = NULL;
Andy Gospodarek1f3c8802009-12-14 10:48:58 +00002538 if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
2539 (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
Andy Gospodarekca8d9ea2010-01-06 12:56:37 +00002540 null_or_bond = vlan_dev_real_dev(skb->dev);
Andy Gospodarek1f3c8802009-12-14 10:48:58 +00002541 }
2542
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 type = skb->protocol;
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002544 list_for_each_entry_rcu(ptype,
2545 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Andy Gospodarek1f3c8802009-12-14 10:48:58 +00002546 if (ptype->type == type && (ptype->dev == null_or_orig ||
Andy Gospodarekca8d9ea2010-01-06 12:56:37 +00002547 ptype->dev == skb->dev || ptype->dev == orig_dev ||
2548 ptype->dev == null_or_bond)) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002549 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002550 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 pt_prev = ptype;
2552 }
2553 }
2554
2555 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002556 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557 } else {
2558 kfree_skb(skb);
2559 /* Jamal, now you will not be able to escape explaining
2560 * to me how you were going to use this. :-)
2561 */
2562 ret = NET_RX_DROP;
2563 }
2564
2565out:
2566 rcu_read_unlock();
2567 return ret;
2568}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002569EXPORT_SYMBOL(netif_receive_skb);
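/*
 * Example (editor's sketch, not part of this file): a NAPI poll
 * handler feeding packets to netif_receive_skb(). struct sample_priv
 * and its rx_queue, filled elsewhere (e.g. from the device IRQ), are
 * assumptions.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>

struct sample_priv {
        struct napi_struct napi;
        struct sk_buff_head rx_queue;
};

static int sample_poll(struct napi_struct *napi, int budget)
{
        struct sample_priv *priv = container_of(napi, struct sample_priv, napi);
        int work_done = 0;

        while (work_done < budget) {
                struct sk_buff *skb = skb_dequeue(&priv->rx_queue);

                if (!skb)
                        break;
                /* Runs in softirq context with interrupts enabled. */
                netif_receive_skb(skb);
                work_done++;
        }

        if (work_done < budget)
                napi_complete(napi);    /* re-enable the device IRQ here */

        return work_done;
}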
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07002571/* Network device is going away, flush any packets still pending */
2572static void flush_backlog(void *arg)
2573{
2574 struct net_device *dev = arg;
2575 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2576 struct sk_buff *skb, *tmp;
2577
2578 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2579 if (skb->dev == dev) {
2580 __skb_unlink(skb, &queue->input_pkt_queue);
2581 kfree_skb(skb);
2582 }
2583}

static int napi_gro_complete(struct sk_buff *skb)
{
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int err = -ENOENT;

	if (NAPI_GRO_CB(skb)->count == 1) {
		skb_shinfo(skb)->gso_size = 0;
		goto out;
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
			continue;

		err = ptype->gro_complete(skb);
		break;
	}
	rcu_read_unlock();

	if (err) {
		WARN_ON(&ptype->list == head);
		kfree_skb(skb);
		return NET_RX_SUCCESS;
	}

out:
	return netif_receive_skb(skb);
}

static void napi_gro_flush(struct napi_struct *napi)
{
	struct sk_buff *skb, *next;

	for (skb = napi->gro_list; skb; skb = next) {
		next = skb->next;
		skb->next = NULL;
		napi_gro_complete(skb);
	}

	napi->gro_count = 0;
	napi->gro_list = NULL;
}

enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int same_flow;
	int mac_len;
	enum gro_result ret;

	if (!(skb->dev->features & NETIF_F_GRO))
		goto normal;

	if (skb_is_gso(skb) || skb_has_frags(skb))
		goto normal;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
			continue;

		skb_set_network_header(skb, skb_gro_offset(skb));
		mac_len = skb->network_header - skb->mac_header;
		skb->mac_len = mac_len;
		NAPI_GRO_CB(skb)->same_flow = 0;
		NAPI_GRO_CB(skb)->flush = 0;
		NAPI_GRO_CB(skb)->free = 0;

		pp = ptype->gro_receive(&napi->gro_list, skb);
		break;
	}
	rcu_read_unlock();

	if (&ptype->list == head)
		goto normal;

	same_flow = NAPI_GRO_CB(skb)->same_flow;
	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;

	if (pp) {
		struct sk_buff *nskb = *pp;

		*pp = nskb->next;
		nskb->next = NULL;
		napi_gro_complete(nskb);
		napi->gro_count--;
	}

	if (same_flow)
		goto ok;

	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
		goto normal;

	napi->gro_count++;
	NAPI_GRO_CB(skb)->count = 1;
	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
	skb->next = napi->gro_list;
	napi->gro_list = skb;
	ret = GRO_HELD;

pull:
	if (skb_headlen(skb) < skb_gro_offset(skb)) {
		int grow = skb_gro_offset(skb) - skb_headlen(skb);

		BUG_ON(skb->end - skb->tail < grow);

		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);

		skb->tail += grow;
		skb->data_len -= grow;

		skb_shinfo(skb)->frags[0].page_offset += grow;
		skb_shinfo(skb)->frags[0].size -= grow;

		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
			put_page(skb_shinfo(skb)->frags[0].page);
			/* memmove() takes a byte count: move the remaining
			 * frag descriptors, not just nr_frags bytes.
			 */
			memmove(skb_shinfo(skb)->frags,
				skb_shinfo(skb)->frags + 1,
				--skb_shinfo(skb)->nr_frags *
					sizeof(skb_shinfo(skb)->frags[0]));
		}
	}

ok:
	return ret;

normal:
	ret = GRO_NORMAL;
	goto pull;
}
EXPORT_SYMBOL(dev_gro_receive);

static gro_result_t
__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	struct sk_buff *p;

	if (netpoll_rx_on(skb))
		return GRO_NORMAL;

	for (p = napi->gro_list; p; p = p->next) {
		NAPI_GRO_CB(p)->same_flow =
			(p->dev == skb->dev) &&
			!compare_ether_header(skb_mac_header(p),
					      skb_gro_mac_header(skb));
		NAPI_GRO_CB(p)->flush = 0;
	}

	return dev_gro_receive(napi, skb);
}

gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
	switch (ret) {
	case GRO_NORMAL:
		if (netif_receive_skb(skb))
			ret = GRO_DROP;
		break;

	case GRO_DROP:
	case GRO_MERGED_FREE:
		kfree_skb(skb);
		break;

	case GRO_HELD:
	case GRO_MERGED:
		break;
	}

	return ret;
}
EXPORT_SYMBOL(napi_skb_finish);

void skb_gro_reset_offset(struct sk_buff *skb)
{
	NAPI_GRO_CB(skb)->data_offset = 0;
	NAPI_GRO_CB(skb)->frag0 = NULL;
	NAPI_GRO_CB(skb)->frag0_len = 0;

	if (skb->mac_header == skb->tail &&
	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
		NAPI_GRO_CB(skb)->frag0 =
			page_address(skb_shinfo(skb)->frags[0].page) +
			skb_shinfo(skb)->frags[0].page_offset;
		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
	}
}
EXPORT_SYMBOL(skb_gro_reset_offset);

gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	skb_gro_reset_offset(skb);

	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
}
EXPORT_SYMBOL(napi_gro_receive);
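
/*
 * Usage sketch (hypothetical driver code): a NAPI poll loop normally
 * feeds received skbs to GRO via napi_gro_receive() instead of calling
 * netif_receive_skb() directly, so same-flow TCP segments can be merged.
 * struct my_adapter and my_build_rx_skb() are invented names, not APIs
 * from this file.
 */
#if 0
static int my_clean_rx(struct my_adapter *ap, int budget)
{
	struct sk_buff *skb;
	int done = 0;

	while (done < budget && (skb = my_build_rx_skb(ap)) != NULL) {
		skb->protocol = eth_type_trans(skb, ap->netdev);
		/* May return GRO_MERGED, GRO_HELD, GRO_NORMAL or GRO_DROP. */
		napi_gro_receive(&ap->napi, skb);
		done++;
	}
	return done;
}
#endif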

void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
{
	__skb_pull(skb, skb_headlen(skb));
	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));

	napi->skb = skb;
}
EXPORT_SYMBOL(napi_reuse_skb);

struct sk_buff *napi_get_frags(struct napi_struct *napi)
{
	struct sk_buff *skb = napi->skb;

	if (!skb) {
		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
		if (skb)
			napi->skb = skb;
	}
	return skb;
}
EXPORT_SYMBOL(napi_get_frags);

gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
			       gro_result_t ret)
{
	switch (ret) {
	case GRO_NORMAL:
	case GRO_HELD:
		skb->protocol = eth_type_trans(skb, napi->dev);

		if (ret == GRO_HELD)
			skb_gro_pull(skb, -ETH_HLEN);
		else if (netif_receive_skb(skb))
			ret = GRO_DROP;
		break;

	case GRO_DROP:
	case GRO_MERGED_FREE:
		napi_reuse_skb(napi, skb);
		break;

	case GRO_MERGED:
		break;
	}

	return ret;
}
EXPORT_SYMBOL(napi_frags_finish);

struct sk_buff *napi_frags_skb(struct napi_struct *napi)
{
	struct sk_buff *skb = napi->skb;
	struct ethhdr *eth;
	unsigned int hlen;
	unsigned int off;

	napi->skb = NULL;

	skb_reset_mac_header(skb);
	skb_gro_reset_offset(skb);

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*eth);
	eth = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, hlen)) {
		eth = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!eth)) {
			napi_reuse_skb(napi, skb);
			skb = NULL;
			goto out;
		}
	}

	skb_gro_pull(skb, sizeof(*eth));

	/*
	 * This works because the only protocols we care about don't require
	 * special handling.  We'll fix it up properly at the end.
	 */
	skb->protocol = eth->h_proto;

out:
	return skb;
}
EXPORT_SYMBOL(napi_frags_skb);

gro_result_t napi_gro_frags(struct napi_struct *napi)
{
	struct sk_buff *skb = napi_frags_skb(napi);

	if (!skb)
		return GRO_DROP;

	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
}
EXPORT_SYMBOL(napi_gro_frags);
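
/*
 * Usage sketch (hypothetical driver code): a page-based driver using the
 * frags GRO entry points. It borrows napi->skb via napi_get_frags(),
 * attaches the received page, and hands the skb back through
 * napi_gro_frags(), which parses the Ethernet header itself. The my_*
 * names and descriptor layout are invented.
 */
#if 0
static int my_rx_frag(struct napi_struct *napi, struct page *page,
		      unsigned int off, unsigned int len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (!skb)
		return -ENOMEM;		/* caller recycles the page */

	skb_fill_page_desc(skb, 0, page, off, len);
	skb->len += len;
	skb->data_len += len;
	skb->truesize += len;

	return napi_gro_frags(napi) == GRO_DROP ? -EIO : 0;
}
#endif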

static int process_backlog(struct napi_struct *napi, int quota)
{
	int work = 0;
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;

	napi->weight = weight_p;
	do {
		struct sk_buff *skb;

		local_irq_disable();
		skb = __skb_dequeue(&queue->input_pkt_queue);
		if (!skb) {
			__napi_complete(napi);
			local_irq_enable();
			break;
		}
		local_irq_enable();

		netif_receive_skb(skb);
	} while (++work < quota && jiffies == start_time);

	return work;
}

/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run
 */
void __napi_schedule(struct napi_struct *n)
{
	unsigned long flags;

	local_irq_save(flags);
	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__napi_schedule);
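
/*
 * Usage sketch (hypothetical driver code): the usual interrupt-handler
 * pairing for __napi_schedule(). Drivers normally go through
 * napi_schedule(), which tests NAPI_STATE_SCHED via napi_schedule_prep()
 * before calling __napi_schedule(). The my_* names are invented.
 */
#if 0
static irqreturn_t my_isr(int irq, void *data)
{
	struct my_adapter *ap = data;	/* hypothetical driver state */

	my_disable_rx_irq(ap);		/* hypothetical: mask RX interrupts */
	napi_schedule(&ap->napi);	/* queue ap->napi for net_rx_action() */
	return IRQ_HANDLED;
}
#endif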

void __napi_complete(struct napi_struct *n)
{
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	BUG_ON(n->gro_list);

	list_del(&n->poll_list);
	smp_mb__before_clear_bit();
	clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);

void napi_complete(struct napi_struct *n)
{
	unsigned long flags;

	/*
	 * don't let napi dequeue from the cpu poll list
	 * just in case it's running on a different cpu
	 */
	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
		return;

	napi_gro_flush(n);
	local_irq_save(flags);
	__napi_complete(n);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(napi_complete);

void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
		    int (*poll)(struct napi_struct *, int), int weight)
{
	INIT_LIST_HEAD(&napi->poll_list);
	napi->gro_count = 0;
	napi->gro_list = NULL;
	napi->skb = NULL;
	napi->poll = poll;
	napi->weight = weight;
	list_add(&napi->dev_list, &dev->napi_list);
	napi->dev = dev;
#ifdef CONFIG_NETPOLL
	spin_lock_init(&napi->poll_lock);
	napi->poll_owner = -1;
#endif
	set_bit(NAPI_STATE_SCHED, &napi->state);
}
EXPORT_SYMBOL(netif_napi_add);
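
/*
 * Usage sketch (hypothetical driver code): a minimal poll routine and its
 * registration with netif_napi_add(). A weight of 64 matches common
 * driver practice; the my_* names are invented.
 */
#if 0
static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_adapter *ap = container_of(napi, struct my_adapter, napi);
	int done = my_clean_rx(ap, budget);	/* hypothetical RX cleaner */

	if (done < budget) {
		/* Ring is drained: leave polled mode and re-arm the IRQ. */
		napi_complete(napi);
		my_enable_rx_irq(ap);		/* hypothetical */
	}
	return done;
}

static void my_setup_napi(struct my_adapter *ap)
{
	netif_napi_add(ap->netdev, &ap->napi, my_poll, 64);
}
#endif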

void netif_napi_del(struct napi_struct *napi)
{
	struct sk_buff *skb, *next;

	list_del_init(&napi->dev_list);
	napi_free_frags(napi);

	for (skb = napi->gro_list; skb; skb = next) {
		next = skb->next;
		skb->next = NULL;
		kfree_skb(skb);
	}

	napi->gro_list = NULL;
	napi->gro_count = 0;
}
EXPORT_SYMBOL(netif_napi_del);


static void net_rx_action(struct softirq_action *h)
{
	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
	unsigned long time_limit = jiffies + 2;
	int budget = netdev_budget;
	void *have;

	local_irq_disable();

	while (!list_empty(list)) {
		struct napi_struct *n;
		int work, weight;

		/* If softirq window is exhausted then punt.
		 * Allow this to run for 2 jiffies, which allows
		 * an average latency of 1.5/HZ.
		 */
		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
			goto softnet_break;

		local_irq_enable();

		/* Even though interrupts have been re-enabled, this
		 * access is safe because interrupts can only add new
		 * entries to the tail of this list, and only ->poll()
		 * calls can remove this head entry from the list.
		 */
		n = list_entry(list->next, struct napi_struct, poll_list);

		have = netpoll_poll_lock(n);

		weight = n->weight;

		/* This NAPI_STATE_SCHED test is for avoiding a race
		 * with netpoll's poll_napi().  Only the entity which
		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call.  Therefore we avoid
		 * accidentally calling ->poll() when NAPI is not scheduled.
		 */
		work = 0;
		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
			work = n->poll(n, weight);
			trace_napi_poll(n);
		}

		WARN_ON_ONCE(work > weight);

		budget -= work;

		local_irq_disable();

		/* Drivers must not modify the NAPI state if they
		 * consume the entire weight.  In such cases this code
		 * still "owns" the NAPI instance and therefore can
		 * move the instance around on the list at-will.
		 */
		if (unlikely(work == weight)) {
			if (unlikely(napi_disable_pending(n))) {
				local_irq_enable();
				napi_complete(n);
				local_irq_disable();
			} else
				list_move_tail(&n->poll_list, list);
		}

		netpoll_poll_unlock(have);
	}
out:
	local_irq_enable();

#ifdef CONFIG_NET_DMA
	/*
	 * There may not be any more sk_buffs coming right now, so push
	 * any pending DMA copies to hardware
	 */
	dma_issue_pending_all();
#endif

	return;

softnet_break:
	__get_cpu_var(netdev_rx_stat).time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}

static gifconf_func_t *gifconf_list[NPROTO];

/**
 * register_gifconf - register a SIOCGIF handler
 * @family: Address family
 * @gifconf: Function handler
 *
 * Register protocol dependent address dumping routines. The handler
 * that is passed must not be freed or reused until it has been replaced
 * by another handler.
 */
int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
{
	if (family >= NPROTO)
		return -EINVAL;
	gifconf_list[family] = gifconf;
	return 0;
}
EXPORT_SYMBOL(register_gifconf);
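
/*
 * Usage sketch (hypothetical): how an address family plugs into the
 * SIOCGIFCONF machinery. IPv4 does essentially this from devinet.c with
 * its inet_gifconf() dumper; the example_* code below is a hedged
 * restatement, not a copy of that code.
 */
#if 0
static int example_gifconf(struct net_device *dev, char __user *buf, int len)
{
	/* Write struct ifreq entries for dev into buf (at most len bytes)
	 * and return the number of bytes consumed, or 0 to skip dev.
	 * With buf == NULL, return the space that would be needed.
	 */
	return 0;
}

static int __init example_gifconf_init(void)
{
	return register_gifconf(PF_INET, example_gifconf);
}
#endif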


/*
 *	Map an interface index to its name (SIOCGIFNAME)
 */

/*
 *	We need this ioctl for efficient implementation of the
 *	if_indextoname() function required by the IPv6 API.  Without
 *	it, we would have to search all the interfaces to find a
 *	match.  --pb
 */

static int dev_ifname(struct net *net, struct ifreq __user *arg)
{
	struct net_device *dev;
	struct ifreq ifr;

	/*
	 *	Fetch the caller's info block.
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
	if (!dev) {
		rcu_read_unlock();
		return -ENODEV;
	}

	strcpy(ifr.ifr_name, dev->name);
	rcu_read_unlock();

	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
		return -EFAULT;
	return 0;
}
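
/*
 * Usage sketch (userspace C, shown for illustration only): exercising
 * SIOCGIFNAME to map an ifindex to a name, which is what glibc's
 * if_indextoname() does internally. Assumes ifindex 1 exists (usually
 * the loopback device).
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

int main(void)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_ifindex = 1;
	if (ioctl(fd, SIOCGIFNAME, &ifr) == 0)
		printf("ifindex 1 is %s\n", ifr.ifr_name);
	close(fd);
	return 0;
}
#endif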

/*
 *	Perform a SIOCGIFCONF call. This structure will change
 *	size eventually, and there is nothing I can do about it.
 *	Thus we will need a 'compatibility mode'.
 */

static int dev_ifconf(struct net *net, char __user *arg)
{
	struct ifconf ifc;
	struct net_device *dev;
	char __user *pos;
	int len;
	int total;
	int i;

	/*
	 *	Fetch the caller's info block.
	 */

	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
		return -EFAULT;

	pos = ifc.ifc_buf;
	len = ifc.ifc_len;

	/*
	 *	Loop over the interfaces, and write an info block for each.
	 */

	total = 0;
	for_each_netdev(net, dev) {
		for (i = 0; i < NPROTO; i++) {
			if (gifconf_list[i]) {
				int done;
				if (!pos)
					done = gifconf_list[i](dev, NULL, 0);
				else
					done = gifconf_list[i](dev, pos + total,
							       len - total);
				if (done < 0)
					return -EFAULT;
				total += done;
			}
		}
	}

	/*
	 *	All done.  Write the updated control block back to the caller.
	 */
	ifc.ifc_len = total;

	/*
	 *	Both BSD and Solaris return 0 here, so we do too.
	 */
	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
}

#ifdef CONFIG_PROC_FS
/*
 *	This is invoked by the /proc filesystem handler to display a device
 *	in detail.
 */
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);
	loff_t off;
	struct net_device *dev;

	rcu_read_lock();
	if (!*pos)
		return SEQ_START_TOKEN;

	off = 1;
	for_each_netdev_rcu(net, dev)
		if (off++ == *pos)
			return dev;

	return NULL;
}

void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net_device *dev = (v == SEQ_START_TOKEN) ?
				  first_net_device(seq_file_net(seq)) :
				  next_net_device((struct net_device *)v);

	++*pos;
	return rcu_dereference(dev);
}

void dev_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
	const struct net_device_stats *stats = dev_get_stats(dev);

	seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
		   dev->name, stats->rx_bytes, stats->rx_packets,
		   stats->rx_errors,
		   stats->rx_dropped + stats->rx_missed_errors,
		   stats->rx_fifo_errors,
		   stats->rx_length_errors + stats->rx_over_errors +
		    stats->rx_crc_errors + stats->rx_frame_errors,
		   stats->rx_compressed, stats->multicast,
		   stats->tx_bytes, stats->tx_packets,
		   stats->tx_errors, stats->tx_dropped,
		   stats->tx_fifo_errors, stats->collisions,
		   stats->tx_carrier_errors +
		    stats->tx_aborted_errors +
		    stats->tx_window_errors +
		    stats->tx_heartbeat_errors,
		   stats->tx_compressed);
}

/*
 *	Called from the PROCfs module. This now uses the new arbitrary sized
 *	/proc/net interface to create /proc/net/dev
 */
static int dev_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Inter-|   Receive                            "
			      "                    |  Transmit\n"
			      " face |bytes    packets errs drop fifo frame "
			      "compressed multicast|bytes    packets errs "
			      "drop fifo colls carrier compressed\n");
	else
		dev_seq_printf_stats(seq, v);
	return 0;
}
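
/*
 * For reference, a hypothetical /proc/net/dev line produced by
 * dev_seq_printf_stats() looks like the following (all figures invented
 * for illustration; the header above names the columns):
 *
 *     lo:   43250     512    0    0    0     0          0         0
 *        43250     512    0    0    0     0       0          0
 *
 * (one line per device; wrapped here only for comment width)
 */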

static struct netif_rx_stats *softnet_get_online(loff_t *pos)
{
	struct netif_rx_stats *rc = NULL;

	while (*pos < nr_cpu_ids)
		if (cpu_online(*pos)) {
			rc = &per_cpu(netdev_rx_stat, *pos);
			break;
		} else
			++*pos;
	return rc;
}

static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
{
	return softnet_get_online(pos);
}

static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return softnet_get_online(pos);
}

static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}

static int softnet_seq_show(struct seq_file *seq, void *v)
{
	struct netif_rx_stats *s = v;

	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
		   s->total, s->dropped, s->time_squeeze, 0,
		   0, 0, 0, 0, /* was fastroute */
		   s->cpu_collision);
	return 0;
}

static const struct seq_operations dev_seq_ops = {
	.start = dev_seq_start,
	.next  = dev_seq_next,
	.stop  = dev_seq_stop,
	.show  = dev_seq_show,
};

static int dev_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &dev_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations dev_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = dev_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

static const struct seq_operations softnet_seq_ops = {
	.start = softnet_seq_start,
	.next  = softnet_seq_next,
	.stop  = softnet_seq_stop,
	.show  = softnet_seq_show,
};

static int softnet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &softnet_seq_ops);
}

static const struct file_operations softnet_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = softnet_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

static void *ptype_get_idx(loff_t pos)
{
	struct packet_type *pt = NULL;
	loff_t i = 0;
	int t;

	list_for_each_entry_rcu(pt, &ptype_all, list) {
		if (i == pos)
			return pt;
		++i;
	}

	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
			if (i == pos)
				return pt;
			++i;
		}
	}
	return NULL;
}

static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct packet_type *pt;
	struct list_head *nxt;
	int hash;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ptype_get_idx(0);

	pt = v;
	nxt = pt->list.next;
	if (pt->type == htons(ETH_P_ALL)) {
		if (nxt != &ptype_all)
			goto found;
		hash = 0;
		nxt = ptype_base[0].next;
	} else
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;

	while (nxt == &ptype_base[hash]) {
		if (++hash >= PTYPE_HASH_SIZE)
			return NULL;
		nxt = ptype_base[hash].next;
	}
found:
	return list_entry(nxt, struct packet_type, list);
}

static void ptype_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int ptype_seq_show(struct seq_file *seq, void *v)
{
	struct packet_type *pt = v;

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Type Device      Function\n");
	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
		if (pt->type == htons(ETH_P_ALL))
			seq_puts(seq, "ALL ");
		else
			seq_printf(seq, "%04x", ntohs(pt->type));

		seq_printf(seq, " %-8s %pF\n",
			   pt->dev ? pt->dev->name : "", pt->func);
	}

	return 0;
}

static const struct seq_operations ptype_seq_ops = {
	.start = ptype_seq_start,
	.next  = ptype_seq_next,
	.stop  = ptype_seq_stop,
	.show  = ptype_seq_show,
};

static int ptype_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ptype_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations ptype_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = ptype_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};


static int __net_init dev_proc_net_init(struct net *net)
{
	int rc = -ENOMEM;

	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
		goto out;
	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
		goto out_dev;
	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
		goto out_softnet;

	if (wext_proc_init(net))
		goto out_ptype;
	rc = 0;
out:
	return rc;
out_ptype:
	proc_net_remove(net, "ptype");
out_softnet:
	proc_net_remove(net, "softnet_stat");
out_dev:
	proc_net_remove(net, "dev");
	goto out;
}

static void __net_exit dev_proc_net_exit(struct net *net)
{
	wext_proc_exit(net);

	proc_net_remove(net, "ptype");
	proc_net_remove(net, "softnet_stat");
	proc_net_remove(net, "dev");
}

static struct pernet_operations __net_initdata dev_proc_ops = {
	.init = dev_proc_net_init,
	.exit = dev_proc_net_exit,
};

static int __init dev_proc_init(void)
{
	return register_pernet_subsys(&dev_proc_ops);
}
#else
#define dev_proc_init() 0
#endif	/* CONFIG_PROC_FS */


/**
 * netdev_set_master - set up master/slave pair
 * @slave: slave device
 * @master: new master device
 *
 * Changes the master device of the slave. Pass %NULL to break the
 * bonding. The caller must hold the RTNL semaphore. On a failure
 * a negative errno code is returned. On success the reference counts
 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
 * function returns zero.
 */
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
	struct net_device *old = slave->master;

	ASSERT_RTNL();

	if (master) {
		if (old)
			return -EBUSY;
		dev_hold(master);
	}

	slave->master = master;

	synchronize_net();

	if (old)
		dev_put(old);

	if (master)
		slave->flags |= IFF_SLAVE;
	else
		slave->flags &= ~IFF_SLAVE;

	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
	return 0;
}
EXPORT_SYMBOL(netdev_set_master);
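
/*
 * Usage sketch (hypothetical): how a bonding-style driver would call
 * netdev_set_master() when enslaving a device. Error handling beyond the
 * return code is omitted; the example_* names are invented.
 */
#if 0
static int example_enslave(struct net_device *bond_dev,
			   struct net_device *slave_dev)
{
	int err;

	ASSERT_RTNL();		/* caller holds the RTNL semaphore */

	err = netdev_set_master(slave_dev, bond_dev);
	if (err)
		return err;	/* -EBUSY if the slave already has a master */

	/* ... driver-specific enslavement work would follow; on teardown,
	 * netdev_set_master(slave_dev, NULL) breaks the pairing.
	 */
	return 0;
}
#endif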

static void dev_change_rx_flags(struct net_device *dev, int flags)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
		ops->ndo_change_rx_flags(dev, flags);
}

static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;
	uid_t uid;
	gid_t gid;

	ASSERT_RTNL();

	dev->flags |= IFF_PROMISC;
	dev->promiscuity += inc;
	if (dev->promiscuity == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, leave promiscuity untouched
		 * and return an error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_PROMISC;
		else {
			dev->promiscuity -= inc;
			printk(KERN_WARNING "%s: promiscuity touches roof, "
				"set promiscuity failed, promiscuity feature "
				"of device might be broken.\n", dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags != old_flags) {
		printk(KERN_INFO "device %s %s promiscuous mode\n",
		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
							       "left");
		if (audit_enabled) {
			current_uid_gid(&uid, &gid);
			audit_log(current->audit_context, GFP_ATOMIC,
				AUDIT_ANOM_PROMISCUOUS,
				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
				dev->name, (dev->flags & IFF_PROMISC),
				(old_flags & IFF_PROMISC),
				audit_get_loginuid(current),
				uid, gid,
				audit_get_sessionid(current));
		}

		dev_change_rx_flags(dev, IFF_PROMISC);
	}
	return 0;
}

/**
 * dev_set_promiscuity - update promiscuity count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove promiscuity from a device. While the count in the device
 * remains above zero the interface remains promiscuous. Once it hits zero
 * the device reverts back to normal filtering operation. A negative inc
 * value is used to drop promiscuity on the device.
 * Return 0 if successful or a negative errno code on error.
 */
int dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;
	int err;

	err = __dev_set_promiscuity(dev, inc);
	if (err < 0)
		return err;
	if (dev->flags != old_flags)
		dev_set_rx_mode(dev);
	return err;
}
EXPORT_SYMBOL(dev_set_promiscuity);
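
/*
 * Usage sketch (hypothetical): packet-capture style use of the
 * promiscuity counter under rtnl_lock(). Each successful +1 must
 * eventually be balanced by a -1; callers own their increments on the
 * counter, not the IFF_PROMISC bit itself.
 */
#if 0
static int example_capture_start(struct net_device *dev)
{
	ASSERT_RTNL();
	return dev_set_promiscuity(dev, 1);	/* may return -EOVERFLOW */
}

static void example_capture_stop(struct net_device *dev)
{
	ASSERT_RTNL();
	dev_set_promiscuity(dev, -1);
}
#endif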

/**
 * dev_set_allmulti - update allmulti count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove reception of all multicast frames to a device. While the
 * count in the device remains above zero the interface remains listening
 * to all interfaces. Once it hits zero the device reverts back to normal
 * filtering operation. A negative @inc value is used to drop the counter
 * when releasing a resource needing all multicasts.
 * Return 0 if successful or a negative errno code on error.
 */

int dev_set_allmulti(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	ASSERT_RTNL();

	dev->flags |= IFF_ALLMULTI;
	dev->allmulti += inc;
	if (dev->allmulti == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, leave allmulti untouched
		 * and return an error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_ALLMULTI;
		else {
			dev->allmulti -= inc;
			printk(KERN_WARNING "%s: allmulti touches roof, "
				"set allmulti failed, allmulti feature of "
				"device might be broken.\n", dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags ^ old_flags) {
		dev_change_rx_flags(dev, IFF_ALLMULTI);
		dev_set_rx_mode(dev);
	}
	return 0;
}
EXPORT_SYMBOL(dev_set_allmulti);

/*
 *	Upload unicast and multicast address lists to device and
 *	configure RX filtering. When the device doesn't support unicast
 *	filtering it is put in promiscuous mode while unicast addresses
 *	are present.
 */
void __dev_set_rx_mode(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	/* dev_open will call this function so the list will stay sane. */
	if (!(dev->flags&IFF_UP))
		return;

	if (!netif_device_present(dev))
		return;

	if (ops->ndo_set_rx_mode)
		ops->ndo_set_rx_mode(dev);
	else {
		/* Unicast addresses changes may only happen under the rtnl,
		 * therefore calling __dev_set_promiscuity here is safe.
		 */
		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
			__dev_set_promiscuity(dev, 1);
			dev->uc_promisc = 1;
		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
			__dev_set_promiscuity(dev, -1);
			dev->uc_promisc = 0;
		}

		if (ops->ndo_set_multicast_list)
			ops->ndo_set_multicast_list(dev);
	}
}

void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
}

/* hw addresses list handling functions */

static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
			 int addr_len, unsigned char addr_type)
{
	struct netdev_hw_addr *ha;
	int alloc_size;

	if (addr_len > MAX_ADDR_LEN)
		return -EINVAL;

	list_for_each_entry(ha, &list->list, list) {
		if (!memcmp(ha->addr, addr, addr_len) &&
		    ha->type == addr_type) {
			ha->refcount++;
			return 0;
		}
	}

	alloc_size = sizeof(*ha);
	if (alloc_size < L1_CACHE_BYTES)
		alloc_size = L1_CACHE_BYTES;
	ha = kmalloc(alloc_size, GFP_ATOMIC);
	if (!ha)
		return -ENOMEM;
	memcpy(ha->addr, addr, addr_len);
	ha->type = addr_type;
	ha->refcount = 1;
	ha->synced = false;
	list_add_tail_rcu(&ha->list, &list->list);
	list->count++;
	return 0;
}

static void ha_rcu_free(struct rcu_head *head)
{
	struct netdev_hw_addr *ha;

	ha = container_of(head, struct netdev_hw_addr, rcu_head);
	kfree(ha);
}

static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
			 int addr_len, unsigned char addr_type)
{
	struct netdev_hw_addr *ha;

	list_for_each_entry(ha, &list->list, list) {
		if (!memcmp(ha->addr, addr, addr_len) &&
		    (ha->type == addr_type || !addr_type)) {
			if (--ha->refcount)
				return 0;
			list_del_rcu(&ha->list);
			call_rcu(&ha->rcu_head, ha_rcu_free);
			list->count--;
			return 0;
		}
	}
	return -ENOENT;
}

static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
				  struct netdev_hw_addr_list *from_list,
				  int addr_len,
				  unsigned char addr_type)
{
	int err;
	struct netdev_hw_addr *ha, *ha2;
	unsigned char type;

	list_for_each_entry(ha, &from_list->list, list) {
		type = addr_type ? addr_type : ha->type;
		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
		if (err)
			goto unroll;
	}
	return 0;

unroll:
	list_for_each_entry(ha2, &from_list->list, list) {
		if (ha2 == ha)
			break;
		type = addr_type ? addr_type : ha2->type;
		__hw_addr_del(to_list, ha2->addr, addr_len, type);
	}
	return err;
}

static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
				   struct netdev_hw_addr_list *from_list,
				   int addr_len,
				   unsigned char addr_type)
{
	struct netdev_hw_addr *ha;
	unsigned char type;

	list_for_each_entry(ha, &from_list->list, list) {
		type = addr_type ? addr_type : ha->type;
		/* Delete with the resolved type; passing addr_type here
		 * would wrongly wildcard the type when addr_type is 0.
		 */
		__hw_addr_del(to_list, ha->addr, addr_len, type);
	}
}

static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
			  struct netdev_hw_addr_list *from_list,
			  int addr_len)
{
	int err = 0;
	struct netdev_hw_addr *ha, *tmp;

	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
		if (!ha->synced) {
			err = __hw_addr_add(to_list, ha->addr,
					    addr_len, ha->type);
			if (err)
				break;
			ha->synced = true;
			ha->refcount++;
		} else if (ha->refcount == 1) {
			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
		}
	}
	return err;
}

static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
			     struct netdev_hw_addr_list *from_list,
			     int addr_len)
{
	struct netdev_hw_addr *ha, *tmp;

	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
		if (ha->synced) {
			__hw_addr_del(to_list, ha->addr,
				      addr_len, ha->type);
			ha->synced = false;
			__hw_addr_del(from_list, ha->addr,
				      addr_len, ha->type);
		}
	}
}

static void __hw_addr_flush(struct netdev_hw_addr_list *list)
{
	struct netdev_hw_addr *ha, *tmp;

	list_for_each_entry_safe(ha, tmp, &list->list, list) {
		list_del_rcu(&ha->list);
		call_rcu(&ha->rcu_head, ha_rcu_free);
	}
	list->count = 0;
}

static void __hw_addr_init(struct netdev_hw_addr_list *list)
{
	INIT_LIST_HEAD(&list->list);
	list->count = 0;
}

/* Device addresses handling functions */

static void dev_addr_flush(struct net_device *dev)
{
	/* rtnl_mutex must be held here */

	__hw_addr_flush(&dev->dev_addrs);
	dev->dev_addr = NULL;
}

static int dev_addr_init(struct net_device *dev)
{
	unsigned char addr[MAX_ADDR_LEN];
	struct netdev_hw_addr *ha;
	int err;

	/* rtnl_mutex must be held here */

	__hw_addr_init(&dev->dev_addrs);
	memset(addr, 0, sizeof(addr));
	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
			    NETDEV_HW_ADDR_T_LAN);
	if (!err) {
		/*
		 * Get the first (previously created) address from the list
		 * and set dev_addr pointer to this location.
		 */
		ha = list_first_entry(&dev->dev_addrs.list,
				      struct netdev_hw_addr, list);
		dev->dev_addr = ha->addr;
	}
	return err;
}

/**
 * dev_addr_add - Add a device address
 * @dev: device
 * @addr: address to add
 * @addr_type: address type
 *
 * Add a device address to the device or increase the reference count if
 * it already exists.
 *
 * The caller must hold the rtnl_mutex.
 */
int dev_addr_add(struct net_device *dev, unsigned char *addr,
		 unsigned char addr_type)
{
	int err;

	ASSERT_RTNL();

	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
	if (!err)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
	return err;
}
EXPORT_SYMBOL(dev_addr_add);
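
/*
 * Usage sketch (hypothetical): adding and releasing a secondary hardware
 * address under the rtnl_mutex, as the kernel-doc above requires. The
 * locally administered address value is arbitrary.
 */
#if 0
static int example_add_secondary_addr(struct net_device *dev)
{
	unsigned char addr[ETH_ALEN] = {0x02, 0x00, 0x00, 0x00, 0x00, 0x01};
	int err;

	rtnl_lock();
	err = dev_addr_add(dev, addr, NETDEV_HW_ADDR_T_LAN);
	if (!err)
		err = dev_addr_del(dev, addr, NETDEV_HW_ADDR_T_LAN);
	rtnl_unlock();
	return err;
}
#endif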
3933
3934/**
3935 * dev_addr_del - Release a device address.
3936 * @dev: device
3937 * @addr: address to delete
3938 * @addr_type: address type
3939 *
3940 * Release reference to a device address and remove it from the device
3941 * if the reference count drops to zero.
3942 *
3943 * The caller must hold the rtnl_mutex.
3944 */
3945int dev_addr_del(struct net_device *dev, unsigned char *addr,
3946 unsigned char addr_type)
3947{
3948 int err;
Jiri Pirkoccffad252009-05-22 23:22:17 +00003949 struct netdev_hw_addr *ha;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003950
3951 ASSERT_RTNL();
3952
Jiri Pirkoccffad252009-05-22 23:22:17 +00003953 /*
3954 * We can not remove the first address from the list because
3955 * dev->dev_addr points to that.
3956 */
Jiri Pirko31278e72009-06-17 01:12:19 +00003957 ha = list_first_entry(&dev->dev_addrs.list,
3958 struct netdev_hw_addr, list);
Jiri Pirkoccffad252009-05-22 23:22:17 +00003959 if (ha->addr == dev->dev_addr && ha->refcount == 1)
3960 return -ENOENT;
3961
Jiri Pirko31278e72009-06-17 01:12:19 +00003962 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003963 addr_type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003964 if (!err)
3965 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3966 return err;
3967}
3968EXPORT_SYMBOL(dev_addr_del);
3969
3970/**
3971 * dev_addr_add_multiple - Add device addresses from another device
3972 * @to_dev: device to which addresses will be added
3973 * @from_dev: device from which addresses will be added
3974 * @addr_type: address type - 0 means type will be used from from_dev
3975 *
3976 * Add device addresses of the one device to another.
3977 **
3978 * The caller must hold the rtnl_mutex.
3979 */
3980int dev_addr_add_multiple(struct net_device *to_dev,
3981 struct net_device *from_dev,
3982 unsigned char addr_type)
3983{
3984 int err;
3985
3986 ASSERT_RTNL();
3987
3988 if (from_dev->addr_len != to_dev->addr_len)
3989 return -EINVAL;
Jiri Pirko31278e72009-06-17 01:12:19 +00003990 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003991 to_dev->addr_len, addr_type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003992 if (!err)
3993 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3994 return err;
3995}
3996EXPORT_SYMBOL(dev_addr_add_multiple);
3997
3998/**
3999 * dev_addr_del_multiple - Delete device addresses by another device
4000 * @to_dev: device where the addresses will be deleted
4001 * @from_dev: device by which addresses the addresses will be deleted
4002 * @addr_type: address type - 0 means type will used from from_dev
4003 *
4004 * Deletes addresses in to device by the list of addresses in from device.
4005 *
4006 * The caller must hold the rtnl_mutex.
4007 */
4008int dev_addr_del_multiple(struct net_device *to_dev,
4009 struct net_device *from_dev,
4010 unsigned char addr_type)
4011{
4012 ASSERT_RTNL();
4013
4014 if (from_dev->addr_len != to_dev->addr_len)
4015 return -EINVAL;
Jiri Pirko31278e72009-06-17 01:12:19 +00004016 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
Jiri Pirkoccffad252009-05-22 23:22:17 +00004017 to_dev->addr_len, addr_type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00004018 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4019 return 0;
4020}
4021EXPORT_SYMBOL(dev_addr_del_multiple);
4022
Jiri Pirko31278e72009-06-17 01:12:19 +00004023/* multicast addresses handling functions */
Jiri Pirkof001fde2009-05-05 02:48:28 +00004024
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004025int __dev_addr_delete(struct dev_addr_list **list, int *count,
4026 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07004027{
4028 struct dev_addr_list *da;
4029
4030 for (; (da = *list) != NULL; list = &da->next) {
4031 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4032 alen == da->da_addrlen) {
4033 if (glbl) {
4034 int old_glbl = da->da_gusers;
4035 da->da_gusers = 0;
4036 if (old_glbl == 0)
4037 break;
4038 }
4039 if (--da->da_users)
4040 return 0;
4041
4042 *list = da->next;
4043 kfree(da);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004044 (*count)--;
Patrick McHardybf742482007-06-27 01:26:19 -07004045 return 0;
4046 }
4047 }
4048 return -ENOENT;
4049}
4050
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004051int __dev_addr_add(struct dev_addr_list **list, int *count,
4052 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07004053{
4054 struct dev_addr_list *da;
4055
4056 for (da = *list; da != NULL; da = da->next) {
4057 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4058 da->da_addrlen == alen) {
4059 if (glbl) {
4060 int old_glbl = da->da_gusers;
4061 da->da_gusers = 1;
4062 if (old_glbl)
4063 return 0;
4064 }
4065 da->da_users++;
4066 return 0;
4067 }
4068 }
4069
Jorge Boncompte [DTI2]12aa3432008-02-19 14:17:04 -08004070 da = kzalloc(sizeof(*da), GFP_ATOMIC);
Patrick McHardybf742482007-06-27 01:26:19 -07004071 if (da == NULL)
4072 return -ENOMEM;
4073 memcpy(da->da_addr, addr, alen);
4074 da->da_addrlen = alen;
4075 da->da_users = 1;
4076 da->da_gusers = glbl ? 1 : 0;
4077 da->next = *list;
4078 *list = da;
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004079 (*count)++;
Patrick McHardybf742482007-06-27 01:26:19 -07004080 return 0;
4081}
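
/*
 * Illustrative sketch, not part of the original file: the reference
 * counting contract of the two list helpers above, shown on a private
 * dev_addr_list. Invented name; guarded with #if 0, never compiled.
 */
#if 0
static void example_addr_list_refcounting(void)
{
	struct dev_addr_list *list = NULL;
	int count = 0;
	unsigned char mac[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

	__dev_addr_add(&list, &count, mac, ETH_ALEN, 0);
	__dev_addr_add(&list, &count, mac, ETH_ALEN, 0);	/* da_users == 2 */

	__dev_addr_delete(&list, &count, mac, ETH_ALEN, 0);	/* still listed */
	__dev_addr_delete(&list, &count, mac, ETH_ALEN, 0);	/* entry freed */
}
#endif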
4082
Patrick McHardy4417da62007-06-27 01:28:10 -07004083/**
4084 * dev_unicast_delete - Release secondary unicast address.
4085 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07004086 * @addr: address to delete
Patrick McHardy4417da62007-06-27 01:28:10 -07004087 *
4088 * Release reference to a secondary unicast address and remove it
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07004089 * from the device if the reference count drops to zero.
Patrick McHardy4417da62007-06-27 01:28:10 -07004090 *
4091 * The caller must hold the rtnl_mutex.
4092 */
Jiri Pirkoccffad252009-05-22 23:22:17 +00004093int dev_unicast_delete(struct net_device *dev, void *addr)
Patrick McHardy4417da62007-06-27 01:28:10 -07004094{
4095 int err;
4096
4097 ASSERT_RTNL();
4098
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004099 netif_addr_lock_bh(dev);
Jiri Pirko31278e72009-06-17 01:12:19 +00004100 err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
4101 NETDEV_HW_ADDR_T_UNICAST);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004102 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07004103 __dev_set_rx_mode(dev);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004104 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07004105 return err;
4106}
4107EXPORT_SYMBOL(dev_unicast_delete);
4108
4109/**
4110 * dev_unicast_add - add a secondary unicast address
4111 * @dev: device
Wang Chen5dbaec52008-06-27 19:35:16 -07004112 * @addr: address to add
Patrick McHardy4417da62007-06-27 01:28:10 -07004113 *
4114 * Add a secondary unicast address to the device or increase
4115 * the reference count if it already exists.
4116 *
4117 * The caller must hold the rtnl_mutex.
4118 */
Jiri Pirkoccffad252009-05-22 23:22:17 +00004119int dev_unicast_add(struct net_device *dev, void *addr)
Patrick McHardy4417da62007-06-27 01:28:10 -07004120{
4121 int err;
4122
4123 ASSERT_RTNL();
4124
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004125 netif_addr_lock_bh(dev);
Jiri Pirko31278e72009-06-17 01:12:19 +00004126 err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
4127 NETDEV_HW_ADDR_T_UNICAST);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004128 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07004129 __dev_set_rx_mode(dev);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004130 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07004131 return err;
4132}
4133EXPORT_SYMBOL(dev_unicast_add);
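
/*
 * Illustrative sketch, not part of the original file: claiming and
 * releasing a secondary unicast address, e.g. for a macvlan-style
 * child interface. Invented names; guarded with #if 0, never compiled.
 */
#if 0
static int example_claim_secondary_uc(struct net_device *dev,
				      unsigned char *mac)
{
	int err;

	rtnl_lock();
	err = dev_unicast_add(dev, mac);
	rtnl_unlock();
	return err;
}

static void example_release_secondary_uc(struct net_device *dev,
					 unsigned char *mac)
{
	rtnl_lock();
	dev_unicast_delete(dev, mac);
	rtnl_unlock();
}
#endif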
4134
Chris Leeche83a2ea2008-01-31 16:53:23 -08004135int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
4136 struct dev_addr_list **from, int *from_count)
4137{
4138 struct dev_addr_list *da, *next;
4139 int err = 0;
4140
4141 da = *from;
4142 while (da != NULL) {
4143 next = da->next;
4144 if (!da->da_synced) {
4145 err = __dev_addr_add(to, to_count,
4146 da->da_addr, da->da_addrlen, 0);
4147 if (err < 0)
4148 break;
4149 da->da_synced = 1;
4150 da->da_users++;
4151 } else if (da->da_users == 1) {
4152 __dev_addr_delete(to, to_count,
4153 da->da_addr, da->da_addrlen, 0);
4154 __dev_addr_delete(from, from_count,
4155 da->da_addr, da->da_addrlen, 0);
4156 }
4157 da = next;
4158 }
4159 return err;
4160}
Johannes Bergc4029082009-06-17 17:43:30 +02004161EXPORT_SYMBOL_GPL(__dev_addr_sync);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004162
4163void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
4164 struct dev_addr_list **from, int *from_count)
4165{
4166 struct dev_addr_list *da, *next;
4167
4168 da = *from;
4169 while (da != NULL) {
4170 next = da->next;
4171 if (da->da_synced) {
4172 __dev_addr_delete(to, to_count,
4173 da->da_addr, da->da_addrlen, 0);
4174 da->da_synced = 0;
4175 __dev_addr_delete(from, from_count,
4176 da->da_addr, da->da_addrlen, 0);
4177 }
4178 da = next;
4179 }
4180}
Johannes Bergc4029082009-06-17 17:43:30 +02004181EXPORT_SYMBOL_GPL(__dev_addr_unsync);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004182
4183/**
4184 * dev_unicast_sync - Synchronize device's unicast list to another device
4185 * @to: destination device
4186 * @from: source device
4187 *
4188 * Add newly added addresses to the destination device and release
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004189 * addresses that have no users left. The source device must be
4190 * locked by netif_addr_lock_bh.
Chris Leeche83a2ea2008-01-31 16:53:23 -08004191 *
4192 * This function is intended to be called from the dev->set_rx_mode
4193 * function of layered software devices.
4194 */
4195int dev_unicast_sync(struct net_device *to, struct net_device *from)
4196{
4197 int err = 0;
4198
Jiri Pirkoccffad252009-05-22 23:22:17 +00004199 if (to->addr_len != from->addr_len)
4200 return -EINVAL;
4201
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004202 netif_addr_lock_bh(to);
Jiri Pirko31278e72009-06-17 01:12:19 +00004203 err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004204 if (!err)
4205 __dev_set_rx_mode(to);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004206 netif_addr_unlock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004207 return err;
4208}
4209EXPORT_SYMBOL(dev_unicast_sync);
4210
4211/**
Randy Dunlapbc2cda12008-02-13 15:03:25 -08004212 * dev_unicast_unsync - Remove synchronized addresses from the destination device
Chris Leeche83a2ea2008-01-31 16:53:23 -08004213 * @to: destination device
4214 * @from: source device
4215 *
4216 * Remove all addresses that were added to the destination device by
4217 * dev_unicast_sync(). This function is intended to be called from the
4218 * dev->stop function of layered software devices.
4219 */
4220void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4221{
Jiri Pirkoccffad252009-05-22 23:22:17 +00004222 if (to->addr_len != from->addr_len)
4223 return;
4224
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004225 netif_addr_lock_bh(from);
4226 netif_addr_lock(to);
Jiri Pirko31278e72009-06-17 01:12:19 +00004227 __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004228 __dev_set_rx_mode(to);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004229 netif_addr_unlock(to);
4230 netif_addr_unlock_bh(from);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004231}
4232EXPORT_SYMBOL(dev_unicast_unsync);
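
/*
 * Illustrative sketch, not part of the original file: the intended call
 * sites of dev_unicast_sync()/dev_unicast_unsync() in a layered software
 * device (compare the vlan and macvlan drivers). "example_priv" and the
 * hooks are invented; guarded with #if 0, never compiled.
 */
#if 0
struct example_priv {
	struct net_device *lowerdev;
};

static void example_upper_set_rx_mode(struct net_device *upper)
{
	struct example_priv *priv = netdev_priv(upper);

	/* push our secondary unicast addresses down to the real device */
	dev_unicast_sync(priv->lowerdev, upper);
}

static int example_upper_stop(struct net_device *upper)
{
	struct example_priv *priv = netdev_priv(upper);

	dev_unicast_unsync(priv->lowerdev, upper);
	return 0;
}
#endif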
4233
Jiri Pirkoccffad252009-05-22 23:22:17 +00004234static void dev_unicast_flush(struct net_device *dev)
4235{
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004236 netif_addr_lock_bh(dev);
Jiri Pirko31278e72009-06-17 01:12:19 +00004237 __hw_addr_flush(&dev->uc);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004238 netif_addr_unlock_bh(dev);
Jiri Pirkoccffad252009-05-22 23:22:17 +00004239}
4240
4241static void dev_unicast_init(struct net_device *dev)
4242{
Jiri Pirko31278e72009-06-17 01:12:19 +00004243 __hw_addr_init(&dev->uc);
Jiri Pirkoccffad252009-05-22 23:22:17 +00004244}
4245
4246
Denis Cheng12972622007-07-18 02:12:56 -07004247static void __dev_addr_discard(struct dev_addr_list **list)
4248{
4249 struct dev_addr_list *tmp;
4250
4251 while (*list != NULL) {
4252 tmp = *list;
4253 *list = tmp->next;
4254 if (tmp->da_users > tmp->da_gusers)
4255			printk(KERN_ERR "__dev_addr_discard: address leakage! "
4256			       "da_users=%d\n", tmp->da_users);
4257 kfree(tmp);
4258 }
4259}
4260
Denis Cheng26cc2522007-07-18 02:12:03 -07004261static void dev_addr_discard(struct net_device *dev)
Patrick McHardy4417da62007-06-27 01:28:10 -07004262{
David S. Millerb9e40852008-07-15 00:15:08 -07004263 netif_addr_lock_bh(dev);
Denis Cheng26cc2522007-07-18 02:12:03 -07004264
Denis Cheng456ad752007-07-18 02:10:54 -07004265 __dev_addr_discard(&dev->mc_list);
4266 dev->mc_count = 0;
Denis Cheng26cc2522007-07-18 02:12:03 -07004267
David S. Millerb9e40852008-07-15 00:15:08 -07004268 netif_addr_unlock_bh(dev);
Denis Cheng456ad752007-07-18 02:10:54 -07004269}
4270
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004271/**
4272 * dev_get_flags - get flags reported to userspace
4273 * @dev: device
4274 *
4275 * Get the combination of flag bits exported through APIs to userspace.
4276 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004277unsigned dev_get_flags(const struct net_device *dev)
4278{
4279 unsigned flags;
4280
4281 flags = (dev->flags & ~(IFF_PROMISC |
4282 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08004283 IFF_RUNNING |
4284 IFF_LOWER_UP |
4285 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07004286 (dev->gflags & (IFF_PROMISC |
4287 IFF_ALLMULTI));
4288
Stefan Rompfb00055a2006-03-20 17:09:11 -08004289 if (netif_running(dev)) {
4290 if (netif_oper_up(dev))
4291 flags |= IFF_RUNNING;
4292 if (netif_carrier_ok(dev))
4293 flags |= IFF_LOWER_UP;
4294 if (netif_dormant(dev))
4295 flags |= IFF_DORMANT;
4296 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004297
4298 return flags;
4299}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004300EXPORT_SYMBOL(dev_get_flags);
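
/*
 * Illustrative sketch, not part of the original file: interpreting the
 * combined flag word returned above. Invented name; guarded with #if 0,
 * never compiled.
 */
#if 0
static bool example_iface_usable(struct net_device *dev)
{
	unsigned flags = dev_get_flags(dev);
	unsigned want = IFF_UP | IFF_RUNNING | IFF_LOWER_UP;

	/* administratively up, operationally up, and carrier present */
	return (flags & want) == want;
}
#endif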
Linus Torvalds1da177e2005-04-16 15:20:36 -07004301
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004302/**
4303 * dev_change_flags - change device settings
4304 * @dev: device
4305 * @flags: device state flags
4306 *
4307 * Change settings on device based state flags. The flags are
4308 * in the userspace exported format.
4309 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004310int dev_change_flags(struct net_device *dev, unsigned flags)
4311{
Thomas Graf7c355f52007-06-05 16:03:03 -07004312 int ret, changes;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004313 int old_flags = dev->flags;
4314
Patrick McHardy24023452007-07-14 18:51:31 -07004315 ASSERT_RTNL();
4316
Linus Torvalds1da177e2005-04-16 15:20:36 -07004317 /*
4318 * Set the flags on our device.
4319 */
4320
4321 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4322 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4323 IFF_AUTOMEDIA)) |
4324 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4325 IFF_ALLMULTI));
4326
4327 /*
4328 * Load in the correct multicast list now the flags have changed.
4329 */
4330
Patrick McHardyb6c40d62008-10-07 15:26:48 -07004331 if ((old_flags ^ flags) & IFF_MULTICAST)
4332 dev_change_rx_flags(dev, IFF_MULTICAST);
Patrick McHardy24023452007-07-14 18:51:31 -07004333
Patrick McHardy4417da62007-06-27 01:28:10 -07004334 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004335
4336 /*
4337	 *	Have we downed the interface? We handle IFF_UP ourselves
4338 * according to user attempts to set it, rather than blindly
4339 * setting it.
4340 */
4341
4342 ret = 0;
4343 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
4344 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
4345
4346 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07004347 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004348 }
4349
4350 if (dev->flags & IFF_UP &&
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004351 ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
Linus Torvalds1da177e2005-04-16 15:20:36 -07004352 IFF_VOLATILE)))
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004353 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004354
4355 if ((flags ^ dev->gflags) & IFF_PROMISC) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004356 int inc = (flags & IFF_PROMISC) ? 1 : -1;
4357
Linus Torvalds1da177e2005-04-16 15:20:36 -07004358 dev->gflags ^= IFF_PROMISC;
4359 dev_set_promiscuity(dev, inc);
4360 }
4361
4362 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4363	   is important. Some (broken) drivers set IFF_PROMISC when
4364	   IFF_ALLMULTI is requested, without asking us and without reporting.
4365 */
4366 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004367 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4368
Linus Torvalds1da177e2005-04-16 15:20:36 -07004369 dev->gflags ^= IFF_ALLMULTI;
4370 dev_set_allmulti(dev, inc);
4371 }
4372
Thomas Graf7c355f52007-06-05 16:03:03 -07004373 /* Exclude state transition flags, already notified */
4374 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
4375 if (changes)
4376 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004377
4378 return ret;
4379}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004380EXPORT_SYMBOL(dev_change_flags);
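
/*
 * Illustrative sketch, not part of the original file: toggling IFF_UP
 * through dev_change_flags(), which routes the transition through
 * dev_open()/dev_close() and raises the usual notifications. Invented
 * name; guarded with #if 0, never compiled.
 */
#if 0
static int example_set_iface_up(struct net_device *dev, bool up)
{
	unsigned flags;
	int err;

	rtnl_lock();
	flags = dev_get_flags(dev);
	err = dev_change_flags(dev, up ? flags | IFF_UP : flags & ~IFF_UP);
	rtnl_unlock();
	return err;
}
#endif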
Linus Torvalds1da177e2005-04-16 15:20:36 -07004381
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004382/**
4383 * dev_set_mtu - Change maximum transfer unit
4384 * @dev: device
4385 * @new_mtu: new transfer unit
4386 *
4387 * Change the maximum transfer size of the network device.
4388 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004389int dev_set_mtu(struct net_device *dev, int new_mtu)
4390{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004391 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004392 int err;
4393
4394 if (new_mtu == dev->mtu)
4395 return 0;
4396
4397	/*	MTU must not be negative.	 */
4398 if (new_mtu < 0)
4399 return -EINVAL;
4400
4401 if (!netif_device_present(dev))
4402 return -ENODEV;
4403
4404 err = 0;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004405 if (ops->ndo_change_mtu)
4406 err = ops->ndo_change_mtu(dev, new_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004407 else
4408 dev->mtu = new_mtu;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004409
Linus Torvalds1da177e2005-04-16 15:20:36 -07004410 if (!err && dev->flags & IFF_UP)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004411 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004412 return err;
4413}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004414EXPORT_SYMBOL(dev_set_mtu);
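
/*
 * Illustrative sketch, not part of the original file: shrinking the MTU
 * to make room for encapsulation overhead; NETDEV_CHANGEMTU fires for an
 * interface that is up. Invented name; guarded with #if 0, never compiled.
 */
#if 0
static int example_shrink_mtu(struct net_device *dev, int overhead)
{
	int err;

	rtnl_lock();
	err = dev_set_mtu(dev, dev->mtu - overhead);
	rtnl_unlock();
	return err;
}
#endif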
Linus Torvalds1da177e2005-04-16 15:20:36 -07004415
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004416/**
4417 * dev_set_mac_address - Change Media Access Control Address
4418 * @dev: device
4419 * @sa: new address
4420 *
4421 * Change the hardware (MAC) address of the device
4422 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004423int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4424{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004425 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004426 int err;
4427
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004428 if (!ops->ndo_set_mac_address)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004429 return -EOPNOTSUPP;
4430 if (sa->sa_family != dev->type)
4431 return -EINVAL;
4432 if (!netif_device_present(dev))
4433 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004434 err = ops->ndo_set_mac_address(dev, sa);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004435 if (!err)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004436 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004437 return err;
4438}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004439EXPORT_SYMBOL(dev_set_mac_address);
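
/*
 * Illustrative sketch, not part of the original file: programming a new
 * MAC; sa_family must match dev->type or -EINVAL is returned. Invented
 * name; guarded with #if 0, never compiled.
 */
#if 0
static int example_set_ether_mac(struct net_device *dev,
				 const unsigned char *mac)
{
	struct sockaddr sa;
	int err;

	sa.sa_family = dev->type;	/* ARPHRD_ETHER for Ethernet */
	memcpy(sa.sa_data, mac, ETH_ALEN);

	rtnl_lock();
	err = dev_set_mac_address(dev, &sa);
	rtnl_unlock();
	return err;
}
#endif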
Linus Torvalds1da177e2005-04-16 15:20:36 -07004440
4441/*
Eric Dumazet3710bec2009-11-01 19:42:09 +00004442 * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07004443 */
Jeff Garzik14e3e072007-10-08 00:06:32 -07004444static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004445{
4446 int err;
Eric Dumazet3710bec2009-11-01 19:42:09 +00004447 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004448
4449 if (!dev)
4450 return -ENODEV;
4451
4452 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004453 case SIOCGIFFLAGS: /* Get interface flags */
4454 ifr->ifr_flags = (short) dev_get_flags(dev);
4455 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004456
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004457 case SIOCGIFMETRIC: /* Get the metric on the interface
4458 (currently unused) */
4459 ifr->ifr_metric = 0;
4460 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004461
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004462 case SIOCGIFMTU: /* Get the MTU of a device */
4463 ifr->ifr_mtu = dev->mtu;
4464 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004465
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004466 case SIOCGIFHWADDR:
4467 if (!dev->addr_len)
4468			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof(ifr->ifr_hwaddr.sa_data));
4469		else
4470			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4471			       min(sizeof(ifr->ifr_hwaddr.sa_data), (size_t)dev->addr_len));
4472 ifr->ifr_hwaddr.sa_family = dev->type;
4473 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004474
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004475 case SIOCGIFSLAVE:
4476 err = -EINVAL;
4477 break;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004478
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004479 case SIOCGIFMAP:
4480 ifr->ifr_map.mem_start = dev->mem_start;
4481 ifr->ifr_map.mem_end = dev->mem_end;
4482 ifr->ifr_map.base_addr = dev->base_addr;
4483 ifr->ifr_map.irq = dev->irq;
4484 ifr->ifr_map.dma = dev->dma;
4485 ifr->ifr_map.port = dev->if_port;
4486 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004487
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004488 case SIOCGIFINDEX:
4489 ifr->ifr_ifindex = dev->ifindex;
4490 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004491
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004492 case SIOCGIFTXQLEN:
4493 ifr->ifr_qlen = dev->tx_queue_len;
4494 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004495
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004496 default:
4497 /* dev_ioctl() should ensure this case
4498 * is never reached
4499 */
4500 WARN_ON(1);
4501 err = -EINVAL;
4502 break;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004503
4504 }
4505 return err;
4506}
4507
4508/*
4509 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
4510 */
4511static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4512{
4513 int err;
4514 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08004515 const struct net_device_ops *ops;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004516
4517 if (!dev)
4518 return -ENODEV;
4519
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08004520 ops = dev->netdev_ops;
4521
Jeff Garzik14e3e072007-10-08 00:06:32 -07004522 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004523 case SIOCSIFFLAGS: /* Set interface flags */
4524 return dev_change_flags(dev, ifr->ifr_flags);
Jeff Garzik14e3e072007-10-08 00:06:32 -07004525
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004526 case SIOCSIFMETRIC: /* Set the metric on the interface
4527 (currently unused) */
4528 return -EOPNOTSUPP;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004529
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004530 case SIOCSIFMTU: /* Set the MTU of a device */
4531 return dev_set_mtu(dev, ifr->ifr_mtu);
Jeff Garzik14e3e072007-10-08 00:06:32 -07004532
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004533 case SIOCSIFHWADDR:
4534 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004535
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004536 case SIOCSIFHWBROADCAST:
4537 if (ifr->ifr_hwaddr.sa_family != dev->type)
4538 return -EINVAL;
4539 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4540		       min(sizeof(ifr->ifr_hwaddr.sa_data), (size_t)dev->addr_len));
4541 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4542 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004543
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004544 case SIOCSIFMAP:
4545 if (ops->ndo_set_config) {
4546 if (!netif_device_present(dev))
4547 return -ENODEV;
4548 return ops->ndo_set_config(dev, &ifr->ifr_map);
4549 }
4550 return -EOPNOTSUPP;
4551
4552 case SIOCADDMULTI:
4553 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4554 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4555 return -EINVAL;
4556 if (!netif_device_present(dev))
4557 return -ENODEV;
4558 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4559 dev->addr_len, 1);
4560
4561 case SIOCDELMULTI:
4562 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4563 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4564 return -EINVAL;
4565 if (!netif_device_present(dev))
4566 return -ENODEV;
4567 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4568 dev->addr_len, 1);
4569
4570 case SIOCSIFTXQLEN:
4571 if (ifr->ifr_qlen < 0)
4572 return -EINVAL;
4573 dev->tx_queue_len = ifr->ifr_qlen;
4574 return 0;
4575
4576 case SIOCSIFNAME:
4577 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4578 return dev_change_name(dev, ifr->ifr_newname);
4579
4580 /*
4581 * Unknown or private ioctl
4582 */
4583 default:
4584 if ((cmd >= SIOCDEVPRIVATE &&
4585 cmd <= SIOCDEVPRIVATE + 15) ||
4586 cmd == SIOCBONDENSLAVE ||
4587 cmd == SIOCBONDRELEASE ||
4588 cmd == SIOCBONDSETHWADDR ||
4589 cmd == SIOCBONDSLAVEINFOQUERY ||
4590 cmd == SIOCBONDINFOQUERY ||
4591 cmd == SIOCBONDCHANGEACTIVE ||
4592 cmd == SIOCGMIIPHY ||
4593 cmd == SIOCGMIIREG ||
4594 cmd == SIOCSMIIREG ||
4595 cmd == SIOCBRADDIF ||
4596 cmd == SIOCBRDELIF ||
4597 cmd == SIOCSHWTSTAMP ||
4598 cmd == SIOCWANDEV) {
4599 err = -EOPNOTSUPP;
4600 if (ops->ndo_do_ioctl) {
4601 if (netif_device_present(dev))
4602 err = ops->ndo_do_ioctl(dev, ifr, cmd);
4603 else
4604 err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004605 }
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004606 } else
4607 err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004608
4609 }
4610 return err;
4611}
4612
4613/*
4614 * This function handles all "interface"-type I/O control requests. The actual
4615 * 'doing' part of this is dev_ifsioc above.
4616 */
4617
4618/**
4619 * dev_ioctl - network device ioctl
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004620 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07004621 * @cmd: command to issue
4622 * @arg: pointer to a struct ifreq in user space
4623 *
4624 * Issue ioctl functions to devices. This is normally called by the
4625 * user space syscall interfaces but can sometimes be useful for
4626 * other purposes. The return value is the return from the syscall if
4627 * positive or a negative errno code on error.
4628 */
4629
Eric W. Biederman881d9662007-09-17 11:56:21 -07004630int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004631{
4632 struct ifreq ifr;
4633 int ret;
4634 char *colon;
4635
4636	/* One special case: SIOCGIFCONF takes an ifconf argument
4637	   and requires the shared lock, because it sleeps writing
4638 to user space.
4639 */
4640
4641 if (cmd == SIOCGIFCONF) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004642 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004643 ret = dev_ifconf(net, (char __user *) arg);
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004644 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004645 return ret;
4646 }
4647 if (cmd == SIOCGIFNAME)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004648 return dev_ifname(net, (struct ifreq __user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004649
4650 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4651 return -EFAULT;
4652
4653 ifr.ifr_name[IFNAMSIZ-1] = 0;
4654
4655 colon = strchr(ifr.ifr_name, ':');
4656 if (colon)
4657 *colon = 0;
4658
4659 /*
4660 * See which interface the caller is talking about.
4661 */
4662
4663 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004664 /*
4665 * These ioctl calls:
4666 * - can be done by all.
4667 * - atomic and do not require locking.
4668 * - return a value
4669 */
4670 case SIOCGIFFLAGS:
4671 case SIOCGIFMETRIC:
4672 case SIOCGIFMTU:
4673 case SIOCGIFHWADDR:
4674 case SIOCGIFSLAVE:
4675 case SIOCGIFMAP:
4676 case SIOCGIFINDEX:
4677 case SIOCGIFTXQLEN:
4678 dev_load(net, ifr.ifr_name);
Eric Dumazet3710bec2009-11-01 19:42:09 +00004679 rcu_read_lock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004680 ret = dev_ifsioc_locked(net, &ifr, cmd);
Eric Dumazet3710bec2009-11-01 19:42:09 +00004681 rcu_read_unlock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004682 if (!ret) {
4683 if (colon)
4684 *colon = ':';
4685 if (copy_to_user(arg, &ifr,
4686 sizeof(struct ifreq)))
4687 ret = -EFAULT;
4688 }
4689 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004690
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004691 case SIOCETHTOOL:
4692 dev_load(net, ifr.ifr_name);
4693 rtnl_lock();
4694 ret = dev_ethtool(net, &ifr);
4695 rtnl_unlock();
4696 if (!ret) {
4697 if (colon)
4698 *colon = ':';
4699 if (copy_to_user(arg, &ifr,
4700 sizeof(struct ifreq)))
4701 ret = -EFAULT;
4702 }
4703 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004704
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004705 /*
4706 * These ioctl calls:
4707 * - require superuser power.
4708 * - require strict serialization.
4709 * - return a value
4710 */
4711 case SIOCGMIIPHY:
4712 case SIOCGMIIREG:
4713 case SIOCSIFNAME:
4714 if (!capable(CAP_NET_ADMIN))
4715 return -EPERM;
4716 dev_load(net, ifr.ifr_name);
4717 rtnl_lock();
4718 ret = dev_ifsioc(net, &ifr, cmd);
4719 rtnl_unlock();
4720 if (!ret) {
4721 if (colon)
4722 *colon = ':';
4723 if (copy_to_user(arg, &ifr,
4724 sizeof(struct ifreq)))
4725 ret = -EFAULT;
4726 }
4727 return ret;
4728
4729 /*
4730 * These ioctl calls:
4731 * - require superuser power.
4732 * - require strict serialization.
4733 * - do not return a value
4734 */
4735 case SIOCSIFFLAGS:
4736 case SIOCSIFMETRIC:
4737 case SIOCSIFMTU:
4738 case SIOCSIFMAP:
4739 case SIOCSIFHWADDR:
4740 case SIOCSIFSLAVE:
4741 case SIOCADDMULTI:
4742 case SIOCDELMULTI:
4743 case SIOCSIFHWBROADCAST:
4744 case SIOCSIFTXQLEN:
4745 case SIOCSMIIREG:
4746 case SIOCBONDENSLAVE:
4747 case SIOCBONDRELEASE:
4748 case SIOCBONDSETHWADDR:
4749 case SIOCBONDCHANGEACTIVE:
4750 case SIOCBRADDIF:
4751 case SIOCBRDELIF:
4752 case SIOCSHWTSTAMP:
4753 if (!capable(CAP_NET_ADMIN))
4754 return -EPERM;
4755 /* fall through */
4756 case SIOCBONDSLAVEINFOQUERY:
4757 case SIOCBONDINFOQUERY:
4758 dev_load(net, ifr.ifr_name);
4759 rtnl_lock();
4760 ret = dev_ifsioc(net, &ifr, cmd);
4761 rtnl_unlock();
4762 return ret;
4763
4764 case SIOCGIFMEM:
4765 /* Get the per device memory space. We can add this but
4766 * currently do not support it */
4767 case SIOCSIFMEM:
4768 /* Set the per device memory buffer space.
4769 * Not applicable in our case */
4770 case SIOCSIFLINK:
4771 return -EINVAL;
4772
4773 /*
4774 * Unknown or private ioctl.
4775 */
4776 default:
4777 if (cmd == SIOCWANDEV ||
4778 (cmd >= SIOCDEVPRIVATE &&
4779 cmd <= SIOCDEVPRIVATE + 15)) {
Eric W. Biederman881d9662007-09-17 11:56:21 -07004780 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004781 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004782 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004783 rtnl_unlock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004784 if (!ret && copy_to_user(arg, &ifr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004785 sizeof(struct ifreq)))
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004786 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004787 return ret;
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004788 }
4789 /* Take care of Wireless Extensions */
4790 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4791 return wext_handle_ioctl(net, &ifr, cmd, arg);
4792 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004793 }
4794}
4795
4796
4797/**
4798 * dev_new_index - allocate an ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004799 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07004800 *
4801 * Returns a suitable unique value for a new device interface
4802 * number. The caller must hold the rtnl semaphore or the
4803 * dev_base_lock to be sure it remains unique.
4804 */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004805static int dev_new_index(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004806{
4807 static int ifindex;
4808 for (;;) {
4809 if (++ifindex <= 0)
4810 ifindex = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004811 if (!__dev_get_by_index(net, ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004812 return ifindex;
4813 }
4814}
4815
Linus Torvalds1da177e2005-04-16 15:20:36 -07004816/* Delayed registration/unregistration */
Denis Cheng3b5b34f2007-12-07 00:49:17 -08004817static LIST_HEAD(net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004818
Stephen Hemminger6f05f622007-03-08 20:46:03 -08004819static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004820{
Linus Torvalds1da177e2005-04-16 15:20:36 -07004821 list_add_tail(&dev->todo_list, &net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004822}
4823
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004824static void rollback_registered_many(struct list_head *head)
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004825{
Krishna Kumare93737b2009-12-08 22:26:02 +00004826 struct net_device *dev, *tmp;
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004827
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004828 BUG_ON(dev_boot_phase);
4829 ASSERT_RTNL();
4830
Krishna Kumare93737b2009-12-08 22:26:02 +00004831 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004832		/* Some devices call this without ever having been registered,
Krishna Kumare93737b2009-12-08 22:26:02 +00004833		 * e.g. on an initialization unwind. Remove those
4834		 * devices and proceed with the remaining.
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004835 */
4836 if (dev->reg_state == NETREG_UNINITIALIZED) {
4837 pr_debug("unregister_netdevice: device %s/%p never "
4838 "was registered\n", dev->name, dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004839
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004840 WARN_ON(1);
Krishna Kumare93737b2009-12-08 22:26:02 +00004841 list_del(&dev->unreg_list);
4842 continue;
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004843 }
4844
4845 BUG_ON(dev->reg_state != NETREG_REGISTERED);
4846
4847 /* If device is running, close it first. */
4848 dev_close(dev);
4849
4850 /* And unlink it from device chain. */
4851 unlist_netdevice(dev);
4852
4853 dev->reg_state = NETREG_UNREGISTERING;
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004854 }
4855
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004856 synchronize_net();
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004857
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004858 list_for_each_entry(dev, head, unreg_list) {
4859 /* Shutdown queueing discipline. */
4860 dev_shutdown(dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004861
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004862
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004863 /* Notify protocols, that we are about to destroy
4864 this device. They should clean all the things.
4865 */
4866 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4867
4868 /*
4869 * Flush the unicast and multicast chains
4870 */
4871 dev_unicast_flush(dev);
4872 dev_addr_discard(dev);
4873
4874 if (dev->netdev_ops->ndo_uninit)
4875 dev->netdev_ops->ndo_uninit(dev);
4876
4877 /* Notifier chain MUST detach us from master device. */
4878 WARN_ON(dev->master);
4879
4880 /* Remove entries from kobject tree */
4881 netdev_unregister_kobject(dev);
4882 }
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004883
Eric W. Biedermana5ee1552009-11-29 15:45:58 +00004884 /* Process any work delayed until the end of the batch */
4885 dev = list_entry(head->next, struct net_device, unreg_list);
4886 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
4887
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004888 synchronize_net();
4889
Eric W. Biedermana5ee1552009-11-29 15:45:58 +00004890 list_for_each_entry(dev, head, unreg_list)
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004891 dev_put(dev);
4892}
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004893
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004894static void rollback_registered(struct net_device *dev)
4895{
4896 LIST_HEAD(single);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004897
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004898 list_add(&dev->unreg_list, &single);
4899 rollback_registered_many(&single);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004900}
4901
David S. Millere8a04642008-07-17 00:34:19 -07004902static void __netdev_init_queue_locks_one(struct net_device *dev,
4903 struct netdev_queue *dev_queue,
4904 void *_unused)
David S. Millerc773e842008-07-08 23:13:53 -07004905{
4906 spin_lock_init(&dev_queue->_xmit_lock);
David S. Millercf508b12008-07-22 14:16:42 -07004907 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
David S. Millerc773e842008-07-08 23:13:53 -07004908 dev_queue->xmit_lock_owner = -1;
4909}
4910
4911static void netdev_init_queue_locks(struct net_device *dev)
4912{
David S. Millere8a04642008-07-17 00:34:19 -07004913 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4914 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
David S. Millerc773e842008-07-08 23:13:53 -07004915}
4916
Herbert Xub63365a2008-10-23 01:11:29 -07004917unsigned long netdev_fix_features(unsigned long features, const char *name)
4918{
4919 /* Fix illegal SG+CSUM combinations. */
4920 if ((features & NETIF_F_SG) &&
4921 !(features & NETIF_F_ALL_CSUM)) {
4922 if (name)
4923 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4924 "checksum feature.\n", name);
4925 features &= ~NETIF_F_SG;
4926 }
4927
4928 /* TSO requires that SG is present as well. */
4929 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4930 if (name)
4931 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4932 "SG feature.\n", name);
4933 features &= ~NETIF_F_TSO;
4934 }
4935
4936 if (features & NETIF_F_UFO) {
4937 if (!(features & NETIF_F_GEN_CSUM)) {
4938 if (name)
4939 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4940 "since no NETIF_F_HW_CSUM feature.\n",
4941 name);
4942 features &= ~NETIF_F_UFO;
4943 }
4944
4945 if (!(features & NETIF_F_SG)) {
4946 if (name)
4947 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4948 "since no NETIF_F_SG feature.\n", name);
4949 features &= ~NETIF_F_UFO;
4950 }
4951 }
4952
4953 return features;
4954}
4955EXPORT_SYMBOL(netdev_fix_features);
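
/*
 * Illustrative sketch, not part of the original file: a probe routine
 * sanitizing its advertised features before registration, so that e.g.
 * TSO is quietly dropped when scatter/gather is unavailable. Guarded
 * with #if 0, never compiled.
 */
#if 0
	dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
	dev->features = netdev_fix_features(dev->features, dev->name);
#endif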
4956
Linus Torvalds1da177e2005-04-16 15:20:36 -07004957/**
Patrick Mullaneyfc4a7482009-12-03 15:59:22 -08004958 * netif_stacked_transfer_operstate - transfer operstate
4959 * @rootdev: the root or lower level device to transfer state from
4960 * @dev: the device to transfer operstate to
4961 *
4962 * Transfer operational state from root to device. This is normally
4963 * called when a stacking relationship exists between the root
4964 * device and the device (a leaf device).
4965 */
4966void netif_stacked_transfer_operstate(const struct net_device *rootdev,
4967 struct net_device *dev)
4968{
4969 if (rootdev->operstate == IF_OPER_DORMANT)
4970 netif_dormant_on(dev);
4971 else
4972 netif_dormant_off(dev);
4973
4974 if (netif_carrier_ok(rootdev)) {
4975 if (!netif_carrier_ok(dev))
4976 netif_carrier_on(dev);
4977 } else {
4978 if (netif_carrier_ok(dev))
4979 netif_carrier_off(dev);
4980 }
4981}
4982EXPORT_SYMBOL(netif_stacked_transfer_operstate);
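
/*
 * Illustrative sketch, not part of the original file: a stacking driver
 * mirroring the lower device's operstate from its netdevice notifier,
 * much as vlan does on NETDEV_CHANGE. example_find_upper() is invented;
 * guarded with #if 0, never compiled.
 */
#if 0
static int example_notifier(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	struct net_device *lower = ptr;
	struct net_device *upper = example_find_upper(lower);	/* invented */

	if (upper && event == NETDEV_CHANGE)
		netif_stacked_transfer_operstate(lower, upper);
	return NOTIFY_DONE;
}
#endif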
4983
4984/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07004985 * register_netdevice - register a network device
4986 * @dev: device to register
4987 *
4988 * Take a completed network device structure and add it to the kernel
4989 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4990 * chain. 0 is returned on success. A negative errno code is returned
4991 * on a failure to set up the device, or if the name is a duplicate.
4992 *
4993 * Callers must hold the rtnl semaphore. You may want
4994 * register_netdev() instead of this.
4995 *
4996 * BUGS:
4997 * The locking appears insufficient to guarantee two parallel registers
4998 * will not get the same name.
4999 */
5000
5001int register_netdevice(struct net_device *dev)
5002{
Linus Torvalds1da177e2005-04-16 15:20:36 -07005003 int ret;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005004 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005005
5006 BUG_ON(dev_boot_phase);
5007 ASSERT_RTNL();
5008
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005009 might_sleep();
5010
Linus Torvalds1da177e2005-04-16 15:20:36 -07005011 /* When net_device's are persistent, this will be fatal. */
5012 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005013 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005014
David S. Millerf1f28aa2008-07-15 00:08:33 -07005015 spin_lock_init(&dev->addr_list_lock);
David S. Millercf508b12008-07-22 14:16:42 -07005016 netdev_set_addr_lockdep_class(dev);
David S. Millerc773e842008-07-08 23:13:53 -07005017 netdev_init_queue_locks(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005018
Linus Torvalds1da177e2005-04-16 15:20:36 -07005019 dev->iflink = -1;
5020
5021 /* Init, if this function is available */
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005022 if (dev->netdev_ops->ndo_init) {
5023 ret = dev->netdev_ops->ndo_init(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005024 if (ret) {
5025 if (ret > 0)
5026 ret = -EIO;
Adrian Bunk90833aa2006-11-13 16:02:22 -08005027 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005028 }
5029 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005030
Octavian Purdilad9031022009-11-18 02:36:59 +00005031 ret = dev_get_valid_name(net, dev->name, dev->name, 0);
5032 if (ret)
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07005033 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005034
Eric W. Biederman881d9662007-09-17 11:56:21 -07005035 dev->ifindex = dev_new_index(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005036 if (dev->iflink == -1)
5037 dev->iflink = dev->ifindex;
5038
Stephen Hemmingerd212f872007-06-27 00:47:37 -07005039 /* Fix illegal checksum combinations */
5040 if ((dev->features & NETIF_F_HW_CSUM) &&
5041 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5042 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
5043 dev->name);
5044 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5045 }
5046
5047 if ((dev->features & NETIF_F_NO_CSUM) &&
5048 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5049 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
5050 dev->name);
5051 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5052 }
5053
Herbert Xub63365a2008-10-23 01:11:29 -07005054 dev->features = netdev_fix_features(dev->features, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005055
Lennert Buytenheke5a4a722008-08-03 01:23:10 -07005056 /* Enable software GSO if SG is supported. */
5057 if (dev->features & NETIF_F_SG)
5058 dev->features |= NETIF_F_GSO;
5059
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07005060 netdev_initialize_kobject(dev);
Johannes Berg7ffbe3f2009-10-02 05:15:27 +00005061
5062 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5063 ret = notifier_to_errno(ret);
5064 if (ret)
5065 goto err_uninit;
5066
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005067 ret = netdev_register_kobject(dev);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005068 if (ret)
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07005069 goto err_uninit;
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005070 dev->reg_state = NETREG_REGISTERED;
5071
Linus Torvalds1da177e2005-04-16 15:20:36 -07005072 /*
5073 * Default initial state at registry is that the
5074 * device is present.
5075 */
5076
5077 set_bit(__LINK_STATE_PRESENT, &dev->state);
5078
Linus Torvalds1da177e2005-04-16 15:20:36 -07005079 dev_init_scheduler(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005080 dev_hold(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005081 list_netdevice(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005082
5083 /* Notify protocols, that a new device appeared. */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07005084 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07005085 ret = notifier_to_errno(ret);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005086 if (ret) {
5087 rollback_registered(dev);
5088 dev->reg_state = NETREG_UNREGISTERED;
5089 }
Eric W. Biedermand90a9092009-12-12 22:11:15 +00005090 /*
5091 * Prevent userspace races by waiting until the network
5092 * device is fully setup before sending notifications.
5093 */
5094 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005095
5096out:
5097 return ret;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07005098
5099err_uninit:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005100 if (dev->netdev_ops->ndo_uninit)
5101 dev->netdev_ops->ndo_uninit(dev);
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07005102 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005103}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005104EXPORT_SYMBOL(register_netdevice);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005105
5106/**
Benjamin Herrenschmidt937f1ba2009-01-14 21:05:05 -08005107 * init_dummy_netdev - init a dummy network device for NAPI
5108 * @dev: device to init
5109 *
5110 * This takes a network device structure and initializes the minimum
5111 * set of fields so it can be used to schedule NAPI polls without
5112 * registering a full blown interface. This is to be used by drivers
5113 * that need to tie several hardware interfaces to a single NAPI
5114 * poll scheduler due to HW limitations.
5115 */
5116int init_dummy_netdev(struct net_device *dev)
5117{
5118 /* Clear everything. Note we don't initialize spinlocks
5119	 * as they aren't supposed to be taken by any of the
5120 * NAPI code and this dummy netdev is supposed to be
5121 * only ever used for NAPI polls
5122 */
5123 memset(dev, 0, sizeof(struct net_device));
5124
5125 /* make sure we BUG if trying to hit standard
5126 * register/unregister code path
5127 */
5128 dev->reg_state = NETREG_DUMMY;
5129
5130 /* initialize the ref count */
5131 atomic_set(&dev->refcnt, 1);
5132
5133 /* NAPI wants this */
5134 INIT_LIST_HEAD(&dev->napi_list);
5135
5136 /* a dummy interface is started by default */
5137 set_bit(__LINK_STATE_PRESENT, &dev->state);
5138 set_bit(__LINK_STATE_START, &dev->state);
5139
5140 return 0;
5141}
5142EXPORT_SYMBOL_GPL(init_dummy_netdev);
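
/*
 * Illustrative sketch, not part of the original file: a driver hanging a
 * NAPI context off a dummy netdev that is never registered, as described
 * above. Invented names; guarded with #if 0, never compiled.
 */
#if 0
struct example_hw {
	struct net_device napi_dev;	/* dummy, never registered */
	struct napi_struct napi;
};

static void example_hw_init(struct example_hw *hw,
			    int (*poll)(struct napi_struct *, int))
{
	init_dummy_netdev(&hw->napi_dev);
	netif_napi_add(&hw->napi_dev, &hw->napi, poll, 64);
}
#endif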
5143
5144
5145/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07005146 * register_netdev - register a network device
5147 * @dev: device to register
5148 *
5149 * Take a completed network device structure and add it to the kernel
5150 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5151 * chain. 0 is returned on success. A negative errno code is returned
5152 * on a failure to set up the device, or if the name is a duplicate.
5153 *
Borislav Petkov38b4da32007-04-20 22:14:10 -07005154 * This is a wrapper around register_netdevice that takes the rtnl semaphore
Linus Torvalds1da177e2005-04-16 15:20:36 -07005155 * and expands the device name if you passed a format string to
5156 * alloc_netdev.
5157 */
5158int register_netdev(struct net_device *dev)
5159{
5160 int err;
5161
5162 rtnl_lock();
5163
5164 /*
5165 * If the name is a format string the caller wants us to do a
5166 * name allocation.
5167 */
5168 if (strchr(dev->name, '%')) {
5169 err = dev_alloc_name(dev, dev->name);
5170 if (err < 0)
5171 goto out;
5172 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005173
Linus Torvalds1da177e2005-04-16 15:20:36 -07005174 err = register_netdevice(dev);
5175out:
5176 rtnl_unlock();
5177 return err;
5178}
5179EXPORT_SYMBOL(register_netdev);
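
/*
 * Illustrative sketch, not part of the original file: the usual
 * allocate/register/unregister/free lifecycle from a driver's point of
 * view, with a "%d" format name so register_netdev() assigns the unit
 * number. Invented names; guarded with #if 0, never compiled.
 */
#if 0
static int example_probe(struct net_device **out)
{
	struct net_device *dev;
	int err;

	dev = alloc_netdev(0, "example%d", ether_setup);
	if (!dev)
		return -ENOMEM;

	err = register_netdev(dev);	/* takes the rtnl itself */
	if (err) {
		free_netdev(dev);	/* legal before registration */
		return err;
	}
	*out = dev;
	return 0;
}

static void example_remove(struct net_device *dev)
{
	unregister_netdev(dev);
	free_netdev(dev);
}
#endif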
5180
5181/*
5182 * netdev_wait_allrefs - wait until all references are gone.
5183 *
5184 * This is called when unregistering network devices.
5185 *
5186 * Any protocol or device that holds a reference should register
5187 * for netdevice notification, and clean up and put back the
5188 * reference if they receive an UNREGISTER event.
5189 * We can get stuck here if buggy protocols don't correctly
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005190 * call dev_put.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005191 */
5192static void netdev_wait_allrefs(struct net_device *dev)
5193{
5194 unsigned long rebroadcast_time, warning_time;
5195
Eric Dumazete014deb2009-11-17 05:59:21 +00005196 linkwatch_forget_dev(dev);
5197
Linus Torvalds1da177e2005-04-16 15:20:36 -07005198 rebroadcast_time = warning_time = jiffies;
5199 while (atomic_read(&dev->refcnt) != 0) {
5200 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08005201 rtnl_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005202
5203 /* Rebroadcast unregister notification */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07005204 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
Eric W. Biedermana5ee1552009-11-29 15:45:58 +00005205 /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
Octavian Purdila395264d2009-11-16 13:49:35 +00005206			 * should have already handled it the first time */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005207
5208 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5209 &dev->state)) {
5210 /* We must not have linkwatch events
5211 * pending on unregister. If this
5212 * happens, we simply run the queue
5213 * unscheduled, resulting in a noop
5214 * for this device.
5215 */
5216 linkwatch_run_queue();
5217 }
5218
Stephen Hemminger6756ae42006-03-20 22:23:58 -08005219 __rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005220
5221 rebroadcast_time = jiffies;
5222 }
5223
5224 msleep(250);
5225
5226 if (time_after(jiffies, warning_time + 10 * HZ)) {
5227 printk(KERN_EMERG "unregister_netdevice: "
5228 "waiting for %s to become free. Usage "
5229 "count = %d\n",
5230 dev->name, atomic_read(&dev->refcnt));
5231 warning_time = jiffies;
5232 }
5233 }
5234}
5235
5236/* The sequence is:
5237 *
5238 * rtnl_lock();
5239 * ...
5240 * register_netdevice(x1);
5241 * register_netdevice(x2);
5242 * ...
5243 * unregister_netdevice(y1);
5244 * unregister_netdevice(y2);
5245 * ...
5246 * rtnl_unlock();
5247 * free_netdev(y1);
5248 * free_netdev(y2);
5249 *
Herbert Xu58ec3b42008-10-07 15:50:03 -07005250 * We are invoked by rtnl_unlock().
Linus Torvalds1da177e2005-04-16 15:20:36 -07005251 * This allows us to deal with problems:
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005252 * 1) We can delete sysfs objects which invoke hotplug
Linus Torvalds1da177e2005-04-16 15:20:36 -07005253 * without deadlocking with linkwatch via keventd.
5254 * 2) Since we run with the RTNL semaphore not held, we can sleep
5255 * safely in order to wait for the netdev refcnt to drop to zero.
Herbert Xu58ec3b42008-10-07 15:50:03 -07005256 *
5257 * We must not return until all unregister events added during
5258 * the interval the lock was held have been completed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005259 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005260void netdev_run_todo(void)
5261{
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005262 struct list_head list;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005263
Linus Torvalds1da177e2005-04-16 15:20:36 -07005264 /* Snapshot list, allow later requests */
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005265 list_replace_init(&net_todo_list, &list);
Herbert Xu58ec3b42008-10-07 15:50:03 -07005266
5267 __rtnl_unlock();
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005268
Linus Torvalds1da177e2005-04-16 15:20:36 -07005269 while (!list_empty(&list)) {
5270 struct net_device *dev
5271 = list_entry(list.next, struct net_device, todo_list);
5272 list_del(&dev->todo_list);
5273
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005274 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005275 printk(KERN_ERR "network todo '%s' but state %d\n",
5276 dev->name, dev->reg_state);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005277 dump_stack();
5278 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005279 }
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005280
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005281 dev->reg_state = NETREG_UNREGISTERED;
5282
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07005283 on_each_cpu(flush_backlog, dev, 1);
5284
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005285 netdev_wait_allrefs(dev);
5286
5287 /* paranoia */
5288 BUG_ON(atomic_read(&dev->refcnt));
Ilpo Järvinen547b7922008-07-25 21:43:18 -07005289 WARN_ON(dev->ip_ptr);
5290 WARN_ON(dev->ip6_ptr);
5291 WARN_ON(dev->dn_ptr);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005292
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005293 if (dev->destructor)
5294 dev->destructor(dev);
Stephen Hemminger9093bbb2007-05-19 15:39:25 -07005295
5296 /* Free network device */
5297 kobject_put(&dev->dev.kobj);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005298 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005299}
5300
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005301/**
Eric Dumazetd83345a2009-11-16 03:36:51 +00005302 * dev_txq_stats_fold - fold tx_queues stats
5303 * @dev: device to get statistics from
5304 * @stats: struct net_device_stats to hold results
5305 */
5306void dev_txq_stats_fold(const struct net_device *dev,
5307 struct net_device_stats *stats)
5308{
5309 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5310 unsigned int i;
5311 struct netdev_queue *txq;
5312
5313 for (i = 0; i < dev->num_tx_queues; i++) {
5314 txq = netdev_get_tx_queue(dev, i);
5315 tx_bytes += txq->tx_bytes;
5316 tx_packets += txq->tx_packets;
5317 tx_dropped += txq->tx_dropped;
5318 }
5319 if (tx_bytes || tx_packets || tx_dropped) {
5320 stats->tx_bytes = tx_bytes;
5321 stats->tx_packets = tx_packets;
5322 stats->tx_dropped = tx_dropped;
5323 }
5324}
5325EXPORT_SYMBOL(dev_txq_stats_fold);
5326
5327/**
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005328 * dev_get_stats - get network device statistics
5329 * @dev: device to get statistics from
5330 *
5331 * Get network statistics from device. The device driver may provide
5332 * its own method by setting dev->netdev_ops->get_stats; otherwise
5333 * the internal statistics structure is used.
5334 */
5335const struct net_device_stats *dev_get_stats(struct net_device *dev)
Eric Dumazet7004bf22009-05-18 00:34:33 +00005336{
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005337 const struct net_device_ops *ops = dev->netdev_ops;
5338
5339 if (ops->ndo_get_stats)
5340 return ops->ndo_get_stats(dev);
Eric Dumazet7004bf22009-05-18 00:34:33 +00005341
Eric Dumazetd83345a2009-11-16 03:36:51 +00005342 dev_txq_stats_fold(dev, &dev->stats);
5343 return &dev->stats;
Rusty Russellc45d2862007-03-28 14:29:08 -07005344}
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005345EXPORT_SYMBOL(dev_get_stats);
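
/*
 * Illustrative sketch, not part of the original file: a driver-side
 * ndo_get_stats hook layering a hardware counter on top of the generic
 * tx-queue fold, mirroring the fallback above. example_read_hw_rx_drops()
 * is invented; guarded with #if 0, never compiled.
 */
#if 0
static struct net_device_stats *example_get_stats(struct net_device *dev)
{
	dev_txq_stats_fold(dev, &dev->stats);
	dev->stats.rx_dropped = example_read_hw_rx_drops(dev);	/* invented */
	return &dev->stats;
}
#endif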
Rusty Russellc45d2862007-03-28 14:29:08 -07005346
David S. Millerdc2b4842008-07-08 17:18:23 -07005347static void netdev_init_one_queue(struct net_device *dev,
David S. Millere8a04642008-07-17 00:34:19 -07005348 struct netdev_queue *queue,
5349 void *_unused)
David S. Millerdc2b4842008-07-08 17:18:23 -07005350{
David S. Millerdc2b4842008-07-08 17:18:23 -07005351 queue->dev = dev;
5352}
5353
David S. Millerbb949fb2008-07-08 16:55:56 -07005354static void netdev_init_queues(struct net_device *dev)
5355{
David S. Millere8a04642008-07-17 00:34:19 -07005356 netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5357 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
David S. Millerc3f26a22008-07-31 16:58:50 -07005358 spin_lock_init(&dev->tx_global_lock);
David S. Millerbb949fb2008-07-08 16:55:56 -07005359}
5360
Linus Torvalds1da177e2005-04-16 15:20:36 -07005361/**
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005362 * alloc_netdev_mq - allocate network device
Linus Torvalds1da177e2005-04-16 15:20:36 -07005363 * @sizeof_priv: size of private data to allocate space for
5364 * @name: device name format string
5365 * @setup: callback to initialize device
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005366 * @queue_count: the number of subqueues to allocate
Linus Torvalds1da177e2005-04-16 15:20:36 -07005367 *
5368 * Allocates a struct net_device with private data area for driver use
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005369 * and performs basic initialization. Also allocates subqueue structs
5370 * for each transmit queue on the device.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005371 */
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005372struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5373 void (*setup)(struct net_device *), unsigned int queue_count)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005374{
David S. Millere8a04642008-07-17 00:34:19 -07005375 struct netdev_queue *tx;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005376 struct net_device *dev;
Stephen Hemminger79439862008-07-21 13:28:44 -07005377 size_t alloc_size;
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005378 struct net_device *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005379
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07005380 BUG_ON(strlen(name) >= sizeof(dev->name));
5381
David S. Millerfd2ea0a2008-07-17 01:56:23 -07005382 alloc_size = sizeof(struct net_device);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07005383 if (sizeof_priv) {
5384 /* ensure 32-byte alignment of private area */
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005385 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07005386 alloc_size += sizeof_priv;
5387 }
5388 /* ensure 32-byte alignment of whole construct */
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005389 alloc_size += NETDEV_ALIGN - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005390
Paolo 'Blaisorblade' Giarrusso31380de2006-04-06 22:38:28 -07005391 p = kzalloc(alloc_size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005392 if (!p) {
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07005393 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07005394 return NULL;
5395 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005396
Stephen Hemminger79439862008-07-21 13:28:44 -07005397 tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
David S. Millere8a04642008-07-17 00:34:19 -07005398 if (!tx) {
5399 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5400 "tx qdiscs.\n");
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00005401 goto free_p;
David S. Millere8a04642008-07-17 00:34:19 -07005402 }
5403
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005404 dev = PTR_ALIGN(p, NETDEV_ALIGN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005405 dev->padded = (char *)dev - (char *)p;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00005406
5407 if (dev_addr_init(dev))
5408 goto free_tx;
5409
Jiri Pirkoccffad252009-05-22 23:22:17 +00005410 dev_unicast_init(dev);
5411
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09005412 dev_net_set(dev, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005413
David S. Millere8a04642008-07-17 00:34:19 -07005414 dev->_tx = tx;
5415 dev->num_tx_queues = queue_count;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07005416 dev->real_num_tx_queues = queue_count;
David S. Millere8a04642008-07-17 00:34:19 -07005417
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07005418 dev->gso_max_size = GSO_MAX_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005419
David S. Millerbb949fb2008-07-08 16:55:56 -07005420 netdev_init_queues(dev);
5421
Peter P Waskiewicz Jr15682bc2010-02-10 20:03:05 -08005422 INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
5423 dev->ethtool_ntuple_list.count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08005424 INIT_LIST_HEAD(&dev->napi_list);
Eric W. Biederman9fdce092009-10-30 14:51:13 +00005425 INIT_LIST_HEAD(&dev->unreg_list);
Eric Dumazete014deb2009-11-17 05:59:21 +00005426 INIT_LIST_HEAD(&dev->link_watch_list);
Eric Dumazet93f154b2009-05-18 22:19:19 -07005427 dev->priv_flags = IFF_XMIT_DST_RELEASE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005428 setup(dev);
5429 strcpy(dev->name, name);
5430 return dev;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00005431
5432free_tx:
5433 kfree(tx);
5434
5435free_p:
5436 kfree(p);
5437 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005438}
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005439EXPORT_SYMBOL(alloc_netdev_mq);
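
/*
 * Illustrative sketch, not part of the original file: allocating a
 * multiqueue Ethernet-style device with a private area and a setup
 * callback. Invented names; guarded with #if 0, never compiled.
 */
#if 0
struct example_mq_priv {
	spinlock_t lock;
};

static void example_mq_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->tx_queue_len = 1000;
}

static struct net_device *example_mq_alloc(unsigned int ntxq)
{
	return alloc_netdev_mq(sizeof(struct example_mq_priv), "exmq%d",
			       example_mq_setup, ntxq);
}
#endif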
Linus Torvalds1da177e2005-04-16 15:20:36 -07005440
5441/**
5442 * free_netdev - free network device
5443 * @dev: device
5444 *
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005445 * This function does the last stage of destroying an allocated device
5446 * interface. The reference to the device object is released.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005447 * If this is the last reference then it will be freed.
5448 */
5449void free_netdev(struct net_device *dev)
5450{
Herbert Xud565b0a2008-12-15 23:38:52 -08005451 struct napi_struct *p, *n;
5452
Denis V. Lunevf3005d72008-04-16 02:02:18 -07005453 release_net(dev_net(dev));
5454
David S. Millere8a04642008-07-17 00:34:19 -07005455 kfree(dev->_tx);
5456
Jiri Pirkof001fde2009-05-05 02:48:28 +00005457 /* Flush device addresses */
5458 dev_addr_flush(dev);
5459
Peter P Waskiewicz Jr15682bc2010-02-10 20:03:05 -08005460 /* Clear ethtool n-tuple list */
5461 ethtool_ntuple_flush(dev);
5462
Herbert Xud565b0a2008-12-15 23:38:52 -08005463 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5464 netif_napi_del(p);
5465
Stephen Hemminger3041a062006-05-26 13:25:24 -07005466 /* Compatibility with error handling in drivers */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005467 if (dev->reg_state == NETREG_UNINITIALIZED) {
5468 kfree((char *)dev - dev->padded);
5469 return;
5470 }
5471
5472 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5473 dev->reg_state = NETREG_RELEASED;
5474
Greg Kroah-Hartman43cb76d2002-04-09 12:14:34 -07005475 /* will free via device release */
5476 put_device(&dev->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005477}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005478EXPORT_SYMBOL(free_netdev);
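
/*
 * Teardown sketch (illustrative only): for a registered device the
 * usual pairing is unregister first, then drop the allocation
 * reference.  As the code above shows, the actual kfree may be
 * deferred to the device release callback via put_device().
 *
 *	unregister_netdev(dev);
 *	free_netdev(dev);
 */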
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005479
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005480/**
5481 * synchronize_net - Synchronize with packet receive processing
5482 *
5483 * Wait for packets currently being received to be done.
5484 * Does not block later packets from starting.
5485 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005486void synchronize_net(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005487{
5488 might_sleep();
Paul E. McKenneyfbd568a3e2005-05-01 08:59:04 -07005489 synchronize_rcu();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005490}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005491EXPORT_SYMBOL(synchronize_net);
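
/*
 * Usage sketch (illustrative only): this is how dev_remove_pack()
 * pairs __dev_remove_pack() with synchronize_net(), so a caller may
 * safely free its packet_type afterwards.  my_ptype is hypothetical.
 *
 *	__dev_remove_pack(&my_ptype);
 *	synchronize_net();
 *	(no receive path can still be using my_ptype here)
 */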
Linus Torvalds1da177e2005-04-16 15:20:36 -07005492
5493/**
Eric Dumazet44a08732009-10-27 07:03:04 +00005494 * unregister_netdevice_queue - remove device from the kernel
Linus Torvalds1da177e2005-04-16 15:20:36 -07005495 * @dev: device
Eric Dumazet44a08732009-10-27 07:03:04 +00005496 * @head: list
Jaswinder Singh Rajput6ebfbc02009-11-22 20:43:13 -08005497 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07005498 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08005499 * from the kernel tables.
Eric Dumazet44a08732009-10-27 07:03:04 +00005500 *	If head is not NULL, the device is queued to be unregistered later.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005501 *
5502 * Callers must hold the rtnl semaphore. You may want
5503 * unregister_netdev() instead of this.
5504 */
5505
Eric Dumazet44a08732009-10-27 07:03:04 +00005506void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005507{
Herbert Xua6620712007-12-12 19:21:56 -08005508 ASSERT_RTNL();
5509
Eric Dumazet44a08732009-10-27 07:03:04 +00005510 if (head) {
Eric W. Biederman9fdce092009-10-30 14:51:13 +00005511 list_move_tail(&dev->unreg_list, head);
Eric Dumazet44a08732009-10-27 07:03:04 +00005512 } else {
5513 rollback_registered(dev);
5514 /* Finish processing unregister after unlock */
5515 net_set_todo(dev);
5516 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005517}
Eric Dumazet44a08732009-10-27 07:03:04 +00005518EXPORT_SYMBOL(unregister_netdevice_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005519
5520/**
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005521 * unregister_netdevice_many - unregister many devices
5522 * @head: list of devices
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005523 */
5524void unregister_netdevice_many(struct list_head *head)
5525{
5526 struct net_device *dev;
5527
5528 if (!list_empty(head)) {
5529 rollback_registered_many(head);
5530 list_for_each_entry(dev, head, unreg_list)
5531 net_set_todo(dev);
5532 }
5533}
Eric Dumazet63c80992009-10-27 07:06:49 +00005534EXPORT_SYMBOL(unregister_netdevice_many);
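
/*
 * Usage sketch (illustrative only): batching several teardowns under
 * one rtnl hold amortises the expensive synchronization performed by
 * rollback_registered_many(); default_device_exit_batch() below uses
 * the same pattern.  should_go() is a hypothetical predicate.
 *
 *	LIST_HEAD(kill_list);
 *
 *	rtnl_lock();
 *	for_each_netdev(net, dev)
 *		if (should_go(dev))
 *			unregister_netdevice_queue(dev, &kill_list);
 *	unregister_netdevice_many(&kill_list);
 *	rtnl_unlock();
 */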
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005535
5536/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07005537 * unregister_netdev - remove device from the kernel
5538 * @dev: device
5539 *
5540 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08005541 * from the kernel tables.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005542 *
5543 * This is just a wrapper for unregister_netdevice that takes
5544 * the rtnl semaphore. In general you want to use this and not
5545 * unregister_netdevice.
5546 */
5547void unregister_netdev(struct net_device *dev)
5548{
5549 rtnl_lock();
5550 unregister_netdevice(dev);
5551 rtnl_unlock();
5552}
Linus Torvalds1da177e2005-04-16 15:20:36 -07005553EXPORT_SYMBOL(unregister_netdev);
5554
Eric W. Biedermance286d32007-09-12 13:53:49 +02005555/**
5556 * dev_change_net_namespace - move device to different nethost namespace
5557 * @dev: device
5558 * @net: network namespace
5559 * @pat: If not NULL name pattern to try if the current device name
5560 * is already taken in the destination network namespace.
5561 *
5562 * This function shuts down a device interface and moves it
5563 * to a new network namespace. On success 0 is returned, on
5564 *	a failure a negative errno code is returned.
5565 *
5566 * Callers must hold the rtnl semaphore.
5567 */
5568
5569int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5570{
Eric W. Biedermance286d32007-09-12 13:53:49 +02005571 int err;
5572
5573 ASSERT_RTNL();
5574
5575 /* Don't allow namespace local devices to be moved. */
5576 err = -EINVAL;
5577 if (dev->features & NETIF_F_NETNS_LOCAL)
5578 goto out;
5579
Eric W. Biederman38918452008-10-27 17:51:47 -07005580#ifdef CONFIG_SYSFS
5581 /* Don't allow real devices to be moved when sysfs
5582 * is enabled.
5583 */
5584 err = -EINVAL;
5585 if (dev->dev.parent)
5586 goto out;
5587#endif
5588
Eric W. Biedermance286d32007-09-12 13:53:49 +02005589	/* Ensure the device has been registered */
5590 err = -EINVAL;
5591 if (dev->reg_state != NETREG_REGISTERED)
5592 goto out;
5593
5594	/* Get out if there is nothing to do */
5595 err = 0;
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09005596 if (net_eq(dev_net(dev), net))
Eric W. Biedermance286d32007-09-12 13:53:49 +02005597 goto out;
5598
5599 /* Pick the destination device name, and ensure
5600 * we can use it in the destination network namespace.
5601 */
5602 err = -EEXIST;
Octavian Purdilad9031022009-11-18 02:36:59 +00005603 if (__dev_get_by_name(net, dev->name)) {
Eric W. Biedermance286d32007-09-12 13:53:49 +02005604 /* We get here if we can't use the current device name */
5605 if (!pat)
5606 goto out;
Octavian Purdilad9031022009-11-18 02:36:59 +00005607 if (dev_get_valid_name(net, pat, dev->name, 1))
Eric W. Biedermance286d32007-09-12 13:53:49 +02005608 goto out;
5609 }
5610
5611 /*
5612	 * And now a mini version of register_netdevice and unregister_netdevice.
5613 */
5614
5615 /* If device is running close it first. */
Pavel Emelyanov9b772652007-10-10 02:49:09 -07005616 dev_close(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005617
5618 /* And unlink it from device chain */
5619 err = -ENODEV;
5620 unlist_netdevice(dev);
5621
5622 synchronize_net();
5623
5624 /* Shutdown queueing discipline. */
5625 dev_shutdown(dev);
5626
5627	/* Notify protocols that we are about to destroy
5628	   this device. They should clean up all their state.
5629	*/
5630 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
Eric W. Biedermana5ee1552009-11-29 15:45:58 +00005631 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005632
5633 /*
5634 * Flush the unicast and multicast chains
5635 */
Jiri Pirkoccffad252009-05-22 23:22:17 +00005636 dev_unicast_flush(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005637 dev_addr_discard(dev);
5638
Eric W. Biederman38918452008-10-27 17:51:47 -07005639 netdev_unregister_kobject(dev);
5640
Eric W. Biedermance286d32007-09-12 13:53:49 +02005641 /* Actually switch the network namespace */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09005642 dev_net_set(dev, net);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005643
Eric W. Biedermance286d32007-09-12 13:53:49 +02005644 /* If there is an ifindex conflict assign a new one */
5645 if (__dev_get_by_index(net, dev->ifindex)) {
5646 int iflink = (dev->iflink == dev->ifindex);
5647 dev->ifindex = dev_new_index(net);
5648 if (iflink)
5649 dev->iflink = dev->ifindex;
5650 }
5651
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005652 /* Fixup kobjects */
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07005653 err = netdev_register_kobject(dev);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005654 WARN_ON(err);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005655
5656 /* Add the device back in the hashes */
5657 list_netdevice(dev);
5658
5659	/* Notify protocols that a new device appeared. */
5660 call_netdevice_notifiers(NETDEV_REGISTER, dev);
5661
Eric W. Biedermand90a9092009-12-12 22:11:15 +00005662 /*
5663 * Prevent userspace races by waiting until the network
5664 * device is fully setup before sending notifications.
5665 */
5666 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5667
Eric W. Biedermance286d32007-09-12 13:53:49 +02005668 synchronize_net();
5669 err = 0;
5670out:
5671 return err;
5672}
Johannes Berg463d0182009-07-14 00:33:35 +02005673EXPORT_SYMBOL_GPL(dev_change_net_namespace);
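
/*
 * Usage sketch (illustrative only): moving a device under rtnl,
 * falling back to an "eth%d" template if its current name is already
 * taken on the other side.  target_net is hypothetical.
 *
 *	rtnl_lock();
 *	err = dev_change_net_namespace(dev, target_net, "eth%d");
 *	rtnl_unlock();
 */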
Eric W. Biedermance286d32007-09-12 13:53:49 +02005674
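/*
 * CPU hotplug notifier: when a CPU goes offline, splice its pending
 * completion and output queues onto the surviving CPU, kick the TX
 * softirq, and feed any packets left in its input queue back through
 * netif_rx() so nothing is stranded on the dead CPU.
 */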
Linus Torvalds1da177e2005-04-16 15:20:36 -07005675static int dev_cpu_callback(struct notifier_block *nfb,
5676 unsigned long action,
5677 void *ocpu)
5678{
5679 struct sk_buff **list_skb;
David S. Miller37437bb2008-07-16 02:15:04 -07005680 struct Qdisc **list_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005681 struct sk_buff *skb;
5682 unsigned int cpu, oldcpu = (unsigned long)ocpu;
5683 struct softnet_data *sd, *oldsd;
5684
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07005685 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005686 return NOTIFY_OK;
5687
5688 local_irq_disable();
5689 cpu = smp_processor_id();
5690 sd = &per_cpu(softnet_data, cpu);
5691 oldsd = &per_cpu(softnet_data, oldcpu);
5692
5693 /* Find end of our completion_queue. */
5694 list_skb = &sd->completion_queue;
5695 while (*list_skb)
5696 list_skb = &(*list_skb)->next;
5697 /* Append completion queue from offline CPU. */
5698 *list_skb = oldsd->completion_queue;
5699 oldsd->completion_queue = NULL;
5700
5701 /* Find end of our output_queue. */
5702 list_net = &sd->output_queue;
5703 while (*list_net)
5704 list_net = &(*list_net)->next_sched;
5705 /* Append output queue from offline CPU. */
5706 *list_net = oldsd->output_queue;
5707 oldsd->output_queue = NULL;
5708
5709 raise_softirq_irqoff(NET_TX_SOFTIRQ);
5710 local_irq_enable();
5711
5712 /* Process offline CPU's input_pkt_queue */
5713 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5714 netif_rx(skb);
5715
5716 return NOTIFY_OK;
5717}
Linus Torvalds1da177e2005-04-16 15:20:36 -07005718
5719
Herbert Xu7f353bf2007-08-10 15:47:58 -07005720/**
Herbert Xub63365a2008-10-23 01:11:29 -07005721 * netdev_increment_features - increment feature set by one
5722 * @all: current feature set
5723 * @one: new feature set
5724 * @mask: mask feature set
Herbert Xu7f353bf2007-08-10 15:47:58 -07005725 *
5726 * Computes a new feature set after adding a device with feature set
Herbert Xub63365a2008-10-23 01:11:29 -07005727 * @one to the master device with current feature set @all. Will not
5728 * enable anything that is off in @mask. Returns the new feature set.
Herbert Xu7f353bf2007-08-10 15:47:58 -07005729 */
Herbert Xub63365a2008-10-23 01:11:29 -07005730unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5731 unsigned long mask)
Herbert Xu7f353bf2007-08-10 15:47:58 -07005732{
Herbert Xub63365a2008-10-23 01:11:29 -07005733 /* If device needs checksumming, downgrade to it. */
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005734 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
Herbert Xub63365a2008-10-23 01:11:29 -07005735 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5736 else if (mask & NETIF_F_ALL_CSUM) {
5737 /* If one device supports v4/v6 checksumming, set for all. */
5738 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5739 !(all & NETIF_F_GEN_CSUM)) {
5740 all &= ~NETIF_F_ALL_CSUM;
5741 all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5742 }
Herbert Xu7f353bf2007-08-10 15:47:58 -07005743
Herbert Xub63365a2008-10-23 01:11:29 -07005744 /* If one device supports hw checksumming, set for all. */
5745 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5746 all &= ~NETIF_F_ALL_CSUM;
5747 all |= NETIF_F_HW_CSUM;
5748 }
5749 }
Herbert Xu7f353bf2007-08-10 15:47:58 -07005750
Herbert Xub63365a2008-10-23 01:11:29 -07005751 one |= NETIF_F_ALL_CSUM;
Herbert Xu7f353bf2007-08-10 15:47:58 -07005752
Herbert Xub63365a2008-10-23 01:11:29 -07005753 one |= all & NETIF_F_ONE_FOR_ALL;
Sridhar Samudralad9f59502009-10-07 12:24:25 +00005754 all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
Herbert Xub63365a2008-10-23 01:11:29 -07005755 all |= one & mask & NETIF_F_ONE_FOR_ALL;
Herbert Xu7f353bf2007-08-10 15:47:58 -07005756
5757 return all;
5758}
Herbert Xub63365a2008-10-23 01:11:29 -07005759EXPORT_SYMBOL(netdev_increment_features);
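
/*
 * Usage sketch (illustrative only): a master driver such as bonding
 * folds each slave's feature set into its own by iterating this
 * helper; for_each_slave() stands in for the driver's own iterator.
 *
 *	features &= ~NETIF_F_ONE_FOR_ALL;
 *	for_each_slave(...)
 *		features = netdev_increment_features(features,
 *						     slave->dev->features,
 *						     NETIF_F_ONE_FOR_ALL);
 */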
Herbert Xu7f353bf2007-08-10 15:47:58 -07005760
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005761static struct hlist_head *netdev_create_hash(void)
5762{
5763 int i;
5764 struct hlist_head *hash;
5765
5766 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5767 if (hash != NULL)
5768 for (i = 0; i < NETDEV_HASHENTRIES; i++)
5769 INIT_HLIST_HEAD(&hash[i]);
5770
5771 return hash;
5772}
5773
Eric W. Biederman881d9662007-09-17 11:56:21 -07005774/* Initialize per network namespace state */
Pavel Emelyanov46650792007-10-08 20:38:39 -07005775static int __net_init netdev_init(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07005776{
Eric W. Biederman881d9662007-09-17 11:56:21 -07005777 INIT_LIST_HEAD(&net->dev_base_head);
Eric W. Biederman881d9662007-09-17 11:56:21 -07005778
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005779 net->dev_name_head = netdev_create_hash();
5780 if (net->dev_name_head == NULL)
5781 goto err_name;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005782
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005783 net->dev_index_head = netdev_create_hash();
5784 if (net->dev_index_head == NULL)
5785 goto err_idx;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005786
5787 return 0;
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005788
5789err_idx:
5790 kfree(net->dev_name_head);
5791err_name:
5792 return -ENOMEM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005793}
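
/*
 * Lookup sketch (illustrative only): these hash tables back the
 * __dev_get_by_name()/__dev_get_by_index() fast paths, roughly:
 *
 *	struct hlist_node *p;
 *	struct net_device *dev;
 *
 *	hlist_for_each_entry(dev, p, dev_name_hash(net, name), name_hlist)
 *		if (!strncmp(dev->name, name, IFNAMSIZ))
 *			return dev;
 */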
5794
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005795/**
5796 * netdev_drivername - network driver for the device
5797 * @dev: network device
5798 * @buffer: buffer for resulting name
5799 * @len: size of buffer
5800 *
5801 * Determine network driver for device.
5802 */
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07005803char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
Arjan van de Ven6579e572008-07-21 13:31:48 -07005804{
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07005805 const struct device_driver *driver;
5806 const struct device *parent;
Arjan van de Ven6579e572008-07-21 13:31:48 -07005807
5808 if (len <= 0 || !buffer)
5809 return buffer;
5810 buffer[0] = 0;
5811
5812 parent = dev->dev.parent;
5813
5814 if (!parent)
5815 return buffer;
5816
5817 driver = parent->driver;
5818 if (driver && driver->name)
5819 strlcpy(buffer, driver->name, len);
5820 return buffer;
5821}
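
/*
 * Usage sketch (illustrative only): roughly how the tx watchdog names
 * the offending driver in its timeout message.
 *
 *	char drivername[64];
 *
 *	printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
 *	       dev->name, netdev_drivername(dev, drivername, 64));
 */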
5822
Pavel Emelyanov46650792007-10-08 20:38:39 -07005823static void __net_exit netdev_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07005824{
5825 kfree(net->dev_name_head);
5826 kfree(net->dev_index_head);
5827}
5828
Denis V. Lunev022cbae2007-11-13 03:23:50 -08005829static struct pernet_operations __net_initdata netdev_net_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07005830 .init = netdev_init,
5831 .exit = netdev_exit,
5832};
5833
Pavel Emelyanov46650792007-10-08 20:38:39 -07005834static void __net_exit default_device_exit(struct net *net)
Eric W. Biedermance286d32007-09-12 13:53:49 +02005835{
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00005836 struct net_device *dev, *aux;
Eric W. Biedermance286d32007-09-12 13:53:49 +02005837 /*
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00005838 * Push all migratable network devices back to the
Eric W. Biedermance286d32007-09-12 13:53:49 +02005839 * initial network namespace
5840 */
5841 rtnl_lock();
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00005842 for_each_netdev_safe(net, dev, aux) {
Eric W. Biedermance286d32007-09-12 13:53:49 +02005843 int err;
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005844 char fb_name[IFNAMSIZ];
Eric W. Biedermance286d32007-09-12 13:53:49 +02005845
5846		/* Ignore unmovable devices (e.g. loopback) */
5847 if (dev->features & NETIF_F_NETNS_LOCAL)
5848 continue;
5849
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00005850 /* Leave virtual devices for the generic cleanup */
5851 if (dev->rtnl_link_ops)
5852 continue;
Eric W. Biedermand0c082c2008-11-05 15:59:38 -08005853
Eric W. Biedermance286d32007-09-12 13:53:49 +02005854		/* Push remaining network devices to init_net */
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005855 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5856 err = dev_change_net_namespace(dev, &init_net, fb_name);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005857 if (err) {
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005858 printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
Eric W. Biedermance286d32007-09-12 13:53:49 +02005859 __func__, dev->name, err);
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005860 BUG();
Eric W. Biedermance286d32007-09-12 13:53:49 +02005861 }
5862 }
5863 rtnl_unlock();
5864}
5865
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00005866static void __net_exit default_device_exit_batch(struct list_head *net_list)
5867{
5868	/* At exit all network devices must be removed from a network
5869	 * namespace.  Do this in the reverse order of registration.
5870 * Do this across as many network namespaces as possible to
5871 * improve batching efficiency.
5872 */
5873 struct net_device *dev;
5874 struct net *net;
5875 LIST_HEAD(dev_kill_list);
5876
5877 rtnl_lock();
5878 list_for_each_entry(net, net_list, exit_list) {
5879 for_each_netdev_reverse(net, dev) {
5880 if (dev->rtnl_link_ops)
5881 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
5882 else
5883 unregister_netdevice_queue(dev, &dev_kill_list);
5884 }
5885 }
5886 unregister_netdevice_many(&dev_kill_list);
5887 rtnl_unlock();
5888}
5889
Denis V. Lunev022cbae2007-11-13 03:23:50 -08005890static struct pernet_operations __net_initdata default_device_ops = {
Eric W. Biedermance286d32007-09-12 13:53:49 +02005891 .exit = default_device_exit,
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00005892 .exit_batch = default_device_exit_batch,
Eric W. Biedermance286d32007-09-12 13:53:49 +02005893};
5894
Linus Torvalds1da177e2005-04-16 15:20:36 -07005895/*
5896 * Initialize the DEV module. At boot time this walks the device list and
5897 * unhooks any devices that fail to initialise (normally hardware not
5898 * present) and leaves us with a valid list of present and active devices.
5899 *
5900 */
5901
5902/*
5903 * This is called single threaded during boot, so no need
5904 * to take the rtnl semaphore.
5905 */
5906static int __init net_dev_init(void)
5907{
5908 int i, rc = -ENOMEM;
5909
5910 BUG_ON(!dev_boot_phase);
5911
Linus Torvalds1da177e2005-04-16 15:20:36 -07005912 if (dev_proc_init())
5913 goto out;
5914
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005915 if (netdev_kobject_init())
Linus Torvalds1da177e2005-04-16 15:20:36 -07005916 goto out;
5917
5918 INIT_LIST_HEAD(&ptype_all);
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08005919 for (i = 0; i < PTYPE_HASH_SIZE; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005920 INIT_LIST_HEAD(&ptype_base[i]);
5921
Eric W. Biederman881d9662007-09-17 11:56:21 -07005922 if (register_pernet_subsys(&netdev_net_ops))
5923 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005924
5925 /*
5926 * Initialise the packet receive queues.
5927 */
5928
KAMEZAWA Hiroyuki6f912042006-04-10 22:52:50 -07005929 for_each_possible_cpu(i) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005930 struct softnet_data *queue;
5931
5932 queue = &per_cpu(softnet_data, i);
5933 skb_queue_head_init(&queue->input_pkt_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005934 queue->completion_queue = NULL;
5935 INIT_LIST_HEAD(&queue->poll_list);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07005936
5937 queue->backlog.poll = process_backlog;
5938 queue->backlog.weight = weight_p;
Herbert Xud565b0a2008-12-15 23:38:52 -08005939 queue->backlog.gro_list = NULL;
Herbert Xu4ae55442009-02-08 18:00:36 +00005940 queue->backlog.gro_count = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005941 }
5942
Linus Torvalds1da177e2005-04-16 15:20:36 -07005943 dev_boot_phase = 0;
5944
Eric W. Biederman505d4f72008-11-07 22:54:20 -08005945	/* The loopback device is special: if any other network device
5946	 * is present in a network namespace, the loopback device must
5947	 * be present too.  Since we now dynamically allocate and free
5948	 * the loopback device, ensure this invariant is maintained by
5949	 * keeping the loopback device the first device on the list
5950	 * of network devices.  This ensures the loopback device is the
5951	 * first device that appears and the last network device that
5952	 * disappears.
5953 */
5954 if (register_pernet_device(&loopback_net_ops))
5955 goto out;
5956
5957 if (register_pernet_device(&default_device_ops))
5958 goto out;
5959
Carlos R. Mafra962cf362008-05-15 11:15:37 -03005960 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5961 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005962
5963 hotcpu_notifier(dev_cpu_callback, 0);
5964 dst_init();
5965 dev_mcast_init();
5966 rc = 0;
5967out:
5968 return rc;
5969}
5970
5971subsys_initcall(net_dev_init);
5972
Krishna Kumare88721f2009-02-18 17:55:02 -08005973static int __init initialize_hashrnd(void)
5974{
5975 get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
5976 return 0;
5977}
5978
5979late_initcall_sync(initialize_hashrnd);
5980