/*
 *      NET3    Protocol independent device support routines.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 *      Derived from the non IP parts of dev.c 1.0.19
 *              Authors:        Ross Biro
 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *      Additional Authors:
 *              Florian la Roche <rzsfl@rz.uni-sb.de>
 *              Alan Cox <gw4pts@gw4pts.ampr.org>
 *              David Hinds <dahinds@users.sourceforge.net>
 *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *              Adam Sulmicki <adam@cfar.umd.edu>
 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *      Changes:
 *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
 *                                      to 2 if register_netdev gets called
 *                                      before net_dev_init & also removed a
 *                                      few lines of code in the process.
 *              Alan Cox        :       device private ioctl copies fields back.
 *              Alan Cox        :       Transmit queue code does relevant
 *                                      stunts to keep the queue safe.
 *              Alan Cox        :       Fixed double lock.
 *              Alan Cox        :       Fixed promisc NULL pointer trap
 *              ????????        :       Support the full private ioctl range
 *              Alan Cox        :       Moved ioctl permission check into
 *                                      drivers
 *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
 *              Alan Cox        :       100 backlog just doesn't cut it when
 *                                      you start doing multicast video 8)
 *              Alan Cox        :       Rewrote net_bh and list manager.
 *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
 *              Alan Cox        :       Took out transmit every packet pass
 *                                      Saved a few bytes in the ioctl handler
 *              Alan Cox        :       Network driver sets packet type before
 *                                      calling netif_rx. Saves a function
 *                                      call a packet.
 *              Alan Cox        :       Hashed net_bh()
 *              Richard Kooijman:       Timestamp fixes.
 *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
 *              Alan Cox        :       Device lock protection.
 *              Alan Cox        :       Fixed nasty side effect of device close
 *                                      changes.
 *              Rudi Cilibrasi  :       Pass the right thing to
 *                                      set_mac_address()
 *              Dave Miller     :       32bit quantity for the device lock to
 *                                      make it work out on a Sparc.
 *              Bjorn Ekwall    :       Added KERNELD hack.
 *              Alan Cox        :       Cleaned up the backlog initialise.
 *              Craig Metz      :       SIOCGIFCONF fix if space for under
 *                                      1 device.
 *              Thomas Bogendoerfer :   Return ENODEV for dev_open, if there
 *                                      is no device open function.
 *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
 *              Michael Chastain:       Fix signed/unsigned for SIOCGIFCONF
 *              Cyrus Durgin    :       Cleaned for KMOD
 *              Adam Sulmicki   :       Bug Fix : Network Device Unload
 *                                      A network device unload needs to purge
 *                                      the backlog queue.
 *              Paul Rusty Russell :    SIOCSIFNAME
 *              Pekka Riikonen  :       Netdev boot-time settings code
 *              Andrew Morton   :       Make unregister_netdevice wait
 *                                      indefinitely on dev->refcnt
 *              J Hadi Salim    :       - Backlog queue sampling
 *                                      - netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

enum {
        GRO_MERGED,
        GRO_MERGED_FREE,
        GRO_HELD,
        GRO_NORMAL,
        GRO_DROP,
};

/*
 *      The list of packet types we will receive (as opposed to discard)
 *      and the routines to invoke.
 *
 *      Why 16. Because with 16 the only overlap we get on a hash of the
 *      low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *             sure which should go first, but I bet it won't make much
 *             difference if we are running VLANs.  The good news is that
 *             this protocol won't be in the list unless compiled in, so
 *             the average user (w/out VLANs) will not be adversely affected.
 *             --BLG
 *
 *              0800    IP
 *              8100    802.1Q VLAN
 *              0001    802.3
 *              0002    AX.25
 *              0004    802.2
 *              8035    RARP
 *              0005    SNAP
 *              0805    X.25
 *              0806    ARP
 *              8137    IPX
 *              0009    Localtalk
 *              86DD    IPv6
 */

#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;        /* Taps */

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

#define NETDEV_HASHBITS 8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
        unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
        return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
        return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
        struct net *net = dev_net(dev);

        ASSERT_RTNL();

        write_lock_bh(&dev_base_lock);
        list_add_tail(&dev->dev_list, &net->dev_base_head);
        hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
        hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
        write_unlock_bh(&dev_base_lock);
        return 0;
}

/* Device list removal */
static void unlist_netdevice(struct net_device *dev)
{
        ASSERT_RTNL();

        /* Unlink dev from the device chain */
        write_lock_bh(&dev_base_lock);
        list_del(&dev->dev_list);
        hlist_del(&dev->name_hlist);
        hlist_del(&dev->index_hlist);
        write_unlock_bh(&dev_base_lock);
}

/*
 *      Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *      Device drivers call our routines to queue packets here. We empty the
 *      queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
        {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
         ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
         ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
         ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
         ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
         ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
         ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
         ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
         ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
         ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
         ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
         ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
         ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
         ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
         ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};

static const char *netdev_lock_name[] =
        {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
         "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
         "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
         "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
         "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
         "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
         "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
         "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
         "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
         "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
         "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
         "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
         "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
         "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
         "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
                if (netdev_lock_type[i] == dev_type)
                        return i;
        /* the last key is used by default */
        return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
                                                 unsigned short dev_type)
{
        int i;

        i = netdev_lock_pos(dev_type);
        lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
                                   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
        int i;

        i = netdev_lock_pos(dev->type);
        lockdep_set_class_and_name(&dev->addr_list_lock,
                                   &netdev_addr_lock_key[i],
                                   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
                                                 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

                Protocol management and registration routines

*******************************************************************************/

/*
 *      Add a protocol ID to the list. Now that the input handler is
 *      smarter we can dispense with all the messy stuff that used to be
 *      here.
 *
 *      BEWARE!!! Protocol handlers, mangling input packets,
 *      MUST BE last in hash buckets and checking protocol handlers
 *      MUST start from promiscuous ptype_all chain in net_bh.
 *      It is true now, do not change it.
 *      Explanation follows: if protocol handler, mangling packet, will
 *      be the first on list, it is not able to sense, that packet
 *      is cloned and should be copied-on-write, so that it will
 *      change it and subsequent readers will get broken packet.
 *                                                      --ANK (980803)
 */

/**
 *      dev_add_pack - add packet handler
 *      @pt: packet type declaration
 *
 *      Add a protocol handler to the networking stack. The passed &packet_type
 *      is linked into kernel lists and may not be freed until it has been
 *      removed from the kernel lists.
 *
 *      This call does not sleep, therefore it can not guarantee that all
 *      CPUs that are in the middle of receiving packets will see the new
 *      packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
        int hash;

        spin_lock_bh(&ptype_lock);
        if (pt->type == htons(ETH_P_ALL))
                list_add_rcu(&pt->list, &ptype_all);
        else {
                hash = ntohs(pt->type) & PTYPE_HASH_MASK;
                list_add_rcu(&pt->list, &ptype_base[hash]);
        }
        spin_unlock_bh(&ptype_lock);
}

/**
 *      __dev_remove_pack - remove packet handler
 *      @pt: packet type declaration
 *
 *      Remove a protocol handler that was previously added to the kernel
 *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *      from the kernel lists and can be freed or reused once this function
 *      returns.
 *
 *      The packet type might still be in use by receivers
 *      and must not be freed until after all the CPUs have gone
 *      through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
        struct list_head *head;
        struct packet_type *pt1;

        spin_lock_bh(&ptype_lock);

        if (pt->type == htons(ETH_P_ALL))
                head = &ptype_all;
        else
                head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

        list_for_each_entry(pt1, head, list) {
                if (pt == pt1) {
                        list_del_rcu(&pt->list);
                        goto out;
                }
        }

        printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
        spin_unlock_bh(&ptype_lock);
}
/**
 *      dev_remove_pack - remove packet handler
 *      @pt: packet type declaration
 *
 *      Remove a protocol handler that was previously added to the kernel
 *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *      from the kernel lists and can be freed or reused once this function
 *      returns.
 *
 *      This call sleeps to guarantee that no CPU is looking at the packet
 *      type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
        __dev_remove_pack(pt);

        synchronize_net();
}

/******************************************************************************

                      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *      netdev_boot_setup_add - add new setup entry
 *      @name: name of the device
 *      @map: configured settings for the device
 *
 *      Adds new setup entry to the dev_boot_setup list.  The function
 *      returns 0 on error and 1 on success.  This is a generic routine for
 *      all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
        struct netdev_boot_setup *s;
        int i;

        s = dev_boot_setup;
        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
                if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
                        memset(s[i].name, 0, sizeof(s[i].name));
                        strlcpy(s[i].name, name, IFNAMSIZ);
                        memcpy(&s[i].map, map, sizeof(s[i].map));
                        break;
                }
        }

        return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *      netdev_boot_setup_check - check boot time settings
 *      @dev: the netdevice
 *
 *      Check boot time settings for the device.
 *      The found settings are set for the device to be used
 *      later in the device probing.
 *      Returns 0 if no settings found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
        struct netdev_boot_setup *s = dev_boot_setup;
        int i;

        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
                if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
                    !strcmp(dev->name, s[i].name)) {
                        dev->irq        = s[i].map.irq;
                        dev->base_addr  = s[i].map.base_addr;
                        dev->mem_start  = s[i].map.mem_start;
                        dev->mem_end    = s[i].map.mem_end;
                        return 1;
                }
        }
        return 0;
}


/**
 *      netdev_boot_base - get address from boot time settings
 *      @prefix: prefix for network device
 *      @unit: id for network device
 *
 *      Check boot time settings for the base address of device.
 *      The found settings are set for the device to be used
 *      later in the device probing.
 *      Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
        const struct netdev_boot_setup *s = dev_boot_setup;
        char name[IFNAMSIZ];
        int i;

        sprintf(name, "%s%d", prefix, unit);

        /*
         * If device already registered then return base of 1
         * to indicate not to probe for this interface
         */
        if (__dev_get_by_name(&init_net, name))
                return 1;

        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
                if (!strcmp(name, s[i].name))
                        return s[i].map.base_addr;
        return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
        int ints[5];
        struct ifmap map;

        str = get_options(str, ARRAY_SIZE(ints), ints);
        if (!str || !*str)
                return 0;

        /* Save settings */
        memset(&map, 0, sizeof(map));
        if (ints[0] > 0)
                map.irq = ints[1];
        if (ints[0] > 1)
                map.base_addr = ints[2];
        if (ints[0] > 2)
                map.mem_start = ints[3];
        if (ints[0] > 3)
                map.mem_end = ints[4];

        /* Add new entry to the list */
        return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
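
/*
 * Illustrative note (an assumption read off the parsing above, not text
 * from the original file): the handler takes up to four integers followed
 * by the interface name, so a command line such as
 *
 *      netdev=5,0x2f0,0,0,eth0
 *
 * stores irq 5 and I/O base 0x2f0 under the name "eth0";
 * netdev_boot_setup_check() later copies the saved ifmap into the device
 * with that name during probing.
 */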

/*******************************************************************************

                            Device Interface Subroutines

*******************************************************************************/

/**
 *      __dev_get_by_name - find a device by its name
 *      @net: the applicable net namespace
 *      @name: name to find
 *
 *      Find an interface by name. Must be called under RTNL semaphore
 *      or @dev_base_lock. If the name is found a pointer to the device
 *      is returned. If the name is not found then %NULL is returned. The
 *      reference counters are not incremented so the caller must be
 *      careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
        struct hlist_node *p;

        hlist_for_each(p, dev_name_hash(net, name)) {
                struct net_device *dev
                        = hlist_entry(p, struct net_device, name_hlist);
                if (!strncmp(dev->name, name, IFNAMSIZ))
                        return dev;
        }
        return NULL;
}

/**
 *      dev_get_by_name - find a device by its name
 *      @net: the applicable net namespace
 *      @name: name to find
 *
 *      Find an interface by name. This can be called from any
 *      context and does its own locking. The returned handle has
 *      the usage count incremented and the caller must use dev_put() to
 *      release it when it is no longer needed. %NULL is returned if no
 *      matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
        struct net_device *dev;

        read_lock(&dev_base_lock);
        dev = __dev_get_by_name(net, name);
        if (dev)
                dev_hold(dev);
        read_unlock(&dev_base_lock);
        return dev;
}

/**
 *      __dev_get_by_index - find a device by its ifindex
 *      @net: the applicable net namespace
 *      @ifindex: index of device
 *
 *      Search for an interface by index. Returns %NULL if the device
 *      is not found or a pointer to the device. The device has not
 *      had its reference counter increased so the caller must be careful
 *      about locking. The caller must hold either the RTNL semaphore
 *      or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
        struct hlist_node *p;

        hlist_for_each(p, dev_index_hash(net, ifindex)) {
                struct net_device *dev
                        = hlist_entry(p, struct net_device, index_hlist);
                if (dev->ifindex == ifindex)
                        return dev;
        }
        return NULL;
}


/**
 *      dev_get_by_index - find a device by its ifindex
 *      @net: the applicable net namespace
 *      @ifindex: index of device
 *
 *      Search for an interface by index. Returns NULL if the device
 *      is not found or a pointer to the device. The device returned has
 *      had a reference added and the pointer is safe until the user calls
 *      dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
        struct net_device *dev;

        read_lock(&dev_base_lock);
        dev = __dev_get_by_index(net, ifindex);
        if (dev)
                dev_hold(dev);
        read_unlock(&dev_base_lock);
        return dev;
}

/**
 *      dev_getbyhwaddr - find a device by its hardware address
 *      @net: the applicable net namespace
 *      @type: media type of device
 *      @ha: hardware address
 *
 *      Search for an interface by MAC address. Returns NULL if the device
 *      is not found or a pointer to the device. The caller must hold the
 *      rtnl semaphore. The returned device has not had its ref count increased
 *      and the caller must therefore be careful about locking
 *
 *      BUGS:
 *      If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
        struct net_device *dev;

        ASSERT_RTNL();

        for_each_netdev(net, dev)
                if (dev->type == type &&
                    !memcmp(dev->dev_addr, ha, dev->addr_len))
                        return dev;

        return NULL;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
        struct net_device *dev;

        ASSERT_RTNL();
        for_each_netdev(net, dev)
                if (dev->type == type)
                        return dev;

        return NULL;
}

EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
        struct net_device *dev;

        rtnl_lock();
        dev = __dev_getfirstbyhwtype(net, type);
        if (dev)
                dev_hold(dev);
        rtnl_unlock();
        return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *      dev_get_by_flags - find any device with given flags
 *      @net: the applicable net namespace
 *      @if_flags: IFF_* values
 *      @mask: bitmask of bits in if_flags to check
 *
 *      Search for any interface with the given flags. Returns NULL if a device
 *      is not found or a pointer to the device. The device returned has
 *      had a reference added and the pointer is safe until the user calls
 *      dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
        struct net_device *dev, *ret;

        ret = NULL;
        read_lock(&dev_base_lock);
        for_each_netdev(net, dev) {
                if (((dev->flags ^ if_flags) & mask) == 0) {
                        dev_hold(dev);
                        ret = dev;
                        break;
                }
        }
        read_unlock(&dev_base_lock);
        return ret;
}
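
/*
 * Illustrative sketch (not part of the original file): find the first
 * interface in init_net that is administratively up.  The flag choice is
 * an assumption for the example.
 */
#if 0
static void example_first_up(void)
{
        struct net_device *dev = dev_get_by_flags(&init_net, IFF_UP, IFF_UP);

        if (dev) {
                printk(KERN_DEBUG "first UP device: %s\n", dev->name);
                dev_put(dev);
        }
}
#endif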

/**
 *      dev_valid_name - check if name is okay for network device
 *      @name: name string
 *
 *      Network device names need to be valid file names to
 *      allow sysfs to work.  We also disallow any kind of
 *      whitespace.
 */
int dev_valid_name(const char *name)
{
        if (*name == '\0')
                return 0;
        if (strlen(name) >= IFNAMSIZ)
                return 0;
        if (!strcmp(name, ".") || !strcmp(name, ".."))
                return 0;

        while (*name) {
                if (*name == '/' || isspace(*name))
                        return 0;
                name++;
        }
        return 1;
}
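
/*
 * Illustrative note (values are assumptions checked against the rules
 * above, not text from the original file):
 *
 *      dev_valid_name("eth0")   -> 1
 *      dev_valid_name("my tap") -> 0  (whitespace)
 *      dev_valid_name("..")     -> 0  (would break sysfs paths)
 *      dev_valid_name("a/b")    -> 0  ('/' is rejected)
 */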

/**
 *      __dev_alloc_name - allocate a name for a device
 *      @net: network namespace to allocate the device name in
 *      @name: name format string
 *      @buf:  scratch buffer and result name string
 *
 *      Passed a format string - eg "lt%d" it will try and find a suitable
 *      id. It scans list of devices to build up a free map, then chooses
 *      the first empty slot. The caller must hold the dev_base or rtnl lock
 *      while allocating the name and adding the device in order to avoid
 *      duplicates.
 *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *      Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
        int i = 0;
        const char *p;
        const int max_netdevices = 8*PAGE_SIZE;
        unsigned long *inuse;
        struct net_device *d;

        p = strnchr(name, IFNAMSIZ-1, '%');
        if (p) {
                /*
                 * Verify the string as this thing may have come from
                 * the user.  There must be either one "%d" and no other "%"
                 * characters.
                 */
                if (p[1] != 'd' || strchr(p + 2, '%'))
                        return -EINVAL;

                /* Use one page as a bit array of possible slots */
                inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
                if (!inuse)
                        return -ENOMEM;

                for_each_netdev(net, d) {
                        if (!sscanf(d->name, name, &i))
                                continue;
                        if (i < 0 || i >= max_netdevices)
                                continue;

                        /* avoid cases where sscanf is not exact inverse of printf */
                        snprintf(buf, IFNAMSIZ, name, i);
                        if (!strncmp(buf, d->name, IFNAMSIZ))
                                set_bit(i, inuse);
                }

                i = find_first_zero_bit(inuse, max_netdevices);
                free_page((unsigned long) inuse);
        }

        snprintf(buf, IFNAMSIZ, name, i);
        if (!__dev_get_by_name(net, buf))
                return i;

        /* It is possible to run out of possible slots
         * when the name is long and there isn't enough space left
         * for the digits, or if all bits are used.
         */
        return -ENFILE;
}

/**
 *      dev_alloc_name - allocate a name for a device
 *      @dev: device
 *      @name: name format string
 *
 *      Passed a format string - eg "lt%d" it will try and find a suitable
 *      id. It scans list of devices to build up a free map, then chooses
 *      the first empty slot. The caller must hold the dev_base or rtnl lock
 *      while allocating the name and adding the device in order to avoid
 *      duplicates.
 *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *      Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
        char buf[IFNAMSIZ];
        struct net *net;
        int ret;

        BUG_ON(!dev_net(dev));
        net = dev_net(dev);
        ret = __dev_alloc_name(net, name, buf);
        if (ret >= 0)
                strlcpy(dev->name, buf, IFNAMSIZ);
        return ret;
}
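
/*
 * Illustrative sketch (not part of the original file): a driver-style use
 * of dev_alloc_name() under RTNL.  The "foo%d" pattern is an assumption
 * for the example; with foo0 and foo1 already registered, the call picks
 * unit 2 and sets dev->name to "foo2".
 */
#if 0
static int example_name(struct net_device *dev)
{
        int unit;

        rtnl_lock();
        unit = dev_alloc_name(dev, "foo%d");    /* unit number or -errno */
        rtnl_unlock();
        return unit < 0 ? unit : 0;
}
#endif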


/**
 *      dev_change_name - change name of a device
 *      @dev: device
 *      @newname: name (or format string) must be at least IFNAMSIZ
 *
 *      Change name of a device, can pass format strings "eth%d"
 *      for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
        char oldname[IFNAMSIZ];
        int err = 0;
        int ret;
        struct net *net;

        ASSERT_RTNL();
        BUG_ON(!dev_net(dev));

        net = dev_net(dev);
        if (dev->flags & IFF_UP)
                return -EBUSY;

        if (!dev_valid_name(newname))
                return -EINVAL;

        if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
                return 0;

        memcpy(oldname, dev->name, IFNAMSIZ);

        if (strchr(newname, '%')) {
                err = dev_alloc_name(dev, newname);
                if (err < 0)
                        return err;
        }
        else if (__dev_get_by_name(net, newname))
                return -EEXIST;
        else
                strlcpy(dev->name, newname, IFNAMSIZ);

rollback:
        /* For now only devices in the initial network namespace
         * are in sysfs.
         */
        if (net == &init_net) {
                ret = device_rename(&dev->dev, dev->name);
                if (ret) {
                        memcpy(dev->name, oldname, IFNAMSIZ);
                        return ret;
                }
        }

        write_lock_bh(&dev_base_lock);
        hlist_del(&dev->name_hlist);
        hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
        write_unlock_bh(&dev_base_lock);

        ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
        ret = notifier_to_errno(ret);

        if (ret) {
                if (err) {
                        printk(KERN_ERR
                               "%s: name change rollback failed: %d.\n",
                               dev->name, ret);
                } else {
                        err = ret;
                        memcpy(dev->name, oldname, IFNAMSIZ);
                        goto rollback;
                }
        }

        return err;
}

/**
 *      dev_set_alias - change ifalias of a device
 *      @dev: device
 *      @alias: name up to IFALIASZ
 *      @len: limit of bytes to copy from info
 *
 *      Set ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
        ASSERT_RTNL();

        if (len >= IFALIASZ)
                return -EINVAL;

        if (!len) {
                if (dev->ifalias) {
                        kfree(dev->ifalias);
                        dev->ifalias = NULL;
                }
                return 0;
        }

        dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
        if (!dev->ifalias)
                return -ENOMEM;

        strlcpy(dev->ifalias, alias, len+1);
        return len;
}


/**
 *      netdev_features_change - device changes features
 *      @dev: device to cause notification
 *
 *      Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
        call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *      netdev_state_change - device changes state
 *      @dev: device to cause notification
 *
 *      Called to indicate a device has changed state. This function calls
 *      the notifier chains for netdev_chain and sends a NEWLINK message
 *      to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
        if (dev->flags & IFF_UP) {
                call_netdevice_notifiers(NETDEV_CHANGE, dev);
                rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
        }
}

void netdev_bonding_change(struct net_device *dev)
{
        call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);
1020
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021/**
1022 * dev_load - load a network module
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07001023 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 * @name: name of interface
1025 *
1026 * If a network interface is not present and the process has suitable
1027 * privileges this function loads the module. If module loading is not
1028 * available in this kernel then it becomes a nop.
1029 */
1030
Eric W. Biederman881d9662007-09-17 11:56:21 -07001031void dev_load(struct net *net, const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032{
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001033 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034
1035 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -07001036 dev = __dev_get_by_name(net, name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 read_unlock(&dev_base_lock);
1038
1039 if (!dev && capable(CAP_SYS_MODULE))
1040 request_module("%s", name);
1041}
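
/*
 * Illustrative sketch (an assumption about a typical caller, not text
 * from the original file): ioctl paths call dev_load() before taking RTNL
 * so that an ioctl on a not-yet-present interface can first pull in its
 * driver module.  The argument handling here is hypothetical.
 */
#if 0
static int example_ioctl(struct net *net, void __user *arg)
{
        struct ifreq ifr;

        if (copy_from_user(&ifr, arg, sizeof(ifr)))
                return -EFAULT;
        ifr.ifr_name[IFNAMSIZ - 1] = 0;

        dev_load(net, ifr.ifr_name);    /* may request_module() */
        rtnl_lock();
        /* ... look the device up and perform the ioctl ... */
        rtnl_unlock();
        return 0;
}
#endif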

/**
 *      dev_open - prepare an interface for use.
 *      @dev: device to open
 *
 *      Takes a device from down to up state. The device's private open
 *      function is invoked and then the multicast lists are loaded. Finally
 *      the device is moved into the up state and a %NETDEV_UP message is
 *      sent to the netdev notifier chain.
 *
 *      Calling this function on an active interface is a nop. On a failure
 *      a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
        const struct net_device_ops *ops = dev->netdev_ops;
        int ret = 0;

        ASSERT_RTNL();

        /*
         *      Is it already up?
         */

        if (dev->flags & IFF_UP)
                return 0;

        /*
         *      Is it even present?
         */
        if (!netif_device_present(dev))
                return -ENODEV;

        /*
         *      Call device private open method
         */
        set_bit(__LINK_STATE_START, &dev->state);

        if (ops->ndo_validate_addr)
                ret = ops->ndo_validate_addr(dev);

        if (!ret && ops->ndo_open)
                ret = ops->ndo_open(dev);

        /*
         *      If it went open OK then:
         */

        if (ret)
                clear_bit(__LINK_STATE_START, &dev->state);
        else {
                /*
                 *      Set the flags.
                 */
                dev->flags |= IFF_UP;

                /*
                 *      Enable NET_DMA
                 */
                net_dmaengine_get();

                /*
                 *      Initialize multicasting status
                 */
                dev_set_rx_mode(dev);

                /*
                 *      Wakeup transmit queue engine
                 */
                dev_activate(dev);

                /*
                 *      ... and announce new interface.
                 */
                call_netdevice_notifiers(NETDEV_UP, dev);
        }

        return ret;
}
1121
1122/**
1123 * dev_close - shutdown an interface.
1124 * @dev: device to shutdown
1125 *
1126 * This function moves an active device into down state. A
1127 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1128 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1129 * chain.
1130 */
1131int dev_close(struct net_device *dev)
1132{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08001133 const struct net_device_ops *ops = dev->netdev_ops;
Ben Hutchingse46b66b2008-05-08 02:53:17 -07001134 ASSERT_RTNL();
1135
David S. Miller9d5010d2007-09-12 14:33:25 +02001136 might_sleep();
1137
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 if (!(dev->flags & IFF_UP))
1139 return 0;
1140
1141 /*
1142 * Tell people we are going down, so that they can
1143 * prepare to death, when device is still operating.
1144 */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07001145 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 clear_bit(__LINK_STATE_START, &dev->state);
1148
1149 /* Synchronize to scheduled poll. We cannot touch poll list,
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001150 * it can be even on different cpu. So just clear netif_running().
1151 *
1152 * dev->stop() will invoke napi_disable() on all of it's
1153 * napi_struct instances on this device.
1154 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155 smp_mb__after_clear_bit(); /* Commit netif_running(). */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156
Matti Linnanvuorid8b2a4d2008-02-12 23:10:11 -08001157 dev_deactivate(dev);
1158
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159 /*
1160 * Call the device specific close. This cannot fail.
1161 * Only if device is UP
1162 *
1163 * We allow it to be called even after a DETACH hot-plug
1164 * event.
1165 */
Stephen Hemmingerd3147742008-11-19 21:32:24 -08001166 if (ops->ndo_stop)
1167 ops->ndo_stop(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168
1169 /*
1170 * Device is now down.
1171 */
1172
1173 dev->flags &= ~IFF_UP;
1174
1175 /*
1176 * Tell people we are down
1177 */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07001178 call_netdevice_notifiers(NETDEV_DOWN, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179
Dan Williams649274d2009-01-11 00:20:39 -08001180 /*
1181 * Shutdown NET_DMA
1182 */
David S. Millerb4bd07c2009-02-06 22:06:43 -08001183 net_dmaengine_put();
Dan Williams649274d2009-01-11 00:20:39 -08001184
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 return 0;
1186}
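
/*
 * Illustrative sketch (not part of the original file): both routines
 * assert RTNL, so an out-of-line caller bounces an interface like this.
 * The dev pointer is assumed to be held by the caller.
 */
#if 0
static int example_bounce(struct net_device *dev)
{
        int err;

        rtnl_lock();
        dev_close(dev);                 /* no-op if already down */
        err = dev_open(dev);            /* negative errno on failure */
        rtnl_unlock();
        return err;
}
#endif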


/**
 *      dev_disable_lro - disable Large Receive Offload on a device
 *      @dev: device
 *
 *      Disable Large Receive Offload (LRO) on a net device.  Must be
 *      called under RTNL.  This is needed if received packets may be
 *      forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
        if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
            dev->ethtool_ops->set_flags) {
                u32 flags = dev->ethtool_ops->get_flags(dev);
                if (flags & ETH_FLAG_LRO) {
                        flags &= ~ETH_FLAG_LRO;
                        dev->ethtool_ops->set_flags(dev, flags);
                }
        }
        WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);


static int dev_boot_phase = 1;

/*
 *      Device change register/unregister. These are not inline or static
 *      as we export them to the world.
 */

/**
 *      register_netdevice_notifier - register a network notifier block
 *      @nb: notifier
 *
 *      Register a notifier to be called when network device events occur.
 *      The notifier passed is linked into the kernel structures and must
 *      not be reused until it has been unregistered. A negative errno code
 *      is returned on a failure.
 *
 *      When registered, all registration and up events are replayed
 *      to the new notifier to allow the caller a race free
 *      view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
        struct net_device *dev;
        struct net_device *last;
        struct net *net;
        int err;

        rtnl_lock();
        err = raw_notifier_chain_register(&netdev_chain, nb);
        if (err)
                goto unlock;
        if (dev_boot_phase)
                goto unlock;
        for_each_net(net) {
                for_each_netdev(net, dev) {
                        err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
                        err = notifier_to_errno(err);
                        if (err)
                                goto rollback;

                        if (!(dev->flags & IFF_UP))
                                continue;

                        nb->notifier_call(nb, NETDEV_UP, dev);
                }
        }

unlock:
        rtnl_unlock();
        return err;

rollback:
        last = dev;
        for_each_net(net) {
                for_each_netdev(net, dev) {
                        if (dev == last)
                                break;

                        if (dev->flags & IFF_UP) {
                                nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
                                nb->notifier_call(nb, NETDEV_DOWN, dev);
                        }
                        nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
                }
        }

        raw_notifier_chain_unregister(&netdev_chain, nb);
        goto unlock;
}
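
/*
 * Illustrative sketch (not part of the original file): a minimal netdev
 * notifier.  In this kernel the callback's third argument is the
 * net_device itself; the events shown are the ones replayed above.
 */
#if 0
static int example_event(struct notifier_block *nb, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = ptr;

        switch (event) {
        case NETDEV_REGISTER:
        case NETDEV_UP:
                printk(KERN_DEBUG "example: %s event %lu\n",
                       dev->name, event);
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
        .notifier_call = example_event,
};

/* register_netdevice_notifier(&example_nb) replays REGISTER/UP events */
#endif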

/**
 *      unregister_netdevice_notifier - unregister a network notifier block
 *      @nb: notifier
 *
 *      Unregister a notifier previously registered by
 *      register_netdevice_notifier(). The notifier is unlinked from the
 *      kernel structures and may then be reused. A negative errno code
 *      is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
        int err;

        rtnl_lock();
        err = raw_notifier_chain_unregister(&netdev_chain, nb);
        rtnl_unlock();
        return err;
}

/**
 *      call_netdevice_notifiers - call all network notifier blocks
 *      @val: value passed unmodified to notifier function
 *      @dev: net_device pointer passed unmodified to notifier function
 *
 *      Call all network notifier blocks.  Parameters and return value
 *      are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
        return raw_notifier_call_chain(&netdev_chain, val, dev);
}

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

void net_enable_timestamp(void)
{
        atomic_inc(&netstamp_needed);
}

void net_disable_timestamp(void)
{
        atomic_dec(&netstamp_needed);
}

static inline void net_timestamp(struct sk_buff *skb)
{
        if (atomic_read(&netstamp_needed))
                __net_timestamp(skb);
        else
                skb->tstamp.tv64 = 0;
}

/*
 *      Support routine. Sends outgoing frames to any network
 *      taps currently in use.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
        struct packet_type *ptype;

        net_timestamp(skb);

        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                /* Never send packets back to the socket
                 * they originated from - MvS (miquels@drinkel.ow.org)
                 */
                if ((ptype->dev == dev || !ptype->dev) &&
                    (ptype->af_packet_priv == NULL ||
                     (struct sock *)ptype->af_packet_priv != skb->sk)) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (!skb2)
                                break;

                        /* skb->nh should be correctly
                         * set by sender, so that the second statement is
                         * just protection against buggy protocols.
                         */
                        skb_reset_mac_header(skb2);

                        if (skb_network_header(skb2) < skb2->data ||
                            skb2->network_header > skb2->tail) {
                                if (net_ratelimit())
                                        printk(KERN_CRIT "protocol %04x is "
                                               "buggy, dev %s\n",
                                               skb2->protocol, dev->name);
                                skb_reset_network_header(skb2);
                        }

                        skb2->transport_header = skb2->network_header;
                        skb2->pkt_type = PACKET_OUTGOING;
                        ptype->func(skb2, skb->dev, ptype, skb->dev);
                }
        }
        rcu_read_unlock();
}


Jarek Poplawskidef82a12008-08-17 21:54:43 -07001385static inline void __netif_reschedule(struct Qdisc *q)
1386{
1387 struct softnet_data *sd;
1388 unsigned long flags;
1389
1390 local_irq_save(flags);
1391 sd = &__get_cpu_var(softnet_data);
1392 q->next_sched = sd->output_queue;
1393 sd->output_queue = q;
1394 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1395 local_irq_restore(flags);
1396}
1397
David S. Miller37437bb2008-07-16 02:15:04 -07001398void __netif_schedule(struct Qdisc *q)
Denis Vlasenko56079432006-03-29 15:57:29 -08001399{
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001400 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1401 __netif_reschedule(q);
Denis Vlasenko56079432006-03-29 15:57:29 -08001402}
1403EXPORT_SYMBOL(__netif_schedule);
1404
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001405void dev_kfree_skb_irq(struct sk_buff *skb)
Denis Vlasenko56079432006-03-29 15:57:29 -08001406{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001407 if (atomic_dec_and_test(&skb->users)) {
1408 struct softnet_data *sd;
1409 unsigned long flags;
Denis Vlasenko56079432006-03-29 15:57:29 -08001410
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001411 local_irq_save(flags);
1412 sd = &__get_cpu_var(softnet_data);
1413 skb->next = sd->completion_queue;
1414 sd->completion_queue = skb;
1415 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1416 local_irq_restore(flags);
1417 }
Denis Vlasenko56079432006-03-29 15:57:29 -08001418}
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001419EXPORT_SYMBOL(dev_kfree_skb_irq);
Denis Vlasenko56079432006-03-29 15:57:29 -08001420
1421void dev_kfree_skb_any(struct sk_buff *skb)
1422{
1423 if (in_irq() || irqs_disabled())
1424 dev_kfree_skb_irq(skb);
1425 else
1426 dev_kfree_skb(skb);
1427}
1428EXPORT_SYMBOL(dev_kfree_skb_any);
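
/*
 * Illustration (hedged sketch): a TX-completion handler, which may run
 * in hard-IRQ context, frees transmitted skbs with dev_kfree_skb_irq(),
 * or with dev_kfree_skb_any() when the context varies. The ring layout
 * below is an assumption for the example.
 *
 *	while (ring->clean != ring->use) {
 *		struct sk_buff *skb = ring->buf[ring->clean].skb;
 *
 *		dev_kfree_skb_any(skb);	// safe in any context
 *		ring->clean = (ring->clean + 1) & ring->mask;
 *	}
 */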
1429
1430
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001431/**
1432 * netif_device_detach - mark device as removed
1433 * @dev: network device
1434 *
1435 * Mark the device as removed from the system and therefore no longer available.
1436 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001437void netif_device_detach(struct net_device *dev)
1438{
1439 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1440 netif_running(dev)) {
1441 netif_stop_queue(dev);
1442 }
1443}
1444EXPORT_SYMBOL(netif_device_detach);
1445
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001446/**
1447 * netif_device_attach - mark device as attached
1448 * @dev: network device
1449 *
1450 * Mark the device as attached to the system and restart it if needed.
1451 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001452void netif_device_attach(struct net_device *dev)
1453{
1454 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1455 netif_running(dev)) {
1456 netif_wake_queue(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001457 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001458 }
1459}
1460EXPORT_SYMBOL(netif_device_attach);
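
/*
 * Illustration (a sketch, assuming a PCI driver with my_suspend/my_resume
 * callbacks): detach/attach typically bracket power transitions.
 *
 *	static int my_suspend(struct pci_dev *pdev, pm_message_t state)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_detach(dev);	// stop the queue, mark absent
 *		// ... save state, power the hardware down ...
 *		return 0;
 *	}
 *
 *	static int my_resume(struct pci_dev *pdev)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		// ... power the hardware up, restore state ...
 *		netif_device_attach(dev);	// mark present, wake the queue
 *		return 0;
 *	}
 */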
1461
Ben Hutchings6de329e2008-06-16 17:02:28 -07001462static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1463{
1464 return ((features & NETIF_F_GEN_CSUM) ||
1465 ((features & NETIF_F_IP_CSUM) &&
1466 protocol == htons(ETH_P_IP)) ||
1467 ((features & NETIF_F_IPV6_CSUM) &&
1468 protocol == htons(ETH_P_IPV6)));
1469}
1470
1471static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1472{
1473 if (can_checksum_protocol(dev->features, skb->protocol))
1474 return true;
1475
1476 if (skb->protocol == htons(ETH_P_8021Q)) {
1477 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1478 if (can_checksum_protocol(dev->features & dev->vlan_features,
1479 veh->h_vlan_encapsulated_proto))
1480 return true;
1481 }
1482
1483 return false;
1484}
Denis Vlasenko56079432006-03-29 15:57:29 -08001485
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486/*
1487 * Invalidate hardware checksum when packet is to be mangled, and
1488 * complete checksum manually on outgoing path.
1489 */
Patrick McHardy84fa7932006-08-29 16:44:56 -07001490int skb_checksum_help(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491{
Al Virod3bc23e2006-11-14 21:24:49 -08001492 __wsum csum;
Herbert Xu663ead32007-04-09 11:59:07 -07001493 int ret = 0, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494
Patrick McHardy84fa7932006-08-29 16:44:56 -07001495 if (skb->ip_summed == CHECKSUM_COMPLETE)
Herbert Xua430a432006-07-08 13:34:56 -07001496 goto out_set_summed;
1497
1498 if (unlikely(skb_shinfo(skb)->gso_size)) {
Herbert Xua430a432006-07-08 13:34:56 -07001499 /* Let GSO fix up the checksum. */
1500 goto out_set_summed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 }
1502
Herbert Xua0308472007-10-15 01:47:15 -07001503 offset = skb->csum_start - skb_headroom(skb);
1504 BUG_ON(offset >= skb_headlen(skb));
1505 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1506
1507 offset += skb->csum_offset;
1508 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1509
1510 if (skb_cloned(skb) &&
1511 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1513 if (ret)
1514 goto out;
1515 }
1516
Herbert Xua0308472007-10-15 01:47:15 -07001517 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
Herbert Xua430a432006-07-08 13:34:56 -07001518out_set_summed:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 skb->ip_summed = CHECKSUM_NONE;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001520out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 return ret;
1522}
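
/*
 * Illustration (sketch): callers fall back to skb_checksum_help() when the
 * device cannot checksum a CHECKSUM_PARTIAL skb, exactly as dev_queue_xmit()
 * does below; "drop" is an assumed error label.
 *
 *	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *	    !dev_can_checksum(dev, skb) &&
 *	    skb_checksum_help(skb))
 *		goto drop;
 */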
1523
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001524/**
1525 * skb_gso_segment - Perform segmentation on skb.
1526 * @skb: buffer to segment
Herbert Xu576a30e2006-06-27 13:22:38 -07001527 * @features: features for the output path (see dev->features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001528 *
1529 * This function segments the given skb and returns a list of segments.
Herbert Xu576a30e2006-06-27 13:22:38 -07001530 *
1531 * It may return NULL if the skb requires no segmentation. This is
1532 * only possible when GSO is used for verifying header integrity.
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001533 */
Herbert Xu576a30e2006-06-27 13:22:38 -07001534struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001535{
1536 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1537 struct packet_type *ptype;
Al Viro252e33462006-11-14 20:48:11 -08001538 __be16 type = skb->protocol;
Herbert Xua430a432006-07-08 13:34:56 -07001539 int err;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001540
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001541 skb_reset_mac_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001542 skb->mac_len = skb->network_header - skb->mac_header;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001543 __skb_pull(skb, skb->mac_len);
1544
Herbert Xu67fd1a72009-01-19 16:26:44 -08001545 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1546 struct net_device *dev = skb->dev;
1547 struct ethtool_drvinfo info = {};
1548
1549 if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1550 dev->ethtool_ops->get_drvinfo(dev, &info);
1551
1552 WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1553 "ip_summed=%d",
1554 info.driver, dev ? dev->features : 0L,
1555 skb->sk ? skb->sk->sk_route_caps : 0L,
1556 skb->len, skb->data_len, skb->ip_summed);
1557
Herbert Xua430a432006-07-08 13:34:56 -07001558 if (skb_header_cloned(skb) &&
1559 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1560 return ERR_PTR(err);
1561 }
1562
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001563 rcu_read_lock();
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08001564 list_for_each_entry_rcu(ptype,
1565 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001566 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
Patrick McHardy84fa7932006-08-29 16:44:56 -07001567 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001568 err = ptype->gso_send_check(skb);
1569 segs = ERR_PTR(err);
1570 if (err || skb_gso_ok(skb, features))
1571 break;
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001572 __skb_push(skb, (skb->data -
1573 skb_network_header(skb)));
Herbert Xua430a432006-07-08 13:34:56 -07001574 }
Herbert Xu576a30e2006-06-27 13:22:38 -07001575 segs = ptype->gso_segment(skb, features);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001576 break;
1577 }
1578 }
1579 rcu_read_unlock();
1580
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001581 __skb_push(skb, skb->data - skb_mac_header(skb));
Herbert Xu576a30e2006-06-27 13:22:38 -07001582
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001583 return segs;
1584}
1585
1586EXPORT_SYMBOL(skb_gso_segment);
1587
Herbert Xufb286bb2005-11-10 13:01:24 -08001588/* Take action when hardware reception checksum errors are detected. */
1589#ifdef CONFIG_BUG
1590void netdev_rx_csum_fault(struct net_device *dev)
1591{
1592 if (net_ratelimit()) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001593 printk(KERN_ERR "%s: hw csum failure.\n",
Stephen Hemminger246a4212005-12-08 15:21:39 -08001594 dev ? dev->name : "<unknown>");
Herbert Xufb286bb2005-11-10 13:01:24 -08001595 dump_stack();
1596 }
1597}
1598EXPORT_SYMBOL(netdev_rx_csum_fault);
1599#endif
1600
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601/* Actually, we should eliminate this check as soon as we know that:
1602 * 1. An IOMMU is present and can map all of the memory.
1603 * 2. No high memory really exists on this machine.
1604 */
1605
1606static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1607{
Herbert Xu3d3a8532006-06-27 13:33:10 -07001608#ifdef CONFIG_HIGHMEM
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 int i;
1610
1611 if (dev->features & NETIF_F_HIGHDMA)
1612 return 0;
1613
1614 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1615 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1616 return 1;
1617
Herbert Xu3d3a8532006-06-27 13:33:10 -07001618#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001619 return 0;
1620}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001622struct dev_gso_cb {
1623 void (*destructor)(struct sk_buff *skb);
1624};
1625
1626#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1627
1628static void dev_gso_skb_destructor(struct sk_buff *skb)
1629{
1630 struct dev_gso_cb *cb;
1631
1632 do {
1633 struct sk_buff *nskb = skb->next;
1634
1635 skb->next = nskb->next;
1636 nskb->next = NULL;
1637 kfree_skb(nskb);
1638 } while (skb->next);
1639
1640 cb = DEV_GSO_CB(skb);
1641 if (cb->destructor)
1642 cb->destructor(skb);
1643}
1644
1645/**
1646 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1647 * @skb: buffer to segment
1648 *
1649 * This function segments the given skb and stores the list of segments
1650 * in skb->next.
1651 */
1652static int dev_gso_segment(struct sk_buff *skb)
1653{
1654 struct net_device *dev = skb->dev;
1655 struct sk_buff *segs;
Herbert Xu576a30e2006-06-27 13:22:38 -07001656 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1657 NETIF_F_SG : 0);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001658
Herbert Xu576a30e2006-06-27 13:22:38 -07001659 segs = skb_gso_segment(skb, features);
1660
1661 /* Verifying header integrity only. */
1662 if (!segs)
1663 return 0;
1664
Hirofumi Nakagawa801678c2008-04-29 01:03:09 -07001665 if (IS_ERR(segs))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001666 return PTR_ERR(segs);
1667
1668 skb->next = segs;
1669 DEV_GSO_CB(skb)->destructor = skb->destructor;
1670 skb->destructor = dev_gso_skb_destructor;
1671
1672 return 0;
1673}
1674
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001675int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1676 struct netdev_queue *txq)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001677{
Stephen Hemminger00829822008-11-20 20:14:53 -08001678 const struct net_device_ops *ops = dev->netdev_ops;
Patrick Ohlyac45f602009-02-12 05:03:37 +00001679 int rc;
Stephen Hemminger00829822008-11-20 20:14:53 -08001680
1681 prefetch(&dev->netdev_ops->ndo_start_xmit);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001682 if (likely(!skb->next)) {
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -07001683 if (!list_empty(&ptype_all))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001684 dev_queue_xmit_nit(skb, dev);
1685
Herbert Xu576a30e2006-06-27 13:22:38 -07001686 if (netif_needs_gso(dev, skb)) {
1687 if (unlikely(dev_gso_segment(skb)))
1688 goto out_kfree_skb;
1689 if (skb->next)
1690 goto gso;
1691 }
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001692
Patrick Ohlyac45f602009-02-12 05:03:37 +00001693 rc = ops->ndo_start_xmit(skb, dev);
1694 /*
1695 * TODO: if skb_orphan() was called by
1696 * dev->hard_start_xmit() (for example, the unmodified
1697 * igb driver does that; bnx2 doesn't), then
1698 * skb_tx_software_timestamp() will be unable to send
1699 * back the time stamp.
1700 *
1701 * How can this be prevented? Always create another
1702 * reference to the socket before calling
1703 * dev->hard_start_xmit()? Prevent that skb_orphan()
1704 * does anything in dev->hard_start_xmit() by clearing
1705 * the skb destructor before the call and restoring it
1706 * afterwards, then doing the skb_orphan() ourselves?
1707 */
Patrick Ohlyac45f602009-02-12 05:03:37 +00001708 return rc;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001709 }
1710
Herbert Xu576a30e2006-06-27 13:22:38 -07001711gso:
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001712 do {
1713 struct sk_buff *nskb = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001714
1715 skb->next = nskb->next;
1716 nskb->next = NULL;
Stephen Hemminger00829822008-11-20 20:14:53 -08001717 rc = ops->ndo_start_xmit(nskb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001718 if (unlikely(rc)) {
Michael Chanf54d9e82006-06-25 23:57:04 -07001719 nskb->next = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001720 skb->next = nskb;
1721 return rc;
1722 }
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001723 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
Michael Chanf54d9e82006-06-25 23:57:04 -07001724 return NETDEV_TX_BUSY;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001725 } while (skb->next);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001726
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001727 skb->destructor = DEV_GSO_CB(skb)->destructor;
1728
1729out_kfree_skb:
1730 kfree_skb(skb);
1731 return 0;
1732}
1733
David S. Miller70192982009-01-27 16:34:47 -08001734static u32 skb_tx_hashrnd;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001735
David S. Miller70192982009-01-27 16:34:47 -08001736static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb)
David S. Miller8f0f2222008-07-15 03:47:03 -07001737{
David S. Miller70192982009-01-27 16:34:47 -08001738 u32 hash;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001739
David S. Millerd5a9e242009-01-27 16:22:11 -08001740 if (skb_rx_queue_recorded(skb)) {
David S. Miller70192982009-01-27 16:34:47 -08001741 hash = skb_get_rx_queue(skb);
1742 } else if (skb->sk && skb->sk->sk_hash) {
1743 hash = skb->sk->sk_hash;
1744 } else
1745 hash = skb->protocol;
David S. Millerd5a9e242009-01-27 16:22:11 -08001746
David S. Miller70192982009-01-27 16:34:47 -08001747 hash = jhash_1word(hash, skb_tx_hashrnd);
David S. Millerd5a9e242009-01-27 16:22:11 -08001748
David S. Millerb6b2fed2008-07-21 09:48:06 -07001749 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
David S. Miller8f0f2222008-07-15 03:47:03 -07001750}
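
/*
 * Illustration: after jhash_1word() mixing, the final line above maps the
 * 32-bit hash onto [0, real_num_tx_queues) with a multiply-shift rather
 * than a modulo. A worked example, assuming a hypothetical 8-queue device:
 *
 *	u32 hash = 0xdeadbeef;
 *	u16 queue = (u16)(((u64)hash * 8) >> 32);
 *	// hash / 2^32 is a fraction in [0, 1); scaling it by 8 yields
 *	// (0xdeadbeef * 8ULL) >> 32 == 6, i.e. queue 6 of 0..7.
 */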
1751
David S. Millere8a04642008-07-17 00:34:19 -07001752static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1753 struct sk_buff *skb)
1754{
Stephen Hemminger00829822008-11-20 20:14:53 -08001755 const struct net_device_ops *ops = dev->netdev_ops;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001756 u16 queue_index = 0;
1757
Stephen Hemminger00829822008-11-20 20:14:53 -08001758 if (ops->ndo_select_queue)
1759 queue_index = ops->ndo_select_queue(dev, skb);
David S. Miller8f0f2222008-07-15 03:47:03 -07001760 else if (dev->real_num_tx_queues > 1)
David S. Miller70192982009-01-27 16:34:47 -08001761 queue_index = skb_tx_hash(dev, skb);
David S. Millereae792b2008-07-15 03:03:33 -07001762
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001763 skb_set_queue_mapping(skb, queue_index);
1764 return netdev_get_tx_queue(dev, queue_index);
David S. Millere8a04642008-07-17 00:34:19 -07001765}
1766
Dave Jonesd29f7492008-07-22 14:09:06 -07001767/**
1768 * dev_queue_xmit - transmit a buffer
1769 * @skb: buffer to transmit
1770 *
1771 * Queue a buffer for transmission to a network device. The caller must
1772 * have set the device and priority and built the buffer before calling
1773 * this function. The function can be called from an interrupt.
1774 *
1775 * A negative errno code is returned on a failure. A success does not
1776 * guarantee the frame will be transmitted as it may be dropped due
1777 * to congestion or traffic shaping.
1778 *
1779 * -----------------------------------------------------------------------------------
1780 * I notice this method can also return errors from the queue disciplines,
1781 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1782 * be positive.
1783 *
1784 * Regardless of the return value, the skb is consumed, so it is currently
1785 * difficult to retry a send to this method. (You can bump the ref count
1786 * before sending to hold a reference for retry if you are careful.)
1787 *
1788 * When calling this method, interrupts MUST be enabled. This is because
1789 * the BH enable code must have IRQs enabled so that it will not deadlock.
1790 * --BLG
1791 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792int dev_queue_xmit(struct sk_buff *skb)
1793{
1794 struct net_device *dev = skb->dev;
David S. Millerdc2b4842008-07-08 17:18:23 -07001795 struct netdev_queue *txq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796 struct Qdisc *q;
1797 int rc = -ENOMEM;
1798
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001799 /* GSO will handle the following emulations directly. */
1800 if (netif_needs_gso(dev, skb))
1801 goto gso;
1802
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803 if (skb_shinfo(skb)->frag_list &&
1804 !(dev->features & NETIF_F_FRAGLIST) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001805 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806 goto out_kfree_skb;
1807
1808 /* Fragmented skb is linearized if device does not support SG,
1809 * or if at least one of fragments is in highmem and device
1810 * does not support DMA from it.
1811 */
1812 if (skb_shinfo(skb)->nr_frags &&
1813 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001814 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815 goto out_kfree_skb;
1816
1817 /* If packet is not checksummed and device does not support
1818 * checksumming for this protocol, complete checksumming here.
1819 */
Herbert Xu663ead32007-04-09 11:59:07 -07001820 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1821 skb_set_transport_header(skb, skb->csum_start -
1822 skb_headroom(skb));
Ben Hutchings6de329e2008-06-16 17:02:28 -07001823 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1824 goto out_kfree_skb;
Herbert Xu663ead32007-04-09 11:59:07 -07001825 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001827gso:
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001828 /* Disable soft irqs for various locks below. Also
1829 * stops preemption for RCU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001831 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832
David S. Millereae792b2008-07-15 03:03:33 -07001833 txq = dev_pick_tx(dev, skb);
David S. Millerb0e1e642008-07-08 17:42:10 -07001834 q = rcu_dereference(txq->qdisc);
David S. Miller37437bb2008-07-16 02:15:04 -07001835
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836#ifdef CONFIG_NET_CLS_ACT
1837 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
1838#endif
1839 if (q->enqueue) {
David S. Miller5fb66222008-08-02 20:02:43 -07001840 spinlock_t *root_lock = qdisc_lock(q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841
David S. Miller37437bb2008-07-16 02:15:04 -07001842 spin_lock(root_lock);
1843
David S. Millera9312ae2008-08-17 21:51:03 -07001844 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
David S. Miller96d20312008-08-17 23:37:16 -07001845 kfree_skb(skb);
David S. Millera9312ae2008-08-17 21:51:03 -07001846 rc = NET_XMIT_DROP;
David S. Miller96d20312008-08-17 23:37:16 -07001847 } else {
1848 rc = qdisc_enqueue_root(skb, q);
1849 qdisc_run(q);
David S. Millera9312ae2008-08-17 21:51:03 -07001850 }
David S. Miller37437bb2008-07-16 02:15:04 -07001851 spin_unlock(root_lock);
1852
David S. Miller37437bb2008-07-16 02:15:04 -07001853 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854 }
1855
1856 /* The device has no queue. Common case for software devices:
1857 loopback, all sorts of tunnels...
1858
Herbert Xu932ff272006-06-09 12:20:56 -07001859 Really, it is unlikely that netif_tx_lock protection is necessary
1860 here. (f.e. loopback and IP tunnels are clean, ignoring statistics
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861 counters.)
1862 However, it is possible that they rely on the protection
1863 we provide here.
1864
1865 Check this and, if safe, remove the lock. It is not prone to deadlocks.
1866 Or remove the noqueue qdisc entirely; that is even simpler 8)
1867 */
1868 if (dev->flags & IFF_UP) {
1869 int cpu = smp_processor_id(); /* ok because BHs are off */
1870
David S. Millerc773e842008-07-08 23:13:53 -07001871 if (txq->xmit_lock_owner != cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001872
David S. Millerc773e842008-07-08 23:13:53 -07001873 HARD_TX_LOCK(dev, txq, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001874
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001875 if (!netif_tx_queue_stopped(txq)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876 rc = 0;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001877 if (!dev_hard_start_xmit(skb, dev, txq)) {
David S. Millerc773e842008-07-08 23:13:53 -07001878 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 goto out;
1880 }
1881 }
David S. Millerc773e842008-07-08 23:13:53 -07001882 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883 if (net_ratelimit())
1884 printk(KERN_CRIT "Virtual device %s asks to "
1885 "queue packet!\n", dev->name);
1886 } else {
1887 /* Recursion detected! It is possible,
1888 * unfortunately. */
1889 if (net_ratelimit())
1890 printk(KERN_CRIT "Dead loop on virtual device "
1891 "%s, fix it urgently!\n", dev->name);
1892 }
1893 }
1894
1895 rc = -ENETDOWN;
Herbert Xud4828d82006-06-22 02:28:18 -07001896 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897
1898out_kfree_skb:
1899 kfree_skb(skb);
1900 return rc;
1901out:
Herbert Xud4828d82006-06-22 02:28:18 -07001902 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903 return rc;
1904}
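
/*
 * Illustration (hedged sketch, not a complete sender): an in-kernel caller
 * builds the buffer, sets dev and protocol, then hands it off. Link-layer
 * header construction (dev_hard_header() and friends) is omitted here;
 * "payload"/"len" are assumed.
 *
 *	struct sk_buff *skb = alloc_skb(len + LL_RESERVED_SPACE(dev),
 *					GFP_ATOMIC);
 *	if (!skb)
 *		return -ENOMEM;
 *	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 *	memcpy(skb_put(skb, len), payload, len);
 *	skb->dev = dev;
 *	skb->protocol = htons(ETH_P_802_2);	// assumed protocol
 *	dev_queue_xmit(skb);	// consumes skb whatever the outcome
 */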
1905
1906
1907/*=======================================================================
1908 Receiver routines
1909 =======================================================================*/
1910
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07001911int netdev_max_backlog __read_mostly = 1000;
1912int netdev_budget __read_mostly = 300;
1913int weight_p __read_mostly = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914
1915DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1916
1917
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918/**
1919 * netif_rx - post buffer to the network code
1920 * @skb: buffer to post
1921 *
1922 * This function receives a packet from a device driver and queues it for
1923 * the upper (protocol) levels to process. It always succeeds. The buffer
1924 * may be dropped during processing for congestion control or by the
1925 * protocol layers.
1926 *
1927 * return values:
1928 * NET_RX_SUCCESS (no congestion)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929 * NET_RX_DROP (packet was dropped)
1930 *
1931 */
1932
1933int netif_rx(struct sk_buff *skb)
1934{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935 struct softnet_data *queue;
1936 unsigned long flags;
1937
1938 /* if netpoll wants it, pretend we never saw it */
1939 if (netpoll_rx(skb))
1940 return NET_RX_DROP;
1941
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001942 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001943 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001944
1945 /*
1946 * The code is arranged so that the path is shortest
1947 * when the CPU is congested but still operating.
1948 */
1949 local_irq_save(flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 queue = &__get_cpu_var(softnet_data);
1951
1952 __get_cpu_var(netdev_rx_stat).total++;
1953 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1954 if (queue->input_pkt_queue.qlen) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955enqueue:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956 __skb_queue_tail(&queue->input_pkt_queue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957 local_irq_restore(flags);
Stephen Hemminger34008d82005-06-23 20:10:00 -07001958 return NET_RX_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959 }
1960
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001961 napi_schedule(&queue->backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001962 goto enqueue;
1963 }
1964
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965 __get_cpu_var(netdev_rx_stat).dropped++;
1966 local_irq_restore(flags);
1967
1968 kfree_skb(skb);
1969 return NET_RX_DROP;
1970}
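
/*
 * Illustration (sketch): a non-NAPI driver's receive interrupt hands each
 * frame to the stack like this; my_hw_copy_frame() is an assumed helper.
 *
 *	skb = dev_alloc_skb(pkt_len + NET_IP_ALIGN);
 *	if (!skb)
 *		goto drop;
 *	skb_reserve(skb, NET_IP_ALIGN);		// align the IP header
 *	my_hw_copy_frame(priv, skb_put(skb, pkt_len));
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);		// queue for softirq-time processing
 */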
1971
1972int netif_rx_ni(struct sk_buff *skb)
1973{
1974 int err;
1975
1976 preempt_disable();
1977 err = netif_rx(skb);
1978 if (local_softirq_pending())
1979 do_softirq();
1980 preempt_enable();
1981
1982 return err;
1983}
1984
1985EXPORT_SYMBOL(netif_rx_ni);
1986
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987static void net_tx_action(struct softirq_action *h)
1988{
1989 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1990
1991 if (sd->completion_queue) {
1992 struct sk_buff *clist;
1993
1994 local_irq_disable();
1995 clist = sd->completion_queue;
1996 sd->completion_queue = NULL;
1997 local_irq_enable();
1998
1999 while (clist) {
2000 struct sk_buff *skb = clist;
2001 clist = clist->next;
2002
Ilpo Järvinen547b7922008-07-25 21:43:18 -07002003 WARN_ON(atomic_read(&skb->users));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004 __kfree_skb(skb);
2005 }
2006 }
2007
2008 if (sd->output_queue) {
David S. Miller37437bb2008-07-16 02:15:04 -07002009 struct Qdisc *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010
2011 local_irq_disable();
2012 head = sd->output_queue;
2013 sd->output_queue = NULL;
2014 local_irq_enable();
2015
2016 while (head) {
David S. Miller37437bb2008-07-16 02:15:04 -07002017 struct Qdisc *q = head;
2018 spinlock_t *root_lock;
2019
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020 head = head->next_sched;
2021
David S. Miller5fb66222008-08-02 20:02:43 -07002022 root_lock = qdisc_lock(q);
David S. Miller37437bb2008-07-16 02:15:04 -07002023 if (spin_trylock(root_lock)) {
Jarek Poplawskidef82a12008-08-17 21:54:43 -07002024 smp_mb__before_clear_bit();
2025 clear_bit(__QDISC_STATE_SCHED,
2026 &q->state);
David S. Miller37437bb2008-07-16 02:15:04 -07002027 qdisc_run(q);
2028 spin_unlock(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002029 } else {
David S. Miller195648b2008-08-19 04:00:36 -07002030 if (!test_bit(__QDISC_STATE_DEACTIVATED,
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002031 &q->state)) {
David S. Miller195648b2008-08-19 04:00:36 -07002032 __netif_reschedule(q);
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002033 } else {
2034 smp_mb__before_clear_bit();
2035 clear_bit(__QDISC_STATE_SCHED,
2036 &q->state);
2037 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002038 }
2039 }
2040 }
2041}
2042
Stephen Hemminger6f05f622007-03-08 20:46:03 -08002043static inline int deliver_skb(struct sk_buff *skb,
2044 struct packet_type *pt_prev,
2045 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046{
2047 atomic_inc(&skb->users);
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002048 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049}
2050
2051#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
Stephen Hemminger6229e362007-03-21 13:38:47 -07002052/* These hooks are defined here for ATM. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053struct net_bridge;
2054struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2055 unsigned char *addr);
Stephen Hemminger6229e362007-03-21 13:38:47 -07002056void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057
Stephen Hemminger6229e362007-03-21 13:38:47 -07002058/*
2059 * If bridge module is loaded call bridging hook.
2060 * returns NULL if packet was consumed.
2061 */
2062struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2063 struct sk_buff *skb) __read_mostly;
2064static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2065 struct packet_type **pt_prev, int *ret,
2066 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067{
2068 struct net_bridge_port *port;
2069
Stephen Hemminger6229e362007-03-21 13:38:47 -07002070 if (skb->pkt_type == PACKET_LOOPBACK ||
2071 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2072 return skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073
2074 if (*pt_prev) {
Stephen Hemminger6229e362007-03-21 13:38:47 -07002075 *ret = deliver_skb(skb, *pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076 *pt_prev = NULL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002077 }
2078
Stephen Hemminger6229e362007-03-21 13:38:47 -07002079 return br_handle_frame_hook(port, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080}
2081#else
Stephen Hemminger6229e362007-03-21 13:38:47 -07002082#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083#endif
2084
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002085#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2086struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2087EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2088
2089static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2090 struct packet_type **pt_prev,
2091 int *ret,
2092 struct net_device *orig_dev)
2093{
2094 if (skb->dev->macvlan_port == NULL)
2095 return skb;
2096
2097 if (*pt_prev) {
2098 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2099 *pt_prev = NULL;
2100 }
2101 return macvlan_handle_frame_hook(skb);
2102}
2103#else
2104#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2105#endif
2106
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107#ifdef CONFIG_NET_CLS_ACT
2108/* TODO: Maybe we should just force sch_ingress to be compiled in
2109 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
2110 * instructions (a compare and two extra stores) when it is off
2111 * but CONFIG_NET_CLS_ACT is on.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002112 * NOTE: This doesn't stop any functionality; if you don't have
Linus Torvalds1da177e2005-04-16 15:20:36 -07002113 * the ingress scheduler, you just can't add policies on ingress.
2114 *
2115 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002116static int ing_filter(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118 struct net_device *dev = skb->dev;
Herbert Xuf697c3e2007-10-14 00:38:47 -07002119 u32 ttl = G_TC_RTTL(skb->tc_verd);
David S. Miller555353c2008-07-08 17:33:13 -07002120 struct netdev_queue *rxq;
2121 int result = TC_ACT_OK;
2122 struct Qdisc *q;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002123
Herbert Xuf697c3e2007-10-14 00:38:47 -07002124 if (MAX_RED_LOOP < ttl++) {
2125 printk(KERN_WARNING
2126 "Redir loop detected, dropping packet (%d->%d)\n",
2127 skb->iif, dev->ifindex);
2128 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129 }
2130
Herbert Xuf697c3e2007-10-14 00:38:47 -07002131 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2132 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2133
David S. Miller555353c2008-07-08 17:33:13 -07002134 rxq = &dev->rx_queue;
2135
David S. Miller83874002008-07-17 00:53:03 -07002136 q = rxq->qdisc;
David S. Miller8d50b532008-07-30 02:37:46 -07002137 if (q != &noop_qdisc) {
David S. Miller83874002008-07-17 00:53:03 -07002138 spin_lock(qdisc_lock(q));
David S. Millera9312ae2008-08-17 21:51:03 -07002139 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2140 result = qdisc_enqueue_root(skb, q);
David S. Miller83874002008-07-17 00:53:03 -07002141 spin_unlock(qdisc_lock(q));
2142 }
Herbert Xuf697c3e2007-10-14 00:38:47 -07002143
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144 return result;
2145}
Herbert Xuf697c3e2007-10-14 00:38:47 -07002146
2147static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2148 struct packet_type **pt_prev,
2149 int *ret, struct net_device *orig_dev)
2150{
David S. Miller8d50b532008-07-30 02:37:46 -07002151 if (skb->dev->rx_queue.qdisc == &noop_qdisc)
Herbert Xuf697c3e2007-10-14 00:38:47 -07002152 goto out;
2153
2154 if (*pt_prev) {
2155 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2156 *pt_prev = NULL;
2157 } else {
2158 /* Huh? Why does turning on AF_PACKET affect this? */
2159 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2160 }
2161
2162 switch (ing_filter(skb)) {
2163 case TC_ACT_SHOT:
2164 case TC_ACT_STOLEN:
2165 kfree_skb(skb);
2166 return NULL;
2167 }
2168
2169out:
2170 skb->tc_verd = 0;
2171 return skb;
2172}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173#endif
2174
Patrick McHardybc1d0412008-07-14 22:49:30 -07002175/*
2176 * netif_nit_deliver - deliver received packets to network taps
2177 * @skb: buffer
2178 *
2179 * This function is used to deliver incoming packets to network
2180 * taps. It should be used when the normal netif_receive_skb path
2181 * is bypassed, for example because of VLAN acceleration.
2182 */
2183void netif_nit_deliver(struct sk_buff *skb)
2184{
2185 struct packet_type *ptype;
2186
2187 if (list_empty(&ptype_all))
2188 return;
2189
2190 skb_reset_network_header(skb);
2191 skb_reset_transport_header(skb);
2192 skb->mac_len = skb->network_header - skb->mac_header;
2193
2194 rcu_read_lock();
2195 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2196 if (!ptype->dev || ptype->dev == skb->dev)
2197 deliver_skb(skb, ptype, skb->dev);
2198 }
2199 rcu_read_unlock();
2200}
2201
Stephen Hemminger3b582cc2007-11-01 02:21:47 -07002202/**
2203 * netif_receive_skb - process receive buffer from network
2204 * @skb: buffer to process
2205 *
2206 * netif_receive_skb() is the main receive data processing function.
2207 * It always succeeds. The buffer may be dropped during processing
2208 * for congestion control or by the protocol layers.
2209 *
2210 * This function may only be called from softirq context and interrupts
2211 * should be enabled.
2212 *
2213 * Return values (usually ignored):
2214 * NET_RX_SUCCESS: no congestion
2215 * NET_RX_DROP: packet was dropped
2216 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217int netif_receive_skb(struct sk_buff *skb)
2218{
2219 struct packet_type *ptype, *pt_prev;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002220 struct net_device *orig_dev;
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002221 struct net_device *null_or_orig;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222 int ret = NET_RX_DROP;
Al Viro252e33462006-11-14 20:48:11 -08002223 __be16 type;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224
Patrick McHardy9b22ea52008-11-04 14:49:57 -08002225 if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2226 return NET_RX_SUCCESS;
2227
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228 /* if we've gotten here through NAPI, check netpoll */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002229 if (netpoll_receive_skb(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 return NET_RX_DROP;
2231
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07002232 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07002233 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234
Patrick McHardyc01003c2007-03-29 11:46:52 -07002235 if (!skb->iif)
2236 skb->iif = skb->dev->ifindex;
David S. Miller86e65da2005-08-09 19:36:29 -07002237
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002238 null_or_orig = NULL;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002239 orig_dev = skb->dev;
2240 if (orig_dev->master) {
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002241 if (skb_bond_should_drop(skb))
2242 null_or_orig = orig_dev; /* deliver only exact match */
2243 else
2244 skb->dev = orig_dev->master;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002245 }
Jay Vosburgh8f903c72006-02-21 16:36:44 -08002246
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247 __get_cpu_var(netdev_rx_stat).total++;
2248
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002249 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03002250 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07002251 skb->mac_len = skb->network_header - skb->mac_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252
2253 pt_prev = NULL;
2254
2255 rcu_read_lock();
2256
2257#ifdef CONFIG_NET_CLS_ACT
2258 if (skb->tc_verd & TC_NCLS) {
2259 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2260 goto ncls;
2261 }
2262#endif
2263
2264 list_for_each_entry_rcu(ptype, &ptype_all, list) {
Joe Eykholtf9823072008-07-02 18:22:02 -07002265 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2266 ptype->dev == orig_dev) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002267 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002268 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269 pt_prev = ptype;
2270 }
2271 }
2272
2273#ifdef CONFIG_NET_CLS_ACT
Herbert Xuf697c3e2007-10-14 00:38:47 -07002274 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2275 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277ncls:
2278#endif
2279
Stephen Hemminger6229e362007-03-21 13:38:47 -07002280 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2281 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282 goto out;
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002283 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2284 if (!skb)
2285 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002286
Herbert Xu9a279bc2009-02-04 16:55:27 -08002287 skb_orphan(skb);
2288
Linus Torvalds1da177e2005-04-16 15:20:36 -07002289 type = skb->protocol;
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002290 list_for_each_entry_rcu(ptype,
2291 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 if (ptype->type == type &&
Joe Eykholtf9823072008-07-02 18:22:02 -07002293 (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2294 ptype->dev == orig_dev)) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002295 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002296 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297 pt_prev = ptype;
2298 }
2299 }
2300
2301 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002302 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303 } else {
2304 kfree_skb(skb);
2305 /* Jamal, now you will not be able to escape explaining
2306 * to me how you were going to use this. :-)
2307 */
2308 ret = NET_RX_DROP;
2309 }
2310
2311out:
2312 rcu_read_unlock();
2313 return ret;
2314}
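
/*
 * Illustration (hedged sketch): a NAPI driver's poll routine is the usual
 * caller, delivering frames from softirq context; my_rx_frame() is an
 * assumed helper returning the next completed skb or NULL.
 *
 *	static int my_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = 0;
 *		struct sk_buff *skb;
 *
 *		while (work < budget && (skb = my_rx_frame(napi)) != NULL) {
 *			netif_receive_skb(skb);
 *			work++;
 *		}
 *		if (work < budget)
 *			napi_complete(napi);	// re-enable interrupts here
 *		return work;
 *	}
 */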
2315
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07002316/* Network device is going away, flush any packets still pending */
2317static void flush_backlog(void *arg)
2318{
2319 struct net_device *dev = arg;
2320 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2321 struct sk_buff *skb, *tmp;
2322
2323 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2324 if (skb->dev == dev) {
2325 __skb_unlink(skb, &queue->input_pkt_queue);
2326 kfree_skb(skb);
2327 }
2328}
2329
Herbert Xud565b0a2008-12-15 23:38:52 -08002330static int napi_gro_complete(struct sk_buff *skb)
2331{
2332 struct packet_type *ptype;
2333 __be16 type = skb->protocol;
2334 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2335 int err = -ENOENT;
2336
Herbert Xu5d38a072009-01-04 16:13:40 -08002337 if (NAPI_GRO_CB(skb)->count == 1)
Herbert Xud565b0a2008-12-15 23:38:52 -08002338 goto out;
2339
2340 rcu_read_lock();
2341 list_for_each_entry_rcu(ptype, head, list) {
2342 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2343 continue;
2344
2345 err = ptype->gro_complete(skb);
2346 break;
2347 }
2348 rcu_read_unlock();
2349
2350 if (err) {
2351 WARN_ON(&ptype->list == head);
2352 kfree_skb(skb);
2353 return NET_RX_SUCCESS;
2354 }
2355
2356out:
Herbert Xub5302562009-01-04 16:13:19 -08002357 skb_shinfo(skb)->gso_size = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002358 return netif_receive_skb(skb);
2359}
2360
2361void napi_gro_flush(struct napi_struct *napi)
2362{
2363 struct sk_buff *skb, *next;
2364
2365 for (skb = napi->gro_list; skb; skb = next) {
2366 next = skb->next;
2367 skb->next = NULL;
2368 napi_gro_complete(skb);
2369 }
2370
Herbert Xu4ae55442009-02-08 18:00:36 +00002371 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002372 napi->gro_list = NULL;
2373}
2374EXPORT_SYMBOL(napi_gro_flush);
2375
Herbert Xu86911732009-01-29 14:19:50 +00002376void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
2377{
2378 unsigned int offset = skb_gro_offset(skb);
2379
2380 hlen += offset;
2381 if (hlen <= skb_headlen(skb))
2382 return skb->data + offset;
2383
2384 if (unlikely(!skb_shinfo(skb)->nr_frags ||
2385 skb_shinfo(skb)->frags[0].size <=
2386 hlen - skb_headlen(skb) ||
2387 PageHighMem(skb_shinfo(skb)->frags[0].page)))
2388 return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
2389
2390 return page_address(skb_shinfo(skb)->frags[0].page) +
Herbert Xu56035022009-02-05 21:26:52 -08002391 skb_shinfo(skb)->frags[0].page_offset +
2392 offset - skb_headlen(skb);
Herbert Xu86911732009-01-29 14:19:50 +00002393}
2394EXPORT_SYMBOL(skb_gro_header);
2395
Herbert Xu96e93ea2009-01-06 10:49:34 -08002396int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xud565b0a2008-12-15 23:38:52 -08002397{
2398 struct sk_buff **pp = NULL;
2399 struct packet_type *ptype;
2400 __be16 type = skb->protocol;
2401 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
Herbert Xu0da2afd52008-12-26 14:57:42 -08002402 int same_flow;
Herbert Xud565b0a2008-12-15 23:38:52 -08002403 int mac_len;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002404 int ret;
Herbert Xud565b0a2008-12-15 23:38:52 -08002405
2406 if (!(skb->dev->features & NETIF_F_GRO))
2407 goto normal;
2408
Herbert Xuf17f5c92009-01-14 14:36:12 -08002409 if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
2410 goto normal;
2411
Herbert Xud565b0a2008-12-15 23:38:52 -08002412 rcu_read_lock();
2413 list_for_each_entry_rcu(ptype, head, list) {
Herbert Xud565b0a2008-12-15 23:38:52 -08002414 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2415 continue;
2416
Herbert Xu86911732009-01-29 14:19:50 +00002417 skb_set_network_header(skb, skb_gro_offset(skb));
Herbert Xud565b0a2008-12-15 23:38:52 -08002418 mac_len = skb->network_header - skb->mac_header;
2419 skb->mac_len = mac_len;
2420 NAPI_GRO_CB(skb)->same_flow = 0;
2421 NAPI_GRO_CB(skb)->flush = 0;
Herbert Xu5d38a072009-01-04 16:13:40 -08002422 NAPI_GRO_CB(skb)->free = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002423
Herbert Xud565b0a2008-12-15 23:38:52 -08002424 pp = ptype->gro_receive(&napi->gro_list, skb);
2425 break;
2426 }
2427 rcu_read_unlock();
2428
2429 if (&ptype->list == head)
2430 goto normal;
2431
Herbert Xu0da2afd52008-12-26 14:57:42 -08002432 same_flow = NAPI_GRO_CB(skb)->same_flow;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002433 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
Herbert Xu0da2afd52008-12-26 14:57:42 -08002434
Herbert Xud565b0a2008-12-15 23:38:52 -08002435 if (pp) {
2436 struct sk_buff *nskb = *pp;
2437
2438 *pp = nskb->next;
2439 nskb->next = NULL;
2440 napi_gro_complete(nskb);
Herbert Xu4ae55442009-02-08 18:00:36 +00002441 napi->gro_count--;
Herbert Xud565b0a2008-12-15 23:38:52 -08002442 }
2443
Herbert Xu0da2afd52008-12-26 14:57:42 -08002444 if (same_flow)
Herbert Xud565b0a2008-12-15 23:38:52 -08002445 goto ok;
2446
Herbert Xu4ae55442009-02-08 18:00:36 +00002447 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
Herbert Xud565b0a2008-12-15 23:38:52 -08002448 goto normal;
Herbert Xud565b0a2008-12-15 23:38:52 -08002449
Herbert Xu4ae55442009-02-08 18:00:36 +00002450 napi->gro_count++;
Herbert Xud565b0a2008-12-15 23:38:52 -08002451 NAPI_GRO_CB(skb)->count = 1;
Herbert Xu86911732009-01-29 14:19:50 +00002452 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08002453 skb->next = napi->gro_list;
2454 napi->gro_list = skb;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002455 ret = GRO_HELD;
Herbert Xud565b0a2008-12-15 23:38:52 -08002456
Herbert Xuad0f9902009-02-01 01:24:55 -08002457pull:
2458 if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
2459 if (napi->gro_list == skb)
2460 napi->gro_list = skb->next;
2461 ret = GRO_DROP;
2462 }
2463
Herbert Xud565b0a2008-12-15 23:38:52 -08002464ok:
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002465 return ret;
Herbert Xud565b0a2008-12-15 23:38:52 -08002466
2467normal:
Herbert Xuad0f9902009-02-01 01:24:55 -08002468 ret = GRO_NORMAL;
2469 goto pull;
Herbert Xu5d38a072009-01-04 16:13:40 -08002470}
Herbert Xu96e93ea2009-01-06 10:49:34 -08002471EXPORT_SYMBOL(dev_gro_receive);
2472
2473static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2474{
2475 struct sk_buff *p;
2476
2477 for (p = napi->gro_list; p; p = p->next) {
Herbert Xuaa4b9f52009-02-08 18:00:37 +00002478 NAPI_GRO_CB(p)->same_flow = !compare_ether_header(
2479 skb_mac_header(p), skb_gro_mac_header(skb));
Herbert Xu96e93ea2009-01-06 10:49:34 -08002480 NAPI_GRO_CB(p)->flush = 0;
2481 }
2482
2483 return dev_gro_receive(napi, skb);
2484}
Herbert Xu5d38a072009-01-04 16:13:40 -08002485
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002486int napi_skb_finish(int ret, struct sk_buff *skb)
Herbert Xu5d38a072009-01-04 16:13:40 -08002487{
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002488 int err = NET_RX_SUCCESS;
2489
2490 switch (ret) {
2491 case GRO_NORMAL:
Herbert Xu5d38a072009-01-04 16:13:40 -08002492 return netif_receive_skb(skb);
2493
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002494 case GRO_DROP:
2495 err = NET_RX_DROP;
2496 /* fall through */
2497
2498 case GRO_MERGED_FREE:
Herbert Xu5d38a072009-01-04 16:13:40 -08002499 kfree_skb(skb);
2500 break;
2501 }
2502
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002503 return err;
2504}
2505EXPORT_SYMBOL(napi_skb_finish);
2506
2507int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2508{
Herbert Xu86911732009-01-29 14:19:50 +00002509 skb_gro_reset_offset(skb);
2510
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002511 return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08002512}
2513EXPORT_SYMBOL(napi_gro_receive);
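
/*
 * Illustration (sketch): a GRO-capable driver simply substitutes this call
 * for netif_receive_skb() in its poll loop; merging only happens when the
 * device advertises NETIF_F_GRO.
 *
 *	skb->protocol = eth_type_trans(skb, napi->dev);
 *	napi_gro_receive(napi, skb);	// may merge, hold or deliver skb
 */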
2514
Herbert Xu96e93ea2009-01-06 10:49:34 -08002515void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2516{
Herbert Xu96e93ea2009-01-06 10:49:34 -08002517 __skb_pull(skb, skb_headlen(skb));
2518 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2519
2520 napi->skb = skb;
2521}
2522EXPORT_SYMBOL(napi_reuse_skb);
2523
2524struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
2525 struct napi_gro_fraginfo *info)
Herbert Xu5d38a072009-01-04 16:13:40 -08002526{
2527 struct net_device *dev = napi->dev;
2528 struct sk_buff *skb = napi->skb;
Herbert Xu86911732009-01-29 14:19:50 +00002529 struct ethhdr *eth;
Herbert Xu80595d52009-01-29 14:19:52 +00002530 skb_frag_t *frag;
2531 int i;
Herbert Xu5d38a072009-01-04 16:13:40 -08002532
2533 napi->skb = NULL;
2534
2535 if (!skb) {
2536 skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
2537 if (!skb)
2538 goto out;
2539
2540 skb_reserve(skb, NET_IP_ALIGN);
2541 }
2542
2543 BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
Herbert Xu80595d52009-01-29 14:19:52 +00002544 frag = &info->frags[info->nr_frags - 1];
2545
2546 for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) {
2547 skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
2548 frag->size);
2549 frag++;
2550 }
Herbert Xu5d38a072009-01-04 16:13:40 -08002551 skb_shinfo(skb)->nr_frags = info->nr_frags;
Herbert Xu5d38a072009-01-04 16:13:40 -08002552
2553 skb->data_len = info->len;
2554 skb->len += info->len;
2555 skb->truesize += info->len;
2556
Herbert Xu86911732009-01-29 14:19:50 +00002557 skb_reset_mac_header(skb);
2558 skb_gro_reset_offset(skb);
2559
2560 eth = skb_gro_header(skb, sizeof(*eth));
2561 if (!eth) {
Herbert Xu96e93ea2009-01-06 10:49:34 -08002562 napi_reuse_skb(napi, skb);
Herbert Xu9a8e47f2009-01-17 19:47:18 +00002563 skb = NULL;
Herbert Xu96e93ea2009-01-06 10:49:34 -08002564 goto out;
2565 }
Herbert Xu5d38a072009-01-04 16:13:40 -08002566
Herbert Xu86911732009-01-29 14:19:50 +00002567 skb_gro_pull(skb, sizeof(*eth));
2568
2569 /*
2570 * This works because the only protocols we care about don't require
2571 * special handling. We'll fix it up properly at the end.
2572 */
2573 skb->protocol = eth->h_proto;
Herbert Xu5d38a072009-01-04 16:13:40 -08002574
2575 skb->ip_summed = info->ip_summed;
2576 skb->csum = info->csum;
2577
Herbert Xu96e93ea2009-01-06 10:49:34 -08002578out:
2579 return skb;
2580}
2581EXPORT_SYMBOL(napi_fraginfo_skb);
2582
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002583int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2584{
2585 int err = NET_RX_SUCCESS;
2586
2587 switch (ret) {
2588 case GRO_NORMAL:
Herbert Xu86911732009-01-29 14:19:50 +00002589 case GRO_HELD:
Herbert Xu86911732009-01-29 14:19:50 +00002590 skb->protocol = eth_type_trans(skb, napi->dev);
2591
2592 if (ret == GRO_NORMAL)
2593 return netif_receive_skb(skb);
2594
2595 skb_gro_pull(skb, -ETH_HLEN);
2596 break;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002597
2598 case GRO_DROP:
2599 err = NET_RX_DROP;
2600 /* fall through */
2601
2602 case GRO_MERGED_FREE:
2603 napi_reuse_skb(napi, skb);
2604 break;
2605 }
2606
2607 return err;
2608}
2609EXPORT_SYMBOL(napi_frags_finish);
2610
Herbert Xu96e93ea2009-01-06 10:49:34 -08002611int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
2612{
2613 struct sk_buff *skb = napi_fraginfo_skb(napi, info);
Herbert Xu96e93ea2009-01-06 10:49:34 -08002614
2615 if (!skb)
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002616 return NET_RX_DROP;
Herbert Xu96e93ea2009-01-06 10:49:34 -08002617
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002618 return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
Herbert Xu5d38a072009-01-04 16:13:40 -08002619}
2620EXPORT_SYMBOL(napi_gro_frags);
2621
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002622static int process_backlog(struct napi_struct *napi, int quota)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002623{
2624 int work = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002625 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2626 unsigned long start_time = jiffies;
2627
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002628 napi->weight = weight_p;
2629 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002631
2632 local_irq_disable();
2633 skb = __skb_dequeue(&queue->input_pkt_queue);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002634 if (!skb) {
2635 __napi_complete(napi);
2636 local_irq_enable();
2637 break;
2638 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002639 local_irq_enable();
2640
Herbert Xud565b0a2008-12-15 23:38:52 -08002641 napi_gro_receive(napi, skb);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002642 } while (++work < quota && jiffies == start_time);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002643
Herbert Xud565b0a2008-12-15 23:38:52 -08002644 napi_gro_flush(napi);
2645
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002646 return work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002647}
2648
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002649/**
2650 * __napi_schedule - schedule for receive
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07002651 * @n: entry to schedule
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002652 *
2653 * The entry's receive function will be scheduled to run
2654 */
Harvey Harrisonb5606c22008-02-13 15:03:16 -08002655void __napi_schedule(struct napi_struct *n)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002656{
2657 unsigned long flags;
2658
2659 local_irq_save(flags);
2660 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2661 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2662 local_irq_restore(flags);
2663}
2664EXPORT_SYMBOL(__napi_schedule);
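
/*
 * Illustration (hedged sketch): the typical caller is a device interrupt
 * handler, using napi_schedule_prep() to take NAPI_STATE_SCHED first;
 * my_disable_rx_irq() is an assumed helper.
 *
 *	static irqreturn_t my_isr(int irq, void *data)
 *	{
 *		struct my_priv *priv = data;
 *
 *		if (napi_schedule_prep(&priv->napi)) {
 *			my_disable_rx_irq(priv);  // poll() re-enables later
 *			__napi_schedule(&priv->napi);
 *		}
 *		return IRQ_HANDLED;
 *	}
 */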
2665
Herbert Xud565b0a2008-12-15 23:38:52 -08002666void __napi_complete(struct napi_struct *n)
2667{
2668 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2669 BUG_ON(n->gro_list);
2670
2671 list_del(&n->poll_list);
2672 smp_mb__before_clear_bit();
2673 clear_bit(NAPI_STATE_SCHED, &n->state);
2674}
2675EXPORT_SYMBOL(__napi_complete);
2676
2677void napi_complete(struct napi_struct *n)
2678{
2679 unsigned long flags;
2680
2681 /*
2682 * don't let napi dequeue from the CPU poll list
2683 * just in case it's running on a different CPU
2684 */
2685 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2686 return;
2687
2688 napi_gro_flush(n);
2689 local_irq_save(flags);
2690 __napi_complete(n);
2691 local_irq_restore(flags);
2692}
2693EXPORT_SYMBOL(napi_complete);
2694
2695void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2696 int (*poll)(struct napi_struct *, int), int weight)
2697{
2698 INIT_LIST_HEAD(&napi->poll_list);
Herbert Xu4ae55442009-02-08 18:00:36 +00002699 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002700 napi->gro_list = NULL;
Herbert Xu5d38a072009-01-04 16:13:40 -08002701 napi->skb = NULL;
Herbert Xud565b0a2008-12-15 23:38:52 -08002702 napi->poll = poll;
2703 napi->weight = weight;
2704 list_add(&napi->dev_list, &dev->napi_list);
Herbert Xud565b0a2008-12-15 23:38:52 -08002705 napi->dev = dev;
Herbert Xu5d38a072009-01-04 16:13:40 -08002706#ifdef CONFIG_NETPOLL
Herbert Xud565b0a2008-12-15 23:38:52 -08002707 spin_lock_init(&napi->poll_lock);
2708 napi->poll_owner = -1;
2709#endif
2710 set_bit(NAPI_STATE_SCHED, &napi->state);
2711}
2712EXPORT_SYMBOL(netif_napi_add);
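
/*
 * Illustration (sketch): drivers call this once at probe time, before
 * register_netdev(); my_poll and the weight of 64 are assumptions.
 *
 *	netif_napi_add(dev, &priv->napi, my_poll, 64);
 *
 * The matching netif_napi_del() belongs in the teardown path, and
 * napi_enable()/napi_disable() bracket the intervals when poll may run.
 */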
2713
2714void netif_napi_del(struct napi_struct *napi)
2715{
2716 struct sk_buff *skb, *next;
2717
Peter P Waskiewicz Jrd7b06632008-12-26 01:35:35 -08002718 list_del_init(&napi->dev_list);
Herbert Xu5d38a072009-01-04 16:13:40 -08002719 kfree(napi->skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08002720
2721 for (skb = napi->gro_list; skb; skb = next) {
2722 next = skb->next;
2723 skb->next = NULL;
2724 kfree_skb(skb);
2725 }
2726
2727 napi->gro_list = NULL;
Herbert Xu4ae55442009-02-08 18:00:36 +00002728 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002729}
2730EXPORT_SYMBOL(netif_napi_del);
2731
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002732
Linus Torvalds1da177e2005-04-16 15:20:36 -07002733static void net_rx_action(struct softirq_action *h)
2734{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002735 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002736 unsigned long time_limit = jiffies + 2;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07002737 int budget = netdev_budget;
Matt Mackall53fb95d2005-08-11 19:27:43 -07002738 void *have;
2739
Linus Torvalds1da177e2005-04-16 15:20:36 -07002740 local_irq_disable();
2741
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002742 while (!list_empty(list)) {
2743 struct napi_struct *n;
2744 int work, weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002745
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002746 /* If the softirq window is exhausted then punt.
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002747 * Allow this to run for 2 jiffies, which allows
2748 * an average latency of 1.5/HZ.
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002749 */
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002750 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002751 goto softnet_break;
2752
2753 local_irq_enable();
2754
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002755 /* Even though interrupts have been re-enabled, this
2756 * access is safe because interrupts can only add new
2757 * entries to the tail of this list, and only ->poll()
2758 * calls can remove this head entry from the list.
2759 */
2760 n = list_entry(list->next, struct napi_struct, poll_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002761
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002762 have = netpoll_poll_lock(n);
2763
2764 weight = n->weight;
2765
David S. Miller0a7606c2007-10-29 21:28:47 -07002766 /* This NAPI_STATE_SCHED test is for avoiding a race
2767 * with netpoll's poll_napi(). Only the entity which
2768 * obtains the lock and sees NAPI_STATE_SCHED set will
2769 * actually make the ->poll() call. Therefore we avoid
2770		 * accidentally calling ->poll() when NAPI is not scheduled.
2771 */
2772 work = 0;
2773 if (test_bit(NAPI_STATE_SCHED, &n->state))
2774 work = n->poll(n, weight);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002775
2776 WARN_ON_ONCE(work > weight);
2777
2778 budget -= work;
2779
2780 local_irq_disable();
2781
2782 /* Drivers must not modify the NAPI state if they
2783 * consume the entire weight. In such cases this code
2784 * still "owns" the NAPI instance and therefore can
2785 * move the instance around on the list at-will.
2786 */
David S. Millerfed17f32008-01-07 21:00:40 -08002787 if (unlikely(work == weight)) {
2788 if (unlikely(napi_disable_pending(n)))
2789 __napi_complete(n);
2790 else
2791 list_move_tail(&n->poll_list, list);
2792 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002793
2794 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002795 }
2796out:
Shannon Nelson515e06c2007-06-23 23:09:23 -07002797 local_irq_enable();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002798
Chris Leechdb217332006-06-17 21:24:58 -07002799#ifdef CONFIG_NET_DMA
2800 /*
2801 * There may not be any more sk_buffs coming right now, so push
2802 * any pending DMA copies to hardware
2803 */
Dan Williams2ba05622009-01-06 11:38:14 -07002804 dma_issue_pending_all();
Chris Leechdb217332006-06-17 21:24:58 -07002805#endif
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002806
Linus Torvalds1da177e2005-04-16 15:20:36 -07002807 return;
2808
2809softnet_break:
2810 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2811 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2812 goto out;
2813}
2814
2815static gifconf_func_t *gifconf_list[NPROTO];
2816
2817/**
2818 * register_gifconf - register a SIOCGIF handler
2819 * @family: Address family
2820 * @gifconf: Function handler
2821 *
2822 * Register protocol dependent address dumping routines. The handler
2823 * that is passed must not be freed or reused until it has been replaced
2824 * by another handler.
2825 */
2826int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
2827{
2828 if (family >= NPROTO)
2829 return -EINVAL;
2830 gifconf_list[family] = gifconf;
2831 return 0;
2832}
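/*
 * Usage sketch (hedged): an address family registers its dumper once
 * at init time, as IPv4 does in net/ipv4/devinet.c:
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 *
 * The handler then services SIOCGIFCONF for that family via
 * dev_ifconf() below.
 */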
2833
2834
2835/*
2836 * Map an interface index to its name (SIOCGIFNAME)
2837 */
2838
2839/*
2840 * We need this ioctl for efficient implementation of the
2841 * if_indextoname() function required by the IPv6 API. Without
2842 * it, we would have to search all the interfaces to find a
2843 * match. --pb
2844 */
2845
Eric W. Biederman881d9662007-09-17 11:56:21 -07002846static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002847{
2848 struct net_device *dev;
2849 struct ifreq ifr;
2850
2851 /*
2852 * Fetch the caller's info block.
2853 */
2854
2855 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2856 return -EFAULT;
2857
2858 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -07002859 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002860 if (!dev) {
2861 read_unlock(&dev_base_lock);
2862 return -ENODEV;
2863 }
2864
2865 strcpy(ifr.ifr_name, dev->name);
2866 read_unlock(&dev_base_lock);
2867
2868 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2869 return -EFAULT;
2870 return 0;
2871}
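/*
 * Userspace sketch (illustration only) of the if_indextoname() path
 * this ioctl serves; error handling trimmed for brevity:
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	ifr.ifr_ifindex = idx;
 *	if (fd >= 0 && ioctl(fd, SIOCGIFNAME, &ifr) == 0)
 *		printf("index %d is %s\n", idx, ifr.ifr_name);
 */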
2872
2873/*
2874 * Perform a SIOCGIFCONF call. This structure will change
2875 * size eventually, and there is nothing I can do about it.
2876 * Thus we will need a 'compatibility mode'.
2877 */
2878
Eric W. Biederman881d9662007-09-17 11:56:21 -07002879static int dev_ifconf(struct net *net, char __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002880{
2881 struct ifconf ifc;
2882 struct net_device *dev;
2883 char __user *pos;
2884 int len;
2885 int total;
2886 int i;
2887
2888 /*
2889 * Fetch the caller's info block.
2890 */
2891
2892 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2893 return -EFAULT;
2894
2895 pos = ifc.ifc_buf;
2896 len = ifc.ifc_len;
2897
2898 /*
2899 * Loop over the interfaces, and write an info block for each.
2900 */
2901
2902 total = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002903 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002904 for (i = 0; i < NPROTO; i++) {
2905 if (gifconf_list[i]) {
2906 int done;
2907 if (!pos)
2908 done = gifconf_list[i](dev, NULL, 0);
2909 else
2910 done = gifconf_list[i](dev, pos + total,
2911 len - total);
2912 if (done < 0)
2913 return -EFAULT;
2914 total += done;
2915 }
2916 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002917 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002918
2919 /*
2920 * All done. Write the updated control block back to the caller.
2921 */
2922 ifc.ifc_len = total;
2923
2924 /*
2925 * Both BSD and Solaris return 0 here, so we do too.
2926 */
2927 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2928}
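/*
 * Userspace sketch (illustration only): the classic SIOCGIFCONF
 * caller.  A robust program grows the buffer and retries when
 * ifc_len comes back equal to the size it passed in:
 *
 *	char buf[8192];
 *	struct ifconf ifc;
 *	struct ifreq *ifr;
 *	int i, n;
 *
 *	ifc.ifc_len = sizeof(buf);
 *	ifc.ifc_buf = buf;
 *	if (ioctl(fd, SIOCGIFCONF, &ifc) == 0) {
 *		ifr = ifc.ifc_req;
 *		n = ifc.ifc_len / sizeof(struct ifreq);
 *		for (i = 0; i < n; i++)
 *			printf("%s\n", ifr[i].ifr_name);
 *	}
 */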
2929
2930#ifdef CONFIG_PROC_FS
2931/*
2932 * This is invoked by the /proc filesystem handler to display a device
2933 * in detail.
2934 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002935void *dev_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002936 __acquires(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002937{
Denis V. Luneve372c412007-11-19 22:31:54 -08002938 struct net *net = seq_file_net(seq);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002939 loff_t off;
2940 struct net_device *dev;
2941
Linus Torvalds1da177e2005-04-16 15:20:36 -07002942 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002943 if (!*pos)
2944 return SEQ_START_TOKEN;
2945
2946 off = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002947 for_each_netdev(net, dev)
Pavel Emelianov7562f872007-05-03 15:13:45 -07002948 if (off++ == *pos)
2949 return dev;
2950
2951 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002952}
2953
2954void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2955{
Denis V. Luneve372c412007-11-19 22:31:54 -08002956 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002957 ++*pos;
Pavel Emelianov7562f872007-05-03 15:13:45 -07002958 return v == SEQ_START_TOKEN ?
Eric W. Biederman881d9662007-09-17 11:56:21 -07002959 first_net_device(net) : next_net_device((struct net_device *)v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002960}
2961
2962void dev_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002963 __releases(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002964{
2965 read_unlock(&dev_base_lock);
2966}
2967
2968static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2969{
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08002970 const struct net_device_stats *stats = dev_get_stats(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002971
Rusty Russell5a1b5892007-04-28 21:04:03 -07002972 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2973 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2974 dev->name, stats->rx_bytes, stats->rx_packets,
2975 stats->rx_errors,
2976 stats->rx_dropped + stats->rx_missed_errors,
2977 stats->rx_fifo_errors,
2978 stats->rx_length_errors + stats->rx_over_errors +
2979 stats->rx_crc_errors + stats->rx_frame_errors,
2980 stats->rx_compressed, stats->multicast,
2981 stats->tx_bytes, stats->tx_packets,
2982 stats->tx_errors, stats->tx_dropped,
2983 stats->tx_fifo_errors, stats->collisions,
2984 stats->tx_carrier_errors +
2985 stats->tx_aborted_errors +
2986 stats->tx_window_errors +
2987 stats->tx_heartbeat_errors,
2988 stats->tx_compressed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002989}
2990
2991/*
2992 * Called from the PROCfs module. This now uses the new arbitrary sized
2993 * /proc/net interface to create /proc/net/dev
2994 */
2995static int dev_seq_show(struct seq_file *seq, void *v)
2996{
2997 if (v == SEQ_START_TOKEN)
2998 seq_puts(seq, "Inter-| Receive "
2999 " | Transmit\n"
3000 " face |bytes packets errs drop fifo frame "
3001 "compressed multicast|bytes packets errs "
3002 "drop fifo colls carrier compressed\n");
3003 else
3004 dev_seq_printf_stats(seq, v);
3005 return 0;
3006}
3007
3008static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3009{
3010 struct netif_rx_stats *rc = NULL;
3011
Mike Travis0c0b0ac2008-05-02 16:43:08 -07003012 while (*pos < nr_cpu_ids)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003013 if (cpu_online(*pos)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003014 rc = &per_cpu(netdev_rx_stat, *pos);
3015 break;
3016 } else
3017 ++*pos;
3018 return rc;
3019}
3020
3021static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3022{
3023 return softnet_get_online(pos);
3024}
3025
3026static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3027{
3028 ++*pos;
3029 return softnet_get_online(pos);
3030}
3031
3032static void softnet_seq_stop(struct seq_file *seq, void *v)
3033{
3034}
3035
3036static int softnet_seq_show(struct seq_file *seq, void *v)
3037{
3038 struct netif_rx_stats *s = v;
3039
3040 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
Stephen Hemminger31aa02c2005-06-23 20:12:48 -07003041 s->total, s->dropped, s->time_squeeze, 0,
Stephen Hemmingerc1ebcdb2005-06-23 20:08:59 -07003042 0, 0, 0, 0, /* was fastroute */
3043		   s->cpu_collision);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003044 return 0;
3045}
3046
Stephen Hemmingerf6908082007-03-12 14:34:29 -07003047static const struct seq_operations dev_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003048 .start = dev_seq_start,
3049 .next = dev_seq_next,
3050 .stop = dev_seq_stop,
3051 .show = dev_seq_show,
3052};
3053
3054static int dev_seq_open(struct inode *inode, struct file *file)
3055{
Denis V. Luneve372c412007-11-19 22:31:54 -08003056 return seq_open_net(inode, file, &dev_seq_ops,
3057 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003058}
3059
Arjan van de Ven9a321442007-02-12 00:55:35 -08003060static const struct file_operations dev_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003061 .owner = THIS_MODULE,
3062 .open = dev_seq_open,
3063 .read = seq_read,
3064 .llseek = seq_lseek,
Denis V. Luneve372c412007-11-19 22:31:54 -08003065 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003066};
3067
Stephen Hemmingerf6908082007-03-12 14:34:29 -07003068static const struct seq_operations softnet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003069 .start = softnet_seq_start,
3070 .next = softnet_seq_next,
3071 .stop = softnet_seq_stop,
3072 .show = softnet_seq_show,
3073};
3074
3075static int softnet_seq_open(struct inode *inode, struct file *file)
3076{
3077 return seq_open(file, &softnet_seq_ops);
3078}
3079
Arjan van de Ven9a321442007-02-12 00:55:35 -08003080static const struct file_operations softnet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003081 .owner = THIS_MODULE,
3082 .open = softnet_seq_open,
3083 .read = seq_read,
3084 .llseek = seq_lseek,
3085 .release = seq_release,
3086};
3087
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003088static void *ptype_get_idx(loff_t pos)
3089{
3090 struct packet_type *pt = NULL;
3091 loff_t i = 0;
3092 int t;
3093
3094 list_for_each_entry_rcu(pt, &ptype_all, list) {
3095 if (i == pos)
3096 return pt;
3097 ++i;
3098 }
3099
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003100 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003101 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3102 if (i == pos)
3103 return pt;
3104 ++i;
3105 }
3106 }
3107 return NULL;
3108}
3109
3110static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemminger72348a42008-01-21 02:27:29 -08003111 __acquires(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003112{
3113 rcu_read_lock();
3114 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3115}
3116
3117static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3118{
3119 struct packet_type *pt;
3120 struct list_head *nxt;
3121 int hash;
3122
3123 ++*pos;
3124 if (v == SEQ_START_TOKEN)
3125 return ptype_get_idx(0);
3126
3127 pt = v;
3128 nxt = pt->list.next;
3129 if (pt->type == htons(ETH_P_ALL)) {
3130 if (nxt != &ptype_all)
3131 goto found;
3132 hash = 0;
3133 nxt = ptype_base[0].next;
3134 } else
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003135 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003136
3137 while (nxt == &ptype_base[hash]) {
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003138 if (++hash >= PTYPE_HASH_SIZE)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003139 return NULL;
3140 nxt = ptype_base[hash].next;
3141 }
3142found:
3143 return list_entry(nxt, struct packet_type, list);
3144}
3145
3146static void ptype_seq_stop(struct seq_file *seq, void *v)
Stephen Hemminger72348a42008-01-21 02:27:29 -08003147 __releases(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003148{
3149 rcu_read_unlock();
3150}
3151
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003152static int ptype_seq_show(struct seq_file *seq, void *v)
3153{
3154 struct packet_type *pt = v;
3155
3156 if (v == SEQ_START_TOKEN)
3157 seq_puts(seq, "Type Device Function\n");
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09003158 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003159 if (pt->type == htons(ETH_P_ALL))
3160 seq_puts(seq, "ALL ");
3161 else
3162 seq_printf(seq, "%04x", ntohs(pt->type));
3163
Alexey Dobriyan908cd2d2008-11-16 19:50:35 -08003164 seq_printf(seq, " %-8s %pF\n",
3165 pt->dev ? pt->dev->name : "", pt->func);
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003166 }
3167
3168 return 0;
3169}
3170
3171static const struct seq_operations ptype_seq_ops = {
3172 .start = ptype_seq_start,
3173 .next = ptype_seq_next,
3174 .stop = ptype_seq_stop,
3175 .show = ptype_seq_show,
3176};
3177
3178static int ptype_seq_open(struct inode *inode, struct file *file)
3179{
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07003180 return seq_open_net(inode, file, &ptype_seq_ops,
3181 sizeof(struct seq_net_private));
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003182}
3183
3184static const struct file_operations ptype_seq_fops = {
3185 .owner = THIS_MODULE,
3186 .open = ptype_seq_open,
3187 .read = seq_read,
3188 .llseek = seq_lseek,
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07003189 .release = seq_release_net,
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003190};
3191
3192
Pavel Emelyanov46650792007-10-08 20:38:39 -07003193static int __net_init dev_proc_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003194{
3195 int rc = -ENOMEM;
3196
Eric W. Biederman881d9662007-09-17 11:56:21 -07003197 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003198 goto out;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003199 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003200 goto out_dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003201 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003202 goto out_softnet;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003203
Eric W. Biederman881d9662007-09-17 11:56:21 -07003204 if (wext_proc_init(net))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003205 goto out_ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003206 rc = 0;
3207out:
3208 return rc;
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003209out_ptype:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003210 proc_net_remove(net, "ptype");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003211out_softnet:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003212 proc_net_remove(net, "softnet_stat");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003213out_dev:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003214 proc_net_remove(net, "dev");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003215 goto out;
3216}
Eric W. Biederman881d9662007-09-17 11:56:21 -07003217
Pavel Emelyanov46650792007-10-08 20:38:39 -07003218static void __net_exit dev_proc_net_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07003219{
3220 wext_proc_exit(net);
3221
3222 proc_net_remove(net, "ptype");
3223 proc_net_remove(net, "softnet_stat");
3224 proc_net_remove(net, "dev");
3225}
3226
Denis V. Lunev022cbae2007-11-13 03:23:50 -08003227static struct pernet_operations __net_initdata dev_proc_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07003228 .init = dev_proc_net_init,
3229 .exit = dev_proc_net_exit,
3230};
3231
3232static int __init dev_proc_init(void)
3233{
3234 return register_pernet_subsys(&dev_proc_ops);
3235}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003236#else
3237#define dev_proc_init() 0
3238#endif /* CONFIG_PROC_FS */
3239
3240
3241/**
3242 * netdev_set_master - set up master/slave pair
3243 * @slave: slave device
3244 * @master: new master device
3245 *
3246 * Changes the master device of the slave. Pass %NULL to break the
3247 * bonding. The caller must hold the RTNL semaphore. On a failure
3248 * a negative errno code is returned. On success the reference counts
3249 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3250 * function returns zero.
3251 */
3252int netdev_set_master(struct net_device *slave, struct net_device *master)
3253{
3254 struct net_device *old = slave->master;
3255
3256 ASSERT_RTNL();
3257
3258 if (master) {
3259 if (old)
3260 return -EBUSY;
3261 dev_hold(master);
3262 }
3263
3264 slave->master = master;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003265
Linus Torvalds1da177e2005-04-16 15:20:36 -07003266 synchronize_net();
3267
3268 if (old)
3269 dev_put(old);
3270
3271 if (master)
3272 slave->flags |= IFF_SLAVE;
3273 else
3274 slave->flags &= ~IFF_SLAVE;
3275
3276 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3277 return 0;
3278}
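/*
 * Caller sketch (hedged; "bond" and "slave" are whatever device pair
 * the caller owns): the RTNL requirement documented above is
 * satisfied by rtnl_lock().  Passing NULL as the master breaks the
 * pair again.
 */
static int example_enslave(struct net_device *bond, struct net_device *slave)
{
	int err;

	rtnl_lock();
	err = netdev_set_master(slave, bond);
	rtnl_unlock();
	return err;
}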
3279
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003280static void dev_change_rx_flags(struct net_device *dev, int flags)
3281{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003282 const struct net_device_ops *ops = dev->netdev_ops;
3283
3284 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3285 ops->ndo_change_rx_flags(dev, flags);
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003286}
3287
Wang Chendad9b332008-06-18 01:48:28 -07003288static int __dev_set_promiscuity(struct net_device *dev, int inc)
Patrick McHardy4417da62007-06-27 01:28:10 -07003289{
3290 unsigned short old_flags = dev->flags;
David Howells8192b0c2008-11-14 10:39:10 +11003291 uid_t uid;
3292 gid_t gid;
Patrick McHardy4417da62007-06-27 01:28:10 -07003293
Patrick McHardy24023452007-07-14 18:51:31 -07003294 ASSERT_RTNL();
3295
Wang Chendad9b332008-06-18 01:48:28 -07003296 dev->flags |= IFF_PROMISC;
3297 dev->promiscuity += inc;
3298 if (dev->promiscuity == 0) {
3299 /*
3300 * Avoid overflow.
3301		 * If inc causes overflow, leave promiscuity untouched and return an error.
3302 */
3303 if (inc < 0)
3304 dev->flags &= ~IFF_PROMISC;
3305 else {
3306 dev->promiscuity -= inc;
3307			printk(KERN_WARNING "%s: promiscuity counter overflowed; "
3308			       "setting promiscuity failed. The promiscuity feature "
3309			       "of the device may be broken.\n", dev->name);
3310 return -EOVERFLOW;
3311 }
3312 }
Patrick McHardy4417da62007-06-27 01:28:10 -07003313 if (dev->flags != old_flags) {
3314 printk(KERN_INFO "device %s %s promiscuous mode\n",
3315 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3316 "left");
David Howells8192b0c2008-11-14 10:39:10 +11003317 if (audit_enabled) {
3318 current_uid_gid(&uid, &gid);
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05003319 audit_log(current->audit_context, GFP_ATOMIC,
3320 AUDIT_ANOM_PROMISCUOUS,
3321 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3322 dev->name, (dev->flags & IFF_PROMISC),
3323 (old_flags & IFF_PROMISC),
3324 audit_get_loginuid(current),
David Howells8192b0c2008-11-14 10:39:10 +11003325 uid, gid,
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05003326 audit_get_sessionid(current));
David Howells8192b0c2008-11-14 10:39:10 +11003327 }
Patrick McHardy24023452007-07-14 18:51:31 -07003328
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003329 dev_change_rx_flags(dev, IFF_PROMISC);
Patrick McHardy4417da62007-06-27 01:28:10 -07003330 }
Wang Chendad9b332008-06-18 01:48:28 -07003331 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003332}
3333
Linus Torvalds1da177e2005-04-16 15:20:36 -07003334/**
3335 * dev_set_promiscuity - update promiscuity count on a device
3336 * @dev: device
3337 * @inc: modifier
3338 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07003339 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07003340 * remains above zero the interface remains promiscuous. Once it hits zero
3341 * the device reverts back to normal filtering operation. A negative inc
3342 * value is used to drop promiscuity on the device.
Wang Chendad9b332008-06-18 01:48:28 -07003343 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003344 */
Wang Chendad9b332008-06-18 01:48:28 -07003345int dev_set_promiscuity(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003346{
3347 unsigned short old_flags = dev->flags;
Wang Chendad9b332008-06-18 01:48:28 -07003348 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003349
Wang Chendad9b332008-06-18 01:48:28 -07003350 err = __dev_set_promiscuity(dev, inc);
Patrick McHardy4b5a6982008-07-06 15:49:08 -07003351 if (err < 0)
Wang Chendad9b332008-06-18 01:48:28 -07003352 return err;
Patrick McHardy4417da62007-06-27 01:28:10 -07003353 if (dev->flags != old_flags)
3354 dev_set_rx_mode(dev);
Wang Chendad9b332008-06-18 01:48:28 -07003355 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003356}
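/*
 * Usage sketch (hedged): a capture-style user takes one promiscuity
 * reference while attached and drops it with an inc of -1 on detach.
 * RTNL must be held, as __dev_set_promiscuity() asserts above;
 * dev_set_allmulti() below follows the same counted pattern.
 */
static int example_capture_attach(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_promiscuity(dev, 1);
	rtnl_unlock();
	return err;
}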
3357
3358/**
3359 * dev_set_allmulti - update allmulti count on a device
3360 * @dev: device
3361 * @inc: modifier
3362 *
3363 * Add or remove reception of all multicast frames to a device. While the
3364 * count in the device remains above zero the interface remains listening
3365 * to all interfaces. Once it hits zero the device reverts back to normal
3366 * filtering operation. A negative @inc value is used to drop the counter
3367 * when releasing a resource needing all multicasts.
Wang Chendad9b332008-06-18 01:48:28 -07003368 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003369 */
3370
Wang Chendad9b332008-06-18 01:48:28 -07003371int dev_set_allmulti(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003372{
3373 unsigned short old_flags = dev->flags;
3374
Patrick McHardy24023452007-07-14 18:51:31 -07003375 ASSERT_RTNL();
3376
Linus Torvalds1da177e2005-04-16 15:20:36 -07003377 dev->flags |= IFF_ALLMULTI;
Wang Chendad9b332008-06-18 01:48:28 -07003378 dev->allmulti += inc;
3379 if (dev->allmulti == 0) {
3380 /*
3381 * Avoid overflow.
3382		 * If inc causes overflow, leave allmulti untouched and return an error.
3383 */
3384 if (inc < 0)
3385 dev->flags &= ~IFF_ALLMULTI;
3386 else {
3387 dev->allmulti -= inc;
3388			printk(KERN_WARNING "%s: allmulti counter overflowed; "
3389			       "setting allmulti failed. The allmulti feature "
3390			       "of the device may be broken.\n", dev->name);
3391 return -EOVERFLOW;
3392 }
3393 }
Patrick McHardy24023452007-07-14 18:51:31 -07003394 if (dev->flags ^ old_flags) {
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003395 dev_change_rx_flags(dev, IFF_ALLMULTI);
Patrick McHardy4417da62007-06-27 01:28:10 -07003396 dev_set_rx_mode(dev);
Patrick McHardy24023452007-07-14 18:51:31 -07003397 }
Wang Chendad9b332008-06-18 01:48:28 -07003398 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003399}
3400
3401/*
3402 * Upload unicast and multicast address lists to device and
3403 * configure RX filtering. When the device doesn't support unicast
Joe Perches53ccaae2007-12-20 14:02:06 -08003404 * filtering, it is put in promiscuous mode while unicast addresses
Patrick McHardy4417da62007-06-27 01:28:10 -07003405 * are present.
3406 */
3407void __dev_set_rx_mode(struct net_device *dev)
3408{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003409 const struct net_device_ops *ops = dev->netdev_ops;
3410
Patrick McHardy4417da62007-06-27 01:28:10 -07003411 /* dev_open will call this function so the list will stay sane. */
3412 if (!(dev->flags&IFF_UP))
3413 return;
3414
3415 if (!netif_device_present(dev))
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +09003416 return;
Patrick McHardy4417da62007-06-27 01:28:10 -07003417
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003418 if (ops->ndo_set_rx_mode)
3419 ops->ndo_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003420 else {
3421 /* Unicast addresses changes may only happen under the rtnl,
3422 * therefore calling __dev_set_promiscuity here is safe.
3423 */
3424 if (dev->uc_count > 0 && !dev->uc_promisc) {
3425 __dev_set_promiscuity(dev, 1);
3426 dev->uc_promisc = 1;
3427 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3428 __dev_set_promiscuity(dev, -1);
3429 dev->uc_promisc = 0;
3430 }
3431
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003432 if (ops->ndo_set_multicast_list)
3433 ops->ndo_set_multicast_list(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003434 }
3435}
3436
3437void dev_set_rx_mode(struct net_device *dev)
3438{
David S. Millerb9e40852008-07-15 00:15:08 -07003439 netif_addr_lock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003440 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003441 netif_addr_unlock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003442}
3443
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003444int __dev_addr_delete(struct dev_addr_list **list, int *count,
3445 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003446{
3447 struct dev_addr_list *da;
3448
3449 for (; (da = *list) != NULL; list = &da->next) {
3450 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3451 alen == da->da_addrlen) {
3452 if (glbl) {
3453 int old_glbl = da->da_gusers;
3454 da->da_gusers = 0;
3455 if (old_glbl == 0)
3456 break;
3457 }
3458 if (--da->da_users)
3459 return 0;
3460
3461 *list = da->next;
3462 kfree(da);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003463 (*count)--;
Patrick McHardybf742482007-06-27 01:26:19 -07003464 return 0;
3465 }
3466 }
3467 return -ENOENT;
3468}
3469
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003470int __dev_addr_add(struct dev_addr_list **list, int *count,
3471 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003472{
3473 struct dev_addr_list *da;
3474
3475 for (da = *list; da != NULL; da = da->next) {
3476 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3477 da->da_addrlen == alen) {
3478 if (glbl) {
3479 int old_glbl = da->da_gusers;
3480 da->da_gusers = 1;
3481 if (old_glbl)
3482 return 0;
3483 }
3484 da->da_users++;
3485 return 0;
3486 }
3487 }
3488
Jorge Boncompte [DTI2]12aa3432008-02-19 14:17:04 -08003489 da = kzalloc(sizeof(*da), GFP_ATOMIC);
Patrick McHardybf742482007-06-27 01:26:19 -07003490 if (da == NULL)
3491 return -ENOMEM;
3492 memcpy(da->da_addr, addr, alen);
3493 da->da_addrlen = alen;
3494 da->da_users = 1;
3495 da->da_gusers = glbl ? 1 : 0;
3496 da->next = *list;
3497 *list = da;
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003498 (*count)++;
Patrick McHardybf742482007-06-27 01:26:19 -07003499 return 0;
3500}
3501
Patrick McHardy4417da62007-06-27 01:28:10 -07003502/**
3503 * dev_unicast_delete - Release secondary unicast address.
3504 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003505 * @addr: address to delete
3506 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07003507 *
3508 * Release reference to a secondary unicast address and remove it
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003509 * from the device if the reference count drops to zero.
Patrick McHardy4417da62007-06-27 01:28:10 -07003510 *
3511 * The caller must hold the rtnl_mutex.
3512 */
3513int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3514{
3515 int err;
3516
3517 ASSERT_RTNL();
3518
David S. Millerb9e40852008-07-15 00:15:08 -07003519 netif_addr_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003520 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3521 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07003522 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003523 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003524 return err;
3525}
3526EXPORT_SYMBOL(dev_unicast_delete);
3527
3528/**
3529 * dev_unicast_add - add a secondary unicast address
3530 * @dev: device
Wang Chen5dbaec52008-06-27 19:35:16 -07003531 * @addr: address to add
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003532 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07003533 *
3534 * Add a secondary unicast address to the device or increase
3535 * the reference count if it already exists.
3536 *
3537 * The caller must hold the rtnl_mutex.
3538 */
3539int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3540{
3541 int err;
3542
3543 ASSERT_RTNL();
3544
David S. Millerb9e40852008-07-15 00:15:08 -07003545 netif_addr_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003546 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3547 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07003548 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003549 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003550 return err;
3551}
3552EXPORT_SYMBOL(dev_unicast_add);
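/*
 * Usage sketch (hedged; the address is made up): listening on one
 * extra unicast MAC, then releasing it, per the contracts above.
 */
static int example_extra_unicast(struct net_device *dev)
{
	static u8 mac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	int err;

	rtnl_lock();		/* both helpers require the rtnl_mutex */
	err = dev_unicast_add(dev, mac, ETH_ALEN);
	if (!err)
		err = dev_unicast_delete(dev, mac, ETH_ALEN);
	rtnl_unlock();
	return err;
}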
3553
Chris Leeche83a2ea2008-01-31 16:53:23 -08003554int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3555 struct dev_addr_list **from, int *from_count)
3556{
3557 struct dev_addr_list *da, *next;
3558 int err = 0;
3559
3560 da = *from;
3561 while (da != NULL) {
3562 next = da->next;
3563 if (!da->da_synced) {
3564 err = __dev_addr_add(to, to_count,
3565 da->da_addr, da->da_addrlen, 0);
3566 if (err < 0)
3567 break;
3568 da->da_synced = 1;
3569 da->da_users++;
3570 } else if (da->da_users == 1) {
3571 __dev_addr_delete(to, to_count,
3572 da->da_addr, da->da_addrlen, 0);
3573 __dev_addr_delete(from, from_count,
3574 da->da_addr, da->da_addrlen, 0);
3575 }
3576 da = next;
3577 }
3578 return err;
3579}
3580
3581void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3582 struct dev_addr_list **from, int *from_count)
3583{
3584 struct dev_addr_list *da, *next;
3585
3586 da = *from;
3587 while (da != NULL) {
3588 next = da->next;
3589 if (da->da_synced) {
3590 __dev_addr_delete(to, to_count,
3591 da->da_addr, da->da_addrlen, 0);
3592 da->da_synced = 0;
3593 __dev_addr_delete(from, from_count,
3594 da->da_addr, da->da_addrlen, 0);
3595 }
3596 da = next;
3597 }
3598}
3599
3600/**
3601 * dev_unicast_sync - Synchronize device's unicast list to another device
3602 * @to: destination device
3603 * @from: source device
3604 *
3605 * Add newly added addresses to the destination device and release
3606 * addresses that have no users left. The source device must be
3607 * locked by netif_tx_lock_bh.
3608 *
3609 * This function is intended to be called from the dev->set_rx_mode
3610 * function of layered software devices.
3611 */
3612int dev_unicast_sync(struct net_device *to, struct net_device *from)
3613{
3614 int err = 0;
3615
David S. Millerb9e40852008-07-15 00:15:08 -07003616 netif_addr_lock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003617 err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3618 &from->uc_list, &from->uc_count);
3619 if (!err)
3620 __dev_set_rx_mode(to);
David S. Millerb9e40852008-07-15 00:15:08 -07003621 netif_addr_unlock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003622 return err;
3623}
3624EXPORT_SYMBOL(dev_unicast_sync);
3625
3626/**
Randy Dunlapbc2cda12008-02-13 15:03:25 -08003627 * dev_unicast_unsync - Remove synchronized addresses from the destination device
Chris Leeche83a2ea2008-01-31 16:53:23 -08003628 * @to: destination device
3629 * @from: source device
3630 *
3631 * Remove all addresses that were added to the destination device by
3632 * dev_unicast_sync(). This function is intended to be called from the
3633 * dev->stop function of layered software devices.
3634 */
3635void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3636{
David S. Millerb9e40852008-07-15 00:15:08 -07003637 netif_addr_lock_bh(from);
David S. Millere308a5d2008-07-15 00:13:44 -07003638 netif_addr_lock(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003639
3640 __dev_addr_unsync(&to->uc_list, &to->uc_count,
3641 &from->uc_list, &from->uc_count);
3642 __dev_set_rx_mode(to);
3643
David S. Millere308a5d2008-07-15 00:13:44 -07003644 netif_addr_unlock(to);
David S. Millerb9e40852008-07-15 00:15:08 -07003645 netif_addr_unlock_bh(from);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003646}
3647EXPORT_SYMBOL(dev_unicast_unsync);
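/*
 * Calling-pattern sketch for a layered device (hedged: "upper" is the
 * software device, "lower" its real device, and example_lower_dev()
 * an assumed accessor).  The sync call runs from the upper device's
 * rx-mode hook, where the core already holds the upper device's
 * address lock; the unsync call runs from its stop path and takes
 * the locks itself, as the code above shows.
 */
static void example_upper_set_rx_mode(struct net_device *upper)
{
	dev_unicast_sync(example_lower_dev(upper), upper);
}

static int example_upper_stop(struct net_device *upper)
{
	dev_unicast_unsync(example_lower_dev(upper), upper);
	return 0;
}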
3648
Denis Cheng12972622007-07-18 02:12:56 -07003649static void __dev_addr_discard(struct dev_addr_list **list)
3650{
3651 struct dev_addr_list *tmp;
3652
3653 while (*list != NULL) {
3654 tmp = *list;
3655 *list = tmp->next;
3656 if (tmp->da_users > tmp->da_gusers)
3657			printk(KERN_ERR "__dev_addr_discard: address leakage! "
3658 "da_users=%d\n", tmp->da_users);
3659 kfree(tmp);
3660 }
3661}
3662
Denis Cheng26cc2522007-07-18 02:12:03 -07003663static void dev_addr_discard(struct net_device *dev)
Patrick McHardy4417da62007-06-27 01:28:10 -07003664{
David S. Millerb9e40852008-07-15 00:15:08 -07003665 netif_addr_lock_bh(dev);
Denis Cheng26cc2522007-07-18 02:12:03 -07003666
Patrick McHardy4417da62007-06-27 01:28:10 -07003667 __dev_addr_discard(&dev->uc_list);
3668 dev->uc_count = 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003669
Denis Cheng456ad752007-07-18 02:10:54 -07003670 __dev_addr_discard(&dev->mc_list);
3671 dev->mc_count = 0;
Denis Cheng26cc2522007-07-18 02:12:03 -07003672
David S. Millerb9e40852008-07-15 00:15:08 -07003673 netif_addr_unlock_bh(dev);
Denis Cheng456ad752007-07-18 02:10:54 -07003674}
3675
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003676/**
3677 * dev_get_flags - get flags reported to userspace
3678 * @dev: device
3679 *
3680 * Get the combination of flag bits exported through APIs to userspace.
3681 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003682unsigned dev_get_flags(const struct net_device *dev)
3683{
3684 unsigned flags;
3685
3686 flags = (dev->flags & ~(IFF_PROMISC |
3687 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08003688 IFF_RUNNING |
3689 IFF_LOWER_UP |
3690 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07003691 (dev->gflags & (IFF_PROMISC |
3692 IFF_ALLMULTI));
3693
Stefan Rompfb00055a2006-03-20 17:09:11 -08003694 if (netif_running(dev)) {
3695 if (netif_oper_up(dev))
3696 flags |= IFF_RUNNING;
3697 if (netif_carrier_ok(dev))
3698 flags |= IFF_LOWER_UP;
3699 if (netif_dormant(dev))
3700 flags |= IFF_DORMANT;
3701 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003702
3703 return flags;
3704}
3705
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003706/**
3707 * dev_change_flags - change device settings
3708 * @dev: device
3709 * @flags: device state flags
3710 *
3711 * Change settings on device based state flags. The flags are
3712 * in the userspace exported format.
3713 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003714int dev_change_flags(struct net_device *dev, unsigned flags)
3715{
Thomas Graf7c355f52007-06-05 16:03:03 -07003716 int ret, changes;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003717 int old_flags = dev->flags;
3718
Patrick McHardy24023452007-07-14 18:51:31 -07003719 ASSERT_RTNL();
3720
Linus Torvalds1da177e2005-04-16 15:20:36 -07003721 /*
3722 * Set the flags on our device.
3723 */
3724
3725 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3726 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3727 IFF_AUTOMEDIA)) |
3728 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3729 IFF_ALLMULTI));
3730
3731 /*
3732 * Load in the correct multicast list now the flags have changed.
3733 */
3734
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003735 if ((old_flags ^ flags) & IFF_MULTICAST)
3736 dev_change_rx_flags(dev, IFF_MULTICAST);
Patrick McHardy24023452007-07-14 18:51:31 -07003737
Patrick McHardy4417da62007-06-27 01:28:10 -07003738 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003739
3740 /*
3741	 * Have we downed the interface? We handle IFF_UP ourselves
3742 * according to user attempts to set it, rather than blindly
3743 * setting it.
3744 */
3745
3746 ret = 0;
3747	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different? */
3748 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3749
3750 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07003751 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003752 }
3753
3754 if (dev->flags & IFF_UP &&
3755 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3756 IFF_VOLATILE)))
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003757 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003758
3759 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3760 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3761 dev->gflags ^= IFF_PROMISC;
3762 dev_set_promiscuity(dev, inc);
3763 }
3764
3765 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3766	   is important. Some (broken) drivers set IFF_PROMISC when
3767	   IFF_ALLMULTI is requested, without asking us and without reporting it.
3768 */
3769 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3770 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3771 dev->gflags ^= IFF_ALLMULTI;
3772 dev_set_allmulti(dev, inc);
3773 }
3774
Thomas Graf7c355f52007-06-05 16:03:03 -07003775 /* Exclude state transition flags, already notified */
3776 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3777 if (changes)
3778 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003779
3780 return ret;
3781}
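/*
 * Usage sketch (hedged): bringing an interface up the way the
 * SIOCSIFFLAGS path does, under the rtnl.
 */
static int example_bring_up(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_change_flags(dev, dev->flags | IFF_UP);
	rtnl_unlock();
	return err;
}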
3782
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003783/**
3784 * dev_set_mtu - Change maximum transfer unit
3785 * @dev: device
3786 * @new_mtu: new transfer unit
3787 *
3788 * Change the maximum transfer size of the network device.
3789 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003790int dev_set_mtu(struct net_device *dev, int new_mtu)
3791{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003792 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003793 int err;
3794
3795 if (new_mtu == dev->mtu)
3796 return 0;
3797
3798 /* MTU must be positive. */
3799 if (new_mtu < 0)
3800 return -EINVAL;
3801
3802 if (!netif_device_present(dev))
3803 return -ENODEV;
3804
3805 err = 0;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003806 if (ops->ndo_change_mtu)
3807 err = ops->ndo_change_mtu(dev, new_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003808 else
3809 dev->mtu = new_mtu;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003810
Linus Torvalds1da177e2005-04-16 15:20:36 -07003811 if (!err && dev->flags & IFF_UP)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003812 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003813 return err;
3814}
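/*
 * Usage sketch (hedged; 9000 is just an illustrative jumbo value).
 * In-kernel callers conventionally hold the rtnl, as the ioctl path
 * below does.
 */
static int example_set_jumbo_mtu(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_mtu(dev, 9000);
	rtnl_unlock();
	return err;
}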
3815
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003816/**
3817 * dev_set_mac_address - Change Media Access Control Address
3818 * @dev: device
3819 * @sa: new address
3820 *
3821 * Change the hardware (MAC) address of the device
3822 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003823int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3824{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003825 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003826 int err;
3827
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003828 if (!ops->ndo_set_mac_address)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003829 return -EOPNOTSUPP;
3830 if (sa->sa_family != dev->type)
3831 return -EINVAL;
3832 if (!netif_device_present(dev))
3833 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003834 err = ops->ndo_set_mac_address(dev, sa);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003835 if (!err)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003836 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003837 return err;
3838}
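/*
 * Usage sketch (hedged; the address bytes are made up).  Note the
 * check above: sa_family must match dev->type.
 */
static int example_set_mac(struct net_device *dev)
{
	static const u8 mac[ETH_ALEN] = { 0x02, 0xde, 0xad, 0xbe, 0xef, 0x01 };
	struct sockaddr sa;
	int err;

	sa.sa_family = dev->type;
	memcpy(sa.sa_data, mac, ETH_ALEN);
	rtnl_lock();
	err = dev_set_mac_address(dev, &sa);
	rtnl_unlock();
	return err;
}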
3839
3840/*
Jeff Garzik14e3e072007-10-08 00:06:32 -07003841 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003842 */
Jeff Garzik14e3e072007-10-08 00:06:32 -07003843static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003844{
3845 int err;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003846 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003847
3848 if (!dev)
3849 return -ENODEV;
3850
3851 switch (cmd) {
3852 case SIOCGIFFLAGS: /* Get interface flags */
3853 ifr->ifr_flags = dev_get_flags(dev);
3854 return 0;
3855
Linus Torvalds1da177e2005-04-16 15:20:36 -07003856 case SIOCGIFMETRIC: /* Get the metric on the interface
3857 (currently unused) */
3858 ifr->ifr_metric = 0;
3859 return 0;
3860
Linus Torvalds1da177e2005-04-16 15:20:36 -07003861 case SIOCGIFMTU: /* Get the MTU of a device */
3862 ifr->ifr_mtu = dev->mtu;
3863 return 0;
3864
Linus Torvalds1da177e2005-04-16 15:20:36 -07003865 case SIOCGIFHWADDR:
3866 if (!dev->addr_len)
3867 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3868 else
3869 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3870 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3871 ifr->ifr_hwaddr.sa_family = dev->type;
3872 return 0;
3873
Jeff Garzik14e3e072007-10-08 00:06:32 -07003874 case SIOCGIFSLAVE:
3875 err = -EINVAL;
3876 break;
3877
3878 case SIOCGIFMAP:
3879 ifr->ifr_map.mem_start = dev->mem_start;
3880 ifr->ifr_map.mem_end = dev->mem_end;
3881 ifr->ifr_map.base_addr = dev->base_addr;
3882 ifr->ifr_map.irq = dev->irq;
3883 ifr->ifr_map.dma = dev->dma;
3884 ifr->ifr_map.port = dev->if_port;
3885 return 0;
3886
3887 case SIOCGIFINDEX:
3888 ifr->ifr_ifindex = dev->ifindex;
3889 return 0;
3890
3891 case SIOCGIFTXQLEN:
3892 ifr->ifr_qlen = dev->tx_queue_len;
3893 return 0;
3894
3895 default:
3896 /* dev_ioctl() should ensure this case
3897 * is never reached
3898 */
3899 WARN_ON(1);
3900 err = -EINVAL;
3901 break;
3902
3903 }
3904 return err;
3905}
3906
3907/*
3908 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
3909 */
3910static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3911{
3912 int err;
3913 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08003914 const struct net_device_ops *ops;
Jeff Garzik14e3e072007-10-08 00:06:32 -07003915
3916 if (!dev)
3917 return -ENODEV;
3918
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08003919 ops = dev->netdev_ops;
3920
Jeff Garzik14e3e072007-10-08 00:06:32 -07003921 switch (cmd) {
3922 case SIOCSIFFLAGS: /* Set interface flags */
3923 return dev_change_flags(dev, ifr->ifr_flags);
3924
3925 case SIOCSIFMETRIC: /* Set the metric on the interface
3926 (currently unused) */
3927 return -EOPNOTSUPP;
3928
3929 case SIOCSIFMTU: /* Set the MTU of a device */
3930 return dev_set_mtu(dev, ifr->ifr_mtu);
3931
Linus Torvalds1da177e2005-04-16 15:20:36 -07003932 case SIOCSIFHWADDR:
3933 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3934
3935 case SIOCSIFHWBROADCAST:
3936 if (ifr->ifr_hwaddr.sa_family != dev->type)
3937 return -EINVAL;
3938 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3939 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003940 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003941 return 0;
3942
Linus Torvalds1da177e2005-04-16 15:20:36 -07003943 case SIOCSIFMAP:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003944 if (ops->ndo_set_config) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003945 if (!netif_device_present(dev))
3946 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003947 return ops->ndo_set_config(dev, &ifr->ifr_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003948 }
3949 return -EOPNOTSUPP;
3950
3951 case SIOCADDMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003952 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003953 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3954 return -EINVAL;
3955 if (!netif_device_present(dev))
3956 return -ENODEV;
3957 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3958 dev->addr_len, 1);
3959
3960 case SIOCDELMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003961 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003962 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3963 return -EINVAL;
3964 if (!netif_device_present(dev))
3965 return -ENODEV;
3966 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3967 dev->addr_len, 1);
3968
Linus Torvalds1da177e2005-04-16 15:20:36 -07003969 case SIOCSIFTXQLEN:
3970 if (ifr->ifr_qlen < 0)
3971 return -EINVAL;
3972 dev->tx_queue_len = ifr->ifr_qlen;
3973 return 0;
3974
3975 case SIOCSIFNAME:
3976 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3977 return dev_change_name(dev, ifr->ifr_newname);
3978
3979 /*
3980 * Unknown or private ioctl
3981 */
3982
3983 default:
3984 if ((cmd >= SIOCDEVPRIVATE &&
3985 cmd <= SIOCDEVPRIVATE + 15) ||
3986 cmd == SIOCBONDENSLAVE ||
3987 cmd == SIOCBONDRELEASE ||
3988 cmd == SIOCBONDSETHWADDR ||
3989 cmd == SIOCBONDSLAVEINFOQUERY ||
3990 cmd == SIOCBONDINFOQUERY ||
3991 cmd == SIOCBONDCHANGEACTIVE ||
3992 cmd == SIOCGMIIPHY ||
3993 cmd == SIOCGMIIREG ||
3994 cmd == SIOCSMIIREG ||
3995 cmd == SIOCBRADDIF ||
3996 cmd == SIOCBRDELIF ||
Patrick Ohlyd24fff22009-02-12 05:03:40 +00003997 cmd == SIOCSHWTSTAMP ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003998 cmd == SIOCWANDEV) {
3999 err = -EOPNOTSUPP;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004000 if (ops->ndo_do_ioctl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004001 if (netif_device_present(dev))
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004002 err = ops->ndo_do_ioctl(dev, ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004003 else
4004 err = -ENODEV;
4005 }
4006 } else
4007 err = -EINVAL;
4008
4009 }
4010 return err;
4011}
4012
4013/*
4014 * This function handles all "interface"-type I/O control requests. The actual
4015 * 'doing' part of this is dev_ifsioc above.
4016 */
4017
4018/**
4019 * dev_ioctl - network device ioctl
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004020 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07004021 * @cmd: command to issue
4022 * @arg: pointer to a struct ifreq in user space
4023 *
4024 * Issue ioctl functions to devices. This is normally called by the
4025 * user space syscall interfaces but can sometimes be useful for
4026 * other purposes. The return value is the return from the syscall if
4027 * positive or a negative errno code on error.
4028 */
4029
Eric W. Biederman881d9662007-09-17 11:56:21 -07004030int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004031{
4032 struct ifreq ifr;
4033 int ret;
4034 char *colon;
4035
4036 /* One special case: SIOCGIFCONF takes ifconf argument
4037 and requires shared lock, because it sleeps writing
4038 to user space.
4039 */
4040
4041 if (cmd == SIOCGIFCONF) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004042 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004043 ret = dev_ifconf(net, (char __user *) arg);
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004044 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004045 return ret;
4046 }
4047 if (cmd == SIOCGIFNAME)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004048 return dev_ifname(net, (struct ifreq __user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004049
4050 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4051 return -EFAULT;
4052
4053 ifr.ifr_name[IFNAMSIZ-1] = 0;
4054
4055 colon = strchr(ifr.ifr_name, ':');
4056 if (colon)
4057 *colon = 0;
4058
4059 /*
4060 * See which interface the caller is talking about.
4061 */
4062
4063 switch (cmd) {
4064 /*
4065 * These ioctl calls:
4066 * - can be done by all.
4067 * - atomic and do not require locking.
4068 * - return a value
4069 */
4070 case SIOCGIFFLAGS:
4071 case SIOCGIFMETRIC:
4072 case SIOCGIFMTU:
4073 case SIOCGIFHWADDR:
4074 case SIOCGIFSLAVE:
4075 case SIOCGIFMAP:
4076 case SIOCGIFINDEX:
4077 case SIOCGIFTXQLEN:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004078 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004079 read_lock(&dev_base_lock);
Jeff Garzik14e3e072007-10-08 00:06:32 -07004080 ret = dev_ifsioc_locked(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004081 read_unlock(&dev_base_lock);
4082 if (!ret) {
4083 if (colon)
4084 *colon = ':';
4085 if (copy_to_user(arg, &ifr,
4086 sizeof(struct ifreq)))
4087 ret = -EFAULT;
4088 }
4089 return ret;
4090
4091 case SIOCETHTOOL:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004092 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004093 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004094 ret = dev_ethtool(net, &ifr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004095 rtnl_unlock();
4096 if (!ret) {
4097 if (colon)
4098 *colon = ':';
4099 if (copy_to_user(arg, &ifr,
4100 sizeof(struct ifreq)))
4101 ret = -EFAULT;
4102 }
4103 return ret;
4104
4105 /*
4106 * These ioctl calls:
4107 * - require superuser power.
4108 * - require strict serialization.
4109 * - return a value
4110 */
4111 case SIOCGMIIPHY:
4112 case SIOCGMIIREG:
4113 case SIOCSIFNAME:
4114 if (!capable(CAP_NET_ADMIN))
4115 return -EPERM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004116 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004117 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004118 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004119 rtnl_unlock();
4120 if (!ret) {
4121 if (colon)
4122 *colon = ':';
4123 if (copy_to_user(arg, &ifr,
4124 sizeof(struct ifreq)))
4125 ret = -EFAULT;
4126 }
4127 return ret;
4128
4129 /*
4130 * These ioctl calls:
4131 * - require superuser power.
4132 * - require strict serialization.
4133 * - do not return a value
4134 */
4135 case SIOCSIFFLAGS:
4136 case SIOCSIFMETRIC:
4137 case SIOCSIFMTU:
4138 case SIOCSIFMAP:
4139 case SIOCSIFHWADDR:
4140 case SIOCSIFSLAVE:
4141 case SIOCADDMULTI:
4142 case SIOCDELMULTI:
4143 case SIOCSIFHWBROADCAST:
4144 case SIOCSIFTXQLEN:
4145 case SIOCSMIIREG:
4146 case SIOCBONDENSLAVE:
4147 case SIOCBONDRELEASE:
4148 case SIOCBONDSETHWADDR:
Linus Torvalds1da177e2005-04-16 15:20:36 -07004149 case SIOCBONDCHANGEACTIVE:
4150 case SIOCBRADDIF:
4151 case SIOCBRDELIF:
Patrick Ohlyd24fff22009-02-12 05:03:40 +00004152 case SIOCSHWTSTAMP:
Linus Torvalds1da177e2005-04-16 15:20:36 -07004153 if (!capable(CAP_NET_ADMIN))
4154 return -EPERM;
Thomas Grafcabcac02006-01-24 12:46:33 -08004155 /* fall through */
4156 case SIOCBONDSLAVEINFOQUERY:
4157 case SIOCBONDINFOQUERY:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004158 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004159 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004160 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004161 rtnl_unlock();
4162 return ret;
4163
4164 case SIOCGIFMEM:
4165 /* Get the per device memory space. We can add this but
4166 * currently do not support it */
4167 case SIOCSIFMEM:
4168 /* Set the per device memory buffer space.
4169 * Not applicable in our case */
4170 case SIOCSIFLINK:
4171 return -EINVAL;
4172
4173 /*
4174 * Unknown or private ioctl.
4175 */
4176 default:
4177 if (cmd == SIOCWANDEV ||
4178 (cmd >= SIOCDEVPRIVATE &&
4179 cmd <= SIOCDEVPRIVATE + 15)) {
Eric W. Biederman881d9662007-09-17 11:56:21 -07004180 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004181 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004182 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004183 rtnl_unlock();
4184 if (!ret && copy_to_user(arg, &ifr,
4185 sizeof(struct ifreq)))
4186 ret = -EFAULT;
4187 return ret;
4188 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004189 /* Take care of Wireless Extensions */
Johannes Berg295f4a12007-04-26 20:43:56 -07004190 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004191 return wext_handle_ioctl(net, &ifr, cmd, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004192 return -EINVAL;
4193 }
4194}
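/*
 * Userspace sketch (illustration only) of one of the "atomic, return
 * a value" calls routed through dev_ioctl() above:
 *
 *	struct ifreq ifr;
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *		printf("%s mtu %d\n", ifr.ifr_name, ifr.ifr_mtu);
 */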
4195
4196
4197/**
4198 * dev_new_index - allocate an ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004199 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07004200 *
4201 * Returns a suitable unique value for a new device interface
4202 * number. The caller must hold the rtnl semaphore or the
4203 * dev_base_lock to be sure it remains unique.
4204 */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004205static int dev_new_index(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004206{
4207 static int ifindex;
4208 for (;;) {
4209 if (++ifindex <= 0)
4210 ifindex = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004211 if (!__dev_get_by_index(net, ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004212 return ifindex;
4213 }
4214}
4215
Linus Torvalds1da177e2005-04-16 15:20:36 -07004216/* Delayed registration/unregistration */
Denis Cheng3b5b34f2007-12-07 00:49:17 -08004217static LIST_HEAD(net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004218
Stephen Hemminger6f05f622007-03-08 20:46:03 -08004219static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004220{
Linus Torvalds1da177e2005-04-16 15:20:36 -07004221 list_add_tail(&dev->todo_list, &net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004222}
4223
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004224static void rollback_registered(struct net_device *dev)
4225{
4226 BUG_ON(dev_boot_phase);
4227 ASSERT_RTNL();
4228
4229 /* Some devices call without registering for initialization unwind. */
4230 if (dev->reg_state == NETREG_UNINITIALIZED) {
4231 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4232 "was registered\n", dev->name, dev);
4233
4234 WARN_ON(1);
4235 return;
4236 }
4237
4238 BUG_ON(dev->reg_state != NETREG_REGISTERED);
4239
4240 /* If device is running, close it first. */
4241 dev_close(dev);
4242
4243 /* And unlink it from device chain. */
4244 unlist_netdevice(dev);
4245
4246 dev->reg_state = NETREG_UNREGISTERING;
4247
4248 synchronize_net();
4249
4250 /* Shutdown queueing discipline. */
4251 dev_shutdown(dev);
4252
4253
4254 /* Notify protocols, that we are about to destroy
4255 this device. They should clean all the things.
4256 */
4257 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4258
4259 /*
4260 * Flush the unicast and multicast chains
4261 */
4262 dev_addr_discard(dev);
4263
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004264 if (dev->netdev_ops->ndo_uninit)
4265 dev->netdev_ops->ndo_uninit(dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004266
4267 /* Notifier chain MUST detach us from master device. */
Ilpo Järvinen547b7922008-07-25 21:43:18 -07004268 WARN_ON(dev->master);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004269
4270 /* Remove entries from kobject tree */
4271 netdev_unregister_kobject(dev);
4272
4273 synchronize_net();
4274
4275 dev_put(dev);
4276}
4277
static void __netdev_init_queue_locks_one(struct net_device *dev,
					  struct netdev_queue *dev_queue,
					  void *_unused)
{
	spin_lock_init(&dev_queue->_xmit_lock);
	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
	dev_queue->xmit_lock_owner = -1;
}

static void netdev_init_queue_locks(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
}

unsigned long netdev_fix_features(unsigned long features, const char *name)
{
	/* Fix illegal SG+CSUM combinations. */
	if ((features & NETIF_F_SG) &&
	    !(features & NETIF_F_ALL_CSUM)) {
		if (name)
			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
			       "checksum feature.\n", name);
		features &= ~NETIF_F_SG;
	}

	/* TSO requires that SG is present as well. */
	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
		if (name)
			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
			       "SG feature.\n", name);
		features &= ~NETIF_F_TSO;
	}

	if (features & NETIF_F_UFO) {
		if (!(features & NETIF_F_GEN_CSUM)) {
			if (name)
				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
				       "since no NETIF_F_HW_CSUM feature.\n",
				       name);
			features &= ~NETIF_F_UFO;
		}

		if (!(features & NETIF_F_SG)) {
			if (name)
				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
				       "since no NETIF_F_SG feature.\n", name);
			features &= ~NETIF_F_UFO;
		}
	}

	return features;
}
EXPORT_SYMBOL(netdev_fix_features);
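
/*
 * Example (illustrative sketch, not part of this file): a driver can
 * run its advertised flags through netdev_fix_features() before
 * registration so that illegal combinations are dropped up front.
 * The feature mix below is hypothetical.
 *
 *	dev->features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_CSUM;
 *	dev->features = netdev_fix_features(dev->features, dev->name);
 *
 * TSO survives here because both SG and a checksum feature are set;
 * drop NETIF_F_HW_CSUM from the mix and both SG and TSO would be
 * stripped, with a notice printed against dev->name.
 */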

/**
 * register_netdevice - register a network device
 * @dev: device to register
 *
 * Take a completed network device structure and add it to the kernel
 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 * chain. 0 is returned on success. A negative errno code is returned
 * on a failure to set up the device, or if the name is a duplicate.
 *
 * Callers must hold the rtnl semaphore. You may want
 * register_netdev() instead of this.
 *
 * BUGS:
 * The locking appears insufficient to guarantee two parallel registers
 * will not get the same name.
 */

int register_netdevice(struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *p;
	int ret;
	struct net *net = dev_net(dev);

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* When net_device's are persistent, this will be fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!net);

	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);
	netdev_init_queue_locks(dev);

	dev->iflink = -1;

#ifdef CONFIG_COMPAT_NET_DEV_OPS
	/* Netdevice_ops API compatibility support.
	 * This is temporary until all network devices are converted.
	 */
	if (dev->netdev_ops) {
		const struct net_device_ops *ops = dev->netdev_ops;

		dev->init = ops->ndo_init;
		dev->uninit = ops->ndo_uninit;
		dev->open = ops->ndo_open;
		dev->change_rx_flags = ops->ndo_change_rx_flags;
		dev->set_rx_mode = ops->ndo_set_rx_mode;
		dev->set_multicast_list = ops->ndo_set_multicast_list;
		dev->set_mac_address = ops->ndo_set_mac_address;
		dev->validate_addr = ops->ndo_validate_addr;
		dev->do_ioctl = ops->ndo_do_ioctl;
		dev->set_config = ops->ndo_set_config;
		dev->change_mtu = ops->ndo_change_mtu;
		dev->tx_timeout = ops->ndo_tx_timeout;
		dev->get_stats = ops->ndo_get_stats;
		dev->vlan_rx_register = ops->ndo_vlan_rx_register;
		dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
		dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
#ifdef CONFIG_NET_POLL_CONTROLLER
		dev->poll_controller = ops->ndo_poll_controller;
#endif
	} else {
		char drivername[64];
		pr_info("%s (%s): not using net_device_ops yet\n",
			dev->name, netdev_drivername(dev, drivername, 64));

		/* This works only because net_device_ops and the
		   compatibility structure are the same. */
		dev->netdev_ops = (void *) &(dev->init);
	}
#endif

	/* Init, if this function is available */
	if (dev->netdev_ops->ndo_init) {
		ret = dev->netdev_ops->ndo_init(dev);
		if (ret) {
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	if (!dev_valid_name(dev->name)) {
		ret = -EINVAL;
		goto err_uninit;
	}

	dev->ifindex = dev_new_index(net);
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/* Check for existence of name */
	head = dev_name_hash(net, dev->name);
	hlist_for_each(p, head) {
		struct net_device *d
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
			ret = -EEXIST;
			goto err_uninit;
		}
	}

	/* Fix illegal checksum combinations */
	if ((dev->features & NETIF_F_HW_CSUM) &&
	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
	}

	if ((dev->features & NETIF_F_NO_CSUM) &&
	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
	}

	dev->features = netdev_fix_features(dev->features, dev->name);

	/* Enable software GSO if SG is supported. */
	if (dev->features & NETIF_F_SG)
		dev->features |= NETIF_F_GSO;

	netdev_initialize_kobject(dev);
	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	/*
	 *	Default initial state at registration is that the
	 *	device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	dev_init_scheduler(dev);
	dev_hold(dev);
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret) {
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}

out:
	return ret;

err_uninit:
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
	goto out;
}

/**
 * init_dummy_netdev - init a dummy network device for NAPI
 * @dev: device to init
 *
 * This takes a network device structure and initializes the minimum
 * amount of fields so it can be used to schedule NAPI polls without
 * registering a full blown interface. This is to be used by drivers
 * that need to tie several hardware interfaces to a single NAPI
 * poll scheduler due to HW limitations.
 */
int init_dummy_netdev(struct net_device *dev)
{
	/* Clear everything. Note we don't initialize spinlocks
	 * as they aren't supposed to be taken by any of the
	 * NAPI code and this dummy netdev is supposed to be
	 * only ever used for NAPI polls
	 */
	memset(dev, 0, sizeof(struct net_device));

	/* make sure we BUG if trying to hit standard
	 * register/unregister code path
	 */
	dev->reg_state = NETREG_DUMMY;

	/* initialize the ref count */
	atomic_set(&dev->refcnt, 1);

	/* NAPI wants this */
	INIT_LIST_HEAD(&dev->napi_list);

	/* a dummy interface is started by default */
	set_bit(__LINK_STATE_PRESENT, &dev->state);
	set_bit(__LINK_STATE_START, &dev->state);

	return 0;
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);
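
/*
 * Example (illustrative sketch, not part of this file): a driver whose
 * hardware forces one poll context across several ports can embed a
 * dummy netdev purely as a NAPI anchor. "struct foo_adapter",
 * foo_poll() and FOO_NAPI_WEIGHT are hypothetical names.
 *
 *	struct foo_adapter {
 *		struct net_device napi_dev;	(dummy, never registered)
 *		struct napi_struct napi;
 *	};
 *
 *	init_dummy_netdev(&adapter->napi_dev);
 *	netif_napi_add(&adapter->napi_dev, &adapter->napi,
 *		       foo_poll, FOO_NAPI_WEIGHT);
 *
 * The dummy device is never passed to register_netdev(); its
 * NETREG_DUMMY state makes that path BUG() if attempted.
 */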

/**
 * register_netdev - register a network device
 * @dev: device to register
 *
 * Take a completed network device structure and add it to the kernel
 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 * chain. 0 is returned on success. A negative errno code is returned
 * on a failure to set up the device, or if the name is a duplicate.
 *
 * This is a wrapper around register_netdevice that takes the rtnl semaphore
 * and expands the device name if you passed a format string to
 * alloc_netdev.
 */
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();

	/*
	 * If the name is a format string the caller wants us to do a
	 * name allocation.
	 */
	if (strchr(dev->name, '%')) {
		err = dev_alloc_name(dev, dev->name);
		if (err < 0)
			goto out;
	}

	err = register_netdevice(dev);
out:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);
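
/*
 * Example (illustrative sketch, not part of this file): the usual
 * driver probe sequence around register_netdev(). foo_setup() and
 * struct foo_priv are hypothetical; "foo%d" asks register_netdev()
 * to pick a free unit number.
 *
 *	dev = alloc_netdev(sizeof(struct foo_priv), "foo%d", foo_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 */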

/*
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}
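
/*
 * Example (illustrative sketch, not part of this file): a protocol
 * that holds a device reference should drop it from its notifier so
 * netdev_wait_allrefs() can finish. foo_event() and foo_cleanup()
 * are hypothetical; in this kernel the notifier's data pointer is
 * the net_device itself.
 *
 *	static int foo_event(struct notifier_block *nb,
 *			     unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UNREGISTER) {
 *			foo_cleanup(dev);
 *			dev_put(dev);
 *		}
 *		return NOTIFY_DONE;
 *	}
 */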

/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * We must not return until all unregister events added during
 * the interval the lock was held have been completed.
 */
void netdev_run_todo(void)
{
	struct list_head list;

	/* Snapshot list, allow later requests */
	list_replace_init(&net_todo_list, &list);

	__rtnl_unlock();

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		on_each_cpu(flush_backlog, dev, 1);

		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		WARN_ON(dev->ip_ptr);
		WARN_ON(dev->ip6_ptr);
		WARN_ON(dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}
}

/**
 * dev_get_stats - get network device statistics
 * @dev: device to get statistics from
 *
 * Get network statistics from device. The device driver may provide
 * its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
 * the internal statistics structure is used.
 */
const struct net_device_stats *dev_get_stats(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if (ops->ndo_get_stats)
		return ops->ndo_get_stats(dev);
	else
		return &dev->stats;
}
EXPORT_SYMBOL(dev_get_stats);
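
/*
 * Example (illustrative sketch, not part of this file): a driver that
 * keeps counters in hardware can supply ndo_get_stats so that
 * dev_get_stats() returns fresh numbers. foo_get_stats(), foo_priv
 * and foo_read_hw_rx_count() are hypothetical.
 *
 *	static struct net_device_stats *foo_get_stats(struct net_device *dev)
 *	{
 *		struct foo_priv *priv = netdev_priv(dev);
 *
 *		dev->stats.rx_packets = foo_read_hw_rx_count(priv);
 *		return &dev->stats;
 *	}
 *
 * Drivers with no hardware counters can simply omit ndo_get_stats
 * and update dev->stats from their rx/tx paths.
 */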

static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue,
				  void *_unused)
{
	queue->dev = dev;
}

static void netdev_init_queues(struct net_device *dev)
{
	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
	spin_lock_init(&dev->tx_global_lock);
}

/**
 * alloc_netdev_mq - allocate network device
 * @sizeof_priv: size of private data to allocate space for
 * @name: device name format string
 * @setup: callback to initialize device
 * @queue_count: the number of subqueues to allocate
 *
 * Allocates a struct net_device with private data area for driver use
 * and performs basic initialization. Also allocates subqueue structs
 * for each queue on the device at the end of the netdevice.
 */
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *), unsigned int queue_count)
{
	struct netdev_queue *tx;
	struct net_device *dev;
	size_t alloc_size;
	void *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure 32-byte alignment of private area */
		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
		alloc_size += sizeof_priv;
	}
	/* ensure 32-byte alignment of whole construct */
	alloc_size += NETDEV_ALIGN_CONST;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
		return NULL;
	}

	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
	if (!tx) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate "
		       "tx qdiscs.\n");
		kfree(p);
		return NULL;
	}

	dev = (struct net_device *)
		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
	dev->padded = (char *)dev - (char *)p;
	dev_net_set(dev, &init_net);

	dev->_tx = tx;
	dev->num_tx_queues = queue_count;
	dev->real_num_tx_queues = queue_count;

	dev->gso_max_size = GSO_MAX_SIZE;

	netdev_init_queues(dev);

	INIT_LIST_HEAD(&dev->napi_list);
	setup(dev);
	strcpy(dev->name, name);
	return dev;
}
EXPORT_SYMBOL(alloc_netdev_mq);
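
/*
 * Example (illustrative sketch, not part of this file): allocating a
 * four-queue Ethernet device and reaching the private area laid out
 * behind the net_device. ether_setup() is the stock Ethernet
 * initializer; struct foo_priv is hypothetical.
 *
 *	dev = alloc_netdev_mq(sizeof(struct foo_priv), "eth%d",
 *			      ether_setup, 4);
 *	if (dev) {
 *		struct foo_priv *priv = netdev_priv(dev);
 *		...
 *	}
 *
 * Single-queue drivers normally use the alloc_netdev() wrapper, which
 * calls this with a queue_count of 1.
 */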

/**
 * free_netdev - free network device
 * @dev: device
 *
 * This function does the last stage of destroying an allocated device
 * interface. The reference to the device object is released.
 * If this is the last reference then it will be freed.
 */
void free_netdev(struct net_device *dev)
{
	struct napi_struct *p, *n;

	release_net(dev_net(dev));

	kfree(dev->_tx);

	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
		netif_napi_del(p);

	/* Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
}

/**
 * synchronize_net - Synchronize with packet receive processing
 *
 * Wait for packets currently being received to be done.
 * Does not block later packets from starting.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
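
/*
 * Example (illustrative sketch, not part of this file): the classic
 * unpublish-then-free pattern this primitive supports. foo_hook and
 * old_state are hypothetical; once the pointer is unpublished,
 * synchronize_net() guarantees no CPU is still inside a receive path
 * that could see it.
 *
 *	rcu_assign_pointer(foo_hook, NULL);
 *	synchronize_net();
 *	kfree(old_state);
 */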

/**
 * unregister_netdevice - remove device from the kernel
 * @dev: device
 *
 * This function shuts down a device interface and removes it
 * from the kernel tables.
 *
 * Callers must hold the rtnl semaphore. You may want
 * unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	rollback_registered(dev);
	/* Finish processing unregister after unlock */
	net_set_todo(dev);
}

/**
 * unregister_netdev - remove device from the kernel
 * @dev: device
 *
 * This function shuts down a device interface and removes it
 * from the kernel tables.
 *
 * This is just a wrapper for unregister_netdevice that takes
 * the rtnl semaphore. In general you want to use this and not
 * unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
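
/*
 * Example (illustrative sketch, not part of this file): the teardown
 * mirror of the registration example above, as used from a driver's
 * remove path. unregister_netdev() takes and releases the rtnl lock
 * itself; free_netdev() then drops the last reference.
 *
 *	unregister_netdev(dev);
 *	free_netdev(dev);
 */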

/**
 * dev_change_net_namespace - move device to a different network namespace
 * @dev: device
 * @net: network namespace
 * @pat: If not NULL name pattern to try if the current device name
 *       is already taken in the destination network namespace.
 *
 * This function shuts down a device interface and moves it
 * to a new network namespace. On success 0 is returned, on
 * a failure a negative errno code is returned.
 *
 * Callers must hold the rtnl semaphore.
 */

int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	char buf[IFNAMSIZ];
	const char *destname;
	int err;

	ASSERT_RTNL();

	/* Don't allow namespace local devices to be moved. */
	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

#ifdef CONFIG_SYSFS
	/* Don't allow real devices to be moved when sysfs
	 * is enabled.
	 */
	err = -EINVAL;
	if (dev->dev.parent)
		goto out;
#endif

	/* Ensure the device has been registered */
	err = -EINVAL;
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	/* Get out if there is nothing to do */
	err = 0;
	if (net_eq(dev_net(dev), net))
		goto out;

	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
	err = -EEXIST;
	destname = dev->name;
	if (__dev_get_by_name(net, destname)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (!dev_valid_name(pat))
			goto out;
		if (strchr(pat, '%')) {
			if (__dev_alloc_name(net, pat, buf) < 0)
				goto out;
			destname = buf;
		} else
			destname = pat;
		if (__dev_get_by_name(net, destname))
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice/unregister_netdevice.
	 */

	/* If device is running close it first. */
	dev_close(dev);

	/* And unlink it from device chain */
	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols that we are about to destroy
	   this device. They should clean up all their state.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_addr_discard(dev);

	netdev_unregister_kobject(dev);

	/* Actually switch the network namespace */
	dev_net_set(dev, net);

	/* Assign the new device name */
	if (destname != dev->name)
		strcpy(dev->name, destname);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex)) {
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	/* Fixup kobjects */
	err = netdev_register_kobject(dev);
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	synchronize_net();
	err = 0;
out:
	return err;
}
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct Qdisc **list_net;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Find end of our output_queue. */
	list_net = &sd->output_queue;
	while (*list_net)
		list_net = &(*list_net)->next_sched;
	/* Append output queue from offline CPU. */
	*list_net = oldsd->output_queue;
	oldsd->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
		netif_rx(skb);

	return NOTIFY_OK;
}

/**
 * netdev_increment_features - increment feature set by one
 * @all: current feature set
 * @one: new feature set
 * @mask: mask feature set
 *
 * Computes a new feature set after adding a device with feature set
 * @one to the master device with current feature set @all. Will not
 * enable anything that is off in @mask. Returns the new feature set.
 */
unsigned long netdev_increment_features(unsigned long all, unsigned long one,
					unsigned long mask)
{
	/* If device needs checksumming, downgrade to it. */
	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
	else if (mask & NETIF_F_ALL_CSUM) {
		/* If one device supports v4/v6 checksumming, set for all. */
		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
		    !(all & NETIF_F_GEN_CSUM)) {
			all &= ~NETIF_F_ALL_CSUM;
			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
		}

		/* If one device supports hw checksumming, set for all. */
		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
			all &= ~NETIF_F_ALL_CSUM;
			all |= NETIF_F_HW_CSUM;
		}
	}

	one |= NETIF_F_ALL_CSUM;

	one |= all & NETIF_F_ONE_FOR_ALL;
	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
	all |= one & mask & NETIF_F_ONE_FOR_ALL;

	return all;
}
EXPORT_SYMBOL(netdev_increment_features);
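
/*
 * Example (schematic sketch, not part of this file): a master device
 * such as a bond can recompute its feature set as slaves come and go
 * by folding each slave in and then sanitizing the result. The loop
 * below is pseudocode; "mask" is the feature set the master is
 * allowed to enable.
 *
 *	unsigned long features = mask;
 *
 *	for each slave:
 *		features = netdev_increment_features(features,
 *						     slave->features, mask);
 *	features = netdev_fix_features(features, NULL);
 */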

static struct hlist_head *netdev_create_hash(void)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
	if (hash != NULL)
		for (i = 0; i < NETDEV_HASHENTRIES; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}

/* Initialize per network namespace state */
static int __net_init netdev_init(struct net *net)
{
	INIT_LIST_HEAD(&net->dev_base_head);

	net->dev_name_head = netdev_create_hash();
	if (net->dev_name_head == NULL)
		goto err_name;

	net->dev_index_head = netdev_create_hash();
	if (net->dev_index_head == NULL)
		goto err_idx;

	return 0;

err_idx:
	kfree(net->dev_name_head);
err_name:
	return -ENOMEM;
}

/**
 * netdev_drivername - network driver for the device
 * @dev: network device
 * @buffer: buffer for resulting name
 * @len: size of buffer
 *
 * Determine network driver for device.
 */
char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
{
	const struct device_driver *driver;
	const struct device *parent;

	if (len <= 0 || !buffer)
		return buffer;
	buffer[0] = 0;

	parent = dev->dev.parent;

	if (!parent)
		return buffer;

	driver = parent->driver;
	if (driver && driver->name)
		strlcpy(buffer, driver->name, len);
	return buffer;
}

static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev;
	/*
	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
restart:
	for_each_netdev(net, dev) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore unmoveable devices (i.e. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Delete virtual devices */
		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
			dev->rtnl_link_ops->dellink(dev);
			goto restart;
		}

		/* Push remaining network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
				__func__, dev->name, err);
			BUG();
		}
		goto restart;
	}
	rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
};

/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 *
 */

/*
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */

	for_each_possible_cpu(i) {
		struct softnet_data *queue;

		queue = &per_cpu(softnet_data, i);
		skb_queue_head_init(&queue->input_pkt_queue);
		queue->completion_queue = NULL;
		INIT_LIST_HEAD(&queue->poll_list);

		queue->backlog.poll = process_backlog;
		queue->backlog.weight = weight_p;
		queue->backlog.gro_list = NULL;
		queue->backlog.gro_count = 0;
	}

	dev_boot_phase = 0;

	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must
	 * be present too. Since we now dynamically allocate and free
	 * the loopback device, ensure this invariant is maintained by
	 * keeping the loopback device first on the list of network
	 * devices: it is the first device that appears and the last
	 * network device that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);

static int __init initialize_hashrnd(void)
{
	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
	return 0;
}

late_initcall_sync(initialize_hashrnd);

EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

EXPORT_SYMBOL(dev_load);

EXPORT_PER_CPU_SYMBOL(softnet_data);