blob: f1647d7dd14bdf8f730522d5a251636e392a3a6d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
Jesper Juhl02c30a82005-05-05 16:16:16 -070010 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
44 * call a packet.
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
73 */
74
75#include <asm/uaccess.h>
76#include <asm/system.h>
77#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080078#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070079#include <linux/cpu.h>
80#include <linux/types.h>
81#include <linux/kernel.h>
82#include <linux/sched.h>
Arjan van de Ven4a3e2f72006-03-20 22:33:17 -080083#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070084#include <linux/string.h>
85#include <linux/mm.h>
86#include <linux/socket.h>
87#include <linux/sockios.h>
88#include <linux/errno.h>
89#include <linux/interrupt.h>
90#include <linux/if_ether.h>
91#include <linux/netdevice.h>
92#include <linux/etherdevice.h>
93#include <linux/notifier.h>
94#include <linux/skbuff.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020095#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070096#include <net/sock.h>
97#include <linux/rtnetlink.h>
98#include <linux/proc_fs.h>
99#include <linux/seq_file.h>
100#include <linux/stat.h>
101#include <linux/if_bridge.h>
Patrick McHardyb863ceb2007-07-14 18:55:06 -0700102#include <linux/if_macvlan.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103#include <net/dst.h>
104#include <net/pkt_sched.h>
105#include <net/checksum.h>
106#include <linux/highmem.h>
107#include <linux/init.h>
108#include <linux/kmod.h>
109#include <linux/module.h>
110#include <linux/kallsyms.h>
111#include <linux/netpoll.h>
112#include <linux/rcupdate.h>
113#include <linux/delay.h>
Johannes Berg295f4a12007-04-26 20:43:56 -0700114#include <net/wext.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#include <net/iw_handler.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116#include <asm/current.h>
Steve Grubb5bdb9882005-12-03 08:39:35 -0500117#include <linux/audit.h>
Chris Leechdb217332006-06-17 21:24:58 -0700118#include <linux/dmaengine.h>
Herbert Xuf6a78bf2006-06-22 02:57:17 -0700119#include <linux/err.h>
David S. Millerc7fa9d12006-08-15 16:34:13 -0700120#include <linux/ctype.h>
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700121#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122
Pavel Emelyanov342709e2007-10-23 21:14:45 -0700123#include "net-sysfs.h"
124
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125/*
126 * The list of packet types we will receive (as opposed to discard)
127 * and the routines to invoke.
128 *
129 * Why 16. Because with 16 the only overlap we get on a hash of the
130 * low nibble of the protocol value is RARP/SNAP/X.25.
131 *
132 * NOTE: That is no longer true with the addition of VLAN tags. Not
133 * sure which should go first, but I bet it won't make much
134 * difference if we are running VLANs. The good news is that
135 * this protocol won't be in the list unless compiled in, so
Stephen Hemminger3041a062006-05-26 13:25:24 -0700136 * the average user (w/out VLANs) will not be adversely affected.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 * --BLG
138 *
139 * 0800 IP
140 * 8100 802.1Q VLAN
141 * 0001 802.3
142 * 0002 AX.25
143 * 0004 802.2
144 * 8035 RARP
145 * 0005 SNAP
146 * 0805 X.25
147 * 0806 ARP
148 * 8137 IPX
149 * 0009 Localtalk
150 * 86DD IPv6
151 */
152
153static DEFINE_SPINLOCK(ptype_lock);
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -0700154static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */
155static struct list_head ptype_all __read_mostly; /* Taps */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156
Chris Leechdb217332006-06-17 21:24:58 -0700157#ifdef CONFIG_NET_DMA
Dan Williamsd379b012007-07-09 11:56:42 -0700158struct net_dma {
159 struct dma_client client;
160 spinlock_t lock;
161 cpumask_t channel_mask;
162 struct dma_chan *channels[NR_CPUS];
163};
164
165static enum dma_state_client
166netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
167 enum dma_state state);
168
169static struct net_dma net_dma = {
170 .client = {
171 .event_callback = netdev_dma_event,
172 },
173};
Chris Leechdb217332006-06-17 21:24:58 -0700174#endif
175
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176/*
Pavel Emelianov7562f872007-05-03 15:13:45 -0700177 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 * semaphore.
179 *
180 * Pure readers hold dev_base_lock for reading.
181 *
182 * Writers must hold the rtnl semaphore while they loop through the
Pavel Emelianov7562f872007-05-03 15:13:45 -0700183 * dev_base_head list, and hold dev_base_lock for writing when they do the
Linus Torvalds1da177e2005-04-16 15:20:36 -0700184 * actual updates. This allows pure readers to access the list even
185 * while a writer is preparing to update it.
186 *
187 * To put it another way, dev_base_lock is held for writing only to
188 * protect against pure readers; the rtnl semaphore provides the
189 * protection against other writers.
190 *
 * See register_netdevice() and unregister_netdevice() for example
 * usages; both must be called with the rtnl semaphore held.
194 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195DEFINE_RWLOCK(dev_base_lock);
196
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197EXPORT_SYMBOL(dev_base_lock);
198
199#define NETDEV_HASHBITS 8
Eric W. Biederman881d9662007-09-17 11:56:21 -0700200#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201
Eric W. Biederman881d9662007-09-17 11:56:21 -0700202static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203{
204 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
Eric W. Biederman881d9662007-09-17 11:56:21 -0700205 return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206}
207
Eric W. Biederman881d9662007-09-17 11:56:21 -0700208static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209{
Eric W. Biederman881d9662007-09-17 11:56:21 -0700210 return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211}
212
Eric W. Biedermance286d32007-09-12 13:53:49 +0200213/* Device list insertion */
214static int list_netdevice(struct net_device *dev)
215{
216 struct net *net = dev->nd_net;
217
218 ASSERT_RTNL();
219
220 write_lock_bh(&dev_base_lock);
221 list_add_tail(&dev->dev_list, &net->dev_base_head);
222 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
223 hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
224 write_unlock_bh(&dev_base_lock);
225 return 0;
226}
227
228/* Device list removal */
229static void unlist_netdevice(struct net_device *dev)
230{
231 ASSERT_RTNL();
232
233 /* Unlink dev from the device chain */
234 write_lock_bh(&dev_base_lock);
235 list_del(&dev->dev_list);
236 hlist_del(&dev->name_hlist);
237 hlist_del(&dev->index_hlist);
238 write_unlock_bh(&dev_base_lock);
239}
240
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241/*
242 * Our notifier list
243 */
244
Alan Sternf07d5b92006-05-09 15:23:03 -0700245static RAW_NOTIFIER_HEAD(netdev_chain);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246
247/*
248 * Device drivers call our routines to queue packets here. We empty the
249 * queue in the local softnet handler.
250 */
Stephen Hemmingerbea33482007-10-03 16:41:36 -0700251
252DEFINE_PER_CPU(struct softnet_data, softnet_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700254#ifdef CONFIG_DEBUG_LOCK_ALLOC
255/*
256 * register_netdevice() inits dev->_xmit_lock and sets lockdep class
257 * according to dev->type
258 */
259static const unsigned short netdev_lock_type[] =
260 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
261 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
262 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
263 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
264 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
265 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
266 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
267 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
268 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
269 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
270 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
271 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
272 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
273 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
274 ARPHRD_NONE};
275
276static const char *netdev_lock_name[] =
277 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
278 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
279 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
280 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
281 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
282 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
283 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
284 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
285 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
286 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
287 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
288 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
289 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
290 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
291 "_xmit_NONE"};
292
293static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
294
295static inline unsigned short netdev_lock_pos(unsigned short dev_type)
296{
297 int i;
298
299 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
300 if (netdev_lock_type[i] == dev_type)
301 return i;
302 /* the last key is used by default */
303 return ARRAY_SIZE(netdev_lock_type) - 1;
304}
305
306static inline void netdev_set_lockdep_class(spinlock_t *lock,
307 unsigned short dev_type)
308{
309 int i;
310
311 i = netdev_lock_pos(dev_type);
312 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
313 netdev_lock_name[i]);
314}
315#else
316static inline void netdev_set_lockdep_class(spinlock_t *lock,
317 unsigned short dev_type)
318{
319}
320#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321
322/*******************************************************************************
323
324 Protocol management and registration routines
325
326*******************************************************************************/
327
328/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 * Add a protocol ID to the list. Now that the input handler is
330 * smarter we can dispense with all the messy stuff that used to be
331 * here.
332 *
333 * BEWARE!!! Protocol handlers, mangling input packets,
334 * MUST BE last in hash buckets and checking protocol handlers
335 * MUST start from promiscuous ptype_all chain in net_bh.
336 * It is true now, do not change it.
337 * Explanation follows: if protocol handler, mangling packet, will
338 * be the first on list, it is not able to sense, that packet
339 * is cloned and should be copied-on-write, so that it will
340 * change it and subsequent readers will get broken packet.
341 * --ANK (980803)
342 */
343
344/**
345 * dev_add_pack - add packet handler
346 * @pt: packet type declaration
347 *
348 * Add a protocol handler to the networking stack. The passed &packet_type
349 * is linked into kernel lists and may not be freed until it has been
350 * removed from the kernel lists.
351 *
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900352 * This call does not sleep therefore it can not
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 * guarantee all CPU's that are in middle of receiving packets
354 * will see the new packet type (until the next received packet).
355 */
356
357void dev_add_pack(struct packet_type *pt)
358{
359 int hash;
360
361 spin_lock_bh(&ptype_lock);
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700362 if (pt->type == htons(ETH_P_ALL))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 list_add_rcu(&pt->list, &ptype_all);
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700364 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 hash = ntohs(pt->type) & 15;
366 list_add_rcu(&pt->list, &ptype_base[hash]);
367 }
368 spin_unlock_bh(&ptype_lock);
369}
370
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371/**
372 * __dev_remove_pack - remove packet handler
373 * @pt: packet type declaration
374 *
375 * Remove a protocol handler that was previously added to the kernel
376 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
377 * from the kernel lists and can be freed or reused once this function
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900378 * returns.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 *
380 * The packet type might still be in use by receivers
381 * and must not be freed until after all the CPU's have gone
382 * through a quiescent state.
383 */
384void __dev_remove_pack(struct packet_type *pt)
385{
386 struct list_head *head;
387 struct packet_type *pt1;
388
389 spin_lock_bh(&ptype_lock);
390
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700391 if (pt->type == htons(ETH_P_ALL))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 head = &ptype_all;
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700393 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 head = &ptype_base[ntohs(pt->type) & 15];
395
396 list_for_each_entry(pt1, head, list) {
397 if (pt == pt1) {
398 list_del_rcu(&pt->list);
399 goto out;
400 }
401 }
402
403 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
404out:
405 spin_unlock_bh(&ptype_lock);
406}
/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Unlink a protocol handler that was previously registered with
 *	dev_add_pack() and wait until it is no longer in use.  Once this
 *	function returns, @pt can be freed or reused.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* Take it off the list first ... */
	__dev_remove_pack(pt);

	/* ... then wait for receivers that may still be using it. */
	synchronize_net();
}
425
426/******************************************************************************
427
428 Device Boot-time Settings Routines
429
430*******************************************************************************/
431
432/* Boot time configuration table */
433static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
434
435/**
436 * netdev_boot_setup_add - add new setup entry
437 * @name: name of the device
438 * @map: configured settings for the device
439 *
440 * Adds new setup entry to the dev_boot_setup list. The function
441 * returns 0 on error and 1 on success. This is a generic routine to
442 * all netdevices.
443 */
444static int netdev_boot_setup_add(char *name, struct ifmap *map)
445{
446 struct netdev_boot_setup *s;
447 int i;
448
449 s = dev_boot_setup;
450 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
451 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
452 memset(s[i].name, 0, sizeof(s[i].name));
453 strcpy(s[i].name, name);
454 memcpy(&s[i].map, map, sizeof(s[i].map));
455 break;
456 }
457 }
458
459 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
460}
461
462/**
463 * netdev_boot_setup_check - check boot time settings
464 * @dev: the netdevice
465 *
466 * Check boot time settings for the device.
467 * The found settings are set for the device to be used
468 * later in the device probing.
469 * Returns 0 if no settings found, 1 if they are.
470 */
471int netdev_boot_setup_check(struct net_device *dev)
472{
473 struct netdev_boot_setup *s = dev_boot_setup;
474 int i;
475
476 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
477 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
478 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
479 dev->irq = s[i].map.irq;
480 dev->base_addr = s[i].map.base_addr;
481 dev->mem_start = s[i].map.mem_start;
482 dev->mem_end = s[i].map.mem_end;
483 return 1;
484 }
485 }
486 return 0;
487}
488
489
490/**
491 * netdev_boot_base - get address from boot time settings
492 * @prefix: prefix for network device
493 * @unit: id for network device
494 *
495 * Check boot time settings for the base address of device.
496 * The found settings are set for the device to be used
497 * later in the device probing.
498 * Returns 0 if no settings found.
499 */
500unsigned long netdev_boot_base(const char *prefix, int unit)
501{
502 const struct netdev_boot_setup *s = dev_boot_setup;
503 char name[IFNAMSIZ];
504 int i;
505
506 sprintf(name, "%s%d", prefix, unit);
507
508 /*
509 * If device already registered then return base of 1
510 * to indicate not to probe for this interface
511 */
Eric W. Biederman881d9662007-09-17 11:56:21 -0700512 if (__dev_get_by_name(&init_net, name))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513 return 1;
514
515 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
516 if (!strcmp(name, s[i].name))
517 return s[i].map.base_addr;
518 return 0;
519}
520
521/*
522 * Saves at boot time configured settings for any netdevice.
523 */
524int __init netdev_boot_setup(char *str)
525{
526 int ints[5];
527 struct ifmap map;
528
529 str = get_options(str, ARRAY_SIZE(ints), ints);
530 if (!str || !*str)
531 return 0;
532
533 /* Save settings */
534 memset(&map, 0, sizeof(map));
535 if (ints[0] > 0)
536 map.irq = ints[1];
537 if (ints[0] > 1)
538 map.base_addr = ints[2];
539 if (ints[0] > 2)
540 map.mem_start = ints[3];
541 if (ints[0] > 3)
542 map.mem_end = ints[4];
543
544 /* Add new entry to the list */
545 return netdev_boot_setup_add(str, &map);
546}
547
548__setup("netdev=", netdev_boot_setup);
549
550/*******************************************************************************
551
552 Device Interface Subroutines
553
554*******************************************************************************/
555
556/**
557 * __dev_get_by_name - find a device by its name
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700558 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559 * @name: name to find
560 *
561 * Find an interface by name. Must be called under RTNL semaphore
562 * or @dev_base_lock. If the name is found a pointer to the device
563 * is returned. If the name is not found then %NULL is returned. The
564 * reference counters are not incremented so the caller must be
565 * careful with locks.
566 */
567
Eric W. Biederman881d9662007-09-17 11:56:21 -0700568struct net_device *__dev_get_by_name(struct net *net, const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569{
570 struct hlist_node *p;
571
Eric W. Biederman881d9662007-09-17 11:56:21 -0700572 hlist_for_each(p, dev_name_hash(net, name)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573 struct net_device *dev
574 = hlist_entry(p, struct net_device, name_hlist);
575 if (!strncmp(dev->name, name, IFNAMSIZ))
576 return dev;
577 }
578 return NULL;
579}
580
581/**
582 * dev_get_by_name - find a device by its name
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700583 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 * @name: name to find
585 *
586 * Find an interface by name. This can be called from any
587 * context and does its own locking. The returned handle has
588 * the usage count incremented and the caller must use dev_put() to
589 * release it when it is no longer needed. %NULL is returned if no
590 * matching device is found.
591 */
592
Eric W. Biederman881d9662007-09-17 11:56:21 -0700593struct net_device *dev_get_by_name(struct net *net, const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594{
595 struct net_device *dev;
596
597 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700598 dev = __dev_get_by_name(net, name);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 if (dev)
600 dev_hold(dev);
601 read_unlock(&dev_base_lock);
602 return dev;
603}
604
605/**
606 * __dev_get_by_index - find a device by its ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700607 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 * @ifindex: index of device
609 *
610 * Search for an interface by index. Returns %NULL if the device
611 * is not found or a pointer to the device. The device has not
612 * had its reference counter increased so the caller must be careful
613 * about locking. The caller must hold either the RTNL semaphore
614 * or @dev_base_lock.
615 */
616
Eric W. Biederman881d9662007-09-17 11:56:21 -0700617struct net_device *__dev_get_by_index(struct net *net, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618{
619 struct hlist_node *p;
620
Eric W. Biederman881d9662007-09-17 11:56:21 -0700621 hlist_for_each(p, dev_index_hash(net, ifindex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622 struct net_device *dev
623 = hlist_entry(p, struct net_device, index_hlist);
624 if (dev->ifindex == ifindex)
625 return dev;
626 }
627 return NULL;
628}
629
630
631/**
632 * dev_get_by_index - find a device by its ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700633 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 * @ifindex: index of device
635 *
636 * Search for an interface by index. Returns NULL if the device
637 * is not found or a pointer to the device. The device returned has
638 * had a reference added and the pointer is safe until the user calls
639 * dev_put to indicate they have finished with it.
640 */
641
Eric W. Biederman881d9662007-09-17 11:56:21 -0700642struct net_device *dev_get_by_index(struct net *net, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643{
644 struct net_device *dev;
645
646 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700647 dev = __dev_get_by_index(net, ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 if (dev)
649 dev_hold(dev);
650 read_unlock(&dev_base_lock);
651 return dev;
652}
653
654/**
655 * dev_getbyhwaddr - find a device by its hardware address
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700656 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 * @type: media type of device
658 * @ha: hardware address
659 *
660 * Search for an interface by MAC address. Returns NULL if the device
661 * is not found or a pointer to the device. The caller must hold the
662 * rtnl semaphore. The returned device has not had its ref count increased
663 * and the caller must therefore be careful about locking
664 *
665 * BUGS:
666 * If the API was consistent this would be __dev_get_by_hwaddr
667 */
668
Eric W. Biederman881d9662007-09-17 11:56:21 -0700669struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670{
671 struct net_device *dev;
672
673 ASSERT_RTNL();
674
Eric W. Biederman881d9662007-09-17 11:56:21 -0700675 for_each_netdev(&init_net, dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676 if (dev->type == type &&
677 !memcmp(dev->dev_addr, ha, dev->addr_len))
Pavel Emelianov7562f872007-05-03 15:13:45 -0700678 return dev;
679
680 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681}
682
Jochen Friedrichcf309e32005-09-22 04:44:55 -0300683EXPORT_SYMBOL(dev_getbyhwaddr);
684
Eric W. Biederman881d9662007-09-17 11:56:21 -0700685struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
Patrick McHardy4e9cac22007-05-03 03:28:13 -0700686{
687 struct net_device *dev;
688
689 ASSERT_RTNL();
Eric W. Biederman881d9662007-09-17 11:56:21 -0700690 for_each_netdev(net, dev)
Patrick McHardy4e9cac22007-05-03 03:28:13 -0700691 if (dev->type == type)
Pavel Emelianov7562f872007-05-03 15:13:45 -0700692 return dev;
693
694 return NULL;
Patrick McHardy4e9cac22007-05-03 03:28:13 -0700695}
696
697EXPORT_SYMBOL(__dev_getfirstbyhwtype);
698
Eric W. Biederman881d9662007-09-17 11:56:21 -0700699struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700{
701 struct net_device *dev;
702
703 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -0700704 dev = __dev_getfirstbyhwtype(net, type);
Patrick McHardy4e9cac22007-05-03 03:28:13 -0700705 if (dev)
706 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 rtnl_unlock();
708 return dev;
709}
710
711EXPORT_SYMBOL(dev_getfirstbyhwtype);
712
713/**
714 * dev_get_by_flags - find any device with given flags
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700715 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 * @if_flags: IFF_* values
717 * @mask: bitmask of bits in if_flags to check
718 *
719 * Search for any interface with the given flags. Returns NULL if a device
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900720 * is not found or a pointer to the device. The device returned has
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721 * had a reference added and the pointer is safe until the user calls
722 * dev_put to indicate they have finished with it.
723 */
724
Eric W. Biederman881d9662007-09-17 11:56:21 -0700725struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726{
Pavel Emelianov7562f872007-05-03 15:13:45 -0700727 struct net_device *dev, *ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728
Pavel Emelianov7562f872007-05-03 15:13:45 -0700729 ret = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700731 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 if (((dev->flags ^ if_flags) & mask) == 0) {
733 dev_hold(dev);
Pavel Emelianov7562f872007-05-03 15:13:45 -0700734 ret = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 break;
736 }
737 }
738 read_unlock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -0700739 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740}
741
742/**
743 * dev_valid_name - check if name is okay for network device
744 * @name: name string
745 *
746 * Network device names need to be valid file names to
David S. Millerc7fa9d12006-08-15 16:34:13 -0700747 * to allow sysfs to work. We also disallow any kind of
748 * whitespace.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749 */
Mitch Williamsc2373ee2005-11-09 10:34:45 -0800750int dev_valid_name(const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751{
David S. Millerc7fa9d12006-08-15 16:34:13 -0700752 if (*name == '\0')
753 return 0;
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -0700754 if (strlen(name) >= IFNAMSIZ)
755 return 0;
David S. Millerc7fa9d12006-08-15 16:34:13 -0700756 if (!strcmp(name, ".") || !strcmp(name, ".."))
757 return 0;
758
759 while (*name) {
760 if (*name == '/' || isspace(*name))
761 return 0;
762 name++;
763 }
764 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765}
766
767/**
Eric W. Biedermanb267b172007-09-12 13:48:45 +0200768 * __dev_alloc_name - allocate a name for a device
769 * @net: network namespace to allocate the device name in
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 * @name: name format string
Eric W. Biedermanb267b172007-09-12 13:48:45 +0200771 * @buf: scratch buffer and result name string
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 *
773 * Passed a format string - eg "lt%d" it will try and find a suitable
Stephen Hemminger3041a062006-05-26 13:25:24 -0700774 * id. It scans list of devices to build up a free map, then chooses
775 * the first empty slot. The caller must hold the dev_base or rtnl lock
776 * while allocating the name and adding the device in order to avoid
777 * duplicates.
778 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
779 * Returns the number of the unit assigned or a negative errno code.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 */
781
Eric W. Biedermanb267b172007-09-12 13:48:45 +0200782static int __dev_alloc_name(struct net *net, const char *name, char *buf)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783{
784 int i = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 const char *p;
786 const int max_netdevices = 8*PAGE_SIZE;
Stephen Hemmingercfcabdc2007-10-09 01:59:42 -0700787 unsigned long *inuse;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 struct net_device *d;
789
790 p = strnchr(name, IFNAMSIZ-1, '%');
791 if (p) {
792 /*
793 * Verify the string as this thing may have come from
794 * the user. There must be either one "%d" and no other "%"
795 * characters.
796 */
797 if (p[1] != 'd' || strchr(p + 2, '%'))
798 return -EINVAL;
799
800 /* Use one page as a bit array of possible slots */
Stephen Hemmingercfcabdc2007-10-09 01:59:42 -0700801 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 if (!inuse)
803 return -ENOMEM;
804
Eric W. Biederman881d9662007-09-17 11:56:21 -0700805 for_each_netdev(net, d) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 if (!sscanf(d->name, name, &i))
807 continue;
808 if (i < 0 || i >= max_netdevices)
809 continue;
810
811 /* avoid cases where sscanf is not exact inverse of printf */
Eric W. Biedermanb267b172007-09-12 13:48:45 +0200812 snprintf(buf, IFNAMSIZ, name, i);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 if (!strncmp(buf, d->name, IFNAMSIZ))
814 set_bit(i, inuse);
815 }
816
817 i = find_first_zero_bit(inuse, max_netdevices);
818 free_page((unsigned long) inuse);
819 }
820
Eric W. Biedermanb267b172007-09-12 13:48:45 +0200821 snprintf(buf, IFNAMSIZ, name, i);
822 if (!__dev_get_by_name(net, buf))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 return i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824
825 /* It is possible to run out of possible slots
826 * when the name is long and there isn't enough space left
827 * for the digits, or if all bits are used.
828 */
829 return -ENFILE;
830}
831
Eric W. Biedermanb267b172007-09-12 13:48:45 +0200832/**
833 * dev_alloc_name - allocate a name for a device
834 * @dev: device
835 * @name: name format string
836 *
837 * Passed a format string - eg "lt%d" it will try and find a suitable
838 * id. It scans list of devices to build up a free map, then chooses
839 * the first empty slot. The caller must hold the dev_base or rtnl lock
840 * while allocating the name and adding the device in order to avoid
841 * duplicates.
842 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
843 * Returns the number of the unit assigned or a negative errno code.
844 */
845
846int dev_alloc_name(struct net_device *dev, const char *name)
847{
848 char buf[IFNAMSIZ];
849 struct net *net;
850 int ret;
851
852 BUG_ON(!dev->nd_net);
853 net = dev->nd_net;
854 ret = __dev_alloc_name(net, name, buf);
855 if (ret >= 0)
856 strlcpy(dev->name, buf, IFNAMSIZ);
857 return ret;
858}
859
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860
861/**
862 * dev_change_name - change name of a device
863 * @dev: device
864 * @newname: name (or format string) must be at least IFNAMSIZ
865 *
866 * Change name of a device, can pass format strings "eth%d".
867 * for wildcarding.
868 */
869int dev_change_name(struct net_device *dev, char *newname)
870{
Herbert Xufcc5a032007-07-30 17:03:38 -0700871 char oldname[IFNAMSIZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 int err = 0;
Herbert Xufcc5a032007-07-30 17:03:38 -0700873 int ret;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700874 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875
876 ASSERT_RTNL();
Eric W. Biederman881d9662007-09-17 11:56:21 -0700877 BUG_ON(!dev->nd_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878
Eric W. Biederman881d9662007-09-17 11:56:21 -0700879 net = dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880 if (dev->flags & IFF_UP)
881 return -EBUSY;
882
883 if (!dev_valid_name(newname))
884 return -EINVAL;
885
Herbert Xufcc5a032007-07-30 17:03:38 -0700886 memcpy(oldname, dev->name, IFNAMSIZ);
887
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888 if (strchr(newname, '%')) {
889 err = dev_alloc_name(dev, newname);
890 if (err < 0)
891 return err;
892 strcpy(newname, dev->name);
893 }
Eric W. Biederman881d9662007-09-17 11:56:21 -0700894 else if (__dev_get_by_name(net, newname))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 return -EEXIST;
896 else
897 strlcpy(dev->name, newname, IFNAMSIZ);
898
Herbert Xufcc5a032007-07-30 17:03:38 -0700899rollback:
Eric W. Biederman92749822007-04-03 00:07:30 -0600900 device_rename(&dev->dev, dev->name);
Herbert Xu7f988ea2007-07-30 16:35:46 -0700901
902 write_lock_bh(&dev_base_lock);
Eric W. Biederman92749822007-04-03 00:07:30 -0600903 hlist_del(&dev->name_hlist);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700904 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
Herbert Xu7f988ea2007-07-30 16:35:46 -0700905 write_unlock_bh(&dev_base_lock);
906
Pavel Emelyanov056925a2007-09-16 15:42:43 -0700907 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -0700908 ret = notifier_to_errno(ret);
909
910 if (ret) {
911 if (err) {
912 printk(KERN_ERR
913 "%s: name change rollback failed: %d.\n",
914 dev->name, ret);
915 } else {
916 err = ret;
917 memcpy(dev->name, oldname, IFNAMSIZ);
918 goto rollback;
919 }
920 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921
922 return err;
923}
924
925/**
Stephen Hemminger3041a062006-05-26 13:25:24 -0700926 * netdev_features_change - device changes features
Stephen Hemmingerd8a33ac2005-05-29 14:13:47 -0700927 * @dev: device to cause notification
928 *
929 * Called to indicate a device has changed features.
930 */
931void netdev_features_change(struct net_device *dev)
932{
Pavel Emelyanov056925a2007-09-16 15:42:43 -0700933 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
Stephen Hemmingerd8a33ac2005-05-29 14:13:47 -0700934}
935EXPORT_SYMBOL(netdev_features_change);
936
937/**
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938 * netdev_state_change - device changes state
939 * @dev: device to cause notification
940 *
941 * Called to indicate a device has changed state. This function calls
942 * the notifier chains for netdev_chain and sends a NEWLINK message
943 * to the routing socket.
944 */
945void netdev_state_change(struct net_device *dev)
946{
947 if (dev->flags & IFF_UP) {
Pavel Emelyanov056925a2007-09-16 15:42:43 -0700948 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
950 }
951}
952
953/**
954 * dev_load - load a network module
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700955 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 * @name: name of interface
957 *
958 * If a network interface is not present and the process has suitable
959 * privileges this function loads the module. If module loading is not
960 * available in this kernel then it becomes a nop.
961 */
962
Eric W. Biederman881d9662007-09-17 11:56:21 -0700963void dev_load(struct net *net, const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964{
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900965 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966
967 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700968 dev = __dev_get_by_name(net, name);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 read_unlock(&dev_base_lock);
970
971 if (!dev && capable(CAP_SYS_MODULE))
972 request_module("%s", name);
973}
974
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975/**
976 * dev_open - prepare an interface for use.
977 * @dev: device to open
978 *
979 * Takes a device from down to up state. The device's private open
980 * function is invoked and then the multicast lists are loaded. Finally
981 * the device is moved into the up state and a %NETDEV_UP message is
982 * sent to the netdev notifier chain.
983 *
984 * Calling this function on an active interface is a nop. On a failure
985 * a negative errno code is returned.
986 */
987int dev_open(struct net_device *dev)
988{
989 int ret = 0;
990
991 /*
992 * Is it already up?
993 */
994
995 if (dev->flags & IFF_UP)
996 return 0;
997
998 /*
999 * Is it even present?
1000 */
1001 if (!netif_device_present(dev))
1002 return -ENODEV;
1003
1004 /*
1005 * Call device private open method
1006 */
1007 set_bit(__LINK_STATE_START, &dev->state);
Jeff Garzikbada3392007-10-23 20:19:37 -07001008
1009 if (dev->validate_addr)
1010 ret = dev->validate_addr(dev);
1011
1012 if (!ret && dev->open)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 ret = dev->open(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001015 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 * If it went open OK then:
1017 */
1018
Jeff Garzikbada3392007-10-23 20:19:37 -07001019 if (ret)
1020 clear_bit(__LINK_STATE_START, &dev->state);
1021 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022 /*
1023 * Set the flags.
1024 */
1025 dev->flags |= IFF_UP;
1026
1027 /*
1028 * Initialize multicasting status
1029 */
Patrick McHardy4417da62007-06-27 01:28:10 -07001030 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031
1032 /*
1033 * Wakeup transmit queue engine
1034 */
1035 dev_activate(dev);
1036
1037 /*
1038 * ... and announce new interface.
1039 */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07001040 call_netdevice_notifiers(NETDEV_UP, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 }
Jeff Garzikbada3392007-10-23 20:19:37 -07001042
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043 return ret;
1044}
1045
1046/**
1047 * dev_close - shutdown an interface.
1048 * @dev: device to shutdown
1049 *
1050 * This function moves an active device into down state. A
1051 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1052 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1053 * chain.
1054 */
1055int dev_close(struct net_device *dev)
1056{
David S. Miller9d5010d2007-09-12 14:33:25 +02001057 might_sleep();
1058
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059 if (!(dev->flags & IFF_UP))
1060 return 0;
1061
1062 /*
1063 * Tell people we are going down, so that they can
1064 * prepare to death, when device is still operating.
1065 */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07001066 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067
1068 dev_deactivate(dev);
1069
1070 clear_bit(__LINK_STATE_START, &dev->state);
1071
1072 /* Synchronize to scheduled poll. We cannot touch poll list,
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001073 * it can be even on different cpu. So just clear netif_running().
1074 *
1075 * dev->stop() will invoke napi_disable() on all of it's
1076 * napi_struct instances on this device.
1077 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 smp_mb__after_clear_bit(); /* Commit netif_running(). */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079
1080 /*
1081 * Call the device specific close. This cannot fail.
1082 * Only if device is UP
1083 *
1084 * We allow it to be called even after a DETACH hot-plug
1085 * event.
1086 */
1087 if (dev->stop)
1088 dev->stop(dev);
1089
1090 /*
1091 * Device is now down.
1092 */
1093
1094 dev->flags &= ~IFF_UP;
1095
1096 /*
1097 * Tell people we are down
1098 */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07001099 call_netdevice_notifiers(NETDEV_DOWN, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100
1101 return 0;
1102}
1103
1104
Eric W. Biederman881d9662007-09-17 11:56:21 -07001105static int dev_boot_phase = 1;
1106
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107/*
1108 * Device change register/unregister. These are not inline or static
1109 * as we export them to the world.
1110 */
1111
1112/**
1113 * register_netdevice_notifier - register a network notifier block
1114 * @nb: notifier
1115 *
1116 * Register a notifier to be called when network device events occur.
1117 * The notifier passed is linked into the kernel structures and must
1118 * not be reused until it has been unregistered. A negative errno code
1119 * is returned on a failure.
1120 *
1121 * When registered all registration and up events are replayed
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001122 * to the new notifier to allow device to have a race free
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 * view of the network device list.
1124 */
1125
1126int register_netdevice_notifier(struct notifier_block *nb)
1127{
1128 struct net_device *dev;
Herbert Xufcc5a032007-07-30 17:03:38 -07001129 struct net_device *last;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001130 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 int err;
1132
1133 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001134 err = raw_notifier_chain_register(&netdev_chain, nb);
Herbert Xufcc5a032007-07-30 17:03:38 -07001135 if (err)
1136 goto unlock;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001137 if (dev_boot_phase)
1138 goto unlock;
1139 for_each_net(net) {
1140 for_each_netdev(net, dev) {
1141 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1142 err = notifier_to_errno(err);
1143 if (err)
1144 goto rollback;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145
Eric W. Biederman881d9662007-09-17 11:56:21 -07001146 if (!(dev->flags & IFF_UP))
1147 continue;
Herbert Xufcc5a032007-07-30 17:03:38 -07001148
Eric W. Biederman881d9662007-09-17 11:56:21 -07001149 nb->notifier_call(nb, NETDEV_UP, dev);
1150 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 }
Herbert Xufcc5a032007-07-30 17:03:38 -07001152
1153unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 rtnl_unlock();
1155 return err;
Herbert Xufcc5a032007-07-30 17:03:38 -07001156
1157rollback:
1158 last = dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001159 for_each_net(net) {
1160 for_each_netdev(net, dev) {
1161 if (dev == last)
1162 break;
Herbert Xufcc5a032007-07-30 17:03:38 -07001163
Eric W. Biederman881d9662007-09-17 11:56:21 -07001164 if (dev->flags & IFF_UP) {
1165 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1166 nb->notifier_call(nb, NETDEV_DOWN, dev);
1167 }
1168 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07001169 }
Herbert Xufcc5a032007-07-30 17:03:38 -07001170 }
1171 goto unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172}
1173
1174/**
1175 * unregister_netdevice_notifier - unregister a network notifier block
1176 * @nb: notifier
1177 *
1178 * Unregister a notifier previously registered by
1179 * register_netdevice_notifier(). The notifier is unlinked into the
1180 * kernel structures and may then be reused. A negative errno code
1181 * is returned on a failure.
1182 */
1183
1184int unregister_netdevice_notifier(struct notifier_block *nb)
1185{
Herbert Xu9f514952006-03-25 01:24:25 -08001186 int err;
1187
1188 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001189 err = raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xu9f514952006-03-25 01:24:25 -08001190 rtnl_unlock();
1191 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192}
1193
1194/**
1195 * call_netdevice_notifiers - call all network notifier blocks
1196 * @val: value passed unmodified to notifier function
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07001197 * @dev: net_device pointer passed unmodified to notifier function
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 *
1199 * Call all network notifier blocks. Parameters and return value
Alan Sternf07d5b92006-05-09 15:23:03 -07001200 * are as for raw_notifier_call_chain().
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 */
1202
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001203int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204{
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001205 return raw_notifier_call_chain(&netdev_chain, val, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206}
1207
1208/* When > 0 there are consumers of rx skb time stamps */
1209static atomic_t netstamp_needed = ATOMIC_INIT(0);
1210
1211void net_enable_timestamp(void)
1212{
1213 atomic_inc(&netstamp_needed);
1214}
1215
1216void net_disable_timestamp(void)
1217{
1218 atomic_dec(&netstamp_needed);
1219}
1220
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001221static inline void net_timestamp(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001222{
1223 if (atomic_read(&netstamp_needed))
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001224 __net_timestamp(skb);
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001225 else
1226 skb->tstamp.tv64 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227}
1228
1229/*
1230 * Support routine. Sends outgoing frames to any network
1231 * taps currently in use.
1232 */
1233
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001234static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235{
1236 struct packet_type *ptype;
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001237
1238 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239
1240 rcu_read_lock();
1241 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1242 /* Never send packets back to the socket
1243 * they originated from - MvS (miquels@drinkel.ow.org)
1244 */
1245 if ((ptype->dev == dev || !ptype->dev) &&
1246 (ptype->af_packet_priv == NULL ||
1247 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1248 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1249 if (!skb2)
1250 break;
1251
1252 /* skb->nh should be correctly
1253 set by sender, so that the second statement is
1254 just protection against buggy protocols.
1255 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001256 skb_reset_mac_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001258 if (skb_network_header(skb2) < skb2->data ||
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001259 skb2->network_header > skb2->tail) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 if (net_ratelimit())
1261 printk(KERN_CRIT "protocol %04x is "
1262 "buggy, dev %s\n",
1263 skb2->protocol, dev->name);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07001264 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 }
1266
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001267 skb2->transport_header = skb2->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268 skb2->pkt_type = PACKET_OUTGOING;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001269 ptype->func(skb2, skb->dev, ptype, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 }
1271 }
1272 rcu_read_unlock();
1273}
1274
Denis Vlasenko56079432006-03-29 15:57:29 -08001275
1276void __netif_schedule(struct net_device *dev)
1277{
1278 if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1279 unsigned long flags;
1280 struct softnet_data *sd;
1281
1282 local_irq_save(flags);
1283 sd = &__get_cpu_var(softnet_data);
1284 dev->next_sched = sd->output_queue;
1285 sd->output_queue = dev;
1286 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1287 local_irq_restore(flags);
1288 }
1289}
1290EXPORT_SYMBOL(__netif_schedule);
1291
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001292void dev_kfree_skb_irq(struct sk_buff *skb)
Denis Vlasenko56079432006-03-29 15:57:29 -08001293{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001294 if (atomic_dec_and_test(&skb->users)) {
1295 struct softnet_data *sd;
1296 unsigned long flags;
Denis Vlasenko56079432006-03-29 15:57:29 -08001297
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001298 local_irq_save(flags);
1299 sd = &__get_cpu_var(softnet_data);
1300 skb->next = sd->completion_queue;
1301 sd->completion_queue = skb;
1302 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1303 local_irq_restore(flags);
1304 }
Denis Vlasenko56079432006-03-29 15:57:29 -08001305}
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001306EXPORT_SYMBOL(dev_kfree_skb_irq);
Denis Vlasenko56079432006-03-29 15:57:29 -08001307
/*
 *	Free @skb from any context: use dev_kfree_skb_irq() when in hard
 *	irq context or with interrupts disabled, dev_kfree_skb() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (!in_irq() && !irqs_disabled()) {
		dev_kfree_skb(skb);
		return;
	}

	dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1316
1317
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001318/**
1319 * netif_device_detach - mark device as removed
1320 * @dev: network device
1321 *
1322 * Mark device as removed from system and therefore no longer available.
1323 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001324void netif_device_detach(struct net_device *dev)
1325{
1326 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1327 netif_running(dev)) {
1328 netif_stop_queue(dev);
1329 }
1330}
1331EXPORT_SYMBOL(netif_device_detach);
1332
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001333/**
1334 * netif_device_attach - mark device as attached
1335 * @dev: network device
1336 *
1337 * Mark device as attached from system and restart if needed.
1338 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001339void netif_device_attach(struct net_device *dev)
1340{
1341 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1342 netif_running(dev)) {
1343 netif_wake_queue(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001344 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001345 }
1346}
1347EXPORT_SYMBOL(netif_device_attach);
1348
1349
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350/*
1351 * Invalidate hardware checksum when packet is to be mangled, and
1352 * complete checksum manually on outgoing path.
1353 */
Patrick McHardy84fa7932006-08-29 16:44:56 -07001354int skb_checksum_help(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355{
Al Virod3bc23e2006-11-14 21:24:49 -08001356 __wsum csum;
Herbert Xu663ead32007-04-09 11:59:07 -07001357 int ret = 0, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358
Patrick McHardy84fa7932006-08-29 16:44:56 -07001359 if (skb->ip_summed == CHECKSUM_COMPLETE)
Herbert Xua430a432006-07-08 13:34:56 -07001360 goto out_set_summed;
1361
1362 if (unlikely(skb_shinfo(skb)->gso_size)) {
Herbert Xua430a432006-07-08 13:34:56 -07001363 /* Let GSO fix up the checksum. */
1364 goto out_set_summed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365 }
1366
Herbert Xua0308472007-10-15 01:47:15 -07001367 offset = skb->csum_start - skb_headroom(skb);
1368 BUG_ON(offset >= skb_headlen(skb));
1369 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1370
1371 offset += skb->csum_offset;
1372 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1373
1374 if (skb_cloned(skb) &&
1375 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1377 if (ret)
1378 goto out;
1379 }
1380
Herbert Xua0308472007-10-15 01:47:15 -07001381 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
Herbert Xua430a432006-07-08 13:34:56 -07001382out_set_summed:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383 skb->ip_summed = CHECKSUM_NONE;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001384out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 return ret;
1386}
1387
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001388/**
1389 * skb_gso_segment - Perform segmentation on skb.
1390 * @skb: buffer to segment
Herbert Xu576a30e2006-06-27 13:22:38 -07001391 * @features: features for the output path (see dev->features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001392 *
1393 * This function segments the given skb and returns a list of segments.
Herbert Xu576a30e2006-06-27 13:22:38 -07001394 *
1395 * It may return NULL if the skb requires no segmentation. This is
1396 * only possible when GSO is used for verifying header integrity.
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001397 */
Herbert Xu576a30e2006-06-27 13:22:38 -07001398struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001399{
1400 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1401 struct packet_type *ptype;
Al Viro252e3342006-11-14 20:48:11 -08001402 __be16 type = skb->protocol;
Herbert Xua430a432006-07-08 13:34:56 -07001403 int err;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001404
1405 BUG_ON(skb_shinfo(skb)->frag_list);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001406
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001407 skb_reset_mac_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001408 skb->mac_len = skb->network_header - skb->mac_header;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001409 __skb_pull(skb, skb->mac_len);
1410
Herbert Xuf9d106a2007-04-23 22:36:13 -07001411 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001412 if (skb_header_cloned(skb) &&
1413 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1414 return ERR_PTR(err);
1415 }
1416
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001417 rcu_read_lock();
1418 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1419 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
Patrick McHardy84fa7932006-08-29 16:44:56 -07001420 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001421 err = ptype->gso_send_check(skb);
1422 segs = ERR_PTR(err);
1423 if (err || skb_gso_ok(skb, features))
1424 break;
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001425 __skb_push(skb, (skb->data -
1426 skb_network_header(skb)));
Herbert Xua430a432006-07-08 13:34:56 -07001427 }
Herbert Xu576a30e2006-06-27 13:22:38 -07001428 segs = ptype->gso_segment(skb, features);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001429 break;
1430 }
1431 }
1432 rcu_read_unlock();
1433
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001434 __skb_push(skb, skb->data - skb_mac_header(skb));
Herbert Xu576a30e2006-06-27 13:22:38 -07001435
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001436 return segs;
1437}
1438
1439EXPORT_SYMBOL(skb_gso_segment);
1440
/*
 * Take action when hardware reception checksum errors are detected:
 * log a rate-limited message naming the device (if known) and dump
 * the stack.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (!net_ratelimit())
		return;

	printk(KERN_ERR "%s: hw csum failure.\n",
	       dev ? dev->name : "<unknown>");
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1453
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 *
 * Returns 1 when @skb has a page fragment in high memory and @dev
 * does not advertise NETIF_F_HIGHDMA, 0 otherwise.  Always 0 when
 * CONFIG_HIGHMEM is not set.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int frag;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
		if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
			return 1;
	}

#endif
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001475struct dev_gso_cb {
1476 void (*destructor)(struct sk_buff *skb);
1477};
1478
1479#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1480
1481static void dev_gso_skb_destructor(struct sk_buff *skb)
1482{
1483 struct dev_gso_cb *cb;
1484
1485 do {
1486 struct sk_buff *nskb = skb->next;
1487
1488 skb->next = nskb->next;
1489 nskb->next = NULL;
1490 kfree_skb(nskb);
1491 } while (skb->next);
1492
1493 cb = DEV_GSO_CB(skb);
1494 if (cb->destructor)
1495 cb->destructor(skb);
1496}
1497
1498/**
1499 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1500 * @skb: buffer to segment
1501 *
1502 * This function segments the given skb and stores the list of segments
1503 * in skb->next.
1504 */
1505static int dev_gso_segment(struct sk_buff *skb)
1506{
1507 struct net_device *dev = skb->dev;
1508 struct sk_buff *segs;
Herbert Xu576a30e2006-06-27 13:22:38 -07001509 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1510 NETIF_F_SG : 0);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001511
Herbert Xu576a30e2006-06-27 13:22:38 -07001512 segs = skb_gso_segment(skb, features);
1513
1514 /* Verifying header integrity only. */
1515 if (!segs)
1516 return 0;
1517
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001518 if (unlikely(IS_ERR(segs)))
1519 return PTR_ERR(segs);
1520
1521 skb->next = segs;
1522 DEV_GSO_CB(skb)->destructor = skb->destructor;
1523 skb->destructor = dev_gso_skb_destructor;
1524
1525 return 0;
1526}
1527
1528int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1529{
1530 if (likely(!skb->next)) {
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -07001531 if (!list_empty(&ptype_all))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001532 dev_queue_xmit_nit(skb, dev);
1533
Herbert Xu576a30e2006-06-27 13:22:38 -07001534 if (netif_needs_gso(dev, skb)) {
1535 if (unlikely(dev_gso_segment(skb)))
1536 goto out_kfree_skb;
1537 if (skb->next)
1538 goto gso;
1539 }
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001540
Herbert Xu576a30e2006-06-27 13:22:38 -07001541 return dev->hard_start_xmit(skb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001542 }
1543
Herbert Xu576a30e2006-06-27 13:22:38 -07001544gso:
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001545 do {
1546 struct sk_buff *nskb = skb->next;
1547 int rc;
1548
1549 skb->next = nskb->next;
1550 nskb->next = NULL;
1551 rc = dev->hard_start_xmit(nskb, dev);
1552 if (unlikely(rc)) {
Michael Chanf54d9e82006-06-25 23:57:04 -07001553 nskb->next = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001554 skb->next = nskb;
1555 return rc;
1556 }
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07001557 if (unlikely((netif_queue_stopped(dev) ||
Pavel Emelyanov668f8952007-10-21 17:01:56 -07001558 netif_subqueue_stopped(dev, skb)) &&
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07001559 skb->next))
Michael Chanf54d9e82006-06-25 23:57:04 -07001560 return NETDEV_TX_BUSY;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001561 } while (skb->next);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001562
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001563 skb->destructor = DEV_GSO_CB(skb)->destructor;
1564
1565out_kfree_skb:
1566 kfree_skb(skb);
1567 return 0;
1568}
1569
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570/**
1571 * dev_queue_xmit - transmit a buffer
1572 * @skb: buffer to transmit
1573 *
1574 * Queue a buffer for transmission to a network device. The caller must
1575 * have set the device and priority and built the buffer before calling
1576 * this function. The function can be called from an interrupt.
1577 *
1578 * A negative errno code is returned on a failure. A success does not
1579 * guarantee the frame will be transmitted as it may be dropped due
1580 * to congestion or traffic shaping.
Ben Greearaf191362005-04-24 20:12:36 -07001581 *
1582 * -----------------------------------------------------------------------------------
1583 * I notice this method can also return errors from the queue disciplines,
1584 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1585 * be positive.
1586 *
1587 * Regardless of the return value, the skb is consumed, so it is currently
1588 * difficult to retry a send to this method. (You can bump the ref count
1589 * before sending to hold a reference for retry if you are careful.)
1590 *
1591 * When calling this method, interrupts MUST be enabled. This is because
1592 * the BH enable code must have IRQs enabled so that it will not deadlock.
1593 * --BLG
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 */
1595
1596int dev_queue_xmit(struct sk_buff *skb)
1597{
1598 struct net_device *dev = skb->dev;
1599 struct Qdisc *q;
1600 int rc = -ENOMEM;
1601
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001602 /* GSO will handle the following emulations directly. */
1603 if (netif_needs_gso(dev, skb))
1604 goto gso;
1605
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 if (skb_shinfo(skb)->frag_list &&
1607 !(dev->features & NETIF_F_FRAGLIST) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001608 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 goto out_kfree_skb;
1610
1611 /* Fragmented skb is linearized if device does not support SG,
1612 * or if at least one of fragments is in highmem and device
1613 * does not support DMA from it.
1614 */
1615 if (skb_shinfo(skb)->nr_frags &&
1616 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001617 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 goto out_kfree_skb;
1619
1620 /* If packet is not checksummed and device does not support
1621 * checksumming for this protocol, complete checksumming here.
1622 */
Herbert Xu663ead32007-04-09 11:59:07 -07001623 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1624 skb_set_transport_header(skb, skb->csum_start -
1625 skb_headroom(skb));
1626
Herbert Xua2988302007-06-28 13:44:37 -07001627 if (!(dev->features & NETIF_F_GEN_CSUM) &&
1628 !((dev->features & NETIF_F_IP_CSUM) &&
1629 skb->protocol == htons(ETH_P_IP)) &&
1630 !((dev->features & NETIF_F_IPV6_CSUM) &&
1631 skb->protocol == htons(ETH_P_IPV6)))
Herbert Xu663ead32007-04-09 11:59:07 -07001632 if (skb_checksum_help(skb))
1633 goto out_kfree_skb;
1634 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001636gso:
Eric Dumazet2d7ceec2005-09-27 15:22:58 -07001637 spin_lock_prefetch(&dev->queue_lock);
1638
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001639 /* Disable soft irqs for various locks below. Also
1640 * stops preemption for RCU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001642 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001644 /* Updates of qdisc are serialized by queue_lock.
1645 * The struct Qdisc which is pointed to by qdisc is now a
1646 * rcu structure - it may be accessed without acquiring
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647 * a lock (but the structure may be stale.) The freeing of the
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001648 * qdisc will be deferred until it's known that there are no
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649 * more references to it.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001650 *
1651 * If the qdisc has an enqueue function, we still need to
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652 * hold the queue_lock before calling it, since queue_lock
1653 * also serializes access to the device queue.
1654 */
1655
1656 q = rcu_dereference(dev->qdisc);
1657#ifdef CONFIG_NET_CLS_ACT
1658 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1659#endif
1660 if (q->enqueue) {
1661 /* Grab device queue */
1662 spin_lock(&dev->queue_lock);
Patrick McHardy85670cc2006-09-27 16:45:45 -07001663 q = dev->qdisc;
1664 if (q->enqueue) {
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07001665 /* reset queue_mapping to zero */
Pavel Emelyanovdfa40912007-10-21 16:57:55 -07001666 skb_set_queue_mapping(skb, 0);
Patrick McHardy85670cc2006-09-27 16:45:45 -07001667 rc = q->enqueue(skb, q);
1668 qdisc_run(dev);
1669 spin_unlock(&dev->queue_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670
Patrick McHardy85670cc2006-09-27 16:45:45 -07001671 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1672 goto out;
1673 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 spin_unlock(&dev->queue_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675 }
1676
1677 /* The device has no queue. Common case for software devices:
1678 loopback, all the sorts of tunnels...
1679
Herbert Xu932ff272006-06-09 12:20:56 -07001680 Really, it is unlikely that netif_tx_lock protection is necessary
1681 here. (f.e. loopback and IP tunnels are clean ignoring statistics
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 counters.)
1683 However, it is possible, that they rely on protection
1684 made by us here.
1685
1686 Check this and shot the lock. It is not prone from deadlocks.
1687 Either shot noqueue qdisc, it is even simpler 8)
1688 */
1689 if (dev->flags & IFF_UP) {
1690 int cpu = smp_processor_id(); /* ok because BHs are off */
1691
1692 if (dev->xmit_lock_owner != cpu) {
1693
1694 HARD_TX_LOCK(dev, cpu);
1695
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07001696 if (!netif_queue_stopped(dev) &&
Pavel Emelyanov668f8952007-10-21 17:01:56 -07001697 !netif_subqueue_stopped(dev, skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 rc = 0;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001699 if (!dev_hard_start_xmit(skb, dev)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 HARD_TX_UNLOCK(dev);
1701 goto out;
1702 }
1703 }
1704 HARD_TX_UNLOCK(dev);
1705 if (net_ratelimit())
1706 printk(KERN_CRIT "Virtual device %s asks to "
1707 "queue packet!\n", dev->name);
1708 } else {
1709 /* Recursion is detected! It is possible,
1710 * unfortunately */
1711 if (net_ratelimit())
1712 printk(KERN_CRIT "Dead loop on virtual device "
1713 "%s, fix it urgently!\n", dev->name);
1714 }
1715 }
1716
1717 rc = -ENETDOWN;
Herbert Xud4828d82006-06-22 02:28:18 -07001718 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719
1720out_kfree_skb:
1721 kfree_skb(skb);
1722 return rc;
1723out:
Herbert Xud4828d82006-06-22 02:28:18 -07001724 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 return rc;
1726}
1727
1728
1729/*=======================================================================
1730 Receiver routines
1731 =======================================================================*/
1732
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07001733int netdev_max_backlog __read_mostly = 1000;
1734int netdev_budget __read_mostly = 300;
1735int weight_p __read_mostly = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736
1737DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1738
1739
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740/**
1741 * netif_rx - post buffer to the network code
1742 * @skb: buffer to post
1743 *
1744 * This function receives a packet from a device driver and queues it for
1745 * the upper (protocol) levels to process. It always succeeds. The buffer
1746 * may be dropped during processing for congestion control or by the
1747 * protocol layers.
1748 *
1749 * return values:
1750 * NET_RX_SUCCESS (no congestion)
1751 * NET_RX_CN_LOW (low congestion)
1752 * NET_RX_CN_MOD (moderate congestion)
1753 * NET_RX_CN_HIGH (high congestion)
1754 * NET_RX_DROP (packet was dropped)
1755 *
1756 */
1757
1758int netif_rx(struct sk_buff *skb)
1759{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760 struct softnet_data *queue;
1761 unsigned long flags;
1762
1763 /* if netpoll wants it, pretend we never saw it */
1764 if (netpoll_rx(skb))
1765 return NET_RX_DROP;
1766
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001767 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001768 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769
1770 /*
1771 * The code is rearranged so that the path is the most
1772 * short when CPU is congested, but is still operating.
1773 */
1774 local_irq_save(flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 queue = &__get_cpu_var(softnet_data);
1776
1777 __get_cpu_var(netdev_rx_stat).total++;
1778 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1779 if (queue->input_pkt_queue.qlen) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780enqueue:
1781 dev_hold(skb->dev);
1782 __skb_queue_tail(&queue->input_pkt_queue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 local_irq_restore(flags);
Stephen Hemminger34008d82005-06-23 20:10:00 -07001784 return NET_RX_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785 }
1786
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001787 napi_schedule(&queue->backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 goto enqueue;
1789 }
1790
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791 __get_cpu_var(netdev_rx_stat).dropped++;
1792 local_irq_restore(flags);
1793
1794 kfree_skb(skb);
1795 return NET_RX_DROP;
1796}
1797
/*
 * Variant of netif_rx() that, with preemption disabled, runs any softirq
 * left pending after the packet has been posted. Returns whatever
 * netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int ret;

	preempt_disable();

	ret = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();

	return ret;
}

EXPORT_SYMBOL(netif_rx_ni);
1812
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001813static inline struct net_device *skb_bond(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814{
1815 struct net_device *dev = skb->dev;
1816
Jay Vosburgh8f903c72006-02-21 16:36:44 -08001817 if (dev->master) {
David S. Miller7ea49ed2006-08-14 17:08:36 -07001818 if (skb_bond_should_drop(skb)) {
Jay Vosburgh8f903c72006-02-21 16:36:44 -08001819 kfree_skb(skb);
1820 return NULL;
1821 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822 skb->dev = dev->master;
Jay Vosburgh8f903c72006-02-21 16:36:44 -08001823 }
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001824
1825 return dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826}
1827
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001828
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829static void net_tx_action(struct softirq_action *h)
1830{
1831 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1832
1833 if (sd->completion_queue) {
1834 struct sk_buff *clist;
1835
1836 local_irq_disable();
1837 clist = sd->completion_queue;
1838 sd->completion_queue = NULL;
1839 local_irq_enable();
1840
1841 while (clist) {
1842 struct sk_buff *skb = clist;
1843 clist = clist->next;
1844
1845 BUG_TRAP(!atomic_read(&skb->users));
1846 __kfree_skb(skb);
1847 }
1848 }
1849
1850 if (sd->output_queue) {
1851 struct net_device *head;
1852
1853 local_irq_disable();
1854 head = sd->output_queue;
1855 sd->output_queue = NULL;
1856 local_irq_enable();
1857
1858 while (head) {
1859 struct net_device *dev = head;
1860 head = head->next_sched;
1861
1862 smp_mb__before_clear_bit();
1863 clear_bit(__LINK_STATE_SCHED, &dev->state);
1864
1865 if (spin_trylock(&dev->queue_lock)) {
1866 qdisc_run(dev);
1867 spin_unlock(&dev->queue_lock);
1868 } else {
1869 netif_schedule(dev);
1870 }
1871 }
1872 }
1873}
1874
Stephen Hemminger6f05f622007-03-08 20:46:03 -08001875static inline int deliver_skb(struct sk_buff *skb,
1876 struct packet_type *pt_prev,
1877 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001878{
1879 atomic_inc(&skb->users);
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001880 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881}
1882
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * If bridge module is loaded call bridging hook.
 *  returns NULL if packet was consumed.
 *
 * Loopback packets and packets whose device has no bridge port are
 * returned untouched. Otherwise a pending handler in *pt_prev is
 * delivered first (its result stored in *ret) and cleared.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK)
		return skb;

	port = rcu_dereference(skb->dev->br_port);
	if (port == NULL)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif
1916
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/*
 * Pass the skb to the macvlan hook when its device has a macvlan port.
 * A pending handler in *pt_prev is delivered first (result in *ret) and
 * cleared. Returns the hook's result, or the skb itself when the device
 * has no macvlan port.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	if (skb->dev->macvlan_port != NULL) {
		if (*pt_prev) {
			*ret = deliver_skb(skb, *pt_prev, orig_dev);
			*pt_prev = NULL;
		}
		return macvlan_handle_frame_hook(skb);
	}

	return skb;
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif
1938
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939#ifdef CONFIG_NET_CLS_ACT
1940/* TODO: Maybe we should just force sch_ingress to be compiled in
1941 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
1942 * a compare and 2 stores extra right now if we dont have it on
1943 * but have CONFIG_NET_CLS_ACT
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001944 * NOTE: This doesnt stop any functionality; if you dont have
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945 * the ingress scheduler, you just cant add policies on ingress.
1946 *
1947 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001948static int ing_filter(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949{
1950 struct Qdisc *q;
1951 struct net_device *dev = skb->dev;
1952 int result = TC_ACT_OK;
Herbert Xuf697c3e2007-10-14 00:38:47 -07001953 u32 ttl = G_TC_RTTL(skb->tc_verd);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001954
Herbert Xuf697c3e2007-10-14 00:38:47 -07001955 if (MAX_RED_LOOP < ttl++) {
1956 printk(KERN_WARNING
1957 "Redir loop detected Dropping packet (%d->%d)\n",
1958 skb->iif, dev->ifindex);
1959 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 }
1961
Herbert Xuf697c3e2007-10-14 00:38:47 -07001962 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
1963 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
1964
1965 spin_lock(&dev->ingress_lock);
1966 if ((q = dev->qdisc_ingress) != NULL)
1967 result = q->enqueue(skb, q);
1968 spin_unlock(&dev->ingress_lock);
1969
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970 return result;
1971}
Herbert Xuf697c3e2007-10-14 00:38:47 -07001972
1973static inline struct sk_buff *handle_ing(struct sk_buff *skb,
1974 struct packet_type **pt_prev,
1975 int *ret, struct net_device *orig_dev)
1976{
1977 if (!skb->dev->qdisc_ingress)
1978 goto out;
1979
1980 if (*pt_prev) {
1981 *ret = deliver_skb(skb, *pt_prev, orig_dev);
1982 *pt_prev = NULL;
1983 } else {
1984 /* Huh? Why does turning on AF_PACKET affect this? */
1985 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1986 }
1987
1988 switch (ing_filter(skb)) {
1989 case TC_ACT_SHOT:
1990 case TC_ACT_STOLEN:
1991 kfree_skb(skb);
1992 return NULL;
1993 }
1994
1995out:
1996 skb->tc_verd = 0;
1997 return skb;
1998}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999#endif
2000
2001int netif_receive_skb(struct sk_buff *skb)
2002{
2003 struct packet_type *ptype, *pt_prev;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002004 struct net_device *orig_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005 int ret = NET_RX_DROP;
Al Viro252e3342006-11-14 20:48:11 -08002006 __be16 type;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002007
2008 /* if we've gotten here through NAPI, check netpoll */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002009 if (netpoll_receive_skb(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010 return NET_RX_DROP;
2011
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07002012 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07002013 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014
Patrick McHardyc01003c2007-03-29 11:46:52 -07002015 if (!skb->iif)
2016 skb->iif = skb->dev->ifindex;
David S. Miller86e65da2005-08-09 19:36:29 -07002017
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002018 orig_dev = skb_bond(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019
Jay Vosburgh8f903c72006-02-21 16:36:44 -08002020 if (!orig_dev)
2021 return NET_RX_DROP;
2022
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023 __get_cpu_var(netdev_rx_stat).total++;
2024
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002025 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03002026 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07002027 skb->mac_len = skb->network_header - skb->mac_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002028
2029 pt_prev = NULL;
2030
2031 rcu_read_lock();
2032
2033#ifdef CONFIG_NET_CLS_ACT
2034 if (skb->tc_verd & TC_NCLS) {
2035 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2036 goto ncls;
2037 }
2038#endif
2039
2040 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2041 if (!ptype->dev || ptype->dev == skb->dev) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002042 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002043 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002044 pt_prev = ptype;
2045 }
2046 }
2047
2048#ifdef CONFIG_NET_CLS_ACT
Herbert Xuf697c3e2007-10-14 00:38:47 -07002049 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2050 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052ncls:
2053#endif
2054
Stephen Hemminger6229e362007-03-21 13:38:47 -07002055 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2056 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057 goto out;
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002058 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2059 if (!skb)
2060 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061
2062 type = skb->protocol;
2063 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
2064 if (ptype->type == type &&
2065 (!ptype->dev || ptype->dev == skb->dev)) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002066 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002067 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068 pt_prev = ptype;
2069 }
2070 }
2071
2072 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002073 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074 } else {
2075 kfree_skb(skb);
2076 /* Jamal, now you will not able to escape explaining
2077 * me how you were going to use this. :-)
2078 */
2079 ret = NET_RX_DROP;
2080 }
2081
2082out:
2083 rcu_read_unlock();
2084 return ret;
2085}
2086
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002087static int process_backlog(struct napi_struct *napi, int quota)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088{
2089 int work = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2091 unsigned long start_time = jiffies;
2092
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002093 napi->weight = weight_p;
2094 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095 struct sk_buff *skb;
2096 struct net_device *dev;
2097
2098 local_irq_disable();
2099 skb = __skb_dequeue(&queue->input_pkt_queue);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002100 if (!skb) {
2101 __napi_complete(napi);
2102 local_irq_enable();
2103 break;
2104 }
2105
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106 local_irq_enable();
2107
2108 dev = skb->dev;
2109
2110 netif_receive_skb(skb);
2111
2112 dev_put(dev);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002113 } while (++work < quota && jiffies == start_time);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002114
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002115 return work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002116}
2117
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002118/**
2119 * __napi_schedule - schedule for receive
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07002120 * @n: entry to schedule
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002121 *
2122 * The entry's receive function will be scheduled to run
2123 */
2124void fastcall __napi_schedule(struct napi_struct *n)
2125{
2126 unsigned long flags;
2127
2128 local_irq_save(flags);
2129 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2130 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2131 local_irq_restore(flags);
2132}
2133EXPORT_SYMBOL(__napi_schedule);
2134
2135
Linus Torvalds1da177e2005-04-16 15:20:36 -07002136static void net_rx_action(struct softirq_action *h)
2137{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002138 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139 unsigned long start_time = jiffies;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07002140 int budget = netdev_budget;
Matt Mackall53fb95d2005-08-11 19:27:43 -07002141 void *have;
2142
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143 local_irq_disable();
2144
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002145 while (!list_empty(list)) {
2146 struct napi_struct *n;
2147 int work, weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002149 /* If softirq window is exhuasted then punt.
2150 *
2151 * Note that this is a slight policy change from the
2152 * previous NAPI code, which would allow up to 2
2153 * jiffies to pass before breaking out. The test
2154 * used to be "jiffies - start_time > 1".
2155 */
2156 if (unlikely(budget <= 0 || jiffies != start_time))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002157 goto softnet_break;
2158
2159 local_irq_enable();
2160
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002161 /* Even though interrupts have been re-enabled, this
2162 * access is safe because interrupts can only add new
2163 * entries to the tail of this list, and only ->poll()
2164 * calls can remove this head entry from the list.
2165 */
2166 n = list_entry(list->next, struct napi_struct, poll_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002168 have = netpoll_poll_lock(n);
2169
2170 weight = n->weight;
2171
2172 work = n->poll(n, weight);
2173
2174 WARN_ON_ONCE(work > weight);
2175
2176 budget -= work;
2177
2178 local_irq_disable();
2179
2180 /* Drivers must not modify the NAPI state if they
2181 * consume the entire weight. In such cases this code
2182 * still "owns" the NAPI instance and therefore can
2183 * move the instance around on the list at-will.
2184 */
2185 if (unlikely(work == weight))
2186 list_move_tail(&n->poll_list, list);
2187
2188 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002189 }
2190out:
Shannon Nelson515e06c2007-06-23 23:09:23 -07002191 local_irq_enable();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002192
Chris Leechdb217332006-06-17 21:24:58 -07002193#ifdef CONFIG_NET_DMA
2194 /*
2195 * There may not be any more sk_buffs coming right now, so push
2196 * any pending DMA copies to hardware
2197 */
Dan Williamsd379b012007-07-09 11:56:42 -07002198 if (!cpus_empty(net_dma.channel_mask)) {
2199 int chan_idx;
2200 for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
2201 struct dma_chan *chan = net_dma.channels[chan_idx];
2202 if (chan)
2203 dma_async_memcpy_issue_pending(chan);
2204 }
Chris Leechdb217332006-06-17 21:24:58 -07002205 }
2206#endif
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002207
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208 return;
2209
2210softnet_break:
2211 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2212 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2213 goto out;
2214}
2215
2216static gifconf_func_t * gifconf_list [NPROTO];
2217
2218/**
2219 * register_gifconf - register a SIOCGIF handler
2220 * @family: Address family
2221 * @gifconf: Function handler
2222 *
2223 * Register protocol dependent address dumping routines. The handler
2224 * that is passed must not be freed or reused until it has been replaced
2225 * by another handler.
2226 */
2227int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2228{
2229 if (family >= NPROTO)
2230 return -EINVAL;
2231 gifconf_list[family] = gifconf;
2232 return 0;
2233}
2234
2235
2236/*
2237 * Map an interface index to its name (SIOCGIFNAME)
2238 */
2239
2240/*
2241 * We need this ioctl for efficient implementation of the
2242 * if_indextoname() function required by the IPv6 API. Without
2243 * it, we would have to search all the interfaces to find a
2244 * match. --pb
2245 */
2246
Eric W. Biederman881d9662007-09-17 11:56:21 -07002247static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248{
2249 struct net_device *dev;
2250 struct ifreq ifr;
2251
2252 /*
2253 * Fetch the caller's info block.
2254 */
2255
2256 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2257 return -EFAULT;
2258
2259 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -07002260 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261 if (!dev) {
2262 read_unlock(&dev_base_lock);
2263 return -ENODEV;
2264 }
2265
2266 strcpy(ifr.ifr_name, dev->name);
2267 read_unlock(&dev_base_lock);
2268
2269 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2270 return -EFAULT;
2271 return 0;
2272}
2273
2274/*
2275 * Perform a SIOCGIFCONF call. This structure will change
2276 * size eventually, and there is nothing I can do about it.
2277 * Thus we will need a 'compatibility mode'.
2278 */
2279
Eric W. Biederman881d9662007-09-17 11:56:21 -07002280static int dev_ifconf(struct net *net, char __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002281{
2282 struct ifconf ifc;
2283 struct net_device *dev;
2284 char __user *pos;
2285 int len;
2286 int total;
2287 int i;
2288
2289 /*
2290 * Fetch the caller's info block.
2291 */
2292
2293 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2294 return -EFAULT;
2295
2296 pos = ifc.ifc_buf;
2297 len = ifc.ifc_len;
2298
2299 /*
2300 * Loop over the interfaces, and write an info block for each.
2301 */
2302
2303 total = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002304 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 for (i = 0; i < NPROTO; i++) {
2306 if (gifconf_list[i]) {
2307 int done;
2308 if (!pos)
2309 done = gifconf_list[i](dev, NULL, 0);
2310 else
2311 done = gifconf_list[i](dev, pos + total,
2312 len - total);
2313 if (done < 0)
2314 return -EFAULT;
2315 total += done;
2316 }
2317 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002318 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319
2320 /*
2321 * All done. Write the updated control block back to the caller.
2322 */
2323 ifc.ifc_len = total;
2324
2325 /*
2326 * Both BSD and Solaris return 0 here, so we do too.
2327 */
2328 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2329}
2330
2331#ifdef CONFIG_PROC_FS
2332/*
2333 * This is invoked by the /proc filesystem handler to display a device
2334 * in detail.
2335 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2337{
Eric W. Biederman881d9662007-09-17 11:56:21 -07002338 struct net *net = seq->private;
Pavel Emelianov7562f872007-05-03 15:13:45 -07002339 loff_t off;
2340 struct net_device *dev;
2341
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002343 if (!*pos)
2344 return SEQ_START_TOKEN;
2345
2346 off = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002347 for_each_netdev(net, dev)
Pavel Emelianov7562f872007-05-03 15:13:45 -07002348 if (off++ == *pos)
2349 return dev;
2350
2351 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352}
2353
2354void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2355{
Eric W. Biederman881d9662007-09-17 11:56:21 -07002356 struct net *net = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357 ++*pos;
Pavel Emelianov7562f872007-05-03 15:13:45 -07002358 return v == SEQ_START_TOKEN ?
Eric W. Biederman881d9662007-09-17 11:56:21 -07002359 first_net_device(net) : next_net_device((struct net_device *)v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002360}
2361
2362void dev_seq_stop(struct seq_file *seq, void *v)
2363{
2364 read_unlock(&dev_base_lock);
2365}
2366
2367static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2368{
Rusty Russellc45d2862007-03-28 14:29:08 -07002369 struct net_device_stats *stats = dev->get_stats(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370
Rusty Russell5a1b5892007-04-28 21:04:03 -07002371 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2372 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2373 dev->name, stats->rx_bytes, stats->rx_packets,
2374 stats->rx_errors,
2375 stats->rx_dropped + stats->rx_missed_errors,
2376 stats->rx_fifo_errors,
2377 stats->rx_length_errors + stats->rx_over_errors +
2378 stats->rx_crc_errors + stats->rx_frame_errors,
2379 stats->rx_compressed, stats->multicast,
2380 stats->tx_bytes, stats->tx_packets,
2381 stats->tx_errors, stats->tx_dropped,
2382 stats->tx_fifo_errors, stats->collisions,
2383 stats->tx_carrier_errors +
2384 stats->tx_aborted_errors +
2385 stats->tx_window_errors +
2386 stats->tx_heartbeat_errors,
2387 stats->tx_compressed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002388}
2389
2390/*
2391 * Called from the PROCfs module. This now uses the new arbitrary sized
2392 * /proc/net interface to create /proc/net/dev
2393 */
2394static int dev_seq_show(struct seq_file *seq, void *v)
2395{
2396 if (v == SEQ_START_TOKEN)
2397 seq_puts(seq, "Inter-| Receive "
2398 " | Transmit\n"
2399 " face |bytes packets errs drop fifo frame "
2400 "compressed multicast|bytes packets errs "
2401 "drop fifo colls carrier compressed\n");
2402 else
2403 dev_seq_printf_stats(seq, v);
2404 return 0;
2405}
2406
2407static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2408{
2409 struct netif_rx_stats *rc = NULL;
2410
2411 while (*pos < NR_CPUS)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002412 if (cpu_online(*pos)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413 rc = &per_cpu(netdev_rx_stat, *pos);
2414 break;
2415 } else
2416 ++*pos;
2417 return rc;
2418}
2419
2420static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2421{
2422 return softnet_get_online(pos);
2423}
2424
2425static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2426{
2427 ++*pos;
2428 return softnet_get_online(pos);
2429}
2430
/* Nothing to release: softnet_seq_start() takes no lock. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
	return;
}
2434
2435static int softnet_seq_show(struct seq_file *seq, void *v)
2436{
2437 struct netif_rx_stats *s = v;
2438
2439 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
Stephen Hemminger31aa02c2005-06-23 20:12:48 -07002440 s->total, s->dropped, s->time_squeeze, 0,
Stephen Hemmingerc1ebcdb2005-06-23 20:08:59 -07002441 0, 0, 0, 0, /* was fastroute */
2442 s->cpu_collision );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002443 return 0;
2444}
2445
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002446static const struct seq_operations dev_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002447 .start = dev_seq_start,
2448 .next = dev_seq_next,
2449 .stop = dev_seq_stop,
2450 .show = dev_seq_show,
2451};
2452
2453static int dev_seq_open(struct inode *inode, struct file *file)
2454{
Eric W. Biederman881d9662007-09-17 11:56:21 -07002455 struct seq_file *seq;
2456 int res;
2457 res = seq_open(file, &dev_seq_ops);
2458 if (!res) {
2459 seq = file->private_data;
Eric W. Biederman077130c2007-09-13 09:18:57 +02002460 seq->private = get_proc_net(inode);
2461 if (!seq->private) {
2462 seq_release(inode, file);
2463 res = -ENXIO;
2464 }
Eric W. Biederman881d9662007-09-17 11:56:21 -07002465 }
2466 return res;
2467}
2468
2469static int dev_seq_release(struct inode *inode, struct file *file)
2470{
2471 struct seq_file *seq = file->private_data;
2472 struct net *net = seq->private;
2473 put_net(net);
2474 return seq_release(inode, file);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002475}
2476
Arjan van de Ven9a321442007-02-12 00:55:35 -08002477static const struct file_operations dev_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478 .owner = THIS_MODULE,
2479 .open = dev_seq_open,
2480 .read = seq_read,
2481 .llseek = seq_lseek,
Eric W. Biederman881d9662007-09-17 11:56:21 -07002482 .release = dev_seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483};
2484
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002485static const struct seq_operations softnet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486 .start = softnet_seq_start,
2487 .next = softnet_seq_next,
2488 .stop = softnet_seq_stop,
2489 .show = softnet_seq_show,
2490};
2491
2492static int softnet_seq_open(struct inode *inode, struct file *file)
2493{
2494 return seq_open(file, &softnet_seq_ops);
2495}
2496
Arjan van de Ven9a321442007-02-12 00:55:35 -08002497static const struct file_operations softnet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 .owner = THIS_MODULE,
2499 .open = softnet_seq_open,
2500 .read = seq_read,
2501 .llseek = seq_lseek,
2502 .release = seq_release,
2503};
2504
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002505static void *ptype_get_idx(loff_t pos)
2506{
2507 struct packet_type *pt = NULL;
2508 loff_t i = 0;
2509 int t;
2510
2511 list_for_each_entry_rcu(pt, &ptype_all, list) {
2512 if (i == pos)
2513 return pt;
2514 ++i;
2515 }
2516
2517 for (t = 0; t < 16; t++) {
2518 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2519 if (i == pos)
2520 return pt;
2521 ++i;
2522 }
2523 }
2524 return NULL;
2525}
2526
2527static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2528{
2529 rcu_read_lock();
2530 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2531}
2532
2533static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2534{
2535 struct packet_type *pt;
2536 struct list_head *nxt;
2537 int hash;
2538
2539 ++*pos;
2540 if (v == SEQ_START_TOKEN)
2541 return ptype_get_idx(0);
2542
2543 pt = v;
2544 nxt = pt->list.next;
2545 if (pt->type == htons(ETH_P_ALL)) {
2546 if (nxt != &ptype_all)
2547 goto found;
2548 hash = 0;
2549 nxt = ptype_base[0].next;
2550 } else
2551 hash = ntohs(pt->type) & 15;
2552
2553 while (nxt == &ptype_base[hash]) {
2554 if (++hash >= 16)
2555 return NULL;
2556 nxt = ptype_base[hash].next;
2557 }
2558found:
2559 return list_entry(nxt, struct packet_type, list);
2560}
2561
/* seq_file ->stop: leave the RCU read side entered in ptype_seq_start(). */
static void ptype_seq_stop(struct seq_file *seq, void *v)
{
	rcu_read_unlock();
}
2566
/*
 * Print a human-readable form of the address @sym.
 *
 * With CONFIG_KALLSYMS and a successful lookup the output is
 * "symbol+0xoff", wrapped as ":module:symbol+0xoff" when a module name
 * is reported.  Otherwise the raw pointer is printed as "[ptr]".
 */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset = 0, symsize;
	const char *symname;
	char *modname;
	char namebuf[128];

	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
				  &modname, namebuf);
	if (symname) {
		/* No module name: drop both the name and its delimiters. */
		const char *sep = modname ? ":" : "";
		const char *mod = modname ? modname : "";

		seq_printf(seq, "%s%s%s%s+0x%lx", sep, mod, sep,
			   symname, offset);
		return;
	}
#endif

	seq_printf(seq, "[%p]", sym);
}
2591
2592static int ptype_seq_show(struct seq_file *seq, void *v)
2593{
2594 struct packet_type *pt = v;
2595
2596 if (v == SEQ_START_TOKEN)
2597 seq_puts(seq, "Type Device Function\n");
2598 else {
2599 if (pt->type == htons(ETH_P_ALL))
2600 seq_puts(seq, "ALL ");
2601 else
2602 seq_printf(seq, "%04x", ntohs(pt->type));
2603
2604 seq_printf(seq, " %-8s ",
2605 pt->dev ? pt->dev->name : "");
2606 ptype_seq_decode(seq, pt->func);
2607 seq_putc(seq, '\n');
2608 }
2609
2610 return 0;
2611}
2612
2613static const struct seq_operations ptype_seq_ops = {
2614 .start = ptype_seq_start,
2615 .next = ptype_seq_next,
2616 .stop = ptype_seq_stop,
2617 .show = ptype_seq_show,
2618};
2619
2620static int ptype_seq_open(struct inode *inode, struct file *file)
2621{
2622 return seq_open(file, &ptype_seq_ops);
2623}
2624
2625static const struct file_operations ptype_seq_fops = {
2626 .owner = THIS_MODULE,
2627 .open = ptype_seq_open,
2628 .read = seq_read,
2629 .llseek = seq_lseek,
2630 .release = seq_release,
2631};
2632
2633
Pavel Emelyanov46650792007-10-08 20:38:39 -07002634static int __net_init dev_proc_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002635{
2636 int rc = -ENOMEM;
2637
Eric W. Biederman881d9662007-09-17 11:56:21 -07002638 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002639 goto out;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002640 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002641 goto out_dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002642 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002643 goto out_softnet;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002644
Eric W. Biederman881d9662007-09-17 11:56:21 -07002645 if (wext_proc_init(net))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002646 goto out_ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002647 rc = 0;
2648out:
2649 return rc;
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002650out_ptype:
Eric W. Biederman881d9662007-09-17 11:56:21 -07002651 proc_net_remove(net, "ptype");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002652out_softnet:
Eric W. Biederman881d9662007-09-17 11:56:21 -07002653 proc_net_remove(net, "softnet_stat");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002654out_dev:
Eric W. Biederman881d9662007-09-17 11:56:21 -07002655 proc_net_remove(net, "dev");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002656 goto out;
2657}
Eric W. Biederman881d9662007-09-17 11:56:21 -07002658
Pavel Emelyanov46650792007-10-08 20:38:39 -07002659static void __net_exit dev_proc_net_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07002660{
2661 wext_proc_exit(net);
2662
2663 proc_net_remove(net, "ptype");
2664 proc_net_remove(net, "softnet_stat");
2665 proc_net_remove(net, "dev");
2666}
2667
Pavel Emelyanov46650792007-10-08 20:38:39 -07002668static struct pernet_operations __net_initdata dev_proc_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07002669 .init = dev_proc_net_init,
2670 .exit = dev_proc_net_exit,
2671};
2672
2673static int __init dev_proc_init(void)
2674{
2675 return register_pernet_subsys(&dev_proc_ops);
2676}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002677#else
2678#define dev_proc_init() 0
2679#endif /* CONFIG_PROC_FS */
2680
2681
2682/**
2683 * netdev_set_master - set up master/slave pair
2684 * @slave: slave device
2685 * @master: new master device
2686 *
2687 * Changes the master device of the slave. Pass %NULL to break the
2688 * bonding. The caller must hold the RTNL semaphore. On a failure
2689 * a negative errno code is returned. On success the reference counts
2690 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2691 * function returns zero.
2692 */
2693int netdev_set_master(struct net_device *slave, struct net_device *master)
2694{
2695 struct net_device *old = slave->master;
2696
2697 ASSERT_RTNL();
2698
2699 if (master) {
2700 if (old)
2701 return -EBUSY;
2702 dev_hold(master);
2703 }
2704
2705 slave->master = master;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002706
Linus Torvalds1da177e2005-04-16 15:20:36 -07002707 synchronize_net();
2708
2709 if (old)
2710 dev_put(old);
2711
2712 if (master)
2713 slave->flags |= IFF_SLAVE;
2714 else
2715 slave->flags &= ~IFF_SLAVE;
2716
2717 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2718 return 0;
2719}
2720
Patrick McHardy4417da62007-06-27 01:28:10 -07002721static void __dev_set_promiscuity(struct net_device *dev, int inc)
2722{
2723 unsigned short old_flags = dev->flags;
2724
Patrick McHardy24023452007-07-14 18:51:31 -07002725 ASSERT_RTNL();
2726
Patrick McHardy4417da62007-06-27 01:28:10 -07002727 if ((dev->promiscuity += inc) == 0)
2728 dev->flags &= ~IFF_PROMISC;
2729 else
2730 dev->flags |= IFF_PROMISC;
2731 if (dev->flags != old_flags) {
2732 printk(KERN_INFO "device %s %s promiscuous mode\n",
2733 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2734 "left");
2735 audit_log(current->audit_context, GFP_ATOMIC,
2736 AUDIT_ANOM_PROMISCUOUS,
2737 "dev=%s prom=%d old_prom=%d auid=%u",
2738 dev->name, (dev->flags & IFF_PROMISC),
2739 (old_flags & IFF_PROMISC),
2740 audit_get_loginuid(current->audit_context));
Patrick McHardy24023452007-07-14 18:51:31 -07002741
2742 if (dev->change_rx_flags)
2743 dev->change_rx_flags(dev, IFF_PROMISC);
Patrick McHardy4417da62007-06-27 01:28:10 -07002744 }
2745}
2746
Linus Torvalds1da177e2005-04-16 15:20:36 -07002747/**
2748 * dev_set_promiscuity - update promiscuity count on a device
2749 * @dev: device
2750 * @inc: modifier
2751 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07002752 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07002753 * remains above zero the interface remains promiscuous. Once it hits zero
2754 * the device reverts back to normal filtering operation. A negative inc
2755 * value is used to drop promiscuity on the device.
2756 */
2757void dev_set_promiscuity(struct net_device *dev, int inc)
2758{
2759 unsigned short old_flags = dev->flags;
2760
Patrick McHardy4417da62007-06-27 01:28:10 -07002761 __dev_set_promiscuity(dev, inc);
2762 if (dev->flags != old_flags)
2763 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002764}
2765
2766/**
2767 * dev_set_allmulti - update allmulti count on a device
2768 * @dev: device
2769 * @inc: modifier
2770 *
2771 * Add or remove reception of all multicast frames to a device. While the
2772 * count in the device remains above zero the interface remains listening
2773 * to all interfaces. Once it hits zero the device reverts back to normal
2774 * filtering operation. A negative @inc value is used to drop the counter
2775 * when releasing a resource needing all multicasts.
2776 */
2777
2778void dev_set_allmulti(struct net_device *dev, int inc)
2779{
2780 unsigned short old_flags = dev->flags;
2781
Patrick McHardy24023452007-07-14 18:51:31 -07002782 ASSERT_RTNL();
2783
Linus Torvalds1da177e2005-04-16 15:20:36 -07002784 dev->flags |= IFF_ALLMULTI;
2785 if ((dev->allmulti += inc) == 0)
2786 dev->flags &= ~IFF_ALLMULTI;
Patrick McHardy24023452007-07-14 18:51:31 -07002787 if (dev->flags ^ old_flags) {
2788 if (dev->change_rx_flags)
2789 dev->change_rx_flags(dev, IFF_ALLMULTI);
Patrick McHardy4417da62007-06-27 01:28:10 -07002790 dev_set_rx_mode(dev);
Patrick McHardy24023452007-07-14 18:51:31 -07002791 }
Patrick McHardy4417da62007-06-27 01:28:10 -07002792}
2793
2794/*
2795 * Upload unicast and multicast address lists to device and
2796 * configure RX filtering. When the device doesn't support unicast
2797 * filtering it is put in promiscous mode while unicast addresses
2798 * are present.
2799 */
2800void __dev_set_rx_mode(struct net_device *dev)
2801{
2802 /* dev_open will call this function so the list will stay sane. */
2803 if (!(dev->flags&IFF_UP))
2804 return;
2805
2806 if (!netif_device_present(dev))
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +09002807 return;
Patrick McHardy4417da62007-06-27 01:28:10 -07002808
2809 if (dev->set_rx_mode)
2810 dev->set_rx_mode(dev);
2811 else {
2812 /* Unicast addresses changes may only happen under the rtnl,
2813 * therefore calling __dev_set_promiscuity here is safe.
2814 */
2815 if (dev->uc_count > 0 && !dev->uc_promisc) {
2816 __dev_set_promiscuity(dev, 1);
2817 dev->uc_promisc = 1;
2818 } else if (dev->uc_count == 0 && dev->uc_promisc) {
2819 __dev_set_promiscuity(dev, -1);
2820 dev->uc_promisc = 0;
2821 }
2822
2823 if (dev->set_multicast_list)
2824 dev->set_multicast_list(dev);
2825 }
2826}
2827
/* Locked wrapper: run __dev_set_rx_mode() under netif_tx_lock_bh(). */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_tx_lock_bh(dev);

	__dev_set_rx_mode(dev);

	netif_tx_unlock_bh(dev);
}
2834
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002835int __dev_addr_delete(struct dev_addr_list **list, int *count,
2836 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07002837{
2838 struct dev_addr_list *da;
2839
2840 for (; (da = *list) != NULL; list = &da->next) {
2841 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2842 alen == da->da_addrlen) {
2843 if (glbl) {
2844 int old_glbl = da->da_gusers;
2845 da->da_gusers = 0;
2846 if (old_glbl == 0)
2847 break;
2848 }
2849 if (--da->da_users)
2850 return 0;
2851
2852 *list = da->next;
2853 kfree(da);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002854 (*count)--;
Patrick McHardybf742482007-06-27 01:26:19 -07002855 return 0;
2856 }
2857 }
2858 return -ENOENT;
2859}
2860
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002861int __dev_addr_add(struct dev_addr_list **list, int *count,
2862 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07002863{
2864 struct dev_addr_list *da;
2865
2866 for (da = *list; da != NULL; da = da->next) {
2867 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2868 da->da_addrlen == alen) {
2869 if (glbl) {
2870 int old_glbl = da->da_gusers;
2871 da->da_gusers = 1;
2872 if (old_glbl)
2873 return 0;
2874 }
2875 da->da_users++;
2876 return 0;
2877 }
2878 }
2879
2880 da = kmalloc(sizeof(*da), GFP_ATOMIC);
2881 if (da == NULL)
2882 return -ENOMEM;
2883 memcpy(da->da_addr, addr, alen);
2884 da->da_addrlen = alen;
2885 da->da_users = 1;
2886 da->da_gusers = glbl ? 1 : 0;
2887 da->next = *list;
2888 *list = da;
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002889 (*count)++;
Patrick McHardybf742482007-06-27 01:26:19 -07002890 return 0;
2891}
2892
Patrick McHardy4417da62007-06-27 01:28:10 -07002893/**
2894 * dev_unicast_delete - Release secondary unicast address.
2895 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07002896 * @addr: address to delete
2897 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07002898 *
2899 * Release reference to a secondary unicast address and remove it
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07002900 * from the device if the reference count drops to zero.
Patrick McHardy4417da62007-06-27 01:28:10 -07002901 *
2902 * The caller must hold the rtnl_mutex.
2903 */
2904int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2905{
2906 int err;
2907
2908 ASSERT_RTNL();
2909
2910 netif_tx_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002911 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2912 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07002913 __dev_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07002914 netif_tx_unlock_bh(dev);
2915 return err;
2916}
2917EXPORT_SYMBOL(dev_unicast_delete);
2918
2919/**
2920 * dev_unicast_add - add a secondary unicast address
2921 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07002922 * @addr: address to delete
2923 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07002924 *
2925 * Add a secondary unicast address to the device or increase
2926 * the reference count if it already exists.
2927 *
2928 * The caller must hold the rtnl_mutex.
2929 */
2930int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2931{
2932 int err;
2933
2934 ASSERT_RTNL();
2935
2936 netif_tx_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002937 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2938 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07002939 __dev_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07002940 netif_tx_unlock_bh(dev);
2941 return err;
2942}
2943EXPORT_SYMBOL(dev_unicast_add);
2944
Denis Cheng12972622007-07-18 02:12:56 -07002945static void __dev_addr_discard(struct dev_addr_list **list)
2946{
2947 struct dev_addr_list *tmp;
2948
2949 while (*list != NULL) {
2950 tmp = *list;
2951 *list = tmp->next;
2952 if (tmp->da_users > tmp->da_gusers)
2953 printk("__dev_addr_discard: address leakage! "
2954 "da_users=%d\n", tmp->da_users);
2955 kfree(tmp);
2956 }
2957}
2958
Denis Cheng26cc2522007-07-18 02:12:03 -07002959static void dev_addr_discard(struct net_device *dev)
Patrick McHardy4417da62007-06-27 01:28:10 -07002960{
2961 netif_tx_lock_bh(dev);
Denis Cheng26cc2522007-07-18 02:12:03 -07002962
Patrick McHardy4417da62007-06-27 01:28:10 -07002963 __dev_addr_discard(&dev->uc_list);
2964 dev->uc_count = 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07002965
Denis Cheng456ad752007-07-18 02:10:54 -07002966 __dev_addr_discard(&dev->mc_list);
2967 dev->mc_count = 0;
Denis Cheng26cc2522007-07-18 02:12:03 -07002968
Denis Cheng456ad752007-07-18 02:10:54 -07002969 netif_tx_unlock_bh(dev);
2970}
2971
Linus Torvalds1da177e2005-04-16 15:20:36 -07002972unsigned dev_get_flags(const struct net_device *dev)
2973{
2974 unsigned flags;
2975
2976 flags = (dev->flags & ~(IFF_PROMISC |
2977 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08002978 IFF_RUNNING |
2979 IFF_LOWER_UP |
2980 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07002981 (dev->gflags & (IFF_PROMISC |
2982 IFF_ALLMULTI));
2983
Stefan Rompfb00055a2006-03-20 17:09:11 -08002984 if (netif_running(dev)) {
2985 if (netif_oper_up(dev))
2986 flags |= IFF_RUNNING;
2987 if (netif_carrier_ok(dev))
2988 flags |= IFF_LOWER_UP;
2989 if (netif_dormant(dev))
2990 flags |= IFF_DORMANT;
2991 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002992
2993 return flags;
2994}
2995
2996int dev_change_flags(struct net_device *dev, unsigned flags)
2997{
Thomas Graf7c355f52007-06-05 16:03:03 -07002998 int ret, changes;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002999 int old_flags = dev->flags;
3000
Patrick McHardy24023452007-07-14 18:51:31 -07003001 ASSERT_RTNL();
3002
Linus Torvalds1da177e2005-04-16 15:20:36 -07003003 /*
3004 * Set the flags on our device.
3005 */
3006
3007 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3008 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3009 IFF_AUTOMEDIA)) |
3010 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3011 IFF_ALLMULTI));
3012
3013 /*
3014 * Load in the correct multicast list now the flags have changed.
3015 */
3016
Patrick McHardy24023452007-07-14 18:51:31 -07003017 if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
3018 dev->change_rx_flags(dev, IFF_MULTICAST);
3019
Patrick McHardy4417da62007-06-27 01:28:10 -07003020 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003021
3022 /*
3023 * Have we downed the interface. We handle IFF_UP ourselves
3024 * according to user attempts to set it, rather than blindly
3025 * setting it.
3026 */
3027
3028 ret = 0;
3029 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
3030 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3031
3032 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07003033 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003034 }
3035
3036 if (dev->flags & IFF_UP &&
3037 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3038 IFF_VOLATILE)))
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003039 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003040
3041 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3042 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3043 dev->gflags ^= IFF_PROMISC;
3044 dev_set_promiscuity(dev, inc);
3045 }
3046
3047 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3048 is important. Some (broken) drivers set IFF_PROMISC, when
3049 IFF_ALLMULTI is requested not asking us and not reporting.
3050 */
3051 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3052 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3053 dev->gflags ^= IFF_ALLMULTI;
3054 dev_set_allmulti(dev, inc);
3055 }
3056
Thomas Graf7c355f52007-06-05 16:03:03 -07003057 /* Exclude state transition flags, already notified */
3058 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3059 if (changes)
3060 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003061
3062 return ret;
3063}
3064
3065int dev_set_mtu(struct net_device *dev, int new_mtu)
3066{
3067 int err;
3068
3069 if (new_mtu == dev->mtu)
3070 return 0;
3071
3072 /* MTU must be positive. */
3073 if (new_mtu < 0)
3074 return -EINVAL;
3075
3076 if (!netif_device_present(dev))
3077 return -ENODEV;
3078
3079 err = 0;
3080 if (dev->change_mtu)
3081 err = dev->change_mtu(dev, new_mtu);
3082 else
3083 dev->mtu = new_mtu;
3084 if (!err && dev->flags & IFF_UP)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003085 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003086 return err;
3087}
3088
3089int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3090{
3091 int err;
3092
3093 if (!dev->set_mac_address)
3094 return -EOPNOTSUPP;
3095 if (sa->sa_family != dev->type)
3096 return -EINVAL;
3097 if (!netif_device_present(dev))
3098 return -ENODEV;
3099 err = dev->set_mac_address(dev, sa);
3100 if (!err)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003101 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003102 return err;
3103}
3104
3105/*
Jeff Garzik14e3e072007-10-08 00:06:32 -07003106 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003107 */
Jeff Garzik14e3e072007-10-08 00:06:32 -07003108static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003109{
3110 int err;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003111 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003112
3113 if (!dev)
3114 return -ENODEV;
3115
3116 switch (cmd) {
3117 case SIOCGIFFLAGS: /* Get interface flags */
3118 ifr->ifr_flags = dev_get_flags(dev);
3119 return 0;
3120
Linus Torvalds1da177e2005-04-16 15:20:36 -07003121 case SIOCGIFMETRIC: /* Get the metric on the interface
3122 (currently unused) */
3123 ifr->ifr_metric = 0;
3124 return 0;
3125
Linus Torvalds1da177e2005-04-16 15:20:36 -07003126 case SIOCGIFMTU: /* Get the MTU of a device */
3127 ifr->ifr_mtu = dev->mtu;
3128 return 0;
3129
Linus Torvalds1da177e2005-04-16 15:20:36 -07003130 case SIOCGIFHWADDR:
3131 if (!dev->addr_len)
3132 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3133 else
3134 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3135 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3136 ifr->ifr_hwaddr.sa_family = dev->type;
3137 return 0;
3138
Jeff Garzik14e3e072007-10-08 00:06:32 -07003139 case SIOCGIFSLAVE:
3140 err = -EINVAL;
3141 break;
3142
3143 case SIOCGIFMAP:
3144 ifr->ifr_map.mem_start = dev->mem_start;
3145 ifr->ifr_map.mem_end = dev->mem_end;
3146 ifr->ifr_map.base_addr = dev->base_addr;
3147 ifr->ifr_map.irq = dev->irq;
3148 ifr->ifr_map.dma = dev->dma;
3149 ifr->ifr_map.port = dev->if_port;
3150 return 0;
3151
3152 case SIOCGIFINDEX:
3153 ifr->ifr_ifindex = dev->ifindex;
3154 return 0;
3155
3156 case SIOCGIFTXQLEN:
3157 ifr->ifr_qlen = dev->tx_queue_len;
3158 return 0;
3159
3160 default:
3161 /* dev_ioctl() should ensure this case
3162 * is never reached
3163 */
3164 WARN_ON(1);
3165 err = -EINVAL;
3166 break;
3167
3168 }
3169 return err;
3170}
3171
3172/*
3173 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
3174 */
3175static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3176{
3177 int err;
3178 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3179
3180 if (!dev)
3181 return -ENODEV;
3182
3183 switch (cmd) {
3184 case SIOCSIFFLAGS: /* Set interface flags */
3185 return dev_change_flags(dev, ifr->ifr_flags);
3186
3187 case SIOCSIFMETRIC: /* Set the metric on the interface
3188 (currently unused) */
3189 return -EOPNOTSUPP;
3190
3191 case SIOCSIFMTU: /* Set the MTU of a device */
3192 return dev_set_mtu(dev, ifr->ifr_mtu);
3193
Linus Torvalds1da177e2005-04-16 15:20:36 -07003194 case SIOCSIFHWADDR:
3195 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3196
3197 case SIOCSIFHWBROADCAST:
3198 if (ifr->ifr_hwaddr.sa_family != dev->type)
3199 return -EINVAL;
3200 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3201 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003202 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003203 return 0;
3204
Linus Torvalds1da177e2005-04-16 15:20:36 -07003205 case SIOCSIFMAP:
3206 if (dev->set_config) {
3207 if (!netif_device_present(dev))
3208 return -ENODEV;
3209 return dev->set_config(dev, &ifr->ifr_map);
3210 }
3211 return -EOPNOTSUPP;
3212
3213 case SIOCADDMULTI:
3214 if (!dev->set_multicast_list ||
3215 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3216 return -EINVAL;
3217 if (!netif_device_present(dev))
3218 return -ENODEV;
3219 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3220 dev->addr_len, 1);
3221
3222 case SIOCDELMULTI:
3223 if (!dev->set_multicast_list ||
3224 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3225 return -EINVAL;
3226 if (!netif_device_present(dev))
3227 return -ENODEV;
3228 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3229 dev->addr_len, 1);
3230
Linus Torvalds1da177e2005-04-16 15:20:36 -07003231 case SIOCSIFTXQLEN:
3232 if (ifr->ifr_qlen < 0)
3233 return -EINVAL;
3234 dev->tx_queue_len = ifr->ifr_qlen;
3235 return 0;
3236
3237 case SIOCSIFNAME:
3238 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3239 return dev_change_name(dev, ifr->ifr_newname);
3240
3241 /*
3242 * Unknown or private ioctl
3243 */
3244
3245 default:
3246 if ((cmd >= SIOCDEVPRIVATE &&
3247 cmd <= SIOCDEVPRIVATE + 15) ||
3248 cmd == SIOCBONDENSLAVE ||
3249 cmd == SIOCBONDRELEASE ||
3250 cmd == SIOCBONDSETHWADDR ||
3251 cmd == SIOCBONDSLAVEINFOQUERY ||
3252 cmd == SIOCBONDINFOQUERY ||
3253 cmd == SIOCBONDCHANGEACTIVE ||
3254 cmd == SIOCGMIIPHY ||
3255 cmd == SIOCGMIIREG ||
3256 cmd == SIOCSMIIREG ||
3257 cmd == SIOCBRADDIF ||
3258 cmd == SIOCBRDELIF ||
3259 cmd == SIOCWANDEV) {
3260 err = -EOPNOTSUPP;
3261 if (dev->do_ioctl) {
3262 if (netif_device_present(dev))
3263 err = dev->do_ioctl(dev, ifr,
3264 cmd);
3265 else
3266 err = -ENODEV;
3267 }
3268 } else
3269 err = -EINVAL;
3270
3271 }
3272 return err;
3273}
3274
3275/*
3276 * This function handles all "interface"-type I/O control requests. The actual
3277 * 'doing' part of this is dev_ifsioc above.
3278 */
3279
3280/**
3281 * dev_ioctl - network device ioctl
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07003282 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07003283 * @cmd: command to issue
3284 * @arg: pointer to a struct ifreq in user space
3285 *
3286 * Issue ioctl functions to devices. This is normally called by the
3287 * user space syscall interfaces but can sometimes be useful for
3288 * other purposes. The return value is the return from the syscall if
3289 * positive or a negative errno code on error.
3290 */
3291
Eric W. Biederman881d9662007-09-17 11:56:21 -07003292int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003293{
3294 struct ifreq ifr;
3295 int ret;
3296 char *colon;
3297
3298 /* One special case: SIOCGIFCONF takes ifconf argument
3299 and requires shared lock, because it sleeps writing
3300 to user space.
3301 */
3302
3303 if (cmd == SIOCGIFCONF) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08003304 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003305 ret = dev_ifconf(net, (char __user *) arg);
Stephen Hemminger6756ae42006-03-20 22:23:58 -08003306 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003307 return ret;
3308 }
3309 if (cmd == SIOCGIFNAME)
Eric W. Biederman881d9662007-09-17 11:56:21 -07003310 return dev_ifname(net, (struct ifreq __user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003311
3312 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3313 return -EFAULT;
3314
3315 ifr.ifr_name[IFNAMSIZ-1] = 0;
3316
3317 colon = strchr(ifr.ifr_name, ':');
3318 if (colon)
3319 *colon = 0;
3320
3321 /*
3322 * See which interface the caller is talking about.
3323 */
3324
3325 switch (cmd) {
3326 /*
3327 * These ioctl calls:
3328 * - can be done by all.
3329 * - atomic and do not require locking.
3330 * - return a value
3331 */
3332 case SIOCGIFFLAGS:
3333 case SIOCGIFMETRIC:
3334 case SIOCGIFMTU:
3335 case SIOCGIFHWADDR:
3336 case SIOCGIFSLAVE:
3337 case SIOCGIFMAP:
3338 case SIOCGIFINDEX:
3339 case SIOCGIFTXQLEN:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003340 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003341 read_lock(&dev_base_lock);
Jeff Garzik14e3e072007-10-08 00:06:32 -07003342 ret = dev_ifsioc_locked(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003343 read_unlock(&dev_base_lock);
3344 if (!ret) {
3345 if (colon)
3346 *colon = ':';
3347 if (copy_to_user(arg, &ifr,
3348 sizeof(struct ifreq)))
3349 ret = -EFAULT;
3350 }
3351 return ret;
3352
3353 case SIOCETHTOOL:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003354 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003355 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003356 ret = dev_ethtool(net, &ifr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003357 rtnl_unlock();
3358 if (!ret) {
3359 if (colon)
3360 *colon = ':';
3361 if (copy_to_user(arg, &ifr,
3362 sizeof(struct ifreq)))
3363 ret = -EFAULT;
3364 }
3365 return ret;
3366
3367 /*
3368 * These ioctl calls:
3369 * - require superuser power.
3370 * - require strict serialization.
3371 * - return a value
3372 */
3373 case SIOCGMIIPHY:
3374 case SIOCGMIIREG:
3375 case SIOCSIFNAME:
3376 if (!capable(CAP_NET_ADMIN))
3377 return -EPERM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003378 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003379 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003380 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003381 rtnl_unlock();
3382 if (!ret) {
3383 if (colon)
3384 *colon = ':';
3385 if (copy_to_user(arg, &ifr,
3386 sizeof(struct ifreq)))
3387 ret = -EFAULT;
3388 }
3389 return ret;
3390
3391 /*
3392 * These ioctl calls:
3393 * - require superuser power.
3394 * - require strict serialization.
3395 * - do not return a value
3396 */
3397 case SIOCSIFFLAGS:
3398 case SIOCSIFMETRIC:
3399 case SIOCSIFMTU:
3400 case SIOCSIFMAP:
3401 case SIOCSIFHWADDR:
3402 case SIOCSIFSLAVE:
3403 case SIOCADDMULTI:
3404 case SIOCDELMULTI:
3405 case SIOCSIFHWBROADCAST:
3406 case SIOCSIFTXQLEN:
3407 case SIOCSMIIREG:
3408 case SIOCBONDENSLAVE:
3409 case SIOCBONDRELEASE:
3410 case SIOCBONDSETHWADDR:
Linus Torvalds1da177e2005-04-16 15:20:36 -07003411 case SIOCBONDCHANGEACTIVE:
3412 case SIOCBRADDIF:
3413 case SIOCBRDELIF:
3414 if (!capable(CAP_NET_ADMIN))
3415 return -EPERM;
Thomas Grafcabcac02006-01-24 12:46:33 -08003416 /* fall through */
3417 case SIOCBONDSLAVEINFOQUERY:
3418 case SIOCBONDINFOQUERY:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003419 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003420 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003421 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003422 rtnl_unlock();
3423 return ret;
3424
3425 case SIOCGIFMEM:
3426 /* Get the per device memory space. We can add this but
3427 * currently do not support it */
3428 case SIOCSIFMEM:
3429 /* Set the per device memory buffer space.
3430 * Not applicable in our case */
3431 case SIOCSIFLINK:
3432 return -EINVAL;
3433
3434 /*
3435 * Unknown or private ioctl.
3436 */
3437 default:
3438 if (cmd == SIOCWANDEV ||
3439 (cmd >= SIOCDEVPRIVATE &&
3440 cmd <= SIOCDEVPRIVATE + 15)) {
Eric W. Biederman881d9662007-09-17 11:56:21 -07003441 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003442 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003443 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003444 rtnl_unlock();
3445 if (!ret && copy_to_user(arg, &ifr,
3446 sizeof(struct ifreq)))
3447 ret = -EFAULT;
3448 return ret;
3449 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003450 /* Take care of Wireless Extensions */
Johannes Berg295f4a12007-04-26 20:43:56 -07003451 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
Eric W. Biederman881d9662007-09-17 11:56:21 -07003452 return wext_handle_ioctl(net, &ifr, cmd, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003453 return -EINVAL;
3454 }
3455}
3456
3457
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Advances a function-local static counter until it lands on a
 *	positive value for which __dev_get_by_index() finds no device in
 *	@net, and returns that value.  A counter that becomes zero or
 *	negative is restarted at 1.  The caller must hold the rtnl
 *	semaphore or the dev_base_lock to be sure the result remains unique.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;

	do {
		ifindex++;
		/* Only strictly positive indexes are handed out. */
		if (ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(net, ifindex));

	return ifindex;
}
3476
Linus Torvalds1da177e2005-04-16 15:20:36 -07003477/* Delayed registration/unregisteration */
3478static DEFINE_SPINLOCK(net_todo_list_lock);
3479static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
3480
Stephen Hemminger6f05f622007-03-08 20:46:03 -08003481static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003482{
3483 spin_lock(&net_todo_list_lock);
3484 list_add_tail(&dev->todo_list, &net_todo_list);
3485 spin_unlock(&net_todo_list_lock);
3486}
3487
/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate:
 *	-EINVAL for an invalid name, -EEXIST for a name already present in
 *	the device's namespace, the (negative) result of @dev->init or
 *	-EIO if it returned a positive value, the result of
 *	netdev_register_kobject(), or the error reported by the notifier
 *	chain (in which case the device is unregistered again before
 *	returning).
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */

int register_netdevice(struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *p;
	int ret;
	struct net *net;

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* When net_device's are persistent, this will be fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!dev->nd_net);
	net = dev->nd_net;

	/* Set up the per-device locks before any callback can run. */
	spin_lock_init(&dev->queue_lock);
	spin_lock_init(&dev->_xmit_lock);
	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
	dev->xmit_lock_owner = -1;
	spin_lock_init(&dev->ingress_lock);

	/* -1 marks "not set"; ->init may override it, see below. */
	dev->iflink = -1;

	/* Init, if this function is available */
	if (dev->init) {
		ret = dev->init(dev);
		if (ret) {
			/* Normalise positive driver return codes. */
			if (ret > 0)
				ret = -EIO;
			/* ->init failed, so ->uninit is not called. */
			goto out;
		}
	}

	if (!dev_valid_name(dev->name)) {
		ret = -EINVAL;
		goto err_uninit;
	}

	dev->ifindex = dev_new_index(net);
	/* Default the link index to our own index unless ->init set it. */
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/* Check for existence of name */
	head = dev_name_hash(net, dev->name);
	hlist_for_each(p, head) {
		struct net_device *d
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
			ret = -EEXIST;
			goto err_uninit;
		}
	}

	/* Fix illegal checksum combinations */
	if ((dev->features & NETIF_F_HW_CSUM) &&
	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
	}

	if ((dev->features & NETIF_F_NO_CSUM) &&
	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
	}


	/* Fix illegal SG+CSUM combinations. */
	if ((dev->features & NETIF_F_SG) &&
	    !(dev->features & NETIF_F_ALL_CSUM)) {
		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
		       dev->name);
		dev->features &= ~NETIF_F_SG;
	}

	/* TSO requires that SG is present as well. */
	if ((dev->features & NETIF_F_TSO) &&
	    !(dev->features & NETIF_F_SG)) {
		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
		       dev->name);
		dev->features &= ~NETIF_F_TSO;
	}
	/* UFO is kept only when both HW_CSUM and SG are still set. */
	if (dev->features & NETIF_F_UFO) {
		if (!(dev->features & NETIF_F_HW_CSUM)) {
			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
					"NETIF_F_HW_CSUM feature.\n",
							dev->name);
			dev->features &= ~NETIF_F_UFO;
		}
		if (!(dev->features & NETIF_F_SG)) {
			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
					"NETIF_F_SG feature.\n",
					dev->name);
			dev->features &= ~NETIF_F_UFO;
		}
	}

	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	dev_init_scheduler(dev);
	/* NOTE(review): presumably balanced by the dev_put() at the end of
	 * unregister_netdevice() - confirm. */
	dev_hold(dev);
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	/* A notifier vetoed the device: undo the whole registration. */
	if (ret)
		unregister_netdevice(dev);

out:
	return ret;

err_uninit:
	/* Failure after a successful ->init: let the driver undo it. */
	if (dev->uninit)
		dev->uninit(dev);
	goto out;
}
3636
3637/**
3638 * register_netdev - register a network device
3639 * @dev: device to register
3640 *
3641 * Take a completed network device structure and add it to the kernel
3642 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3643 * chain. 0 is returned on success. A negative errno code is returned
3644 * on a failure to set up the device, or if the name is a duplicate.
3645 *
Borislav Petkov38b4da32007-04-20 22:14:10 -07003646 * This is a wrapper around register_netdevice that takes the rtnl semaphore
Linus Torvalds1da177e2005-04-16 15:20:36 -07003647 * and expands the device name if you passed a format string to
3648 * alloc_netdev.
3649 */
3650int register_netdev(struct net_device *dev)
3651{
3652 int err;
3653
3654 rtnl_lock();
3655
3656 /*
3657 * If the name is a format string the caller wants us to do a
3658 * name allocation.
3659 */
3660 if (strchr(dev->name, '%')) {
3661 err = dev_alloc_name(dev, dev->name);
3662 if (err < 0)
3663 goto out;
3664 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003665
Linus Torvalds1da177e2005-04-16 15:20:36 -07003666 err = register_netdevice(dev);
3667out:
3668 rtnl_unlock();
3669 return err;
3670}
3671EXPORT_SYMBOL(register_netdev);
3672
/*
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 *
 * The loop polls dev->refcnt, sleeping 250 ms per iteration.  Once more
 * than one second has passed since the last broadcast the
 * NETDEV_UNREGISTER notification is sent again under the RTNL, and once
 * more than ten seconds have passed since the last warning a
 * KERN_EMERG message with the current reference count is printed.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			/* NOTE(review): __rtnl_unlock() rather than
			 * rtnl_unlock() - presumably to avoid re-entering
			 * netdev_run_todo() from here; confirm. */
			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}
3723
/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *      ...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock() after it drops the semaphore.
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * For every device queued by net_set_todo() this moves the state from
 * NETREG_UNREGISTERING to NETREG_UNREGISTERED, waits for all references
 * to go away, calls the optional destructor and drops a reference on
 * the embedded kobject.
 */
static DEFINE_MUTEX(net_todo_run_mutex);
void netdev_run_todo(void)
{
	struct list_head list;

	/* Need to guard against multiple cpu's getting out of order. */
	mutex_lock(&net_todo_run_mutex);

	/* Not safe to do outside the semaphore.  We must not return
	 * until all unregister events invoked by the local processor
	 * have been completed (either by this todo run, or one on
	 * another cpu).
	 */
	if (list_empty(&net_todo_list))
		goto out;

	/* Snapshot list, allow later requests */
	spin_lock(&net_todo_list_lock);
	list_replace_init(&net_todo_list, &list);
	spin_unlock(&net_todo_list_lock);

	/* Drain the private snapshot; no list lock needed from here on. */
	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		/* Only devices in the middle of unregistering belong here;
		 * anything else is reported and skipped. */
		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		/* May sleep until every reference has been dropped. */
		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		BUG_TRAP(!dev->ip_ptr);
		BUG_TRAP(!dev->ip6_ptr);
		BUG_TRAP(!dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}

out:
	mutex_unlock(&net_todo_run_mutex);
}
3798
/* Return a pointer to the statistics block embedded in @dev. */
static struct net_device_stats *internal_stats(struct net_device *dev)
{
	return &dev->stats;
}
3803
Linus Torvalds1da177e2005-04-16 15:20:36 -07003804/**
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07003805 * alloc_netdev_mq - allocate network device
Linus Torvalds1da177e2005-04-16 15:20:36 -07003806 * @sizeof_priv: size of private data to allocate space for
3807 * @name: device name format string
3808 * @setup: callback to initialize device
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07003809 * @queue_count: the number of subqueues to allocate
Linus Torvalds1da177e2005-04-16 15:20:36 -07003810 *
3811 * Allocates a struct net_device with private data area for driver use
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07003812 * and performs basic initialization. Also allocates subquue structs
3813 * for each queue on the device at the end of the netdevice.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003814 */
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07003815struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
3816 void (*setup)(struct net_device *), unsigned int queue_count)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003817{
3818 void *p;
3819 struct net_device *dev;
3820 int alloc_size;
3821
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07003822 BUG_ON(strlen(name) >= sizeof(dev->name));
3823
Linus Torvalds1da177e2005-04-16 15:20:36 -07003824 /* ensure 32-byte alignment of both the device and private area */
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07003825 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
Patrick McHardy31ce72a2007-07-20 19:45:45 -07003826 (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07003827 ~NETDEV_ALIGN_CONST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003828 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3829
Paolo 'Blaisorblade' Giarrusso31380de2006-04-06 22:38:28 -07003830 p = kzalloc(alloc_size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003831 if (!p) {
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07003832 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003833 return NULL;
3834 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003835
3836 dev = (struct net_device *)
3837 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3838 dev->padded = (char *)dev - (char *)p;
Eric W. Biederman6d34b1c2007-09-12 12:57:33 +02003839 dev->nd_net = &init_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003840
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07003841 if (sizeof_priv) {
3842 dev->priv = ((char *)dev +
3843 ((sizeof(struct net_device) +
3844 (sizeof(struct net_device_subqueue) *
Patrick McHardy31ce72a2007-07-20 19:45:45 -07003845 (queue_count - 1)) + NETDEV_ALIGN_CONST)
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07003846 & ~NETDEV_ALIGN_CONST));
3847 }
3848
3849 dev->egress_subqueue_count = queue_count;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003850
Rusty Russell5a1b5892007-04-28 21:04:03 -07003851 dev->get_stats = internal_stats;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003852 netpoll_netdev_init(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003853 setup(dev);
3854 strcpy(dev->name, name);
3855 return dev;
3856}
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07003857EXPORT_SYMBOL(alloc_netdev_mq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003858
3859/**
3860 * free_netdev - free network device
3861 * @dev: device
3862 *
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003863 * This function does the last stage of destroying an allocated device
3864 * interface. The reference to the device object is released.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003865 * If this is the last reference then it will be freed.
3866 */
3867void free_netdev(struct net_device *dev)
3868{
Stephen Hemminger3041a062006-05-26 13:25:24 -07003869 /* Compatibility with error handling in drivers */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003870 if (dev->reg_state == NETREG_UNINITIALIZED) {
3871 kfree((char *)dev - dev->padded);
3872 return;
3873 }
3874
3875 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3876 dev->reg_state = NETREG_RELEASED;
3877
Greg Kroah-Hartman43cb76d2002-04-09 12:14:34 -07003878 /* will free via device release */
3879 put_device(&dev->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003880}
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003881
/*
 * Synchronize with packet receive processing.
 *
 * Implemented as synchronize_rcu(), so it may sleep and must only be
 * called from a context where sleeping is allowed.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
3888
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.  It does not return a value.  Calling it
 *	on a device that was never registered only logs a message and
 *	triggers a warning; any state other than NETREG_REGISTERED beyond
 *	that is a bug.
 *
 *	The final part of the teardown is deferred: the device is queued
 *	with net_set_todo() and finished by netdev_run_todo().
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	/* Some devices call without registering for initialization unwind. */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
				  "was registered\n", dev->name, dev);

		WARN_ON(1);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_REGISTERED);

	/* If device is running, close it first. */
	dev_close(dev);

	/* And unlink it from device chain. */
	unlist_netdevice(dev);

	dev->reg_state = NETREG_UNREGISTERING;

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);


	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_addr_discard(dev);

	if (dev->uninit)
		dev->uninit(dev);

	/* Notifier chain MUST detach us from master device. */
	BUG_TRAP(!dev->master);

	/* Remove entries from kobject tree */
	netdev_unregister_kobject(dev);

	/* Finish processing unregister after unlock */
	net_set_todo(dev);

	synchronize_net();

	/* NOTE(review): presumably drops the reference taken by dev_hold()
	 * in register_netdevice() - confirm. */
	dev_put(dev);
}
3957
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.  It does not return a value.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3978
/**
 *	dev_change_net_namespace - move device to a different network namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: If not NULL name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned: -EINVAL if the
 *	device is namespace-local or not registered, -EEXIST if no usable
 *	name could be found in the destination namespace.  Moving a device
 *	to the namespace it is already in succeeds without doing anything.
 *
 *	Callers must hold the rtnl semaphore.
 */

int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	char buf[IFNAMSIZ];
	const char *destname;
	int err;

	ASSERT_RTNL();

	/* Don't allow namespace local devices to be moved. */
	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

	/* Ensure the device has been registered */
	err = -EINVAL;
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	/* Get out if there is nothing to do */
	err = 0;
	if (dev->nd_net == net)
		goto out;

	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
	err = -EEXIST;
	destname = dev->name;
	if (__dev_get_by_name(net, destname)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (!dev_valid_name(pat))
			goto out;
		if (strchr(pat, '%')) {
			/* Pattern: let the allocator pick a free name. */
			if (__dev_alloc_name(net, pat, buf) < 0)
				goto out;
			destname = buf;
		} else
			destname = pat;
		/* The replacement name must be free as well. */
		if (__dev_get_by_name(net, destname))
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice unregister_netdevice.
	 * Nothing below this point jumps to the error exit.
	 */

	/* If device is running close it first. */
	dev_close(dev);

	/* And unlink it from device chain */
	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_addr_discard(dev);

	/* Actually switch the network namespace */
	dev->nd_net = net;

	/* Assign the new device name */
	if (destname != dev->name)
		strcpy(dev->name, destname);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex)) {
		/* Remember whether iflink simply mirrored the old ifindex. */
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	/* Fixup kobjects */
	err = device_rename(&dev->dev, dev->name);
	/* A rename failure is only warned about; the move continues. */
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	synchronize_net();
	err = 0;
out:
	return err;
}
4093
/*
 * CPU notifier callback.  Reacts only to CPU_DEAD / CPU_DEAD_FROZEN for
 * the CPU passed in @ocpu: that CPU's completion queue and output queue
 * are appended to those of the CPU running this callback, the TX
 * softirq is raised, and the packets left on the dead CPU's input queue
 * are fed back through netif_rx().  Always returns NOTIFY_OK.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct net_device **list_net;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	/* The queues below are manipulated with interrupts disabled. */
	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Find end of our output_queue. */
	list_net = &sd->output_queue;
	while (*list_net)
		list_net = &(*list_net)->next_sched;
	/* Append output queue from offline CPU. */
	*list_net = oldsd->output_queue;
	oldsd->output_queue = NULL;

	/* Make sure the adopted TX work gets processed. */
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
		netif_rx(skb);

	return NOTIFY_OK;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07004137
Chris Leechdb217332006-06-17 21:24:58 -07004138#ifdef CONFIG_NET_DMA
4139/**
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07004140 * net_dma_rebalance - try to maintain one DMA channel per CPU
4141 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4142 *
4143 * This is called when the number of channels allocated to the net_dma client
4144 * changes. The net_dma client tries to have one DMA channel per CPU.
Chris Leechdb217332006-06-17 21:24:58 -07004145 */
Dan Williamsd379b012007-07-09 11:56:42 -07004146
4147static void net_dma_rebalance(struct net_dma *net_dma)
Chris Leechdb217332006-06-17 21:24:58 -07004148{
Dan Williamsd379b012007-07-09 11:56:42 -07004149 unsigned int cpu, i, n, chan_idx;
Chris Leechdb217332006-06-17 21:24:58 -07004150 struct dma_chan *chan;
4151
Dan Williamsd379b012007-07-09 11:56:42 -07004152 if (cpus_empty(net_dma->channel_mask)) {
Chris Leechdb217332006-06-17 21:24:58 -07004153 for_each_online_cpu(cpu)
Alexey Dobriyan29bbd722006-08-02 15:02:31 -07004154 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
Chris Leechdb217332006-06-17 21:24:58 -07004155 return;
4156 }
4157
4158 i = 0;
4159 cpu = first_cpu(cpu_online_map);
4160
Dan Williamsd379b012007-07-09 11:56:42 -07004161 for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
4162 chan = net_dma->channels[chan_idx];
4163
4164 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4165 + (i < (num_online_cpus() %
4166 cpus_weight(net_dma->channel_mask)) ? 1 : 0));
Chris Leechdb217332006-06-17 21:24:58 -07004167
4168 while(n) {
Alexey Dobriyan29bbd722006-08-02 15:02:31 -07004169 per_cpu(softnet_data, cpu).net_dma = chan;
Chris Leechdb217332006-06-17 21:24:58 -07004170 cpu = next_cpu(cpu, cpu_online_map);
4171 n--;
4172 }
4173 i++;
4174 }
Chris Leechdb217332006-06-17 21:24:58 -07004175}
4176
4177/**
4178 * netdev_dma_event - event callback for the net_dma_client
4179 * @client: should always be net_dma_client
Randy Dunlapf4b8ea72006-06-22 16:00:11 -07004180 * @chan: DMA channel for the event
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07004181 * @state: DMA state to be handled
Chris Leechdb217332006-06-17 21:24:58 -07004182 */
Dan Williamsd379b012007-07-09 11:56:42 -07004183static enum dma_state_client
4184netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4185 enum dma_state state)
Chris Leechdb217332006-06-17 21:24:58 -07004186{
Dan Williamsd379b012007-07-09 11:56:42 -07004187 int i, found = 0, pos = -1;
4188 struct net_dma *net_dma =
4189 container_of(client, struct net_dma, client);
4190 enum dma_state_client ack = DMA_DUP; /* default: take no action */
4191
4192 spin_lock(&net_dma->lock);
4193 switch (state) {
4194 case DMA_RESOURCE_AVAILABLE:
4195 for (i = 0; i < NR_CPUS; i++)
4196 if (net_dma->channels[i] == chan) {
4197 found = 1;
4198 break;
4199 } else if (net_dma->channels[i] == NULL && pos < 0)
4200 pos = i;
4201
4202 if (!found && pos >= 0) {
4203 ack = DMA_ACK;
4204 net_dma->channels[pos] = chan;
4205 cpu_set(pos, net_dma->channel_mask);
4206 net_dma_rebalance(net_dma);
4207 }
Chris Leechdb217332006-06-17 21:24:58 -07004208 break;
4209 case DMA_RESOURCE_REMOVED:
Dan Williamsd379b012007-07-09 11:56:42 -07004210 for (i = 0; i < NR_CPUS; i++)
4211 if (net_dma->channels[i] == chan) {
4212 found = 1;
4213 pos = i;
4214 break;
4215 }
4216
4217 if (found) {
4218 ack = DMA_ACK;
4219 cpu_clear(pos, net_dma->channel_mask);
4220 net_dma->channels[i] = NULL;
4221 net_dma_rebalance(net_dma);
4222 }
Chris Leechdb217332006-06-17 21:24:58 -07004223 break;
4224 default:
4225 break;
4226 }
Dan Williamsd379b012007-07-09 11:56:42 -07004227 spin_unlock(&net_dma->lock);
4228
4229 return ack;
Chris Leechdb217332006-06-17 21:24:58 -07004230}
4231
4232/**
4233 * netdev_dma_regiser - register the networking subsystem as a DMA client
4234 */
4235static int __init netdev_dma_register(void)
4236{
Dan Williamsd379b012007-07-09 11:56:42 -07004237 spin_lock_init(&net_dma.lock);
4238 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4239 dma_async_client_register(&net_dma.client);
4240 dma_async_client_chan_request(&net_dma.client);
Chris Leechdb217332006-06-17 21:24:58 -07004241 return 0;
4242}
4243
4244#else
4245static int __init netdev_dma_register(void) { return -ENODEV; }
4246#endif /* CONFIG_NET_DMA */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004247
Herbert Xu7f353bf2007-08-10 15:47:58 -07004248/**
4249 * netdev_compute_feature - compute conjunction of two feature sets
4250 * @all: first feature set
4251 * @one: second feature set
4252 *
4253 * Computes a new feature set after adding a device with feature set
4254 * @one to the master device with current feature set @all. Returns
4255 * the new feature set.
4256 */
4257int netdev_compute_features(unsigned long all, unsigned long one)
4258{
4259 /* if device needs checksumming, downgrade to hw checksumming */
4260 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4261 all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4262
4263 /* if device can't do all checksum, downgrade to ipv4/ipv6 */
4264 if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4265 all ^= NETIF_F_HW_CSUM
4266 | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4267
4268 if (one & NETIF_F_GSO)
4269 one |= NETIF_F_GSO_SOFTWARE;
4270 one |= NETIF_F_GSO;
4271
4272 /* If even one device supports robust GSO, enable it for all. */
4273 if (one & NETIF_F_GSO_ROBUST)
4274 all |= NETIF_F_GSO_ROBUST;
4275
4276 all &= one | NETIF_F_LLTX;
4277
4278 if (!(all & NETIF_F_ALL_CSUM))
4279 all &= ~NETIF_F_SG;
4280 if (!(all & NETIF_F_SG))
4281 all &= ~NETIF_F_GSO_MASK;
4282
4283 return all;
4284}
4285EXPORT_SYMBOL(netdev_compute_features);
4286
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004287static struct hlist_head *netdev_create_hash(void)
4288{
4289 int i;
4290 struct hlist_head *hash;
4291
4292 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4293 if (hash != NULL)
4294 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4295 INIT_HLIST_HEAD(&hash[i]);
4296
4297 return hash;
4298}
4299
Eric W. Biederman881d9662007-09-17 11:56:21 -07004300/* Initialize per network namespace state */
Pavel Emelyanov46650792007-10-08 20:38:39 -07004301static int __net_init netdev_init(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004302{
Eric W. Biederman881d9662007-09-17 11:56:21 -07004303 INIT_LIST_HEAD(&net->dev_base_head);
4304 rwlock_init(&dev_base_lock);
4305
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004306 net->dev_name_head = netdev_create_hash();
4307 if (net->dev_name_head == NULL)
4308 goto err_name;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004309
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004310 net->dev_index_head = netdev_create_hash();
4311 if (net->dev_index_head == NULL)
4312 goto err_idx;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004313
4314 return 0;
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004315
4316err_idx:
4317 kfree(net->dev_name_head);
4318err_name:
4319 return -ENOMEM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004320}
4321
Pavel Emelyanov46650792007-10-08 20:38:39 -07004322static void __net_exit netdev_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004323{
4324 kfree(net->dev_name_head);
4325 kfree(net->dev_index_head);
4326}
4327
Pavel Emelyanov46650792007-10-08 20:38:39 -07004328static struct pernet_operations __net_initdata netdev_net_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07004329 .init = netdev_init,
4330 .exit = netdev_exit,
4331};
4332
Pavel Emelyanov46650792007-10-08 20:38:39 -07004333static void __net_exit default_device_exit(struct net *net)
Eric W. Biedermance286d32007-09-12 13:53:49 +02004334{
4335 struct net_device *dev, *next;
4336 /*
4337 * Push all migratable of the network devices back to the
4338 * initial network namespace
4339 */
4340 rtnl_lock();
4341 for_each_netdev_safe(net, dev, next) {
4342 int err;
4343
4344 /* Ignore unmoveable devices (i.e. loopback) */
4345 if (dev->features & NETIF_F_NETNS_LOCAL)
4346 continue;
4347
4348 /* Push remaing network devices to init_net */
4349 err = dev_change_net_namespace(dev, &init_net, "dev%d");
4350 if (err) {
4351 printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
4352 __func__, dev->name, err);
4353 unregister_netdevice(dev);
4354 }
4355 }
4356 rtnl_unlock();
4357}
4358
Pavel Emelyanov46650792007-10-08 20:38:39 -07004359static struct pernet_operations __net_initdata default_device_ops = {
Eric W. Biedermance286d32007-09-12 13:53:49 +02004360 .exit = default_device_exit,
4361};
4362
Linus Torvalds1da177e2005-04-16 15:20:36 -07004363/*
4364 * Initialize the DEV module. At boot time this walks the device list and
4365 * unhooks any devices that fail to initialise (normally hardware not
4366 * present) and leaves us with a valid list of present and active devices.
4367 *
4368 */
4369
4370/*
4371 * This is called single threaded during boot, so no need
4372 * to take the rtnl semaphore.
4373 */
4374static int __init net_dev_init(void)
4375{
4376 int i, rc = -ENOMEM;
4377
4378 BUG_ON(!dev_boot_phase);
4379
Linus Torvalds1da177e2005-04-16 15:20:36 -07004380 if (dev_proc_init())
4381 goto out;
4382
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004383 if (netdev_kobject_init())
Linus Torvalds1da177e2005-04-16 15:20:36 -07004384 goto out;
4385
4386 INIT_LIST_HEAD(&ptype_all);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004387 for (i = 0; i < 16; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004388 INIT_LIST_HEAD(&ptype_base[i]);
4389
Eric W. Biederman881d9662007-09-17 11:56:21 -07004390 if (register_pernet_subsys(&netdev_net_ops))
4391 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004392
Eric W. Biedermance286d32007-09-12 13:53:49 +02004393 if (register_pernet_device(&default_device_ops))
4394 goto out;
4395
Linus Torvalds1da177e2005-04-16 15:20:36 -07004396 /*
4397 * Initialise the packet receive queues.
4398 */
4399
KAMEZAWA Hiroyuki6f912042006-04-10 22:52:50 -07004400 for_each_possible_cpu(i) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004401 struct softnet_data *queue;
4402
4403 queue = &per_cpu(softnet_data, i);
4404 skb_queue_head_init(&queue->input_pkt_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004405 queue->completion_queue = NULL;
4406 INIT_LIST_HEAD(&queue->poll_list);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004407
4408 queue->backlog.poll = process_backlog;
4409 queue->backlog.weight = weight_p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004410 }
4411
Chris Leechdb217332006-06-17 21:24:58 -07004412 netdev_dma_register();
4413
Linus Torvalds1da177e2005-04-16 15:20:36 -07004414 dev_boot_phase = 0;
4415
4416 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
4417 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
4418
4419 hotcpu_notifier(dev_cpu_callback, 0);
4420 dst_init();
4421 dev_mcast_init();
4422 rc = 0;
4423out:
4424 return rc;
4425}
4426
4427subsys_initcall(net_dev_init);
4428
/* Symbols exported to the rest of the kernel and to modules. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

/* Bridge hooks, exported only when bridging is built in or modular. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);