/*
 *	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain :	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell :	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *	       sure which should go first, but I bet it won't make much
 *	       difference if we are running VLANs.  The good news is that
 *	       this protocol won't be in the list unless compiled in, so
 *	       the average user (w/out VLANs) will not be adversely affected.
 *	       --BLG
 *
 *		0800	IP
 *		8100    802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;	/* Taps */

#ifdef CONFIG_NET_DMA
struct net_dma {
	struct dma_client client;
	spinlock_t lock;
	cpumask_t channel_mask;
	struct dma_chan **channels;
};

static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state);

static struct net_dma net_dma = {
	.client = {
		.event_callback = netdev_dma_event,
	},
};
#endif

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

#define NETDEV_HASHBITS	8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail(&dev->dev_list, &net->dev_base_head);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}

/* Device list removal */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
	 ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};

static const char *netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
	 "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif
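
/*
 * Illustrative sketch (not part of this file): for an Ethernet device,
 * dev->type is ARPHRD_ETHER, so netdev_lock_pos() resolves to the
 * "_xmit_ETHER" slot and register_netdevice() ends up doing the
 * equivalent of
 *
 *	netdev_set_addr_lockdep_class(dev);
 *
 * giving each device type its own lockdep class.  With one shared class,
 * stacked devices (e.g. VLAN over Ethernet) would trigger false
 * "possible recursive locking" reports.
 */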

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep and therefore cannot guarantee that all
 *	CPUs that are in the middle of receiving packets will see the new
 *	packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}

/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPUs have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}

/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
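
/*
 * Illustrative sketch (not part of this file): a hypothetical module that
 * wants to see every incoming IPv4 frame could register a handler with
 * dev_add_pack() and drop it again with dev_remove_pack().  my_rcv and
 * my_ptype are made-up names.
 *
 *	static int my_rcv(struct sk_buff *skb, struct net_device *dev,
 *			  struct packet_type *pt, struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);		<- consume the clone we were handed
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type my_ptype = {
 *		.type	= htons(ETH_P_IP),
 *		.func	= my_rcv,
 *	};
 *
 *	dev_add_pack(&my_ptype);	<- module init
 *	dev_remove_pack(&my_ptype);	<- module exit; may sleep
 */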

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine for
 *	all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq	= s[i].map.irq;
			dev->base_addr	= s[i].map.base_addr;
			dev->mem_start	= s[i].map.mem_start;
			dev->mem_end	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}


/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
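
/*
 * For example, booting with the (hypothetical values) command line option
 *
 *	netdev=5,0x300,0,0,eth0
 *
 * makes get_options() above fill ints[] with irq 5 and I/O base 0x300
 * (mem_start/mem_end left 0) and leaves "eth0" in str, matching the
 * irq, base_addr, mem_start, mem_end order used in netdev_boot_setup().
 */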

/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_name_hash(net, name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}
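
/*
 * Typical use (illustrative sketch; "lo" is an arbitrary name): the
 * reference taken here must be dropped with dev_put() when done.
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "lo");
 *	if (dev) {
 *		... use dev ...
 *		dev_put(dev);
 *	}
 */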

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(net, ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(net, ifindex);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking.
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}

EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	read_lock(&dev_base_lock);
	for_each_netdev(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return ret;
}

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}
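
/*
 * For example, "eth0" and "wlan-1" pass this check, while "", ".", "..",
 * "a/b", any name containing whitespace, and any name of IFNAMSIZ
 * characters or more are all rejected.
 */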

/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - e.g. "lt%d" - it will try and find a suitable
 *	id. It scans the list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/*  avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - e.g. "lt%d" - it will try and find a suitable
 *	id. It scans the list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
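
/*
 * Illustrative sketch: a driver that wants the first free "eth%d" slot
 * would call, with the rtnl lock held,
 *
 *	err = dev_alloc_name(dev, "eth%d");
 *
 * after which dev->name holds e.g. "eth2" and err is the unit number
 * assigned, or a negative errno on failure.
 */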

/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device; can pass format strings "eth%d"
 *	for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;

	memcpy(oldname, dev->name, IFNAMSIZ);

	if (strchr(newname, '%')) {
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
	}
	else if (__dev_get_by_name(net, newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

rollback:
	/* For now only devices in the initial network namespace
	 * are in sysfs.
	 */
	if (net == &init_net) {
		ret = device_rename(&dev->dev, dev->name);
		if (ret) {
			memcpy(dev->name, oldname, IFNAMSIZ);
			return ret;
		}
	}

	write_lock_bh(&dev_base_lock);
	hlist_del(&dev->name_hlist);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		if (err) {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
		} else {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		}
	}

	return err;
}
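
/*
 * Illustrative sketch: a caller that already holds the RTNL and has the
 * device down can rename it with either a literal name or a pattern
 * ("lan%d" below is an arbitrary choice):
 *
 *	err = dev_change_name(dev, "lan%d");
 */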

/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from info
 *
 *	Set ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		if (dev->ifalias) {
			kfree(dev->ifalias);
			dev->ifalias = NULL;
		}
		return 0;
	}

	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
	if (!dev->ifalias)
		return -ENOMEM;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}


/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}

void netdev_bonding_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);

/**
 *	dev_load	- load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	read_unlock(&dev_base_lock);

	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret = 0;

	ASSERT_RTNL();

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	/*
	 *	If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return ret;
}

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare to death, when device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	return 0;
}
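
/*
 * Illustrative sketch: callers outside this file bring an interface up
 * or down under the RTNL, e.g.
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	...
 *	err = dev_close(dev);
 *	rtnl_unlock();
 */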


/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
	    dev->ethtool_ops->set_flags) {
		u32 flags = dev->ethtool_ops->get_flags(dev);
		if (flags & ETH_FLAG_LRO) {
			flags &= ~ETH_FLAG_LRO;
			dev->ethtool_ops->set_flags(dev, flags);
		}
	}
	WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);


static int dev_boot_phase = 1;

/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 *	When registered, all registration and up events are replayed
 *	to the new notifier to give it a race-free view of the network
 *	device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				break;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}

	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
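
/*
 * Illustrative sketch of a client (my_netdev_event and my_notifier are
 * made-up names): the struct net_device involved arrives as the data
 * pointer of the callback.
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_notifier = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_notifier);
 */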
1283
1284/**
1285 * unregister_netdevice_notifier - unregister a network notifier block
1286 * @nb: notifier
1287 *
1288 * Unregister a notifier previously registered by
1289 * register_netdevice_notifier(). The notifier is unlinked into the
1290 * kernel structures and may then be reused. A negative errno code
1291 * is returned on a failure.
1292 */
1293
1294int unregister_netdevice_notifier(struct notifier_block *nb)
1295{
Herbert Xu9f514952006-03-25 01:24:25 -08001296 int err;
1297
1298 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001299 err = raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xu9f514952006-03-25 01:24:25 -08001300 rtnl_unlock();
1301 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302}
1303
1304/**
1305 * call_netdevice_notifiers - call all network notifier blocks
1306 * @val: value passed unmodified to notifier function
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07001307 * @dev: net_device pointer passed unmodified to notifier function
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308 *
1309 * Call all network notifier blocks. Parameters and return value
Alan Sternf07d5b92006-05-09 15:23:03 -07001310 * are as for raw_notifier_call_chain().
Linus Torvalds1da177e2005-04-16 15:20:36 -07001311 */
1312
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001313int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314{
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001315 return raw_notifier_call_chain(&netdev_chain, val, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316}
1317
1318/* When > 0 there are consumers of rx skb time stamps */
1319static atomic_t netstamp_needed = ATOMIC_INIT(0);
1320
1321void net_enable_timestamp(void)
1322{
1323 atomic_inc(&netstamp_needed);
1324}
1325
1326void net_disable_timestamp(void)
1327{
1328 atomic_dec(&netstamp_needed);
1329}
1330
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001331static inline void net_timestamp(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332{
1333 if (atomic_read(&netstamp_needed))
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001334 __net_timestamp(skb);
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001335 else
1336 skb->tstamp.tv64 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337}
1338
1339/*
1340 * Support routine. Sends outgoing frames to any network
1341 * taps currently in use.
1342 */
1343
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001344static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345{
1346 struct packet_type *ptype;
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001347
1348 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349
1350 rcu_read_lock();
1351 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1352 /* Never send packets back to the socket
1353 * they originated from - MvS (miquels@drinkel.ow.org)
1354 */
1355 if ((ptype->dev == dev || !ptype->dev) &&
1356 (ptype->af_packet_priv == NULL ||
1357 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1358 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1359 if (!skb2)
1360 break;
1361
1362 /* skb->nh should be correctly
1363 set by sender, so that the second statement is
1364 just protection against buggy protocols.
1365 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001366 skb_reset_mac_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001368 if (skb_network_header(skb2) < skb2->data ||
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001369 skb2->network_header > skb2->tail) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001370 if (net_ratelimit())
1371 printk(KERN_CRIT "protocol %04x is "
1372 "buggy, dev %s\n",
1373 skb2->protocol, dev->name);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07001374 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 }
1376
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001377 skb2->transport_header = skb2->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 skb2->pkt_type = PACKET_OUTGOING;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001379 ptype->func(skb2, skb->dev, ptype, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 }
1381 }
1382 rcu_read_unlock();
1383}
1384
Denis Vlasenko56079432006-03-29 15:57:29 -08001385
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001386static inline void __netif_reschedule(struct Qdisc *q)
1387{
1388 struct softnet_data *sd;
1389 unsigned long flags;
1390
1391 local_irq_save(flags);
1392 sd = &__get_cpu_var(softnet_data);
1393 q->next_sched = sd->output_queue;
1394 sd->output_queue = q;
1395 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1396 local_irq_restore(flags);
1397}
1398
David S. Miller37437bb2008-07-16 02:15:04 -07001399void __netif_schedule(struct Qdisc *q)
Denis Vlasenko56079432006-03-29 15:57:29 -08001400{
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001401 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1402 __netif_reschedule(q);
Denis Vlasenko56079432006-03-29 15:57:29 -08001403}
1404EXPORT_SYMBOL(__netif_schedule);
1405
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001406void dev_kfree_skb_irq(struct sk_buff *skb)
Denis Vlasenko56079432006-03-29 15:57:29 -08001407{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001408 if (atomic_dec_and_test(&skb->users)) {
1409 struct softnet_data *sd;
1410 unsigned long flags;
Denis Vlasenko56079432006-03-29 15:57:29 -08001411
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001412 local_irq_save(flags);
1413 sd = &__get_cpu_var(softnet_data);
1414 skb->next = sd->completion_queue;
1415 sd->completion_queue = skb;
1416 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1417 local_irq_restore(flags);
1418 }
Denis Vlasenko56079432006-03-29 15:57:29 -08001419}
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001420EXPORT_SYMBOL(dev_kfree_skb_irq);
Denis Vlasenko56079432006-03-29 15:57:29 -08001421
1422void dev_kfree_skb_any(struct sk_buff *skb)
1423{
1424 if (in_irq() || irqs_disabled())
1425 dev_kfree_skb_irq(skb);
1426 else
1427 dev_kfree_skb(skb);
1428}
1429EXPORT_SYMBOL(dev_kfree_skb_any);
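/*
 * Illustrative use (foo_* names hypothetical): dev_kfree_skb_irq() is
 * for hard-irq context such as a TX-completion interrupt, where the
 * skb is deferred to the NET_TX softirq above; dev_kfree_skb_any()
 * is for code that may run in either context:
 *
 *	static irqreturn_t foo_tx_irq(int irq, void *dev_id)
 *	{
 *		struct foo_priv *priv = dev_id;
 *
 *		dev_kfree_skb_irq(priv->tx_skb);
 *		return IRQ_HANDLED;
 *	}
 */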
1430
1431
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001432/**
1433 * netif_device_detach - mark device as removed
1434 * @dev: network device
1435 *
1436 * Mark device as removed from system and therefore no longer available.
1437 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001438void netif_device_detach(struct net_device *dev)
1439{
1440 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1441 netif_running(dev)) {
1442 netif_stop_queue(dev);
1443 }
1444}
1445EXPORT_SYMBOL(netif_device_detach);
1446
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001447/**
1448 * netif_device_attach - mark device as attached
1449 * @dev: network device
1450 *
1451 * Mark device as attached to the system and restart if needed.
1452 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001453void netif_device_attach(struct net_device *dev)
1454{
1455 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1456 netif_running(dev)) {
1457 netif_wake_queue(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001458 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001459 }
1460}
1461EXPORT_SYMBOL(netif_device_attach);
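/*
 * These are typically paired in a driver's suspend/resume path; a
 * minimal sketch with hypothetical foo_* names:
 *
 *	static int foo_suspend(struct pci_dev *pdev, pm_message_t state)
 *	{
 *		netif_device_detach(pci_get_drvdata(pdev));
 *		return 0;
 *	}
 *
 *	static int foo_resume(struct pci_dev *pdev)
 *	{
 *		netif_device_attach(pci_get_drvdata(pdev));
 *		return 0;
 *	}
 */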
1462
Ben Hutchings6de329e2008-06-16 17:02:28 -07001463static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1464{
1465 return ((features & NETIF_F_GEN_CSUM) ||
1466 ((features & NETIF_F_IP_CSUM) &&
1467 protocol == htons(ETH_P_IP)) ||
1468 ((features & NETIF_F_IPV6_CSUM) &&
1469 protocol == htons(ETH_P_IPV6)));
1470}
1471
1472static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1473{
1474 if (can_checksum_protocol(dev->features, skb->protocol))
1475 return true;
1476
1477 if (skb->protocol == htons(ETH_P_8021Q)) {
1478 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1479 if (can_checksum_protocol(dev->features & dev->vlan_features,
1480 veh->h_vlan_encapsulated_proto))
1481 return true;
1482 }
1483
1484 return false;
1485}
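/*
 * Note for the vlan branch above (sketch, assuming a driver whose
 * hardware can checksum inside VLAN tags): the device must advertise
 * the offload in vlan_features as well as features, e.g.:
 *
 *	dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
 *	dev->vlan_features = dev->features;
 */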
Denis Vlasenko56079432006-03-29 15:57:29 -08001486
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487/*
1488 * Invalidate hardware checksum when packet is to be mangled, and
1489 * complete checksum manually on outgoing path.
1490 */
Patrick McHardy84fa7932006-08-29 16:44:56 -07001491int skb_checksum_help(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492{
Al Virod3bc23e2006-11-14 21:24:49 -08001493 __wsum csum;
Herbert Xu663ead32007-04-09 11:59:07 -07001494 int ret = 0, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495
Patrick McHardy84fa7932006-08-29 16:44:56 -07001496 if (skb->ip_summed == CHECKSUM_COMPLETE)
Herbert Xua430a432006-07-08 13:34:56 -07001497 goto out_set_summed;
1498
1499 if (unlikely(skb_shinfo(skb)->gso_size)) {
Herbert Xua430a432006-07-08 13:34:56 -07001500 /* Let GSO fix up the checksum. */
1501 goto out_set_summed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502 }
1503
Herbert Xua0308472007-10-15 01:47:15 -07001504 offset = skb->csum_start - skb_headroom(skb);
1505 BUG_ON(offset >= skb_headlen(skb));
1506 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1507
1508 offset += skb->csum_offset;
1509 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1510
1511 if (skb_cloned(skb) &&
1512 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1514 if (ret)
1515 goto out;
1516 }
1517
Herbert Xua0308472007-10-15 01:47:15 -07001518 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
Herbert Xua430a432006-07-08 13:34:56 -07001519out_set_summed:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520 skb->ip_summed = CHECKSUM_NONE;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001521out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522 return ret;
1523}
1524
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001525/**
1526 * skb_gso_segment - Perform segmentation on skb.
1527 * @skb: buffer to segment
Herbert Xu576a30e2006-06-27 13:22:38 -07001528 * @features: features for the output path (see dev->features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001529 *
1530 * This function segments the given skb and returns a list of segments.
Herbert Xu576a30e2006-06-27 13:22:38 -07001531 *
1532 * It may return NULL if the skb requires no segmentation. This is
1533 * only possible when GSO is used for verifying header integrity.
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001534 */
Herbert Xu576a30e2006-06-27 13:22:38 -07001535struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001536{
1537 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1538 struct packet_type *ptype;
Al Viro252e3342006-11-14 20:48:11 -08001539 __be16 type = skb->protocol;
Herbert Xua430a432006-07-08 13:34:56 -07001540 int err;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001541
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001542 skb_reset_mac_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001543 skb->mac_len = skb->network_header - skb->mac_header;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001544 __skb_pull(skb, skb->mac_len);
1545
Herbert Xuf9d106a2007-04-23 22:36:13 -07001546 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001547 if (skb_header_cloned(skb) &&
1548 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1549 return ERR_PTR(err);
1550 }
1551
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001552 rcu_read_lock();
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08001553 list_for_each_entry_rcu(ptype,
1554 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001555 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
Patrick McHardy84fa7932006-08-29 16:44:56 -07001556 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001557 err = ptype->gso_send_check(skb);
1558 segs = ERR_PTR(err);
1559 if (err || skb_gso_ok(skb, features))
1560 break;
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001561 __skb_push(skb, (skb->data -
1562 skb_network_header(skb)));
Herbert Xua430a432006-07-08 13:34:56 -07001563 }
Herbert Xu576a30e2006-06-27 13:22:38 -07001564 segs = ptype->gso_segment(skb, features);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001565 break;
1566 }
1567 }
1568 rcu_read_unlock();
1569
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001570 __skb_push(skb, skb->data - skb_mac_header(skb));
Herbert Xu576a30e2006-06-27 13:22:38 -07001571
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001572 return segs;
1573}
1574
1575EXPORT_SYMBOL(skb_gso_segment);
1576
Herbert Xufb286bb2005-11-10 13:01:24 -08001577/* Take action when hardware reception checksum errors are detected. */
1578#ifdef CONFIG_BUG
1579void netdev_rx_csum_fault(struct net_device *dev)
1580{
1581 if (net_ratelimit()) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001582 printk(KERN_ERR "%s: hw csum failure.\n",
Stephen Hemminger246a4212005-12-08 15:21:39 -08001583 dev ? dev->name : "<unknown>");
Herbert Xufb286bb2005-11-10 13:01:24 -08001584 dump_stack();
1585 }
1586}
1587EXPORT_SYMBOL(netdev_rx_csum_fault);
1588#endif
1589
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590/* Actually, we should eliminate this check as soon as we know that:
1591 * 1. An IOMMU is present and can map all of the machine's memory.
1592 * 2. No high memory really exists on this machine.
1593 */
1594
1595static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1596{
Herbert Xu3d3a8532006-06-27 13:33:10 -07001597#ifdef CONFIG_HIGHMEM
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598 int i;
1599
1600 if (dev->features & NETIF_F_HIGHDMA)
1601 return 0;
1602
1603 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1604 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1605 return 1;
1606
Herbert Xu3d3a8532006-06-27 13:33:10 -07001607#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608 return 0;
1609}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001611struct dev_gso_cb {
1612 void (*destructor)(struct sk_buff *skb);
1613};
1614
1615#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1616
1617static void dev_gso_skb_destructor(struct sk_buff *skb)
1618{
1619 struct dev_gso_cb *cb;
1620
1621 do {
1622 struct sk_buff *nskb = skb->next;
1623
1624 skb->next = nskb->next;
1625 nskb->next = NULL;
1626 kfree_skb(nskb);
1627 } while (skb->next);
1628
1629 cb = DEV_GSO_CB(skb);
1630 if (cb->destructor)
1631 cb->destructor(skb);
1632}
1633
1634/**
1635 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1636 * @skb: buffer to segment
1637 *
1638 * This function segments the given skb and stores the list of segments
1639 * in skb->next.
1640 */
1641static int dev_gso_segment(struct sk_buff *skb)
1642{
1643 struct net_device *dev = skb->dev;
1644 struct sk_buff *segs;
Herbert Xu576a30e2006-06-27 13:22:38 -07001645 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1646 NETIF_F_SG : 0);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001647
Herbert Xu576a30e2006-06-27 13:22:38 -07001648 segs = skb_gso_segment(skb, features);
1649
1650 /* Verifying header integrity only. */
1651 if (!segs)
1652 return 0;
1653
Hirofumi Nakagawa801678c2008-04-29 01:03:09 -07001654 if (IS_ERR(segs))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001655 return PTR_ERR(segs);
1656
1657 skb->next = segs;
1658 DEV_GSO_CB(skb)->destructor = skb->destructor;
1659 skb->destructor = dev_gso_skb_destructor;
1660
1661 return 0;
1662}
1663
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001664int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1665 struct netdev_queue *txq)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001666{
Stephen Hemminger00829822008-11-20 20:14:53 -08001667 const struct net_device_ops *ops = dev->netdev_ops;
1668
1669 prefetch(&dev->netdev_ops->ndo_start_xmit);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001670 if (likely(!skb->next)) {
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -07001671 if (!list_empty(&ptype_all))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001672 dev_queue_xmit_nit(skb, dev);
1673
Herbert Xu576a30e2006-06-27 13:22:38 -07001674 if (netif_needs_gso(dev, skb)) {
1675 if (unlikely(dev_gso_segment(skb)))
1676 goto out_kfree_skb;
1677 if (skb->next)
1678 goto gso;
1679 }
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001680
Stephen Hemminger00829822008-11-20 20:14:53 -08001681 return ops->ndo_start_xmit(skb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001682 }
1683
Herbert Xu576a30e2006-06-27 13:22:38 -07001684gso:
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001685 do {
1686 struct sk_buff *nskb = skb->next;
1687 int rc;
1688
1689 skb->next = nskb->next;
1690 nskb->next = NULL;
Stephen Hemminger00829822008-11-20 20:14:53 -08001691 rc = ops->ndo_start_xmit(nskb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001692 if (unlikely(rc)) {
Michael Chanf54d9e82006-06-25 23:57:04 -07001693 nskb->next = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001694 skb->next = nskb;
1695 return rc;
1696 }
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001697 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
Michael Chanf54d9e82006-06-25 23:57:04 -07001698 return NETDEV_TX_BUSY;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001699 } while (skb->next);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001700
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001701 skb->destructor = DEV_GSO_CB(skb)->destructor;
1702
1703out_kfree_skb:
1704 kfree_skb(skb);
1705 return 0;
1706}
1707
David S. Millerb6b2fed2008-07-21 09:48:06 -07001708static u32 simple_tx_hashrnd;
1709static int simple_tx_hashrnd_initialized = 0;
1710
David S. Miller8f0f2222008-07-15 03:47:03 -07001711static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1712{
David S. Millerb6b2fed2008-07-21 09:48:06 -07001713 u32 addr1, addr2, ports;
1714 u32 hash, ihl;
Alexander Duyckad55dca2008-09-20 22:05:50 -07001715 u8 ip_proto = 0;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001716
1717 if (unlikely(!simple_tx_hashrnd_initialized)) {
1718 get_random_bytes(&simple_tx_hashrnd, 4);
1719 simple_tx_hashrnd_initialized = 1;
1720 }
David S. Miller8f0f2222008-07-15 03:47:03 -07001721
1722 switch (skb->protocol) {
Arnaldo Carvalho de Melo60678042008-09-20 22:20:49 -07001723 case htons(ETH_P_IP):
Alexander Duyckad55dca2008-09-20 22:05:50 -07001724 if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1725 ip_proto = ip_hdr(skb)->protocol;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001726 addr1 = ip_hdr(skb)->saddr;
1727 addr2 = ip_hdr(skb)->daddr;
David S. Miller8f0f2222008-07-15 03:47:03 -07001728 ihl = ip_hdr(skb)->ihl;
David S. Miller8f0f2222008-07-15 03:47:03 -07001729 break;
Arnaldo Carvalho de Melo60678042008-09-20 22:20:49 -07001730 case htons(ETH_P_IPV6):
David S. Miller8f0f2222008-07-15 03:47:03 -07001731 ip_proto = ipv6_hdr(skb)->nexthdr;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001732 addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1733 addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
David S. Miller8f0f2222008-07-15 03:47:03 -07001734 ihl = (40 >> 2);
David S. Miller8f0f2222008-07-15 03:47:03 -07001735 break;
1736 default:
1737 return 0;
1738 }
1739
David S. Miller8f0f2222008-07-15 03:47:03 -07001740
1741 switch (ip_proto) {
1742 case IPPROTO_TCP:
1743 case IPPROTO_UDP:
1744 case IPPROTO_DCCP:
1745 case IPPROTO_ESP:
1746 case IPPROTO_AH:
1747 case IPPROTO_SCTP:
1748 case IPPROTO_UDPLITE:
David S. Millerb6b2fed2008-07-21 09:48:06 -07001749 ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
David S. Miller8f0f2222008-07-15 03:47:03 -07001750 break;
1751
1752 default:
David S. Millerb6b2fed2008-07-21 09:48:06 -07001753 ports = 0;
David S. Miller8f0f2222008-07-15 03:47:03 -07001754 break;
1755 }
1756
David S. Millerb6b2fed2008-07-21 09:48:06 -07001757 hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1758
1759 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
David S. Miller8f0f2222008-07-15 03:47:03 -07001760}
1761
David S. Millere8a04642008-07-17 00:34:19 -07001762static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1763 struct sk_buff *skb)
1764{
Stephen Hemminger00829822008-11-20 20:14:53 -08001765 const struct net_device_ops *ops = dev->netdev_ops;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001766 u16 queue_index = 0;
1767
Stephen Hemminger00829822008-11-20 20:14:53 -08001768 if (ops->ndo_select_queue)
1769 queue_index = ops->ndo_select_queue(dev, skb);
David S. Miller8f0f2222008-07-15 03:47:03 -07001770 else if (dev->real_num_tx_queues > 1)
1771 queue_index = simple_tx_hash(dev, skb);
David S. Millereae792b2008-07-15 03:03:33 -07001772
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001773 skb_set_queue_mapping(skb, queue_index);
1774 return netdev_get_tx_queue(dev, queue_index);
David S. Millere8a04642008-07-17 00:34:19 -07001775}
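/*
 * A driver with its own queueing policy can override simple_tx_hash()
 * by providing ndo_select_queue; a hypothetical sketch (the foo_*
 * names are invented here):
 *
 *	static u16 foo_select_queue(struct net_device *dev,
 *				    struct sk_buff *skb)
 *	{
 *		return skb->priority % dev->real_num_tx_queues;
 *	}
 *
 *	static const struct net_device_ops foo_netdev_ops = {
 *		.ndo_select_queue	= foo_select_queue,
 *		...
 *	};
 */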
1776
Dave Jonesd29f7492008-07-22 14:09:06 -07001777/**
1778 * dev_queue_xmit - transmit a buffer
1779 * @skb: buffer to transmit
1780 *
1781 * Queue a buffer for transmission to a network device. The caller must
1782 * have set the device and priority and built the buffer before calling
1783 * this function. The function can be called from an interrupt.
1784 *
1785 * A negative errno code is returned on a failure. A success does not
1786 * guarantee the frame will be transmitted as it may be dropped due
1787 * to congestion or traffic shaping.
1788 *
1789 * -----------------------------------------------------------------------------------
1790 * I notice this method can also return errors from the queue disciplines,
1791 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1792 * be positive.
1793 *
1794 * Regardless of the return value, the skb is consumed, so it is currently
1795 * difficult to retry a send to this method. (You can bump the ref count
1796 * before sending to hold a reference for retry if you are careful.)
1797 *
1798 * When calling this method, interrupts MUST be enabled. This is because
1799 * the BH enable code must have IRQs enabled so that it will not deadlock.
1800 * --BLG
1801 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802int dev_queue_xmit(struct sk_buff *skb)
1803{
1804 struct net_device *dev = skb->dev;
David S. Millerdc2b4842008-07-08 17:18:23 -07001805 struct netdev_queue *txq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806 struct Qdisc *q;
1807 int rc = -ENOMEM;
1808
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001809 /* GSO will handle the following emulations directly. */
1810 if (netif_needs_gso(dev, skb))
1811 goto gso;
1812
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813 if (skb_shinfo(skb)->frag_list &&
1814 !(dev->features & NETIF_F_FRAGLIST) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001815 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816 goto out_kfree_skb;
1817
1818 /* Fragmented skb is linearized if device does not support SG,
1819	 * or if at least one of the fragments is in highmem and the device
1820 * does not support DMA from it.
1821 */
1822 if (skb_shinfo(skb)->nr_frags &&
1823 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001824 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825 goto out_kfree_skb;
1826
1827 /* If packet is not checksummed and device does not support
1828 * checksumming for this protocol, complete checksumming here.
1829 */
Herbert Xu663ead32007-04-09 11:59:07 -07001830 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1831 skb_set_transport_header(skb, skb->csum_start -
1832 skb_headroom(skb));
Ben Hutchings6de329e2008-06-16 17:02:28 -07001833 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1834 goto out_kfree_skb;
Herbert Xu663ead32007-04-09 11:59:07 -07001835 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001837gso:
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001838 /* Disable soft irqs for various locks below. Also
1839 * stops preemption for RCU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001841 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842
David S. Millereae792b2008-07-15 03:03:33 -07001843 txq = dev_pick_tx(dev, skb);
David S. Millerb0e1e642008-07-08 17:42:10 -07001844 q = rcu_dereference(txq->qdisc);
David S. Miller37437bb2008-07-16 02:15:04 -07001845
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846#ifdef CONFIG_NET_CLS_ACT
1847	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
1848#endif
1849 if (q->enqueue) {
David S. Miller5fb66222008-08-02 20:02:43 -07001850 spinlock_t *root_lock = qdisc_lock(q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851
David S. Miller37437bb2008-07-16 02:15:04 -07001852 spin_lock(root_lock);
1853
David S. Millera9312ae2008-08-17 21:51:03 -07001854 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
David S. Miller96d20312008-08-17 23:37:16 -07001855 kfree_skb(skb);
David S. Millera9312ae2008-08-17 21:51:03 -07001856 rc = NET_XMIT_DROP;
David S. Miller96d20312008-08-17 23:37:16 -07001857 } else {
1858 rc = qdisc_enqueue_root(skb, q);
1859 qdisc_run(q);
David S. Millera9312ae2008-08-17 21:51:03 -07001860 }
David S. Miller37437bb2008-07-16 02:15:04 -07001861 spin_unlock(root_lock);
1862
David S. Miller37437bb2008-07-16 02:15:04 -07001863 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864 }
1865
1866 /* The device has no queue. Common case for software devices:
1867	   loopback, all sorts of tunnels...
1868
Herbert Xu932ff272006-06-09 12:20:56 -07001869	   Really, it is unlikely that netif_tx_lock protection is necessary
1870	   here. (e.g. loopback and IP tunnels are clean, ignoring statistics
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871	   counters.)
1872	   However, it is possible that they rely on the protection
1873	   made by us here.
1874
1875	   Check this and take the lock. It is not prone to deadlocks.
1876	   Either way, the noqueue qdisc case is even simpler 8)
1877 */
1878 if (dev->flags & IFF_UP) {
1879 int cpu = smp_processor_id(); /* ok because BHs are off */
1880
David S. Millerc773e842008-07-08 23:13:53 -07001881 if (txq->xmit_lock_owner != cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882
David S. Millerc773e842008-07-08 23:13:53 -07001883 HARD_TX_LOCK(dev, txq, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001885 if (!netif_tx_queue_stopped(txq)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001886 rc = 0;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001887 if (!dev_hard_start_xmit(skb, dev, txq)) {
David S. Millerc773e842008-07-08 23:13:53 -07001888 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889 goto out;
1890 }
1891 }
David S. Millerc773e842008-07-08 23:13:53 -07001892 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893 if (net_ratelimit())
1894 printk(KERN_CRIT "Virtual device %s asks to "
1895 "queue packet!\n", dev->name);
1896 } else {
1897 /* Recursion is detected! It is possible,
1898 * unfortunately */
1899 if (net_ratelimit())
1900 printk(KERN_CRIT "Dead loop on virtual device "
1901 "%s, fix it urgently!\n", dev->name);
1902 }
1903 }
1904
1905 rc = -ENETDOWN;
Herbert Xud4828d82006-06-22 02:28:18 -07001906 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907
1908out_kfree_skb:
1909 kfree_skb(skb);
1910 return rc;
1911out:
Herbert Xud4828d82006-06-22 02:28:18 -07001912 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 return rc;
1914}
1915
1916
1917/*=======================================================================
1918 Receiver routines
1919 =======================================================================*/
1920
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07001921int netdev_max_backlog __read_mostly = 1000;
1922int netdev_budget __read_mostly = 300;
1923int weight_p __read_mostly = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924
1925DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1926
1927
Linus Torvalds1da177e2005-04-16 15:20:36 -07001928/**
1929 * netif_rx - post buffer to the network code
1930 * @skb: buffer to post
1931 *
1932 * This function receives a packet from a device driver and queues it for
1933 * the upper (protocol) levels to process. It always succeeds. The buffer
1934 * may be dropped during processing for congestion control or by the
1935 * protocol layers.
1936 *
1937 * return values:
1938 * NET_RX_SUCCESS (no congestion)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 * NET_RX_DROP (packet was dropped)
1940 *
1941 */
1942
1943int netif_rx(struct sk_buff *skb)
1944{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945 struct softnet_data *queue;
1946 unsigned long flags;
1947
1948 /* if netpoll wants it, pretend we never saw it */
1949 if (netpoll_rx(skb))
1950 return NET_RX_DROP;
1951
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001952 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001953 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954
1955 /*
1956	 * The code is rearranged so that the path is shortest
1957	 * when the CPU is congested but still operating.
1958 */
1959 local_irq_save(flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 queue = &__get_cpu_var(softnet_data);
1961
1962 __get_cpu_var(netdev_rx_stat).total++;
1963 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1964 if (queue->input_pkt_queue.qlen) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965enqueue:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966 __skb_queue_tail(&queue->input_pkt_queue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967 local_irq_restore(flags);
Stephen Hemminger34008d82005-06-23 20:10:00 -07001968 return NET_RX_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969 }
1970
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001971 napi_schedule(&queue->backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972 goto enqueue;
1973 }
1974
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975 __get_cpu_var(netdev_rx_stat).dropped++;
1976 local_irq_restore(flags);
1977
1978 kfree_skb(skb);
1979 return NET_RX_DROP;
1980}
1981
1982int netif_rx_ni(struct sk_buff *skb)
1983{
1984 int err;
1985
1986 preempt_disable();
1987 err = netif_rx(skb);
1988 if (local_softirq_pending())
1989 do_softirq();
1990 preempt_enable();
1991
1992 return err;
1993}
1994
1995EXPORT_SYMBOL(netif_rx_ni);
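/*
 * Classic non-NAPI rx sketch (hypothetical driver; pkt_len and rx_buf
 * are placeholders for the frame in the rx descriptor): build the skb
 * in the rx interrupt and hand it to netif_rx(); netif_rx_ni() is the
 * same entry point for process context.
 *
 *	skb = dev_alloc_skb(pkt_len + NET_IP_ALIGN);
 *	if (!skb)
 *		return;
 *	skb_reserve(skb, NET_IP_ALIGN);
 *	memcpy(skb_put(skb, pkt_len), rx_buf, pkt_len);
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);
 */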
1996
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997static void net_tx_action(struct softirq_action *h)
1998{
1999 struct softnet_data *sd = &__get_cpu_var(softnet_data);
2000
2001 if (sd->completion_queue) {
2002 struct sk_buff *clist;
2003
2004 local_irq_disable();
2005 clist = sd->completion_queue;
2006 sd->completion_queue = NULL;
2007 local_irq_enable();
2008
2009 while (clist) {
2010 struct sk_buff *skb = clist;
2011 clist = clist->next;
2012
Ilpo Järvinen547b7922008-07-25 21:43:18 -07002013 WARN_ON(atomic_read(&skb->users));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 __kfree_skb(skb);
2015 }
2016 }
2017
2018 if (sd->output_queue) {
David S. Miller37437bb2008-07-16 02:15:04 -07002019 struct Qdisc *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020
2021 local_irq_disable();
2022 head = sd->output_queue;
2023 sd->output_queue = NULL;
2024 local_irq_enable();
2025
2026 while (head) {
David S. Miller37437bb2008-07-16 02:15:04 -07002027 struct Qdisc *q = head;
2028 spinlock_t *root_lock;
2029
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030 head = head->next_sched;
2031
David S. Miller5fb66222008-08-02 20:02:43 -07002032 root_lock = qdisc_lock(q);
David S. Miller37437bb2008-07-16 02:15:04 -07002033 if (spin_trylock(root_lock)) {
Jarek Poplawskidef82a12008-08-17 21:54:43 -07002034 smp_mb__before_clear_bit();
2035 clear_bit(__QDISC_STATE_SCHED,
2036 &q->state);
David S. Miller37437bb2008-07-16 02:15:04 -07002037 qdisc_run(q);
2038 spin_unlock(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039 } else {
David S. Miller195648b2008-08-19 04:00:36 -07002040 if (!test_bit(__QDISC_STATE_DEACTIVATED,
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002041 &q->state)) {
David S. Miller195648b2008-08-19 04:00:36 -07002042 __netif_reschedule(q);
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002043 } else {
2044 smp_mb__before_clear_bit();
2045 clear_bit(__QDISC_STATE_SCHED,
2046 &q->state);
2047 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048 }
2049 }
2050 }
2051}
2052
Stephen Hemminger6f05f622007-03-08 20:46:03 -08002053static inline int deliver_skb(struct sk_buff *skb,
2054 struct packet_type *pt_prev,
2055 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056{
2057 atomic_inc(&skb->users);
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002058 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059}
2060
2061#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
Stephen Hemminger6229e362007-03-21 13:38:47 -07002062/* These hooks defined here for ATM */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063struct net_bridge;
2064struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2065 unsigned char *addr);
Stephen Hemminger6229e362007-03-21 13:38:47 -07002066void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067
Stephen Hemminger6229e362007-03-21 13:38:47 -07002068/*
2069 * If bridge module is loaded call bridging hook.
2070 * returns NULL if packet was consumed.
2071 */
2072struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2073 struct sk_buff *skb) __read_mostly;
2074static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2075 struct packet_type **pt_prev, int *ret,
2076 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077{
2078 struct net_bridge_port *port;
2079
Stephen Hemminger6229e362007-03-21 13:38:47 -07002080 if (skb->pkt_type == PACKET_LOOPBACK ||
2081 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2082 return skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083
2084 if (*pt_prev) {
Stephen Hemminger6229e362007-03-21 13:38:47 -07002085 *ret = deliver_skb(skb, *pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002086 *pt_prev = NULL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002087 }
2088
Stephen Hemminger6229e362007-03-21 13:38:47 -07002089 return br_handle_frame_hook(port, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090}
2091#else
Stephen Hemminger6229e362007-03-21 13:38:47 -07002092#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093#endif
2094
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002095#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2096struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2097EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2098
2099static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2100 struct packet_type **pt_prev,
2101 int *ret,
2102 struct net_device *orig_dev)
2103{
2104 if (skb->dev->macvlan_port == NULL)
2105 return skb;
2106
2107 if (*pt_prev) {
2108 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2109 *pt_prev = NULL;
2110 }
2111 return macvlan_handle_frame_hook(skb);
2112}
2113#else
2114#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2115#endif
2116
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117#ifdef CONFIG_NET_CLS_ACT
2118/* TODO: Maybe we should just force sch_ingress to be compiled in
2119 * when CONFIG_NET_CLS_ACT is? Otherwise we pay a few useless
2120 * instructions (a compare and two extra stores) when it is off
2121 * but CONFIG_NET_CLS_ACT is on.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002122 * NOTE: This doesn't stop any functionality; if you don't have
2123 * the ingress scheduler, you just can't add policies on ingress.
2124 *
2125 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002126static int ing_filter(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128 struct net_device *dev = skb->dev;
Herbert Xuf697c3e2007-10-14 00:38:47 -07002129 u32 ttl = G_TC_RTTL(skb->tc_verd);
David S. Miller555353c2008-07-08 17:33:13 -07002130 struct netdev_queue *rxq;
2131 int result = TC_ACT_OK;
2132 struct Qdisc *q;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002133
Herbert Xuf697c3e2007-10-14 00:38:47 -07002134 if (MAX_RED_LOOP < ttl++) {
2135 printk(KERN_WARNING
2136 "Redir loop detected Dropping packet (%d->%d)\n",
2137 skb->iif, dev->ifindex);
2138 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139 }
2140
Herbert Xuf697c3e2007-10-14 00:38:47 -07002141 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2142 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2143
David S. Miller555353c2008-07-08 17:33:13 -07002144 rxq = &dev->rx_queue;
2145
David S. Miller83874002008-07-17 00:53:03 -07002146 q = rxq->qdisc;
David S. Miller8d50b532008-07-30 02:37:46 -07002147 if (q != &noop_qdisc) {
David S. Miller83874002008-07-17 00:53:03 -07002148 spin_lock(qdisc_lock(q));
David S. Millera9312ae2008-08-17 21:51:03 -07002149 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2150 result = qdisc_enqueue_root(skb, q);
David S. Miller83874002008-07-17 00:53:03 -07002151 spin_unlock(qdisc_lock(q));
2152 }
Herbert Xuf697c3e2007-10-14 00:38:47 -07002153
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 return result;
2155}
Herbert Xuf697c3e2007-10-14 00:38:47 -07002156
2157static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2158 struct packet_type **pt_prev,
2159 int *ret, struct net_device *orig_dev)
2160{
David S. Miller8d50b532008-07-30 02:37:46 -07002161 if (skb->dev->rx_queue.qdisc == &noop_qdisc)
Herbert Xuf697c3e2007-10-14 00:38:47 -07002162 goto out;
2163
2164 if (*pt_prev) {
2165 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2166 *pt_prev = NULL;
2167 } else {
2168 /* Huh? Why does turning on AF_PACKET affect this? */
2169 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2170 }
2171
2172 switch (ing_filter(skb)) {
2173 case TC_ACT_SHOT:
2174 case TC_ACT_STOLEN:
2175 kfree_skb(skb);
2176 return NULL;
2177 }
2178
2179out:
2180 skb->tc_verd = 0;
2181 return skb;
2182}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183#endif
2184
Patrick McHardybc1d0412008-07-14 22:49:30 -07002185/*
2186 * netif_nit_deliver - deliver received packets to network taps
2187 * @skb: buffer
2188 *
2189 * This function is used to deliver incoming packets to network
2190 * taps. It should be used when the normal netif_receive_skb path
2191 * is bypassed, for example because of VLAN acceleration.
2192 */
2193void netif_nit_deliver(struct sk_buff *skb)
2194{
2195 struct packet_type *ptype;
2196
2197 if (list_empty(&ptype_all))
2198 return;
2199
2200 skb_reset_network_header(skb);
2201 skb_reset_transport_header(skb);
2202 skb->mac_len = skb->network_header - skb->mac_header;
2203
2204 rcu_read_lock();
2205 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2206 if (!ptype->dev || ptype->dev == skb->dev)
2207 deliver_skb(skb, ptype, skb->dev);
2208 }
2209 rcu_read_unlock();
2210}
2211
Stephen Hemminger3b582cc2007-11-01 02:21:47 -07002212/**
2213 * netif_receive_skb - process receive buffer from network
2214 * @skb: buffer to process
2215 *
2216 * netif_receive_skb() is the main receive data processing function.
2217 * It always succeeds. The buffer may be dropped during processing
2218 * for congestion control or by the protocol layers.
2219 *
2220 * This function may only be called from softirq context and interrupts
2221 * should be enabled.
2222 *
2223 * Return values (usually ignored):
2224 * NET_RX_SUCCESS: no congestion
2225 * NET_RX_DROP: packet was dropped
2226 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227int netif_receive_skb(struct sk_buff *skb)
2228{
2229 struct packet_type *ptype, *pt_prev;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002230 struct net_device *orig_dev;
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002231 struct net_device *null_or_orig;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232 int ret = NET_RX_DROP;
Al Viro252e3342006-11-14 20:48:11 -08002233 __be16 type;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234
Patrick McHardy9b22ea52008-11-04 14:49:57 -08002235 if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2236 return NET_RX_SUCCESS;
2237
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238 /* if we've gotten here through NAPI, check netpoll */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002239 if (netpoll_receive_skb(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002240 return NET_RX_DROP;
2241
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07002242 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07002243 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244
Patrick McHardyc01003c2007-03-29 11:46:52 -07002245 if (!skb->iif)
2246 skb->iif = skb->dev->ifindex;
David S. Miller86e65da2005-08-09 19:36:29 -07002247
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002248 null_or_orig = NULL;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002249 orig_dev = skb->dev;
2250 if (orig_dev->master) {
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002251 if (skb_bond_should_drop(skb))
2252 null_or_orig = orig_dev; /* deliver only exact match */
2253 else
2254 skb->dev = orig_dev->master;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002255 }
Jay Vosburgh8f903c72006-02-21 16:36:44 -08002256
Linus Torvalds1da177e2005-04-16 15:20:36 -07002257 __get_cpu_var(netdev_rx_stat).total++;
2258
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002259 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03002260 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07002261 skb->mac_len = skb->network_header - skb->mac_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262
2263 pt_prev = NULL;
2264
2265 rcu_read_lock();
2266
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -07002267 /* Don't receive packets in an exiting network namespace */
Eric W. Biederman0a36b342008-11-05 16:00:24 -08002268 if (!net_alive(dev_net(skb->dev))) {
2269 kfree_skb(skb);
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -07002270 goto out;
Eric W. Biederman0a36b342008-11-05 16:00:24 -08002271 }
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -07002272
Linus Torvalds1da177e2005-04-16 15:20:36 -07002273#ifdef CONFIG_NET_CLS_ACT
2274 if (skb->tc_verd & TC_NCLS) {
2275 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2276 goto ncls;
2277 }
2278#endif
2279
2280 list_for_each_entry_rcu(ptype, &ptype_all, list) {
Joe Eykholtf9823072008-07-02 18:22:02 -07002281 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2282 ptype->dev == orig_dev) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002283 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002284 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 pt_prev = ptype;
2286 }
2287 }
2288
2289#ifdef CONFIG_NET_CLS_ACT
Herbert Xuf697c3e2007-10-14 00:38:47 -07002290 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2291 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293ncls:
2294#endif
2295
Stephen Hemminger6229e362007-03-21 13:38:47 -07002296 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2297 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298 goto out;
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002299 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2300 if (!skb)
2301 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302
2303 type = skb->protocol;
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002304 list_for_each_entry_rcu(ptype,
2305 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306 if (ptype->type == type &&
Joe Eykholtf9823072008-07-02 18:22:02 -07002307 (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2308 ptype->dev == orig_dev)) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002309 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002310 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311 pt_prev = ptype;
2312 }
2313 }
2314
2315 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002316 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317 } else {
2318 kfree_skb(skb);
2319		/* Jamal, now you will not be able to escape explaining
2320		 * to me how you were going to use this. :-)
2321 */
2322 ret = NET_RX_DROP;
2323 }
2324
2325out:
2326 rcu_read_unlock();
2327 return ret;
2328}
2329
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07002330/* Network device is going away, flush any packets still pending */
2331static void flush_backlog(void *arg)
2332{
2333 struct net_device *dev = arg;
2334 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2335 struct sk_buff *skb, *tmp;
2336
2337 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2338 if (skb->dev == dev) {
2339 __skb_unlink(skb, &queue->input_pkt_queue);
2340 kfree_skb(skb);
2341 }
2342}
2343
Herbert Xud565b0a2008-12-15 23:38:52 -08002344static int napi_gro_complete(struct sk_buff *skb)
2345{
2346 struct packet_type *ptype;
2347 __be16 type = skb->protocol;
2348 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2349 int err = -ENOENT;
2350
Herbert Xu5d38a072009-01-04 16:13:40 -08002351 if (NAPI_GRO_CB(skb)->count == 1)
Herbert Xud565b0a2008-12-15 23:38:52 -08002352 goto out;
2353
2354 rcu_read_lock();
2355 list_for_each_entry_rcu(ptype, head, list) {
2356 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2357 continue;
2358
2359 err = ptype->gro_complete(skb);
2360 break;
2361 }
2362 rcu_read_unlock();
2363
2364 if (err) {
2365 WARN_ON(&ptype->list == head);
2366 kfree_skb(skb);
2367 return NET_RX_SUCCESS;
2368 }
2369
2370out:
Herbert Xub5302562009-01-04 16:13:19 -08002371 skb_shinfo(skb)->gso_size = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002372 __skb_push(skb, -skb_network_offset(skb));
2373 return netif_receive_skb(skb);
2374}
2375
2376void napi_gro_flush(struct napi_struct *napi)
2377{
2378 struct sk_buff *skb, *next;
2379
2380 for (skb = napi->gro_list; skb; skb = next) {
2381 next = skb->next;
2382 skb->next = NULL;
2383 napi_gro_complete(skb);
2384 }
2385
2386 napi->gro_list = NULL;
2387}
2388EXPORT_SYMBOL(napi_gro_flush);
2389
Herbert Xu96e93ea2009-01-06 10:49:34 -08002390int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xud565b0a2008-12-15 23:38:52 -08002391{
2392 struct sk_buff **pp = NULL;
2393 struct packet_type *ptype;
2394 __be16 type = skb->protocol;
2395 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2396 int count = 0;
Herbert Xu0da2afd52008-12-26 14:57:42 -08002397 int same_flow;
Herbert Xud565b0a2008-12-15 23:38:52 -08002398 int mac_len;
Herbert Xu5d38a072009-01-04 16:13:40 -08002399 int free;
Herbert Xud565b0a2008-12-15 23:38:52 -08002400
2401 if (!(skb->dev->features & NETIF_F_GRO))
2402 goto normal;
2403
2404 rcu_read_lock();
2405 list_for_each_entry_rcu(ptype, head, list) {
2406 struct sk_buff *p;
2407
2408 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2409 continue;
2410
2411 skb_reset_network_header(skb);
2412 mac_len = skb->network_header - skb->mac_header;
2413 skb->mac_len = mac_len;
2414 NAPI_GRO_CB(skb)->same_flow = 0;
2415 NAPI_GRO_CB(skb)->flush = 0;
Herbert Xu5d38a072009-01-04 16:13:40 -08002416 NAPI_GRO_CB(skb)->free = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002417
2418 for (p = napi->gro_list; p; p = p->next) {
2419 count++;
Herbert Xu96e93ea2009-01-06 10:49:34 -08002420
2421 if (!NAPI_GRO_CB(p)->same_flow)
2422 continue;
2423
2424 if (p->mac_len != mac_len ||
2425 memcmp(skb_mac_header(p), skb_mac_header(skb),
2426 mac_len))
2427 NAPI_GRO_CB(p)->same_flow = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002428 }
2429
2430 pp = ptype->gro_receive(&napi->gro_list, skb);
2431 break;
2432 }
2433 rcu_read_unlock();
2434
2435 if (&ptype->list == head)
2436 goto normal;
2437
Herbert Xu0da2afd52008-12-26 14:57:42 -08002438 same_flow = NAPI_GRO_CB(skb)->same_flow;
Herbert Xu5d38a072009-01-04 16:13:40 -08002439 free = NAPI_GRO_CB(skb)->free;
Herbert Xu0da2afd52008-12-26 14:57:42 -08002440
Herbert Xud565b0a2008-12-15 23:38:52 -08002441 if (pp) {
2442 struct sk_buff *nskb = *pp;
2443
2444 *pp = nskb->next;
2445 nskb->next = NULL;
2446 napi_gro_complete(nskb);
2447 count--;
2448 }
2449
Herbert Xu0da2afd52008-12-26 14:57:42 -08002450 if (same_flow)
Herbert Xud565b0a2008-12-15 23:38:52 -08002451 goto ok;
2452
2453 if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
2454 __skb_push(skb, -skb_network_offset(skb));
2455 goto normal;
2456 }
2457
2458 NAPI_GRO_CB(skb)->count = 1;
Herbert Xub5302562009-01-04 16:13:19 -08002459 skb_shinfo(skb)->gso_size = skb->len;
Herbert Xud565b0a2008-12-15 23:38:52 -08002460 skb->next = napi->gro_list;
2461 napi->gro_list = skb;
2462
2463ok:
Herbert Xu5d38a072009-01-04 16:13:40 -08002464 return free;
Herbert Xud565b0a2008-12-15 23:38:52 -08002465
2466normal:
Herbert Xu5d38a072009-01-04 16:13:40 -08002467 return -1;
2468}
Herbert Xu96e93ea2009-01-06 10:49:34 -08002469EXPORT_SYMBOL(dev_gro_receive);
2470
2471static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2472{
2473 struct sk_buff *p;
2474
2475 for (p = napi->gro_list; p; p = p->next) {
2476 NAPI_GRO_CB(p)->same_flow = 1;
2477 NAPI_GRO_CB(p)->flush = 0;
2478 }
2479
2480 return dev_gro_receive(napi, skb);
2481}
Herbert Xu5d38a072009-01-04 16:13:40 -08002482
2483int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2484{
2485 switch (__napi_gro_receive(napi, skb)) {
2486 case -1:
2487 return netif_receive_skb(skb);
2488
2489 case 1:
2490 kfree_skb(skb);
2491 break;
2492 }
2493
2494 return NET_RX_SUCCESS;
Herbert Xud565b0a2008-12-15 23:38:52 -08002495}
2496EXPORT_SYMBOL(napi_gro_receive);
2497
Herbert Xu96e93ea2009-01-06 10:49:34 -08002498void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2499{
2500 skb_shinfo(skb)->nr_frags = 0;
2501
2502 skb->len -= skb->data_len;
2503 skb->truesize -= skb->data_len;
2504 skb->data_len = 0;
2505
2506 __skb_pull(skb, skb_headlen(skb));
2507 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2508
2509 napi->skb = skb;
2510}
2511EXPORT_SYMBOL(napi_reuse_skb);
2512
2513struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
2514 struct napi_gro_fraginfo *info)
Herbert Xu5d38a072009-01-04 16:13:40 -08002515{
2516 struct net_device *dev = napi->dev;
2517 struct sk_buff *skb = napi->skb;
Herbert Xu5d38a072009-01-04 16:13:40 -08002518
2519 napi->skb = NULL;
2520
2521 if (!skb) {
2522 skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
2523 if (!skb)
2524 goto out;
2525
2526 skb_reserve(skb, NET_IP_ALIGN);
2527 }
2528
2529 BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
2530 skb_shinfo(skb)->nr_frags = info->nr_frags;
2531 memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
2532
2533 skb->data_len = info->len;
2534 skb->len += info->len;
2535 skb->truesize += info->len;
2536
Herbert Xu96e93ea2009-01-06 10:49:34 -08002537 if (!pskb_may_pull(skb, ETH_HLEN)) {
2538 napi_reuse_skb(napi, skb);
2539 goto out;
2540 }
Herbert Xu5d38a072009-01-04 16:13:40 -08002541
2542 skb->protocol = eth_type_trans(skb, dev);
2543
2544 skb->ip_summed = info->ip_summed;
2545 skb->csum = info->csum;
2546
Herbert Xu96e93ea2009-01-06 10:49:34 -08002547out:
2548 return skb;
2549}
2550EXPORT_SYMBOL(napi_fraginfo_skb);
2551
2552int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
2553{
2554 struct sk_buff *skb = napi_fraginfo_skb(napi, info);
2555 int err = NET_RX_DROP;
2556
2557 if (!skb)
2558 goto out;
2559
2560 err = NET_RX_SUCCESS;
2561
Herbert Xu5d38a072009-01-04 16:13:40 -08002562 switch (__napi_gro_receive(napi, skb)) {
2563 case -1:
2564 return netif_receive_skb(skb);
2565
2566 case 0:
2567 goto out;
2568 }
2569
Herbert Xu96e93ea2009-01-06 10:49:34 -08002570 napi_reuse_skb(napi, skb);
Herbert Xu5d38a072009-01-04 16:13:40 -08002571
2572out:
2573 return err;
2574}
2575EXPORT_SYMBOL(napi_gro_frags);
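/*
 * Sketched use (hypothetical page-based rx driver; rx_page, offset and
 * len are placeholders from its rx descriptor): fill a
 * napi_gro_fraginfo and let the stack build the skb, as
 * napi_fraginfo_skb() above does:
 *
 *	struct napi_gro_fraginfo info;
 *
 *	info.frags[0].page        = rx_page;
 *	info.frags[0].page_offset = offset;
 *	info.frags[0].size        = len;
 *	info.nr_frags  = 1;
 *	info.len       = len;
 *	info.ip_summed = CHECKSUM_UNNECESSARY;
 *	napi_gro_frags(&priv->napi, &info);
 */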
2576
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002577static int process_backlog(struct napi_struct *napi, int quota)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578{
2579 int work = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002580 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2581 unsigned long start_time = jiffies;
2582
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002583 napi->weight = weight_p;
2584 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586
2587 local_irq_disable();
2588 skb = __skb_dequeue(&queue->input_pkt_queue);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002589 if (!skb) {
2590 __napi_complete(napi);
2591 local_irq_enable();
2592 break;
2593 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594 local_irq_enable();
2595
Herbert Xud565b0a2008-12-15 23:38:52 -08002596 napi_gro_receive(napi, skb);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002597 } while (++work < quota && jiffies == start_time);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598
Herbert Xud565b0a2008-12-15 23:38:52 -08002599 napi_gro_flush(napi);
2600
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002601 return work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602}
2603
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002604/**
2605 * __napi_schedule - schedule for receive
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07002606 * @n: entry to schedule
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002607 *
2608 * The entry's receive function will be scheduled to run
2609 */
Harvey Harrisonb5606c22008-02-13 15:03:16 -08002610void __napi_schedule(struct napi_struct *n)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002611{
2612 unsigned long flags;
2613
2614 local_irq_save(flags);
2615 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2616 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2617 local_irq_restore(flags);
2618}
2619EXPORT_SYMBOL(__napi_schedule);
2620
Herbert Xud565b0a2008-12-15 23:38:52 -08002621void __napi_complete(struct napi_struct *n)
2622{
2623 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2624 BUG_ON(n->gro_list);
2625
2626 list_del(&n->poll_list);
2627 smp_mb__before_clear_bit();
2628 clear_bit(NAPI_STATE_SCHED, &n->state);
2629}
2630EXPORT_SYMBOL(__napi_complete);
2631
2632void napi_complete(struct napi_struct *n)
2633{
2634 unsigned long flags;
2635
2636 /*
2637 * don't let napi dequeue from the cpu poll list
2638 * just in case its running on a different cpu
2639 */
2640 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2641 return;
2642
2643 napi_gro_flush(n);
2644 local_irq_save(flags);
2645 __napi_complete(n);
2646 local_irq_restore(flags);
2647}
2648EXPORT_SYMBOL(napi_complete);
2649
2650void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2651 int (*poll)(struct napi_struct *, int), int weight)
2652{
2653 INIT_LIST_HEAD(&napi->poll_list);
2654 napi->gro_list = NULL;
Herbert Xu5d38a072009-01-04 16:13:40 -08002655 napi->skb = NULL;
Herbert Xud565b0a2008-12-15 23:38:52 -08002656 napi->poll = poll;
2657 napi->weight = weight;
2658 list_add(&napi->dev_list, &dev->napi_list);
Herbert Xud565b0a2008-12-15 23:38:52 -08002659 napi->dev = dev;
Herbert Xu5d38a072009-01-04 16:13:40 -08002660#ifdef CONFIG_NETPOLL
Herbert Xud565b0a2008-12-15 23:38:52 -08002661 spin_lock_init(&napi->poll_lock);
2662 napi->poll_owner = -1;
2663#endif
2664 set_bit(NAPI_STATE_SCHED, &napi->state);
2665}
2666EXPORT_SYMBOL(netif_napi_add);
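/*
 * The pattern this supports, sketched with hypothetical foo_* names:
 * the rx interrupt disables its interrupt source and schedules NAPI,
 * and the poll routine consumes up to "budget" packets before
 * completing:
 *
 *	static int foo_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = 0;
 *
 *		while (work < budget && foo_rx_pending()) {
 *			... build skb, then netif_receive_skb(skb)
 *			    or napi_gro_receive(napi, skb) ...
 *			work++;
 *		}
 *		if (work < budget) {
 *			napi_complete(napi);
 *			foo_enable_rx_irq();
 *		}
 *		return work;
 *	}
 *
 *	netif_napi_add(dev, &priv->napi, foo_poll, 64);
 *	... and in the rx interrupt: napi_schedule(&priv->napi);
 */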
2667
2668void netif_napi_del(struct napi_struct *napi)
2669{
2670 struct sk_buff *skb, *next;
2671
Peter P Waskiewicz Jrd7b06632008-12-26 01:35:35 -08002672 list_del_init(&napi->dev_list);
Herbert Xu5d38a072009-01-04 16:13:40 -08002673 kfree(napi->skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08002674
2675 for (skb = napi->gro_list; skb; skb = next) {
2676 next = skb->next;
2677 skb->next = NULL;
2678 kfree_skb(skb);
2679 }
2680
2681 napi->gro_list = NULL;
2682}
2683EXPORT_SYMBOL(netif_napi_del);
2684
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002685
Linus Torvalds1da177e2005-04-16 15:20:36 -07002686static void net_rx_action(struct softirq_action *h)
2687{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002688 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002689 unsigned long time_limit = jiffies + 2;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07002690 int budget = netdev_budget;
Matt Mackall53fb95d2005-08-11 19:27:43 -07002691 void *have;
2692
Linus Torvalds1da177e2005-04-16 15:20:36 -07002693 local_irq_disable();
2694
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002695 while (!list_empty(list)) {
2696 struct napi_struct *n;
2697 int work, weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002698
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002699		/* If the softirq window is exhausted then punt.
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002700		 * Allow this to run for 2 jiffies, which allows
2701		 * an average latency of 1.5/HZ.
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002702 */
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002703 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002704 goto softnet_break;
2705
2706 local_irq_enable();
2707
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002708 /* Even though interrupts have been re-enabled, this
2709 * access is safe because interrupts can only add new
2710 * entries to the tail of this list, and only ->poll()
2711 * calls can remove this head entry from the list.
2712 */
2713 n = list_entry(list->next, struct napi_struct, poll_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002714
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002715 have = netpoll_poll_lock(n);
2716
2717 weight = n->weight;
2718
David S. Miller0a7606c2007-10-29 21:28:47 -07002719 /* This NAPI_STATE_SCHED test is for avoiding a race
2720 * with netpoll's poll_napi(). Only the entity which
2721 * obtains the lock and sees NAPI_STATE_SCHED set will
2722 * actually make the ->poll() call. Therefore we avoid
2723 * accidently calling ->poll() when NAPI is not scheduled.
2724 */
2725 work = 0;
2726 if (test_bit(NAPI_STATE_SCHED, &n->state))
2727 work = n->poll(n, weight);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002728
2729 WARN_ON_ONCE(work > weight);
2730
2731 budget -= work;
2732
2733 local_irq_disable();
2734
2735 /* Drivers must not modify the NAPI state if they
2736 * consume the entire weight. In such cases this code
2737 * still "owns" the NAPI instance and therefore can
2738 * move the instance around on the list at-will.
2739 */
David S. Millerfed17f32008-01-07 21:00:40 -08002740 if (unlikely(work == weight)) {
2741 if (unlikely(napi_disable_pending(n)))
2742 __napi_complete(n);
2743 else
2744 list_move_tail(&n->poll_list, list);
2745 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002746
2747 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002748 }
2749out:
Shannon Nelson515e06c2007-06-23 23:09:23 -07002750 local_irq_enable();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002751
Chris Leechdb217332006-06-17 21:24:58 -07002752#ifdef CONFIG_NET_DMA
2753 /*
2754 * There may not be any more sk_buffs coming right now, so push
2755 * any pending DMA copies to hardware
2756 */
Dan Williamsd379b012007-07-09 11:56:42 -07002757 if (!cpus_empty(net_dma.channel_mask)) {
2758 int chan_idx;
Mike Travis0e12f842008-05-12 21:21:13 +02002759 for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
Dan Williamsd379b012007-07-09 11:56:42 -07002760 struct dma_chan *chan = net_dma.channels[chan_idx];
2761 if (chan)
2762 dma_async_memcpy_issue_pending(chan);
2763 }
Chris Leechdb217332006-06-17 21:24:58 -07002764 }
2765#endif
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002766
Linus Torvalds1da177e2005-04-16 15:20:36 -07002767 return;
2768
2769softnet_break:
2770 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2771 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2772 goto out;
2773}
2774
2775static gifconf_func_t *gifconf_list[NPROTO];
2776
2777/**
2778 * register_gifconf - register a SIOCGIF handler
2779 * @family: Address family
2780 * @gifconf: Function handler
2781 *
2782 * Register protocol dependent address dumping routines. The handler
2783 * that is passed must not be freed or reused until it has been replaced
2784 * by another handler.
2785 */
2786int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2787{
2788 if (family >= NPROTO)
2789 return -EINVAL;
2790 gifconf_list[family] = gifconf;
2791 return 0;
2792}
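/*
 * Hypothetical registration sketch: an address family supplies a
 * dumper matching gifconf_func_t (device, user buffer, remaining
 * length) and returns how many bytes it wrote; AF_FOO and the foo_*
 * name are invented:
 *
 *	static int foo_gifconf(struct net_device *dev, char __user *buf,
 *			       int len)
 *	{
 *		int done = 0;
 *		... if buf is NULL, only count; otherwise copy one
 *		    struct ifreq per address and add it to done ...
 *		return done;
 *	}
 *
 *	register_gifconf(AF_FOO, foo_gifconf);
 */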
2793
2794
2795/*
2796 * Map an interface index to its name (SIOCGIFNAME)
2797 */
2798
2799/*
2800 * We need this ioctl for efficient implementation of the
2801 * if_indextoname() function required by the IPv6 API. Without
2802 * it, we would have to search all the interfaces to find a
2803 * match. --pb
2804 */
2805
Eric W. Biederman881d9662007-09-17 11:56:21 -07002806static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002807{
2808 struct net_device *dev;
2809 struct ifreq ifr;
2810
2811 /*
2812 * Fetch the caller's info block.
2813 */
2814
2815 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2816 return -EFAULT;
2817
2818 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -07002819 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002820 if (!dev) {
2821 read_unlock(&dev_base_lock);
2822 return -ENODEV;
2823 }
2824
2825 strcpy(ifr.ifr_name, dev->name);
2826 read_unlock(&dev_base_lock);
2827
2828 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2829 return -EFAULT;
2830 return 0;
2831}
2832
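/*
 * Userspace sketch (assumes glibc headers): this ioctl is essentially
 * all that an if_indextoname() implementation needs.
 */
#if 0
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>

static char *index_to_name(int sock, unsigned int ifindex, char *name)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_ifindex = ifindex;
	if (ioctl(sock, SIOCGIFNAME, &ifr) < 0)
		return NULL;		/* ENODEV: no such index */
	memcpy(name, ifr.ifr_name, IFNAMSIZ);
	return name;
}
#endif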
2833/*
2834 * Perform a SIOCGIFCONF call. This structure will change
2835 * size eventually, and there is nothing I can do about it.
2836 * Thus we will need a 'compatibility mode'.
2837 */
2838
Eric W. Biederman881d9662007-09-17 11:56:21 -07002839static int dev_ifconf(struct net *net, char __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002840{
2841 struct ifconf ifc;
2842 struct net_device *dev;
2843 char __user *pos;
2844 int len;
2845 int total;
2846 int i;
2847
2848 /*
2849 * Fetch the caller's info block.
2850 */
2851
2852 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2853 return -EFAULT;
2854
2855 pos = ifc.ifc_buf;
2856 len = ifc.ifc_len;
2857
2858 /*
2859 * Loop over the interfaces, and write an info block for each.
2860 */
2861
2862 total = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002863 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002864 for (i = 0; i < NPROTO; i++) {
2865 if (gifconf_list[i]) {
2866 int done;
2867 if (!pos)
2868 done = gifconf_list[i](dev, NULL, 0);
2869 else
2870 done = gifconf_list[i](dev, pos + total,
2871 len - total);
2872 if (done < 0)
2873 return -EFAULT;
2874 total += done;
2875 }
2876 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002877 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002878
2879 /*
2880 * All done. Write the updated control block back to the caller.
2881 */
2882 ifc.ifc_len = total;
2883
2884 /*
2885 * Both BSD and Solaris return 0 here, so we do too.
2886 */
2887 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2888}
2889
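/*
 * Userspace sketch: the classic two-pass SIOCGIFCONF dance.  The first
 * call, with a NULL buffer, only reports the length needed (see the
 * !pos case above); the second call fills the buffer.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <net/if.h>

static void print_if_names(int sock)
{
	struct ifconf ifc;
	int i, n;

	ifc.ifc_buf = NULL;	/* first pass: size only */
	ifc.ifc_len = 0;
	if (ioctl(sock, SIOCGIFCONF, &ifc) < 0)
		return;
	ifc.ifc_buf = malloc(ifc.ifc_len);
	if (ifc.ifc_buf && ioctl(sock, SIOCGIFCONF, &ifc) == 0) {
		n = ifc.ifc_len / sizeof(struct ifreq);
		for (i = 0; i < n; i++)
			printf("%s\n", ifc.ifc_req[i].ifr_name);
	}
	free(ifc.ifc_buf);
}
#endif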
2890#ifdef CONFIG_PROC_FS
2891/*
2892 * This is invoked by the /proc filesystem handler to display a device
2893 * in detail.
2894 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002895void *dev_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002896 __acquires(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002897{
Denis V. Luneve372c412007-11-19 22:31:54 -08002898 struct net *net = seq_file_net(seq);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002899 loff_t off;
2900 struct net_device *dev;
2901
Linus Torvalds1da177e2005-04-16 15:20:36 -07002902 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002903 if (!*pos)
2904 return SEQ_START_TOKEN;
2905
2906 off = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002907 for_each_netdev(net, dev)
Pavel Emelianov7562f872007-05-03 15:13:45 -07002908 if (off++ == *pos)
2909 return dev;
2910
2911 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002912}
2913
2914void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2915{
Denis V. Luneve372c412007-11-19 22:31:54 -08002916 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002917 ++*pos;
Pavel Emelianov7562f872007-05-03 15:13:45 -07002918 return v == SEQ_START_TOKEN ?
Eric W. Biederman881d9662007-09-17 11:56:21 -07002919 first_net_device(net) : next_net_device((struct net_device *)v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002920}
2921
2922void dev_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002923 __releases(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002924{
2925 read_unlock(&dev_base_lock);
2926}
2927
2928static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2929{
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08002930 const struct net_device_stats *stats = dev_get_stats(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002931
Rusty Russell5a1b5892007-04-28 21:04:03 -07002932 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2933 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2934 dev->name, stats->rx_bytes, stats->rx_packets,
2935 stats->rx_errors,
2936 stats->rx_dropped + stats->rx_missed_errors,
2937 stats->rx_fifo_errors,
2938 stats->rx_length_errors + stats->rx_over_errors +
2939 stats->rx_crc_errors + stats->rx_frame_errors,
2940 stats->rx_compressed, stats->multicast,
2941 stats->tx_bytes, stats->tx_packets,
2942 stats->tx_errors, stats->tx_dropped,
2943 stats->tx_fifo_errors, stats->collisions,
2944 stats->tx_carrier_errors +
2945 stats->tx_aborted_errors +
2946 stats->tx_window_errors +
2947 stats->tx_heartbeat_errors,
2948 stats->tx_compressed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002949}
2950
2951/*
2952 * Called from the PROCfs module. This now uses the new arbitrary sized
2953 * /proc/net interface to create /proc/net/dev
2954 */
2955static int dev_seq_show(struct seq_file *seq, void *v)
2956{
2957 if (v == SEQ_START_TOKEN)
2958 seq_puts(seq, "Inter-| Receive "
2959 " | Transmit\n"
2960 " face |bytes packets errs drop fifo frame "
2961 "compressed multicast|bytes packets errs "
2962 "drop fifo colls carrier compressed\n");
2963 else
2964 dev_seq_printf_stats(seq, v);
2965 return 0;
2966}
2967
2968static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2969{
2970 struct netif_rx_stats *rc = NULL;
2971
Mike Travis0c0b0ac2008-05-02 16:43:08 -07002972 while (*pos < nr_cpu_ids)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002973 if (cpu_online(*pos)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002974 rc = &per_cpu(netdev_rx_stat, *pos);
2975 break;
2976 } else
2977 ++*pos;
2978 return rc;
2979}
2980
2981static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2982{
2983 return softnet_get_online(pos);
2984}
2985
2986static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2987{
2988 ++*pos;
2989 return softnet_get_online(pos);
2990}
2991
2992static void softnet_seq_stop(struct seq_file *seq, void *v)
2993{
2994}
2995
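/*
 * Each line of /proc/net/softnet_stat describes one online CPU as nine
 * hex words: packets processed, packets dropped, time_squeeze (runs of
 * net_rx_action cut short by budget or jiffies), a zero placeholder,
 * four more zeros kept for layout compatibility since fastroute was
 * removed, and cpu_collision from the transmit path.
 */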
2996static int softnet_seq_show(struct seq_file *seq, void *v)
2997{
2998 struct netif_rx_stats *s = v;
2999
3000 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
Stephen Hemminger31aa02c2005-06-23 20:12:48 -07003001 s->total, s->dropped, s->time_squeeze, 0,
Stephen Hemmingerc1ebcdb2005-06-23 20:08:59 -07003002 0, 0, 0, 0, /* was fastroute */
3003 s->cpu_collision );
Linus Torvalds1da177e2005-04-16 15:20:36 -07003004 return 0;
3005}
3006
Stephen Hemmingerf6908082007-03-12 14:34:29 -07003007static const struct seq_operations dev_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003008 .start = dev_seq_start,
3009 .next = dev_seq_next,
3010 .stop = dev_seq_stop,
3011 .show = dev_seq_show,
3012};
3013
3014static int dev_seq_open(struct inode *inode, struct file *file)
3015{
Denis V. Luneve372c412007-11-19 22:31:54 -08003016 return seq_open_net(inode, file, &dev_seq_ops,
3017 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003018}
3019
Arjan van de Ven9a321442007-02-12 00:55:35 -08003020static const struct file_operations dev_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003021 .owner = THIS_MODULE,
3022 .open = dev_seq_open,
3023 .read = seq_read,
3024 .llseek = seq_lseek,
Denis V. Luneve372c412007-11-19 22:31:54 -08003025 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003026};
3027
Stephen Hemmingerf6908082007-03-12 14:34:29 -07003028static const struct seq_operations softnet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003029 .start = softnet_seq_start,
3030 .next = softnet_seq_next,
3031 .stop = softnet_seq_stop,
3032 .show = softnet_seq_show,
3033};
3034
3035static int softnet_seq_open(struct inode *inode, struct file *file)
3036{
3037 return seq_open(file, &softnet_seq_ops);
3038}
3039
Arjan van de Ven9a321442007-02-12 00:55:35 -08003040static const struct file_operations softnet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003041 .owner = THIS_MODULE,
3042 .open = softnet_seq_open,
3043 .read = seq_read,
3044 .llseek = seq_lseek,
3045 .release = seq_release,
3046};
3047
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003048static void *ptype_get_idx(loff_t pos)
3049{
3050 struct packet_type *pt = NULL;
3051 loff_t i = 0;
3052 int t;
3053
3054 list_for_each_entry_rcu(pt, &ptype_all, list) {
3055 if (i == pos)
3056 return pt;
3057 ++i;
3058 }
3059
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003060 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003061 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3062 if (i == pos)
3063 return pt;
3064 ++i;
3065 }
3066 }
3067 return NULL;
3068}
3069
3070static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemminger72348a42008-01-21 02:27:29 -08003071 __acquires(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003072{
3073 rcu_read_lock();
3074 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3075}
3076
3077static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3078{
3079 struct packet_type *pt;
3080 struct list_head *nxt;
3081 int hash;
3082
3083 ++*pos;
3084 if (v == SEQ_START_TOKEN)
3085 return ptype_get_idx(0);
3086
3087 pt = v;
3088 nxt = pt->list.next;
3089 if (pt->type == htons(ETH_P_ALL)) {
3090 if (nxt != &ptype_all)
3091 goto found;
3092 hash = 0;
3093 nxt = ptype_base[0].next;
3094 } else
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003095 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003096
3097 while (nxt == &ptype_base[hash]) {
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003098 if (++hash >= PTYPE_HASH_SIZE)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003099 return NULL;
3100 nxt = ptype_base[hash].next;
3101 }
3102found:
3103 return list_entry(nxt, struct packet_type, list);
3104}
3105
3106static void ptype_seq_stop(struct seq_file *seq, void *v)
Stephen Hemminger72348a42008-01-21 02:27:29 -08003107 __releases(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003108{
3109 rcu_read_unlock();
3110}
3111
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003112static int ptype_seq_show(struct seq_file *seq, void *v)
3113{
3114 struct packet_type *pt = v;
3115
3116 if (v == SEQ_START_TOKEN)
3117 seq_puts(seq, "Type Device Function\n");
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09003118 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003119 if (pt->type == htons(ETH_P_ALL))
3120 seq_puts(seq, "ALL ");
3121 else
3122 seq_printf(seq, "%04x", ntohs(pt->type));
3123
Alexey Dobriyan908cd2d2008-11-16 19:50:35 -08003124 seq_printf(seq, " %-8s %pF\n",
3125 pt->dev ? pt->dev->name : "", pt->func);
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003126 }
3127
3128 return 0;
3129}
3130
3131static const struct seq_operations ptype_seq_ops = {
3132 .start = ptype_seq_start,
3133 .next = ptype_seq_next,
3134 .stop = ptype_seq_stop,
3135 .show = ptype_seq_show,
3136};
3137
3138static int ptype_seq_open(struct inode *inode, struct file *file)
3139{
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07003140 return seq_open_net(inode, file, &ptype_seq_ops,
3141 sizeof(struct seq_net_private));
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003142}
3143
3144static const struct file_operations ptype_seq_fops = {
3145 .owner = THIS_MODULE,
3146 .open = ptype_seq_open,
3147 .read = seq_read,
3148 .llseek = seq_lseek,
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07003149 .release = seq_release_net,
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003150};
3151
3152
Pavel Emelyanov46650792007-10-08 20:38:39 -07003153static int __net_init dev_proc_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003154{
3155 int rc = -ENOMEM;
3156
Eric W. Biederman881d9662007-09-17 11:56:21 -07003157 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003158 goto out;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003159 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003160 goto out_dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003161 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003162 goto out_softnet;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003163
Eric W. Biederman881d9662007-09-17 11:56:21 -07003164 if (wext_proc_init(net))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003165 goto out_ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003166 rc = 0;
3167out:
3168 return rc;
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003169out_ptype:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003170 proc_net_remove(net, "ptype");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003171out_softnet:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003172 proc_net_remove(net, "softnet_stat");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003173out_dev:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003174 proc_net_remove(net, "dev");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003175 goto out;
3176}
Eric W. Biederman881d9662007-09-17 11:56:21 -07003177
Pavel Emelyanov46650792007-10-08 20:38:39 -07003178static void __net_exit dev_proc_net_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07003179{
3180 wext_proc_exit(net);
3181
3182 proc_net_remove(net, "ptype");
3183 proc_net_remove(net, "softnet_stat");
3184 proc_net_remove(net, "dev");
3185}
3186
Denis V. Lunev022cbae2007-11-13 03:23:50 -08003187static struct pernet_operations __net_initdata dev_proc_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07003188 .init = dev_proc_net_init,
3189 .exit = dev_proc_net_exit,
3190};
3191
3192static int __init dev_proc_init(void)
3193{
3194 return register_pernet_subsys(&dev_proc_ops);
3195}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003196#else
3197#define dev_proc_init() 0
3198#endif /* CONFIG_PROC_FS */
3199
3200
3201/**
3202 * netdev_set_master - set up master/slave pair
3203 * @slave: slave device
3204 * @master: new master device
3205 *
3206 * Changes the master device of the slave. Pass %NULL to break the
3207 * bonding. The caller must hold the RTNL semaphore. On a failure
3208 * a negative errno code is returned. On success the reference counts
3209 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3210 * function returns zero.
3211 */
3212int netdev_set_master(struct net_device *slave, struct net_device *master)
3213{
3214 struct net_device *old = slave->master;
3215
3216 ASSERT_RTNL();
3217
3218 if (master) {
3219 if (old)
3220 return -EBUSY;
3221 dev_hold(master);
3222 }
3223
3224 slave->master = master;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003225
Linus Torvalds1da177e2005-04-16 15:20:36 -07003226 synchronize_net();
3227
3228 if (old)
3229 dev_put(old);
3230
3231 if (master)
3232 slave->flags |= IFF_SLAVE;
3233 else
3234 slave->flags &= ~IFF_SLAVE;
3235
3236 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3237 return 0;
3238}
3239
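/*
 * Hedged usage sketch: roughly how a bonding-style driver pairs and
 * unpairs devices.  Real callers typically already run under
 * rtnl_lock() in their ioctl paths; the explicit locking here is
 * illustrative.
 */
#if 0
static int example_enslave(struct net_device *master, struct net_device *slave)
{
	int err;

	rtnl_lock();
	err = netdev_set_master(slave, master);
	rtnl_unlock();
	return err;
}

static int example_release(struct net_device *slave)
{
	int err;

	rtnl_lock();
	err = netdev_set_master(slave, NULL);	/* break the pairing */
	rtnl_unlock();
	return err;
}
#endif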
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003240static void dev_change_rx_flags(struct net_device *dev, int flags)
3241{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003242 const struct net_device_ops *ops = dev->netdev_ops;
3243
3244 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3245 ops->ndo_change_rx_flags(dev, flags);
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003246}
3247
Wang Chendad9b332008-06-18 01:48:28 -07003248static int __dev_set_promiscuity(struct net_device *dev, int inc)
Patrick McHardy4417da62007-06-27 01:28:10 -07003249{
3250 unsigned short old_flags = dev->flags;
David Howells8192b0c2008-11-14 10:39:10 +11003251 uid_t uid;
3252 gid_t gid;
Patrick McHardy4417da62007-06-27 01:28:10 -07003253
Patrick McHardy24023452007-07-14 18:51:31 -07003254 ASSERT_RTNL();
3255
Wang Chendad9b332008-06-18 01:48:28 -07003256 dev->flags |= IFF_PROMISC;
3257 dev->promiscuity += inc;
3258 if (dev->promiscuity == 0) {
3259 /*
3260 * Avoid overflow.
 3261		 * If inc would cause an overflow, leave promisc untouched and return an error.
3262 */
3263 if (inc < 0)
3264 dev->flags &= ~IFF_PROMISC;
3265 else {
3266 dev->promiscuity -= inc;
 3267			printk(KERN_WARNING "%s: promiscuity counter overflow, "
 3268			       "promiscuity left unchanged; the promiscuity "
 3269			       "feature of this device may be broken.\n", dev->name);
3270 return -EOVERFLOW;
3271 }
3272 }
Patrick McHardy4417da62007-06-27 01:28:10 -07003273 if (dev->flags != old_flags) {
3274 printk(KERN_INFO "device %s %s promiscuous mode\n",
3275 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3276 "left");
David Howells8192b0c2008-11-14 10:39:10 +11003277 if (audit_enabled) {
3278 current_uid_gid(&uid, &gid);
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05003279 audit_log(current->audit_context, GFP_ATOMIC,
3280 AUDIT_ANOM_PROMISCUOUS,
3281 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3282 dev->name, (dev->flags & IFF_PROMISC),
3283 (old_flags & IFF_PROMISC),
3284 audit_get_loginuid(current),
David Howells8192b0c2008-11-14 10:39:10 +11003285 uid, gid,
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05003286 audit_get_sessionid(current));
David Howells8192b0c2008-11-14 10:39:10 +11003287 }
Patrick McHardy24023452007-07-14 18:51:31 -07003288
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003289 dev_change_rx_flags(dev, IFF_PROMISC);
Patrick McHardy4417da62007-06-27 01:28:10 -07003290 }
Wang Chendad9b332008-06-18 01:48:28 -07003291 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003292}
3293
Linus Torvalds1da177e2005-04-16 15:20:36 -07003294/**
3295 * dev_set_promiscuity - update promiscuity count on a device
3296 * @dev: device
3297 * @inc: modifier
3298 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07003299 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07003300 * remains above zero the interface remains promiscuous. Once it hits zero
 3301 * the device reverts to normal filtering operation. A negative inc
3302 * value is used to drop promiscuity on the device.
Wang Chendad9b332008-06-18 01:48:28 -07003303 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003304 */
Wang Chendad9b332008-06-18 01:48:28 -07003305int dev_set_promiscuity(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003306{
3307 unsigned short old_flags = dev->flags;
Wang Chendad9b332008-06-18 01:48:28 -07003308 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003309
Wang Chendad9b332008-06-18 01:48:28 -07003310 err = __dev_set_promiscuity(dev, inc);
Patrick McHardy4b5a6982008-07-06 15:49:08 -07003311 if (err < 0)
Wang Chendad9b332008-06-18 01:48:28 -07003312 return err;
Patrick McHardy4417da62007-06-27 01:28:10 -07003313 if (dev->flags != old_flags)
3314 dev_set_rx_mode(dev);
Wang Chendad9b332008-06-18 01:48:28 -07003315 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003316}
3317
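/*
 * Hedged usage sketch: a capture-style user takes one promiscuity
 * reference while it needs to see all traffic and drops it afterwards;
 * the device leaves promiscuous mode only when the count hits zero.
 * dev_set_promiscuity() must run under rtnl_lock() (see ASSERT_RTNL
 * in __dev_set_promiscuity()).
 */
#if 0
static int example_start_capture(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_promiscuity(dev, 1);
	rtnl_unlock();
	return err;
}

static void example_stop_capture(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, -1);
	rtnl_unlock();
}
#endif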
3318/**
3319 * dev_set_allmulti - update allmulti count on a device
3320 * @dev: device
3321 * @inc: modifier
3322 *
3323 * Add or remove reception of all multicast frames to a device. While the
3324 * count in the device remains above zero the interface remains listening
 3325 * to all multicast frames. Once it hits zero the device reverts to normal
3326 * filtering operation. A negative @inc value is used to drop the counter
3327 * when releasing a resource needing all multicasts.
Wang Chendad9b332008-06-18 01:48:28 -07003328 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003329 */
3330
Wang Chendad9b332008-06-18 01:48:28 -07003331int dev_set_allmulti(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003332{
3333 unsigned short old_flags = dev->flags;
3334
Patrick McHardy24023452007-07-14 18:51:31 -07003335 ASSERT_RTNL();
3336
Linus Torvalds1da177e2005-04-16 15:20:36 -07003337 dev->flags |= IFF_ALLMULTI;
Wang Chendad9b332008-06-18 01:48:28 -07003338 dev->allmulti += inc;
3339 if (dev->allmulti == 0) {
3340 /*
3341 * Avoid overflow.
 3342		 * If inc would cause an overflow, leave allmulti untouched and return an error.
3343 */
3344 if (inc < 0)
3345 dev->flags &= ~IFF_ALLMULTI;
3346 else {
3347 dev->allmulti -= inc;
 3348			printk(KERN_WARNING "%s: allmulti counter overflow, "
 3349			       "allmulti left unchanged; the allmulti feature "
 3350			       "of this device may be broken.\n", dev->name);
3351 return -EOVERFLOW;
3352 }
3353 }
Patrick McHardy24023452007-07-14 18:51:31 -07003354 if (dev->flags ^ old_flags) {
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003355 dev_change_rx_flags(dev, IFF_ALLMULTI);
Patrick McHardy4417da62007-06-27 01:28:10 -07003356 dev_set_rx_mode(dev);
Patrick McHardy24023452007-07-14 18:51:31 -07003357 }
Wang Chendad9b332008-06-18 01:48:28 -07003358 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003359}
3360
3361/*
3362 * Upload unicast and multicast address lists to device and
3363 * configure RX filtering. When the device doesn't support unicast
Joe Perches53ccaae2007-12-20 14:02:06 -08003364 * filtering it is put in promiscuous mode while unicast addresses
Patrick McHardy4417da62007-06-27 01:28:10 -07003365 * are present.
3366 */
3367void __dev_set_rx_mode(struct net_device *dev)
3368{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003369 const struct net_device_ops *ops = dev->netdev_ops;
3370
Patrick McHardy4417da62007-06-27 01:28:10 -07003371 /* dev_open will call this function so the list will stay sane. */
3372 if (!(dev->flags&IFF_UP))
3373 return;
3374
3375 if (!netif_device_present(dev))
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +09003376 return;
Patrick McHardy4417da62007-06-27 01:28:10 -07003377
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003378 if (ops->ndo_set_rx_mode)
3379 ops->ndo_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003380 else {
3381 /* Unicast addresses changes may only happen under the rtnl,
3382 * therefore calling __dev_set_promiscuity here is safe.
3383 */
3384 if (dev->uc_count > 0 && !dev->uc_promisc) {
3385 __dev_set_promiscuity(dev, 1);
3386 dev->uc_promisc = 1;
3387 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3388 __dev_set_promiscuity(dev, -1);
3389 dev->uc_promisc = 0;
3390 }
3391
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003392 if (ops->ndo_set_multicast_list)
3393 ops->ndo_set_multicast_list(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003394 }
3395}
3396
3397void dev_set_rx_mode(struct net_device *dev)
3398{
David S. Millerb9e40852008-07-15 00:15:08 -07003399 netif_addr_lock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003400 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003401 netif_addr_unlock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003402}
3403
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003404int __dev_addr_delete(struct dev_addr_list **list, int *count,
3405 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003406{
3407 struct dev_addr_list *da;
3408
3409 for (; (da = *list) != NULL; list = &da->next) {
3410 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3411 alen == da->da_addrlen) {
3412 if (glbl) {
3413 int old_glbl = da->da_gusers;
3414 da->da_gusers = 0;
3415 if (old_glbl == 0)
3416 break;
3417 }
3418 if (--da->da_users)
3419 return 0;
3420
3421 *list = da->next;
3422 kfree(da);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003423 (*count)--;
Patrick McHardybf742482007-06-27 01:26:19 -07003424 return 0;
3425 }
3426 }
3427 return -ENOENT;
3428}
3429
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003430int __dev_addr_add(struct dev_addr_list **list, int *count,
3431 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003432{
3433 struct dev_addr_list *da;
3434
3435 for (da = *list; da != NULL; da = da->next) {
3436 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3437 da->da_addrlen == alen) {
3438 if (glbl) {
3439 int old_glbl = da->da_gusers;
3440 da->da_gusers = 1;
3441 if (old_glbl)
3442 return 0;
3443 }
3444 da->da_users++;
3445 return 0;
3446 }
3447 }
3448
Jorge Boncompte [DTI2]12aa3432008-02-19 14:17:04 -08003449 da = kzalloc(sizeof(*da), GFP_ATOMIC);
Patrick McHardybf742482007-06-27 01:26:19 -07003450 if (da == NULL)
3451 return -ENOMEM;
3452 memcpy(da->da_addr, addr, alen);
3453 da->da_addrlen = alen;
3454 da->da_users = 1;
3455 da->da_gusers = glbl ? 1 : 0;
3456 da->next = *list;
3457 *list = da;
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003458 (*count)++;
Patrick McHardybf742482007-06-27 01:26:19 -07003459 return 0;
3460}
3461
Patrick McHardy4417da62007-06-27 01:28:10 -07003462/**
3463 * dev_unicast_delete - Release secondary unicast address.
3464 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003465 * @addr: address to delete
3466 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07003467 *
3468 * Release reference to a secondary unicast address and remove it
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003469 * from the device if the reference count drops to zero.
Patrick McHardy4417da62007-06-27 01:28:10 -07003470 *
3471 * The caller must hold the rtnl_mutex.
3472 */
3473int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3474{
3475 int err;
3476
3477 ASSERT_RTNL();
3478
David S. Millerb9e40852008-07-15 00:15:08 -07003479 netif_addr_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003480 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3481 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07003482 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003483 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003484 return err;
3485}
3486EXPORT_SYMBOL(dev_unicast_delete);
3487
3488/**
3489 * dev_unicast_add - add a secondary unicast address
3490 * @dev: device
Wang Chen5dbaec52008-06-27 19:35:16 -07003491 * @addr: address to add
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003492 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07003493 *
3494 * Add a secondary unicast address to the device or increase
3495 * the reference count if it already exists.
3496 *
3497 * The caller must hold the rtnl_mutex.
3498 */
3499int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3500{
3501 int err;
3502
3503 ASSERT_RTNL();
3504
David S. Millerb9e40852008-07-15 00:15:08 -07003505 netif_addr_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003506 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3507 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07003508 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003509 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003510 return err;
3511}
3512EXPORT_SYMBOL(dev_unicast_add);
3513
Chris Leeche83a2ea2008-01-31 16:53:23 -08003514int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3515 struct dev_addr_list **from, int *from_count)
3516{
3517 struct dev_addr_list *da, *next;
3518 int err = 0;
3519
3520 da = *from;
3521 while (da != NULL) {
3522 next = da->next;
3523 if (!da->da_synced) {
3524 err = __dev_addr_add(to, to_count,
3525 da->da_addr, da->da_addrlen, 0);
3526 if (err < 0)
3527 break;
3528 da->da_synced = 1;
3529 da->da_users++;
3530 } else if (da->da_users == 1) {
3531 __dev_addr_delete(to, to_count,
3532 da->da_addr, da->da_addrlen, 0);
3533 __dev_addr_delete(from, from_count,
3534 da->da_addr, da->da_addrlen, 0);
3535 }
3536 da = next;
3537 }
3538 return err;
3539}
3540
3541void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3542 struct dev_addr_list **from, int *from_count)
3543{
3544 struct dev_addr_list *da, *next;
3545
3546 da = *from;
3547 while (da != NULL) {
3548 next = da->next;
3549 if (da->da_synced) {
3550 __dev_addr_delete(to, to_count,
3551 da->da_addr, da->da_addrlen, 0);
3552 da->da_synced = 0;
3553 __dev_addr_delete(from, from_count,
3554 da->da_addr, da->da_addrlen, 0);
3555 }
3556 da = next;
3557 }
3558}
3559
3560/**
3561 * dev_unicast_sync - Synchronize device's unicast list to another device
3562 * @to: destination device
3563 * @from: source device
3564 *
3565 * Add newly added addresses to the destination device and release
3566 * addresses that have no users left. The source device must be
3567 * locked by netif_tx_lock_bh.
3568 *
 3569 * locked by netif_addr_lock_bh.
3570 * function of layered software devices.
3571 */
3572int dev_unicast_sync(struct net_device *to, struct net_device *from)
3573{
3574 int err = 0;
3575
David S. Millerb9e40852008-07-15 00:15:08 -07003576 netif_addr_lock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003577 err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3578 &from->uc_list, &from->uc_count);
3579 if (!err)
3580 __dev_set_rx_mode(to);
David S. Millerb9e40852008-07-15 00:15:08 -07003581 netif_addr_unlock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003582 return err;
3583}
3584EXPORT_SYMBOL(dev_unicast_sync);
3585
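/*
 * Hedged usage sketch: a layered device propagates its secondary
 * unicast list down to the real device from its rx_mode hook, much as
 * the 802.1q VLAN driver does.  example_lower_dev() is a hypothetical
 * accessor for the underlying device.
 */
#if 0
static void example_upper_set_rx_mode(struct net_device *upper)
{
	struct net_device *lower = example_lower_dev(upper);

	dev_unicast_sync(lower, upper);
}
#endif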
3586/**
Randy Dunlapbc2cda12008-02-13 15:03:25 -08003587 * dev_unicast_unsync - Remove synchronized addresses from the destination device
Chris Leeche83a2ea2008-01-31 16:53:23 -08003588 * @to: destination device
3589 * @from: source device
3590 *
3591 * Remove all addresses that were added to the destination device by
3592 * dev_unicast_sync(). This function is intended to be called from the
3593 * dev->stop function of layered software devices.
3594 */
3595void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3596{
David S. Millerb9e40852008-07-15 00:15:08 -07003597 netif_addr_lock_bh(from);
David S. Millere308a5d2008-07-15 00:13:44 -07003598 netif_addr_lock(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003599
3600 __dev_addr_unsync(&to->uc_list, &to->uc_count,
3601 &from->uc_list, &from->uc_count);
3602 __dev_set_rx_mode(to);
3603
David S. Millere308a5d2008-07-15 00:13:44 -07003604 netif_addr_unlock(to);
David S. Millerb9e40852008-07-15 00:15:08 -07003605 netif_addr_unlock_bh(from);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003606}
3607EXPORT_SYMBOL(dev_unicast_unsync);
3608
Denis Cheng12972622007-07-18 02:12:56 -07003609static void __dev_addr_discard(struct dev_addr_list **list)
3610{
3611 struct dev_addr_list *tmp;
3612
3613 while (*list != NULL) {
3614 tmp = *list;
3615 *list = tmp->next;
3616 if (tmp->da_users > tmp->da_gusers)
 3617			printk(KERN_ERR "__dev_addr_discard: address leakage! "
3618 "da_users=%d\n", tmp->da_users);
3619 kfree(tmp);
3620 }
3621}
3622
Denis Cheng26cc2522007-07-18 02:12:03 -07003623static void dev_addr_discard(struct net_device *dev)
Patrick McHardy4417da62007-06-27 01:28:10 -07003624{
David S. Millerb9e40852008-07-15 00:15:08 -07003625 netif_addr_lock_bh(dev);
Denis Cheng26cc2522007-07-18 02:12:03 -07003626
Patrick McHardy4417da62007-06-27 01:28:10 -07003627 __dev_addr_discard(&dev->uc_list);
3628 dev->uc_count = 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003629
Denis Cheng456ad752007-07-18 02:10:54 -07003630 __dev_addr_discard(&dev->mc_list);
3631 dev->mc_count = 0;
Denis Cheng26cc2522007-07-18 02:12:03 -07003632
David S. Millerb9e40852008-07-15 00:15:08 -07003633 netif_addr_unlock_bh(dev);
Denis Cheng456ad752007-07-18 02:10:54 -07003634}
3635
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003636/**
3637 * dev_get_flags - get flags reported to userspace
3638 * @dev: device
3639 *
3640 * Get the combination of flag bits exported through APIs to userspace.
3641 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003642unsigned dev_get_flags(const struct net_device *dev)
3643{
3644 unsigned flags;
3645
3646 flags = (dev->flags & ~(IFF_PROMISC |
3647 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08003648 IFF_RUNNING |
3649 IFF_LOWER_UP |
3650 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07003651 (dev->gflags & (IFF_PROMISC |
3652 IFF_ALLMULTI));
3653
Stefan Rompfb00055a2006-03-20 17:09:11 -08003654 if (netif_running(dev)) {
3655 if (netif_oper_up(dev))
3656 flags |= IFF_RUNNING;
3657 if (netif_carrier_ok(dev))
3658 flags |= IFF_LOWER_UP;
3659 if (netif_dormant(dev))
3660 flags |= IFF_DORMANT;
3661 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003662
3663 return flags;
3664}
3665
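/*
 * Userspace sketch: IFF_UP is the administrative state, IFF_RUNNING
 * the RFC 2863 operational state exported above.  Note that
 * IFF_LOWER_UP and IFF_DORMANT do not fit in the 16-bit ifr_flags of
 * SIOCGIFFLAGS; they are only fully visible via rtnetlink.
 */
#if 0
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>

static int link_is_usable(int sock, const char *name)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0)
		return 0;
	return (ifr.ifr_flags & IFF_UP) && (ifr.ifr_flags & IFF_RUNNING);
}
#endif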
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003666/**
3667 * dev_change_flags - change device settings
3668 * @dev: device
3669 * @flags: device state flags
3670 *
3671 * Change settings on device based state flags. The flags are
3672 * in the userspace exported format.
3673 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003674int dev_change_flags(struct net_device *dev, unsigned flags)
3675{
Thomas Graf7c355f52007-06-05 16:03:03 -07003676 int ret, changes;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003677 int old_flags = dev->flags;
3678
Patrick McHardy24023452007-07-14 18:51:31 -07003679 ASSERT_RTNL();
3680
Linus Torvalds1da177e2005-04-16 15:20:36 -07003681 /*
3682 * Set the flags on our device.
3683 */
3684
3685 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3686 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3687 IFF_AUTOMEDIA)) |
3688 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3689 IFF_ALLMULTI));
3690
3691 /*
3692 * Load in the correct multicast list now the flags have changed.
3693 */
3694
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003695 if ((old_flags ^ flags) & IFF_MULTICAST)
3696 dev_change_rx_flags(dev, IFF_MULTICAST);
Patrick McHardy24023452007-07-14 18:51:31 -07003697
Patrick McHardy4417da62007-06-27 01:28:10 -07003698 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003699
3700 /*
 3701	 *	Has the interface been taken up or down?  We handle IFF_UP
 3702	 *	ourselves according to the user's attempt to set it, rather
 3703	 *	than blindly setting it.
3704 */
3705
3706 ret = 0;
3707 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
3708 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3709
3710 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07003711 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003712 }
3713
3714 if (dev->flags & IFF_UP &&
3715 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3716 IFF_VOLATILE)))
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003717 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003718
3719 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3720 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3721 dev->gflags ^= IFF_PROMISC;
3722 dev_set_promiscuity(dev, inc);
3723 }
3724
3725 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
 3726	   is important. Some (broken) drivers set IFF_PROMISC when
 3727	   IFF_ALLMULTI is requested, without asking us and without reporting it.
3728 */
3729 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3730 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3731 dev->gflags ^= IFF_ALLMULTI;
3732 dev_set_allmulti(dev, inc);
3733 }
3734
Thomas Graf7c355f52007-06-05 16:03:03 -07003735 /* Exclude state transition flags, already notified */
3736 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3737 if (changes)
3738 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003739
3740 return ret;
3741}
3742
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003743/**
3744 * dev_set_mtu - Change maximum transfer unit
3745 * @dev: device
3746 * @new_mtu: new transfer unit
3747 *
3748 * Change the maximum transfer size of the network device.
3749 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003750int dev_set_mtu(struct net_device *dev, int new_mtu)
3751{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003752 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003753 int err;
3754
3755 if (new_mtu == dev->mtu)
3756 return 0;
3757
3758 /* MTU must be positive. */
3759 if (new_mtu < 0)
3760 return -EINVAL;
3761
3762 if (!netif_device_present(dev))
3763 return -ENODEV;
3764
3765 err = 0;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003766 if (ops->ndo_change_mtu)
3767 err = ops->ndo_change_mtu(dev, new_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003768 else
3769 dev->mtu = new_mtu;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003770
Linus Torvalds1da177e2005-04-16 15:20:36 -07003771 if (!err && dev->flags & IFF_UP)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003772 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003773 return err;
3774}
3775
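/*
 * Hedged driver-side sketch: a driver with hardware limits supplies
 * ndo_change_mtu and applies its own bounds; without the hook,
 * dev_set_mtu() assigns dev->mtu directly as above.  The 68..9000
 * range here is illustrative.
 */
#if 0
static int example_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < 68 || new_mtu > 9000)
		return -EINVAL;
	dev->mtu = new_mtu;
	/* resize RX buffers / reprogram the MAC here if needed */
	return 0;
}
#endif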
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003776/**
3777 * dev_set_mac_address - Change Media Access Control Address
3778 * @dev: device
3779 * @sa: new address
3780 *
3781 * Change the hardware (MAC) address of the device
3782 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003783int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3784{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003785 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003786 int err;
3787
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003788 if (!ops->ndo_set_mac_address)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003789 return -EOPNOTSUPP;
3790 if (sa->sa_family != dev->type)
3791 return -EINVAL;
3792 if (!netif_device_present(dev))
3793 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003794 err = ops->ndo_set_mac_address(dev, sa);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003795 if (!err)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003796 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003797 return err;
3798}
3799
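/*
 * Hedged driver-side sketch of the ndo_set_mac_address hook invoked
 * above; dev_set_mac_address() has already checked sa_family and
 * device presence.
 */
#if 0
static int example_set_mac_address(struct net_device *dev, void *p)
{
	struct sockaddr *addr = p;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;
	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
	/* program the new address into the hardware filters here */
	return 0;
}
#endif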
3800/*
Jeff Garzik14e3e072007-10-08 00:06:32 -07003801 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003802 */
Jeff Garzik14e3e072007-10-08 00:06:32 -07003803static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003804{
3805 int err;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003806 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003807
3808 if (!dev)
3809 return -ENODEV;
3810
3811 switch (cmd) {
3812 case SIOCGIFFLAGS: /* Get interface flags */
3813 ifr->ifr_flags = dev_get_flags(dev);
3814 return 0;
3815
Linus Torvalds1da177e2005-04-16 15:20:36 -07003816 case SIOCGIFMETRIC: /* Get the metric on the interface
3817 (currently unused) */
3818 ifr->ifr_metric = 0;
3819 return 0;
3820
Linus Torvalds1da177e2005-04-16 15:20:36 -07003821 case SIOCGIFMTU: /* Get the MTU of a device */
3822 ifr->ifr_mtu = dev->mtu;
3823 return 0;
3824
Linus Torvalds1da177e2005-04-16 15:20:36 -07003825 case SIOCGIFHWADDR:
3826 if (!dev->addr_len)
3827 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3828 else
3829 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3830 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3831 ifr->ifr_hwaddr.sa_family = dev->type;
3832 return 0;
3833
Jeff Garzik14e3e072007-10-08 00:06:32 -07003834 case SIOCGIFSLAVE:
3835 err = -EINVAL;
3836 break;
3837
3838 case SIOCGIFMAP:
3839 ifr->ifr_map.mem_start = dev->mem_start;
3840 ifr->ifr_map.mem_end = dev->mem_end;
3841 ifr->ifr_map.base_addr = dev->base_addr;
3842 ifr->ifr_map.irq = dev->irq;
3843 ifr->ifr_map.dma = dev->dma;
3844 ifr->ifr_map.port = dev->if_port;
3845 return 0;
3846
3847 case SIOCGIFINDEX:
3848 ifr->ifr_ifindex = dev->ifindex;
3849 return 0;
3850
3851 case SIOCGIFTXQLEN:
3852 ifr->ifr_qlen = dev->tx_queue_len;
3853 return 0;
3854
3855 default:
3856 /* dev_ioctl() should ensure this case
3857 * is never reached
3858 */
3859 WARN_ON(1);
3860 err = -EINVAL;
3861 break;
3862
3863 }
3864 return err;
3865}
3866
3867/*
3868 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
3869 */
3870static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3871{
3872 int err;
3873 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08003874 const struct net_device_ops *ops;
Jeff Garzik14e3e072007-10-08 00:06:32 -07003875
3876 if (!dev)
3877 return -ENODEV;
3878
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08003879 ops = dev->netdev_ops;
3880
Jeff Garzik14e3e072007-10-08 00:06:32 -07003881 switch (cmd) {
3882 case SIOCSIFFLAGS: /* Set interface flags */
3883 return dev_change_flags(dev, ifr->ifr_flags);
3884
3885 case SIOCSIFMETRIC: /* Set the metric on the interface
3886 (currently unused) */
3887 return -EOPNOTSUPP;
3888
3889 case SIOCSIFMTU: /* Set the MTU of a device */
3890 return dev_set_mtu(dev, ifr->ifr_mtu);
3891
Linus Torvalds1da177e2005-04-16 15:20:36 -07003892 case SIOCSIFHWADDR:
3893 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3894
3895 case SIOCSIFHWBROADCAST:
3896 if (ifr->ifr_hwaddr.sa_family != dev->type)
3897 return -EINVAL;
3898 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3899 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003900 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003901 return 0;
3902
Linus Torvalds1da177e2005-04-16 15:20:36 -07003903 case SIOCSIFMAP:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003904 if (ops->ndo_set_config) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003905 if (!netif_device_present(dev))
3906 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003907 return ops->ndo_set_config(dev, &ifr->ifr_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003908 }
3909 return -EOPNOTSUPP;
3910
3911 case SIOCADDMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003912 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003913 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3914 return -EINVAL;
3915 if (!netif_device_present(dev))
3916 return -ENODEV;
3917 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3918 dev->addr_len, 1);
3919
3920 case SIOCDELMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003921 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003922 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3923 return -EINVAL;
3924 if (!netif_device_present(dev))
3925 return -ENODEV;
3926 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3927 dev->addr_len, 1);
3928
Linus Torvalds1da177e2005-04-16 15:20:36 -07003929 case SIOCSIFTXQLEN:
3930 if (ifr->ifr_qlen < 0)
3931 return -EINVAL;
3932 dev->tx_queue_len = ifr->ifr_qlen;
3933 return 0;
3934
3935 case SIOCSIFNAME:
3936 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3937 return dev_change_name(dev, ifr->ifr_newname);
3938
3939 /*
3940 * Unknown or private ioctl
3941 */
3942
3943 default:
3944 if ((cmd >= SIOCDEVPRIVATE &&
3945 cmd <= SIOCDEVPRIVATE + 15) ||
3946 cmd == SIOCBONDENSLAVE ||
3947 cmd == SIOCBONDRELEASE ||
3948 cmd == SIOCBONDSETHWADDR ||
3949 cmd == SIOCBONDSLAVEINFOQUERY ||
3950 cmd == SIOCBONDINFOQUERY ||
3951 cmd == SIOCBONDCHANGEACTIVE ||
3952 cmd == SIOCGMIIPHY ||
3953 cmd == SIOCGMIIREG ||
3954 cmd == SIOCSMIIREG ||
3955 cmd == SIOCBRADDIF ||
3956 cmd == SIOCBRDELIF ||
3957 cmd == SIOCWANDEV) {
3958 err = -EOPNOTSUPP;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003959 if (ops->ndo_do_ioctl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003960 if (netif_device_present(dev))
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003961 err = ops->ndo_do_ioctl(dev, ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003962 else
3963 err = -ENODEV;
3964 }
3965 } else
3966 err = -EINVAL;
3967
3968 }
3969 return err;
3970}
3971
3972/*
3973 * This function handles all "interface"-type I/O control requests. The actual
3974 * 'doing' part of this is dev_ifsioc above.
3975 */
3976
3977/**
3978 * dev_ioctl - network device ioctl
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07003979 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07003980 * @cmd: command to issue
3981 * @arg: pointer to a struct ifreq in user space
3982 *
3983 * Issue ioctl functions to devices. This is normally called by the
3984 * user space syscall interfaces but can sometimes be useful for
3985 * other purposes. The return value is the return from the syscall if
3986 * positive or a negative errno code on error.
3987 */
3988
Eric W. Biederman881d9662007-09-17 11:56:21 -07003989int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003990{
3991 struct ifreq ifr;
3992 int ret;
3993 char *colon;
3994
3995 /* One special case: SIOCGIFCONF takes ifconf argument
 3996	   and requires the RTNL lock, because it sleeps writing
3997 to user space.
3998 */
3999
4000 if (cmd == SIOCGIFCONF) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004001 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004002 ret = dev_ifconf(net, (char __user *) arg);
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004003 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004004 return ret;
4005 }
4006 if (cmd == SIOCGIFNAME)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004007 return dev_ifname(net, (struct ifreq __user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004008
4009 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4010 return -EFAULT;
4011
4012 ifr.ifr_name[IFNAMSIZ-1] = 0;
4013
4014 colon = strchr(ifr.ifr_name, ':');
4015 if (colon)
4016 *colon = 0;
4017
4018 /*
4019 * See which interface the caller is talking about.
4020 */
4021
4022 switch (cmd) {
4023 /*
4024 * These ioctl calls:
4025 * - can be done by all.
4026 * - atomic and do not require locking.
4027 * - return a value
4028 */
4029 case SIOCGIFFLAGS:
4030 case SIOCGIFMETRIC:
4031 case SIOCGIFMTU:
4032 case SIOCGIFHWADDR:
4033 case SIOCGIFSLAVE:
4034 case SIOCGIFMAP:
4035 case SIOCGIFINDEX:
4036 case SIOCGIFTXQLEN:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004037 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004038 read_lock(&dev_base_lock);
Jeff Garzik14e3e072007-10-08 00:06:32 -07004039 ret = dev_ifsioc_locked(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004040 read_unlock(&dev_base_lock);
4041 if (!ret) {
4042 if (colon)
4043 *colon = ':';
4044 if (copy_to_user(arg, &ifr,
4045 sizeof(struct ifreq)))
4046 ret = -EFAULT;
4047 }
4048 return ret;
4049
4050 case SIOCETHTOOL:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004051 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004052 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004053 ret = dev_ethtool(net, &ifr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004054 rtnl_unlock();
4055 if (!ret) {
4056 if (colon)
4057 *colon = ':';
4058 if (copy_to_user(arg, &ifr,
4059 sizeof(struct ifreq)))
4060 ret = -EFAULT;
4061 }
4062 return ret;
4063
4064 /*
4065 * These ioctl calls:
4066 * - require superuser power.
4067 * - require strict serialization.
4068 * - return a value
4069 */
4070 case SIOCGMIIPHY:
4071 case SIOCGMIIREG:
4072 case SIOCSIFNAME:
4073 if (!capable(CAP_NET_ADMIN))
4074 return -EPERM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004075 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004076 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004077 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004078 rtnl_unlock();
4079 if (!ret) {
4080 if (colon)
4081 *colon = ':';
4082 if (copy_to_user(arg, &ifr,
4083 sizeof(struct ifreq)))
4084 ret = -EFAULT;
4085 }
4086 return ret;
4087
4088 /*
4089 * These ioctl calls:
4090 * - require superuser power.
4091 * - require strict serialization.
4092 * - do not return a value
4093 */
4094 case SIOCSIFFLAGS:
4095 case SIOCSIFMETRIC:
4096 case SIOCSIFMTU:
4097 case SIOCSIFMAP:
4098 case SIOCSIFHWADDR:
4099 case SIOCSIFSLAVE:
4100 case SIOCADDMULTI:
4101 case SIOCDELMULTI:
4102 case SIOCSIFHWBROADCAST:
4103 case SIOCSIFTXQLEN:
4104 case SIOCSMIIREG:
4105 case SIOCBONDENSLAVE:
4106 case SIOCBONDRELEASE:
4107 case SIOCBONDSETHWADDR:
Linus Torvalds1da177e2005-04-16 15:20:36 -07004108 case SIOCBONDCHANGEACTIVE:
4109 case SIOCBRADDIF:
4110 case SIOCBRDELIF:
4111 if (!capable(CAP_NET_ADMIN))
4112 return -EPERM;
Thomas Grafcabcac02006-01-24 12:46:33 -08004113 /* fall through */
4114 case SIOCBONDSLAVEINFOQUERY:
4115 case SIOCBONDINFOQUERY:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004116 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004117 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004118 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004119 rtnl_unlock();
4120 return ret;
4121
4122 case SIOCGIFMEM:
4123 /* Get the per device memory space. We can add this but
4124 * currently do not support it */
4125 case SIOCSIFMEM:
4126 /* Set the per device memory buffer space.
4127 * Not applicable in our case */
4128 case SIOCSIFLINK:
4129 return -EINVAL;
4130
4131 /*
4132 * Unknown or private ioctl.
4133 */
4134 default:
4135 if (cmd == SIOCWANDEV ||
4136 (cmd >= SIOCDEVPRIVATE &&
4137 cmd <= SIOCDEVPRIVATE + 15)) {
Eric W. Biederman881d9662007-09-17 11:56:21 -07004138 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004139 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004140 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004141 rtnl_unlock();
4142 if (!ret && copy_to_user(arg, &ifr,
4143 sizeof(struct ifreq)))
4144 ret = -EFAULT;
4145 return ret;
4146 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004147 /* Take care of Wireless Extensions */
Johannes Berg295f4a12007-04-26 20:43:56 -07004148 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004149 return wext_handle_ioctl(net, &ifr, cmd, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004150 return -EINVAL;
4151 }
4152}
4153
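/*
 * Userspace sketch: the usual read-modify-write through this ioctl
 * handler to bring an interface up.  SIOCSIFFLAGS is in the
 * CAP_NET_ADMIN group above, so this needs privilege.
 */
#if 0
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>

static int bring_up(int sock, const char *name)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0)
		return -1;
	ifr.ifr_flags |= IFF_UP;
	return ioctl(sock, SIOCSIFFLAGS, &ifr);
}
#endif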
4154
4155/**
4156 * dev_new_index - allocate an ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004157 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07004158 *
4159 * Returns a suitable unique value for a new device interface
4160 * number. The caller must hold the rtnl semaphore or the
4161 * dev_base_lock to be sure it remains unique.
4162 */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004163static int dev_new_index(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004164{
4165 static int ifindex;
4166 for (;;) {
4167 if (++ifindex <= 0)
4168 ifindex = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004169 if (!__dev_get_by_index(net, ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004170 return ifindex;
4171 }
4172}
4173
Linus Torvalds1da177e2005-04-16 15:20:36 -07004174/* Delayed registration/unregistration */
Denis Cheng3b5b34f2007-12-07 00:49:17 -08004175static LIST_HEAD(net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004176
Stephen Hemminger6f05f622007-03-08 20:46:03 -08004177static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004178{
Linus Torvalds1da177e2005-04-16 15:20:36 -07004179 list_add_tail(&dev->todo_list, &net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004180}
4181
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004182static void rollback_registered(struct net_device *dev)
4183{
4184 BUG_ON(dev_boot_phase);
4185 ASSERT_RTNL();
4186
 4187	/* Some devices call this without ever having registered, in order to unwind a failed initialization. */
4188 if (dev->reg_state == NETREG_UNINITIALIZED) {
4189 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4190 "was registered\n", dev->name, dev);
4191
4192 WARN_ON(1);
4193 return;
4194 }
4195
4196 BUG_ON(dev->reg_state != NETREG_REGISTERED);
4197
4198 /* If device is running, close it first. */
4199 dev_close(dev);
4200
4201 /* And unlink it from device chain. */
4202 unlist_netdevice(dev);
4203
4204 dev->reg_state = NETREG_UNREGISTERING;
4205
4206 synchronize_net();
4207
4208 /* Shutdown queueing discipline. */
4209 dev_shutdown(dev);
4210
4211
4212 /* Notify protocols, that we are about to destroy
4213 this device. They should clean all the things.
4214 */
4215 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4216
4217 /*
4218 * Flush the unicast and multicast chains
4219 */
4220 dev_addr_discard(dev);
4221
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004222 if (dev->netdev_ops->ndo_uninit)
4223 dev->netdev_ops->ndo_uninit(dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004224
4225 /* Notifier chain MUST detach us from master device. */
Ilpo Järvinen547b7922008-07-25 21:43:18 -07004226 WARN_ON(dev->master);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004227
4228 /* Remove entries from kobject tree */
4229 netdev_unregister_kobject(dev);
4230
4231 synchronize_net();
4232
4233 dev_put(dev);
4234}
4235
David S. Millere8a04642008-07-17 00:34:19 -07004236static void __netdev_init_queue_locks_one(struct net_device *dev,
4237 struct netdev_queue *dev_queue,
4238 void *_unused)
David S. Millerc773e842008-07-08 23:13:53 -07004239{
4240 spin_lock_init(&dev_queue->_xmit_lock);
David S. Millercf508b12008-07-22 14:16:42 -07004241 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
David S. Millerc773e842008-07-08 23:13:53 -07004242 dev_queue->xmit_lock_owner = -1;
4243}
4244
4245static void netdev_init_queue_locks(struct net_device *dev)
4246{
David S. Millere8a04642008-07-17 00:34:19 -07004247 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4248 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
David S. Millerc773e842008-07-08 23:13:53 -07004249}
4250
Herbert Xub63365a2008-10-23 01:11:29 -07004251unsigned long netdev_fix_features(unsigned long features, const char *name)
4252{
4253 /* Fix illegal SG+CSUM combinations. */
4254 if ((features & NETIF_F_SG) &&
4255 !(features & NETIF_F_ALL_CSUM)) {
4256 if (name)
4257 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4258 "checksum feature.\n", name);
4259 features &= ~NETIF_F_SG;
4260 }
4261
4262 /* TSO requires that SG is present as well. */
4263 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4264 if (name)
4265 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4266 "SG feature.\n", name);
4267 features &= ~NETIF_F_TSO;
4268 }
4269
4270 if (features & NETIF_F_UFO) {
4271 if (!(features & NETIF_F_GEN_CSUM)) {
4272 if (name)
4273 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4274 "since no NETIF_F_HW_CSUM feature.\n",
4275 name);
4276 features &= ~NETIF_F_UFO;
4277 }
4278
4279 if (!(features & NETIF_F_SG)) {
4280 if (name)
4281 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4282 "since no NETIF_F_SG feature.\n", name);
4283 features &= ~NETIF_F_UFO;
4284 }
4285 }
4286
4287 return features;
4288}
4289EXPORT_SYMBOL(netdev_fix_features);
4290
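/*
 * Hedged usage sketch: besides the register_netdevice() call below, a
 * driver that computes a feature set from several lower devices can
 * run the result through netdev_fix_features(), much as the bonding
 * driver does; a NULL name suppresses the log messages.
 */
#if 0
static unsigned long example_merge_features(unsigned long a, unsigned long b)
{
	return netdev_fix_features(a & b, NULL);
}
#endif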
Linus Torvalds1da177e2005-04-16 15:20:36 -07004291/**
4292 * register_netdevice - register a network device
4293 * @dev: device to register
4294 *
4295 * Take a completed network device structure and add it to the kernel
4296 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4297 * chain. 0 is returned on success. A negative errno code is returned
4298 * on a failure to set up the device, or if the name is a duplicate.
4299 *
4300 * Callers must hold the rtnl semaphore. You may want
4301 * register_netdev() instead of this.
4302 *
4303 * BUGS:
4304 * The locking appears insufficient to guarantee two parallel registers
4305 * will not get the same name.
4306 */
4307
4308int register_netdevice(struct net_device *dev)
4309{
4310 struct hlist_head *head;
4311 struct hlist_node *p;
4312 int ret;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004313 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004314
4315 BUG_ON(dev_boot_phase);
4316 ASSERT_RTNL();
4317
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004318 might_sleep();
4319
Linus Torvalds1da177e2005-04-16 15:20:36 -07004320 /* When net_device's are persistent, this will be fatal. */
4321 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004322 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004323
David S. Millerf1f28aa2008-07-15 00:08:33 -07004324 spin_lock_init(&dev->addr_list_lock);
David S. Millercf508b12008-07-22 14:16:42 -07004325 netdev_set_addr_lockdep_class(dev);
David S. Millerc773e842008-07-08 23:13:53 -07004326 netdev_init_queue_locks(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004327
Linus Torvalds1da177e2005-04-16 15:20:36 -07004328 dev->iflink = -1;
4329
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004330#ifdef CONFIG_COMPAT_NET_DEV_OPS
 4331	/* Netdevice_ops API compatibility support.
4332 * This is temporary until all network devices are converted.
4333 */
4334 if (dev->netdev_ops) {
4335 const struct net_device_ops *ops = dev->netdev_ops;
4336
4337 dev->init = ops->ndo_init;
4338 dev->uninit = ops->ndo_uninit;
4339 dev->open = ops->ndo_open;
4340 dev->change_rx_flags = ops->ndo_change_rx_flags;
4341 dev->set_rx_mode = ops->ndo_set_rx_mode;
4342 dev->set_multicast_list = ops->ndo_set_multicast_list;
4343 dev->set_mac_address = ops->ndo_set_mac_address;
4344 dev->validate_addr = ops->ndo_validate_addr;
4345 dev->do_ioctl = ops->ndo_do_ioctl;
4346 dev->set_config = ops->ndo_set_config;
4347 dev->change_mtu = ops->ndo_change_mtu;
4348 dev->tx_timeout = ops->ndo_tx_timeout;
4349 dev->get_stats = ops->ndo_get_stats;
4350 dev->vlan_rx_register = ops->ndo_vlan_rx_register;
4351 dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
4352 dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
4353#ifdef CONFIG_NET_POLL_CONTROLLER
4354 dev->poll_controller = ops->ndo_poll_controller;
4355#endif
4356 } else {
4357 char drivername[64];
4358 pr_info("%s (%s): not using net_device_ops yet\n",
4359 dev->name, netdev_drivername(dev, drivername, 64));
4360
4361 /* This works only because net_device_ops and the
4362 compatiablity structure are the same. */
4363 dev->netdev_ops = (void *) &(dev->init);
4364 }
4365#endif
4366
Linus Torvalds1da177e2005-04-16 15:20:36 -07004367 /* Init, if this function is available */
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004368 if (dev->netdev_ops->ndo_init) {
4369 ret = dev->netdev_ops->ndo_init(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004370 if (ret) {
4371 if (ret > 0)
4372 ret = -EIO;
Adrian Bunk90833aa2006-11-13 16:02:22 -08004373 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004374 }
4375 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004376
Linus Torvalds1da177e2005-04-16 15:20:36 -07004377 if (!dev_valid_name(dev->name)) {
4378 ret = -EINVAL;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004379 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004380 }
4381
Eric W. Biederman881d9662007-09-17 11:56:21 -07004382 dev->ifindex = dev_new_index(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004383 if (dev->iflink == -1)
4384 dev->iflink = dev->ifindex;
4385
4386 /* Check for existence of name */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004387 head = dev_name_hash(net, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004388 hlist_for_each(p, head) {
4389 struct net_device *d
4390 = hlist_entry(p, struct net_device, name_hlist);
4391 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4392 ret = -EEXIST;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004393 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004394 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004395 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004396
Stephen Hemmingerd212f872007-06-27 00:47:37 -07004397 /* Fix illegal checksum combinations */
4398 if ((dev->features & NETIF_F_HW_CSUM) &&
4399 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4400 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4401 dev->name);
4402 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4403 }
4404
4405 if ((dev->features & NETIF_F_NO_CSUM) &&
4406 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4407 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4408 dev->name);
4409 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4410 }
4411
Herbert Xub63365a2008-10-23 01:11:29 -07004412 dev->features = netdev_fix_features(dev->features, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004413
Lennert Buytenheke5a4a722008-08-03 01:23:10 -07004414 /* Enable software GSO if SG is supported. */
4415 if (dev->features & NETIF_F_SG)
4416 dev->features |= NETIF_F_GSO;
4417
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07004418 netdev_initialize_kobject(dev);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004419 ret = netdev_register_kobject(dev);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004420 if (ret)
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004421 goto err_uninit;
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004422 dev->reg_state = NETREG_REGISTERED;
4423
Linus Torvalds1da177e2005-04-16 15:20:36 -07004424 /*
4425 * Default initial state at registry is that the
4426 * device is present.
4427 */
4428
4429 set_bit(__LINK_STATE_PRESENT, &dev->state);
4430
Linus Torvalds1da177e2005-04-16 15:20:36 -07004431 dev_init_scheduler(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004432 dev_hold(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004433 list_netdevice(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004434
4435 /* Notify protocols, that a new device appeared. */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004436 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07004437 ret = notifier_to_errno(ret);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004438 if (ret) {
4439 rollback_registered(dev);
4440 dev->reg_state = NETREG_UNREGISTERED;
4441 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004442
4443out:
4444 return ret;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004445
4446err_uninit:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004447 if (dev->netdev_ops->ndo_uninit)
4448 dev->netdev_ops->ndo_uninit(dev);
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004449 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004450}

/**
 * register_netdev - register a network device
 * @dev: device to register
 *
 * Take a completed network device structure and add it to the kernel
 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 * chain. 0 is returned on success. A negative errno code is returned
 * on a failure to set up the device, or if the name is a duplicate.
 *
 * This is a wrapper around register_netdevice that takes the rtnl semaphore
 * and expands the device name if you passed a format string to
 * alloc_netdev.
 */
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();

	/*
	 * If the name is a format string the caller wants us to do a
	 * name allocation.
	 */
	if (strchr(dev->name, '%')) {
		err = dev_alloc_name(dev, dev->name);
		if (err < 0)
			goto out;
	}

	err = register_netdevice(dev);
out:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);
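
/*
 * Illustrative sketch (not part of this file): a minimal driver
 * registering a device through the wrapper above. All "example"
 * names are hypothetical.
 *
 *	static struct net_device *example_dev;
 *
 *	static int __init example_init(void)
 *	{
 *		int err;
 *
 *		example_dev = alloc_netdev(0, "example%d", ether_setup);
 *		if (!example_dev)
 *			return -ENOMEM;
 *
 *		err = register_netdev(example_dev);	// expands "%d"
 *		if (err)
 *			free_netdev(example_dev);
 *		return err;
 *	}
 */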

/*
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}
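
/*
 * Illustrative sketch (not part of this file): the kind of notifier a
 * protocol holding long-lived device references is expected to register,
 * so that netdev_wait_allrefs() above can make progress. The "example"
 * names are hypothetical.
 *
 *	static struct net_device *example_held_dev;	// holds a dev_hold() ref
 *
 *	static int example_netdev_event(struct notifier_block *nb,
 *					unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UNREGISTER && dev == example_held_dev) {
 *			example_held_dev = NULL;
 *			dev_put(dev);	// give the reference back
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_notifier = {
 *		.notifier_call = example_netdev_event,
 *	};
 *	// registered earlier with register_netdevice_notifier(&example_notifier)
 */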

/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * We must not return until all unregister events added during
 * the interval the lock was held have been completed.
 */
void netdev_run_todo(void)
{
	struct list_head list;

	/* Snapshot list, allow later requests */
	list_replace_init(&net_todo_list, &list);

	__rtnl_unlock();

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		on_each_cpu(flush_backlog, dev, 1);

		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		WARN_ON(dev->ip_ptr);
		WARN_ON(dev->ip6_ptr);
		WARN_ON(dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}
}

/**
 * dev_get_stats - get network device statistics
 * @dev: device to get statistics from
 *
 * Get network statistics from device. The device driver may provide
 * its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
 * the internal statistics structure is used.
 */
const struct net_device_stats *dev_get_stats(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if (ops->ndo_get_stats)
		return ops->ndo_get_stats(dev);
	else
		return &dev->stats;
}
EXPORT_SYMBOL(dev_get_stats);
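
/*
 * Illustrative sketch (not part of this file): a driver overriding the
 * default statistics path by supplying ndo_get_stats. The "example"
 * names are hypothetical.
 *
 *	static struct net_device_stats *example_get_stats(struct net_device *dev)
 *	{
 *		struct example_priv *priv = netdev_priv(dev);
 *
 *		// fold a private hardware counter into the generic struct
 *		dev->stats.rx_dropped = priv->hw_rx_drop_count;
 *		return &dev->stats;
 *	}
 *
 *	static const struct net_device_ops example_netdev_ops = {
 *		.ndo_get_stats = example_get_stats,
 *	};
 */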

static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue,
				  void *_unused)
{
	queue->dev = dev;
}

static void netdev_init_queues(struct net_device *dev)
{
	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
	spin_lock_init(&dev->tx_global_lock);
}

/**
 * alloc_netdev_mq - allocate network device
 * @sizeof_priv: size of private data to allocate space for
 * @name: device name format string
 * @setup: callback to initialize device
 * @queue_count: the number of subqueues to allocate
 *
 * Allocates a struct net_device with private data area for driver use
 * and performs basic initialization. Also allocates subqueue structs
 * for each queue on the device at the end of the netdevice.
 */
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *), unsigned int queue_count)
{
	struct netdev_queue *tx;
	struct net_device *dev;
	size_t alloc_size;
	void *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure 32-byte alignment of private area */
		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
		alloc_size += sizeof_priv;
	}
	/* ensure 32-byte alignment of whole construct */
	alloc_size += NETDEV_ALIGN_CONST;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
		return NULL;
	}

	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
	if (!tx) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate "
		       "tx qdiscs.\n");
		kfree(p);
		return NULL;
	}

	dev = (struct net_device *)
		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
	dev->padded = (char *)dev - (char *)p;
	dev_net_set(dev, &init_net);

	dev->_tx = tx;
	dev->num_tx_queues = queue_count;
	dev->real_num_tx_queues = queue_count;

	dev->gso_max_size = GSO_MAX_SIZE;

	netdev_init_queues(dev);

	INIT_LIST_HEAD(&dev->napi_list);
	setup(dev);
	strcpy(dev->name, name);
	return dev;
}
EXPORT_SYMBOL(alloc_netdev_mq);
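
/*
 * Illustrative sketch (not part of this file): allocating a multiqueue
 * device with a private area. The "example" names are hypothetical.
 *
 *	struct example_priv {
 *		spinlock_t lock;
 *	};
 *
 *	struct net_device *dev;
 *
 *	dev = alloc_netdev_mq(sizeof(struct example_priv), "eth%d",
 *			      ether_setup, 4);	// 4 TX subqueues
 *	if (dev) {
 *		struct example_priv *priv = netdev_priv(dev);
 *		spin_lock_init(&priv->lock);
 *	}
 */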

/**
 * free_netdev - free network device
 * @dev: device
 *
 * This function does the last stage of destroying an allocated device
 * interface. The reference to the device object is released.
 * If this is the last reference then it will be freed.
 */
void free_netdev(struct net_device *dev)
{
	struct napi_struct *p, *n;

	release_net(dev_net(dev));

	kfree(dev->_tx);

	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
		netif_napi_del(p);

	/* Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
}

/**
 * synchronize_net - Synchronize with packet receive processing
 *
 * Wait for packets currently being received to be done.
 * Does not block later packets from starting.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
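
/*
 * Illustrative sketch (not part of this file): the usual pattern around
 * synchronize_net() when tearing down a packet handler. The "example"
 * names are hypothetical.
 *
 *	__dev_remove_pack(&example_packet_type);	// unhook the handler
 *	synchronize_net();	// wait out receivers that may still see it
 *	kfree(example_state);	// now safe to free the handler's state
 */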

/**
 * unregister_netdevice - remove device from the kernel
 * @dev: device
 *
 * This function shuts down a device interface and removes it
 * from the kernel tables.
 *
 * Callers must hold the rtnl semaphore. You may want
 * unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	rollback_registered(dev);
	/* Finish processing unregister after unlock */
	net_set_todo(dev);
}

/**
 * unregister_netdev - remove device from the kernel
 * @dev: device
 *
 * This function shuts down a device interface and removes it
 * from the kernel tables.
 *
 * This is just a wrapper for unregister_netdevice that takes
 * the rtnl semaphore. In general you want to use this and not
 * unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
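
/*
 * Illustrative sketch (not part of this file): module teardown order
 * matching the todo sequence documented above netdev_run_todo().
 * The "example" names are hypothetical.
 *
 *	static void __exit example_exit(void)
 *	{
 *		unregister_netdev(example_dev);	// schedules the todo work
 *		free_netdev(example_dev);	// safe only after unregister
 *	}
 */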

/**
 * dev_change_net_namespace - move device to different network namespace
 * @dev: device
 * @net: network namespace
 * @pat: If not NULL name pattern to try if the current device name
 *	 is already taken in the destination network namespace.
 *
 * This function shuts down a device interface and moves it
 * to a new network namespace. On success 0 is returned, on
 * a failure a negative errno code is returned.
 *
 * Callers must hold the rtnl semaphore.
 */

int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	char buf[IFNAMSIZ];
	const char *destname;
	int err;

	ASSERT_RTNL();

	/* Don't allow namespace local devices to be moved. */
	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

#ifdef CONFIG_SYSFS
	/* Don't allow real devices to be moved when sysfs
	 * is enabled.
	 */
	err = -EINVAL;
	if (dev->dev.parent)
		goto out;
#endif

	/* Ensure the device has been registered */
	err = -EINVAL;
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	/* Get out if there is nothing to do */
	err = 0;
	if (net_eq(dev_net(dev), net))
		goto out;

	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
	err = -EEXIST;
	destname = dev->name;
	if (__dev_get_by_name(net, destname)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (!dev_valid_name(pat))
			goto out;
		if (strchr(pat, '%')) {
			if (__dev_alloc_name(net, pat, buf) < 0)
				goto out;
			destname = buf;
		} else
			destname = pat;
		if (__dev_get_by_name(net, destname))
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice and unregister_netdevice.
	 */

	/* If device is running close it first. */
	dev_close(dev);

	/* And unlink it from device chain */
	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols that we are about to destroy
	 * this device. They should clean all the things.
	 */
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 * Flush the unicast and multicast chains
	 */
	dev_addr_discard(dev);

	netdev_unregister_kobject(dev);

	/* Actually switch the network namespace */
	dev_net_set(dev, net);

	/* Assign the new device name */
	if (destname != dev->name)
		strcpy(dev->name, destname);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex)) {
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	/* Fixup kobjects */
	err = netdev_register_kobject(dev);
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols that a new device appeared. */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	synchronize_net();
	err = 0;
out:
	return err;
}
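
/*
 * Illustrative sketch (not part of this file): moving a device into
 * another namespace under the rtnl lock, with a fallback name pattern.
 * The "example" names are hypothetical.
 *
 *	rtnl_lock();
 *	err = dev_change_net_namespace(example_dev, example_net, "moved%d");
 *	rtnl_unlock();
 */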

static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct Qdisc **list_net;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Find end of our output_queue. */
	list_net = &sd->output_queue;
	while (*list_net)
		list_net = &(*list_net)->next_sched;
	/* Append output queue from offline CPU. */
	*list_net = oldsd->output_queue;
	oldsd->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
		netif_rx(skb);

	return NOTIFY_OK;
}

#ifdef CONFIG_NET_DMA
/**
 * net_dma_rebalance - try to maintain one DMA channel per CPU
 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
 *
 * This is called when the number of channels allocated to the net_dma client
 * changes. The net_dma client tries to have one DMA channel per CPU.
 */

static void net_dma_rebalance(struct net_dma *net_dma)
{
	unsigned int cpu, i, n, chan_idx;
	struct dma_chan *chan;

	if (cpus_empty(net_dma->channel_mask)) {
		for_each_online_cpu(cpu)
			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
		return;
	}

	i = 0;
	cpu = first_cpu(cpu_online_map);

	for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
		chan = net_dma->channels[chan_idx];

		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
		   + (i < (num_online_cpus() %
			cpus_weight(net_dma->channel_mask)) ? 1 : 0));

		while (n) {
			per_cpu(softnet_data, cpu).net_dma = chan;
			cpu = next_cpu(cpu, cpu_online_map);
			n--;
		}
		i++;
	}
}

/**
 * netdev_dma_event - event callback for the net_dma_client
 * @client: should always be net_dma_client
 * @chan: DMA channel for the event
 * @state: DMA state to be handled
 */
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state)
{
	int i, found = 0, pos = -1;
	struct net_dma *net_dma =
		container_of(client, struct net_dma, client);
	enum dma_state_client ack = DMA_DUP; /* default: take no action */

	spin_lock(&net_dma->lock);
	switch (state) {
	case DMA_RESOURCE_AVAILABLE:
		for (i = 0; i < nr_cpu_ids; i++)
			if (net_dma->channels[i] == chan) {
				found = 1;
				break;
			} else if (net_dma->channels[i] == NULL && pos < 0)
				pos = i;

		if (!found && pos >= 0) {
			ack = DMA_ACK;
			net_dma->channels[pos] = chan;
			cpu_set(pos, net_dma->channel_mask);
			net_dma_rebalance(net_dma);
		}
		break;
	case DMA_RESOURCE_REMOVED:
		for (i = 0; i < nr_cpu_ids; i++)
			if (net_dma->channels[i] == chan) {
				found = 1;
				pos = i;
				break;
			}

		if (found) {
			ack = DMA_ACK;
			cpu_clear(pos, net_dma->channel_mask);
			net_dma->channels[i] = NULL;
			net_dma_rebalance(net_dma);
		}
		break;
	default:
		break;
	}
	spin_unlock(&net_dma->lock);

	return ack;
}

/**
 * netdev_dma_register - register the networking subsystem as a DMA client
 */
static int __init netdev_dma_register(void)
{
	/* channels is an array of nr_cpu_ids channel pointers */
	net_dma.channels = kzalloc(nr_cpu_ids * sizeof(*net_dma.channels),
				   GFP_KERNEL);
	if (unlikely(!net_dma.channels)) {
		printk(KERN_NOTICE
		       "netdev_dma: no memory for net_dma.channels\n");
		return -ENOMEM;
	}
	spin_lock_init(&net_dma.lock);
	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
	dma_async_client_register(&net_dma.client);
	dma_async_client_chan_request(&net_dma.client);
	return 0;
}

#else
static int __init netdev_dma_register(void) { return -ENODEV; }
#endif /* CONFIG_NET_DMA */

/**
 * netdev_increment_features - increment feature set by one
 * @all: current feature set
 * @one: new feature set
 * @mask: mask feature set
 *
 * Computes a new feature set after adding a device with feature set
 * @one to the master device with current feature set @all. Will not
 * enable anything that is off in @mask. Returns the new feature set.
 */
unsigned long netdev_increment_features(unsigned long all, unsigned long one,
					unsigned long mask)
{
	/* If device needs checksumming, downgrade to it. */
	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
	else if (mask & NETIF_F_ALL_CSUM) {
		/* If one device supports v4/v6 checksumming, set for all. */
		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
		    !(all & NETIF_F_GEN_CSUM)) {
			all &= ~NETIF_F_ALL_CSUM;
			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
		}

		/* If one device supports hw checksumming, set for all. */
		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
			all &= ~NETIF_F_ALL_CSUM;
			all |= NETIF_F_HW_CSUM;
		}
	}

	one |= NETIF_F_ALL_CSUM;

	one |= all & NETIF_F_ONE_FOR_ALL;
	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
	all |= one & mask & NETIF_F_ONE_FOR_ALL;

	return all;
}
EXPORT_SYMBOL(netdev_increment_features);
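
/*
 * Illustrative sketch (not part of this file): how an aggregating master
 * (e.g. a bond) can recompute its feature set over its slaves with the
 * helper above. The "example"/"EXAMPLE" names are hypothetical.
 *
 *	unsigned long features = 0;
 *	struct example_slave *slave;	// has ->dev and ->list members
 *
 *	list_for_each_entry(slave, &example_slave_list, list)
 *		features = netdev_increment_features(features,
 *						     slave->dev->features,
 *						     EXAMPLE_FEATURE_MASK);
 *	example_master->features = features;
 */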

static struct hlist_head *netdev_create_hash(void)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
	if (hash != NULL)
		for (i = 0; i < NETDEV_HASHENTRIES; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}

/* Initialize per network namespace state */
static int __net_init netdev_init(struct net *net)
{
	INIT_LIST_HEAD(&net->dev_base_head);

	net->dev_name_head = netdev_create_hash();
	if (net->dev_name_head == NULL)
		goto err_name;

	net->dev_index_head = netdev_create_hash();
	if (net->dev_index_head == NULL)
		goto err_idx;

	return 0;

err_idx:
	kfree(net->dev_name_head);
err_name:
	return -ENOMEM;
}

/**
 * netdev_drivername - network driver for the device
 * @dev: network device
 * @buffer: buffer for resulting name
 * @len: size of buffer
 *
 * Determine network driver for device.
 */
char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
{
	const struct device_driver *driver;
	const struct device *parent;

	if (len <= 0 || !buffer)
		return buffer;
	buffer[0] = 0;

	parent = dev->dev.parent;

	if (!parent)
		return buffer;

	driver = parent->driver;
	if (driver && driver->name)
		strlcpy(buffer, driver->name, len);
	return buffer;
}
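
/*
 * Illustrative sketch (not part of this file): netdev_drivername() is
 * handy in diagnostics, e.g. when reporting a transmit timeout:
 *
 *	char drivername[64];
 *
 *	printk(KERN_WARNING "%s (%s): transmit timed out\n",
 *	       dev->name, netdev_drivername(dev, drivername, 64));
 */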

static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev;
	/*
	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
restart:
	for_each_netdev(net, dev) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore unmovable devices (i.e. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Delete virtual devices */
		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
			dev->rtnl_link_ops->dellink(dev);
			goto restart;
		}

		/* Push remaining network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
			       __func__, dev->name, err);
			BUG();
		}
		goto restart;
	}
	rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
};

/*
 * Initialize the DEV module. At boot time this walks the device list and
 * unhooks any devices that fail to initialise (normally hardware not
 * present) and leaves us with a valid list of present and active devices.
 *
 */

/*
 * This is called single threaded during boot, so no need
 * to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 * Initialise the packet receive queues.
	 */

	for_each_possible_cpu(i) {
		struct softnet_data *queue;

		queue = &per_cpu(softnet_data, i);
		skb_queue_head_init(&queue->input_pkt_queue);
		queue->completion_queue = NULL;
		INIT_LIST_HEAD(&queue->poll_list);

		queue->backlog.poll = process_backlog;
		queue->backlog.weight = weight_p;
		queue->backlog.gro_list = NULL;
	}

	dev_boot_phase = 0;

	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must
	 * be present too. Since we now dynamically allocate and free
	 * the loopback device, maintain this invariant by keeping the
	 * loopback device first on the list of network devices, so that
	 * it is the first device that appears and the last network
	 * device that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	netdev_dma_register();

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);

EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

EXPORT_SYMBOL(dev_load);

EXPORT_PER_CPU_SYMBOL(softnet_data);