/*
 *	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *	Paul Rusty Russell	:	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *	       sure which should go first, but I bet it won't make much
 *	       difference if we are running VLANs.  The good news is that
 *	       this protocol won't be in the list unless compiled in, so
 *	       the average user (w/out VLANs) will not be adversely affected.
 *	       --BLG
 *
 *		0800	IP
 *		8100	802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
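
/*
 * Illustrative note (not part of the original source): with these masks
 * a handler's bucket is picked from the low nibble of the protocol
 * value, exactly as dev_add_pack() below does:
 *
 *	hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 *	list_add_rcu(&pt->list, &ptype_base[hash]);
 *
 * e.g. IPv4 (0x0800) hashes to bucket 0 and ARP (0x0806) to bucket 6.
 */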

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;	/* Taps */

#ifdef CONFIG_NET_DMA
struct net_dma {
	struct dma_client	client;
	spinlock_t		lock;
	cpumask_t		channel_mask;
	struct dma_chan		**channels;
};

static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state);

static struct net_dma net_dma = {
	.client = {
		.event_callback = netdev_dma_event,
	},
};
#endif

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * For example usages see register_netdevice() and unregister_netdevice(),
 * which must be called with the rtnl semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);
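
/*
 * Illustrative sketch (not part of the original source): a pure reader
 * walking the device list under dev_base_lock, as described above;
 * "do_something" stands in for a hypothetical per-device operation.
 *
 *	read_lock(&dev_base_lock);
 *	for_each_netdev(net, dev)
 *		do_something(dev);
 *	read_unlock(&dev_base_lock);
 */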

#define NETDEV_HASHBITS	8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail(&dev->dev_list, &net->dev_base_head);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}

/* Device list removal */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
	 ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};

static const char *netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
	 "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if a protocol handler that mangles packets
 *	is first on the list, it cannot sense that the packet is cloned
 *	and should be copied-on-write, so it will change the packet and
 *	subsequent readers will get a broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep, therefore it can not guarantee that all
 *	CPUs that are in the middle of receiving packets will see the new
 *	packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}

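/*
 * Illustrative sketch (not part of the original source): registering a
 * handler for IPv4 frames. "my_ipv4_rcv" is a hypothetical function
 * with the packet_type ->func signature.
 *
 *	static struct packet_type my_ipv4_packet_type = {
 *		.type = __constant_htons(ETH_P_IP),
 *		.func = my_ipv4_rcv,
 *	};
 *
 *	dev_add_pack(&my_ipv4_packet_type);
 */
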
/**
 *	__dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPUs have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine for
 *	all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings are found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq	= s[i].map.irq;
			dev->base_addr	= s[i].map.base_addr;
			dev->mem_start	= s[i].map.mem_start;
			dev->mem_end	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}


/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings are found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
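
/*
 * Illustrative sketch (not part of the original source): given the
 * parsing above, a boot parameter such as
 *
 *	netdev=5,0x340,0,0,eth0
 *
 * would be saved as irq=5, base_addr=0x340, mem_start=0, mem_end=0 for
 * the device named eth0. The specific values here are made up.
 */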

/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_name_hash(net, name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns a pointer to the device,
 *	or %NULL if the device is not found. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(net, ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns a pointer to the device,
 *	or NULL if the device is not found. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(net, ifindex);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns a pointer to the
 *	device, or NULL if the device is not found. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking.
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}

EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns a pointer to
 *	the device, or NULL if no matching device is found. The device
 *	returned has had a reference added and the pointer is safe until
 *	the user calls dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	read_lock(&dev_base_lock);
	for_each_netdev(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return ret;
}

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}
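
/*
 * Illustrative examples (not part of the original source): with the
 * rules above, dev_valid_name("eth0") returns 1, while
 * dev_valid_name(""), dev_valid_name("."), dev_valid_name("a/b") and
 * dev_valid_name("a b") all return 0.
 */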

/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
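
/*
 * Illustrative sketch (not part of the original source): if eth0 and
 * eth1 already exist, then with the free-map scan above
 *
 *	dev_alloc_name(dev, "eth%d");
 *
 * would write "eth2" into dev->name and return 2. The device names in
 * this example are assumed, not taken from a real system.
 */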


/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d"
 *	for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;

	memcpy(oldname, dev->name, IFNAMSIZ);

	if (strchr(newname, '%')) {
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
	}
	else if (__dev_get_by_name(net, newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

rollback:
	/* For now only devices in the initial network namespace
	 * are in sysfs.
	 */
	if (net == &init_net) {
		ret = device_rename(&dev->dev, dev->name);
		if (ret) {
			memcpy(dev->name, oldname, IFNAMSIZ);
			return ret;
		}
	}

	write_lock_bh(&dev_base_lock);
	hlist_del(&dev->name_hlist);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		if (err) {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
		} else {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		}
	}

	return err;
}

/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from info
 *
 *	Set ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		if (dev->ifalias) {
			kfree(dev->ifalias);
			dev->ifalias = NULL;
		}
		return 0;
	}

	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
	if (!dev->ifalias)
		return -ENOMEM;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}


/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}

void netdev_bonding_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);

/**
 *	dev_load	- load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	read_unlock(&dev_base_lock);

	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret = 0;

	ASSERT_RTNL();

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	/*
	 *	If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return ret;
}

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for death while the device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	return 0;
}


/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
	    dev->ethtool_ops->set_flags) {
		u32 flags = dev->ethtool_ops->get_flags(dev);
		if (flags & ETH_FLAG_LRO) {
			flags &= ~ETH_FLAG_LRO;
			dev->ethtool_ops->set_flags(dev, flags);
		}
	}
	WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);


static int dev_boot_phase = 1;

/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 *	When registered, all registration and up events are replayed
 *	to the new notifier to allow the device to have a race free
 *	view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				break;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}

	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
1284/**
1285 * unregister_netdevice_notifier - unregister a network notifier block
1286 * @nb: notifier
1287 *
1288 * Unregister a notifier previously registered by
1289 * register_netdevice_notifier(). The notifier is unlinked into the
1290 * kernel structures and may then be reused. A negative errno code
1291 * is returned on a failure.
1292 */
1293
1294int unregister_netdevice_notifier(struct notifier_block *nb)
1295{
Herbert Xu9f514952006-03-25 01:24:25 -08001296 int err;
1297
1298 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001299 err = raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xu9f514952006-03-25 01:24:25 -08001300 rtnl_unlock();
1301 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302}
1303
1304/**
1305 * call_netdevice_notifiers - call all network notifier blocks
1306 * @val: value passed unmodified to notifier function
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07001307 * @dev: net_device pointer passed unmodified to notifier function
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308 *
1309 * Call all network notifier blocks. Parameters and return value
Alan Sternf07d5b92006-05-09 15:23:03 -07001310 * are as for raw_notifier_call_chain().
Linus Torvalds1da177e2005-04-16 15:20:36 -07001311 */
1312
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001313int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314{
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001315 return raw_notifier_call_chain(&netdev_chain, val, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316}
1317
1318/* When > 0 there are consumers of rx skb time stamps */
1319static atomic_t netstamp_needed = ATOMIC_INIT(0);
1320
1321void net_enable_timestamp(void)
1322{
1323 atomic_inc(&netstamp_needed);
1324}
1325
1326void net_disable_timestamp(void)
1327{
1328 atomic_dec(&netstamp_needed);
1329}
1330
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001331static inline void net_timestamp(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332{
1333 if (atomic_read(&netstamp_needed))
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001334 __net_timestamp(skb);
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001335 else
1336 skb->tstamp.tv64 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337}
1338
1339/*
1340 * Support routine. Sends outgoing frames to any network
1341 * taps currently in use.
1342 */
1343
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001344static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345{
1346 struct packet_type *ptype;
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001347
1348 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349
1350 rcu_read_lock();
1351 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1352 /* Never send packets back to the socket
1353 * they originated from - MvS (miquels@drinkel.ow.org)
1354 */
1355 if ((ptype->dev == dev || !ptype->dev) &&
1356 (ptype->af_packet_priv == NULL ||
1357 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1358 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1359 if (!skb2)
1360 break;
1361
1362 /* skb->nh should be correctly
1363 set by sender, so that the second statement is
1364 just protection against buggy protocols.
1365 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001366 skb_reset_mac_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001368 if (skb_network_header(skb2) < skb2->data ||
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001369 skb2->network_header > skb2->tail) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001370 if (net_ratelimit())
1371 printk(KERN_CRIT "protocol %04x is "
1372 "buggy, dev %s\n",
1373 skb2->protocol, dev->name);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07001374 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 }
1376
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001377 skb2->transport_header = skb2->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 skb2->pkt_type = PACKET_OUTGOING;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001379 ptype->func(skb2, skb->dev, ptype, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 }
1381 }
1382 rcu_read_unlock();
1383}
1384
Denis Vlasenko56079432006-03-29 15:57:29 -08001385
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001386static inline void __netif_reschedule(struct Qdisc *q)
1387{
1388 struct softnet_data *sd;
1389 unsigned long flags;
1390
1391 local_irq_save(flags);
1392 sd = &__get_cpu_var(softnet_data);
1393 q->next_sched = sd->output_queue;
1394 sd->output_queue = q;
1395 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1396 local_irq_restore(flags);
1397}
1398
David S. Miller37437bb2008-07-16 02:15:04 -07001399void __netif_schedule(struct Qdisc *q)
Denis Vlasenko56079432006-03-29 15:57:29 -08001400{
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001401 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1402 __netif_reschedule(q);
Denis Vlasenko56079432006-03-29 15:57:29 -08001403}
1404EXPORT_SYMBOL(__netif_schedule);
1405
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001406void dev_kfree_skb_irq(struct sk_buff *skb)
Denis Vlasenko56079432006-03-29 15:57:29 -08001407{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001408 if (atomic_dec_and_test(&skb->users)) {
1409 struct softnet_data *sd;
1410 unsigned long flags;
Denis Vlasenko56079432006-03-29 15:57:29 -08001411
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001412 local_irq_save(flags);
1413 sd = &__get_cpu_var(softnet_data);
1414 skb->next = sd->completion_queue;
1415 sd->completion_queue = skb;
1416 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1417 local_irq_restore(flags);
1418 }
Denis Vlasenko56079432006-03-29 15:57:29 -08001419}
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001420EXPORT_SYMBOL(dev_kfree_skb_irq);
Denis Vlasenko56079432006-03-29 15:57:29 -08001421
1422void dev_kfree_skb_any(struct sk_buff *skb)
1423{
1424 if (in_irq() || irqs_disabled())
1425 dev_kfree_skb_irq(skb);
1426 else
1427 dev_kfree_skb(skb);
1428}
1429EXPORT_SYMBOL(dev_kfree_skb_any);
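/*
 * Illustrative pattern (driver names are invented): a tx-completion
 * handler that may run in hardirq, softirq or process context frees
 * with dev_kfree_skb_any() and lets the test above pick the safe
 * variant; a pure hardirq path could call dev_kfree_skb_irq()
 * directly, while plain dev_kfree_skb() would be a bug there.
 *
 *	static void my_tx_complete(struct my_priv *priv)
 *	{
 *		struct sk_buff *skb = priv->tx_skb;
 *
 *		priv->tx_skb = NULL;
 *		dev_kfree_skb_any(skb);
 *	}
 */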
1430
1431
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001432/**
1433 * netif_device_detach - mark device as removed
1434 * @dev: network device
1435 *
1436 * Mark device as removed from system and therefore no longer available.
1437 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001438void netif_device_detach(struct net_device *dev)
1439{
1440 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1441 netif_running(dev)) {
1442 netif_stop_queue(dev);
1443 }
1444}
1445EXPORT_SYMBOL(netif_device_detach);
1446
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001447/**
1448 * netif_device_attach - mark device as attached
1449 * @dev: network device
1450 *
 1451 * Mark device as attached to the system and restart if needed.
1452 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001453void netif_device_attach(struct net_device *dev)
1454{
1455 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1456 netif_running(dev)) {
1457 netif_wake_queue(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001458 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001459 }
1460}
1461EXPORT_SYMBOL(netif_device_attach);
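/*
 * Typical pairing, sketched with invented names: a PCI driver
 * detaches on suspend and attaches on resume so the stack stops
 * feeding packets to powered-down hardware.
 *
 *	static int my_suspend(struct pci_dev *pdev, pm_message_t state)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_detach(dev);
 *		return 0;
 *	}
 *
 *	static int my_resume(struct pci_dev *pdev)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_attach(dev);
 *		return 0;
 *	}
 */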
1462
Ben Hutchings6de329e2008-06-16 17:02:28 -07001463static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1464{
1465 return ((features & NETIF_F_GEN_CSUM) ||
1466 ((features & NETIF_F_IP_CSUM) &&
1467 protocol == htons(ETH_P_IP)) ||
1468 ((features & NETIF_F_IPV6_CSUM) &&
1469 protocol == htons(ETH_P_IPV6)));
1470}
1471
1472static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1473{
1474 if (can_checksum_protocol(dev->features, skb->protocol))
1475 return true;
1476
1477 if (skb->protocol == htons(ETH_P_8021Q)) {
1478 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1479 if (can_checksum_protocol(dev->features & dev->vlan_features,
1480 veh->h_vlan_encapsulated_proto))
1481 return true;
1482 }
1483
1484 return false;
1485}
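/*
 * Illustration (assumed driver setup, not from this file): a NIC able
 * to checksum IPv4 with or without a VLAN tag would advertise
 *
 *	dev->features      |= NETIF_F_IP_CSUM;
 *	dev->vlan_features |= NETIF_F_IP_CSUM;
 *
 * so that the ETH_P_8021Q branch above still answers true when the
 * encapsulated protocol is ETH_P_IP.
 */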
Denis Vlasenko56079432006-03-29 15:57:29 -08001486
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487/*
1488 * Invalidate hardware checksum when packet is to be mangled, and
1489 * complete checksum manually on outgoing path.
1490 */
Patrick McHardy84fa7932006-08-29 16:44:56 -07001491int skb_checksum_help(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492{
Al Virod3bc23e2006-11-14 21:24:49 -08001493 __wsum csum;
Herbert Xu663ead32007-04-09 11:59:07 -07001494 int ret = 0, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495
Patrick McHardy84fa7932006-08-29 16:44:56 -07001496 if (skb->ip_summed == CHECKSUM_COMPLETE)
Herbert Xua430a432006-07-08 13:34:56 -07001497 goto out_set_summed;
1498
1499 if (unlikely(skb_shinfo(skb)->gso_size)) {
Herbert Xua430a432006-07-08 13:34:56 -07001500 /* Let GSO fix up the checksum. */
1501 goto out_set_summed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502 }
1503
Herbert Xua0308472007-10-15 01:47:15 -07001504 offset = skb->csum_start - skb_headroom(skb);
1505 BUG_ON(offset >= skb_headlen(skb));
1506 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1507
1508 offset += skb->csum_offset;
1509 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1510
1511 if (skb_cloned(skb) &&
1512 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1514 if (ret)
1515 goto out;
1516 }
1517
Herbert Xua0308472007-10-15 01:47:15 -07001518 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
Herbert Xua430a432006-07-08 13:34:56 -07001519out_set_summed:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520 skb->ip_summed = CHECKSUM_NONE;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001521out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522 return ret;
1523}
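/*
 * Calling convention, sketched: code about to mangle a
 * CHECKSUM_PARTIAL skb, or to hand it to a device that cannot offload
 * it, resolves the checksum in software first, exactly as
 * dev_queue_xmit() does further down ("drop" is a hypothetical label):
 *
 *	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *	    skb_checksum_help(skb))
 *		goto drop;
 */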
1524
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001525/**
1526 * skb_gso_segment - Perform segmentation on skb.
1527 * @skb: buffer to segment
Herbert Xu576a30e2006-06-27 13:22:38 -07001528 * @features: features for the output path (see dev->features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001529 *
1530 * This function segments the given skb and returns a list of segments.
Herbert Xu576a30e2006-06-27 13:22:38 -07001531 *
1532 * It may return NULL if the skb requires no segmentation. This is
1533 * only possible when GSO is used for verifying header integrity.
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001534 */
Herbert Xu576a30e2006-06-27 13:22:38 -07001535struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001536{
1537 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1538 struct packet_type *ptype;
Al Viro252e3342006-11-14 20:48:11 -08001539 __be16 type = skb->protocol;
Herbert Xua430a432006-07-08 13:34:56 -07001540 int err;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001541
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001542 skb_reset_mac_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001543 skb->mac_len = skb->network_header - skb->mac_header;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001544 __skb_pull(skb, skb->mac_len);
1545
Herbert Xuf9d106a2007-04-23 22:36:13 -07001546 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001547 if (skb_header_cloned(skb) &&
1548 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1549 return ERR_PTR(err);
1550 }
1551
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001552 rcu_read_lock();
Pavel Emelyanov82d8a862007-11-26 20:12:58 +08001553 list_for_each_entry_rcu(ptype,
1554 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001555 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
Patrick McHardy84fa7932006-08-29 16:44:56 -07001556 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001557 err = ptype->gso_send_check(skb);
1558 segs = ERR_PTR(err);
1559 if (err || skb_gso_ok(skb, features))
1560 break;
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001561 __skb_push(skb, (skb->data -
1562 skb_network_header(skb)));
Herbert Xua430a432006-07-08 13:34:56 -07001563 }
Herbert Xu576a30e2006-06-27 13:22:38 -07001564 segs = ptype->gso_segment(skb, features);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001565 break;
1566 }
1567 }
1568 rcu_read_unlock();
1569
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001570 __skb_push(skb, skb->data - skb_mac_header(skb));
Herbert Xu576a30e2006-06-27 13:22:38 -07001571
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001572 return segs;
1573}
1574
1575EXPORT_SYMBOL(skb_gso_segment);
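/*
 * Minimal caller sketch (my_xmit_one() is invented): the segments come
 * back chained through skb->next, so a consumer detaches and sends
 * them one by one, as dev_hard_start_xmit() does below:
 *
 *	struct sk_buff *segs = skb_gso_segment(skb, dev->features);
 *
 *	if (IS_ERR(segs))
 *		return PTR_ERR(segs);
 *	while (segs) {
 *		struct sk_buff *nskb = segs;
 *
 *		segs = nskb->next;
 *		nskb->next = NULL;
 *		my_xmit_one(nskb);
 *	}
 */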
1576
Herbert Xufb286bb2005-11-10 13:01:24 -08001577/* Take action when hardware reception checksum errors are detected. */
1578#ifdef CONFIG_BUG
1579void netdev_rx_csum_fault(struct net_device *dev)
1580{
1581 if (net_ratelimit()) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001582 printk(KERN_ERR "%s: hw csum failure.\n",
Stephen Hemminger246a4212005-12-08 15:21:39 -08001583 dev ? dev->name : "<unknown>");
Herbert Xufb286bb2005-11-10 13:01:24 -08001584 dump_stack();
1585 }
1586}
1587EXPORT_SYMBOL(netdev_rx_csum_fault);
1588#endif
1589
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590/* Actually, we should eliminate this check as soon as we know that:
 1591 * 1. An IOMMU is present and allows us to map all the memory.
1592 * 2. No high memory really exists on this machine.
1593 */
1594
1595static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1596{
Herbert Xu3d3a8532006-06-27 13:33:10 -07001597#ifdef CONFIG_HIGHMEM
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598 int i;
1599
1600 if (dev->features & NETIF_F_HIGHDMA)
1601 return 0;
1602
1603 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1604 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1605 return 1;
1606
Herbert Xu3d3a8532006-06-27 13:33:10 -07001607#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608 return 0;
1609}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001611struct dev_gso_cb {
1612 void (*destructor)(struct sk_buff *skb);
1613};
1614
1615#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1616
1617static void dev_gso_skb_destructor(struct sk_buff *skb)
1618{
1619 struct dev_gso_cb *cb;
1620
1621 do {
1622 struct sk_buff *nskb = skb->next;
1623
1624 skb->next = nskb->next;
1625 nskb->next = NULL;
1626 kfree_skb(nskb);
1627 } while (skb->next);
1628
1629 cb = DEV_GSO_CB(skb);
1630 if (cb->destructor)
1631 cb->destructor(skb);
1632}
1633
1634/**
1635 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1636 * @skb: buffer to segment
1637 *
1638 * This function segments the given skb and stores the list of segments
1639 * in skb->next.
1640 */
1641static int dev_gso_segment(struct sk_buff *skb)
1642{
1643 struct net_device *dev = skb->dev;
1644 struct sk_buff *segs;
Herbert Xu576a30e2006-06-27 13:22:38 -07001645 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1646 NETIF_F_SG : 0);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001647
Herbert Xu576a30e2006-06-27 13:22:38 -07001648 segs = skb_gso_segment(skb, features);
1649
1650 /* Verifying header integrity only. */
1651 if (!segs)
1652 return 0;
1653
Hirofumi Nakagawa801678c2008-04-29 01:03:09 -07001654 if (IS_ERR(segs))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001655 return PTR_ERR(segs);
1656
1657 skb->next = segs;
1658 DEV_GSO_CB(skb)->destructor = skb->destructor;
1659 skb->destructor = dev_gso_skb_destructor;
1660
1661 return 0;
1662}
1663
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001664int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1665 struct netdev_queue *txq)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001666{
Stephen Hemminger00829822008-11-20 20:14:53 -08001667 const struct net_device_ops *ops = dev->netdev_ops;
1668
1669 prefetch(&dev->netdev_ops->ndo_start_xmit);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001670 if (likely(!skb->next)) {
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -07001671 if (!list_empty(&ptype_all))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001672 dev_queue_xmit_nit(skb, dev);
1673
Herbert Xu576a30e2006-06-27 13:22:38 -07001674 if (netif_needs_gso(dev, skb)) {
1675 if (unlikely(dev_gso_segment(skb)))
1676 goto out_kfree_skb;
1677 if (skb->next)
1678 goto gso;
1679 }
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001680
Stephen Hemminger00829822008-11-20 20:14:53 -08001681 return ops->ndo_start_xmit(skb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001682 }
1683
Herbert Xu576a30e2006-06-27 13:22:38 -07001684gso:
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001685 do {
1686 struct sk_buff *nskb = skb->next;
1687 int rc;
1688
1689 skb->next = nskb->next;
1690 nskb->next = NULL;
Stephen Hemminger00829822008-11-20 20:14:53 -08001691 rc = ops->ndo_start_xmit(nskb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001692 if (unlikely(rc)) {
Michael Chanf54d9e82006-06-25 23:57:04 -07001693 nskb->next = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001694 skb->next = nskb;
1695 return rc;
1696 }
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001697 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
Michael Chanf54d9e82006-06-25 23:57:04 -07001698 return NETDEV_TX_BUSY;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001699 } while (skb->next);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001700
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001701 skb->destructor = DEV_GSO_CB(skb)->destructor;
1702
1703out_kfree_skb:
1704 kfree_skb(skb);
1705 return 0;
1706}
1707
David S. Millerb6b2fed2008-07-21 09:48:06 -07001708static u32 simple_tx_hashrnd;
1709static int simple_tx_hashrnd_initialized = 0;
1710
David S. Miller8f0f2222008-07-15 03:47:03 -07001711static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1712{
David S. Millerb6b2fed2008-07-21 09:48:06 -07001713 u32 addr1, addr2, ports;
1714 u32 hash, ihl;
Alexander Duyckad55dca2008-09-20 22:05:50 -07001715 u8 ip_proto = 0;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001716
1717 if (unlikely(!simple_tx_hashrnd_initialized)) {
1718 get_random_bytes(&simple_tx_hashrnd, 4);
1719 simple_tx_hashrnd_initialized = 1;
1720 }
David S. Miller8f0f2222008-07-15 03:47:03 -07001721
1722 switch (skb->protocol) {
Arnaldo Carvalho de Melo60678042008-09-20 22:20:49 -07001723 case htons(ETH_P_IP):
Alexander Duyckad55dca2008-09-20 22:05:50 -07001724 if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1725 ip_proto = ip_hdr(skb)->protocol;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001726 addr1 = ip_hdr(skb)->saddr;
1727 addr2 = ip_hdr(skb)->daddr;
David S. Miller8f0f2222008-07-15 03:47:03 -07001728 ihl = ip_hdr(skb)->ihl;
David S. Miller8f0f2222008-07-15 03:47:03 -07001729 break;
Arnaldo Carvalho de Melo60678042008-09-20 22:20:49 -07001730 case htons(ETH_P_IPV6):
David S. Miller8f0f2222008-07-15 03:47:03 -07001731 ip_proto = ipv6_hdr(skb)->nexthdr;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001732 addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1733 addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
David S. Miller8f0f2222008-07-15 03:47:03 -07001734 ihl = (40 >> 2);
David S. Miller8f0f2222008-07-15 03:47:03 -07001735 break;
1736 default:
1737 return 0;
1738 }
1739
David S. Miller8f0f2222008-07-15 03:47:03 -07001740
1741 switch (ip_proto) {
1742 case IPPROTO_TCP:
1743 case IPPROTO_UDP:
1744 case IPPROTO_DCCP:
1745 case IPPROTO_ESP:
1746 case IPPROTO_AH:
1747 case IPPROTO_SCTP:
1748 case IPPROTO_UDPLITE:
David S. Millerb6b2fed2008-07-21 09:48:06 -07001749 ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
David S. Miller8f0f2222008-07-15 03:47:03 -07001750 break;
1751
1752 default:
David S. Millerb6b2fed2008-07-21 09:48:06 -07001753 ports = 0;
David S. Miller8f0f2222008-07-15 03:47:03 -07001754 break;
1755 }
1756
David S. Millerb6b2fed2008-07-21 09:48:06 -07001757 hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1758
1759 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
David S. Miller8f0f2222008-07-15 03:47:03 -07001760}
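/*
 * The last line is a multiply-shift trick: treating the 32-bit hash as
 * a fraction of 2^32, ((u64)hash * n) >> 32 maps it uniformly onto
 * [0, n) with no modulo. Worked example (values assumed): with
 * real_num_tx_queues == 8 and hash == 0xC0000000 (3/4 of 2^32),
 * (0xC0000000ULL * 8) >> 32 == 6, i.e. the frame lands on queue 6.
 */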
1761
David S. Millere8a04642008-07-17 00:34:19 -07001762static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1763 struct sk_buff *skb)
1764{
Stephen Hemminger00829822008-11-20 20:14:53 -08001765 const struct net_device_ops *ops = dev->netdev_ops;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001766 u16 queue_index = 0;
1767
Stephen Hemminger00829822008-11-20 20:14:53 -08001768 if (ops->ndo_select_queue)
1769 queue_index = ops->ndo_select_queue(dev, skb);
David S. Miller8f0f2222008-07-15 03:47:03 -07001770 else if (dev->real_num_tx_queues > 1)
1771 queue_index = simple_tx_hash(dev, skb);
David S. Millereae792b2008-07-15 03:03:33 -07001772
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001773 skb_set_queue_mapping(skb, queue_index);
1774 return netdev_get_tx_queue(dev, queue_index);
David S. Millere8a04642008-07-17 00:34:19 -07001775}
1776
Dave Jonesd29f7492008-07-22 14:09:06 -07001777/**
1778 * dev_queue_xmit - transmit a buffer
1779 * @skb: buffer to transmit
1780 *
1781 * Queue a buffer for transmission to a network device. The caller must
1782 * have set the device and priority and built the buffer before calling
1783 * this function. The function can be called from an interrupt.
1784 *
1785 * A negative errno code is returned on a failure. A success does not
1786 * guarantee the frame will be transmitted as it may be dropped due
1787 * to congestion or traffic shaping.
1788 *
1789 * -----------------------------------------------------------------------------------
1790 * I notice this method can also return errors from the queue disciplines,
1791 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1792 * be positive.
1793 *
1794 * Regardless of the return value, the skb is consumed, so it is currently
1795 * difficult to retry a send to this method. (You can bump the ref count
1796 * before sending to hold a reference for retry if you are careful.)
1797 *
1798 * When calling this method, interrupts MUST be enabled. This is because
1799 * the BH enable code must have IRQs enabled so that it will not deadlock.
1800 * --BLG
1801 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802int dev_queue_xmit(struct sk_buff *skb)
1803{
1804 struct net_device *dev = skb->dev;
David S. Millerdc2b4842008-07-08 17:18:23 -07001805 struct netdev_queue *txq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806 struct Qdisc *q;
1807 int rc = -ENOMEM;
1808
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001809 /* GSO will handle the following emulations directly. */
1810 if (netif_needs_gso(dev, skb))
1811 goto gso;
1812
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813 if (skb_shinfo(skb)->frag_list &&
1814 !(dev->features & NETIF_F_FRAGLIST) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001815 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816 goto out_kfree_skb;
1817
1818 /* Fragmented skb is linearized if device does not support SG,
 1819 * or if at least one of the fragments is in highmem and the device
1820 * does not support DMA from it.
1821 */
1822 if (skb_shinfo(skb)->nr_frags &&
1823 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001824 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825 goto out_kfree_skb;
1826
1827 /* If packet is not checksummed and device does not support
1828 * checksumming for this protocol, complete checksumming here.
1829 */
Herbert Xu663ead32007-04-09 11:59:07 -07001830 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1831 skb_set_transport_header(skb, skb->csum_start -
1832 skb_headroom(skb));
Ben Hutchings6de329e2008-06-16 17:02:28 -07001833 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1834 goto out_kfree_skb;
Herbert Xu663ead32007-04-09 11:59:07 -07001835 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001837gso:
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001838 /* Disable soft irqs for various locks below. Also
1839 * stops preemption for RCU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001841 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842
David S. Millereae792b2008-07-15 03:03:33 -07001843 txq = dev_pick_tx(dev, skb);
David S. Millerb0e1e642008-07-08 17:42:10 -07001844 q = rcu_dereference(txq->qdisc);
David S. Miller37437bb2008-07-16 02:15:04 -07001845
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846#ifdef CONFIG_NET_CLS_ACT
1847 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1848#endif
1849 if (q->enqueue) {
David S. Miller5fb66222008-08-02 20:02:43 -07001850 spinlock_t *root_lock = qdisc_lock(q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851
David S. Miller37437bb2008-07-16 02:15:04 -07001852 spin_lock(root_lock);
1853
David S. Millera9312ae2008-08-17 21:51:03 -07001854 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
David S. Miller96d20312008-08-17 23:37:16 -07001855 kfree_skb(skb);
David S. Millera9312ae2008-08-17 21:51:03 -07001856 rc = NET_XMIT_DROP;
David S. Miller96d20312008-08-17 23:37:16 -07001857 } else {
1858 rc = qdisc_enqueue_root(skb, q);
1859 qdisc_run(q);
David S. Millera9312ae2008-08-17 21:51:03 -07001860 }
David S. Miller37437bb2008-07-16 02:15:04 -07001861 spin_unlock(root_lock);
1862
David S. Miller37437bb2008-07-16 02:15:04 -07001863 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864 }
1865
1866 /* The device has no queue. Common case for software devices:
 1867 loopback, all sorts of tunnels...
 1868
Herbert Xu932ff272006-06-09 12:20:56 -07001869 Really, it is unlikely that netif_tx_lock protection is necessary
 1870 here. (e.g. loopback and IP tunnels are clean, ignoring statistics
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871 counters.)
 1872 However, it is possible that they rely on the protection
 1873 made by us here.
 1874
 1875 Check this and take the lock. It is not prone to deadlocks.
 1876 Or shoot the noqueue qdisc; it is even simpler 8)
1877 */
1878 if (dev->flags & IFF_UP) {
1879 int cpu = smp_processor_id(); /* ok because BHs are off */
1880
David S. Millerc773e842008-07-08 23:13:53 -07001881 if (txq->xmit_lock_owner != cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882
David S. Millerc773e842008-07-08 23:13:53 -07001883 HARD_TX_LOCK(dev, txq, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001885 if (!netif_tx_queue_stopped(txq)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001886 rc = 0;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001887 if (!dev_hard_start_xmit(skb, dev, txq)) {
David S. Millerc773e842008-07-08 23:13:53 -07001888 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889 goto out;
1890 }
1891 }
David S. Millerc773e842008-07-08 23:13:53 -07001892 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893 if (net_ratelimit())
1894 printk(KERN_CRIT "Virtual device %s asks to "
1895 "queue packet!\n", dev->name);
1896 } else {
1897 /* Recursion is detected! It is possible,
1898 * unfortunately */
1899 if (net_ratelimit())
1900 printk(KERN_CRIT "Dead loop on virtual device "
1901 "%s, fix it urgently!\n", dev->name);
1902 }
1903 }
1904
1905 rc = -ENETDOWN;
Herbert Xud4828d82006-06-22 02:28:18 -07001906 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907
1908out_kfree_skb:
1909 kfree_skb(skb);
1910 return rc;
1911out:
Herbert Xud4828d82006-06-22 02:28:18 -07001912 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 return rc;
1914}
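/*
 * A minimal transmit sketch to make the contract above concrete
 * ("dest" and the surrounding driver are assumed): build the skb
 * completely, set dev and priority, then hand it over; whatever the
 * return value, the skb must not be touched again.
 *
 *	skb->dev = dev;
 *	skb->priority = TC_PRIO_CONTROL;
 *	if (dev_hard_header(skb, dev, ETH_P_802_2, dest, NULL, skb->len) < 0) {
 *		kfree_skb(skb);
 *		return;
 *	}
 *	dev_queue_xmit(skb);
 */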
1915
1916
1917/*=======================================================================
1918 Receiver routines
1919 =======================================================================*/
1920
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07001921int netdev_max_backlog __read_mostly = 1000;
1922int netdev_budget __read_mostly = 300;
1923int weight_p __read_mostly = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924
1925DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1926
1927
Linus Torvalds1da177e2005-04-16 15:20:36 -07001928/**
1929 * netif_rx - post buffer to the network code
1930 * @skb: buffer to post
1931 *
1932 * This function receives a packet from a device driver and queues it for
1933 * the upper (protocol) levels to process. It always succeeds. The buffer
1934 * may be dropped during processing for congestion control or by the
1935 * protocol layers.
1936 *
1937 * return values:
1938 * NET_RX_SUCCESS (no congestion)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 * NET_RX_DROP (packet was dropped)
1940 *
1941 */
1942
1943int netif_rx(struct sk_buff *skb)
1944{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945 struct softnet_data *queue;
1946 unsigned long flags;
1947
1948 /* if netpoll wants it, pretend we never saw it */
1949 if (netpoll_rx(skb))
1950 return NET_RX_DROP;
1951
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001952 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001953 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954
1955 /*
 1956 * The code is rearranged so that the path is shortest
 1957 * when the CPU is congested but still operating.
1958 */
1959 local_irq_save(flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 queue = &__get_cpu_var(softnet_data);
1961
1962 __get_cpu_var(netdev_rx_stat).total++;
1963 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1964 if (queue->input_pkt_queue.qlen) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965enqueue:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966 __skb_queue_tail(&queue->input_pkt_queue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967 local_irq_restore(flags);
Stephen Hemminger34008d82005-06-23 20:10:00 -07001968 return NET_RX_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969 }
1970
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001971 napi_schedule(&queue->backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972 goto enqueue;
1973 }
1974
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975 __get_cpu_var(netdev_rx_stat).dropped++;
1976 local_irq_restore(flags);
1977
1978 kfree_skb(skb);
1979 return NET_RX_DROP;
1980}
1981
1982int netif_rx_ni(struct sk_buff *skb)
1983{
1984 int err;
1985
1986 preempt_disable();
1987 err = netif_rx(skb);
1988 if (local_softirq_pending())
1989 do_softirq();
1990 preempt_enable();
1991
1992 return err;
1993}
1994
1995EXPORT_SYMBOL(netif_rx_ni);
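/*
 * Typical use, sketched (driver specifics such as pkt_len assumed): a
 * non-NAPI driver's interrupt handler sets the protocol and posts the
 * frame with netif_rx(); netif_rx_ni() is for the rare process-context
 * callers.
 *
 *	skb_put(skb, pkt_len);
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);
 */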
1996
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997static void net_tx_action(struct softirq_action *h)
1998{
1999 struct softnet_data *sd = &__get_cpu_var(softnet_data);
2000
2001 if (sd->completion_queue) {
2002 struct sk_buff *clist;
2003
2004 local_irq_disable();
2005 clist = sd->completion_queue;
2006 sd->completion_queue = NULL;
2007 local_irq_enable();
2008
2009 while (clist) {
2010 struct sk_buff *skb = clist;
2011 clist = clist->next;
2012
Ilpo Järvinen547b7922008-07-25 21:43:18 -07002013 WARN_ON(atomic_read(&skb->users));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 __kfree_skb(skb);
2015 }
2016 }
2017
2018 if (sd->output_queue) {
David S. Miller37437bb2008-07-16 02:15:04 -07002019 struct Qdisc *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020
2021 local_irq_disable();
2022 head = sd->output_queue;
2023 sd->output_queue = NULL;
2024 local_irq_enable();
2025
2026 while (head) {
David S. Miller37437bb2008-07-16 02:15:04 -07002027 struct Qdisc *q = head;
2028 spinlock_t *root_lock;
2029
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030 head = head->next_sched;
2031
David S. Miller5fb66222008-08-02 20:02:43 -07002032 root_lock = qdisc_lock(q);
David S. Miller37437bb2008-07-16 02:15:04 -07002033 if (spin_trylock(root_lock)) {
Jarek Poplawskidef82a12008-08-17 21:54:43 -07002034 smp_mb__before_clear_bit();
2035 clear_bit(__QDISC_STATE_SCHED,
2036 &q->state);
David S. Miller37437bb2008-07-16 02:15:04 -07002037 qdisc_run(q);
2038 spin_unlock(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039 } else {
David S. Miller195648b2008-08-19 04:00:36 -07002040 if (!test_bit(__QDISC_STATE_DEACTIVATED,
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002041 &q->state)) {
David S. Miller195648b2008-08-19 04:00:36 -07002042 __netif_reschedule(q);
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002043 } else {
2044 smp_mb__before_clear_bit();
2045 clear_bit(__QDISC_STATE_SCHED,
2046 &q->state);
2047 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048 }
2049 }
2050 }
2051}
2052
Stephen Hemminger6f05f622007-03-08 20:46:03 -08002053static inline int deliver_skb(struct sk_buff *skb,
2054 struct packet_type *pt_prev,
2055 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056{
2057 atomic_inc(&skb->users);
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002058 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059}
2060
2061#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
Stephen Hemminger6229e362007-03-21 13:38:47 -07002062/* These hooks defined here for ATM */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063struct net_bridge;
2064struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2065 unsigned char *addr);
Stephen Hemminger6229e362007-03-21 13:38:47 -07002066void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067
Stephen Hemminger6229e362007-03-21 13:38:47 -07002068/*
2069 * If bridge module is loaded call bridging hook.
2070 * returns NULL if packet was consumed.
2071 */
2072struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2073 struct sk_buff *skb) __read_mostly;
2074static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2075 struct packet_type **pt_prev, int *ret,
2076 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077{
2078 struct net_bridge_port *port;
2079
Stephen Hemminger6229e362007-03-21 13:38:47 -07002080 if (skb->pkt_type == PACKET_LOOPBACK ||
2081 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2082 return skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083
2084 if (*pt_prev) {
Stephen Hemminger6229e362007-03-21 13:38:47 -07002085 *ret = deliver_skb(skb, *pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002086 *pt_prev = NULL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002087 }
2088
Stephen Hemminger6229e362007-03-21 13:38:47 -07002089 return br_handle_frame_hook(port, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090}
2091#else
Stephen Hemminger6229e362007-03-21 13:38:47 -07002092#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093#endif
2094
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002095#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2096struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2097EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2098
2099static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2100 struct packet_type **pt_prev,
2101 int *ret,
2102 struct net_device *orig_dev)
2103{
2104 if (skb->dev->macvlan_port == NULL)
2105 return skb;
2106
2107 if (*pt_prev) {
2108 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2109 *pt_prev = NULL;
2110 }
2111 return macvlan_handle_frame_hook(skb);
2112}
2113#else
2114#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2115#endif
2116
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117#ifdef CONFIG_NET_CLS_ACT
2118/* TODO: Maybe we should just force sch_ingress to be compiled in
 2119 * when CONFIG_NET_CLS_ACT is? Otherwise we get some useless
 2120 * instructions (a compare and 2 extra stores) if we don't have it
 2121 * on but do have CONFIG_NET_CLS_ACT.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002122 * NOTE: This doesn't stop any functionality; if you don't have
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123 * the ingress scheduler, you just can't add policies on ingress.
2124 *
2125 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002126static int ing_filter(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128 struct net_device *dev = skb->dev;
Herbert Xuf697c3e2007-10-14 00:38:47 -07002129 u32 ttl = G_TC_RTTL(skb->tc_verd);
David S. Miller555353c2008-07-08 17:33:13 -07002130 struct netdev_queue *rxq;
2131 int result = TC_ACT_OK;
2132 struct Qdisc *q;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002133
Herbert Xuf697c3e2007-10-14 00:38:47 -07002134 if (MAX_RED_LOOP < ttl++) {
2135 printk(KERN_WARNING
2136 "Redir loop detected Dropping packet (%d->%d)\n",
2137 skb->iif, dev->ifindex);
2138 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139 }
2140
Herbert Xuf697c3e2007-10-14 00:38:47 -07002141 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2142 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2143
David S. Miller555353c2008-07-08 17:33:13 -07002144 rxq = &dev->rx_queue;
2145
David S. Miller83874002008-07-17 00:53:03 -07002146 q = rxq->qdisc;
David S. Miller8d50b532008-07-30 02:37:46 -07002147 if (q != &noop_qdisc) {
David S. Miller83874002008-07-17 00:53:03 -07002148 spin_lock(qdisc_lock(q));
David S. Millera9312ae2008-08-17 21:51:03 -07002149 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2150 result = qdisc_enqueue_root(skb, q);
David S. Miller83874002008-07-17 00:53:03 -07002151 spin_unlock(qdisc_lock(q));
2152 }
Herbert Xuf697c3e2007-10-14 00:38:47 -07002153
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 return result;
2155}
Herbert Xuf697c3e2007-10-14 00:38:47 -07002156
2157static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2158 struct packet_type **pt_prev,
2159 int *ret, struct net_device *orig_dev)
2160{
David S. Miller8d50b532008-07-30 02:37:46 -07002161 if (skb->dev->rx_queue.qdisc == &noop_qdisc)
Herbert Xuf697c3e2007-10-14 00:38:47 -07002162 goto out;
2163
2164 if (*pt_prev) {
2165 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2166 *pt_prev = NULL;
2167 } else {
2168 /* Huh? Why does turning on AF_PACKET affect this? */
2169 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2170 }
2171
2172 switch (ing_filter(skb)) {
2173 case TC_ACT_SHOT:
2174 case TC_ACT_STOLEN:
2175 kfree_skb(skb);
2176 return NULL;
2177 }
2178
2179out:
2180 skb->tc_verd = 0;
2181 return skb;
2182}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183#endif
2184
Patrick McHardybc1d0412008-07-14 22:49:30 -07002185/*
2186 * netif_nit_deliver - deliver received packets to network taps
2187 * @skb: buffer
2188 *
2189 * This function is used to deliver incoming packets to network
2190 * taps. It should be used when the normal netif_receive_skb path
2191 * is bypassed, for example because of VLAN acceleration.
2192 */
2193void netif_nit_deliver(struct sk_buff *skb)
2194{
2195 struct packet_type *ptype;
2196
2197 if (list_empty(&ptype_all))
2198 return;
2199
2200 skb_reset_network_header(skb);
2201 skb_reset_transport_header(skb);
2202 skb->mac_len = skb->network_header - skb->mac_header;
2203
2204 rcu_read_lock();
2205 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2206 if (!ptype->dev || ptype->dev == skb->dev)
2207 deliver_skb(skb, ptype, skb->dev);
2208 }
2209 rcu_read_unlock();
2210}
2211
Stephen Hemminger3b582cc2007-11-01 02:21:47 -07002212/**
2213 * netif_receive_skb - process receive buffer from network
2214 * @skb: buffer to process
2215 *
2216 * netif_receive_skb() is the main receive data processing function.
2217 * It always succeeds. The buffer may be dropped during processing
2218 * for congestion control or by the protocol layers.
2219 *
2220 * This function may only be called from softirq context and interrupts
2221 * should be enabled.
2222 *
2223 * Return values (usually ignored):
2224 * NET_RX_SUCCESS: no congestion
2225 * NET_RX_DROP: packet was dropped
2226 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227int netif_receive_skb(struct sk_buff *skb)
2228{
2229 struct packet_type *ptype, *pt_prev;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002230 struct net_device *orig_dev;
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002231 struct net_device *null_or_orig;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232 int ret = NET_RX_DROP;
Al Viro252e3342006-11-14 20:48:11 -08002233 __be16 type;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234
Patrick McHardy9b22ea52008-11-04 14:49:57 -08002235 if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2236 return NET_RX_SUCCESS;
2237
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238 /* if we've gotten here through NAPI, check netpoll */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002239 if (netpoll_receive_skb(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002240 return NET_RX_DROP;
2241
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07002242 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07002243 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244
Patrick McHardyc01003c2007-03-29 11:46:52 -07002245 if (!skb->iif)
2246 skb->iif = skb->dev->ifindex;
David S. Miller86e65da2005-08-09 19:36:29 -07002247
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002248 null_or_orig = NULL;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002249 orig_dev = skb->dev;
2250 if (orig_dev->master) {
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002251 if (skb_bond_should_drop(skb))
2252 null_or_orig = orig_dev; /* deliver only exact match */
2253 else
2254 skb->dev = orig_dev->master;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002255 }
Jay Vosburgh8f903c72006-02-21 16:36:44 -08002256
Linus Torvalds1da177e2005-04-16 15:20:36 -07002257 __get_cpu_var(netdev_rx_stat).total++;
2258
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002259 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03002260 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07002261 skb->mac_len = skb->network_header - skb->mac_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262
2263 pt_prev = NULL;
2264
2265 rcu_read_lock();
2266
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -07002267 /* Don't receive packets in an exiting network namespace */
Eric W. Biederman0a36b342008-11-05 16:00:24 -08002268 if (!net_alive(dev_net(skb->dev))) {
2269 kfree_skb(skb);
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -07002270 goto out;
Eric W. Biederman0a36b342008-11-05 16:00:24 -08002271 }
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -07002272
Linus Torvalds1da177e2005-04-16 15:20:36 -07002273#ifdef CONFIG_NET_CLS_ACT
2274 if (skb->tc_verd & TC_NCLS) {
2275 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2276 goto ncls;
2277 }
2278#endif
2279
2280 list_for_each_entry_rcu(ptype, &ptype_all, list) {
Joe Eykholtf9823072008-07-02 18:22:02 -07002281 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2282 ptype->dev == orig_dev) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002283 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002284 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 pt_prev = ptype;
2286 }
2287 }
2288
2289#ifdef CONFIG_NET_CLS_ACT
Herbert Xuf697c3e2007-10-14 00:38:47 -07002290 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2291 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293ncls:
2294#endif
2295
Stephen Hemminger6229e362007-03-21 13:38:47 -07002296 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2297 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298 goto out;
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002299 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2300 if (!skb)
2301 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302
2303 type = skb->protocol;
Pavel Emelyanov82d8a862007-11-26 20:12:58 +08002304 list_for_each_entry_rcu(ptype,
2305 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306 if (ptype->type == type &&
Joe Eykholtf9823072008-07-02 18:22:02 -07002307 (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2308 ptype->dev == orig_dev)) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002309 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002310 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311 pt_prev = ptype;
2312 }
2313 }
2314
2315 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002316 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317 } else {
2318 kfree_skb(skb);
 2319 /* Jamal, now you will not be able to escape explaining
 2320 * to me how you were going to use this. :-)
2321 */
2322 ret = NET_RX_DROP;
2323 }
2324
2325out:
2326 rcu_read_unlock();
2327 return ret;
2328}
2329
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07002330/* Network device is going away, flush any packets still pending */
2331static void flush_backlog(void *arg)
2332{
2333 struct net_device *dev = arg;
2334 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2335 struct sk_buff *skb, *tmp;
2336
2337 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2338 if (skb->dev == dev) {
2339 __skb_unlink(skb, &queue->input_pkt_queue);
2340 kfree_skb(skb);
2341 }
2342}
2343
Herbert Xud565b0a2008-12-15 23:38:52 -08002344static int napi_gro_complete(struct sk_buff *skb)
2345{
2346 struct packet_type *ptype;
2347 __be16 type = skb->protocol;
2348 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2349 int err = -ENOENT;
2350
Herbert Xu5d38a072009-01-04 16:13:40 -08002351 if (NAPI_GRO_CB(skb)->count == 1)
Herbert Xud565b0a2008-12-15 23:38:52 -08002352 goto out;
2353
2354 rcu_read_lock();
2355 list_for_each_entry_rcu(ptype, head, list) {
2356 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2357 continue;
2358
2359 err = ptype->gro_complete(skb);
2360 break;
2361 }
2362 rcu_read_unlock();
2363
2364 if (err) {
2365 WARN_ON(&ptype->list == head);
2366 kfree_skb(skb);
2367 return NET_RX_SUCCESS;
2368 }
2369
2370out:
Herbert Xub5302562009-01-04 16:13:19 -08002371 skb_shinfo(skb)->gso_size = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002372 __skb_push(skb, -skb_network_offset(skb));
2373 return netif_receive_skb(skb);
2374}
2375
2376void napi_gro_flush(struct napi_struct *napi)
2377{
2378 struct sk_buff *skb, *next;
2379
2380 for (skb = napi->gro_list; skb; skb = next) {
2381 next = skb->next;
2382 skb->next = NULL;
2383 napi_gro_complete(skb);
2384 }
2385
2386 napi->gro_list = NULL;
2387}
2388EXPORT_SYMBOL(napi_gro_flush);
2389
Herbert Xu5d38a072009-01-04 16:13:40 -08002390static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xud565b0a2008-12-15 23:38:52 -08002391{
2392 struct sk_buff **pp = NULL;
2393 struct packet_type *ptype;
2394 __be16 type = skb->protocol;
2395 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2396 int count = 0;
Herbert Xu0da2afd2008-12-26 14:57:42 -08002397 int same_flow;
Herbert Xud565b0a2008-12-15 23:38:52 -08002398 int mac_len;
Herbert Xu5d38a072009-01-04 16:13:40 -08002399 int free;
Herbert Xud565b0a2008-12-15 23:38:52 -08002400
2401 if (!(skb->dev->features & NETIF_F_GRO))
2402 goto normal;
2403
2404 rcu_read_lock();
2405 list_for_each_entry_rcu(ptype, head, list) {
2406 struct sk_buff *p;
2407
2408 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2409 continue;
2410
2411 skb_reset_network_header(skb);
2412 mac_len = skb->network_header - skb->mac_header;
2413 skb->mac_len = mac_len;
2414 NAPI_GRO_CB(skb)->same_flow = 0;
2415 NAPI_GRO_CB(skb)->flush = 0;
Herbert Xu5d38a072009-01-04 16:13:40 -08002416 NAPI_GRO_CB(skb)->free = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002417
2418 for (p = napi->gro_list; p; p = p->next) {
2419 count++;
2420 NAPI_GRO_CB(p)->same_flow =
2421 p->mac_len == mac_len &&
2422 !memcmp(skb_mac_header(p), skb_mac_header(skb),
2423 mac_len);
2424 NAPI_GRO_CB(p)->flush = 0;
2425 }
2426
2427 pp = ptype->gro_receive(&napi->gro_list, skb);
2428 break;
2429 }
2430 rcu_read_unlock();
2431
2432 if (&ptype->list == head)
2433 goto normal;
2434
Herbert Xu0da2afd2008-12-26 14:57:42 -08002435 same_flow = NAPI_GRO_CB(skb)->same_flow;
Herbert Xu5d38a072009-01-04 16:13:40 -08002436 free = NAPI_GRO_CB(skb)->free;
Herbert Xu0da2afd2008-12-26 14:57:42 -08002437
Herbert Xud565b0a2008-12-15 23:38:52 -08002438 if (pp) {
2439 struct sk_buff *nskb = *pp;
2440
2441 *pp = nskb->next;
2442 nskb->next = NULL;
2443 napi_gro_complete(nskb);
2444 count--;
2445 }
2446
Herbert Xu0da2afd2008-12-26 14:57:42 -08002447 if (same_flow)
Herbert Xud565b0a2008-12-15 23:38:52 -08002448 goto ok;
2449
2450 if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
2451 __skb_push(skb, -skb_network_offset(skb));
2452 goto normal;
2453 }
2454
2455 NAPI_GRO_CB(skb)->count = 1;
Herbert Xub5302562009-01-04 16:13:19 -08002456 skb_shinfo(skb)->gso_size = skb->len;
Herbert Xud565b0a2008-12-15 23:38:52 -08002457 skb->next = napi->gro_list;
2458 napi->gro_list = skb;
2459
2460ok:
Herbert Xu5d38a072009-01-04 16:13:40 -08002461 return free;
Herbert Xud565b0a2008-12-15 23:38:52 -08002462
2463normal:
Herbert Xu5d38a072009-01-04 16:13:40 -08002464 return -1;
2465}
2466
2467int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2468{
2469 switch (__napi_gro_receive(napi, skb)) {
2470 case -1:
2471 return netif_receive_skb(skb);
2472
2473 case 1:
2474 kfree_skb(skb);
2475 break;
2476 }
2477
2478 return NET_RX_SUCCESS;
Herbert Xud565b0a2008-12-15 23:38:52 -08002479}
2480EXPORT_SYMBOL(napi_gro_receive);
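/*
 * Sketch of a NAPI rx loop feeding GRO (my_fetch_rx() is invented):
 * each completed frame goes through napi_gro_receive(), which either
 * merges it into a held super-packet or falls back to the normal
 * netif_receive_skb() path, as the switch above shows.
 *
 *	while (work < budget && (skb = my_fetch_rx(priv)) != NULL) {
 *		skb->protocol = eth_type_trans(skb, dev);
 *		napi_gro_receive(napi, skb);
 *		work++;
 *	}
 */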
2481
Herbert Xu5d38a072009-01-04 16:13:40 -08002482int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
2483{
2484 struct net_device *dev = napi->dev;
2485 struct sk_buff *skb = napi->skb;
2486 int err = NET_RX_DROP;
2487
2488 napi->skb = NULL;
2489
2490 if (!skb) {
2491 skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
2492 if (!skb)
2493 goto out;
2494
2495 skb_reserve(skb, NET_IP_ALIGN);
2496 }
2497
2498 BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
2499 skb_shinfo(skb)->nr_frags = info->nr_frags;
2500 memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
2501
2502 skb->data_len = info->len;
2503 skb->len += info->len;
2504 skb->truesize += info->len;
2505
2506 if (!pskb_may_pull(skb, ETH_HLEN))
2507 goto reuse;
2508
2509 err = NET_RX_SUCCESS;
2510
2511 skb->protocol = eth_type_trans(skb, dev);
2512
2513 skb->ip_summed = info->ip_summed;
2514 skb->csum = info->csum;
2515
2516 switch (__napi_gro_receive(napi, skb)) {
2517 case -1:
2518 return netif_receive_skb(skb);
2519
2520 case 0:
2521 goto out;
2522 }
2523
2524reuse:
2525 skb_shinfo(skb)->nr_frags = 0;
2526
2527 skb->len -= skb->data_len;
2528 skb->truesize -= skb->data_len;
2529 skb->data_len = 0;
2530
2531 __skb_pull(skb, skb_headlen(skb));
2532 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2533
2534 napi->skb = skb;
2535
2536out:
2537 return err;
2538}
2539EXPORT_SYMBOL(napi_gro_frags);
2540
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002541static int process_backlog(struct napi_struct *napi, int quota)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002542{
2543 int work = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2545 unsigned long start_time = jiffies;
2546
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002547 napi->weight = weight_p;
2548 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002550
2551 local_irq_disable();
2552 skb = __skb_dequeue(&queue->input_pkt_queue);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002553 if (!skb) {
2554 __napi_complete(napi);
2555 local_irq_enable();
2556 break;
2557 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002558 local_irq_enable();
2559
Herbert Xud565b0a2008-12-15 23:38:52 -08002560 napi_gro_receive(napi, skb);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002561 } while (++work < quota && jiffies == start_time);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562
Herbert Xud565b0a2008-12-15 23:38:52 -08002563 napi_gro_flush(napi);
2564
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002565 return work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566}
2567
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002568/**
2569 * __napi_schedule - schedule for receive
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07002570 * @n: entry to schedule
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002571 *
2572 * The entry's receive function will be scheduled to run
2573 */
Harvey Harrisonb5606c22008-02-13 15:03:16 -08002574void __napi_schedule(struct napi_struct *n)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002575{
2576 unsigned long flags;
2577
2578 local_irq_save(flags);
2579 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2580 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2581 local_irq_restore(flags);
2582}
2583EXPORT_SYMBOL(__napi_schedule);
2584
Herbert Xud565b0a2008-12-15 23:38:52 -08002585void __napi_complete(struct napi_struct *n)
2586{
2587 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2588 BUG_ON(n->gro_list);
2589
2590 list_del(&n->poll_list);
2591 smp_mb__before_clear_bit();
2592 clear_bit(NAPI_STATE_SCHED, &n->state);
2593}
2594EXPORT_SYMBOL(__napi_complete);
2595
2596void napi_complete(struct napi_struct *n)
2597{
2598 unsigned long flags;
2599
2600 /*
2601 * don't let napi dequeue from the cpu poll list
 2602 * just in case it's running on a different cpu
2603 */
2604 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2605 return;
2606
2607 napi_gro_flush(n);
2608 local_irq_save(flags);
2609 __napi_complete(n);
2610 local_irq_restore(flags);
2611}
2612EXPORT_SYMBOL(napi_complete);
2613
2614void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2615 int (*poll)(struct napi_struct *, int), int weight)
2616{
2617 INIT_LIST_HEAD(&napi->poll_list);
2618 napi->gro_list = NULL;
Herbert Xu5d38a072009-01-04 16:13:40 -08002619 napi->skb = NULL;
Herbert Xud565b0a2008-12-15 23:38:52 -08002620 napi->poll = poll;
2621 napi->weight = weight;
2622 list_add(&napi->dev_list, &dev->napi_list);
Herbert Xud565b0a2008-12-15 23:38:52 -08002623 napi->dev = dev;
Herbert Xu5d38a072009-01-04 16:13:40 -08002624#ifdef CONFIG_NETPOLL
Herbert Xud565b0a2008-12-15 23:38:52 -08002625 spin_lock_init(&napi->poll_lock);
2626 napi->poll_owner = -1;
2627#endif
2628 set_bit(NAPI_STATE_SCHED, &napi->state);
2629}
2630EXPORT_SYMBOL(netif_napi_add);
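/*
 * The canonical wiring, with invented names: register the poll
 * function once at probe time, schedule NAPI from the interrupt
 * handler, and complete when a poll round does less than its budget.
 *
 *	netif_napi_add(dev, &priv->napi, my_poll, 64);
 *
 *	static irqreturn_t my_irq(int irq, void *data)
 *	{
 *		struct my_priv *priv = data;
 *
 *		napi_schedule(&priv->napi);
 *		return IRQ_HANDLED;
 *	}
 *
 *	static int my_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = my_rx(napi, budget);
 *
 *		if (work < budget)
 *			napi_complete(napi);
 *		return work;
 *	}
 */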
2631
2632void netif_napi_del(struct napi_struct *napi)
2633{
2634 struct sk_buff *skb, *next;
2635
Peter P Waskiewicz Jrd7b06632008-12-26 01:35:35 -08002636 list_del_init(&napi->dev_list);
Herbert Xu5d38a072009-01-04 16:13:40 -08002637 kfree(napi->skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08002638
2639 for (skb = napi->gro_list; skb; skb = next) {
2640 next = skb->next;
2641 skb->next = NULL;
2642 kfree_skb(skb);
2643 }
2644
2645 napi->gro_list = NULL;
2646}
2647EXPORT_SYMBOL(netif_napi_del);
2648
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002649
Linus Torvalds1da177e2005-04-16 15:20:36 -07002650static void net_rx_action(struct softirq_action *h)
2651{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002652 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002653 unsigned long time_limit = jiffies + 2;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07002654 int budget = netdev_budget;
Matt Mackall53fb95d2005-08-11 19:27:43 -07002655 void *have;
2656
Linus Torvalds1da177e2005-04-16 15:20:36 -07002657 local_irq_disable();
2658
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002659 while (!list_empty(list)) {
2660 struct napi_struct *n;
2661 int work, weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002662
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002663 /* If the softirq window is exhausted then punt.
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002664 * Allow this to run for 2 jiffies, which will allow
2665 * an average latency of 1.5/HZ.
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002666 */
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002667 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002668 goto softnet_break;
2669
2670 local_irq_enable();
2671
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002672 /* Even though interrupts have been re-enabled, this
2673 * access is safe because interrupts can only add new
2674 * entries to the tail of this list, and only ->poll()
2675 * calls can remove this head entry from the list.
2676 */
2677 n = list_entry(list->next, struct napi_struct, poll_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002678
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002679 have = netpoll_poll_lock(n);
2680
2681 weight = n->weight;
2682
David S. Miller0a7606c2007-10-29 21:28:47 -07002683 /* This NAPI_STATE_SCHED test is for avoiding a race
2684 * with netpoll's poll_napi(). Only the entity which
2685 * obtains the lock and sees NAPI_STATE_SCHED set will
2686 * actually make the ->poll() call. Therefore we avoid
 2687 * accidentally calling ->poll() when NAPI is not scheduled.
2688 */
2689 work = 0;
2690 if (test_bit(NAPI_STATE_SCHED, &n->state))
2691 work = n->poll(n, weight);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002692
2693 WARN_ON_ONCE(work > weight);
2694
2695 budget -= work;
2696
2697 local_irq_disable();
2698
2699 /* Drivers must not modify the NAPI state if they
2700 * consume the entire weight. In such cases this code
2701 * still "owns" the NAPI instance and therefore can
2702 * move the instance around on the list at-will.
2703 */
David S. Millerfed17f32008-01-07 21:00:40 -08002704 if (unlikely(work == weight)) {
2705 if (unlikely(napi_disable_pending(n)))
2706 __napi_complete(n);
2707 else
2708 list_move_tail(&n->poll_list, list);
2709 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002710
2711 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002712 }
2713out:
Shannon Nelson515e06c2007-06-23 23:09:23 -07002714 local_irq_enable();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002715
Chris Leechdb217332006-06-17 21:24:58 -07002716#ifdef CONFIG_NET_DMA
2717 /*
2718 * There may not be any more sk_buffs coming right now, so push
2719 * any pending DMA copies to hardware
2720 */
Dan Williamsd379b012007-07-09 11:56:42 -07002721 if (!cpus_empty(net_dma.channel_mask)) {
2722 int chan_idx;
Mike Travis0e12f842008-05-12 21:21:13 +02002723 for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
Dan Williamsd379b012007-07-09 11:56:42 -07002724 struct dma_chan *chan = net_dma.channels[chan_idx];
2725 if (chan)
2726 dma_async_memcpy_issue_pending(chan);
2727 }
Chris Leechdb217332006-06-17 21:24:58 -07002728 }
2729#endif
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002730
Linus Torvalds1da177e2005-04-16 15:20:36 -07002731 return;
2732
2733softnet_break:
2734 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2735 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2736 goto out;
2737}
2738
2739static gifconf_func_t * gifconf_list [NPROTO];
2740
2741/**
2742 * register_gifconf - register a SIOCGIF handler
2743 * @family: Address family
2744 * @gifconf: Function handler
2745 *
2746 * Register protocol dependent address dumping routines. The handler
2747 * that is passed must not be freed or reused until it has been replaced
2748 * by another handler.
2749 */
2750int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2751{
2752 if (family >= NPROTO)
2753 return -EINVAL;
2754 gifconf_list[family] = gifconf;
2755 return 0;
2756}
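/*
 * For instance, IPv4 registers its handler from devinet_init():
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 *
 * after which dev_ifconf() below can report IPv4 addresses for each
 * interface.
 */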
2757
2758
2759/*
2760 * Map an interface index to its name (SIOCGIFNAME)
2761 */
2762
2763/*
2764 * We need this ioctl for efficient implementation of the
2765 * if_indextoname() function required by the IPv6 API. Without
2766 * it, we would have to search all the interfaces to find a
2767 * match. --pb
2768 */
2769
Eric W. Biederman881d9662007-09-17 11:56:21 -07002770static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002771{
2772 struct net_device *dev;
2773 struct ifreq ifr;
2774
2775 /*
2776 * Fetch the caller's info block.
2777 */
2778
2779 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2780 return -EFAULT;
2781
2782 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -07002783 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002784 if (!dev) {
2785 read_unlock(&dev_base_lock);
2786 return -ENODEV;
2787 }
2788
2789 strcpy(ifr.ifr_name, dev->name);
2790 read_unlock(&dev_base_lock);
2791
2792 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2793 return -EFAULT;
2794 return 0;
2795}
2796
2797/*
2798 * Perform a SIOCGIFCONF call. This structure will change
2799 * size eventually, and there is nothing I can do about it.
2800 * Thus we will need a 'compatibility mode'.
2801 */
2802
Eric W. Biederman881d9662007-09-17 11:56:21 -07002803static int dev_ifconf(struct net *net, char __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002804{
2805 struct ifconf ifc;
2806 struct net_device *dev;
2807 char __user *pos;
2808 int len;
2809 int total;
2810 int i;
2811
2812 /*
2813 * Fetch the caller's info block.
2814 */
2815
2816 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2817 return -EFAULT;
2818
2819 pos = ifc.ifc_buf;
2820 len = ifc.ifc_len;
2821
2822 /*
2823 * Loop over the interfaces, and write an info block for each.
2824 */
2825
2826 total = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002827 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002828 for (i = 0; i < NPROTO; i++) {
2829 if (gifconf_list[i]) {
2830 int done;
2831 if (!pos)
2832 done = gifconf_list[i](dev, NULL, 0);
2833 else
2834 done = gifconf_list[i](dev, pos + total,
2835 len - total);
2836 if (done < 0)
2837 return -EFAULT;
2838 total += done;
2839 }
2840 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002841 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002842
2843 /*
2844 * All done. Write the updated control block back to the caller.
2845 */
2846 ifc.ifc_len = total;
2847
2848 /*
2849 * Both BSD and Solaris return 0 here, so we do too.
2850 */
2851 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2852}
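/*
 * The NULL-buffer probe above (size accounting without copying) is
 * what enables the usual two-pass userspace idiom (illustrative
 * sketch, error handling elided):
 *
 *	struct ifconf ifc;
 *
 *	ifc.ifc_len = 0;
 *	ifc.ifc_req = NULL;
 *	ioctl(fd, SIOCGIFCONF, &ifc);		// pass 1: learn the size
 *	ifc.ifc_req = malloc(ifc.ifc_len);
 *	ioctl(fd, SIOCGIFCONF, &ifc);		// pass 2: fetch the records
 */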
2853
2854#ifdef CONFIG_PROC_FS
2855/*
2856 * This is invoked by the /proc filesystem handler to display a device
2857 * in detail.
2858 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002859void *dev_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002860 __acquires(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002861{
Denis V. Luneve372c412007-11-19 22:31:54 -08002862 struct net *net = seq_file_net(seq);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002863 loff_t off;
2864 struct net_device *dev;
2865
Linus Torvalds1da177e2005-04-16 15:20:36 -07002866 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002867 if (!*pos)
2868 return SEQ_START_TOKEN;
2869
2870 off = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002871 for_each_netdev(net, dev)
Pavel Emelianov7562f872007-05-03 15:13:45 -07002872 if (off++ == *pos)
2873 return dev;
2874
2875 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002876}
2877
2878void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2879{
Denis V. Luneve372c412007-11-19 22:31:54 -08002880 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002881 ++*pos;
Pavel Emelianov7562f872007-05-03 15:13:45 -07002882 return v == SEQ_START_TOKEN ?
Eric W. Biederman881d9662007-09-17 11:56:21 -07002883 first_net_device(net) : next_net_device((struct net_device *)v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002884}
2885
2886void dev_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002887 __releases(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002888{
2889 read_unlock(&dev_base_lock);
2890}
2891
2892static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2893{
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08002894 const struct net_device_stats *stats = dev_get_stats(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002895
Rusty Russell5a1b5892007-04-28 21:04:03 -07002896 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2897 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2898 dev->name, stats->rx_bytes, stats->rx_packets,
2899 stats->rx_errors,
2900 stats->rx_dropped + stats->rx_missed_errors,
2901 stats->rx_fifo_errors,
2902 stats->rx_length_errors + stats->rx_over_errors +
2903 stats->rx_crc_errors + stats->rx_frame_errors,
2904 stats->rx_compressed, stats->multicast,
2905 stats->tx_bytes, stats->tx_packets,
2906 stats->tx_errors, stats->tx_dropped,
2907 stats->tx_fifo_errors, stats->collisions,
2908 stats->tx_carrier_errors +
2909 stats->tx_aborted_errors +
2910 stats->tx_window_errors +
2911 stats->tx_heartbeat_errors,
2912 stats->tx_compressed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002913}
2914
2915/*
2916 * Called from the PROCfs module. This now uses the new arbitrary sized
2917 * /proc/net interface to create /proc/net/dev
2918 */
2919static int dev_seq_show(struct seq_file *seq, void *v)
2920{
2921 if (v == SEQ_START_TOKEN)
2922 seq_puts(seq, "Inter-| Receive "
2923 " | Transmit\n"
2924 " face |bytes packets errs drop fifo frame "
2925 "compressed multicast|bytes packets errs "
2926 "drop fifo colls carrier compressed\n");
2927 else
2928 dev_seq_printf_stats(seq, v);
2929 return 0;
2930}
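/*
 * The resulting /proc/net/dev output then looks like this
 * (illustrative values, columns abbreviated):
 *
 *	Inter-|   Receive                     | Transmit
 *	 face |bytes    packets errs drop ... |bytes    packets ...
 *	    lo: 1839078   12107    0    0       1839078   12107
 *	  eth0: 43681230  51633    0    0       5064965   41031
 */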
2931
2932static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2933{
2934 struct netif_rx_stats *rc = NULL;
2935
Mike Travis0c0b0ac2008-05-02 16:43:08 -07002936 while (*pos < nr_cpu_ids)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002937 if (cpu_online(*pos)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002938 rc = &per_cpu(netdev_rx_stat, *pos);
2939 break;
2940 } else
2941 ++*pos;
2942 return rc;
2943}
2944
2945static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2946{
2947 return softnet_get_online(pos);
2948}
2949
2950static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2951{
2952 ++*pos;
2953 return softnet_get_online(pos);
2954}
2955
2956static void softnet_seq_stop(struct seq_file *seq, void *v)
2957{
2958}
2959
2960static int softnet_seq_show(struct seq_file *seq, void *v)
2961{
2962 struct netif_rx_stats *s = v;
2963
2964 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
Stephen Hemminger31aa02c2005-06-23 20:12:48 -07002965 s->total, s->dropped, s->time_squeeze, 0,
Stephen Hemmingerc1ebcdb2005-06-23 20:08:59 -07002966 0, 0, 0, 0, /* was fastroute */
 2967		   s->cpu_collision);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002968 return 0;
2969}
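/*
 * Each /proc/net/softnet_stat line is one online CPU, printed as nine
 * hex words; only total, dropped, time_squeeze and cpu_collision carry
 * data, the zero columns are placeholders kept for format
 * compatibility (the old fastroute counters). An illustrative line:
 *
 *	0007e35c 00000000 000000a2 00000000 00000000 00000000 00000000 00000000 00000003
 */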
2970
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002971static const struct seq_operations dev_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002972 .start = dev_seq_start,
2973 .next = dev_seq_next,
2974 .stop = dev_seq_stop,
2975 .show = dev_seq_show,
2976};
2977
2978static int dev_seq_open(struct inode *inode, struct file *file)
2979{
Denis V. Luneve372c412007-11-19 22:31:54 -08002980 return seq_open_net(inode, file, &dev_seq_ops,
2981 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002982}
2983
Arjan van de Ven9a321442007-02-12 00:55:35 -08002984static const struct file_operations dev_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002985 .owner = THIS_MODULE,
2986 .open = dev_seq_open,
2987 .read = seq_read,
2988 .llseek = seq_lseek,
Denis V. Luneve372c412007-11-19 22:31:54 -08002989 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002990};
2991
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002992static const struct seq_operations softnet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002993 .start = softnet_seq_start,
2994 .next = softnet_seq_next,
2995 .stop = softnet_seq_stop,
2996 .show = softnet_seq_show,
2997};
2998
2999static int softnet_seq_open(struct inode *inode, struct file *file)
3000{
3001 return seq_open(file, &softnet_seq_ops);
3002}
3003
Arjan van de Ven9a321442007-02-12 00:55:35 -08003004static const struct file_operations softnet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003005 .owner = THIS_MODULE,
3006 .open = softnet_seq_open,
3007 .read = seq_read,
3008 .llseek = seq_lseek,
3009 .release = seq_release,
3010};
3011
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003012static void *ptype_get_idx(loff_t pos)
3013{
3014 struct packet_type *pt = NULL;
3015 loff_t i = 0;
3016 int t;
3017
3018 list_for_each_entry_rcu(pt, &ptype_all, list) {
3019 if (i == pos)
3020 return pt;
3021 ++i;
3022 }
3023
Pavel Emelyanov82d8a862007-11-26 20:12:58 +08003024 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003025 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3026 if (i == pos)
3027 return pt;
3028 ++i;
3029 }
3030 }
3031 return NULL;
3032}
3033
3034static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemminger72348a42008-01-21 02:27:29 -08003035 __acquires(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003036{
3037 rcu_read_lock();
3038 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3039}
3040
3041static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3042{
3043 struct packet_type *pt;
3044 struct list_head *nxt;
3045 int hash;
3046
3047 ++*pos;
3048 if (v == SEQ_START_TOKEN)
3049 return ptype_get_idx(0);
3050
3051 pt = v;
3052 nxt = pt->list.next;
3053 if (pt->type == htons(ETH_P_ALL)) {
3054 if (nxt != &ptype_all)
3055 goto found;
3056 hash = 0;
3057 nxt = ptype_base[0].next;
3058 } else
Pavel Emelyanov82d8a862007-11-26 20:12:58 +08003059 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003060
3061 while (nxt == &ptype_base[hash]) {
Pavel Emelyanov82d8a862007-11-26 20:12:58 +08003062 if (++hash >= PTYPE_HASH_SIZE)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003063 return NULL;
3064 nxt = ptype_base[hash].next;
3065 }
3066found:
3067 return list_entry(nxt, struct packet_type, list);
3068}
3069
3070static void ptype_seq_stop(struct seq_file *seq, void *v)
Stephen Hemminger72348a42008-01-21 02:27:29 -08003071 __releases(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003072{
3073 rcu_read_unlock();
3074}
3075
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003076static int ptype_seq_show(struct seq_file *seq, void *v)
3077{
3078 struct packet_type *pt = v;
3079
3080 if (v == SEQ_START_TOKEN)
3081 seq_puts(seq, "Type Device Function\n");
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09003082 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003083 if (pt->type == htons(ETH_P_ALL))
3084 seq_puts(seq, "ALL ");
3085 else
3086 seq_printf(seq, "%04x", ntohs(pt->type));
3087
Alexey Dobriyan908cd2d2008-11-16 19:50:35 -08003088 seq_printf(seq, " %-8s %pF\n",
3089 pt->dev ? pt->dev->name : "", pt->func);
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003090 }
3091
3092 return 0;
3093}
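/*
 * /proc/net/ptype then lists one handler per line, e.g. (illustrative
 * output; packet_rcv and ip_rcv are the usual AF_PACKET and IPv4
 * receive handlers):
 *
 *	Type Device      Function
 *	ALL  eth0        packet_rcv
 *	0800             ip_rcv
 */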
3094
3095static const struct seq_operations ptype_seq_ops = {
3096 .start = ptype_seq_start,
3097 .next = ptype_seq_next,
3098 .stop = ptype_seq_stop,
3099 .show = ptype_seq_show,
3100};
3101
3102static int ptype_seq_open(struct inode *inode, struct file *file)
3103{
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07003104 return seq_open_net(inode, file, &ptype_seq_ops,
3105 sizeof(struct seq_net_private));
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003106}
3107
3108static const struct file_operations ptype_seq_fops = {
3109 .owner = THIS_MODULE,
3110 .open = ptype_seq_open,
3111 .read = seq_read,
3112 .llseek = seq_lseek,
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07003113 .release = seq_release_net,
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003114};
3115
3116
Pavel Emelyanov46650792007-10-08 20:38:39 -07003117static int __net_init dev_proc_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003118{
3119 int rc = -ENOMEM;
3120
Eric W. Biederman881d9662007-09-17 11:56:21 -07003121 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003122 goto out;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003123 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003124 goto out_dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003125 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003126 goto out_softnet;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003127
Eric W. Biederman881d9662007-09-17 11:56:21 -07003128 if (wext_proc_init(net))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003129 goto out_ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003130 rc = 0;
3131out:
3132 return rc;
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003133out_ptype:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003134 proc_net_remove(net, "ptype");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003135out_softnet:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003136 proc_net_remove(net, "softnet_stat");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003137out_dev:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003138 proc_net_remove(net, "dev");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003139 goto out;
3140}
Eric W. Biederman881d9662007-09-17 11:56:21 -07003141
Pavel Emelyanov46650792007-10-08 20:38:39 -07003142static void __net_exit dev_proc_net_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07003143{
3144 wext_proc_exit(net);
3145
3146 proc_net_remove(net, "ptype");
3147 proc_net_remove(net, "softnet_stat");
3148 proc_net_remove(net, "dev");
3149}
3150
Denis V. Lunev022cbae2007-11-13 03:23:50 -08003151static struct pernet_operations __net_initdata dev_proc_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07003152 .init = dev_proc_net_init,
3153 .exit = dev_proc_net_exit,
3154};
3155
3156static int __init dev_proc_init(void)
3157{
3158 return register_pernet_subsys(&dev_proc_ops);
3159}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003160#else
3161#define dev_proc_init() 0
3162#endif /* CONFIG_PROC_FS */
3163
3164
3165/**
3166 * netdev_set_master - set up master/slave pair
3167 * @slave: slave device
3168 * @master: new master device
3169 *
3170 * Changes the master device of the slave. Pass %NULL to break the
3171 * bonding. The caller must hold the RTNL semaphore. On a failure
3172 * a negative errno code is returned. On success the reference counts
3173 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3174 * function returns zero.
3175 */
3176int netdev_set_master(struct net_device *slave, struct net_device *master)
3177{
3178 struct net_device *old = slave->master;
3179
3180 ASSERT_RTNL();
3181
3182 if (master) {
3183 if (old)
3184 return -EBUSY;
3185 dev_hold(master);
3186 }
3187
3188 slave->master = master;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003189
Linus Torvalds1da177e2005-04-16 15:20:36 -07003190 synchronize_net();
3191
3192 if (old)
3193 dev_put(old);
3194
3195 if (master)
3196 slave->flags |= IFF_SLAVE;
3197 else
3198 slave->flags &= ~IFF_SLAVE;
3199
3200 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3201 return 0;
3202}
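/*
 * The bonding driver is the canonical caller; enslaving and releasing
 * a device reduce to (sketch, caller holds rtnl_lock()):
 *
 *	err = netdev_set_master(slave_dev, bond_dev);	// enslave
 *	...
 *	netdev_set_master(slave_dev, NULL);		// release
 */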
3203
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003204static void dev_change_rx_flags(struct net_device *dev, int flags)
3205{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003206 const struct net_device_ops *ops = dev->netdev_ops;
3207
3208 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3209 ops->ndo_change_rx_flags(dev, flags);
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003210}
3211
Wang Chendad9b332008-06-18 01:48:28 -07003212static int __dev_set_promiscuity(struct net_device *dev, int inc)
Patrick McHardy4417da62007-06-27 01:28:10 -07003213{
3214 unsigned short old_flags = dev->flags;
David Howells8192b0c2008-11-14 10:39:10 +11003215 uid_t uid;
3216 gid_t gid;
Patrick McHardy4417da62007-06-27 01:28:10 -07003217
Patrick McHardy24023452007-07-14 18:51:31 -07003218 ASSERT_RTNL();
3219
Wang Chendad9b332008-06-18 01:48:28 -07003220 dev->flags |= IFF_PROMISC;
3221 dev->promiscuity += inc;
3222 if (dev->promiscuity == 0) {
3223 /*
3224 * Avoid overflow.
 3225	 * If inc causes overflow, leave promiscuity untouched and return an error.
3226 */
3227 if (inc < 0)
3228 dev->flags &= ~IFF_PROMISC;
3229 else {
3230 dev->promiscuity -= inc;
 3231			printk(KERN_WARNING "%s: promiscuity counter overflowed; "
 3232				"setting promiscuity failed. The promiscuity "
 3233				"feature of the device may be broken.\n", dev->name);
3234 return -EOVERFLOW;
3235 }
3236 }
Patrick McHardy4417da62007-06-27 01:28:10 -07003237 if (dev->flags != old_flags) {
3238 printk(KERN_INFO "device %s %s promiscuous mode\n",
3239 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3240 "left");
David Howells8192b0c2008-11-14 10:39:10 +11003241 if (audit_enabled) {
3242 current_uid_gid(&uid, &gid);
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05003243 audit_log(current->audit_context, GFP_ATOMIC,
3244 AUDIT_ANOM_PROMISCUOUS,
3245 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3246 dev->name, (dev->flags & IFF_PROMISC),
3247 (old_flags & IFF_PROMISC),
3248 audit_get_loginuid(current),
David Howells8192b0c2008-11-14 10:39:10 +11003249 uid, gid,
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05003250 audit_get_sessionid(current));
David Howells8192b0c2008-11-14 10:39:10 +11003251 }
Patrick McHardy24023452007-07-14 18:51:31 -07003252
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003253 dev_change_rx_flags(dev, IFF_PROMISC);
Patrick McHardy4417da62007-06-27 01:28:10 -07003254 }
Wang Chendad9b332008-06-18 01:48:28 -07003255 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003256}
3257
Linus Torvalds1da177e2005-04-16 15:20:36 -07003258/**
3259 * dev_set_promiscuity - update promiscuity count on a device
3260 * @dev: device
3261 * @inc: modifier
3262 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07003263 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07003264 * remains above zero the interface remains promiscuous. Once it hits zero
3265 * the device reverts back to normal filtering operation. A negative inc
3266 * value is used to drop promiscuity on the device.
Wang Chendad9b332008-06-18 01:48:28 -07003267 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003268 */
Wang Chendad9b332008-06-18 01:48:28 -07003269int dev_set_promiscuity(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003270{
3271 unsigned short old_flags = dev->flags;
Wang Chendad9b332008-06-18 01:48:28 -07003272 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003273
Wang Chendad9b332008-06-18 01:48:28 -07003274 err = __dev_set_promiscuity(dev, inc);
Patrick McHardy4b5a6982008-07-06 15:49:08 -07003275 if (err < 0)
Wang Chendad9b332008-06-18 01:48:28 -07003276 return err;
Patrick McHardy4417da62007-06-27 01:28:10 -07003277 if (dev->flags != old_flags)
3278 dev_set_rx_mode(dev);
Wang Chendad9b332008-06-18 01:48:28 -07003279 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003280}
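/*
 * Callers such as a packet socket entering capture mode rely on the
 * counting semantics so independent users nest safely (sketch, rtnl
 * held):
 *
 *	dev_set_promiscuity(dev, 1);	// start sniffing
 *	...
 *	dev_set_promiscuity(dev, -1);	// done; mode ends when count hits 0
 */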
3281
3282/**
3283 * dev_set_allmulti - update allmulti count on a device
3284 * @dev: device
3285 * @inc: modifier
3286 *
3287 * Add or remove reception of all multicast frames to a device. While the
3288 * count in the device remains above zero the interface remains listening
3289 * to all interfaces. Once it hits zero the device reverts back to normal
3290 * filtering operation. A negative @inc value is used to drop the counter
3291 * when releasing a resource needing all multicasts.
Wang Chendad9b332008-06-18 01:48:28 -07003292 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003293 */
3294
Wang Chendad9b332008-06-18 01:48:28 -07003295int dev_set_allmulti(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003296{
3297 unsigned short old_flags = dev->flags;
3298
Patrick McHardy24023452007-07-14 18:51:31 -07003299 ASSERT_RTNL();
3300
Linus Torvalds1da177e2005-04-16 15:20:36 -07003301 dev->flags |= IFF_ALLMULTI;
Wang Chendad9b332008-06-18 01:48:28 -07003302 dev->allmulti += inc;
3303 if (dev->allmulti == 0) {
3304 /*
3305 * Avoid overflow.
 3306	 * If inc causes overflow, leave allmulti untouched and return an error.
3307 */
3308 if (inc < 0)
3309 dev->flags &= ~IFF_ALLMULTI;
3310 else {
3311 dev->allmulti -= inc;
 3312			printk(KERN_WARNING "%s: allmulti counter overflowed; "
 3313				"setting allmulti failed. The allmulti feature of "
 3314				"the device may be broken.\n", dev->name);
3315 return -EOVERFLOW;
3316 }
3317 }
Patrick McHardy24023452007-07-14 18:51:31 -07003318 if (dev->flags ^ old_flags) {
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003319 dev_change_rx_flags(dev, IFF_ALLMULTI);
Patrick McHardy4417da62007-06-27 01:28:10 -07003320 dev_set_rx_mode(dev);
Patrick McHardy24023452007-07-14 18:51:31 -07003321 }
Wang Chendad9b332008-06-18 01:48:28 -07003322 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003323}
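/*
 * The same counting pattern applies here, e.g. while a resource that
 * needs every multicast frame is active (sketch, rtnl held):
 *
 *	dev_set_allmulti(dev, 1);
 *	...
 *	dev_set_allmulti(dev, -1);
 */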
3324
3325/*
3326 * Upload unicast and multicast address lists to device and
3327 * configure RX filtering. When the device doesn't support unicast
Joe Perches53ccaae2007-12-20 14:02:06 -08003328 * filtering it is put in promiscuous mode while unicast addresses
Patrick McHardy4417da62007-06-27 01:28:10 -07003329 * are present.
3330 */
3331void __dev_set_rx_mode(struct net_device *dev)
3332{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003333 const struct net_device_ops *ops = dev->netdev_ops;
3334
Patrick McHardy4417da62007-06-27 01:28:10 -07003335 /* dev_open will call this function so the list will stay sane. */
3336 if (!(dev->flags&IFF_UP))
3337 return;
3338
3339 if (!netif_device_present(dev))
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +09003340 return;
Patrick McHardy4417da62007-06-27 01:28:10 -07003341
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003342 if (ops->ndo_set_rx_mode)
3343 ops->ndo_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003344 else {
3345 /* Unicast addresses changes may only happen under the rtnl,
3346 * therefore calling __dev_set_promiscuity here is safe.
3347 */
3348 if (dev->uc_count > 0 && !dev->uc_promisc) {
3349 __dev_set_promiscuity(dev, 1);
3350 dev->uc_promisc = 1;
3351 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3352 __dev_set_promiscuity(dev, -1);
3353 dev->uc_promisc = 0;
3354 }
3355
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003356 if (ops->ndo_set_multicast_list)
3357 ops->ndo_set_multicast_list(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003358 }
3359}
3360
3361void dev_set_rx_mode(struct net_device *dev)
3362{
David S. Millerb9e40852008-07-15 00:15:08 -07003363 netif_addr_lock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003364 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003365 netif_addr_unlock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003366}
3367
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003368int __dev_addr_delete(struct dev_addr_list **list, int *count,
3369 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003370{
3371 struct dev_addr_list *da;
3372
3373 for (; (da = *list) != NULL; list = &da->next) {
3374 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3375 alen == da->da_addrlen) {
3376 if (glbl) {
3377 int old_glbl = da->da_gusers;
3378 da->da_gusers = 0;
3379 if (old_glbl == 0)
3380 break;
3381 }
3382 if (--da->da_users)
3383 return 0;
3384
3385 *list = da->next;
3386 kfree(da);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003387 (*count)--;
Patrick McHardybf742482007-06-27 01:26:19 -07003388 return 0;
3389 }
3390 }
3391 return -ENOENT;
3392}
3393
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003394int __dev_addr_add(struct dev_addr_list **list, int *count,
3395 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003396{
3397 struct dev_addr_list *da;
3398
3399 for (da = *list; da != NULL; da = da->next) {
3400 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3401 da->da_addrlen == alen) {
3402 if (glbl) {
3403 int old_glbl = da->da_gusers;
3404 da->da_gusers = 1;
3405 if (old_glbl)
3406 return 0;
3407 }
3408 da->da_users++;
3409 return 0;
3410 }
3411 }
3412
Jorge Boncompte [DTI2]12aa3432008-02-19 14:17:04 -08003413 da = kzalloc(sizeof(*da), GFP_ATOMIC);
Patrick McHardybf742482007-06-27 01:26:19 -07003414 if (da == NULL)
3415 return -ENOMEM;
3416 memcpy(da->da_addr, addr, alen);
3417 da->da_addrlen = alen;
3418 da->da_users = 1;
3419 da->da_gusers = glbl ? 1 : 0;
3420 da->next = *list;
3421 *list = da;
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003422 (*count)++;
Patrick McHardybf742482007-06-27 01:26:19 -07003423 return 0;
3424}
3425
Patrick McHardy4417da62007-06-27 01:28:10 -07003426/**
3427 * dev_unicast_delete - Release secondary unicast address.
3428 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003429 * @addr: address to delete
3430 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07003431 *
3432 * Release reference to a secondary unicast address and remove it
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003433 * from the device if the reference count drops to zero.
Patrick McHardy4417da62007-06-27 01:28:10 -07003434 *
3435 * The caller must hold the rtnl_mutex.
3436 */
3437int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3438{
3439 int err;
3440
3441 ASSERT_RTNL();
3442
David S. Millerb9e40852008-07-15 00:15:08 -07003443 netif_addr_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003444 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3445 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07003446 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003447 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003448 return err;
3449}
3450EXPORT_SYMBOL(dev_unicast_delete);
3451
3452/**
3453 * dev_unicast_add - add a secondary unicast address
3454 * @dev: device
Wang Chen5dbaec52008-06-27 19:35:16 -07003455 * @addr: address to add
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003456 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07003457 *
3458 * Add a secondary unicast address to the device or increase
3459 * the reference count if it already exists.
3460 *
3461 * The caller must hold the rtnl_mutex.
3462 */
3463int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3464{
3465 int err;
3466
3467 ASSERT_RTNL();
3468
David S. Millerb9e40852008-07-15 00:15:08 -07003469 netif_addr_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003470 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3471 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07003472 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003473 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003474 return err;
3475}
3476EXPORT_SYMBOL(dev_unicast_add);
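/*
 * A driver that wants to listen on an extra MAC address pairs the two
 * calls (sketch, rtnl held; addr is a hypothetical u8[ETH_ALEN]):
 *
 *	err = dev_unicast_add(dev, addr, ETH_ALEN);
 *	...
 *	dev_unicast_delete(dev, addr, ETH_ALEN);
 */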
3477
Chris Leeche83a2ea2008-01-31 16:53:23 -08003478int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3479 struct dev_addr_list **from, int *from_count)
3480{
3481 struct dev_addr_list *da, *next;
3482 int err = 0;
3483
3484 da = *from;
3485 while (da != NULL) {
3486 next = da->next;
3487 if (!da->da_synced) {
3488 err = __dev_addr_add(to, to_count,
3489 da->da_addr, da->da_addrlen, 0);
3490 if (err < 0)
3491 break;
3492 da->da_synced = 1;
3493 da->da_users++;
3494 } else if (da->da_users == 1) {
3495 __dev_addr_delete(to, to_count,
3496 da->da_addr, da->da_addrlen, 0);
3497 __dev_addr_delete(from, from_count,
3498 da->da_addr, da->da_addrlen, 0);
3499 }
3500 da = next;
3501 }
3502 return err;
3503}
3504
3505void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3506 struct dev_addr_list **from, int *from_count)
3507{
3508 struct dev_addr_list *da, *next;
3509
3510 da = *from;
3511 while (da != NULL) {
3512 next = da->next;
3513 if (da->da_synced) {
3514 __dev_addr_delete(to, to_count,
3515 da->da_addr, da->da_addrlen, 0);
3516 da->da_synced = 0;
3517 __dev_addr_delete(from, from_count,
3518 da->da_addr, da->da_addrlen, 0);
3519 }
3520 da = next;
3521 }
3522}
3523
3524/**
3525 * dev_unicast_sync - Synchronize device's unicast list to another device
3526 * @to: destination device
3527 * @from: source device
3528 *
3529 * Add newly added addresses to the destination device and release
3530 * addresses that have no users left. The source device must be
3531 * locked by netif_tx_lock_bh.
 3532 * locked by netif_addr_lock_bh.
3533 * This function is intended to be called from the dev->set_rx_mode
3534 * function of layered software devices.
3535 */
3536int dev_unicast_sync(struct net_device *to, struct net_device *from)
3537{
3538 int err = 0;
3539
David S. Millerb9e40852008-07-15 00:15:08 -07003540 netif_addr_lock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003541 err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3542 &from->uc_list, &from->uc_count);
3543 if (!err)
3544 __dev_set_rx_mode(to);
David S. Millerb9e40852008-07-15 00:15:08 -07003545 netif_addr_unlock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003546 return err;
3547}
3548EXPORT_SYMBOL(dev_unicast_sync);
3549
3550/**
Randy Dunlapbc2cda12008-02-13 15:03:25 -08003551 * dev_unicast_unsync - Remove synchronized addresses from the destination device
Chris Leeche83a2ea2008-01-31 16:53:23 -08003552 * @to: destination device
3553 * @from: source device
3554 *
3555 * Remove all addresses that were added to the destination device by
3556 * dev_unicast_sync(). This function is intended to be called from the
3557 * dev->stop function of layered software devices.
3558 */
3559void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3560{
David S. Millerb9e40852008-07-15 00:15:08 -07003561 netif_addr_lock_bh(from);
David S. Millere308a5d2008-07-15 00:13:44 -07003562 netif_addr_lock(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003563
3564 __dev_addr_unsync(&to->uc_list, &to->uc_count,
3565 &from->uc_list, &from->uc_count);
3566 __dev_set_rx_mode(to);
3567
David S. Millere308a5d2008-07-15 00:13:44 -07003568 netif_addr_unlock(to);
David S. Millerb9e40852008-07-15 00:15:08 -07003569 netif_addr_unlock_bh(from);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003570}
3571EXPORT_SYMBOL(dev_unicast_unsync);
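/*
 * Stacked devices propagate their filters with the sync/unsync pair;
 * a VLAN/macvlan-style driver does roughly this (sketch, vlan_dev and
 * real_dev are illustrative names):
 *
 *	dev_unicast_sync(real_dev, vlan_dev);	// from ndo set_rx_mode
 *	...
 *	dev_unicast_unsync(real_dev, vlan_dev);	// from ndo stop
 */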
3572
Denis Cheng12972622007-07-18 02:12:56 -07003573static void __dev_addr_discard(struct dev_addr_list **list)
3574{
3575 struct dev_addr_list *tmp;
3576
3577 while (*list != NULL) {
3578 tmp = *list;
3579 *list = tmp->next;
3580 if (tmp->da_users > tmp->da_gusers)
3581 printk("__dev_addr_discard: address leakage! "
3582 "da_users=%d\n", tmp->da_users);
3583 kfree(tmp);
3584 }
3585}
3586
Denis Cheng26cc2522007-07-18 02:12:03 -07003587static void dev_addr_discard(struct net_device *dev)
Patrick McHardy4417da62007-06-27 01:28:10 -07003588{
David S. Millerb9e40852008-07-15 00:15:08 -07003589 netif_addr_lock_bh(dev);
Denis Cheng26cc2522007-07-18 02:12:03 -07003590
Patrick McHardy4417da62007-06-27 01:28:10 -07003591 __dev_addr_discard(&dev->uc_list);
3592 dev->uc_count = 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003593
Denis Cheng456ad752007-07-18 02:10:54 -07003594 __dev_addr_discard(&dev->mc_list);
3595 dev->mc_count = 0;
Denis Cheng26cc2522007-07-18 02:12:03 -07003596
David S. Millerb9e40852008-07-15 00:15:08 -07003597 netif_addr_unlock_bh(dev);
Denis Cheng456ad752007-07-18 02:10:54 -07003598}
3599
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003600/**
3601 * dev_get_flags - get flags reported to userspace
3602 * @dev: device
3603 *
3604 * Get the combination of flag bits exported through APIs to userspace.
3605 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003606unsigned dev_get_flags(const struct net_device *dev)
3607{
3608 unsigned flags;
3609
3610 flags = (dev->flags & ~(IFF_PROMISC |
3611 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08003612 IFF_RUNNING |
3613 IFF_LOWER_UP |
3614 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07003615 (dev->gflags & (IFF_PROMISC |
3616 IFF_ALLMULTI));
3617
Stefan Rompfb00055a2006-03-20 17:09:11 -08003618 if (netif_running(dev)) {
3619 if (netif_oper_up(dev))
3620 flags |= IFF_RUNNING;
3621 if (netif_carrier_ok(dev))
3622 flags |= IFF_LOWER_UP;
3623 if (netif_dormant(dev))
3624 flags |= IFF_DORMANT;
3625 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003626
3627 return flags;
3628}
3629
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003630/**
3631 * dev_change_flags - change device settings
3632 * @dev: device
3633 * @flags: device state flags
3634 *
3635 * Change settings on device based state flags. The flags are
3636 * in the userspace exported format.
3637 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003638int dev_change_flags(struct net_device *dev, unsigned flags)
3639{
Thomas Graf7c355f52007-06-05 16:03:03 -07003640 int ret, changes;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003641 int old_flags = dev->flags;
3642
Patrick McHardy24023452007-07-14 18:51:31 -07003643 ASSERT_RTNL();
3644
Linus Torvalds1da177e2005-04-16 15:20:36 -07003645 /*
3646 * Set the flags on our device.
3647 */
3648
3649 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3650 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3651 IFF_AUTOMEDIA)) |
3652 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3653 IFF_ALLMULTI));
3654
3655 /*
3656 * Load in the correct multicast list now the flags have changed.
3657 */
3658
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003659 if ((old_flags ^ flags) & IFF_MULTICAST)
3660 dev_change_rx_flags(dev, IFF_MULTICAST);
Patrick McHardy24023452007-07-14 18:51:31 -07003661
Patrick McHardy4417da62007-06-27 01:28:10 -07003662 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003663
3664 /*
 3665	 * Have we downed the interface? We handle IFF_UP ourselves
3666 * according to user attempts to set it, rather than blindly
3667 * setting it.
3668 */
3669
3670 ret = 0;
3671 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
3672 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3673
3674 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07003675 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003676 }
3677
3678 if (dev->flags & IFF_UP &&
3679 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3680 IFF_VOLATILE)))
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003681 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003682
3683 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3684 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3685 dev->gflags ^= IFF_PROMISC;
3686 dev_set_promiscuity(dev, inc);
3687 }
3688
3689 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
 3690	   is important. Some (broken) drivers set IFF_PROMISC when
 3691	   IFF_ALLMULTI is requested, without asking us and without reporting.
3692 */
3693 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3694 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3695 dev->gflags ^= IFF_ALLMULTI;
3696 dev_set_allmulti(dev, inc);
3697 }
3698
Thomas Graf7c355f52007-06-05 16:03:03 -07003699 /* Exclude state transition flags, already notified */
3700 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3701 if (changes)
3702 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003703
3704 return ret;
3705}
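/*
 * Bringing an interface up or down in-kernel reduces to toggling
 * IFF_UP through this helper (sketch, rtnl held):
 *
 *	dev_change_flags(dev, dev->flags | IFF_UP);	// like "ifconfig up"
 *	dev_change_flags(dev, dev->flags & ~IFF_UP);	// like "ifconfig down"
 */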
3706
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003707/**
3708 * dev_set_mtu - Change maximum transfer unit
3709 * @dev: device
3710 * @new_mtu: new transfer unit
3711 *
3712 * Change the maximum transfer size of the network device.
3713 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003714int dev_set_mtu(struct net_device *dev, int new_mtu)
3715{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003716 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003717 int err;
3718
3719 if (new_mtu == dev->mtu)
3720 return 0;
3721
3722 /* MTU must be positive. */
3723 if (new_mtu < 0)
3724 return -EINVAL;
3725
3726 if (!netif_device_present(dev))
3727 return -ENODEV;
3728
3729 err = 0;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003730 if (ops->ndo_change_mtu)
3731 err = ops->ndo_change_mtu(dev, new_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003732 else
3733 dev->mtu = new_mtu;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003734
Linus Torvalds1da177e2005-04-16 15:20:36 -07003735 if (!err && dev->flags & IFF_UP)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003736 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003737 return err;
3738}
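/*
 * In-kernel users only need rtnl; a tunnel compensating for its header
 * overhead might do (sketch; lower_dev and overhead are hypothetical
 * names):
 *
 *	err = dev_set_mtu(dev, lower_dev->mtu - overhead);
 */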
3739
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003740/**
3741 * dev_set_mac_address - Change Media Access Control Address
3742 * @dev: device
3743 * @sa: new address
3744 *
3745 * Change the hardware (MAC) address of the device
3746 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003747int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3748{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003749 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003750 int err;
3751
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003752 if (!ops->ndo_set_mac_address)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003753 return -EOPNOTSUPP;
3754 if (sa->sa_family != dev->type)
3755 return -EINVAL;
3756 if (!netif_device_present(dev))
3757 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003758 err = ops->ndo_set_mac_address(dev, sa);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003759 if (!err)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003760 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003761 return err;
3762}
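/*
 * The new address travels in a struct sockaddr whose family must match
 * dev->type (sketch, rtnl held; new_mac is a hypothetical
 * u8[ETH_ALEN]):
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, ETH_ALEN);
 *	err = dev_set_mac_address(dev, &sa);
 */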
3763
3764/*
Jeff Garzik14e3e072007-10-08 00:06:32 -07003765 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003766 */
Jeff Garzik14e3e072007-10-08 00:06:32 -07003767static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003768{
3769 int err;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003770 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003771
3772 if (!dev)
3773 return -ENODEV;
3774
3775 switch (cmd) {
3776 case SIOCGIFFLAGS: /* Get interface flags */
3777 ifr->ifr_flags = dev_get_flags(dev);
3778 return 0;
3779
Linus Torvalds1da177e2005-04-16 15:20:36 -07003780 case SIOCGIFMETRIC: /* Get the metric on the interface
3781 (currently unused) */
3782 ifr->ifr_metric = 0;
3783 return 0;
3784
Linus Torvalds1da177e2005-04-16 15:20:36 -07003785 case SIOCGIFMTU: /* Get the MTU of a device */
3786 ifr->ifr_mtu = dev->mtu;
3787 return 0;
3788
Linus Torvalds1da177e2005-04-16 15:20:36 -07003789 case SIOCGIFHWADDR:
3790 if (!dev->addr_len)
3791 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3792 else
3793 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3794 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3795 ifr->ifr_hwaddr.sa_family = dev->type;
3796 return 0;
3797
Jeff Garzik14e3e072007-10-08 00:06:32 -07003798 case SIOCGIFSLAVE:
3799 err = -EINVAL;
3800 break;
3801
3802 case SIOCGIFMAP:
3803 ifr->ifr_map.mem_start = dev->mem_start;
3804 ifr->ifr_map.mem_end = dev->mem_end;
3805 ifr->ifr_map.base_addr = dev->base_addr;
3806 ifr->ifr_map.irq = dev->irq;
3807 ifr->ifr_map.dma = dev->dma;
3808 ifr->ifr_map.port = dev->if_port;
3809 return 0;
3810
3811 case SIOCGIFINDEX:
3812 ifr->ifr_ifindex = dev->ifindex;
3813 return 0;
3814
3815 case SIOCGIFTXQLEN:
3816 ifr->ifr_qlen = dev->tx_queue_len;
3817 return 0;
3818
3819 default:
3820 /* dev_ioctl() should ensure this case
3821 * is never reached
3822 */
3823 WARN_ON(1);
3824 err = -EINVAL;
3825 break;
3826
3827 }
3828 return err;
3829}
3830
3831/*
3832 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
3833 */
3834static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3835{
3836 int err;
3837 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08003838 const struct net_device_ops *ops;
Jeff Garzik14e3e072007-10-08 00:06:32 -07003839
3840 if (!dev)
3841 return -ENODEV;
3842
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08003843 ops = dev->netdev_ops;
3844
Jeff Garzik14e3e072007-10-08 00:06:32 -07003845 switch (cmd) {
3846 case SIOCSIFFLAGS: /* Set interface flags */
3847 return dev_change_flags(dev, ifr->ifr_flags);
3848
3849 case SIOCSIFMETRIC: /* Set the metric on the interface
3850 (currently unused) */
3851 return -EOPNOTSUPP;
3852
3853 case SIOCSIFMTU: /* Set the MTU of a device */
3854 return dev_set_mtu(dev, ifr->ifr_mtu);
3855
Linus Torvalds1da177e2005-04-16 15:20:36 -07003856 case SIOCSIFHWADDR:
3857 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3858
3859 case SIOCSIFHWBROADCAST:
3860 if (ifr->ifr_hwaddr.sa_family != dev->type)
3861 return -EINVAL;
3862 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3863 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003864 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003865 return 0;
3866
Linus Torvalds1da177e2005-04-16 15:20:36 -07003867 case SIOCSIFMAP:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003868 if (ops->ndo_set_config) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003869 if (!netif_device_present(dev))
3870 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003871 return ops->ndo_set_config(dev, &ifr->ifr_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003872 }
3873 return -EOPNOTSUPP;
3874
3875 case SIOCADDMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003876 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003877 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3878 return -EINVAL;
3879 if (!netif_device_present(dev))
3880 return -ENODEV;
3881 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3882 dev->addr_len, 1);
3883
3884 case SIOCDELMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003885 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003886 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3887 return -EINVAL;
3888 if (!netif_device_present(dev))
3889 return -ENODEV;
3890 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3891 dev->addr_len, 1);
3892
Linus Torvalds1da177e2005-04-16 15:20:36 -07003893 case SIOCSIFTXQLEN:
3894 if (ifr->ifr_qlen < 0)
3895 return -EINVAL;
3896 dev->tx_queue_len = ifr->ifr_qlen;
3897 return 0;
3898
3899 case SIOCSIFNAME:
3900 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3901 return dev_change_name(dev, ifr->ifr_newname);
3902
3903 /*
3904 * Unknown or private ioctl
3905 */
3906
3907 default:
3908 if ((cmd >= SIOCDEVPRIVATE &&
3909 cmd <= SIOCDEVPRIVATE + 15) ||
3910 cmd == SIOCBONDENSLAVE ||
3911 cmd == SIOCBONDRELEASE ||
3912 cmd == SIOCBONDSETHWADDR ||
3913 cmd == SIOCBONDSLAVEINFOQUERY ||
3914 cmd == SIOCBONDINFOQUERY ||
3915 cmd == SIOCBONDCHANGEACTIVE ||
3916 cmd == SIOCGMIIPHY ||
3917 cmd == SIOCGMIIREG ||
3918 cmd == SIOCSMIIREG ||
3919 cmd == SIOCBRADDIF ||
3920 cmd == SIOCBRDELIF ||
3921 cmd == SIOCWANDEV) {
3922 err = -EOPNOTSUPP;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003923 if (ops->ndo_do_ioctl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003924 if (netif_device_present(dev))
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003925 err = ops->ndo_do_ioctl(dev, ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003926 else
3927 err = -ENODEV;
3928 }
3929 } else
3930 err = -EINVAL;
3931
3932 }
3933 return err;
3934}
3935
3936/*
3937 * This function handles all "interface"-type I/O control requests. The actual
3938 * 'doing' part of this is dev_ifsioc above.
3939 */
3940
3941/**
3942 * dev_ioctl - network device ioctl
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07003943 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07003944 * @cmd: command to issue
3945 * @arg: pointer to a struct ifreq in user space
3946 *
3947 * Issue ioctl functions to devices. This is normally called by the
3948 * user space syscall interfaces but can sometimes be useful for
3949 * other purposes. The return value is the return from the syscall if
3950 * positive or a negative errno code on error.
3951 */
3952
Eric W. Biederman881d9662007-09-17 11:56:21 -07003953int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003954{
3955 struct ifreq ifr;
3956 int ret;
3957 char *colon;
3958
3959 /* One special case: SIOCGIFCONF takes ifconf argument
3960 and requires shared lock, because it sleeps writing
3961 to user space.
3962 */
3963
3964 if (cmd == SIOCGIFCONF) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08003965 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003966 ret = dev_ifconf(net, (char __user *) arg);
Stephen Hemminger6756ae42006-03-20 22:23:58 -08003967 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003968 return ret;
3969 }
3970 if (cmd == SIOCGIFNAME)
Eric W. Biederman881d9662007-09-17 11:56:21 -07003971 return dev_ifname(net, (struct ifreq __user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003972
3973 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3974 return -EFAULT;
3975
3976 ifr.ifr_name[IFNAMSIZ-1] = 0;
3977
3978 colon = strchr(ifr.ifr_name, ':');
3979 if (colon)
3980 *colon = 0;
3981
3982 /*
3983 * See which interface the caller is talking about.
3984 */
3985
3986 switch (cmd) {
3987 /*
3988 * These ioctl calls:
3989 * - can be done by all.
3990 * - atomic and do not require locking.
3991 * - return a value
3992 */
3993 case SIOCGIFFLAGS:
3994 case SIOCGIFMETRIC:
3995 case SIOCGIFMTU:
3996 case SIOCGIFHWADDR:
3997 case SIOCGIFSLAVE:
3998 case SIOCGIFMAP:
3999 case SIOCGIFINDEX:
4000 case SIOCGIFTXQLEN:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004001 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004002 read_lock(&dev_base_lock);
Jeff Garzik14e3e072007-10-08 00:06:32 -07004003 ret = dev_ifsioc_locked(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004004 read_unlock(&dev_base_lock);
4005 if (!ret) {
4006 if (colon)
4007 *colon = ':';
4008 if (copy_to_user(arg, &ifr,
4009 sizeof(struct ifreq)))
4010 ret = -EFAULT;
4011 }
4012 return ret;
4013
4014 case SIOCETHTOOL:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004015 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004016 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004017 ret = dev_ethtool(net, &ifr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004018 rtnl_unlock();
4019 if (!ret) {
4020 if (colon)
4021 *colon = ':';
4022 if (copy_to_user(arg, &ifr,
4023 sizeof(struct ifreq)))
4024 ret = -EFAULT;
4025 }
4026 return ret;
4027
4028 /*
4029 * These ioctl calls:
4030 * - require superuser power.
4031 * - require strict serialization.
4032 * - return a value
4033 */
4034 case SIOCGMIIPHY:
4035 case SIOCGMIIREG:
4036 case SIOCSIFNAME:
4037 if (!capable(CAP_NET_ADMIN))
4038 return -EPERM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004039 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004040 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004041 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004042 rtnl_unlock();
4043 if (!ret) {
4044 if (colon)
4045 *colon = ':';
4046 if (copy_to_user(arg, &ifr,
4047 sizeof(struct ifreq)))
4048 ret = -EFAULT;
4049 }
4050 return ret;
4051
4052 /*
4053 * These ioctl calls:
4054 * - require superuser power.
4055 * - require strict serialization.
4056 * - do not return a value
4057 */
4058 case SIOCSIFFLAGS:
4059 case SIOCSIFMETRIC:
4060 case SIOCSIFMTU:
4061 case SIOCSIFMAP:
4062 case SIOCSIFHWADDR:
4063 case SIOCSIFSLAVE:
4064 case SIOCADDMULTI:
4065 case SIOCDELMULTI:
4066 case SIOCSIFHWBROADCAST:
4067 case SIOCSIFTXQLEN:
4068 case SIOCSMIIREG:
4069 case SIOCBONDENSLAVE:
4070 case SIOCBONDRELEASE:
4071 case SIOCBONDSETHWADDR:
Linus Torvalds1da177e2005-04-16 15:20:36 -07004072 case SIOCBONDCHANGEACTIVE:
4073 case SIOCBRADDIF:
4074 case SIOCBRDELIF:
4075 if (!capable(CAP_NET_ADMIN))
4076 return -EPERM;
Thomas Grafcabcac02006-01-24 12:46:33 -08004077 /* fall through */
4078 case SIOCBONDSLAVEINFOQUERY:
4079 case SIOCBONDINFOQUERY:
Eric W. Biederman881d9662007-09-17 11:56:21 -07004080 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004081 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004082 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004083 rtnl_unlock();
4084 return ret;
4085
4086 case SIOCGIFMEM:
4087 /* Get the per device memory space. We can add this but
4088 * currently do not support it */
4089 case SIOCSIFMEM:
4090 /* Set the per device memory buffer space.
4091 * Not applicable in our case */
4092 case SIOCSIFLINK:
4093 return -EINVAL;
4094
4095 /*
4096 * Unknown or private ioctl.
4097 */
4098 default:
4099 if (cmd == SIOCWANDEV ||
4100 (cmd >= SIOCDEVPRIVATE &&
4101 cmd <= SIOCDEVPRIVATE + 15)) {
Eric W. Biederman881d9662007-09-17 11:56:21 -07004102 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004103 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004104 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004105 rtnl_unlock();
4106 if (!ret && copy_to_user(arg, &ifr,
4107 sizeof(struct ifreq)))
4108 ret = -EFAULT;
4109 return ret;
4110 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004111 /* Take care of Wireless Extensions */
Johannes Berg295f4a12007-04-26 20:43:56 -07004112 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004113 return wext_handle_ioctl(net, &ifr, cmd, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004114 return -EINVAL;
4115 }
4116}
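/*
 * All of the SIOCxIFxxx handling above is reachable from userspace
 * with a plain ioctl on any socket (illustrative; the set paths need
 * CAP_NET_ADMIN):
 *
 *	struct ifreq ifr;
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ifr.ifr_mtu = 9000;
 *	ioctl(fd, SIOCSIFMTU, &ifr);
 */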
4117
4118
4119/**
4120 * dev_new_index - allocate an ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004121 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07004122 *
4123 * Returns a suitable unique value for a new device interface
4124 * number. The caller must hold the rtnl semaphore or the
4125 * dev_base_lock to be sure it remains unique.
4126 */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004127static int dev_new_index(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004128{
4129 static int ifindex;
4130 for (;;) {
4131 if (++ifindex <= 0)
4132 ifindex = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004133 if (!__dev_get_by_index(net, ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004134 return ifindex;
4135 }
4136}
4137
Linus Torvalds1da177e2005-04-16 15:20:36 -07004138/* Delayed registration/unregistration */
Denis Cheng3b5b34f2007-12-07 00:49:17 -08004139static LIST_HEAD(net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004140
Stephen Hemminger6f05f622007-03-08 20:46:03 -08004141static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004142{
Linus Torvalds1da177e2005-04-16 15:20:36 -07004143 list_add_tail(&dev->todo_list, &net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004144}
4145
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004146static void rollback_registered(struct net_device *dev)
4147{
4148 BUG_ON(dev_boot_phase);
4149 ASSERT_RTNL();
4150
4151 /* Some devices call without registering for initialization unwind. */
4152 if (dev->reg_state == NETREG_UNINITIALIZED) {
 4153		printk(KERN_DEBUG "unregister_netdevice: device %s/%p was "
 4154		       "never registered\n", dev->name, dev);
4155
4156 WARN_ON(1);
4157 return;
4158 }
4159
4160 BUG_ON(dev->reg_state != NETREG_REGISTERED);
4161
4162 /* If device is running, close it first. */
4163 dev_close(dev);
4164
4165 /* And unlink it from device chain. */
4166 unlist_netdevice(dev);
4167
4168 dev->reg_state = NETREG_UNREGISTERING;
4169
4170 synchronize_net();
4171
4172 /* Shutdown queueing discipline. */
4173 dev_shutdown(dev);
4174
4175
 4176	/* Notify protocols that we are about to destroy
 4177	   this device. They should clean up all of their state.
4178 */
4179 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4180
4181 /*
4182 * Flush the unicast and multicast chains
4183 */
4184 dev_addr_discard(dev);
4185
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004186 if (dev->netdev_ops->ndo_uninit)
4187 dev->netdev_ops->ndo_uninit(dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004188
4189 /* Notifier chain MUST detach us from master device. */
Ilpo Järvinen547b7922008-07-25 21:43:18 -07004190 WARN_ON(dev->master);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004191
4192 /* Remove entries from kobject tree */
4193 netdev_unregister_kobject(dev);
4194
4195 synchronize_net();
4196
4197 dev_put(dev);
4198}
4199
David S. Millere8a04642008-07-17 00:34:19 -07004200static void __netdev_init_queue_locks_one(struct net_device *dev,
4201 struct netdev_queue *dev_queue,
4202 void *_unused)
David S. Millerc773e842008-07-08 23:13:53 -07004203{
4204 spin_lock_init(&dev_queue->_xmit_lock);
David S. Millercf508b12008-07-22 14:16:42 -07004205 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
David S. Millerc773e842008-07-08 23:13:53 -07004206 dev_queue->xmit_lock_owner = -1;
4207}
4208
4209static void netdev_init_queue_locks(struct net_device *dev)
4210{
David S. Millere8a04642008-07-17 00:34:19 -07004211 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4212 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
David S. Millerc773e842008-07-08 23:13:53 -07004213}
4214
Herbert Xub63365a2008-10-23 01:11:29 -07004215unsigned long netdev_fix_features(unsigned long features, const char *name)
4216{
4217 /* Fix illegal SG+CSUM combinations. */
4218 if ((features & NETIF_F_SG) &&
4219 !(features & NETIF_F_ALL_CSUM)) {
4220 if (name)
4221 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4222 "checksum feature.\n", name);
4223 features &= ~NETIF_F_SG;
4224 }
4225
4226 /* TSO requires that SG is present as well. */
4227 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4228 if (name)
4229 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4230 "SG feature.\n", name);
4231 features &= ~NETIF_F_TSO;
4232 }
4233
4234 if (features & NETIF_F_UFO) {
4235 if (!(features & NETIF_F_GEN_CSUM)) {
4236 if (name)
4237 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4238 "since no NETIF_F_HW_CSUM feature.\n",
4239 name);
4240 features &= ~NETIF_F_UFO;
4241 }
4242
4243 if (!(features & NETIF_F_SG)) {
4244 if (name)
4245 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4246 "since no NETIF_F_SG feature.\n", name);
4247 features &= ~NETIF_F_UFO;
4248 }
4249 }
4250
4251 return features;
4252}
4253EXPORT_SYMBOL(netdev_fix_features);
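/*
 * register_netdevice() below runs every device's feature mask through
 * this helper; drivers can also call it themselves before changing
 * features at runtime (sketch):
 *
 *	dev->features = netdev_fix_features(dev->features, dev->name);
 */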
4254
Linus Torvalds1da177e2005-04-16 15:20:36 -07004255/**
4256 * register_netdevice - register a network device
4257 * @dev: device to register
4258 *
4259 * Take a completed network device structure and add it to the kernel
4260 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4261 * chain. 0 is returned on success. A negative errno code is returned
4262 * on a failure to set up the device, or if the name is a duplicate.
4263 *
4264 * Callers must hold the rtnl semaphore. You may want
4265 * register_netdev() instead of this.
4266 *
4267 * BUGS:
4268 * The locking appears insufficient to guarantee two parallel registers
4269 * will not get the same name.
4270 */
4271
4272int register_netdevice(struct net_device *dev)
4273{
4274 struct hlist_head *head;
4275 struct hlist_node *p;
4276 int ret;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004277 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004278
4279 BUG_ON(dev_boot_phase);
4280 ASSERT_RTNL();
4281
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004282 might_sleep();
4283
Linus Torvalds1da177e2005-04-16 15:20:36 -07004284 /* When net_device's are persistent, this will be fatal. */
4285 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004286 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004287
David S. Millerf1f28aa2008-07-15 00:08:33 -07004288 spin_lock_init(&dev->addr_list_lock);
David S. Millercf508b12008-07-22 14:16:42 -07004289 netdev_set_addr_lockdep_class(dev);
David S. Millerc773e842008-07-08 23:13:53 -07004290 netdev_init_queue_locks(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004291
Linus Torvalds1da177e2005-04-16 15:20:36 -07004292 dev->iflink = -1;
4293
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004294#ifdef CONFIG_COMPAT_NET_DEV_OPS
 4295	/* Netdevice_ops API compatibility support.
4296 * This is temporary until all network devices are converted.
4297 */
4298 if (dev->netdev_ops) {
4299 const struct net_device_ops *ops = dev->netdev_ops;
4300
4301 dev->init = ops->ndo_init;
4302 dev->uninit = ops->ndo_uninit;
4303 dev->open = ops->ndo_open;
4304 dev->change_rx_flags = ops->ndo_change_rx_flags;
4305 dev->set_rx_mode = ops->ndo_set_rx_mode;
4306 dev->set_multicast_list = ops->ndo_set_multicast_list;
4307 dev->set_mac_address = ops->ndo_set_mac_address;
4308 dev->validate_addr = ops->ndo_validate_addr;
4309 dev->do_ioctl = ops->ndo_do_ioctl;
4310 dev->set_config = ops->ndo_set_config;
4311 dev->change_mtu = ops->ndo_change_mtu;
4312 dev->tx_timeout = ops->ndo_tx_timeout;
4313 dev->get_stats = ops->ndo_get_stats;
4314 dev->vlan_rx_register = ops->ndo_vlan_rx_register;
4315 dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
4316 dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
4317#ifdef CONFIG_NET_POLL_CONTROLLER
4318 dev->poll_controller = ops->ndo_poll_controller;
4319#endif
4320 } else {
4321 char drivername[64];
4322 pr_info("%s (%s): not using net_device_ops yet\n",
4323 dev->name, netdev_drivername(dev, drivername, 64));
4324
4325 /* This works only because net_device_ops and the
 4326	   compatibility structure share the same layout. */
4327 dev->netdev_ops = (void *) &(dev->init);
4328 }
4329#endif
4330
Linus Torvalds1da177e2005-04-16 15:20:36 -07004331 /* Init, if this function is available */
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004332 if (dev->netdev_ops->ndo_init) {
4333 ret = dev->netdev_ops->ndo_init(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004334 if (ret) {
4335 if (ret > 0)
4336 ret = -EIO;
Adrian Bunk90833aa2006-11-13 16:02:22 -08004337 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004338 }
4339 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004340
Linus Torvalds1da177e2005-04-16 15:20:36 -07004341 if (!dev_valid_name(dev->name)) {
4342 ret = -EINVAL;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004343 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004344 }
4345
Eric W. Biederman881d9662007-09-17 11:56:21 -07004346 dev->ifindex = dev_new_index(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004347 if (dev->iflink == -1)
4348 dev->iflink = dev->ifindex;
4349
4350 /* Check for existence of name */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004351 head = dev_name_hash(net, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004352 hlist_for_each(p, head) {
4353 struct net_device *d
4354 = hlist_entry(p, struct net_device, name_hlist);
4355 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4356 ret = -EEXIST;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004357 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004358 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004359 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004360
Stephen Hemmingerd212f872007-06-27 00:47:37 -07004361 /* Fix illegal checksum combinations */
4362 if ((dev->features & NETIF_F_HW_CSUM) &&
4363 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4364 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4365 dev->name);
4366 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4367 }
4368
4369 if ((dev->features & NETIF_F_NO_CSUM) &&
4370 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4371 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4372 dev->name);
4373 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4374 }
4375
Herbert Xub63365a2008-10-23 01:11:29 -07004376 dev->features = netdev_fix_features(dev->features, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004377
Lennert Buytenheke5a4a722008-08-03 01:23:10 -07004378 /* Enable software GSO if SG is supported. */
4379 if (dev->features & NETIF_F_SG)
4380 dev->features |= NETIF_F_GSO;
4381
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07004382 netdev_initialize_kobject(dev);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004383 ret = netdev_register_kobject(dev);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004384 if (ret)
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004385 goto err_uninit;
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004386 dev->reg_state = NETREG_REGISTERED;
4387
Linus Torvalds1da177e2005-04-16 15:20:36 -07004388 /*
4389	 *	Default initial state at registration is that the
4390 * device is present.
4391 */
4392
4393 set_bit(__LINK_STATE_PRESENT, &dev->state);
4394
Linus Torvalds1da177e2005-04-16 15:20:36 -07004395 dev_init_scheduler(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004396 dev_hold(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004397 list_netdevice(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004398
4399	/* Notify protocols that a new device appeared. */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004400 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07004401 ret = notifier_to_errno(ret);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004402 if (ret) {
4403 rollback_registered(dev);
4404 dev->reg_state = NETREG_UNREGISTERED;
4405 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004406
4407out:
4408 return ret;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004409
4410err_uninit:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004411 if (dev->netdev_ops->ndo_uninit)
4412 dev->netdev_ops->ndo_uninit(dev);
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004413 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004414}
4415
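/*
 * Example (illustrative sketch, not compiled): code that already runs
 * under the rtnl semaphore, such as an rtnl_link_ops ->newlink()
 * handler, must call register_netdevice() directly; register_netdev()
 * would try to take the lock a second time.  The function name below
 * is hypothetical.
 */
#if 0
static int example_newlink(struct net_device *dev)
{
	ASSERT_RTNL();			/* caller already holds RTNL */
	return register_netdevice(dev);	/* sends NETDEV_REGISTER */
}
#endif
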
4416/**
4417 * register_netdev - register a network device
4418 * @dev: device to register
4419 *
4420 * Take a completed network device structure and add it to the kernel
4421 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4422 * chain. 0 is returned on success. A negative errno code is returned
4423 * on a failure to set up the device, or if the name is a duplicate.
4424 *
Borislav Petkov38b4da32007-04-20 22:14:10 -07004425 * This is a wrapper around register_netdevice that takes the rtnl semaphore
Linus Torvalds1da177e2005-04-16 15:20:36 -07004426 * and expands the device name if you passed a format string to
4427 * alloc_netdev.
4428 */
4429int register_netdev(struct net_device *dev)
4430{
4431 int err;
4432
4433 rtnl_lock();
4434
4435 /*
4436 * If the name is a format string the caller wants us to do a
4437 * name allocation.
4438 */
4439 if (strchr(dev->name, '%')) {
4440 err = dev_alloc_name(dev, dev->name);
4441 if (err < 0)
4442 goto out;
4443 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004444
Linus Torvalds1da177e2005-04-16 15:20:36 -07004445 err = register_netdevice(dev);
4446out:
4447 rtnl_unlock();
4448 return err;
4449}
4450EXPORT_SYMBOL(register_netdev);
4451
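/*
 * Example (illustrative sketch, not compiled): a typical probe path.
 * A name containing '%d' makes register_netdev() pick the first free
 * unit number via dev_alloc_name().  "struct example_priv" and the
 * function name are hypothetical; ether_setup() is the stock Ethernet
 * setup helper.
 */
#if 0
static int example_probe(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_netdev(sizeof(struct example_priv), "eth%d",
			   ether_setup);
	if (!dev)
		return -ENOMEM;

	err = register_netdev(dev);	/* takes RTNL, expands "eth%d" */
	if (err) {
		free_netdev(dev);	/* never registered: plain free */
		return err;
	}
	return 0;
}
#endif
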
4452/*
4453 * netdev_wait_allrefs - wait until all references are gone.
4454 *
4455 * This is called when unregistering network devices.
4456 *
4457 * Any protocol or device that holds a reference should register
4458 * for netdevice notification, and cleanup and put back the
4459 * reference if they receive an UNREGISTER event.
4460 * We can get stuck here if buggy protocols don't correctly
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004461 * call dev_put.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004462 */
4463static void netdev_wait_allrefs(struct net_device *dev)
4464{
4465 unsigned long rebroadcast_time, warning_time;
4466
4467 rebroadcast_time = warning_time = jiffies;
4468 while (atomic_read(&dev->refcnt) != 0) {
4469 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004470 rtnl_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004471
4472 /* Rebroadcast unregister notification */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004473 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004474
4475 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4476 &dev->state)) {
4477 /* We must not have linkwatch events
4478 * pending on unregister. If this
4479 * happens, we simply run the queue
4480 * unscheduled, resulting in a noop
4481 * for this device.
4482 */
4483 linkwatch_run_queue();
4484 }
4485
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004486 __rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004487
4488 rebroadcast_time = jiffies;
4489 }
4490
4491 msleep(250);
4492
4493 if (time_after(jiffies, warning_time + 10 * HZ)) {
4494 printk(KERN_EMERG "unregister_netdevice: "
4495 "waiting for %s to become free. Usage "
4496 "count = %d\n",
4497 dev->name, atomic_read(&dev->refcnt));
4498 warning_time = jiffies;
4499 }
4500 }
4501}
4502
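/*
 * Example (illustrative sketch, not compiled): a subsystem holding
 * long-lived device references must drop them from its netdevice
 * notifier, otherwise netdev_wait_allrefs() spins above, printing the
 * "waiting for %s to become free" warning forever.  The lookup and
 * free helpers are hypothetical.
 */
#if 0
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UNREGISTER) {
		struct example_state *st = example_find_state(dev);

		if (st) {
			example_free_state(st);
			dev_put(dev);	/* release our dev_hold() */
		}
	}
	return NOTIFY_DONE;
}
#endif
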
4503/* The sequence is:
4504 *
4505 * rtnl_lock();
4506 * ...
4507 * register_netdevice(x1);
4508 * register_netdevice(x2);
4509 * ...
4510 * unregister_netdevice(y1);
4511 * unregister_netdevice(y2);
4512 * ...
4513 * rtnl_unlock();
4514 * free_netdev(y1);
4515 * free_netdev(y2);
4516 *
Herbert Xu58ec3b42008-10-07 15:50:03 -07004517 * We are invoked by rtnl_unlock().
Linus Torvalds1da177e2005-04-16 15:20:36 -07004518 * This allows us to deal with problems:
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004519 * 1) We can delete sysfs objects which invoke hotplug
Linus Torvalds1da177e2005-04-16 15:20:36 -07004520 * without deadlocking with linkwatch via keventd.
4521 * 2) Since we run with the RTNL semaphore not held, we can sleep
4522 * safely in order to wait for the netdev refcnt to drop to zero.
Herbert Xu58ec3b42008-10-07 15:50:03 -07004523 *
4524 * We must not return until all unregister events added during
4525 * the interval the lock was held have been completed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004526 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004527void netdev_run_todo(void)
4528{
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07004529 struct list_head list;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004530
Linus Torvalds1da177e2005-04-16 15:20:36 -07004531 /* Snapshot list, allow later requests */
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07004532 list_replace_init(&net_todo_list, &list);
Herbert Xu58ec3b42008-10-07 15:50:03 -07004533
4534 __rtnl_unlock();
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07004535
Linus Torvalds1da177e2005-04-16 15:20:36 -07004536 while (!list_empty(&list)) {
4537 struct net_device *dev
4538 = list_entry(list.next, struct net_device, todo_list);
4539 list_del(&dev->todo_list);
4540
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004541 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004542 printk(KERN_ERR "network todo '%s' but state %d\n",
4543 dev->name, dev->reg_state);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004544 dump_stack();
4545 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004546 }
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004547
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004548 dev->reg_state = NETREG_UNREGISTERED;
4549
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07004550 on_each_cpu(flush_backlog, dev, 1);
4551
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004552 netdev_wait_allrefs(dev);
4553
4554 /* paranoia */
4555 BUG_ON(atomic_read(&dev->refcnt));
Ilpo Järvinen547b7922008-07-25 21:43:18 -07004556 WARN_ON(dev->ip_ptr);
4557 WARN_ON(dev->ip6_ptr);
4558 WARN_ON(dev->dn_ptr);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004559
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004560 if (dev->destructor)
4561 dev->destructor(dev);
Stephen Hemminger9093bbb2007-05-19 15:39:25 -07004562
4563 /* Free network device */
4564 kobject_put(&dev->dev.kobj);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004565 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004566}
4567
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08004568/**
4569 * dev_get_stats - get network device statistics
4570 * @dev: device to get statistics from
4571 *
4572 * Get network statistics from device. The device driver may provide
4573 * its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
4574 * the internal statistics structure is used.
4575 */
4576const struct net_device_stats *dev_get_stats(struct net_device *dev)
4577{
4578 const struct net_device_ops *ops = dev->netdev_ops;
4579
4580 if (ops->ndo_get_stats)
4581 return ops->ndo_get_stats(dev);
4582 else
4583 return &dev->stats;
Rusty Russellc45d2862007-03-28 14:29:08 -07004584}
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08004585EXPORT_SYMBOL(dev_get_stats);
Rusty Russellc45d2862007-03-28 14:29:08 -07004586
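/*
 * Example (illustrative sketch, not compiled): a driver that wants to
 * fold hardware counters into its statistics supplies ndo_get_stats;
 * a driver that only updates dev->stats needs no method at all, since
 * dev_get_stats() falls back to that structure.  The private struct
 * and register-read helper are hypothetical.
 */
#if 0
static struct net_device_stats *example_get_stats(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	dev->stats.rx_dropped = example_read_rx_drop_counter(priv);
	return &dev->stats;
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_get_stats		= example_get_stats,
};
#endif
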
David S. Millerdc2b4842008-07-08 17:18:23 -07004587static void netdev_init_one_queue(struct net_device *dev,
David S. Millere8a04642008-07-17 00:34:19 -07004588 struct netdev_queue *queue,
4589 void *_unused)
David S. Millerdc2b4842008-07-08 17:18:23 -07004590{
David S. Millerdc2b4842008-07-08 17:18:23 -07004591 queue->dev = dev;
4592}
4593
David S. Millerbb949fb2008-07-08 16:55:56 -07004594static void netdev_init_queues(struct net_device *dev)
4595{
David S. Millere8a04642008-07-17 00:34:19 -07004596 netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4597 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
David S. Millerc3f26a22008-07-31 16:58:50 -07004598 spin_lock_init(&dev->tx_global_lock);
David S. Millerbb949fb2008-07-08 16:55:56 -07004599}
4600
Linus Torvalds1da177e2005-04-16 15:20:36 -07004601/**
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07004602 * alloc_netdev_mq - allocate network device
Linus Torvalds1da177e2005-04-16 15:20:36 -07004603 * @sizeof_priv: size of private data to allocate space for
4604 * @name: device name format string
4605 * @setup: callback to initialize device
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07004606 * @queue_count: the number of subqueues to allocate
Linus Torvalds1da177e2005-04-16 15:20:36 -07004607 *
4608 * Allocates a struct net_device with private data area for driver use
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07004609 * and performs basic initialization. Also allocates subqueue structs
4610 * for each queue on the device at the end of the netdevice.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004611 */
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07004612struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4613 void (*setup)(struct net_device *), unsigned int queue_count)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004614{
David S. Millere8a04642008-07-17 00:34:19 -07004615 struct netdev_queue *tx;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004616 struct net_device *dev;
Stephen Hemminger79439862008-07-21 13:28:44 -07004617 size_t alloc_size;
David S. Millere8a04642008-07-17 00:34:19 -07004618 void *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004619
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07004620 BUG_ON(strlen(name) >= sizeof(dev->name));
4621
David S. Millerfd2ea0a2008-07-17 01:56:23 -07004622 alloc_size = sizeof(struct net_device);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07004623 if (sizeof_priv) {
4624 /* ensure 32-byte alignment of private area */
4625 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4626 alloc_size += sizeof_priv;
4627 }
4628 /* ensure 32-byte alignment of whole construct */
4629 alloc_size += NETDEV_ALIGN_CONST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004630
Paolo 'Blaisorblade' Giarrusso31380de2006-04-06 22:38:28 -07004631 p = kzalloc(alloc_size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004632 if (!p) {
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07004633 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07004634 return NULL;
4635 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004636
Stephen Hemminger79439862008-07-21 13:28:44 -07004637 tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
David S. Millere8a04642008-07-17 00:34:19 -07004638 if (!tx) {
4639 printk(KERN_ERR "alloc_netdev: Unable to allocate "
4640 "tx qdiscs.\n");
4641 kfree(p);
4642 return NULL;
4643 }
4644
Linus Torvalds1da177e2005-04-16 15:20:36 -07004645 dev = (struct net_device *)
4646 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4647 dev->padded = (char *)dev - (char *)p;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09004648 dev_net_set(dev, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004649
David S. Millere8a04642008-07-17 00:34:19 -07004650 dev->_tx = tx;
4651 dev->num_tx_queues = queue_count;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07004652 dev->real_num_tx_queues = queue_count;
David S. Millere8a04642008-07-17 00:34:19 -07004653
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07004654 dev->gso_max_size = GSO_MAX_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004655
David S. Millerbb949fb2008-07-08 16:55:56 -07004656 netdev_init_queues(dev);
4657
Herbert Xud565b0a2008-12-15 23:38:52 -08004658 INIT_LIST_HEAD(&dev->napi_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004659 setup(dev);
4660 strcpy(dev->name, name);
4661 return dev;
4662}
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07004663EXPORT_SYMBOL(alloc_netdev_mq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004664
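/*
 * Example (illustrative sketch, not compiled): allocating a device
 * with four transmit queues.  A driver may advertise fewer queues via
 * real_num_tx_queues until it knows what the hardware supports.  The
 * private struct and queue count are hypothetical.
 */
#if 0
static struct net_device *example_alloc_mq(void)
{
	struct net_device *dev;

	dev = alloc_netdev_mq(sizeof(struct example_priv), "eth%d",
			      ether_setup, 4);
	if (!dev)
		return NULL;

	dev->real_num_tx_queues = 1;	/* raised after hardware probe */
	return dev;
}
#endif
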
4665/**
4666 * free_netdev - free network device
4667 * @dev: device
4668 *
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004669 * This function does the last stage of destroying an allocated device
4670 * interface. The reference to the device object is released.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004671 * If this is the last reference then it will be freed.
4672 */
4673void free_netdev(struct net_device *dev)
4674{
Herbert Xud565b0a2008-12-15 23:38:52 -08004675 struct napi_struct *p, *n;
4676
Denis V. Lunevf3005d72008-04-16 02:02:18 -07004677 release_net(dev_net(dev));
4678
David S. Millere8a04642008-07-17 00:34:19 -07004679 kfree(dev->_tx);
4680
Herbert Xud565b0a2008-12-15 23:38:52 -08004681 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
4682 netif_napi_del(p);
4683
Stephen Hemminger3041a062006-05-26 13:25:24 -07004684 /* Compatibility with error handling in drivers */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004685 if (dev->reg_state == NETREG_UNINITIALIZED) {
4686 kfree((char *)dev - dev->padded);
4687 return;
4688 }
4689
4690 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4691 dev->reg_state = NETREG_RELEASED;
4692
Greg Kroah-Hartman43cb76d2002-04-09 12:14:34 -07004693 /* will free via device release */
4694 put_device(&dev->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004695}
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004696
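/*
 * Example (illustrative sketch, not compiled): the teardown ordering
 * documented above netdev_run_todo().  free_netdev() may only run
 * once rtnl_unlock() has completed the deferred unregistration and
 * the device has reached NETREG_UNREGISTERED.
 */
#if 0
static void example_teardown(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);	/* queued on net_todo_list */
	rtnl_unlock();			/* runs netdev_run_todo() */
	free_netdev(dev);
}
#endif
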
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004697/**
4698 * synchronize_net - Synchronize with packet receive processing
4699 *
4700 * Wait for packets currently being received to be done.
4701 * Does not block later packets from starting.
4702 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004703void synchronize_net(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004704{
4705 might_sleep();
Paul E. McKenneyfbd568a3e2005-05-01 08:59:04 -07004706 synchronize_rcu();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004707}
4708
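/*
 * Example (illustrative sketch, not compiled): after unhooking a
 * packet handler, wait out receive processing that may still be
 * running on other CPUs before freeing the handler's state.  The
 * packet_type instance and free helper are hypothetical.
 */
#if 0
static void example_proto_unload(void)
{
	dev_remove_pack(&example_packet_type);
	synchronize_net();		/* one RCU grace period */
	example_free_proto_state();	/* no receiver can see it now */
}
#endif
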
4709/**
4710 * unregister_netdevice - remove device from the kernel
4711 * @dev: device
4712 *
4713 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08004714 * from the kernel tables.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004715 *
4716 * Callers must hold the rtnl semaphore. You may want
4717 * unregister_netdev() instead of this.
4718 */
4719
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08004720void unregister_netdevice(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004721{
Herbert Xua6620712007-12-12 19:21:56 -08004722 ASSERT_RTNL();
4723
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004724 rollback_registered(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004725 /* Finish processing unregister after unlock */
4726 net_set_todo(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004727}
4728
4729/**
4730 * unregister_netdev - remove device from the kernel
4731 * @dev: device
4732 *
4733 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08004734 * from the kernel tables.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004735 *
4736 * This is just a wrapper for unregister_netdevice that takes
4737 * the rtnl semaphore. In general you want to use this and not
4738 * unregister_netdevice.
4739 */
4740void unregister_netdev(struct net_device *dev)
4741{
4742 rtnl_lock();
4743 unregister_netdevice(dev);
4744 rtnl_unlock();
4745}
4746
4747EXPORT_SYMBOL(unregister_netdev);
4748
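/*
 * Example (illustrative sketch, not compiled): the usual module exit
 * pairing for a driver that registered with register_netdev().  The
 * device pointer is assumed to have been stashed by a hypothetical
 * probe path.
 */
#if 0
static void __exit example_exit(void)
{
	unregister_netdev(example_dev);	/* takes and drops RTNL */
	free_netdev(example_dev);
}
module_exit(example_exit);
#endif
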
Eric W. Biedermance286d32007-09-12 13:53:49 +02004749/**
4750 * dev_change_net_namespace - move device to a different network namespace
4751 * @dev: device
4752 * @net: network namespace
4753 * @pat: If not NULL, name pattern to try if the current device name
4754 * is already taken in the destination network namespace.
4755 *
4756 * This function shuts down a device interface and moves it
4757 * to a new network namespace. On success 0 is returned, on
4758 * a failure a negative errno code is returned.
4759 *
4760 * Callers must hold the rtnl semaphore.
4761 */
4762
4763int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4764{
4765 char buf[IFNAMSIZ];
4766 const char *destname;
4767 int err;
4768
4769 ASSERT_RTNL();
4770
4771 /* Don't allow namespace local devices to be moved. */
4772 err = -EINVAL;
4773 if (dev->features & NETIF_F_NETNS_LOCAL)
4774 goto out;
4775
Eric W. Biederman38918452008-10-27 17:51:47 -07004776#ifdef CONFIG_SYSFS
4777 /* Don't allow real devices to be moved when sysfs
4778 * is enabled.
4779 */
4780 err = -EINVAL;
4781 if (dev->dev.parent)
4782 goto out;
4783#endif
4784
Eric W. Biedermance286d32007-09-12 13:53:49 +02004785	/* Ensure the device has been registered */
4786 err = -EINVAL;
4787 if (dev->reg_state != NETREG_REGISTERED)
4788 goto out;
4789
4790	/* Get out if there is nothing to do */
4791 err = 0;
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09004792 if (net_eq(dev_net(dev), net))
Eric W. Biedermance286d32007-09-12 13:53:49 +02004793 goto out;
4794
4795 /* Pick the destination device name, and ensure
4796 * we can use it in the destination network namespace.
4797 */
4798 err = -EEXIST;
4799 destname = dev->name;
4800 if (__dev_get_by_name(net, destname)) {
4801 /* We get here if we can't use the current device name */
4802 if (!pat)
4803 goto out;
4804 if (!dev_valid_name(pat))
4805 goto out;
4806 if (strchr(pat, '%')) {
4807 if (__dev_alloc_name(net, pat, buf) < 0)
4808 goto out;
4809 destname = buf;
4810 } else
4811 destname = pat;
4812 if (__dev_get_by_name(net, destname))
4813 goto out;
4814 }
4815
4816 /*
4817	 * And now a mini version of register_netdevice and unregister_netdevice.
4818 */
4819
4820 /* If device is running close it first. */
Pavel Emelyanov9b772652007-10-10 02:49:09 -07004821 dev_close(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004822
4823 /* And unlink it from device chain */
4824 err = -ENODEV;
4825 unlist_netdevice(dev);
4826
4827 synchronize_net();
4828
4829 /* Shutdown queueing discipline. */
4830 dev_shutdown(dev);
4831
4832	/* Notify protocols that we are about to destroy
4833	   this device. They should clean up all of their state.
4834 */
4835 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4836
4837 /*
4838 * Flush the unicast and multicast chains
4839 */
4840 dev_addr_discard(dev);
4841
Eric W. Biederman38918452008-10-27 17:51:47 -07004842 netdev_unregister_kobject(dev);
4843
Eric W. Biedermance286d32007-09-12 13:53:49 +02004844 /* Actually switch the network namespace */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09004845 dev_net_set(dev, net);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004846
4847 /* Assign the new device name */
4848 if (destname != dev->name)
4849 strcpy(dev->name, destname);
4850
4851 /* If there is an ifindex conflict assign a new one */
4852 if (__dev_get_by_index(net, dev->ifindex)) {
4853 int iflink = (dev->iflink == dev->ifindex);
4854 dev->ifindex = dev_new_index(net);
4855 if (iflink)
4856 dev->iflink = dev->ifindex;
4857 }
4858
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004859 /* Fixup kobjects */
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07004860 err = netdev_register_kobject(dev);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004861 WARN_ON(err);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004862
4863 /* Add the device back in the hashes */
4864 list_netdevice(dev);
4865
4866	/* Notify protocols that a new device appeared. */
4867 call_netdevice_notifiers(NETDEV_REGISTER, dev);
4868
4869 synchronize_net();
4870 err = 0;
4871out:
4872 return err;
4873}
4874
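/*
 * Example (illustrative sketch, not compiled): moving a device into
 * another namespace under RTNL, with a "moved%d" fallback pattern in
 * case its current name is already taken there.  How the target
 * struct net is obtained is left to the hypothetical caller.
 */
#if 0
static int example_move_dev(struct net_device *dev, struct net *target)
{
	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, target, "moved%d");
	rtnl_unlock();

	return err;
}
#endif
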
Linus Torvalds1da177e2005-04-16 15:20:36 -07004875static int dev_cpu_callback(struct notifier_block *nfb,
4876 unsigned long action,
4877 void *ocpu)
4878{
4879 struct sk_buff **list_skb;
David S. Miller37437bb2008-07-16 02:15:04 -07004880 struct Qdisc **list_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004881 struct sk_buff *skb;
4882 unsigned int cpu, oldcpu = (unsigned long)ocpu;
4883 struct softnet_data *sd, *oldsd;
4884
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07004885 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004886 return NOTIFY_OK;
4887
4888 local_irq_disable();
4889 cpu = smp_processor_id();
4890 sd = &per_cpu(softnet_data, cpu);
4891 oldsd = &per_cpu(softnet_data, oldcpu);
4892
4893 /* Find end of our completion_queue. */
4894 list_skb = &sd->completion_queue;
4895 while (*list_skb)
4896 list_skb = &(*list_skb)->next;
4897 /* Append completion queue from offline CPU. */
4898 *list_skb = oldsd->completion_queue;
4899 oldsd->completion_queue = NULL;
4900
4901 /* Find end of our output_queue. */
4902 list_net = &sd->output_queue;
4903 while (*list_net)
4904 list_net = &(*list_net)->next_sched;
4905 /* Append output queue from offline CPU. */
4906 *list_net = oldsd->output_queue;
4907 oldsd->output_queue = NULL;
4908
4909 raise_softirq_irqoff(NET_TX_SOFTIRQ);
4910 local_irq_enable();
4911
4912 /* Process offline CPU's input_pkt_queue */
4913 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4914 netif_rx(skb);
4915
4916 return NOTIFY_OK;
4917}
Linus Torvalds1da177e2005-04-16 15:20:36 -07004918
Chris Leechdb217332006-06-17 21:24:58 -07004919#ifdef CONFIG_NET_DMA
4920/**
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07004921 * net_dma_rebalance - try to maintain one DMA channel per CPU
4922 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4923 *
4924 * This is called when the number of channels allocated to the net_dma client
4925 * changes. The net_dma client tries to have one DMA channel per CPU.
Chris Leechdb217332006-06-17 21:24:58 -07004926 */
Dan Williamsd379b012007-07-09 11:56:42 -07004927
4928static void net_dma_rebalance(struct net_dma *net_dma)
Chris Leechdb217332006-06-17 21:24:58 -07004929{
Dan Williamsd379b012007-07-09 11:56:42 -07004930 unsigned int cpu, i, n, chan_idx;
Chris Leechdb217332006-06-17 21:24:58 -07004931 struct dma_chan *chan;
4932
Dan Williamsd379b012007-07-09 11:56:42 -07004933 if (cpus_empty(net_dma->channel_mask)) {
Chris Leechdb217332006-06-17 21:24:58 -07004934 for_each_online_cpu(cpu)
Alexey Dobriyan29bbd722006-08-02 15:02:31 -07004935 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
Chris Leechdb217332006-06-17 21:24:58 -07004936 return;
4937 }
4938
4939 i = 0;
4940 cpu = first_cpu(cpu_online_map);
4941
Mike Travis0e12f842008-05-12 21:21:13 +02004942 for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
Dan Williamsd379b012007-07-09 11:56:42 -07004943 chan = net_dma->channels[chan_idx];
4944
4945 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4946 + (i < (num_online_cpus() %
4947 cpus_weight(net_dma->channel_mask)) ? 1 : 0));
Chris Leechdb217332006-06-17 21:24:58 -07004948
4949		while (n) {
Alexey Dobriyan29bbd722006-08-02 15:02:31 -07004950 per_cpu(softnet_data, cpu).net_dma = chan;
Chris Leechdb217332006-06-17 21:24:58 -07004951 cpu = next_cpu(cpu, cpu_online_map);
4952 n--;
4953 }
4954 i++;
4955 }
Chris Leechdb217332006-06-17 21:24:58 -07004956}
4957
4958/**
4959 * netdev_dma_event - event callback for the net_dma_client
4960 * @client: should always be net_dma_client
Randy Dunlapf4b8ea72006-06-22 16:00:11 -07004961 * @chan: DMA channel for the event
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07004962 * @state: DMA state to be handled
Chris Leechdb217332006-06-17 21:24:58 -07004963 */
Dan Williamsd379b012007-07-09 11:56:42 -07004964static enum dma_state_client
4965netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4966 enum dma_state state)
Chris Leechdb217332006-06-17 21:24:58 -07004967{
Dan Williamsd379b012007-07-09 11:56:42 -07004968 int i, found = 0, pos = -1;
4969 struct net_dma *net_dma =
4970 container_of(client, struct net_dma, client);
4971 enum dma_state_client ack = DMA_DUP; /* default: take no action */
4972
4973 spin_lock(&net_dma->lock);
4974 switch (state) {
4975 case DMA_RESOURCE_AVAILABLE:
Mike Travis0c0b0ac2008-05-02 16:43:08 -07004976 for (i = 0; i < nr_cpu_ids; i++)
Dan Williamsd379b012007-07-09 11:56:42 -07004977 if (net_dma->channels[i] == chan) {
4978 found = 1;
4979 break;
4980 } else if (net_dma->channels[i] == NULL && pos < 0)
4981 pos = i;
4982
4983 if (!found && pos >= 0) {
4984 ack = DMA_ACK;
4985 net_dma->channels[pos] = chan;
4986 cpu_set(pos, net_dma->channel_mask);
4987 net_dma_rebalance(net_dma);
4988 }
Chris Leechdb217332006-06-17 21:24:58 -07004989 break;
4990 case DMA_RESOURCE_REMOVED:
Mike Travis0c0b0ac2008-05-02 16:43:08 -07004991 for (i = 0; i < nr_cpu_ids; i++)
Dan Williamsd379b012007-07-09 11:56:42 -07004992 if (net_dma->channels[i] == chan) {
4993 found = 1;
4994 pos = i;
4995 break;
4996 }
4997
4998 if (found) {
4999 ack = DMA_ACK;
5000 cpu_clear(pos, net_dma->channel_mask);
5001 net_dma->channels[i] = NULL;
5002 net_dma_rebalance(net_dma);
5003 }
Chris Leechdb217332006-06-17 21:24:58 -07005004 break;
5005 default:
5006 break;
5007 }
Dan Williamsd379b012007-07-09 11:56:42 -07005008 spin_unlock(&net_dma->lock);
5009
5010 return ack;
Chris Leechdb217332006-06-17 21:24:58 -07005011}
5012
5013/**
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005014 * netdev_dma_register - register the networking subsystem as a DMA client
Chris Leechdb217332006-06-17 21:24:58 -07005015 */
5016static int __init netdev_dma_register(void)
5017{
Mike Travis0c0b0ac2008-05-02 16:43:08 -07005018	net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct dma_chan *),
5019 GFP_KERNEL);
5020 if (unlikely(!net_dma.channels)) {
5021 printk(KERN_NOTICE
5022 "netdev_dma: no memory for net_dma.channels\n");
5023 return -ENOMEM;
5024 }
Dan Williamsd379b012007-07-09 11:56:42 -07005025 spin_lock_init(&net_dma.lock);
5026 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
5027 dma_async_client_register(&net_dma.client);
5028 dma_async_client_chan_request(&net_dma.client);
Chris Leechdb217332006-06-17 21:24:58 -07005029 return 0;
5030}
5031
5032#else
5033static int __init netdev_dma_register(void) { return -ENODEV; }
5034#endif /* CONFIG_NET_DMA */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005035
Herbert Xu7f353bf2007-08-10 15:47:58 -07005036/**
Herbert Xub63365a2008-10-23 01:11:29 -07005037 * netdev_increment_features - increment feature set by one
5038 * @all: current feature set
5039 * @one: new feature set
5040 * @mask: mask feature set
Herbert Xu7f353bf2007-08-10 15:47:58 -07005041 *
5042 * Computes a new feature set after adding a device with feature set
Herbert Xub63365a2008-10-23 01:11:29 -07005043 * @one to the master device with current feature set @all. Will not
5044 * enable anything that is off in @mask. Returns the new feature set.
Herbert Xu7f353bf2007-08-10 15:47:58 -07005045 */
Herbert Xub63365a2008-10-23 01:11:29 -07005046unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5047 unsigned long mask)
Herbert Xu7f353bf2007-08-10 15:47:58 -07005048{
Herbert Xub63365a2008-10-23 01:11:29 -07005049 /* If device needs checksumming, downgrade to it. */
5050 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5051 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5052 else if (mask & NETIF_F_ALL_CSUM) {
5053 /* If one device supports v4/v6 checksumming, set for all. */
5054 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5055 !(all & NETIF_F_GEN_CSUM)) {
5056 all &= ~NETIF_F_ALL_CSUM;
5057 all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5058 }
Herbert Xu7f353bf2007-08-10 15:47:58 -07005059
Herbert Xub63365a2008-10-23 01:11:29 -07005060 /* If one device supports hw checksumming, set for all. */
5061 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5062 all &= ~NETIF_F_ALL_CSUM;
5063 all |= NETIF_F_HW_CSUM;
5064 }
5065 }
Herbert Xu7f353bf2007-08-10 15:47:58 -07005066
Herbert Xub63365a2008-10-23 01:11:29 -07005067 one |= NETIF_F_ALL_CSUM;
Herbert Xu7f353bf2007-08-10 15:47:58 -07005068
Herbert Xub63365a2008-10-23 01:11:29 -07005069 one |= all & NETIF_F_ONE_FOR_ALL;
5070 all &= one | NETIF_F_LLTX | NETIF_F_GSO;
5071 all |= one & mask & NETIF_F_ONE_FOR_ALL;
Herbert Xu7f353bf2007-08-10 15:47:58 -07005072
5073 return all;
5074}
Herbert Xub63365a2008-10-23 01:11:29 -07005075EXPORT_SYMBOL(netdev_increment_features);
Herbert Xu7f353bf2007-08-10 15:47:58 -07005076
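/*
 * Example (illustrative sketch, not compiled): how a master device
 * such as a bond might recompute its feature set from its slaves,
 * folding each one in and then sanitizing the result.  The master and
 * slave structures and their list are hypothetical.
 */
#if 0
static void example_compute_features(struct example_master *master)
{
	unsigned long features = master->dev->features &
				 ~NETIF_F_ONE_FOR_ALL;
	struct example_slave *slave;

	list_for_each_entry(slave, &master->slaves, list)
		features = netdev_increment_features(features,
						     slave->dev->features,
						     NETIF_F_ONE_FOR_ALL);

	master->dev->features = netdev_fix_features(features,
						    master->dev->name);
}
#endif
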
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005077static struct hlist_head *netdev_create_hash(void)
5078{
5079 int i;
5080 struct hlist_head *hash;
5081
5082 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5083 if (hash != NULL)
5084 for (i = 0; i < NETDEV_HASHENTRIES; i++)
5085 INIT_HLIST_HEAD(&hash[i]);
5086
5087 return hash;
5088}
5089
Eric W. Biederman881d9662007-09-17 11:56:21 -07005090/* Initialize per network namespace state */
Pavel Emelyanov46650792007-10-08 20:38:39 -07005091static int __net_init netdev_init(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07005092{
Eric W. Biederman881d9662007-09-17 11:56:21 -07005093 INIT_LIST_HEAD(&net->dev_base_head);
Eric W. Biederman881d9662007-09-17 11:56:21 -07005094
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005095 net->dev_name_head = netdev_create_hash();
5096 if (net->dev_name_head == NULL)
5097 goto err_name;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005098
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005099 net->dev_index_head = netdev_create_hash();
5100 if (net->dev_index_head == NULL)
5101 goto err_idx;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005102
5103 return 0;
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005104
5105err_idx:
5106 kfree(net->dev_name_head);
5107err_name:
5108 return -ENOMEM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005109}
5110
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005111/**
5112 * netdev_drivername - network driver for the device
5113 * @dev: network device
5114 * @buffer: buffer for resulting name
5115 * @len: size of buffer
5116 *
5117 * Determine network driver for device.
5118 */
Stephen Hemmingercf04a4c2008-09-30 02:22:14 -07005119char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
Arjan van de Ven6579e572008-07-21 13:31:48 -07005120{
Stephen Hemmingercf04a4c2008-09-30 02:22:14 -07005121 const struct device_driver *driver;
5122 const struct device *parent;
Arjan van de Ven6579e572008-07-21 13:31:48 -07005123
5124 if (len <= 0 || !buffer)
5125 return buffer;
5126 buffer[0] = 0;
5127
5128 parent = dev->dev.parent;
5129
5130 if (!parent)
5131 return buffer;
5132
5133 driver = parent->driver;
5134 if (driver && driver->name)
5135 strlcpy(buffer, driver->name, len);
5136 return buffer;
5137}
5138
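/*
 * Example (illustrative sketch, not compiled): naming the offending
 * driver in a diagnostic, as the watchdog and the net_device_ops
 * compatibility warning in register_netdevice() do.
 */
#if 0
static void example_warn_driver(struct net_device *dev)
{
	char drivername[64];

	printk(KERN_WARNING "%s (%s): example condition hit\n",
	       dev->name, netdev_drivername(dev, drivername, 64));
}
#endif
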
Pavel Emelyanov46650792007-10-08 20:38:39 -07005139static void __net_exit netdev_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07005140{
5141 kfree(net->dev_name_head);
5142 kfree(net->dev_index_head);
5143}
5144
Denis V. Lunev022cbae2007-11-13 03:23:50 -08005145static struct pernet_operations __net_initdata netdev_net_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07005146 .init = netdev_init,
5147 .exit = netdev_exit,
5148};
5149
Pavel Emelyanov46650792007-10-08 20:38:39 -07005150static void __net_exit default_device_exit(struct net *net)
Eric W. Biedermance286d32007-09-12 13:53:49 +02005151{
Eric W. Biederman8eb79862008-12-29 18:21:48 -08005152 struct net_device *dev;
Eric W. Biedermance286d32007-09-12 13:53:49 +02005153 /*
5154	 * Push all migratable network devices back to the
5155	 * initial network namespace.
5156 */
5157 rtnl_lock();
Eric W. Biederman8eb79862008-12-29 18:21:48 -08005158restart:
5159 for_each_netdev(net, dev) {
Eric W. Biedermance286d32007-09-12 13:53:49 +02005160 int err;
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005161 char fb_name[IFNAMSIZ];
Eric W. Biedermance286d32007-09-12 13:53:49 +02005162
5163		/* Ignore unmovable devices (e.g. loopback) */
5164 if (dev->features & NETIF_F_NETNS_LOCAL)
5165 continue;
5166
Eric W. Biedermand0c082c2008-11-05 15:59:38 -08005167 /* Delete virtual devices */
5168 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
5169 dev->rtnl_link_ops->dellink(dev);
Eric W. Biederman8eb79862008-12-29 18:21:48 -08005170 goto restart;
Eric W. Biedermand0c082c2008-11-05 15:59:38 -08005171 }
5172
Eric W. Biedermance286d32007-09-12 13:53:49 +02005173		/* Push remaining network devices to init_net */
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005174 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5175 err = dev_change_net_namespace(dev, &init_net, fb_name);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005176 if (err) {
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005177 printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
Eric W. Biedermance286d32007-09-12 13:53:49 +02005178 __func__, dev->name, err);
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005179 BUG();
Eric W. Biedermance286d32007-09-12 13:53:49 +02005180 }
Eric W. Biederman8eb79862008-12-29 18:21:48 -08005181 goto restart;
Eric W. Biedermance286d32007-09-12 13:53:49 +02005182 }
5183 rtnl_unlock();
5184}
5185
Denis V. Lunev022cbae2007-11-13 03:23:50 -08005186static struct pernet_operations __net_initdata default_device_ops = {
Eric W. Biedermance286d32007-09-12 13:53:49 +02005187 .exit = default_device_exit,
5188};
5189
Linus Torvalds1da177e2005-04-16 15:20:36 -07005190/*
5191 * Initialize the DEV module. At boot time this walks the device list and
5192 * unhooks any devices that fail to initialise (normally hardware not
5193 * present) and leaves us with a valid list of present and active devices.
5194 *
5195 */
5196
5197/*
5198 * This is called single threaded during boot, so no need
5199 * to take the rtnl semaphore.
5200 */
5201static int __init net_dev_init(void)
5202{
5203 int i, rc = -ENOMEM;
5204
5205 BUG_ON(!dev_boot_phase);
5206
Linus Torvalds1da177e2005-04-16 15:20:36 -07005207 if (dev_proc_init())
5208 goto out;
5209
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005210 if (netdev_kobject_init())
Linus Torvalds1da177e2005-04-16 15:20:36 -07005211 goto out;
5212
5213 INIT_LIST_HEAD(&ptype_all);
Pavel Emelyanov82d8a862007-11-26 20:12:58 +08005214 for (i = 0; i < PTYPE_HASH_SIZE; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005215 INIT_LIST_HEAD(&ptype_base[i]);
5216
Eric W. Biederman881d9662007-09-17 11:56:21 -07005217 if (register_pernet_subsys(&netdev_net_ops))
5218 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005219
5220 /*
5221 * Initialise the packet receive queues.
5222 */
5223
KAMEZAWA Hiroyuki6f912042006-04-10 22:52:50 -07005224 for_each_possible_cpu(i) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005225 struct softnet_data *queue;
5226
5227 queue = &per_cpu(softnet_data, i);
5228 skb_queue_head_init(&queue->input_pkt_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005229 queue->completion_queue = NULL;
5230 INIT_LIST_HEAD(&queue->poll_list);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07005231
5232 queue->backlog.poll = process_backlog;
5233 queue->backlog.weight = weight_p;
Herbert Xud565b0a2008-12-15 23:38:52 -08005234 queue->backlog.gro_list = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005235 }
5236
Linus Torvalds1da177e2005-04-16 15:20:36 -07005237 dev_boot_phase = 0;
5238
Eric W. Biederman505d4f72008-11-07 22:54:20 -08005239	/* The loopback device is special: if any other network device
5240	 * is present in a network namespace, the loopback device must
5241	 * be present too. Since we now dynamically allocate and free the
5242	 * loopback device, ensure this invariant is maintained by
5243	 * keeping the loopback device as the first device on the
5244	 * list of network devices, so that the loopback device
5245	 * is the first device that appears and the last network device
5246	 * that disappears.
5247 */
5248 if (register_pernet_device(&loopback_net_ops))
5249 goto out;
5250
5251 if (register_pernet_device(&default_device_ops))
5252 goto out;
5253
5254 netdev_dma_register();
5255
Carlos R. Mafra962cf362008-05-15 11:15:37 -03005256 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5257 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005258
5259 hotcpu_notifier(dev_cpu_callback, 0);
5260 dst_init();
5261 dev_mcast_init();
5262 rc = 0;
5263out:
5264 return rc;
5265}
5266
5267subsys_initcall(net_dev_init);
5268
5269EXPORT_SYMBOL(__dev_get_by_index);
5270EXPORT_SYMBOL(__dev_get_by_name);
5271EXPORT_SYMBOL(__dev_remove_pack);
Mitch Williamsc2373ee2005-11-09 10:34:45 -08005272EXPORT_SYMBOL(dev_valid_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005273EXPORT_SYMBOL(dev_add_pack);
5274EXPORT_SYMBOL(dev_alloc_name);
5275EXPORT_SYMBOL(dev_close);
5276EXPORT_SYMBOL(dev_get_by_flags);
5277EXPORT_SYMBOL(dev_get_by_index);
5278EXPORT_SYMBOL(dev_get_by_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005279EXPORT_SYMBOL(dev_open);
5280EXPORT_SYMBOL(dev_queue_xmit);
5281EXPORT_SYMBOL(dev_remove_pack);
5282EXPORT_SYMBOL(dev_set_allmulti);
5283EXPORT_SYMBOL(dev_set_promiscuity);
5284EXPORT_SYMBOL(dev_change_flags);
5285EXPORT_SYMBOL(dev_set_mtu);
5286EXPORT_SYMBOL(dev_set_mac_address);
5287EXPORT_SYMBOL(free_netdev);
5288EXPORT_SYMBOL(netdev_boot_setup_check);
5289EXPORT_SYMBOL(netdev_set_master);
5290EXPORT_SYMBOL(netdev_state_change);
5291EXPORT_SYMBOL(netif_receive_skb);
5292EXPORT_SYMBOL(netif_rx);
5293EXPORT_SYMBOL(register_gifconf);
5294EXPORT_SYMBOL(register_netdevice);
5295EXPORT_SYMBOL(register_netdevice_notifier);
5296EXPORT_SYMBOL(skb_checksum_help);
5297EXPORT_SYMBOL(synchronize_net);
5298EXPORT_SYMBOL(unregister_netdevice);
5299EXPORT_SYMBOL(unregister_netdevice_notifier);
5300EXPORT_SYMBOL(net_enable_timestamp);
5301EXPORT_SYMBOL(net_disable_timestamp);
5302EXPORT_SYMBOL(dev_get_flags);
5303
5304#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
5305EXPORT_SYMBOL(br_handle_frame_hook);
5306EXPORT_SYMBOL(br_fdb_get_hook);
5307EXPORT_SYMBOL(br_fdb_put_hook);
5308#endif
5309
Linus Torvalds1da177e2005-04-16 15:20:36 -07005310EXPORT_SYMBOL(dev_load);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005311
5312EXPORT_PER_CPU_SYMBOL(softnet_data);