blob: ac55d84d6255c99a82d2a0e791c7c435c295e00b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
Jesper Juhl02c30a82005-05-05 16:16:16 -070010 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
44 * call a packet.
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
73 */
74
75#include <asm/uaccess.h>
76#include <asm/system.h>
77#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080078#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070079#include <linux/cpu.h>
80#include <linux/types.h>
81#include <linux/kernel.h>
82#include <linux/sched.h>
Arjan van de Ven4a3e2f72006-03-20 22:33:17 -080083#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070084#include <linux/string.h>
85#include <linux/mm.h>
86#include <linux/socket.h>
87#include <linux/sockios.h>
88#include <linux/errno.h>
89#include <linux/interrupt.h>
90#include <linux/if_ether.h>
91#include <linux/netdevice.h>
92#include <linux/etherdevice.h>
Ben Hutchings0187bdf2008-06-19 16:15:47 -070093#include <linux/ethtool.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070094#include <linux/notifier.h>
95#include <linux/skbuff.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020096#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070097#include <net/sock.h>
98#include <linux/rtnetlink.h>
99#include <linux/proc_fs.h>
100#include <linux/seq_file.h>
101#include <linux/stat.h>
102#include <linux/if_bridge.h>
Patrick McHardyb863ceb2007-07-14 18:55:06 -0700103#include <linux/if_macvlan.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104#include <net/dst.h>
105#include <net/pkt_sched.h>
106#include <net/checksum.h>
107#include <linux/highmem.h>
108#include <linux/init.h>
109#include <linux/kmod.h>
110#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111#include <linux/netpoll.h>
112#include <linux/rcupdate.h>
113#include <linux/delay.h>
Johannes Berg295f4a12007-04-26 20:43:56 -0700114#include <net/wext.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#include <net/iw_handler.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116#include <asm/current.h>
Steve Grubb5bdb9882005-12-03 08:39:35 -0500117#include <linux/audit.h>
Chris Leechdb217332006-06-17 21:24:58 -0700118#include <linux/dmaengine.h>
Herbert Xuf6a78bf2006-06-22 02:57:17 -0700119#include <linux/err.h>
David S. Millerc7fa9d12006-08-15 16:34:13 -0700120#include <linux/ctype.h>
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700121#include <linux/if_arp.h>
Ben Hutchings6de329e2008-06-16 17:02:28 -0700122#include <linux/if_vlan.h>
David S. Miller8f0f2222008-07-15 03:47:03 -0700123#include <linux/ip.h>
Alexander Duyckad55dca2008-09-20 22:05:50 -0700124#include <net/ip.h>
David S. Miller8f0f2222008-07-15 03:47:03 -0700125#include <linux/ipv6.h>
126#include <linux/in.h>
David S. Millerb6b2fed2008-07-21 09:48:06 -0700127#include <linux/jhash.h>
128#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129
Pavel Emelyanov342709e2007-10-23 21:14:45 -0700130#include "net-sysfs.h"
131
Herbert Xud565b0a2008-12-15 23:38:52 -0800132/* Instead of increasing this, you should create a hash table. */
133#define MAX_GRO_SKBS 8
134
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135/*
136 * The list of packet types we will receive (as opposed to discard)
137 * and the routines to invoke.
138 *
139 * Why 16. Because with 16 the only overlap we get on a hash of the
140 * low nibble of the protocol value is RARP/SNAP/X.25.
141 *
142 * NOTE: That is no longer true with the addition of VLAN tags. Not
143 * sure which should go first, but I bet it won't make much
144 * difference if we are running VLANs. The good news is that
145 * this protocol won't be in the list unless compiled in, so
Stephen Hemminger3041a062006-05-26 13:25:24 -0700146 * the average user (w/out VLANs) will not be adversely affected.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 * --BLG
148 *
149 * 0800 IP
150 * 8100 802.1Q VLAN
151 * 0001 802.3
152 * 0002 AX.25
153 * 0004 802.2
154 * 8035 RARP
155 * 0005 SNAP
156 * 0805 X.25
157 * 0806 ARP
158 * 8137 IPX
159 * 0009 Localtalk
160 * 86DD IPv6
161 */
162
/*
 * Hash of protocol handlers (non-ETH_P_ALL), keyed by the low bits of
 * the host-order protocol number; ptype_all holds the taps that see
 * every packet.  Writers serialize on ptype_lock; readers traverse the
 * lists under RCU (see dev_add_pack()/__dev_remove_pack() below).
 */
#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;	/* Taps */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170/*
Pavel Emelianov7562f872007-05-03 15:13:45 -0700171 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172 * semaphore.
173 *
174 * Pure readers hold dev_base_lock for reading.
175 *
176 * Writers must hold the rtnl semaphore while they loop through the
Pavel Emelianov7562f872007-05-03 15:13:45 -0700177 * dev_base_head list, and hold dev_base_lock for writing when they do the
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 * actual updates. This allows pure readers to access the list even
179 * while a writer is preparing to update it.
180 *
181 * To put it another way, dev_base_lock is held for writing only to
182 * protect against pure readers; the rtnl semaphore provides the
183 * protection against other writers.
184 *
185 * See, for example usages, register_netdevice() and
186 * unregister_netdevice(), which must be called with the rtnl
187 * semaphore held.
188 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189DEFINE_RWLOCK(dev_base_lock);
190
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191EXPORT_SYMBOL(dev_base_lock);
192
193#define NETDEV_HASHBITS 8
Eric W. Biederman881d9662007-09-17 11:56:21 -0700194#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195
Eric W. Biederman881d9662007-09-17 11:56:21 -0700196static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197{
198 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
Eric W. Biederman881d9662007-09-17 11:56:21 -0700199 return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200}
201
Eric W. Biederman881d9662007-09-17 11:56:21 -0700202static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203{
Eric W. Biederman881d9662007-09-17 11:56:21 -0700204 return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205}
206
/*
 * Device list insertion: link @dev into its namespace's device list and
 * into the name and ifindex hash tables.  The caller must hold the RTNL;
 * dev_base_lock is taken for writing so that pure readers stay safe.
 * Always returns 0; the int return is kept for symmetry with callers.
 */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail(&dev->dev_list, &net->dev_base_head);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}
221
/*
 * Device list removal: inverse of list_netdevice().  The caller must
 * hold the RTNL; dev_base_lock is taken for writing around the unlink.
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain and both hash tables */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}
234
/*
 * Our notifier list: subsystems register here to be told about
 * netdevice state changes.
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 * Device drivers call our routines to queue packets here. We empty the
 * queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247
David S. Millercf508b12008-07-22 14:16:42 -0700248#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 *
 * NOTE: netdev_lock_type[] and netdev_lock_name[] must be kept in
 * sync: entry i of the name table labels the lockdep class used for
 * the ARPHRD_* value at entry i of the type table.  The final entry
 * (ARPHRD_NONE / "_xmit_NONE") doubles as the default class for
 * unknown device types (see netdev_lock_pos()).
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
	 ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};

static const char *netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
	 "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};

/* One lockdep key per device type, for the xmit and addr-list locks. */
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700289
290static inline unsigned short netdev_lock_pos(unsigned short dev_type)
291{
292 int i;
293
294 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
295 if (netdev_lock_type[i] == dev_type)
296 return i;
297 /* the last key is used by default */
298 return ARRAY_SIZE(netdev_lock_type) - 1;
299}
300
/* Give a tx-queue lock the lockdep class/name matching @dev_type. */
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}
David S. Millercf508b12008-07-22 14:16:42 -0700310
/* Give dev->addr_list_lock the lockdep class/name matching dev->type. */
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700320#else
/* Lockdep disabled: the class-setting helpers compile away to nothing. */
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
328#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329
330/*******************************************************************************
331
332 Protocol management and registration routines
333
334*******************************************************************************/
335
336/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 * Add a protocol ID to the list. Now that the input handler is
338 * smarter we can dispense with all the messy stuff that used to be
339 * here.
340 *
341 * BEWARE!!! Protocol handlers, mangling input packets,
342 * MUST BE last in hash buckets and checking protocol handlers
343 * MUST start from promiscuous ptype_all chain in net_bh.
344 * It is true now, do not change it.
345 * Explanation follows: if protocol handler, mangling packet, will
346 * be the first on list, it is not able to sense, that packet
347 * is cloned and should be copied-on-write, so that it will
348 * change it and subsequent readers will get broken packet.
349 * --ANK (980803)
350 */
351
352/**
353 * dev_add_pack - add packet handler
354 * @pt: packet type declaration
355 *
356 * Add a protocol handler to the networking stack. The passed &packet_type
357 * is linked into kernel lists and may not be freed until it has been
358 * removed from the kernel lists.
359 *
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900360 * This call does not sleep therefore it can not
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 * guarantee all CPU's that are in middle of receiving packets
362 * will see the new packet type (until the next received packet).
363 */
364
365void dev_add_pack(struct packet_type *pt)
366{
367 int hash;
368
369 spin_lock_bh(&ptype_lock);
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700370 if (pt->type == htons(ETH_P_ALL))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371 list_add_rcu(&pt->list, &ptype_all);
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700372 else {
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +0800373 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 list_add_rcu(&pt->list, &ptype_base[hash]);
375 }
376 spin_unlock_bh(&ptype_lock);
377}
378
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379/**
380 * __dev_remove_pack - remove packet handler
381 * @pt: packet type declaration
382 *
383 * Remove a protocol handler that was previously added to the kernel
384 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
385 * from the kernel lists and can be freed or reused once this function
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900386 * returns.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 *
388 * The packet type might still be in use by receivers
389 * and must not be freed until after all the CPU's have gone
390 * through a quiescent state.
391 */
/*
 * Unlink @pt from whichever list dev_add_pack() put it on.  RCU readers
 * may still be traversing the entry on return; callers that intend to
 * free @pt must use dev_remove_pack(), which waits for a grace period.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	/* Select the same list dev_add_pack() used for this type. */
	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	/* Removing a handler that was never registered is a caller bug. */
	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
415/**
416 * dev_remove_pack - remove packet handler
417 * @pt: packet type declaration
418 *
419 * Remove a protocol handler that was previously added to the kernel
420 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
421 * from the kernel lists and can be freed or reused once this function
422 * returns.
423 *
424 * This call sleeps to guarantee that no CPU is looking at the packet
425 * type after return.
426 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	/* Wait out in-flight RCU readers so the caller may free @pt. */
	synchronize_net();
}
433
434/******************************************************************************
435
436 Device Boot-time Settings Routines
437
438*******************************************************************************/
439
/*
 * Boot time configuration table, filled from "netdev=" command-line
 * options (see netdev_boot_setup() below) and consumed by drivers via
 * netdev_boot_setup_check().
 */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
442
443/**
444 * netdev_boot_setup_add - add new setup entry
445 * @name: name of the device
446 * @map: configured settings for the device
447 *
448 * Adds new setup entry to the dev_boot_setup list. The function
449 * returns 0 on error and 1 on success. This is a generic routine to
450 * all netdevices.
451 */
452static int netdev_boot_setup_add(char *name, struct ifmap *map)
453{
454 struct netdev_boot_setup *s;
455 int i;
456
457 s = dev_boot_setup;
458 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
459 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
460 memset(s[i].name, 0, sizeof(s[i].name));
Wang Chen93b3cff2008-07-01 19:57:19 -0700461 strlcpy(s[i].name, name, IFNAMSIZ);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 memcpy(&s[i].map, map, sizeof(s[i].map));
463 break;
464 }
465 }
466
467 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
468}
469
470/**
471 * netdev_boot_setup_check - check boot time settings
472 * @dev: the netdevice
473 *
474 * Check boot time settings for the device.
475 * The found settings are set for the device to be used
476 * later in the device probing.
477 * Returns 0 if no settings found, 1 if they are.
478 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	/* Find an occupied slot whose name matches this device exactly. */
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			/* Copy the boot-time resources onto the device. */
			dev->irq 	= s[i].map.irq;
			dev->base_addr 	= s[i].map.base_addr;
			dev->mem_start 	= s[i].map.mem_start;
			dev->mem_end 	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
496
497
498/**
499 * netdev_boot_base - get address from boot time settings
500 * @prefix: prefix for network device
501 * @unit: id for network device
502 *
503 * Check boot time settings for the base address of device.
504 * The found settings are set for the device to be used
505 * later in the device probing.
506 * Returns 0 if no settings found.
507 */
508unsigned long netdev_boot_base(const char *prefix, int unit)
509{
510 const struct netdev_boot_setup *s = dev_boot_setup;
511 char name[IFNAMSIZ];
512 int i;
513
514 sprintf(name, "%s%d", prefix, unit);
515
516 /*
517 * If device already registered then return base of 1
518 * to indicate not to probe for this interface
519 */
Eric W. Biederman881d9662007-09-17 11:56:21 -0700520 if (__dev_get_by_name(&init_net, name))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521 return 1;
522
523 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
524 if (!strcmp(name, s[i].name))
525 return s[i].map.base_addr;
526 return 0;
527}
528
529/*
530 * Saves at boot time configured settings for any netdevice.
531 */
532int __init netdev_boot_setup(char *str)
533{
534 int ints[5];
535 struct ifmap map;
536
537 str = get_options(str, ARRAY_SIZE(ints), ints);
538 if (!str || !*str)
539 return 0;
540
541 /* Save settings */
542 memset(&map, 0, sizeof(map));
543 if (ints[0] > 0)
544 map.irq = ints[1];
545 if (ints[0] > 1)
546 map.base_addr = ints[2];
547 if (ints[0] > 2)
548 map.mem_start = ints[3];
549 if (ints[0] > 3)
550 map.mem_end = ints[4];
551
552 /* Add new entry to the list */
553 return netdev_boot_setup_add(str, &map);
554}
555
556__setup("netdev=", netdev_boot_setup);
557
558/*******************************************************************************
559
560 Device Interface Subroutines
561
562*******************************************************************************/
563
564/**
565 * __dev_get_by_name - find a device by its name
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700566 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 * @name: name to find
568 *
569 * Find an interface by name. Must be called under RTNL semaphore
570 * or @dev_base_lock. If the name is found a pointer to the device
571 * is returned. If the name is not found then %NULL is returned. The
572 * reference counters are not incremented so the caller must be
573 * careful with locks.
574 */
575
struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;

	/* Walk only the hash bucket for this name, not the full list. */
	hlist_for_each(p, dev_name_hash(net, name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}
588
589/**
590 * dev_get_by_name - find a device by its name
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700591 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592 * @name: name to find
593 *
594 * Find an interface by name. This can be called from any
595 * context and does its own locking. The returned handle has
596 * the usage count incremented and the caller must use dev_put() to
597 * release it when it is no longer needed. %NULL is returned if no
598 * matching device is found.
599 */
600
struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	if (dev)
		dev_hold(dev);	/* grab a reference before dropping the lock */
	read_unlock(&dev_base_lock);
	return dev;
}
612
613/**
614 * __dev_get_by_index - find a device by its ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700615 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 * @ifindex: index of device
617 *
618 * Search for an interface by index. Returns %NULL if the device
619 * is not found or a pointer to the device. The device has not
620 * had its reference counter increased so the caller must be careful
621 * about locking. The caller must hold either the RTNL semaphore
622 * or @dev_base_lock.
623 */
624
struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;

	/* Walk only the hash bucket for this ifindex. */
	hlist_for_each(p, dev_index_hash(net, ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}
637
638
639/**
640 * dev_get_by_index - find a device by its ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700641 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 * @ifindex: index of device
643 *
644 * Search for an interface by index. Returns NULL if the device
645 * is not found or a pointer to the device. The device returned has
646 * had a reference added and the pointer is safe until the user calls
647 * dev_put to indicate they have finished with it.
648 */
649
struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(net, ifindex);
	if (dev)
		dev_hold(dev);	/* grab a reference before dropping the lock */
	read_unlock(&dev_base_lock);
	return dev;
}
661
662/**
663 * dev_getbyhwaddr - find a device by its hardware address
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700664 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 * @type: media type of device
666 * @ha: hardware address
667 *
668 * Search for an interface by MAC address. Returns NULL if the device
669 * is not found or a pointer to the device. The caller must hold the
670 * rtnl semaphore. The returned device has not had its ref count increased
671 * and the caller must therefore be careful about locking
672 *
673 * BUGS:
674 * If the API was consistent this would be __dev_get_by_hwaddr
675 */
676
struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	/* Linear scan: there is no hash keyed on hardware address. */
	for_each_netdev(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}
690
Jochen Friedrichcf309e32005-09-22 04:44:55 -0300691EXPORT_SYMBOL(dev_getbyhwaddr);
692
/*
 * Return the first device of hardware type @type in @net, or NULL.
 * Caller must hold the RTNL; no reference is taken on the result.
 */
struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}
704
705EXPORT_SYMBOL(__dev_getfirstbyhwtype);
706
/*
 * Locking wrapper for __dev_getfirstbyhwtype(): takes the RTNL itself
 * and returns the device with a reference held (caller must dev_put()).
 */
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}
718
719EXPORT_SYMBOL(dev_getfirstbyhwtype);
720
721/**
722 * dev_get_by_flags - find any device with given flags
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700723 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724 * @if_flags: IFF_* values
725 * @mask: bitmask of bits in if_flags to check
726 *
727 * Search for any interface with the given flags. Returns NULL if a device
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900728 * is not found or a pointer to the device. The device returned has
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729 * had a reference added and the pointer is safe until the user calls
730 * dev_put to indicate they have finished with it.
731 */
732
struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	read_lock(&dev_base_lock);
	for_each_netdev(net, dev) {
		/* Match when all bits selected by @mask equal @if_flags. */
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);	/* reference taken under the lock */
			ret = dev;
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return ret;
}
749
750/**
751 * dev_valid_name - check if name is okay for network device
752 * @name: name string
753 *
 * Network device names need to be valid file names to
 * allow sysfs to work.  We also disallow any kind of
 * whitespace.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 */
Mitch Williamsc2373ee2005-11-09 10:34:45 -0800758int dev_valid_name(const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759{
David S. Millerc7fa9d12006-08-15 16:34:13 -0700760 if (*name == '\0')
761 return 0;
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -0700762 if (strlen(name) >= IFNAMSIZ)
763 return 0;
David S. Millerc7fa9d12006-08-15 16:34:13 -0700764 if (!strcmp(name, ".") || !strcmp(name, ".."))
765 return 0;
766
767 while (*name) {
768 if (*name == '/' || isspace(*name))
769 return 0;
770 name++;
771 }
772 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773}
774
/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string (e.g. "eth%d"); a literal name is allowed too
 *	@buf: scratch buffer and result name string (at least IFNAMSIZ bytes)
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;	/* one page used as a bitmap */
	unsigned long *inuse;
	struct net_device *d;

	/* Only look for '%' within the first IFNAMSIZ-1 bytes; anything
	 * beyond that could never be part of the final name anyway.
	 */
	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		/* Mark every unit number already taken by an existing device
		 * whose name matches the pattern.
		 */
		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	/* Format the candidate (with i == 0 when no '%' was present) and
	 * confirm no device of that exact name exists in this namespace.
	 */
	snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}
839
/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device whose name will be set on success
 *	@name: name format string (e.g. "eth%d")
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	/* The device must already be attached to a namespace. */
	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	/* Only commit the name to the device if allocation succeeded;
	 * ret is the assigned unit number (>= 0) in that case.
	 */
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
867
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868
869/**
870 * dev_change_name - change name of a device
871 * @dev: device
872 * @newname: name (or format string) must be at least IFNAMSIZ
873 *
874 * Change name of a device, can pass format strings "eth%d".
875 * for wildcarding.
876 */
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -0700877int dev_change_name(struct net_device *dev, const char *newname)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878{
Herbert Xufcc5a032007-07-30 17:03:38 -0700879 char oldname[IFNAMSIZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880 int err = 0;
Herbert Xufcc5a032007-07-30 17:03:38 -0700881 int ret;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700882 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883
884 ASSERT_RTNL();
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900885 BUG_ON(!dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900887 net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888 if (dev->flags & IFF_UP)
889 return -EBUSY;
890
891 if (!dev_valid_name(newname))
892 return -EINVAL;
893
Stephen Hemmingerc8d90dc2007-10-26 03:53:42 -0700894 if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
895 return 0;
896
Herbert Xufcc5a032007-07-30 17:03:38 -0700897 memcpy(oldname, dev->name, IFNAMSIZ);
898
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 if (strchr(newname, '%')) {
900 err = dev_alloc_name(dev, newname);
901 if (err < 0)
902 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903 }
Eric W. Biederman881d9662007-09-17 11:56:21 -0700904 else if (__dev_get_by_name(net, newname))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 return -EEXIST;
906 else
907 strlcpy(dev->name, newname, IFNAMSIZ);
908
Herbert Xufcc5a032007-07-30 17:03:38 -0700909rollback:
Eric W. Biederman38918452008-10-27 17:51:47 -0700910 /* For now only devices in the initial network namespace
911 * are in sysfs.
912 */
913 if (net == &init_net) {
914 ret = device_rename(&dev->dev, dev->name);
915 if (ret) {
916 memcpy(dev->name, oldname, IFNAMSIZ);
917 return ret;
918 }
Stephen Hemmingerdcc99772008-05-14 22:33:38 -0700919 }
Herbert Xu7f988ea2007-07-30 16:35:46 -0700920
921 write_lock_bh(&dev_base_lock);
Eric W. Biederman92749822007-04-03 00:07:30 -0600922 hlist_del(&dev->name_hlist);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700923 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
Herbert Xu7f988ea2007-07-30 16:35:46 -0700924 write_unlock_bh(&dev_base_lock);
925
Pavel Emelyanov056925a2007-09-16 15:42:43 -0700926 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -0700927 ret = notifier_to_errno(ret);
928
929 if (ret) {
930 if (err) {
931 printk(KERN_ERR
932 "%s: name change rollback failed: %d.\n",
933 dev->name, ret);
934 } else {
935 err = ret;
936 memcpy(dev->name, oldname, IFNAMSIZ);
937 goto rollback;
938 }
939 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940
941 return err;
942}
943
944/**
Stephen Hemminger0b815a12008-09-22 21:28:11 -0700945 * dev_set_alias - change ifalias of a device
946 * @dev: device
947 * @alias: name up to IFALIASZ
Stephen Hemmingerf0db2752008-09-30 02:23:58 -0700948 * @len: limit of bytes to copy from info
Stephen Hemminger0b815a12008-09-22 21:28:11 -0700949 *
950 * Set ifalias for a device,
951 */
952int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
953{
954 ASSERT_RTNL();
955
956 if (len >= IFALIASZ)
957 return -EINVAL;
958
Oliver Hartkopp96ca4a22008-09-23 21:23:19 -0700959 if (!len) {
960 if (dev->ifalias) {
961 kfree(dev->ifalias);
962 dev->ifalias = NULL;
963 }
964 return 0;
965 }
966
Stephen Hemminger0b815a12008-09-22 21:28:11 -0700967 dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
968 if (!dev->ifalias)
969 return -ENOMEM;
970
971 strlcpy(dev->ifalias, alias, len+1);
972 return len;
973}
974
975
/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.  Broadcasts
 *	%NETDEV_FEAT_CHANGE on the netdev notifier chain.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
987
/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	/* Only devices that are up generate state-change events. */
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
1003
/**
 *	netdev_bonding_change - notify of a bonding failover
 *	@dev: device on which the failover occurred
 *
 *	Broadcasts %NETDEV_BONDING_FAILOVER on the netdev notifier chain.
 */
void netdev_bonding_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);
1009
/**
 *	dev_load 	- load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	/* Check for the device under dev_base_lock; no reference is taken
	 * because the pointer is only tested, never dereferenced.
	 */
	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	read_unlock(&dev_base_lock);

	/* Only privileged processes may trigger a module load. */
	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}
1031
/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret = 0;

	ASSERT_RTNL();

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method.  __LINK_STATE_START is set
	 *	before calling into the driver and cleared again below if the
	 *	open fails.
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	/* Give the driver a chance to veto the current hardware address. */
	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	/*
	 *	If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return ret;
}
1105
/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	ASSERT_RTNL();

	might_sleep();

	/* Closing a device that is already down is a nop. */
	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare to death, when device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of it's
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	return 0;
}
1166
1167
Ben Hutchings0187bdf2008-06-19 16:15:47 -07001168/**
1169 * dev_disable_lro - disable Large Receive Offload on a device
1170 * @dev: device
1171 *
1172 * Disable Large Receive Offload (LRO) on a net device. Must be
1173 * called under RTNL. This is needed if received packets may be
1174 * forwarded to another interface.
1175 */
1176void dev_disable_lro(struct net_device *dev)
1177{
1178 if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1179 dev->ethtool_ops->set_flags) {
1180 u32 flags = dev->ethtool_ops->get_flags(dev);
1181 if (flags & ETH_FLAG_LRO) {
1182 flags &= ~ETH_FLAG_LRO;
1183 dev->ethtool_ops->set_flags(dev, flags);
1184 }
1185 }
1186 WARN_ON(dev->features & NETIF_F_LRO);
1187}
1188EXPORT_SYMBOL(dev_disable_lro);
1189
1190
Eric W. Biederman881d9662007-09-17 11:56:21 -07001191static int dev_boot_phase = 1;
1192
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193/*
1194 * Device change register/unregister. These are not inline or static
1195 * as we export them to the world.
1196 */
1197
1198/**
1199 * register_netdevice_notifier - register a network notifier block
1200 * @nb: notifier
1201 *
1202 * Register a notifier to be called when network device events occur.
1203 * The notifier passed is linked into the kernel structures and must
1204 * not be reused until it has been unregistered. A negative errno code
1205 * is returned on a failure.
1206 *
1207 * When registered all registration and up events are replayed
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001208 * to the new notifier to allow device to have a race free
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 * view of the network device list.
1210 */
1211
1212int register_netdevice_notifier(struct notifier_block *nb)
1213{
1214 struct net_device *dev;
Herbert Xufcc5a032007-07-30 17:03:38 -07001215 struct net_device *last;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001216 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217 int err;
1218
1219 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001220 err = raw_notifier_chain_register(&netdev_chain, nb);
Herbert Xufcc5a032007-07-30 17:03:38 -07001221 if (err)
1222 goto unlock;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001223 if (dev_boot_phase)
1224 goto unlock;
1225 for_each_net(net) {
1226 for_each_netdev(net, dev) {
1227 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1228 err = notifier_to_errno(err);
1229 if (err)
1230 goto rollback;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231
Eric W. Biederman881d9662007-09-17 11:56:21 -07001232 if (!(dev->flags & IFF_UP))
1233 continue;
Herbert Xufcc5a032007-07-30 17:03:38 -07001234
Eric W. Biederman881d9662007-09-17 11:56:21 -07001235 nb->notifier_call(nb, NETDEV_UP, dev);
1236 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 }
Herbert Xufcc5a032007-07-30 17:03:38 -07001238
1239unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 rtnl_unlock();
1241 return err;
Herbert Xufcc5a032007-07-30 17:03:38 -07001242
1243rollback:
1244 last = dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001245 for_each_net(net) {
1246 for_each_netdev(net, dev) {
1247 if (dev == last)
1248 break;
Herbert Xufcc5a032007-07-30 17:03:38 -07001249
Eric W. Biederman881d9662007-09-17 11:56:21 -07001250 if (dev->flags & IFF_UP) {
1251 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1252 nb->notifier_call(nb, NETDEV_DOWN, dev);
1253 }
1254 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07001255 }
Herbert Xufcc5a032007-07-30 17:03:38 -07001256 }
Pavel Emelyanovc67625a2007-11-14 15:53:16 -08001257
1258 raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xufcc5a032007-07-30 17:03:38 -07001259 goto unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260}
1261
/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked into the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	int err;

	/* RTNL serializes against registration and the replay loops. */
	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	rtnl_unlock();
	return err;
}
1281
/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}
1295
/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

/* Register interest in rx timestamps; net_timestamp() starts stamping
 * skbs once this count is non-zero.
 */
void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}

/* Drop one timestamp consumer; must balance net_enable_timestamp(). */
void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}
1308
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001309static inline void net_timestamp(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310{
1311 if (atomic_read(&netstamp_needed))
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001312 __net_timestamp(skb);
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001313 else
1314 skb->tstamp.tv64 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315}
1316
/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use (e.g. packet sockets bound to all protocols).
 *	Each tap gets its own clone of the skb; the original is untouched.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

	net_timestamp(skb);

	/* ptype_all is walked under RCU; taps may be added/removed
	 * concurrently.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly
			   set by sender, so that the second statement is
			   just protection against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			/* Sanity-check the network header the sender left in
			 * place; fall back to the current data pointer if it
			 * is outside the buffer.
			 */
			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			/* The tap consumes the clone. */
			ptype->func(skb2, skb->dev, ptype, skb->dev);
		}
	}
	rcu_read_unlock();
}
1362
Denis Vlasenko56079432006-03-29 15:57:29 -08001363
/* Push @q onto this CPU's output_queue list (LIFO) and raise the TX
 * softirq to process it.  Interrupts are disabled while touching the
 * per-cpu softnet_data.
 */
static inline void __netif_reschedule(struct Qdisc *q)
{
	struct softnet_data *sd;
	unsigned long flags;

	local_irq_save(flags);
	sd = &__get_cpu_var(softnet_data);
	q->next_sched = sd->output_queue;
	sd->output_queue = q;
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}
1376
/* Schedule @q for transmission.  The __QDISC_STATE_SCHED bit guards
 * against queueing the same qdisc twice.
 */
void __netif_schedule(struct Qdisc *q)
{
	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
		__netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);
1383
/* Free an skb from hardware-interrupt context: drop our reference and,
 * if it was the last one, defer the actual free to the TX softirq via
 * this CPU's completion_queue.
 */
void dev_kfree_skb_irq(struct sk_buff *skb)
{
	if (atomic_dec_and_test(&skb->users)) {
		struct softnet_data *sd;
		unsigned long flags;

		/* Per-cpu completion_queue must be touched with irqs off. */
		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		skb->next = sd->completion_queue;
		sd->completion_queue = skb;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);
Denis Vlasenko56079432006-03-29 15:57:29 -08001399
/* Free an skb from any context: free directly when we are in process
 * context with interrupts enabled, otherwise defer via the softirq
 * completion queue.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (!in_irq() && !irqs_disabled())
		dev_kfree_skb(skb);
	else
		dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1408
1409
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001410/**
1411 * netif_device_detach - mark device as removed
1412 * @dev: network device
1413 *
1414 * Mark device as removed from system and therefore no longer available.
1415 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001416void netif_device_detach(struct net_device *dev)
1417{
1418 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1419 netif_running(dev)) {
1420 netif_stop_queue(dev);
1421 }
1422}
1423EXPORT_SYMBOL(netif_device_detach);
1424
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001425/**
1426 * netif_device_attach - mark device as attached
1427 * @dev: network device
1428 *
1429 * Mark device as attached from system and restart if needed.
1430 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001431void netif_device_attach(struct net_device *dev)
1432{
1433 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1434 netif_running(dev)) {
1435 netif_wake_queue(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001436 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001437 }
1438}
1439EXPORT_SYMBOL(netif_device_attach);
1440
Ben Hutchings6de329e2008-06-16 17:02:28 -07001441static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1442{
1443 return ((features & NETIF_F_GEN_CSUM) ||
1444 ((features & NETIF_F_IP_CSUM) &&
1445 protocol == htons(ETH_P_IP)) ||
1446 ((features & NETIF_F_IPV6_CSUM) &&
1447 protocol == htons(ETH_P_IPV6)));
1448}
1449
/* Return true when @dev can checksum @skb in hardware, looking through
 * a single 802.1Q VLAN tag if necessary (using the device's
 * vlan_features for the encapsulated protocol).
 */
static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
{
	if (can_checksum_protocol(dev->features, skb->protocol))
		return true;

	if (skb->protocol == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		if (can_checksum_protocol(dev->features & dev->vlan_features,
					  veh->h_vlan_encapsulated_proto))
			return true;
	}

	return false;
}
Denis Vlasenko56079432006-03-29 15:57:29 -08001464
/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum. */
		goto out_set_summed;
	}

	/* Offset of the checksum start relative to skb->data. */
	offset = skb->csum_start - skb_headroom(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	/* Advance to where the checksum field itself lives. */
	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	/* A shared clone must not be written to; unshare the header first. */
	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}
1502
/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 *
 *	Returns an ERR_PTR on failure (-EPROTONOSUPPORT when no matching
 *	packet_type provides a gso_segment handler).
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	/* Strip the link-layer header before handing the skb to the
	 * protocol's segmentation routine; it is restored below.
	 */
	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	/* GSO packets are expected to carry CHECKSUM_PARTIAL; warn and
	 * unshare the header so the checksum can be fixed up if not.
	 */
	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	/* Restore the link-layer header pulled above. */
	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}

EXPORT_SYMBOL(skb_gso_segment);
1554
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	/* Rate-limit the report so a misbehaving NIC cannot flood the log. */
	if (!net_ratelimit())
		return;

	printk(KERN_ERR "%s: hw csum failure.\n",
	       dev ? dev->name : "<unknown>");
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1567
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int frag;

	/* A device that can DMA from high memory can take any skb. */
	if (!(dev->features & NETIF_F_HIGHDMA)) {
		/* Otherwise a single highmem fragment makes the skb
		 * un-DMA-able as-is.
		 */
		for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
			if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
				return 1;
		}
	}
#endif
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588
/* State stashed in skb->cb while a GSO skb is being software-segmented:
 * remembers the skb's original destructor so it can be restored (or run)
 * once the segment list chained on skb->next has been disposed of.
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* Access the dev_gso_cb stored in the skb's control-block area. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1594
1595static void dev_gso_skb_destructor(struct sk_buff *skb)
1596{
1597 struct dev_gso_cb *cb;
1598
1599 do {
1600 struct sk_buff *nskb = skb->next;
1601
1602 skb->next = nskb->next;
1603 nskb->next = NULL;
1604 kfree_skb(nskb);
1605 } while (skb->next);
1606
1607 cb = DEV_GSO_CB(skb);
1608 if (cb->destructor)
1609 cb->destructor(skb);
1610}
1611
1612/**
1613 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1614 * @skb: buffer to segment
1615 *
1616 * This function segments the given skb and stores the list of segments
1617 * in skb->next.
1618 */
1619static int dev_gso_segment(struct sk_buff *skb)
1620{
1621 struct net_device *dev = skb->dev;
1622 struct sk_buff *segs;
Herbert Xu576a30e2006-06-27 13:22:38 -07001623 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1624 NETIF_F_SG : 0);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001625
Herbert Xu576a30e2006-06-27 13:22:38 -07001626 segs = skb_gso_segment(skb, features);
1627
1628 /* Verifying header integrity only. */
1629 if (!segs)
1630 return 0;
1631
Hirofumi Nakagawa801678c2008-04-29 01:03:09 -07001632 if (IS_ERR(segs))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001633 return PTR_ERR(segs);
1634
1635 skb->next = segs;
1636 DEV_GSO_CB(skb)->destructor = skb->destructor;
1637 skb->destructor = dev_gso_skb_destructor;
1638
1639 return 0;
1640}
1641
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001642int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1643 struct netdev_queue *txq)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001644{
Stephen Hemminger00829822008-11-20 20:14:53 -08001645 const struct net_device_ops *ops = dev->netdev_ops;
1646
1647 prefetch(&dev->netdev_ops->ndo_start_xmit);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001648 if (likely(!skb->next)) {
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -07001649 if (!list_empty(&ptype_all))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001650 dev_queue_xmit_nit(skb, dev);
1651
Herbert Xu576a30e2006-06-27 13:22:38 -07001652 if (netif_needs_gso(dev, skb)) {
1653 if (unlikely(dev_gso_segment(skb)))
1654 goto out_kfree_skb;
1655 if (skb->next)
1656 goto gso;
1657 }
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001658
Stephen Hemminger00829822008-11-20 20:14:53 -08001659 return ops->ndo_start_xmit(skb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001660 }
1661
Herbert Xu576a30e2006-06-27 13:22:38 -07001662gso:
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001663 do {
1664 struct sk_buff *nskb = skb->next;
1665 int rc;
1666
1667 skb->next = nskb->next;
1668 nskb->next = NULL;
Stephen Hemminger00829822008-11-20 20:14:53 -08001669 rc = ops->ndo_start_xmit(nskb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001670 if (unlikely(rc)) {
Michael Chanf54d9e82006-06-25 23:57:04 -07001671 nskb->next = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001672 skb->next = nskb;
1673 return rc;
1674 }
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001675 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
Michael Chanf54d9e82006-06-25 23:57:04 -07001676 return NETDEV_TX_BUSY;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001677 } while (skb->next);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001678
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001679 skb->destructor = DEV_GSO_CB(skb)->destructor;
1680
1681out_kfree_skb:
1682 kfree_skb(skb);
1683 return 0;
1684}
1685
/* Random key for simple_tx_hash(), initialized lazily on first transmit.
 * NOTE(review): the lazy init is not synchronized — two CPUs racing on
 * the first packet could both call get_random_bytes().  That only
 * re-seeds the key, so it looks harmless, but confirm it is intentional.
 */
static u32 simple_tx_hashrnd;
static int simple_tx_hashrnd_initialized = 0;
1688
/* Pick a tx queue for @skb by hashing the IPv4/IPv6 flow (addresses plus,
 * for known L4 protocols, the 32-bit source/dest port pair) with a random
 * key.  Non-IP traffic always maps to queue 0.
 */
static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
{
	u32 addr1, addr2, ports;
	u32 hash, ihl;
	u8 ip_proto = 0;

	/* Lazy, unsynchronized seeding of the hash key; see the note at
	 * the definition of simple_tx_hashrnd above.
	 */
	if (unlikely(!simple_tx_hashrnd_initialized)) {
		get_random_bytes(&simple_tx_hashrnd, 4);
		simple_tx_hashrnd_initialized = 1;
	}

	/* case labels are htons() of the ethertype, matching the on-wire
	 * skb->protocol value directly.
	 */
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		/* For fragments, leave ip_proto 0 so ports are not read:
		 * only the first fragment carries the L4 header.
		 */
		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
			ip_proto = ip_hdr(skb)->protocol;
		addr1 = ip_hdr(skb)->saddr;
		addr2 = ip_hdr(skb)->daddr;
		ihl = ip_hdr(skb)->ihl;
		break;
	case htons(ETH_P_IPV6):
		ip_proto = ipv6_hdr(skb)->nexthdr;
		/* Hash only the low 32 bits of each IPv6 address. */
		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
		ihl = (40 >> 2);	/* fixed IPv6 header, in 32-bit words */
		break;
	default:
		return 0;
	}

	switch (ip_proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
	case IPPROTO_DCCP:
	case IPPROTO_ESP:
	case IPPROTO_AH:
	case IPPROTO_SCTP:
	case IPPROTO_UDPLITE:
		/* First 4 bytes past the IP header: the src/dst port pair
		 * for these protocols (SPI for ESP/AH).
		 * NOTE(review): this is a raw u32 load at network header +
		 * ihl*4 — presumably aligned on all supported arches given
		 * how rx buffers are laid out, but worth confirming.
		 */
		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
		break;

	default:
		ports = 0;
		break;
	}

	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);

	/* Scale the 32-bit hash onto [0, real_num_tx_queues) without a
	 * modulo: (hash * n) >> 32.
	 */
	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
1739
David S. Millere8a04642008-07-17 00:34:19 -07001740static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1741 struct sk_buff *skb)
1742{
Stephen Hemminger00829822008-11-20 20:14:53 -08001743 const struct net_device_ops *ops = dev->netdev_ops;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001744 u16 queue_index = 0;
1745
Stephen Hemminger00829822008-11-20 20:14:53 -08001746 if (ops->ndo_select_queue)
1747 queue_index = ops->ndo_select_queue(dev, skb);
David S. Miller8f0f2222008-07-15 03:47:03 -07001748 else if (dev->real_num_tx_queues > 1)
1749 queue_index = simple_tx_hash(dev, skb);
David S. Millereae792b2008-07-15 03:03:33 -07001750
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001751 skb_set_queue_mapping(skb, queue_index);
1752 return netdev_get_tx_queue(dev, queue_index);
David S. Millere8a04642008-07-17 00:34:19 -07001753}
1754
/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      I notice this method can also return errors from the queue disciplines,
 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *      be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 */
int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	/* Linearize a frag_list skb if the device cannot take one. */
	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		skb_set_transport_header(skb, skb->csum_start -
					      skb_headroom(skb));
		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
			goto out_kfree_skb;
	}

gso:
	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	txq = dev_pick_tx(dev, skb);
	q = rcu_dereference(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Device has a real qdisc: enqueue under the qdisc root
		 * lock and kick the queue, unless the qdisc is being torn
		 * down, in which case the packet is dropped here.
		 */
		spinlock_t *root_lock = qdisc_lock(q);

		spin_lock(root_lock);

		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
			kfree_skb(skb);
			rc = NET_XMIT_DROP;
		} else {
			rc = qdisc_enqueue_root(skb, q);
			qdisc_run(q);
		}
		spin_unlock(root_lock);

		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and shot the lock. It is not prone from deadlocks.
	   Either shot noqueue qdisc, it is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		/* The xmit_lock_owner check detects recursive transmission
		 * on the same CPU (a virtual device looping back into
		 * itself), which would otherwise deadlock on HARD_TX_LOCK.
		 */
		if (txq->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, txq, cpu);

			if (!netif_tx_queue_stopped(txq)) {
				rc = 0;
				if (!dev_hard_start_xmit(skb, dev, txq)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev, txq);
			/* A queueless device must never ask to requeue. */
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	rc = -ENETDOWN;
	rcu_read_unlock_bh();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
1893
1894
1895/*=======================================================================
1896 Receiver routines
1897 =======================================================================*/
1898
int netdev_max_backlog __read_mostly = 1000;	/* max packets in a per-CPU input_pkt_queue (netif_rx) */
int netdev_budget __read_mostly = 300;		/* packet budget for one rx softirq run */
int weight_p __read_mostly = 64;            /* old backlog weight */

/* Per-CPU receive-path counters (total/dropped/... incremented below). */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1904
1905
/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds. The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP     (packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
	struct softnet_data *queue;
	unsigned long flags;

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	/* Timestamp on first sight unless the driver already did. */
	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	/*
	 * The code is rearranged so that the path is the most
	 * short when CPU is congested, but is still operating.
	 */
	/* IRQs off: this can be called from hard-irq context and the
	 * per-CPU backlog queue is also touched from interrupts.
	 */
	local_irq_save(flags);
	queue = &__get_cpu_var(softnet_data);

	__get_cpu_var(netdev_rx_stat).total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
			/* Backlog already scheduled: just append. */
enqueue:
			__skb_queue_tail(&queue->input_pkt_queue, skb);
			local_irq_restore(flags);
			return NET_RX_SUCCESS;
		}

		/* Queue was empty: arm the backlog NAPI poll, then
		 * enqueue via the label above.
		 */
		napi_schedule(&queue->backlog);
		goto enqueue;
	}

	/* Backlog full: drop and account. */
	__get_cpu_var(netdev_rx_stat).dropped++;
	local_irq_restore(flags);

	kfree_skb(skb);
	return NET_RX_DROP;
}
1959
/* Like netif_rx(), but for non-interrupt context: after queueing, run any
 * pending softirqs directly so the packet is processed without waiting
 * for the next interrupt.  Preemption is disabled across the sequence to
 * stay on one CPU's backlog.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int ret;

	preempt_disable();
	ret = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return ret;
}

EXPORT_SYMBOL(netif_rx_ni);
1974
/* NET_TX softirq handler: frees skbs queued for deferred free on this
 * CPU's completion_queue, and runs qdiscs queued on its output_queue.
 */
static void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		/* Detach the whole list with IRQs off, then free the skbs
		 * with IRQs enabled.
		 */
		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			/* Only zero-ref skbs belong on the completion list. */
			WARN_ON(atomic_read(&skb->users));
			__kfree_skb(skb);
		}
	}

	if (sd->output_queue) {
		struct Qdisc *head;

		/* Same detach-then-process pattern for the qdisc list. */
		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		local_irq_enable();

		while (head) {
			struct Qdisc *q = head;
			spinlock_t *root_lock;

			head = head->next_sched;

			root_lock = qdisc_lock(q);
			if (spin_trylock(root_lock)) {
				/* Clear SCHED before running so a concurrent
				 * __netif_schedule can re-arm us.
				 */
				smp_mb__before_clear_bit();
				clear_bit(__QDISC_STATE_SCHED,
					  &q->state);
				qdisc_run(q);
				spin_unlock(root_lock);
			} else {
				/* Lock contended: requeue for a later pass,
				 * unless the qdisc is being deactivated, in
				 * which case just drop the SCHED bit.
				 */
				if (!test_bit(__QDISC_STATE_DEACTIVATED,
					      &q->state)) {
					__netif_reschedule(q);
				} else {
					smp_mb__before_clear_bit();
					clear_bit(__QDISC_STATE_SCHED,
						  &q->state);
				}
			}
		}
	}
}
2030
/* Deliver the skb to one packet handler, taking the extra reference that
 * the handler's func() will consume.
 */
static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
2038
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * If bridge module is loaded call bridging hook.
 *  returns NULL if packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	/* Not a bridge port, or our own looped-back frame: pass through. */
	if (skb->pkt_type == PACKET_LOOPBACK ||
	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
		return skb;

	/* Flush the pending handler before the bridge may consume skb. */
	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
/* No bridging built in: identity pass-through. */
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif
2072
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
/* Filled in by the macvlan module when loaded. */
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/* Divert the skb to macvlan if its device hosts macvlan ports; returns
 * NULL when the hook consumed the packet.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	if (skb->dev->macvlan_port == NULL)
		return skb;

	/* Flush the pending handler before macvlan may consume skb. */
	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}
	return macvlan_handle_frame_hook(skb);
}
#else
/* No macvlan built in: identity pass-through. */
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif
2094
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
 *
 */
/* Run the device's ingress qdisc over the skb; returns a TC_ACT_* verdict
 * (TC_ACT_OK when no ingress qdisc is attached).
 */
static int ing_filter(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	u32 ttl = G_TC_RTTL(skb->tc_verd);
	struct netdev_queue *rxq;
	int result = TC_ACT_OK;
	struct Qdisc *q;

	/* A redirect "TTL" stored in tc_verd bounds action loops. */
	if (MAX_RED_LOOP < ttl++) {
		printk(KERN_WARNING
		       "Redir loop detected Dropping packet (%d->%d)\n",
		       skb->iif, dev->ifindex);
		return TC_ACT_SHOT;
	}

	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);

	rxq = &dev->rx_queue;

	q = rxq->qdisc;
	/* noop_qdisc means no ingress policy is installed. */
	if (q != &noop_qdisc) {
		spin_lock(qdisc_lock(q));
		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
			result = qdisc_enqueue_root(skb, q);
		spin_unlock(qdisc_lock(q));
	}

	return result;
}
Herbert Xuf697c3e2007-10-14 00:38:47 -07002134
/* Apply ingress traffic control to the skb from netif_receive_skb();
 * returns NULL when the packet was shot/stolen by an action, otherwise
 * the skb with its tc_verd cleared.
 */
static inline struct sk_buff *handle_ing(struct sk_buff *skb,
					 struct packet_type **pt_prev,
					 int *ret, struct net_device *orig_dev)
{
	/* Fast path: no ingress qdisc installed on this device. */
	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
		goto out;

	if (*pt_prev) {
		/* Flush the pending handler before tc may consume skb. */
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	} else {
		/* Huh? Why does turning on AF_PACKET affect this? */
		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
	}

	switch (ing_filter(skb)) {
	case TC_ACT_SHOT:
	case TC_ACT_STOLEN:
		kfree_skb(skb);
		return NULL;
	}

out:
	skb->tc_verd = 0;
	return skb;
}
#endif
2162
Patrick McHardybc1d0412008-07-14 22:49:30 -07002163/*
2164 * netif_nit_deliver - deliver received packets to network taps
2165 * @skb: buffer
2166 *
2167 * This function is used to deliver incoming packets to network
2168 * taps. It should be used when the normal netif_receive_skb path
2169 * is bypassed, for example because of VLAN acceleration.
2170 */
2171void netif_nit_deliver(struct sk_buff *skb)
2172{
2173 struct packet_type *ptype;
2174
2175 if (list_empty(&ptype_all))
2176 return;
2177
2178 skb_reset_network_header(skb);
2179 skb_reset_transport_header(skb);
2180 skb->mac_len = skb->network_header - skb->mac_header;
2181
2182 rcu_read_lock();
2183 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2184 if (!ptype->dev || ptype->dev == skb->dev)
2185 deliver_skb(skb, ptype, skb->dev);
2186 }
2187 rcu_read_unlock();
2188}
2189
/**
 *	netif_receive_skb - process receive buffer from network
 *	@skb: buffer to process
 *
 *	netif_receive_skb() is the main receive data processing function.
 *	It always succeeds. The buffer may be dropped during processing
 *	for congestion control or by the protocol layers.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 *
 *	Return values (usually ignored):
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */
int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	struct net_device *null_or_orig;
	int ret = NET_RX_DROP;
	__be16 type;

	/* Hardware-accelerated VLAN tag present: let the vlan code take
	 * the packet if it claims it.
	 */
	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
		return NET_RX_SUCCESS;

	/* if we've gotten here through NAPI, check netpoll */
	if (netpoll_receive_skb(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	if (!skb->iif)
		skb->iif = skb->dev->ifindex;

	/* Bonding: either redirect skb->dev to the master, or (when this
	 * slave's traffic should be dropped) restrict delivery to handlers
	 * bound exactly to orig_dev via null_or_orig.
	 */
	null_or_orig = NULL;
	orig_dev = skb->dev;
	if (orig_dev->master) {
		if (skb_bond_should_drop(skb))
			null_or_orig = orig_dev; /* deliver only exact match */
		else
			skb->dev = orig_dev->master;
	}

	__get_cpu_var(netdev_rx_stat).total++;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;

	/* pt_prev defers each delivery by one handler so the last one can
	 * be handed the skb itself instead of a cloned reference.
	 */
	pt_prev = NULL;

	rcu_read_lock();

	/* Don't receive packets in an exiting network namespace */
	if (!net_alive(dev_net(skb->dev))) {
		kfree_skb(skb);
		goto out;
	}

#ifdef CONFIG_NET_CLS_ACT
	/* TC_NCLS set: skip classification (ingress already ran). */
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	/* First pass: taps listening to all protocols (ptype_all). */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
		    ptype->dev == orig_dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif

	/* Bridge/macvlan hooks may consume the skb (return NULL). */
	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;

	/* Second pass: handlers registered for this exact ethertype. */
	type = skb->protocol;
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		/* Last handler gets the skb without an extra reference. */
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}
2307
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07002308/* Network device is going away, flush any packets still pending */
2309static void flush_backlog(void *arg)
2310{
2311 struct net_device *dev = arg;
2312 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2313 struct sk_buff *skb, *tmp;
2314
2315 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2316 if (skb->dev == dev) {
2317 __skb_unlink(skb, &queue->input_pkt_queue);
2318 kfree_skb(skb);
2319 }
2320}
2321
/*
 * Final step for a GRO-held skb: let the owning protocol fix up the
 * headers of a merged packet, then hand it to netif_receive_skb().
 */
static int napi_gro_complete(struct sk_buff *skb)
{
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int err = -ENOENT;

	/* No frag_list means nothing was merged into this skb, so there
	 * are no headers to fix up - deliver it directly.
	 */
	if (!skb_shinfo(skb)->frag_list)
		goto out;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		/* Skip device-bound taps and handlers without GRO
		 * completion support.
		 */
		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
			continue;

		err = ptype->gro_complete(skb);
		break;
	}
	rcu_read_unlock();

	if (err) {
		/* Having run off the end of the list means no handler
		 * matched at all - unexpected for a GRO'd skb.
		 */
		WARN_ON(&ptype->list == head);
		kfree_skb(skb);
		return NET_RX_SUCCESS;
	}

out:
	/* Point skb->data back at the link-layer header before the
	 * normal receive path processes the packet.
	 */
	__skb_push(skb, -skb_network_offset(skb));
	return netif_receive_skb(skb);
}
2352
2353void napi_gro_flush(struct napi_struct *napi)
2354{
2355 struct sk_buff *skb, *next;
2356
2357 for (skb = napi->gro_list; skb; skb = next) {
2358 next = skb->next;
2359 skb->next = NULL;
2360 napi_gro_complete(skb);
2361 }
2362
2363 napi->gro_list = NULL;
2364}
2365EXPORT_SYMBOL(napi_gro_flush);
2366
/*
 * GRO entry point: try to merge @skb into a held same-flow packet on
 * napi->gro_list instead of delivering it immediately.  Falls back to
 * netif_receive_skb() when the device lacks NETIF_F_GRO, no protocol
 * handler matches, or the handler asks for an immediate flush.
 */
int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int count = 0;
	int same_flow;
	int mac_len;

	if (!(skb->dev->features & NETIF_F_GRO))
		goto normal;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		struct sk_buff *p;

		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
			continue;

		skb_reset_network_header(skb);
		mac_len = skb->network_header - skb->mac_header;
		skb->mac_len = mac_len;
		NAPI_GRO_CB(skb)->same_flow = 0;
		NAPI_GRO_CB(skb)->flush = 0;

		/* Pre-qualify held packets by exact MAC-header match;
		 * the protocol handler refines same_flow below.  count
		 * tracks the hold-list length for the MAX_GRO_SKBS cap.
		 */
		for (p = napi->gro_list; p; p = p->next) {
			count++;
			NAPI_GRO_CB(p)->same_flow =
				p->mac_len == mac_len &&
				!memcmp(skb_mac_header(p), skb_mac_header(skb),
					mac_len);
			NAPI_GRO_CB(p)->flush = 0;
		}

		pp = ptype->gro_receive(&napi->gro_list, skb);
		break;
	}
	rcu_read_unlock();

	/* Loop ran off the end: no GRO-capable handler for this type. */
	if (&ptype->list == head)
		goto normal;

	same_flow = NAPI_GRO_CB(skb)->same_flow;

	/* Handler returned a held packet that must be completed now. */
	if (pp) {
		struct sk_buff *nskb = *pp;

		*pp = nskb->next;
		nskb->next = NULL;
		napi_gro_complete(nskb);
		count--;
	}

	/* Merged into an existing held packet: nothing more to do. */
	if (same_flow)
		goto ok;

	/* Not mergeable and not holdable: deliver immediately. */
	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
		__skb_push(skb, -skb_network_offset(skb));
		goto normal;
	}

	/* Park the skb at the head of the hold list. */
	NAPI_GRO_CB(skb)->count = 1;
	skb->next = napi->gro_list;
	napi->gro_list = skb;

ok:
	return NET_RX_SUCCESS;

normal:
	return netif_receive_skb(skb);
}
EXPORT_SYMBOL(napi_gro_receive);
2440
/*
 * NAPI poll handler for the per-CPU backlog queue fed by netif_rx().
 * Dequeues up to @quota skbs - stopping early once a jiffy has passed -
 * and feeds each through GRO into the receive path.
 */
static int process_backlog(struct napi_struct *napi, int quota)
{
	int work = 0;
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;

	napi->weight = weight_p;
	do {
		struct sk_buff *skb;

		/* The queue is also filled from interrupt context. */
		local_irq_disable();
		skb = __skb_dequeue(&queue->input_pkt_queue);
		if (!skb) {
			/* Queue drained: take ourselves off the poll list
			 * while interrupts are still disabled.
			 */
			__napi_complete(napi);
			local_irq_enable();
			break;
		}
		local_irq_enable();

		napi_gro_receive(napi, skb);
	} while (++work < quota && jiffies == start_time);

	napi_gro_flush(napi);

	return work;
}
2467
/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run
 */
void __napi_schedule(struct napi_struct *n)
{
	unsigned long flags;

	/* Queue on this CPU's poll list and kick the RX softirq; local
	 * irqs are masked so the list update cannot race an interrupt.
	 */
	local_irq_save(flags);
	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__napi_schedule);
2484
/*
 * Core of NAPI completion: unhook @n from the poll list and clear
 * NAPI_STATE_SCHED.  Callers in this file invoke it with interrupts
 * disabled and all GRO skbs already flushed (BUG otherwise).
 */
void __napi_complete(struct napi_struct *n)
{
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	BUG_ON(n->gro_list);

	list_del(&n->poll_list);
	/* Order the list removal before clearing SCHED so a concurrent
	 * re-schedule cannot observe the entry still queued.
	 */
	smp_mb__before_clear_bit();
	clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);
2495
/*
 * Mark a NAPI poll as finished: flush held GRO packets, then remove
 * the instance from the poll list with interrupts disabled.  Skipped
 * entirely while netpoll has the instance in service (NPSVC set).
 */
void napi_complete(struct napi_struct *n)
{
	unsigned long flags;

	/*
	 * don't let napi dequeue from the cpu poll list
	 * just in case it's running on a different cpu
	 */
	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
		return;

	/* Deliver any held GRO packets before dropping SCHED. */
	napi_gro_flush(n);
	local_irq_save(flags);
	__napi_complete(n);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(napi_complete);
2513
/*
 * Initialise a NAPI context and link it onto @dev's napi_list.
 * @poll is the driver's poll callback, @weight its per-poll budget.
 */
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
		    int (*poll)(struct napi_struct *, int), int weight)
{
	INIT_LIST_HEAD(&napi->poll_list);
	napi->gro_list = NULL;
	napi->poll = poll;
	napi->weight = weight;
	list_add(&napi->dev_list, &dev->napi_list);
#ifdef CONFIG_NETPOLL
	napi->dev = dev;
	spin_lock_init(&napi->poll_lock);
	napi->poll_owner = -1;
#endif
	/* NOTE(review): starts with SCHED set - presumably to keep the
	 * instance inert until the driver enables it; confirm against
	 * napi_enable().
	 */
	set_bit(NAPI_STATE_SCHED, &napi->state);
}
EXPORT_SYMBOL(netif_napi_add);
2530
2531void netif_napi_del(struct napi_struct *napi)
2532{
2533 struct sk_buff *skb, *next;
2534
Peter P Waskiewicz Jrd7b06632008-12-26 01:35:35 -08002535 list_del_init(&napi->dev_list);
Herbert Xud565b0a2008-12-15 23:38:52 -08002536
2537 for (skb = napi->gro_list; skb; skb = next) {
2538 next = skb->next;
2539 skb->next = NULL;
2540 kfree_skb(skb);
2541 }
2542
2543 napi->gro_list = NULL;
2544}
2545EXPORT_SYMBOL(netif_napi_del);
2546
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002547
/*
 * NET_RX_SOFTIRQ handler: run the poll callback of every NAPI instance
 * scheduled on this CPU, bounded by netdev_budget packets overall and
 * a two-jiffy time window.
 */
static void net_rx_action(struct softirq_action *h)
{
	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
	unsigned long time_limit = jiffies + 2;
	int budget = netdev_budget;
	void *have;

	local_irq_disable();

	while (!list_empty(list)) {
		struct napi_struct *n;
		int work, weight;

		/* If the softirq window is exhausted then punt.
		 * Allow this to run for 2 jiffies, which allows an
		 * average latency of 1.5/HZ.
		 */
		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
			goto softnet_break;

		local_irq_enable();

		/* Even though interrupts have been re-enabled, this
		 * access is safe because interrupts can only add new
		 * entries to the tail of this list, and only ->poll()
		 * calls can remove this head entry from the list.
		 */
		n = list_entry(list->next, struct napi_struct, poll_list);

		have = netpoll_poll_lock(n);

		weight = n->weight;

		/* This NAPI_STATE_SCHED test is for avoiding a race
		 * with netpoll's poll_napi(). Only the entity which
		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call. Therefore we avoid
		 * accidently calling ->poll() when NAPI is not scheduled.
		 */
		work = 0;
		if (test_bit(NAPI_STATE_SCHED, &n->state))
			work = n->poll(n, weight);

		WARN_ON_ONCE(work > weight);

		budget -= work;

		local_irq_disable();

		/* Drivers must not modify the NAPI state if they
		 * consume the entire weight. In such cases this code
		 * still "owns" the NAPI instance and therefore can
		 * move the instance around on the list at-will.
		 */
		if (unlikely(work == weight)) {
			if (unlikely(napi_disable_pending(n)))
				__napi_complete(n);
			else
				list_move_tail(&n->poll_list, list);
		}

		netpoll_poll_unlock(have);
	}
out:
	local_irq_enable();

#ifdef CONFIG_NET_DMA
	/*
	 * There may not be any more sk_buffs coming right now, so push
	 * any pending DMA copies to hardware
	 */
	dma_issue_pending_all();
#endif

	return;

softnet_break:
	/* Out of budget or time: account the squeeze and re-raise the
	 * softirq so the remaining work is picked up on the next pass.
	 */
	__get_cpu_var(netdev_rx_stat).time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}
2629
/* Registered SIOCGIFCONF handlers, indexed by address family. */
static gifconf_func_t * gifconf_list [NPROTO];

/**
 *	register_gifconf	-	register a SIOCGIF handler
 *	@family: Address family
 *	@gifconf: Function handler
 *
 *	Register protocol dependent address dumping routines. The handler
 *	that is passed must not be freed or reused until it has been replaced
 *	by another handler.
 */
int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
	/* Reject out-of-range families; existing slots are overwritten
	 * unconditionally (pass NULL to unregister).
	 */
	if (family >= NPROTO)
		return -EINVAL;
	gifconf_list[family] = gifconf;
	return 0;
}
2648
2649
2650/*
2651 * Map an interface index to its name (SIOCGIFNAME)
2652 */
2653
2654/*
2655 * We need this ioctl for efficient implementation of the
2656 * if_indextoname() function required by the IPv6 API. Without
2657 * it, we would have to search all the interfaces to find a
2658 * match. --pb
2659 */
2660
Eric W. Biederman881d9662007-09-17 11:56:21 -07002661static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002662{
2663 struct net_device *dev;
2664 struct ifreq ifr;
2665
2666 /*
2667 * Fetch the caller's info block.
2668 */
2669
2670 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2671 return -EFAULT;
2672
2673 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -07002674 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002675 if (!dev) {
2676 read_unlock(&dev_base_lock);
2677 return -ENODEV;
2678 }
2679
2680 strcpy(ifr.ifr_name, dev->name);
2681 read_unlock(&dev_base_lock);
2682
2683 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2684 return -EFAULT;
2685 return 0;
2686}
2687
/*
 * Perform a SIOCGIFCONF call. This structure will change
 * size eventually, and there is nothing I can do about it.
 * Thus we will need a 'compatibility mode'.
 */

static int dev_ifconf(struct net *net, char __user *arg)
{
	struct ifconf ifc;
	struct net_device *dev;
	char __user *pos;
	int len;
	int total;
	int i;

	/*
	 * Fetch the caller's info block.
	 */

	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
		return -EFAULT;

	pos = ifc.ifc_buf;
	len = ifc.ifc_len;

	/*
	 * Loop over the interfaces, and write an info block for each.
	 */

	total = 0;
	for_each_netdev(net, dev) {
		for (i = 0; i < NPROTO; i++) {
			if (gifconf_list[i]) {
				int done;
				/* A NULL user buffer means "just measure":
				 * handlers report the size they would need.
				 */
				if (!pos)
					done = gifconf_list[i](dev, NULL, 0);
				else
					done = gifconf_list[i](dev, pos + total,
							       len - total);
				if (done < 0)
					return -EFAULT;
				total += done;
			}
		}
	}

	/*
	 * All done. Write the updated control block back to the caller.
	 */
	ifc.ifc_len = total;

	/*
	 * Both BSD and Solaris return 0 here, so we do too.
	 */
	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
}
2744
2745#ifdef CONFIG_PROC_FS
/*
 * This is invoked by the /proc filesystem handler to display a device
 * in detail.
 */
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(dev_base_lock)
{
	struct net *net = seq_file_net(seq);
	loff_t off;
	struct net_device *dev;

	/* Lock is held across the whole dump; dev_seq_stop() drops it. */
	read_lock(&dev_base_lock);
	/* Position 0 is the header row; devices start at offset 1. */
	if (!*pos)
		return SEQ_START_TOKEN;

	off = 1;
	for_each_netdev(net, dev)
		if (off++ == *pos)
			return dev;

	return NULL;
}
2768
2769void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2770{
Denis V. Luneve372c412007-11-19 22:31:54 -08002771 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002772 ++*pos;
Pavel Emelianov7562f872007-05-03 15:13:45 -07002773 return v == SEQ_START_TOKEN ?
Eric W. Biederman881d9662007-09-17 11:56:21 -07002774 first_net_device(net) : next_net_device((struct net_device *)v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002775}
2776
/* seq_file stop: release the lock taken in dev_seq_start(). */
void dev_seq_stop(struct seq_file *seq, void *v)
	__releases(dev_base_lock)
{
	read_unlock(&dev_base_lock);
}
2782
/* Emit one /proc/net/dev row for @dev.  Column order must match the
 * header printed by dev_seq_show(); several error counters are summed
 * to fit the historical column set.
 */
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
	const struct net_device_stats *stats = dev_get_stats(dev);

	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
		   dev->name, stats->rx_bytes, stats->rx_packets,
		   stats->rx_errors,
		   stats->rx_dropped + stats->rx_missed_errors,
		   stats->rx_fifo_errors,
		   stats->rx_length_errors + stats->rx_over_errors +
		    stats->rx_crc_errors + stats->rx_frame_errors,
		   stats->rx_compressed, stats->multicast,
		   stats->tx_bytes, stats->tx_packets,
		   stats->tx_errors, stats->tx_dropped,
		   stats->tx_fifo_errors, stats->collisions,
		   stats->tx_carrier_errors +
		    stats->tx_aborted_errors +
		    stats->tx_window_errors +
		    stats->tx_heartbeat_errors,
		   stats->tx_compressed);
}
2805
/*
 * Called from the PROCfs module. This now uses the new arbitrary sized
 * /proc/net interface to create /proc/net/dev
 */
static int dev_seq_show(struct seq_file *seq, void *v)
{
	/* First call prints the header row, later calls one device each. */
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Inter-| Receive "
			" | Transmit\n"
			" face |bytes packets errs drop fifo frame "
			"compressed multicast|bytes packets errs "
			"drop fifo colls carrier compressed\n");
	else
		dev_seq_printf_stats(seq, v);
	return 0;
}
2822
2823static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2824{
2825 struct netif_rx_stats *rc = NULL;
2826
Mike Travis0c0b0ac2008-05-02 16:43:08 -07002827 while (*pos < nr_cpu_ids)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002828 if (cpu_online(*pos)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002829 rc = &per_cpu(netdev_rx_stat, *pos);
2830 break;
2831 } else
2832 ++*pos;
2833 return rc;
2834}
2835
/* seq_file start for /proc/net/softnet_stat: first online CPU's stats. */
static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
{
	return softnet_get_online(pos);
}

/* Advance to the next online CPU's stats block. */
static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return softnet_get_online(pos);
}

/* Nothing to release: the iteration takes no locks. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2850
/* One /proc/net/softnet_stat row per online CPU.  The literal zero
 * columns stand in for fields (e.g. fastroute) that no longer exist
 * but are kept for format compatibility.
 */
static int softnet_seq_show(struct seq_file *seq, void *v)
{
	struct netif_rx_stats *s = v;

	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
		   s->total, s->dropped, s->time_squeeze, 0,
		   0, 0, 0, 0, /* was fastroute */
		   s->cpu_collision );
	return 0;
}
2861
/* seq_file callbacks for /proc/net/dev. */
static const struct seq_operations dev_seq_ops = {
	.start = dev_seq_start,
	.next = dev_seq_next,
	.stop = dev_seq_stop,
	.show = dev_seq_show,
};

/* Open /proc/net/dev with per-namespace seq_file state. */
static int dev_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &dev_seq_ops,
			    sizeof(struct seq_net_private));
}

/* File operations for /proc/net/dev. */
static const struct file_operations dev_seq_fops = {
	.owner = THIS_MODULE,
	.open = dev_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};
2882
/* seq_file callbacks for /proc/net/softnet_stat. */
static const struct seq_operations softnet_seq_ops = {
	.start = softnet_seq_start,
	.next = softnet_seq_next,
	.stop = softnet_seq_stop,
	.show = softnet_seq_show,
};

/* Open /proc/net/softnet_stat; the stats are global (per-CPU, not
 * per-namespace), so a plain seq_open suffices.
 */
static int softnet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &softnet_seq_ops);
}

/* File operations for /proc/net/softnet_stat. */
static const struct file_operations softnet_seq_fops = {
	.owner = THIS_MODULE,
	.open = softnet_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
2902
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002903static void *ptype_get_idx(loff_t pos)
2904{
2905 struct packet_type *pt = NULL;
2906 loff_t i = 0;
2907 int t;
2908
2909 list_for_each_entry_rcu(pt, &ptype_all, list) {
2910 if (i == pos)
2911 return pt;
2912 ++i;
2913 }
2914
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002915 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002916 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2917 if (i == pos)
2918 return pt;
2919 ++i;
2920 }
2921 }
2922 return NULL;
2923}
2924
/* seq_file start for /proc/net/ptype: enter RCU and position at *pos
 * (SEQ_START_TOKEN stands for the header row at position 0).
 */
static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
}
2931
/* Advance the /proc/net/ptype iterator: continue along the current
 * list, falling from the ptype_all wildcard list into the ptype_base
 * hash chains, and from one chain to the next non-empty one.
 */
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct packet_type *pt;
	struct list_head *nxt;
	int hash;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ptype_get_idx(0);

	pt = v;
	nxt = pt->list.next;
	if (pt->type == htons(ETH_P_ALL)) {
		if (nxt != &ptype_all)
			goto found;
		/* End of the wildcard list: start on hash chain 0. */
		hash = 0;
		nxt = ptype_base[0].next;
	} else
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;

	/* Skip over empty hash chains. */
	while (nxt == &ptype_base[hash]) {
		if (++hash >= PTYPE_HASH_SIZE)
			return NULL;
		nxt = ptype_base[hash].next;
	}
found:
	return list_entry(nxt, struct packet_type, list);
}
2960
/* Leave the RCU read-side section entered in ptype_seq_start(). */
static void ptype_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

/* Emit one /proc/net/ptype row; device-bound handlers belonging to a
 * different network namespace are filtered out.
 */
static int ptype_seq_show(struct seq_file *seq, void *v)
{
	struct packet_type *pt = v;

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Type Device Function\n");
	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
		if (pt->type == htons(ETH_P_ALL))
			seq_puts(seq, "ALL ");
		else
			seq_printf(seq, "%04x", ntohs(pt->type));

		seq_printf(seq, " %-8s %pF\n",
			   pt->dev ? pt->dev->name : "", pt->func);
	}

	return 0;
}
2985
/* seq_file callbacks for /proc/net/ptype. */
static const struct seq_operations ptype_seq_ops = {
	.start = ptype_seq_start,
	.next = ptype_seq_next,
	.stop = ptype_seq_stop,
	.show = ptype_seq_show,
};

/* Open /proc/net/ptype with per-namespace seq_file state. */
static int ptype_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ptype_seq_ops,
			sizeof(struct seq_net_private));
}

/* File operations for /proc/net/ptype. */
static const struct file_operations ptype_seq_fops = {
	.owner = THIS_MODULE,
	.open = ptype_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};
3006
3007
/* Per-namespace creation of /proc/net/{dev,softnet_stat,ptype} plus
 * the wireless-extensions proc entries; unwinds already-created
 * entries in reverse order on failure.
 */
static int __net_init dev_proc_net_init(struct net *net)
{
	int rc = -ENOMEM;

	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
		goto out;
	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
		goto out_dev;
	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
		goto out_softnet;

	if (wext_proc_init(net))
		goto out_ptype;
	rc = 0;
out:
	return rc;
out_ptype:
	proc_net_remove(net, "ptype");
out_softnet:
	proc_net_remove(net, "softnet_stat");
out_dev:
	proc_net_remove(net, "dev");
	goto out;
}
Eric W. Biederman881d9662007-09-17 11:56:21 -07003032
/* Per-namespace teardown: remove everything dev_proc_net_init()
 * created, in reverse order.
 */
static void __net_exit dev_proc_net_exit(struct net *net)
{
	wext_proc_exit(net);

	proc_net_remove(net, "ptype");
	proc_net_remove(net, "softnet_stat");
	proc_net_remove(net, "dev");
}
3041
/* Hook the proc entries into network-namespace creation/destruction. */
static struct pernet_operations __net_initdata dev_proc_ops = {
	.init = dev_proc_net_init,
	.exit = dev_proc_net_exit,
};

/* Register the per-namespace /proc/net files at boot time. */
static int __init dev_proc_init(void)
{
	return register_pernet_subsys(&dev_proc_ops);
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003051#else
3052#define dev_proc_init() 0
3053#endif /* CONFIG_PROC_FS */
3054
3055
/**
 *	netdev_set_master	-	set up master/slave pair
 *	@slave: slave device
 *	@master: new master device
 *
 *	Changes the master device of the slave. Pass %NULL to break the
 *	bonding. The caller must hold the RTNL semaphore. On a failure
 *	a negative errno code is returned. On success the reference counts
 *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
 *	function returns zero.
 */
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
	struct net_device *old = slave->master;

	ASSERT_RTNL();

	if (master) {
		/* Refuse to silently re-enslave an already-bonded device. */
		if (old)
			return -EBUSY;
		/* Hold the new master before publishing the pointer. */
		dev_hold(master);
	}

	slave->master = master;

	/* NOTE(review): synchronize_net() before dev_put(old) presumably
	 * lets concurrent readers of slave->master finish with the old
	 * device before its reference is dropped - confirm.
	 */
	synchronize_net();

	if (old)
		dev_put(old);

	if (master)
		slave->flags |= IFF_SLAVE;
	else
		slave->flags &= ~IFF_SLAVE;

	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
	return 0;
}
3094
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003095static void dev_change_rx_flags(struct net_device *dev, int flags)
3096{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003097 const struct net_device_ops *ops = dev->netdev_ops;
3098
3099 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3100 ops->ndo_change_rx_flags(dev, flags);
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003101}
3102
/*
 * Adjust dev->promiscuity by @inc and keep IFF_PROMISC in sync.  Logs
 * the transition (and an audit record when auditing is enabled) and
 * notifies the driver via dev_change_rx_flags().  Returns -EOVERFLOW
 * if the counter would wrap, 0 otherwise.  Caller must hold RTNL.
 */
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;
	uid_t uid;
	gid_t gid;

	ASSERT_RTNL();

	/* Set the flag optimistically; corrected below if the counter
	 * update lands (or wraps) to zero.
	 */
	dev->flags |= IFF_PROMISC;
	dev->promiscuity += inc;
	if (dev->promiscuity == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch promisc and return error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_PROMISC;
		else {
			dev->promiscuity -= inc;
			printk(KERN_WARNING "%s: promiscuity touches roof, "
				"set promiscuity failed, promiscuity feature "
				"of device might be broken.\n", dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags != old_flags) {
		printk(KERN_INFO "device %s %s promiscuous mode\n",
			dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
				"left");
		if (audit_enabled) {
			current_uid_gid(&uid, &gid);
			audit_log(current->audit_context, GFP_ATOMIC,
				AUDIT_ANOM_PROMISCUOUS,
				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
				dev->name, (dev->flags & IFF_PROMISC),
				(old_flags & IFF_PROMISC),
				audit_get_loginuid(current),
				uid, gid,
				audit_get_sessionid(current));
		}

		/* Flag state actually toggled: tell the driver. */
		dev_change_rx_flags(dev, IFF_PROMISC);
	}
	return 0;
}
3148
Linus Torvalds1da177e2005-04-16 15:20:36 -07003149/**
3150 * dev_set_promiscuity - update promiscuity count on a device
3151 * @dev: device
3152 * @inc: modifier
3153 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07003154 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07003155 * remains above zero the interface remains promiscuous. Once it hits zero
3156 * the device reverts back to normal filtering operation. A negative inc
3157 * value is used to drop promiscuity on the device.
Wang Chendad9b332008-06-18 01:48:28 -07003158 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003159 */
Wang Chendad9b332008-06-18 01:48:28 -07003160int dev_set_promiscuity(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003161{
3162 unsigned short old_flags = dev->flags;
Wang Chendad9b332008-06-18 01:48:28 -07003163 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003164
Wang Chendad9b332008-06-18 01:48:28 -07003165 err = __dev_set_promiscuity(dev, inc);
Patrick McHardy4b5a6982008-07-06 15:49:08 -07003166 if (err < 0)
Wang Chendad9b332008-06-18 01:48:28 -07003167 return err;
Patrick McHardy4417da62007-06-27 01:28:10 -07003168 if (dev->flags != old_flags)
3169 dev_set_rx_mode(dev);
Wang Chendad9b332008-06-18 01:48:28 -07003170 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003171}
3172
/**
 *	dev_set_allmulti	- update allmulti count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove reception of all multicast frames to a device. While the
 *	count in the device remains above zero the interface remains listening
 *	to all interfaces. Once it hits zero the device reverts back to normal
 *	filtering operation. A negative @inc value is used to drop the counter
 *	when releasing a resource needing all multicasts.
 *	Return 0 if successful or a negative errno code on error.
 */

int dev_set_allmulti(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	ASSERT_RTNL();

	/* Set optimistically; corrected below if the counter wraps. */
	dev->flags |= IFF_ALLMULTI;
	dev->allmulti += inc;
	if (dev->allmulti == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch allmulti and return error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_ALLMULTI;
		else {
			dev->allmulti -= inc;
			printk(KERN_WARNING "%s: allmulti touches roof, "
				"set allmulti failed, allmulti feature of "
				"device might be broken.\n", dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags ^ old_flags) {
		/* Flag state toggled: notify the driver and re-sync the
		 * hardware RX filter.
		 */
		dev_change_rx_flags(dev, IFF_ALLMULTI);
		dev_set_rx_mode(dev);
	}
	return 0;
}
3215
3216/*
3217 * Upload unicast and multicast address lists to device and
3218 * configure RX filtering. When the device doesn't support unicast
Joe Perches53ccaae2007-12-20 14:02:06 -08003219 * filtering it is put in promiscuous mode while unicast addresses
Patrick McHardy4417da62007-06-27 01:28:10 -07003220 * are present.
3221 */
3222void __dev_set_rx_mode(struct net_device *dev)
3223{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003224 const struct net_device_ops *ops = dev->netdev_ops;
3225
Patrick McHardy4417da62007-06-27 01:28:10 -07003226 /* dev_open will call this function so the list will stay sane. */
3227 if (!(dev->flags&IFF_UP))
3228 return;
3229
3230 if (!netif_device_present(dev))
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +09003231 return;
Patrick McHardy4417da62007-06-27 01:28:10 -07003232
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003233 if (ops->ndo_set_rx_mode)
3234 ops->ndo_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003235 else {
3236 /* Unicast addresses changes may only happen under the rtnl,
3237 * therefore calling __dev_set_promiscuity here is safe.
3238 */
3239 if (dev->uc_count > 0 && !dev->uc_promisc) {
3240 __dev_set_promiscuity(dev, 1);
3241 dev->uc_promisc = 1;
3242 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3243 __dev_set_promiscuity(dev, -1);
3244 dev->uc_promisc = 0;
3245 }
3246
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003247 if (ops->ndo_set_multicast_list)
3248 ops->ndo_set_multicast_list(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003249 }
3250}
3251
/* Locked wrapper: refresh the device RX filter under netif_addr_lock
 * (BH-disabling variant), which protects the address lists.
 */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
}
3258
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003259int __dev_addr_delete(struct dev_addr_list **list, int *count,
3260 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003261{
3262 struct dev_addr_list *da;
3263
3264 for (; (da = *list) != NULL; list = &da->next) {
3265 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3266 alen == da->da_addrlen) {
3267 if (glbl) {
3268 int old_glbl = da->da_gusers;
3269 da->da_gusers = 0;
3270 if (old_glbl == 0)
3271 break;
3272 }
3273 if (--da->da_users)
3274 return 0;
3275
3276 *list = da->next;
3277 kfree(da);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003278 (*count)--;
Patrick McHardybf742482007-06-27 01:26:19 -07003279 return 0;
3280 }
3281 }
3282 return -ENOENT;
3283}
3284
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003285int __dev_addr_add(struct dev_addr_list **list, int *count,
3286 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003287{
3288 struct dev_addr_list *da;
3289
3290 for (da = *list; da != NULL; da = da->next) {
3291 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3292 da->da_addrlen == alen) {
3293 if (glbl) {
3294 int old_glbl = da->da_gusers;
3295 da->da_gusers = 1;
3296 if (old_glbl)
3297 return 0;
3298 }
3299 da->da_users++;
3300 return 0;
3301 }
3302 }
3303
Jorge Boncompte [DTI2]12aa3432008-02-19 14:17:04 -08003304 da = kzalloc(sizeof(*da), GFP_ATOMIC);
Patrick McHardybf742482007-06-27 01:26:19 -07003305 if (da == NULL)
3306 return -ENOMEM;
3307 memcpy(da->da_addr, addr, alen);
3308 da->da_addrlen = alen;
3309 da->da_users = 1;
3310 da->da_gusers = glbl ? 1 : 0;
3311 da->next = *list;
3312 *list = da;
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003313 (*count)++;
Patrick McHardybf742482007-06-27 01:26:19 -07003314 return 0;
3315}
3316
Patrick McHardy4417da62007-06-27 01:28:10 -07003317/**
3318 * dev_unicast_delete - Release secondary unicast address.
3319 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003320 * @addr: address to delete
3321 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07003322 *
3323 * Release reference to a secondary unicast address and remove it
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003324 * from the device if the reference count drops to zero.
Patrick McHardy4417da62007-06-27 01:28:10 -07003325 *
3326 * The caller must hold the rtnl_mutex.
3327 */
3328int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3329{
3330 int err;
3331
3332 ASSERT_RTNL();
3333
David S. Millerb9e40852008-07-15 00:15:08 -07003334 netif_addr_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003335 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3336 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07003337 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003338 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003339 return err;
3340}
3341EXPORT_SYMBOL(dev_unicast_delete);
3342
3343/**
3344 * dev_unicast_add - add a secondary unicast address
3345 * @dev: device
Wang Chen5dbaec52008-06-27 19:35:16 -07003346 * @addr: address to add
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003347 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07003348 *
3349 * Add a secondary unicast address to the device or increase
3350 * the reference count if it already exists.
3351 *
3352 * The caller must hold the rtnl_mutex.
3353 */
3354int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3355{
3356 int err;
3357
3358 ASSERT_RTNL();
3359
David S. Millerb9e40852008-07-15 00:15:08 -07003360 netif_addr_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003361 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3362 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07003363 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003364 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003365 return err;
3366}
3367EXPORT_SYMBOL(dev_unicast_add);
3368
Chris Leeche83a2ea2008-01-31 16:53:23 -08003369int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3370 struct dev_addr_list **from, int *from_count)
3371{
3372 struct dev_addr_list *da, *next;
3373 int err = 0;
3374
3375 da = *from;
3376 while (da != NULL) {
3377 next = da->next;
3378 if (!da->da_synced) {
3379 err = __dev_addr_add(to, to_count,
3380 da->da_addr, da->da_addrlen, 0);
3381 if (err < 0)
3382 break;
3383 da->da_synced = 1;
3384 da->da_users++;
3385 } else if (da->da_users == 1) {
3386 __dev_addr_delete(to, to_count,
3387 da->da_addr, da->da_addrlen, 0);
3388 __dev_addr_delete(from, from_count,
3389 da->da_addr, da->da_addrlen, 0);
3390 }
3391 da = next;
3392 }
3393 return err;
3394}
3395
3396void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3397 struct dev_addr_list **from, int *from_count)
3398{
3399 struct dev_addr_list *da, *next;
3400
3401 da = *from;
3402 while (da != NULL) {
3403 next = da->next;
3404 if (da->da_synced) {
3405 __dev_addr_delete(to, to_count,
3406 da->da_addr, da->da_addrlen, 0);
3407 da->da_synced = 0;
3408 __dev_addr_delete(from, from_count,
3409 da->da_addr, da->da_addrlen, 0);
3410 }
3411 da = next;
3412 }
3413}
3414
3415/**
3416 * dev_unicast_sync - Synchronize device's unicast list to another device
3417 * @to: destination device
3418 * @from: source device
3419 *
3420 * Add newly added addresses to the destination device and release
3421 * addresses that have no users left. The source device must be
3422 * locked by netif_tx_lock_bh.
3423 *
3424 * This function is intended to be called from the dev->set_rx_mode
3425 * function of layered software devices.
3426 */
3427int dev_unicast_sync(struct net_device *to, struct net_device *from)
3428{
3429 int err = 0;
3430
David S. Millerb9e40852008-07-15 00:15:08 -07003431 netif_addr_lock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003432 err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3433 &from->uc_list, &from->uc_count);
3434 if (!err)
3435 __dev_set_rx_mode(to);
David S. Millerb9e40852008-07-15 00:15:08 -07003436 netif_addr_unlock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003437 return err;
3438}
3439EXPORT_SYMBOL(dev_unicast_sync);
3440
3441/**
Randy Dunlapbc2cda12008-02-13 15:03:25 -08003442 * dev_unicast_unsync - Remove synchronized addresses from the destination device
Chris Leeche83a2ea2008-01-31 16:53:23 -08003443 * @to: destination device
3444 * @from: source device
3445 *
3446 * Remove all addresses that were added to the destination device by
3447 * dev_unicast_sync(). This function is intended to be called from the
3448 * dev->stop function of layered software devices.
3449 */
3450void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3451{
David S. Millerb9e40852008-07-15 00:15:08 -07003452 netif_addr_lock_bh(from);
David S. Millere308a5d2008-07-15 00:13:44 -07003453 netif_addr_lock(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003454
3455 __dev_addr_unsync(&to->uc_list, &to->uc_count,
3456 &from->uc_list, &from->uc_count);
3457 __dev_set_rx_mode(to);
3458
David S. Millere308a5d2008-07-15 00:13:44 -07003459 netif_addr_unlock(to);
David S. Millerb9e40852008-07-15 00:15:08 -07003460 netif_addr_unlock_bh(from);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003461}
3462EXPORT_SYMBOL(dev_unicast_unsync);
3463
Denis Cheng12972622007-07-18 02:12:56 -07003464static void __dev_addr_discard(struct dev_addr_list **list)
3465{
3466 struct dev_addr_list *tmp;
3467
3468 while (*list != NULL) {
3469 tmp = *list;
3470 *list = tmp->next;
3471 if (tmp->da_users > tmp->da_gusers)
3472 printk("__dev_addr_discard: address leakage! "
3473 "da_users=%d\n", tmp->da_users);
3474 kfree(tmp);
3475 }
3476}
3477
Denis Cheng26cc2522007-07-18 02:12:03 -07003478static void dev_addr_discard(struct net_device *dev)
Patrick McHardy4417da62007-06-27 01:28:10 -07003479{
David S. Millerb9e40852008-07-15 00:15:08 -07003480 netif_addr_lock_bh(dev);
Denis Cheng26cc2522007-07-18 02:12:03 -07003481
Patrick McHardy4417da62007-06-27 01:28:10 -07003482 __dev_addr_discard(&dev->uc_list);
3483 dev->uc_count = 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003484
Denis Cheng456ad752007-07-18 02:10:54 -07003485 __dev_addr_discard(&dev->mc_list);
3486 dev->mc_count = 0;
Denis Cheng26cc2522007-07-18 02:12:03 -07003487
David S. Millerb9e40852008-07-15 00:15:08 -07003488 netif_addr_unlock_bh(dev);
Denis Cheng456ad752007-07-18 02:10:54 -07003489}
3490
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003491/**
3492 * dev_get_flags - get flags reported to userspace
3493 * @dev: device
3494 *
3495 * Get the combination of flag bits exported through APIs to userspace.
3496 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003497unsigned dev_get_flags(const struct net_device *dev)
3498{
3499 unsigned flags;
3500
3501 flags = (dev->flags & ~(IFF_PROMISC |
3502 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08003503 IFF_RUNNING |
3504 IFF_LOWER_UP |
3505 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07003506 (dev->gflags & (IFF_PROMISC |
3507 IFF_ALLMULTI));
3508
Stefan Rompfb00055a2006-03-20 17:09:11 -08003509 if (netif_running(dev)) {
3510 if (netif_oper_up(dev))
3511 flags |= IFF_RUNNING;
3512 if (netif_carrier_ok(dev))
3513 flags |= IFF_LOWER_UP;
3514 if (netif_dormant(dev))
3515 flags |= IFF_DORMANT;
3516 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003517
3518 return flags;
3519}
3520
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003521/**
3522 * dev_change_flags - change device settings
3523 * @dev: device
3524 * @flags: device state flags
3525 *
3526 * Change settings on device based state flags. The flags are
3527 * in the userspace exported format.
3528 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003529int dev_change_flags(struct net_device *dev, unsigned flags)
3530{
Thomas Graf7c355f52007-06-05 16:03:03 -07003531 int ret, changes;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003532 int old_flags = dev->flags;
3533
Patrick McHardy24023452007-07-14 18:51:31 -07003534 ASSERT_RTNL();
3535
Linus Torvalds1da177e2005-04-16 15:20:36 -07003536 /*
3537 * Set the flags on our device.
3538 */
3539
3540 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3541 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3542 IFF_AUTOMEDIA)) |
3543 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3544 IFF_ALLMULTI));
3545
3546 /*
3547 * Load in the correct multicast list now the flags have changed.
3548 */
3549
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003550 if ((old_flags ^ flags) & IFF_MULTICAST)
3551 dev_change_rx_flags(dev, IFF_MULTICAST);
Patrick McHardy24023452007-07-14 18:51:31 -07003552
Patrick McHardy4417da62007-06-27 01:28:10 -07003553 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003554
3555 /*
3556 * Have we downed the interface. We handle IFF_UP ourselves
3557 * according to user attempts to set it, rather than blindly
3558 * setting it.
3559 */
3560
3561 ret = 0;
3562 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
3563 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3564
3565 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07003566 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003567 }
3568
3569 if (dev->flags & IFF_UP &&
3570 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3571 IFF_VOLATILE)))
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003572 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003573
3574 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3575 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3576 dev->gflags ^= IFF_PROMISC;
3577 dev_set_promiscuity(dev, inc);
3578 }
3579
3580 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3581 is important. Some (broken) drivers set IFF_PROMISC, when
3582 IFF_ALLMULTI is requested not asking us and not reporting.
3583 */
3584 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3585 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3586 dev->gflags ^= IFF_ALLMULTI;
3587 dev_set_allmulti(dev, inc);
3588 }
3589
Thomas Graf7c355f52007-06-05 16:03:03 -07003590 /* Exclude state transition flags, already notified */
3591 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3592 if (changes)
3593 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003594
3595 return ret;
3596}
3597
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003598/**
3599 * dev_set_mtu - Change maximum transfer unit
3600 * @dev: device
3601 * @new_mtu: new transfer unit
3602 *
3603 * Change the maximum transfer size of the network device.
3604 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003605int dev_set_mtu(struct net_device *dev, int new_mtu)
3606{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003607 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003608 int err;
3609
3610 if (new_mtu == dev->mtu)
3611 return 0;
3612
3613 /* MTU must be positive. */
3614 if (new_mtu < 0)
3615 return -EINVAL;
3616
3617 if (!netif_device_present(dev))
3618 return -ENODEV;
3619
3620 err = 0;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003621 if (ops->ndo_change_mtu)
3622 err = ops->ndo_change_mtu(dev, new_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003623 else
3624 dev->mtu = new_mtu;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003625
Linus Torvalds1da177e2005-04-16 15:20:36 -07003626 if (!err && dev->flags & IFF_UP)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003627 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003628 return err;
3629}
3630
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003631/**
3632 * dev_set_mac_address - Change Media Access Control Address
3633 * @dev: device
3634 * @sa: new address
3635 *
3636 * Change the hardware (MAC) address of the device
3637 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003638int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3639{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003640 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003641 int err;
3642
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003643 if (!ops->ndo_set_mac_address)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003644 return -EOPNOTSUPP;
3645 if (sa->sa_family != dev->type)
3646 return -EINVAL;
3647 if (!netif_device_present(dev))
3648 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003649 err = ops->ndo_set_mac_address(dev, sa);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003650 if (!err)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003651 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003652 return err;
3653}
3654
3655/*
Jeff Garzik14e3e072007-10-08 00:06:32 -07003656 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003657 */
Jeff Garzik14e3e072007-10-08 00:06:32 -07003658static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003659{
3660 int err;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003661 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003662
3663 if (!dev)
3664 return -ENODEV;
3665
3666 switch (cmd) {
3667 case SIOCGIFFLAGS: /* Get interface flags */
3668 ifr->ifr_flags = dev_get_flags(dev);
3669 return 0;
3670
Linus Torvalds1da177e2005-04-16 15:20:36 -07003671 case SIOCGIFMETRIC: /* Get the metric on the interface
3672 (currently unused) */
3673 ifr->ifr_metric = 0;
3674 return 0;
3675
Linus Torvalds1da177e2005-04-16 15:20:36 -07003676 case SIOCGIFMTU: /* Get the MTU of a device */
3677 ifr->ifr_mtu = dev->mtu;
3678 return 0;
3679
Linus Torvalds1da177e2005-04-16 15:20:36 -07003680 case SIOCGIFHWADDR:
3681 if (!dev->addr_len)
3682 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3683 else
3684 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3685 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3686 ifr->ifr_hwaddr.sa_family = dev->type;
3687 return 0;
3688
Jeff Garzik14e3e072007-10-08 00:06:32 -07003689 case SIOCGIFSLAVE:
3690 err = -EINVAL;
3691 break;
3692
3693 case SIOCGIFMAP:
3694 ifr->ifr_map.mem_start = dev->mem_start;
3695 ifr->ifr_map.mem_end = dev->mem_end;
3696 ifr->ifr_map.base_addr = dev->base_addr;
3697 ifr->ifr_map.irq = dev->irq;
3698 ifr->ifr_map.dma = dev->dma;
3699 ifr->ifr_map.port = dev->if_port;
3700 return 0;
3701
3702 case SIOCGIFINDEX:
3703 ifr->ifr_ifindex = dev->ifindex;
3704 return 0;
3705
3706 case SIOCGIFTXQLEN:
3707 ifr->ifr_qlen = dev->tx_queue_len;
3708 return 0;
3709
3710 default:
3711 /* dev_ioctl() should ensure this case
3712 * is never reached
3713 */
3714 WARN_ON(1);
3715 err = -EINVAL;
3716 break;
3717
3718 }
3719 return err;
3720}
3721
3722/*
3723 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
3724 */
3725static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3726{
3727 int err;
3728 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08003729 const struct net_device_ops *ops;
Jeff Garzik14e3e072007-10-08 00:06:32 -07003730
3731 if (!dev)
3732 return -ENODEV;
3733
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08003734 ops = dev->netdev_ops;
3735
Jeff Garzik14e3e072007-10-08 00:06:32 -07003736 switch (cmd) {
3737 case SIOCSIFFLAGS: /* Set interface flags */
3738 return dev_change_flags(dev, ifr->ifr_flags);
3739
3740 case SIOCSIFMETRIC: /* Set the metric on the interface
3741 (currently unused) */
3742 return -EOPNOTSUPP;
3743
3744 case SIOCSIFMTU: /* Set the MTU of a device */
3745 return dev_set_mtu(dev, ifr->ifr_mtu);
3746
Linus Torvalds1da177e2005-04-16 15:20:36 -07003747 case SIOCSIFHWADDR:
3748 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3749
3750 case SIOCSIFHWBROADCAST:
3751 if (ifr->ifr_hwaddr.sa_family != dev->type)
3752 return -EINVAL;
3753 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3754 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003755 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003756 return 0;
3757
Linus Torvalds1da177e2005-04-16 15:20:36 -07003758 case SIOCSIFMAP:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003759 if (ops->ndo_set_config) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003760 if (!netif_device_present(dev))
3761 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003762 return ops->ndo_set_config(dev, &ifr->ifr_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003763 }
3764 return -EOPNOTSUPP;
3765
3766 case SIOCADDMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003767 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003768 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3769 return -EINVAL;
3770 if (!netif_device_present(dev))
3771 return -ENODEV;
3772 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3773 dev->addr_len, 1);
3774
3775 case SIOCDELMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003776 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003777 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3778 return -EINVAL;
3779 if (!netif_device_present(dev))
3780 return -ENODEV;
3781 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3782 dev->addr_len, 1);
3783
Linus Torvalds1da177e2005-04-16 15:20:36 -07003784 case SIOCSIFTXQLEN:
3785 if (ifr->ifr_qlen < 0)
3786 return -EINVAL;
3787 dev->tx_queue_len = ifr->ifr_qlen;
3788 return 0;
3789
3790 case SIOCSIFNAME:
3791 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3792 return dev_change_name(dev, ifr->ifr_newname);
3793
3794 /*
3795 * Unknown or private ioctl
3796 */
3797
3798 default:
3799 if ((cmd >= SIOCDEVPRIVATE &&
3800 cmd <= SIOCDEVPRIVATE + 15) ||
3801 cmd == SIOCBONDENSLAVE ||
3802 cmd == SIOCBONDRELEASE ||
3803 cmd == SIOCBONDSETHWADDR ||
3804 cmd == SIOCBONDSLAVEINFOQUERY ||
3805 cmd == SIOCBONDINFOQUERY ||
3806 cmd == SIOCBONDCHANGEACTIVE ||
3807 cmd == SIOCGMIIPHY ||
3808 cmd == SIOCGMIIREG ||
3809 cmd == SIOCSMIIREG ||
3810 cmd == SIOCBRADDIF ||
3811 cmd == SIOCBRDELIF ||
3812 cmd == SIOCWANDEV) {
3813 err = -EOPNOTSUPP;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003814 if (ops->ndo_do_ioctl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003815 if (netif_device_present(dev))
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003816 err = ops->ndo_do_ioctl(dev, ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003817 else
3818 err = -ENODEV;
3819 }
3820 } else
3821 err = -EINVAL;
3822
3823 }
3824 return err;
3825}
3826
3827/*
3828 * This function handles all "interface"-type I/O control requests. The actual
3829 * 'doing' part of this is dev_ifsioc above.
3830 */
3831
3832/**
3833 * dev_ioctl - network device ioctl
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07003834 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07003835 * @cmd: command to issue
3836 * @arg: pointer to a struct ifreq in user space
3837 *
3838 * Issue ioctl functions to devices. This is normally called by the
3839 * user space syscall interfaces but can sometimes be useful for
3840 * other purposes. The return value is the return from the syscall if
3841 * positive or a negative errno code on error.
3842 */
3843
Eric W. Biederman881d9662007-09-17 11:56:21 -07003844int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003845{
3846 struct ifreq ifr;
3847 int ret;
3848 char *colon;
3849
3850 /* One special case: SIOCGIFCONF takes ifconf argument
3851 and requires shared lock, because it sleeps writing
3852 to user space.
3853 */
3854
3855 if (cmd == SIOCGIFCONF) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08003856 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003857 ret = dev_ifconf(net, (char __user *) arg);
Stephen Hemminger6756ae42006-03-20 22:23:58 -08003858 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003859 return ret;
3860 }
3861 if (cmd == SIOCGIFNAME)
Eric W. Biederman881d9662007-09-17 11:56:21 -07003862 return dev_ifname(net, (struct ifreq __user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003863
3864 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3865 return -EFAULT;
3866
3867 ifr.ifr_name[IFNAMSIZ-1] = 0;
3868
3869 colon = strchr(ifr.ifr_name, ':');
3870 if (colon)
3871 *colon = 0;
3872
3873 /*
3874 * See which interface the caller is talking about.
3875 */
3876
3877 switch (cmd) {
3878 /*
3879 * These ioctl calls:
3880 * - can be done by all.
3881 * - atomic and do not require locking.
3882 * - return a value
3883 */
3884 case SIOCGIFFLAGS:
3885 case SIOCGIFMETRIC:
3886 case SIOCGIFMTU:
3887 case SIOCGIFHWADDR:
3888 case SIOCGIFSLAVE:
3889 case SIOCGIFMAP:
3890 case SIOCGIFINDEX:
3891 case SIOCGIFTXQLEN:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003892 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003893 read_lock(&dev_base_lock);
Jeff Garzik14e3e072007-10-08 00:06:32 -07003894 ret = dev_ifsioc_locked(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003895 read_unlock(&dev_base_lock);
3896 if (!ret) {
3897 if (colon)
3898 *colon = ':';
3899 if (copy_to_user(arg, &ifr,
3900 sizeof(struct ifreq)))
3901 ret = -EFAULT;
3902 }
3903 return ret;
3904
3905 case SIOCETHTOOL:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003906 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003907 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003908 ret = dev_ethtool(net, &ifr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003909 rtnl_unlock();
3910 if (!ret) {
3911 if (colon)
3912 *colon = ':';
3913 if (copy_to_user(arg, &ifr,
3914 sizeof(struct ifreq)))
3915 ret = -EFAULT;
3916 }
3917 return ret;
3918
3919 /*
3920 * These ioctl calls:
3921 * - require superuser power.
3922 * - require strict serialization.
3923 * - return a value
3924 */
3925 case SIOCGMIIPHY:
3926 case SIOCGMIIREG:
3927 case SIOCSIFNAME:
3928 if (!capable(CAP_NET_ADMIN))
3929 return -EPERM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003930 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003931 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003932 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003933 rtnl_unlock();
3934 if (!ret) {
3935 if (colon)
3936 *colon = ':';
3937 if (copy_to_user(arg, &ifr,
3938 sizeof(struct ifreq)))
3939 ret = -EFAULT;
3940 }
3941 return ret;
3942
3943 /*
3944 * These ioctl calls:
3945 * - require superuser power.
3946 * - require strict serialization.
3947 * - do not return a value
3948 */
3949 case SIOCSIFFLAGS:
3950 case SIOCSIFMETRIC:
3951 case SIOCSIFMTU:
3952 case SIOCSIFMAP:
3953 case SIOCSIFHWADDR:
3954 case SIOCSIFSLAVE:
3955 case SIOCADDMULTI:
3956 case SIOCDELMULTI:
3957 case SIOCSIFHWBROADCAST:
3958 case SIOCSIFTXQLEN:
3959 case SIOCSMIIREG:
3960 case SIOCBONDENSLAVE:
3961 case SIOCBONDRELEASE:
3962 case SIOCBONDSETHWADDR:
Linus Torvalds1da177e2005-04-16 15:20:36 -07003963 case SIOCBONDCHANGEACTIVE:
3964 case SIOCBRADDIF:
3965 case SIOCBRDELIF:
3966 if (!capable(CAP_NET_ADMIN))
3967 return -EPERM;
Thomas Grafcabcac02006-01-24 12:46:33 -08003968 /* fall through */
3969 case SIOCBONDSLAVEINFOQUERY:
3970 case SIOCBONDINFOQUERY:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003971 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003972 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003973 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003974 rtnl_unlock();
3975 return ret;
3976
3977 case SIOCGIFMEM:
3978 /* Get the per device memory space. We can add this but
3979 * currently do not support it */
3980 case SIOCSIFMEM:
3981 /* Set the per device memory buffer space.
3982 * Not applicable in our case */
3983 case SIOCSIFLINK:
3984 return -EINVAL;
3985
3986 /*
3987 * Unknown or private ioctl.
3988 */
3989 default:
3990 if (cmd == SIOCWANDEV ||
3991 (cmd >= SIOCDEVPRIVATE &&
3992 cmd <= SIOCDEVPRIVATE + 15)) {
Eric W. Biederman881d9662007-09-17 11:56:21 -07003993 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003994 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003995 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003996 rtnl_unlock();
3997 if (!ret && copy_to_user(arg, &ifr,
3998 sizeof(struct ifreq)))
3999 ret = -EFAULT;
4000 return ret;
4001 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004002 /* Take care of Wireless Extensions */
Johannes Berg295f4a12007-04-26 20:43:56 -07004003 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004004 return wext_handle_ioctl(net, &ifr, cmd, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004005 return -EINVAL;
4006 }
4007}
4008
4009
4010/**
4011 * dev_new_index - allocate an ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004012 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07004013 *
4014 * Returns a suitable unique value for a new device interface
4015 * number. The caller must hold the rtnl semaphore or the
4016 * dev_base_lock to be sure it remains unique.
4017 */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004018static int dev_new_index(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004019{
4020 static int ifindex;
4021 for (;;) {
4022 if (++ifindex <= 0)
4023 ifindex = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004024 if (!__dev_get_by_index(net, ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004025 return ifindex;
4026 }
4027}
4028
Linus Torvalds1da177e2005-04-16 15:20:36 -07004029/* Delayed registration/unregisteration */
Denis Cheng3b5b34f2007-12-07 00:49:17 -08004030static LIST_HEAD(net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004031
Stephen Hemminger6f05f622007-03-08 20:46:03 -08004032static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004033{
Linus Torvalds1da177e2005-04-16 15:20:36 -07004034 list_add_tail(&dev->todo_list, &net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004035}
4036
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004037static void rollback_registered(struct net_device *dev)
4038{
4039 BUG_ON(dev_boot_phase);
4040 ASSERT_RTNL();
4041
4042 /* Some devices call without registering for initialization unwind. */
4043 if (dev->reg_state == NETREG_UNINITIALIZED) {
4044 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4045 "was registered\n", dev->name, dev);
4046
4047 WARN_ON(1);
4048 return;
4049 }
4050
4051 BUG_ON(dev->reg_state != NETREG_REGISTERED);
4052
4053 /* If device is running, close it first. */
4054 dev_close(dev);
4055
4056 /* And unlink it from device chain. */
4057 unlist_netdevice(dev);
4058
4059 dev->reg_state = NETREG_UNREGISTERING;
4060
4061 synchronize_net();
4062
4063 /* Shutdown queueing discipline. */
4064 dev_shutdown(dev);
4065
4066
4067 /* Notify protocols, that we are about to destroy
4068 this device. They should clean all the things.
4069 */
4070 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4071
4072 /*
4073 * Flush the unicast and multicast chains
4074 */
4075 dev_addr_discard(dev);
4076
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004077 if (dev->netdev_ops->ndo_uninit)
4078 dev->netdev_ops->ndo_uninit(dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004079
4080 /* Notifier chain MUST detach us from master device. */
Ilpo Järvinen547b7922008-07-25 21:43:18 -07004081 WARN_ON(dev->master);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004082
4083 /* Remove entries from kobject tree */
4084 netdev_unregister_kobject(dev);
4085
4086 synchronize_net();
4087
4088 dev_put(dev);
4089}
4090
David S. Millere8a04642008-07-17 00:34:19 -07004091static void __netdev_init_queue_locks_one(struct net_device *dev,
4092 struct netdev_queue *dev_queue,
4093 void *_unused)
David S. Millerc773e842008-07-08 23:13:53 -07004094{
4095 spin_lock_init(&dev_queue->_xmit_lock);
David S. Millercf508b12008-07-22 14:16:42 -07004096 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
David S. Millerc773e842008-07-08 23:13:53 -07004097 dev_queue->xmit_lock_owner = -1;
4098}
4099
4100static void netdev_init_queue_locks(struct net_device *dev)
4101{
David S. Millere8a04642008-07-17 00:34:19 -07004102 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4103 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
David S. Millerc773e842008-07-08 23:13:53 -07004104}
4105
Herbert Xub63365a2008-10-23 01:11:29 -07004106unsigned long netdev_fix_features(unsigned long features, const char *name)
4107{
4108 /* Fix illegal SG+CSUM combinations. */
4109 if ((features & NETIF_F_SG) &&
4110 !(features & NETIF_F_ALL_CSUM)) {
4111 if (name)
4112 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4113 "checksum feature.\n", name);
4114 features &= ~NETIF_F_SG;
4115 }
4116
4117 /* TSO requires that SG is present as well. */
4118 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4119 if (name)
4120 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4121 "SG feature.\n", name);
4122 features &= ~NETIF_F_TSO;
4123 }
4124
4125 if (features & NETIF_F_UFO) {
4126 if (!(features & NETIF_F_GEN_CSUM)) {
4127 if (name)
4128 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4129 "since no NETIF_F_HW_CSUM feature.\n",
4130 name);
4131 features &= ~NETIF_F_UFO;
4132 }
4133
4134 if (!(features & NETIF_F_SG)) {
4135 if (name)
4136 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4137 "since no NETIF_F_SG feature.\n", name);
4138 features &= ~NETIF_F_UFO;
4139 }
4140 }
4141
4142 return features;
4143}
4144EXPORT_SYMBOL(netdev_fix_features);
4145
Linus Torvalds1da177e2005-04-16 15:20:36 -07004146/**
4147 * register_netdevice - register a network device
4148 * @dev: device to register
4149 *
4150 * Take a completed network device structure and add it to the kernel
4151 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4152 * chain. 0 is returned on success. A negative errno code is returned
4153 * on a failure to set up the device, or if the name is a duplicate.
4154 *
4155 * Callers must hold the rtnl semaphore. You may want
4156 * register_netdev() instead of this.
4157 *
4158 * BUGS:
4159 * The locking appears insufficient to guarantee two parallel registers
4160 * will not get the same name.
4161 */
4162
4163int register_netdevice(struct net_device *dev)
4164{
4165 struct hlist_head *head;
4166 struct hlist_node *p;
4167 int ret;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004168 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004169
4170 BUG_ON(dev_boot_phase);
4171 ASSERT_RTNL();
4172
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004173 might_sleep();
4174
Linus Torvalds1da177e2005-04-16 15:20:36 -07004175 /* When net_device's are persistent, this will be fatal. */
4176 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004177 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004178
David S. Millerf1f28aa2008-07-15 00:08:33 -07004179 spin_lock_init(&dev->addr_list_lock);
David S. Millercf508b12008-07-22 14:16:42 -07004180 netdev_set_addr_lockdep_class(dev);
David S. Millerc773e842008-07-08 23:13:53 -07004181 netdev_init_queue_locks(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004182
Linus Torvalds1da177e2005-04-16 15:20:36 -07004183 dev->iflink = -1;
4184
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004185#ifdef CONFIG_COMPAT_NET_DEV_OPS
4186 /* Netdevice_ops API compatiability support.
4187 * This is temporary until all network devices are converted.
4188 */
4189 if (dev->netdev_ops) {
4190 const struct net_device_ops *ops = dev->netdev_ops;
4191
4192 dev->init = ops->ndo_init;
4193 dev->uninit = ops->ndo_uninit;
4194 dev->open = ops->ndo_open;
4195 dev->change_rx_flags = ops->ndo_change_rx_flags;
4196 dev->set_rx_mode = ops->ndo_set_rx_mode;
4197 dev->set_multicast_list = ops->ndo_set_multicast_list;
4198 dev->set_mac_address = ops->ndo_set_mac_address;
4199 dev->validate_addr = ops->ndo_validate_addr;
4200 dev->do_ioctl = ops->ndo_do_ioctl;
4201 dev->set_config = ops->ndo_set_config;
4202 dev->change_mtu = ops->ndo_change_mtu;
4203 dev->tx_timeout = ops->ndo_tx_timeout;
4204 dev->get_stats = ops->ndo_get_stats;
4205 dev->vlan_rx_register = ops->ndo_vlan_rx_register;
4206 dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
4207 dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
4208#ifdef CONFIG_NET_POLL_CONTROLLER
4209 dev->poll_controller = ops->ndo_poll_controller;
4210#endif
4211 } else {
4212 char drivername[64];
4213 pr_info("%s (%s): not using net_device_ops yet\n",
4214 dev->name, netdev_drivername(dev, drivername, 64));
4215
4216 /* This works only because net_device_ops and the
4217 compatiablity structure are the same. */
4218 dev->netdev_ops = (void *) &(dev->init);
4219 }
4220#endif
4221
Linus Torvalds1da177e2005-04-16 15:20:36 -07004222 /* Init, if this function is available */
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004223 if (dev->netdev_ops->ndo_init) {
4224 ret = dev->netdev_ops->ndo_init(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004225 if (ret) {
4226 if (ret > 0)
4227 ret = -EIO;
Adrian Bunk90833aa2006-11-13 16:02:22 -08004228 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004229 }
4230 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004231
Linus Torvalds1da177e2005-04-16 15:20:36 -07004232 if (!dev_valid_name(dev->name)) {
4233 ret = -EINVAL;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004234 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004235 }
4236
Eric W. Biederman881d9662007-09-17 11:56:21 -07004237 dev->ifindex = dev_new_index(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004238 if (dev->iflink == -1)
4239 dev->iflink = dev->ifindex;
4240
4241 /* Check for existence of name */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004242 head = dev_name_hash(net, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004243 hlist_for_each(p, head) {
4244 struct net_device *d
4245 = hlist_entry(p, struct net_device, name_hlist);
4246 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4247 ret = -EEXIST;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004248 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004249 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004250 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004251
Stephen Hemmingerd212f872007-06-27 00:47:37 -07004252 /* Fix illegal checksum combinations */
4253 if ((dev->features & NETIF_F_HW_CSUM) &&
4254 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4255 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4256 dev->name);
4257 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4258 }
4259
4260 if ((dev->features & NETIF_F_NO_CSUM) &&
4261 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4262 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4263 dev->name);
4264 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4265 }
4266
Herbert Xub63365a2008-10-23 01:11:29 -07004267 dev->features = netdev_fix_features(dev->features, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004268
Lennert Buytenheke5a4a722008-08-03 01:23:10 -07004269 /* Enable software GSO if SG is supported. */
4270 if (dev->features & NETIF_F_SG)
4271 dev->features |= NETIF_F_GSO;
4272
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07004273 netdev_initialize_kobject(dev);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004274 ret = netdev_register_kobject(dev);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004275 if (ret)
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004276 goto err_uninit;
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004277 dev->reg_state = NETREG_REGISTERED;
4278
Linus Torvalds1da177e2005-04-16 15:20:36 -07004279 /*
4280 * Default initial state at registry is that the
4281 * device is present.
4282 */
4283
4284 set_bit(__LINK_STATE_PRESENT, &dev->state);
4285
Linus Torvalds1da177e2005-04-16 15:20:36 -07004286 dev_init_scheduler(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004287 dev_hold(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004288 list_netdevice(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004289
4290 /* Notify protocols, that a new device appeared. */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004291 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07004292 ret = notifier_to_errno(ret);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004293 if (ret) {
4294 rollback_registered(dev);
4295 dev->reg_state = NETREG_UNREGISTERED;
4296 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004297
4298out:
4299 return ret;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004300
4301err_uninit:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004302 if (dev->netdev_ops->ndo_uninit)
4303 dev->netdev_ops->ndo_uninit(dev);
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004304 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004305}
4306
4307/**
4308 * register_netdev - register a network device
4309 * @dev: device to register
4310 *
4311 * Take a completed network device structure and add it to the kernel
4312 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4313 * chain. 0 is returned on success. A negative errno code is returned
4314 * on a failure to set up the device, or if the name is a duplicate.
4315 *
Borislav Petkov38b4da32007-04-20 22:14:10 -07004316 * This is a wrapper around register_netdevice that takes the rtnl semaphore
Linus Torvalds1da177e2005-04-16 15:20:36 -07004317 * and expands the device name if you passed a format string to
4318 * alloc_netdev.
4319 */
4320int register_netdev(struct net_device *dev)
4321{
4322 int err;
4323
4324 rtnl_lock();
4325
4326 /*
4327 * If the name is a format string the caller wants us to do a
4328 * name allocation.
4329 */
4330 if (strchr(dev->name, '%')) {
4331 err = dev_alloc_name(dev, dev->name);
4332 if (err < 0)
4333 goto out;
4334 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004335
Linus Torvalds1da177e2005-04-16 15:20:36 -07004336 err = register_netdevice(dev);
4337out:
4338 rtnl_unlock();
4339 return err;
4340}
4341EXPORT_SYMBOL(register_netdev);
4342
/*
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 0) {
		/* Every second, re-nudge stragglers still holding refs. */
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		/* Complain loudly every 10 seconds we stay stuck. */
		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}
4393
/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * We must not return until all unregister events added during
 * the interval the lock was held have been completed.
 */
void netdev_run_todo(void)
{
	struct list_head list;

	/* Snapshot list, allow later requests */
	list_replace_init(&net_todo_list, &list);

	__rtnl_unlock();

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		/* Only devices mid-unregister belong on the todo list. */
		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		/* Purge any of this device's skbs still sitting in
		 * per-CPU backlog queues. */
		on_each_cpu(flush_backlog, dev, 1);

		/* Sleep until every outstanding reference is dropped. */
		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		WARN_ON(dev->ip_ptr);
		WARN_ON(dev->ip6_ptr);
		WARN_ON(dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}
}
4458
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08004459/**
4460 * dev_get_stats - get network device statistics
4461 * @dev: device to get statistics from
4462 *
4463 * Get network statistics from device. The device driver may provide
4464 * its own method by setting dev->netdev_ops->get_stats; otherwise
4465 * the internal statistics structure is used.
4466 */
4467const struct net_device_stats *dev_get_stats(struct net_device *dev)
4468 {
4469 const struct net_device_ops *ops = dev->netdev_ops;
4470
4471 if (ops->ndo_get_stats)
4472 return ops->ndo_get_stats(dev);
4473 else
4474 return &dev->stats;
Rusty Russellc45d2862007-03-28 14:29:08 -07004475}
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08004476EXPORT_SYMBOL(dev_get_stats);
Rusty Russellc45d2862007-03-28 14:29:08 -07004477
David S. Millerdc2b4842008-07-08 17:18:23 -07004478static void netdev_init_one_queue(struct net_device *dev,
David S. Millere8a04642008-07-17 00:34:19 -07004479 struct netdev_queue *queue,
4480 void *_unused)
David S. Millerdc2b4842008-07-08 17:18:23 -07004481{
David S. Millerdc2b4842008-07-08 17:18:23 -07004482 queue->dev = dev;
4483}
4484
David S. Millerbb949fb2008-07-08 16:55:56 -07004485static void netdev_init_queues(struct net_device *dev)
4486{
David S. Millere8a04642008-07-17 00:34:19 -07004487 netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4488 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
David S. Millerc3f26a22008-07-31 16:58:50 -07004489 spin_lock_init(&dev->tx_global_lock);
David S. Millerbb949fb2008-07-08 16:55:56 -07004490}
4491
Linus Torvalds1da177e2005-04-16 15:20:36 -07004492/**
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07004493 * alloc_netdev_mq - allocate network device
Linus Torvalds1da177e2005-04-16 15:20:36 -07004494 * @sizeof_priv: size of private data to allocate space for
4495 * @name: device name format string
4496 * @setup: callback to initialize device
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07004497 * @queue_count: the number of subqueues to allocate
Linus Torvalds1da177e2005-04-16 15:20:36 -07004498 *
4499 * Allocates a struct net_device with private data area for driver use
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07004500 * and performs basic initialization. Also allocates subquue structs
4501 * for each queue on the device at the end of the netdevice.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004502 */
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07004503struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4504 void (*setup)(struct net_device *), unsigned int queue_count)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004505{
David S. Millere8a04642008-07-17 00:34:19 -07004506 struct netdev_queue *tx;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004507 struct net_device *dev;
Stephen Hemminger79439862008-07-21 13:28:44 -07004508 size_t alloc_size;
David S. Millere8a04642008-07-17 00:34:19 -07004509 void *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004510
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07004511 BUG_ON(strlen(name) >= sizeof(dev->name));
4512
David S. Millerfd2ea0a2008-07-17 01:56:23 -07004513 alloc_size = sizeof(struct net_device);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07004514 if (sizeof_priv) {
4515 /* ensure 32-byte alignment of private area */
4516 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4517 alloc_size += sizeof_priv;
4518 }
4519 /* ensure 32-byte alignment of whole construct */
4520 alloc_size += NETDEV_ALIGN_CONST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004521
Paolo 'Blaisorblade' Giarrusso31380de2006-04-06 22:38:28 -07004522 p = kzalloc(alloc_size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004523 if (!p) {
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07004524 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07004525 return NULL;
4526 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004527
Stephen Hemminger79439862008-07-21 13:28:44 -07004528 tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
David S. Millere8a04642008-07-17 00:34:19 -07004529 if (!tx) {
4530 printk(KERN_ERR "alloc_netdev: Unable to allocate "
4531 "tx qdiscs.\n");
4532 kfree(p);
4533 return NULL;
4534 }
4535
Linus Torvalds1da177e2005-04-16 15:20:36 -07004536 dev = (struct net_device *)
4537 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4538 dev->padded = (char *)dev - (char *)p;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09004539 dev_net_set(dev, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004540
David S. Millere8a04642008-07-17 00:34:19 -07004541 dev->_tx = tx;
4542 dev->num_tx_queues = queue_count;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07004543 dev->real_num_tx_queues = queue_count;
David S. Millere8a04642008-07-17 00:34:19 -07004544
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07004545 dev->gso_max_size = GSO_MAX_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004546
David S. Millerbb949fb2008-07-08 16:55:56 -07004547 netdev_init_queues(dev);
4548
Herbert Xud565b0a2008-12-15 23:38:52 -08004549 INIT_LIST_HEAD(&dev->napi_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004550 setup(dev);
4551 strcpy(dev->name, name);
4552 return dev;
4553}
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07004554EXPORT_SYMBOL(alloc_netdev_mq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004555
/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released.
 *	If this is the last reference then it will be freed.
 */
void free_netdev(struct net_device *dev)
{
	struct napi_struct *p, *n;

	release_net(dev_net(dev));

	/* TX queue array was allocated separately in alloc_netdev_mq(). */
	kfree(dev->_tx);

	/* Tear down any NAPI contexts still attached to the device. */
	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
		netif_napi_del(p);

	/* Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		/* Never registered: no kobject exists, free the raw
		 * allocation directly (undoing the alignment padding). */
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
}
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004587
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004588/**
4589 * synchronize_net - Synchronize with packet receive processing
4590 *
4591 * Wait for packets currently being received to be done.
4592 * Does not block later packets from starting.
4593 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004594void synchronize_net(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004595{
4596 might_sleep();
Paul E. McKenneyfbd568a3e2005-05-01 08:59:04 -07004597 synchronize_rcu();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004598}
4599
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Do the bulk of the teardown now, under RTNL. */
	rollback_registered(dev);
	/* Finish processing unregister after unlock */
	net_set_todo(dev);
}
4619
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
4639
Eric W. Biedermance286d32007-09-12 13:53:49 +02004640/**
4641 * dev_change_net_namespace - move device to different nethost namespace
4642 * @dev: device
4643 * @net: network namespace
4644 * @pat: If not NULL name pattern to try if the current device name
4645 * is already taken in the destination network namespace.
4646 *
4647 * This function shuts down a device interface and moves it
4648 * to a new network namespace. On success 0 is returned, on
4649 * a failure a netagive errno code is returned.
4650 *
4651 * Callers must hold the rtnl semaphore.
4652 */
4653
4654int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4655{
4656 char buf[IFNAMSIZ];
4657 const char *destname;
4658 int err;
4659
4660 ASSERT_RTNL();
4661
4662 /* Don't allow namespace local devices to be moved. */
4663 err = -EINVAL;
4664 if (dev->features & NETIF_F_NETNS_LOCAL)
4665 goto out;
4666
Eric W. Biederman38918452008-10-27 17:51:47 -07004667#ifdef CONFIG_SYSFS
4668 /* Don't allow real devices to be moved when sysfs
4669 * is enabled.
4670 */
4671 err = -EINVAL;
4672 if (dev->dev.parent)
4673 goto out;
4674#endif
4675
Eric W. Biedermance286d32007-09-12 13:53:49 +02004676 /* Ensure the device has been registrered */
4677 err = -EINVAL;
4678 if (dev->reg_state != NETREG_REGISTERED)
4679 goto out;
4680
4681 /* Get out if there is nothing todo */
4682 err = 0;
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09004683 if (net_eq(dev_net(dev), net))
Eric W. Biedermance286d32007-09-12 13:53:49 +02004684 goto out;
4685
4686 /* Pick the destination device name, and ensure
4687 * we can use it in the destination network namespace.
4688 */
4689 err = -EEXIST;
4690 destname = dev->name;
4691 if (__dev_get_by_name(net, destname)) {
4692 /* We get here if we can't use the current device name */
4693 if (!pat)
4694 goto out;
4695 if (!dev_valid_name(pat))
4696 goto out;
4697 if (strchr(pat, '%')) {
4698 if (__dev_alloc_name(net, pat, buf) < 0)
4699 goto out;
4700 destname = buf;
4701 } else
4702 destname = pat;
4703 if (__dev_get_by_name(net, destname))
4704 goto out;
4705 }
4706
4707 /*
4708 * And now a mini version of register_netdevice unregister_netdevice.
4709 */
4710
4711 /* If device is running close it first. */
Pavel Emelyanov9b772652007-10-10 02:49:09 -07004712 dev_close(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004713
4714 /* And unlink it from device chain */
4715 err = -ENODEV;
4716 unlist_netdevice(dev);
4717
4718 synchronize_net();
4719
4720 /* Shutdown queueing discipline. */
4721 dev_shutdown(dev);
4722
4723 /* Notify protocols, that we are about to destroy
4724 this device. They should clean all the things.
4725 */
4726 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4727
4728 /*
4729 * Flush the unicast and multicast chains
4730 */
4731 dev_addr_discard(dev);
4732
Eric W. Biederman38918452008-10-27 17:51:47 -07004733 netdev_unregister_kobject(dev);
4734
Eric W. Biedermance286d32007-09-12 13:53:49 +02004735 /* Actually switch the network namespace */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09004736 dev_net_set(dev, net);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004737
4738 /* Assign the new device name */
4739 if (destname != dev->name)
4740 strcpy(dev->name, destname);
4741
4742 /* If there is an ifindex conflict assign a new one */
4743 if (__dev_get_by_index(net, dev->ifindex)) {
4744 int iflink = (dev->iflink == dev->ifindex);
4745 dev->ifindex = dev_new_index(net);
4746 if (iflink)
4747 dev->iflink = dev->ifindex;
4748 }
4749
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004750 /* Fixup kobjects */
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07004751 err = netdev_register_kobject(dev);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004752 WARN_ON(err);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004753
4754 /* Add the device back in the hashes */
4755 list_netdevice(dev);
4756
4757 /* Notify protocols, that a new device appeared. */
4758 call_netdevice_notifiers(NETDEV_REGISTER, dev);
4759
4760 synchronize_net();
4761 err = 0;
4762out:
4763 return err;
4764}
4765
Linus Torvalds1da177e2005-04-16 15:20:36 -07004766static int dev_cpu_callback(struct notifier_block *nfb,
4767 unsigned long action,
4768 void *ocpu)
4769{
4770 struct sk_buff **list_skb;
David S. Miller37437bb2008-07-16 02:15:04 -07004771 struct Qdisc **list_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004772 struct sk_buff *skb;
4773 unsigned int cpu, oldcpu = (unsigned long)ocpu;
4774 struct softnet_data *sd, *oldsd;
4775
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07004776 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004777 return NOTIFY_OK;
4778
4779 local_irq_disable();
4780 cpu = smp_processor_id();
4781 sd = &per_cpu(softnet_data, cpu);
4782 oldsd = &per_cpu(softnet_data, oldcpu);
4783
4784 /* Find end of our completion_queue. */
4785 list_skb = &sd->completion_queue;
4786 while (*list_skb)
4787 list_skb = &(*list_skb)->next;
4788 /* Append completion queue from offline CPU. */
4789 *list_skb = oldsd->completion_queue;
4790 oldsd->completion_queue = NULL;
4791
4792 /* Find end of our output_queue. */
4793 list_net = &sd->output_queue;
4794 while (*list_net)
4795 list_net = &(*list_net)->next_sched;
4796 /* Append output queue from offline CPU. */
4797 *list_net = oldsd->output_queue;
4798 oldsd->output_queue = NULL;
4799
4800 raise_softirq_irqoff(NET_TX_SOFTIRQ);
4801 local_irq_enable();
4802
4803 /* Process offline CPU's input_pkt_queue */
4804 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4805 netif_rx(skb);
4806
4807 return NOTIFY_OK;
4808}
Linus Torvalds1da177e2005-04-16 15:20:36 -07004809
4810
Herbert Xu7f353bf2007-08-10 15:47:58 -07004811/**
Herbert Xub63365a2008-10-23 01:11:29 -07004812 * netdev_increment_features - increment feature set by one
4813 * @all: current feature set
4814 * @one: new feature set
4815 * @mask: mask feature set
Herbert Xu7f353bf2007-08-10 15:47:58 -07004816 *
4817 * Computes a new feature set after adding a device with feature set
Herbert Xub63365a2008-10-23 01:11:29 -07004818 * @one to the master device with current feature set @all. Will not
4819 * enable anything that is off in @mask. Returns the new feature set.
Herbert Xu7f353bf2007-08-10 15:47:58 -07004820 */
Herbert Xub63365a2008-10-23 01:11:29 -07004821unsigned long netdev_increment_features(unsigned long all, unsigned long one,
4822 unsigned long mask)
Herbert Xu7f353bf2007-08-10 15:47:58 -07004823{
Herbert Xub63365a2008-10-23 01:11:29 -07004824 /* If device needs checksumming, downgrade to it. */
4825 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4826 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
4827 else if (mask & NETIF_F_ALL_CSUM) {
4828 /* If one device supports v4/v6 checksumming, set for all. */
4829 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
4830 !(all & NETIF_F_GEN_CSUM)) {
4831 all &= ~NETIF_F_ALL_CSUM;
4832 all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
4833 }
Herbert Xu7f353bf2007-08-10 15:47:58 -07004834
Herbert Xub63365a2008-10-23 01:11:29 -07004835 /* If one device supports hw checksumming, set for all. */
4836 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
4837 all &= ~NETIF_F_ALL_CSUM;
4838 all |= NETIF_F_HW_CSUM;
4839 }
4840 }
Herbert Xu7f353bf2007-08-10 15:47:58 -07004841
Herbert Xub63365a2008-10-23 01:11:29 -07004842 one |= NETIF_F_ALL_CSUM;
Herbert Xu7f353bf2007-08-10 15:47:58 -07004843
Herbert Xub63365a2008-10-23 01:11:29 -07004844 one |= all & NETIF_F_ONE_FOR_ALL;
4845 all &= one | NETIF_F_LLTX | NETIF_F_GSO;
4846 all |= one & mask & NETIF_F_ONE_FOR_ALL;
Herbert Xu7f353bf2007-08-10 15:47:58 -07004847
4848 return all;
4849}
Herbert Xub63365a2008-10-23 01:11:29 -07004850EXPORT_SYMBOL(netdev_increment_features);
Herbert Xu7f353bf2007-08-10 15:47:58 -07004851
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004852static struct hlist_head *netdev_create_hash(void)
4853{
4854 int i;
4855 struct hlist_head *hash;
4856
4857 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4858 if (hash != NULL)
4859 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4860 INIT_HLIST_HEAD(&hash[i]);
4861
4862 return hash;
4863}
4864
Eric W. Biederman881d9662007-09-17 11:56:21 -07004865/* Initialize per network namespace state */
Pavel Emelyanov46650792007-10-08 20:38:39 -07004866static int __net_init netdev_init(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004867{
Eric W. Biederman881d9662007-09-17 11:56:21 -07004868 INIT_LIST_HEAD(&net->dev_base_head);
Eric W. Biederman881d9662007-09-17 11:56:21 -07004869
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004870 net->dev_name_head = netdev_create_hash();
4871 if (net->dev_name_head == NULL)
4872 goto err_name;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004873
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004874 net->dev_index_head = netdev_create_hash();
4875 if (net->dev_index_head == NULL)
4876 goto err_idx;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004877
4878 return 0;
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004879
4880err_idx:
4881 kfree(net->dev_name_head);
4882err_name:
4883 return -ENOMEM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004884}
4885
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004886/**
4887 * netdev_drivername - network driver for the device
4888 * @dev: network device
4889 * @buffer: buffer for resulting name
4890 * @len: size of buffer
4891 *
4892 * Determine network driver for device.
4893 */
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07004894char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
Arjan van de Ven6579e572008-07-21 13:31:48 -07004895{
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07004896 const struct device_driver *driver;
4897 const struct device *parent;
Arjan van de Ven6579e572008-07-21 13:31:48 -07004898
4899 if (len <= 0 || !buffer)
4900 return buffer;
4901 buffer[0] = 0;
4902
4903 parent = dev->dev.parent;
4904
4905 if (!parent)
4906 return buffer;
4907
4908 driver = parent->driver;
4909 if (driver && driver->name)
4910 strlcpy(buffer, driver->name, len);
4911 return buffer;
4912}
4913
Pavel Emelyanov46650792007-10-08 20:38:39 -07004914static void __net_exit netdev_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004915{
4916 kfree(net->dev_name_head);
4917 kfree(net->dev_index_head);
4918}
4919
Denis V. Lunev022cbae2007-11-13 03:23:50 -08004920static struct pernet_operations __net_initdata netdev_net_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07004921 .init = netdev_init,
4922 .exit = netdev_exit,
4923};
4924
Pavel Emelyanov46650792007-10-08 20:38:39 -07004925static void __net_exit default_device_exit(struct net *net)
Eric W. Biedermance286d32007-09-12 13:53:49 +02004926{
Eric W. Biederman8eb79862008-12-29 18:21:48 -08004927 struct net_device *dev;
Eric W. Biedermance286d32007-09-12 13:53:49 +02004928 /*
4929 * Push all migratable of the network devices back to the
4930 * initial network namespace
4931 */
4932 rtnl_lock();
Eric W. Biederman8eb79862008-12-29 18:21:48 -08004933restart:
4934 for_each_netdev(net, dev) {
Eric W. Biedermance286d32007-09-12 13:53:49 +02004935 int err;
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004936 char fb_name[IFNAMSIZ];
Eric W. Biedermance286d32007-09-12 13:53:49 +02004937
4938 /* Ignore unmoveable devices (i.e. loopback) */
4939 if (dev->features & NETIF_F_NETNS_LOCAL)
4940 continue;
4941
Eric W. Biedermand0c082c2008-11-05 15:59:38 -08004942 /* Delete virtual devices */
4943 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
4944 dev->rtnl_link_ops->dellink(dev);
Eric W. Biederman8eb79862008-12-29 18:21:48 -08004945 goto restart;
Eric W. Biedermand0c082c2008-11-05 15:59:38 -08004946 }
4947
Eric W. Biedermance286d32007-09-12 13:53:49 +02004948 /* Push remaing network devices to init_net */
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004949 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4950 err = dev_change_net_namespace(dev, &init_net, fb_name);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004951 if (err) {
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004952 printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
Eric W. Biedermance286d32007-09-12 13:53:49 +02004953 __func__, dev->name, err);
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004954 BUG();
Eric W. Biedermance286d32007-09-12 13:53:49 +02004955 }
Eric W. Biederman8eb79862008-12-29 18:21:48 -08004956 goto restart;
Eric W. Biedermance286d32007-09-12 13:53:49 +02004957 }
4958 rtnl_unlock();
4959}
4960
Denis V. Lunev022cbae2007-11-13 03:23:50 -08004961static struct pernet_operations __net_initdata default_device_ops = {
Eric W. Biedermance286d32007-09-12 13:53:49 +02004962 .exit = default_device_exit,
4963};
4964
Linus Torvalds1da177e2005-04-16 15:20:36 -07004965/*
4966 * Initialize the DEV module. At boot time this walks the device list and
4967 * unhooks any devices that fail to initialise (normally hardware not
4968 * present) and leaves us with a valid list of present and active devices.
4969 *
4970 */
4971
4972/*
4973 * This is called single threaded during boot, so no need
4974 * to take the rtnl semaphore.
4975 */
4976static int __init net_dev_init(void)
4977{
4978 int i, rc = -ENOMEM;
4979
4980 BUG_ON(!dev_boot_phase);
4981
Linus Torvalds1da177e2005-04-16 15:20:36 -07004982 if (dev_proc_init())
4983 goto out;
4984
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004985 if (netdev_kobject_init())
Linus Torvalds1da177e2005-04-16 15:20:36 -07004986 goto out;
4987
4988 INIT_LIST_HEAD(&ptype_all);
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08004989 for (i = 0; i < PTYPE_HASH_SIZE; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004990 INIT_LIST_HEAD(&ptype_base[i]);
4991
Eric W. Biederman881d9662007-09-17 11:56:21 -07004992 if (register_pernet_subsys(&netdev_net_ops))
4993 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004994
4995 /*
4996 * Initialise the packet receive queues.
4997 */
4998
KAMEZAWA Hiroyuki6f912042006-04-10 22:52:50 -07004999 for_each_possible_cpu(i) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005000 struct softnet_data *queue;
5001
5002 queue = &per_cpu(softnet_data, i);
5003 skb_queue_head_init(&queue->input_pkt_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005004 queue->completion_queue = NULL;
5005 INIT_LIST_HEAD(&queue->poll_list);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07005006
5007 queue->backlog.poll = process_backlog;
5008 queue->backlog.weight = weight_p;
Herbert Xud565b0a2008-12-15 23:38:52 -08005009 queue->backlog.gro_list = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005010 }
5011
Linus Torvalds1da177e2005-04-16 15:20:36 -07005012 dev_boot_phase = 0;
5013
Eric W. Biederman505d4f72008-11-07 22:54:20 -08005014 /* The loopback device is special if any other network devices
5015 * is present in a network namespace the loopback device must
5016 * be present. Since we now dynamically allocate and free the
5017 * loopback device ensure this invariant is maintained by
5018 * keeping the loopback device as the first device on the
5019 * list of network devices. Ensuring the loopback devices
5020 * is the first device that appears and the last network device
5021 * that disappears.
5022 */
5023 if (register_pernet_device(&loopback_net_ops))
5024 goto out;
5025
5026 if (register_pernet_device(&default_device_ops))
5027 goto out;
5028
Carlos R. Mafra962cf362008-05-15 11:15:37 -03005029 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5030 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005031
5032 hotcpu_notifier(dev_cpu_callback, 0);
5033 dst_init();
5034 dev_mcast_init();
Dan Williamsaa1e6f12009-01-06 11:38:17 -07005035 #ifdef CONFIG_NET_DMA
5036 dmaengine_get();
5037 #endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07005038 rc = 0;
5039out:
5040 return rc;
5041}
5042
/* Core networking must come up before device drivers start registering. */
subsys_initcall(net_dev_init);

/* Symbols exported for device drivers and protocol modules. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

/* Hooks filled in by the bridge module when it loads. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

EXPORT_SYMBOL(dev_load);

EXPORT_PER_CPU_SYMBOL(softnet_data);