/*
 *  NET3    Protocol independent device support routines.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 *  Derived from the non IP parts of dev.c 1.0.19
 *      Authors:    Ross Biro
 *                  Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *                  Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *  Additional Authors:
 *      Florian la Roche <rzsfl@rz.uni-sb.de>
 *      Alan Cox <gw4pts@gw4pts.ampr.org>
 *      David Hinds <dahinds@users.sourceforge.net>
 *      Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *      Adam Sulmicki <adam@cfar.umd.edu>
 *      Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *  Changes:
 *      D.J. Barrow         :   Fixed bug where dev->refcnt gets set
 *                              to 2 if register_netdev gets called
 *                              before net_dev_init & also removed a
 *                              few lines of code in the process.
 *      Alan Cox            :   device private ioctl copies fields back.
 *      Alan Cox            :   Transmit queue code does relevant
 *                              stunts to keep the queue safe.
 *      Alan Cox            :   Fixed double lock.
 *      Alan Cox            :   Fixed promisc NULL pointer trap
 *      ????????            :   Support the full private ioctl range
 *      Alan Cox            :   Moved ioctl permission check into
 *                              drivers
 *      Tim Kordas          :   SIOCADDMULTI/SIOCDELMULTI
 *      Alan Cox            :   100 backlog just doesn't cut it when
 *                              you start doing multicast video 8)
 *      Alan Cox            :   Rewrote net_bh and list manager.
 *      Alan Cox            :   Fix ETH_P_ALL echoback lengths.
 *      Alan Cox            :   Took out transmit every packet pass
 *                              Saved a few bytes in the ioctl handler
 *      Alan Cox            :   Network driver sets packet type before
 *                              calling netif_rx. Saves a function
 *                              call a packet.
 *      Alan Cox            :   Hashed net_bh()
 *      Richard Kooijman    :   Timestamp fixes.
 *      Alan Cox            :   Wrong field in SIOCGIFDSTADDR
 *      Alan Cox            :   Device lock protection.
 *      Alan Cox            :   Fixed nasty side effect of device close
 *                              changes.
 *      Rudi Cilibrasi      :   Pass the right thing to
 *                              set_mac_address()
 *      Dave Miller         :   32bit quantity for the device lock to
 *                              make it work out on a Sparc.
 *      Bjorn Ekwall        :   Added KERNELD hack.
 *      Alan Cox            :   Cleaned up the backlog initialise.
 *      Craig Metz          :   SIOCGIFCONF fix if space for under
 *                              1 device.
 *      Thomas Bogendoerfer :   Return ENODEV for dev_open, if there
 *                              is no device open function.
 *      Andi Kleen          :   Fix error reporting for SIOCGIFCONF
 *      Michael Chastain    :   Fix signed/unsigned for SIOCGIFCONF
 *      Cyrus Durgin        :   Cleaned for KMOD
 *      Adam Sulmicki       :   Bug Fix : Network Device Unload
 *                              A network device unload needs to purge
 *                              the backlog queue.
 *      Paul Rusty Russell  :   SIOCSIFNAME
 *      Pekka Riikonen      :   Netdev boot-time settings code
 *      Andrew Morton       :   Make unregister_netdevice wait
 *                              indefinitely on dev->refcnt
 *      J Hadi Salim        :   - Backlog queue sampling
 *                              - netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

/*
 *  The list of packet types we will receive (as opposed to discard)
 *  and the routines to invoke.
 *
 *  Why 16? Because with 16 the only overlap we get on a hash of the
 *  low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *  NOTE: That is no longer true with the addition of VLAN tags.  Not
 *        sure which should go first, but I bet it won't make much
 *        difference if we are running VLANs.  The good news is that
 *        this protocol won't be in the list unless compiled in, so
 *        the average user (w/out VLANs) will not be adversely affected.
 *        --BLG
 *
 *      0800    IP
 *      8100    802.1Q VLAN
 *      0001    802.3
 *      0002    AX.25
 *      0004    802.2
 *      8035    RARP
 *      0005    SNAP
 *      0805    X.25
 *      0806    ARP
 *      8137    IPX
 *      0009    Localtalk
 *      86DD    IPv6
 */

#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;        /* Taps */

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example, register_netdevice() and unregister_netdevice(),
 * which must be called with the rtnl semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
        unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
        return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
        return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
        struct net *net = dev_net(dev);

        ASSERT_RTNL();

        write_lock_bh(&dev_base_lock);
        list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
        hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
        hlist_add_head_rcu(&dev->index_hlist,
                           dev_index_hash(net, dev->ifindex));
        write_unlock_bh(&dev_base_lock);
        return 0;
}

/* Device list removal
 * caller must respect a RCU grace period before freeing/reusing dev
 */
static void unlist_netdevice(struct net_device *dev)
{
        ASSERT_RTNL();

        /* Unlink dev from the device chain */
        write_lock_bh(&dev_base_lock);
        list_del_rcu(&dev->dev_list);
        hlist_del_rcu(&dev->name_hlist);
        hlist_del_rcu(&dev->index_hlist);
        write_unlock_bh(&dev_base_lock);
}

/*
 * Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 * Device drivers call our routines to queue packets here. We empty the
 * queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
        {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
         ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
         ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
         ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
         ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
         ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
         ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
         ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
         ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
         ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
         ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
         ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
         ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
         ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
         ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
         ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
        {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
         "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
         "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
         "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
         "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
         "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
         "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
         "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
         "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
         "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
         "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
         "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
         "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
         "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
         "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
         "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
                if (netdev_lock_type[i] == dev_type)
                        return i;
        /* the last key is used by default */
        return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
                                                 unsigned short dev_type)
{
        int i;

        i = netdev_lock_pos(dev_type);
        lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
                                   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
        int i;

        i = netdev_lock_pos(dev->type);
        lockdep_set_class_and_name(&dev->addr_list_lock,
                                   &netdev_addr_lock_key[i],
                                   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
                                                 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

        Protocol management and registration routines

*******************************************************************************/

/*
 * Add a protocol ID to the list. Now that the input handler is
 * smarter we can dispense with all the messy stuff that used to be
 * here.
 *
 * BEWARE!!! Protocol handlers that mangle input packets
 * MUST BE last in the hash buckets, and checking of protocol handlers
 * MUST start from the promiscuous ptype_all chain in net_bh.
 * It is true now, do not change it.
 * Explanation follows: if a packet-mangling protocol handler is
 * first on the list, it cannot sense that the packet is cloned and
 * should be copied-on-write, so it will change the packet in place
 * and subsequent readers will see a broken packet.
 *                                              --ANK (980803)
 */

/**
 * dev_add_pack - add packet handler
 * @pt: packet type declaration
 *
 * Add a protocol handler to the networking stack. The passed &packet_type
 * is linked into kernel lists and may not be freed until it has been
 * removed from the kernel lists.
 *
 * This call does not sleep, therefore it cannot guarantee that all
 * CPUs that are in the middle of receiving packets will see the new
 * packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
        int hash;

        spin_lock_bh(&ptype_lock);
        if (pt->type == htons(ETH_P_ALL))
                list_add_rcu(&pt->list, &ptype_all);
        else {
                hash = ntohs(pt->type) & PTYPE_HASH_MASK;
                list_add_rcu(&pt->list, &ptype_base[hash]);
        }
        spin_unlock_bh(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
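
/*
 * Example: a minimal, hypothetical tap that receives a clone of every
 * packet (a sketch only; the handler runs in softirq context and must
 * consume or free the skb it is given):
 *
 *      static int my_tap_rcv(struct sk_buff *skb, struct net_device *dev,
 *                            struct packet_type *pt,
 *                            struct net_device *orig_dev)
 *      {
 *              kfree_skb(skb);         // just drop our clone
 *              return NET_RX_SUCCESS;
 *      }
 *
 *      static struct packet_type my_tap __read_mostly = {
 *              .type = cpu_to_be16(ETH_P_ALL),
 *              .func = my_tap_rcv,
 *      };
 *
 *      dev_add_pack(&my_tap);          // start seeing packets
 *      ...
 *      dev_remove_pack(&my_tap);       // sleeps until no CPU uses it
 */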

/**
 * __dev_remove_pack - remove packet handler
 * @pt: packet type declaration
 *
 * Remove a protocol handler that was previously added to the kernel
 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
 * from the kernel lists and can be freed or reused once this function
 * returns.
 *
 * The packet type might still be in use by receivers
 * and must not be freed until after all the CPUs have gone
 * through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
        struct list_head *head;
        struct packet_type *pt1;

        spin_lock_bh(&ptype_lock);

        if (pt->type == htons(ETH_P_ALL))
                head = &ptype_all;
        else
                head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

        list_for_each_entry(pt1, head, list) {
                if (pt == pt1) {
                        list_del_rcu(&pt->list);
                        goto out;
                }
        }

        printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
        spin_unlock_bh(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);

/**
 * dev_remove_pack - remove packet handler
 * @pt: packet type declaration
 *
 * Remove a protocol handler that was previously added to the kernel
 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
 * from the kernel lists and can be freed or reused once this function
 * returns.
 *
 * This call sleeps to guarantee that no CPU is looking at the packet
 * type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
        __dev_remove_pack(pt);

        synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);

/*******************************************************************************

        Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 * netdev_boot_setup_add - add new setup entry
 * @name: name of the device
 * @map: configured settings for the device
 *
 * Adds new setup entry to the dev_boot_setup list.  The function
 * returns 0 on error and 1 on success.  This is a generic routine
 * for all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
        struct netdev_boot_setup *s;
        int i;

        s = dev_boot_setup;
        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
                if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
                        memset(s[i].name, 0, sizeof(s[i].name));
                        strlcpy(s[i].name, name, IFNAMSIZ);
                        memcpy(&s[i].map, map, sizeof(s[i].map));
                        break;
                }
        }

        return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 * netdev_boot_setup_check - check boot time settings
 * @dev: the netdevice
 *
 * Check boot time settings for the device.
 * The found settings are set for the device to be used
 * later in the device probing.
 * Returns 0 if no settings are found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
        struct netdev_boot_setup *s = dev_boot_setup;
        int i;

        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
                if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
                    !strcmp(dev->name, s[i].name)) {
                        dev->irq = s[i].map.irq;
                        dev->base_addr = s[i].map.base_addr;
                        dev->mem_start = s[i].map.mem_start;
                        dev->mem_end = s[i].map.mem_end;
                        return 1;
                }
        }
        return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);


/**
 * netdev_boot_base - get address from boot time settings
 * @prefix: prefix for network device
 * @unit: id for network device
 *
 * Check boot time settings for the base address of device.
 * The found settings are set for the device to be used
 * later in the device probing.
 * Returns 0 if no settings are found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
        const struct netdev_boot_setup *s = dev_boot_setup;
        char name[IFNAMSIZ];
        int i;

        sprintf(name, "%s%d", prefix, unit);

        /*
         * If device already registered then return base of 1
         * to indicate not to probe for this interface
         */
        if (__dev_get_by_name(&init_net, name))
                return 1;

        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
                if (!strcmp(name, s[i].name))
                        return s[i].map.base_addr;
        return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
        int ints[5];
        struct ifmap map;

        str = get_options(str, ARRAY_SIZE(ints), ints);
        if (!str || !*str)
                return 0;

        /* Save settings */
        memset(&map, 0, sizeof(map));
        if (ints[0] > 0)
                map.irq = ints[1];
        if (ints[0] > 1)
                map.base_addr = ints[2];
        if (ints[0] > 2)
                map.mem_start = ints[3];
        if (ints[0] > 3)
                map.mem_end = ints[4];

        /* Add new entry to the list */
        return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
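
/*
 * Example: given the parsing above, a kernel command line containing
 *
 *      netdev=5,0x340,0xd0000,0xd4000,eth0
 *
 * records irq 5, I/O base 0x340 and the 0xd0000-0xd4000 memory window
 * for "eth0"; netdev_boot_setup_check() then applies those values when
 * that device is probed.
 */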

/*******************************************************************************

        Device Interface Subroutines

*******************************************************************************/

/**
 * __dev_get_by_name - find a device by its name
 * @net: the applicable net namespace
 * @name: name to find
 *
 * Find an interface by name. Must be called under RTNL semaphore
 * or @dev_base_lock. If the name is found a pointer to the device
 * is returned. If the name is not found then %NULL is returned. The
 * reference counters are not incremented so the caller must be
 * careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
        struct hlist_node *p;
        struct net_device *dev;
        struct hlist_head *head = dev_name_hash(net, name);

        hlist_for_each_entry(dev, p, head, name_hlist)
                if (!strncmp(dev->name, name, IFNAMSIZ))
                        return dev;

        return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

/**
 * dev_get_by_name_rcu - find a device by its name
 * @net: the applicable net namespace
 * @name: name to find
 *
 * Find an interface by name.
 * If the name is found a pointer to the device is returned.
 * If the name is not found then %NULL is returned.
 * The reference counters are not incremented so the caller must be
 * careful with locks. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
        struct hlist_node *p;
        struct net_device *dev;
        struct hlist_head *head = dev_name_hash(net, name);

        hlist_for_each_entry_rcu(dev, p, head, name_hlist)
                if (!strncmp(dev->name, name, IFNAMSIZ))
                        return dev;

        return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);

/**
 * dev_get_by_name - find a device by its name
 * @net: the applicable net namespace
 * @name: name to find
 *
 * Find an interface by name. This can be called from any
 * context and does its own locking. The returned handle has
 * the usage count incremented and the caller must use dev_put() to
 * release it when it is no longer needed. %NULL is returned if no
 * matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_name_rcu(net, name);
        if (dev)
                dev_hold(dev);
        rcu_read_unlock();
        return dev;
}
EXPORT_SYMBOL(dev_get_by_name);
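
/*
 * Example: a refcounted lookup from process context (a sketch, assuming
 * the caller works in &init_net):
 *
 *      struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *
 *      if (dev) {
 *              ...                     // use dev
 *              dev_put(dev);           // drop the reference when done
 *      }
 */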

/**
 * __dev_get_by_index - find a device by its ifindex
 * @net: the applicable net namespace
 * @ifindex: index of device
 *
 * Search for an interface by index. Returns %NULL if the device
 * is not found or a pointer to the device. The device has not
 * had its reference counter increased so the caller must be careful
 * about locking. The caller must hold either the RTNL semaphore
 * or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
        struct hlist_node *p;
        struct net_device *dev;
        struct hlist_head *head = dev_index_hash(net, ifindex);

        hlist_for_each_entry(dev, p, head, index_hlist)
                if (dev->ifindex == ifindex)
                        return dev;

        return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);

/**
 * dev_get_by_index_rcu - find a device by its ifindex
 * @net: the applicable net namespace
 * @ifindex: index of device
 *
 * Search for an interface by index. Returns %NULL if the device
 * is not found or a pointer to the device. The device has not
 * had its reference counter increased so the caller must be careful
 * about locking. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
        struct hlist_node *p;
        struct net_device *dev;
        struct hlist_head *head = dev_index_hash(net, ifindex);

        hlist_for_each_entry_rcu(dev, p, head, index_hlist)
                if (dev->ifindex == ifindex)
                        return dev;

        return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);
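
/*
 * Example: a lock-free lookup; the result is only valid inside the RCU
 * read-side critical section unless a reference is taken (a sketch):
 *
 *      rcu_read_lock();
 *      dev = dev_get_by_index_rcu(net, ifindex);
 *      if (dev)
 *              dev_hold(dev);          // keep it past rcu_read_unlock()
 *      rcu_read_unlock();
 */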


/**
 * dev_get_by_index - find a device by its ifindex
 * @net: the applicable net namespace
 * @ifindex: index of device
 *
 * Search for an interface by index. Returns NULL if the device
 * is not found or a pointer to the device. The device returned has
 * had a reference added and the pointer is safe until the user calls
 * dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_index_rcu(net, ifindex);
        if (dev)
                dev_hold(dev);
        rcu_read_unlock();
        return dev;
}
EXPORT_SYMBOL(dev_get_by_index);

/**
 * dev_getbyhwaddr - find a device by its hardware address
 * @net: the applicable net namespace
 * @type: media type of device
 * @ha: hardware address
 *
 * Search for an interface by MAC address. Returns NULL if the device
 * is not found or a pointer to the device. The caller must hold the
 * rtnl semaphore. The returned device has not had its ref count
 * increased and the caller must therefore be careful about locking.
 *
 * BUGS:
 * If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
        struct net_device *dev;

        ASSERT_RTNL();

        for_each_netdev(net, dev)
                if (dev->type == type &&
                    !memcmp(dev->dev_addr, ha, dev->addr_len))
                        return dev;

        return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
        struct net_device *dev;

        ASSERT_RTNL();
        for_each_netdev(net, dev)
                if (dev->type == type)
                        return dev;

        return NULL;
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
        struct net_device *dev;

        rtnl_lock();
        dev = __dev_getfirstbyhwtype(net, type);
        if (dev)
                dev_hold(dev);
        rtnl_unlock();
        return dev;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);
786
787/**
788 * dev_get_by_flags - find any device with given flags
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700789 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790 * @if_flags: IFF_* values
791 * @mask: bitmask of bits in if_flags to check
792 *
793 * Search for any interface with the given flags. Returns NULL if a device
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900794 * is not found or a pointer to the device. The device returned has
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795 * had a reference added and the pointer is safe until the user calls
796 * dev_put to indicate they have finished with it.
797 */
798
Eric Dumazetd1b19df2009-09-03 01:29:39 -0700799struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
800 unsigned short mask)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801{
Pavel Emelianov7562f872007-05-03 15:13:45 -0700802 struct net_device *dev, *ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803
Pavel Emelianov7562f872007-05-03 15:13:45 -0700804 ret = NULL;
Eric Dumazetc6d14c82009-11-04 05:43:23 -0800805 rcu_read_lock();
806 for_each_netdev_rcu(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 if (((dev->flags ^ if_flags) & mask) == 0) {
808 dev_hold(dev);
Pavel Emelianov7562f872007-05-03 15:13:45 -0700809 ret = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 break;
811 }
812 }
Eric Dumazetc6d14c82009-11-04 05:43:23 -0800813 rcu_read_unlock();
Pavel Emelianov7562f872007-05-03 15:13:45 -0700814 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815}
Eric Dumazetd1b19df2009-09-03 01:29:39 -0700816EXPORT_SYMBOL(dev_get_by_flags);

/**
 * dev_valid_name - check if name is okay for network device
 * @name: name string
 *
 * Network device names need to be valid file names to
 * allow sysfs to work.  We also disallow any kind of
 * whitespace.
 */
int dev_valid_name(const char *name)
{
        if (*name == '\0')
                return 0;
        if (strlen(name) >= IFNAMSIZ)
                return 0;
        if (!strcmp(name, ".") || !strcmp(name, ".."))
                return 0;

        while (*name) {
                if (*name == '/' || isspace(*name))
                        return 0;
                name++;
        }
        return 1;
}
EXPORT_SYMBOL(dev_valid_name);
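
/*
 * Example: "eth0", "br-lan" and even "tap%d" pass this check, while "",
 * ".", "..", "a/b", names containing whitespace, and names of IFNAMSIZ
 * characters or more are all rejected.
 */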

/**
 * __dev_alloc_name - allocate a name for a device
 * @net: network namespace to allocate the device name in
 * @name: name format string
 * @buf:  scratch buffer and result name string
 *
 * Passed a format string - eg "lt%d" - it will try to find a suitable
 * id. It scans the list of devices to build up a free map, then chooses
 * the first empty slot. The caller must hold the dev_base or rtnl lock
 * while allocating the name and adding the device in order to avoid
 * duplicates.
 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 * Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
        int i = 0;
        const char *p;
        const int max_netdevices = 8*PAGE_SIZE;
        unsigned long *inuse;
        struct net_device *d;

        p = strnchr(name, IFNAMSIZ-1, '%');
        if (p) {
                /*
                 * Verify the string as this thing may have come from
                 * the user.  There must be either one "%d" and no other "%"
                 * characters.
                 */
                if (p[1] != 'd' || strchr(p + 2, '%'))
                        return -EINVAL;

                /* Use one page as a bit array of possible slots */
                inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
                if (!inuse)
                        return -ENOMEM;

                for_each_netdev(net, d) {
                        if (!sscanf(d->name, name, &i))
                                continue;
                        if (i < 0 || i >= max_netdevices)
                                continue;

                        /* avoid cases where sscanf is not exact inverse of printf */
                        snprintf(buf, IFNAMSIZ, name, i);
                        if (!strncmp(buf, d->name, IFNAMSIZ))
                                set_bit(i, inuse);
                }

                i = find_first_zero_bit(inuse, max_netdevices);
                free_page((unsigned long) inuse);
        }

        if (buf != name)
                snprintf(buf, IFNAMSIZ, name, i);
        if (!__dev_get_by_name(net, buf))
                return i;

        /* It is possible to run out of possible slots
         * when the name is long and there isn't enough space left
         * for the digits, or if all bits are used.
         */
        return -ENFILE;
}

/**
 * dev_alloc_name - allocate a name for a device
 * @dev: device
 * @name: name format string
 *
 * Passed a format string - eg "lt%d" - it will try to find a suitable
 * id. It scans the list of devices to build up a free map, then chooses
 * the first empty slot. The caller must hold the dev_base or rtnl lock
 * while allocating the name and adding the device in order to avoid
 * duplicates.
 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 * Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
        char buf[IFNAMSIZ];
        struct net *net;
        int ret;

        BUG_ON(!dev_net(dev));
        net = dev_net(dev);
        ret = __dev_alloc_name(net, name, buf);
        if (ret >= 0)
                strlcpy(dev->name, buf, IFNAMSIZ);
        return ret;
}
EXPORT_SYMBOL(dev_alloc_name);
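
/*
 * Example: picking the next free "dummy%d" slot for a freshly allocated
 * device (a sketch, assuming rtnl_lock() is held):
 *
 *      err = dev_alloc_name(dev, "dummy%d");
 *      if (err < 0)
 *              goto fail;
 *      // dev->name is now e.g. "dummy0", and err holds the unit number
 */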

static int dev_get_valid_name(struct net *net, const char *name, char *buf,
                              bool fmt)
{
        if (!dev_valid_name(name))
                return -EINVAL;

        if (fmt && strchr(name, '%'))
                return __dev_alloc_name(net, name, buf);
        else if (__dev_get_by_name(net, name))
                return -EEXIST;
        else if (buf != name)
                strlcpy(buf, name, IFNAMSIZ);

        return 0;
}

/**
 * dev_change_name - change name of a device
 * @dev: device
 * @newname: name (or format string) must be at least IFNAMSIZ
 *
 * Change name of a device, can pass format strings "eth%d"
 * for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
        char oldname[IFNAMSIZ];
        int err = 0;
        int ret;
        struct net *net;

        ASSERT_RTNL();
        BUG_ON(!dev_net(dev));

        net = dev_net(dev);
        if (dev->flags & IFF_UP)
                return -EBUSY;

        if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
                return 0;

        memcpy(oldname, dev->name, IFNAMSIZ);

        err = dev_get_valid_name(net, newname, dev->name, 1);
        if (err < 0)
                return err;

rollback:
        /* For now only devices in the initial network namespace
         * are in sysfs.
         */
        if (net_eq(net, &init_net)) {
                ret = device_rename(&dev->dev, dev->name);
                if (ret) {
                        memcpy(dev->name, oldname, IFNAMSIZ);
                        return ret;
                }
        }

        write_lock_bh(&dev_base_lock);
        hlist_del(&dev->name_hlist);
        write_unlock_bh(&dev_base_lock);

        synchronize_rcu();

        write_lock_bh(&dev_base_lock);
        hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
        write_unlock_bh(&dev_base_lock);

        ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
        ret = notifier_to_errno(ret);

        if (ret) {
                /* err >= 0 after dev_alloc_name() or stores the first errno */
                if (err >= 0) {
                        err = ret;
                        memcpy(dev->name, oldname, IFNAMSIZ);
                        goto rollback;
                } else {
                        printk(KERN_ERR
                               "%s: name change rollback failed: %d.\n",
                               dev->name, ret);
                }
        }

        return err;
}

/**
 * dev_set_alias - change ifalias of a device
 * @dev: device
 * @alias: name up to IFALIASZ
 * @len: limit of bytes to copy from @alias
 *
 * Set ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
        ASSERT_RTNL();

        if (len >= IFALIASZ)
                return -EINVAL;

        if (!len) {
                if (dev->ifalias) {
                        kfree(dev->ifalias);
                        dev->ifalias = NULL;
                }
                return 0;
        }

        dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
        if (!dev->ifalias)
                return -ENOMEM;

        strlcpy(dev->ifalias, alias, len+1);
        return len;
}


/**
 * netdev_features_change - device changes features
 * @dev: device to cause notification
 *
 * Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
        call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 * netdev_state_change - device changes state
 * @dev: device to cause notification
 *
 * Called to indicate a device has changed state. This function calls
 * the notifier chains for netdev_chain and sends a NEWLINK message
 * to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
        if (dev->flags & IFF_UP) {
                call_netdevice_notifiers(NETDEV_CHANGE, dev);
                rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
        }
}
EXPORT_SYMBOL(netdev_state_change);

void netdev_bonding_change(struct net_device *dev, unsigned long event)
{
        call_netdevice_notifiers(event, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);
/**
 * dev_load - load a network module
 * @net: the applicable net namespace
 * @name: name of interface
 *
 * If a network interface is not present and the process has suitable
 * privileges this function loads the module. If module loading is not
 * available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_name_rcu(net, name);
        rcu_read_unlock();

        if (!dev && capable(CAP_NET_ADMIN))
                request_module("%s", name);
}
EXPORT_SYMBOL(dev_load);

static int __dev_open(struct net_device *dev)
{
        const struct net_device_ops *ops = dev->netdev_ops;
        int ret;

        ASSERT_RTNL();

        /*
         * Is it even present?
         */
        if (!netif_device_present(dev))
                return -ENODEV;

        ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
        ret = notifier_to_errno(ret);
        if (ret)
                return ret;

        /*
         * Call device private open method
         */
        set_bit(__LINK_STATE_START, &dev->state);

        if (ops->ndo_validate_addr)
                ret = ops->ndo_validate_addr(dev);

        if (!ret && ops->ndo_open)
                ret = ops->ndo_open(dev);

        /*
         * If it went open OK then:
         */

        if (ret)
                clear_bit(__LINK_STATE_START, &dev->state);
        else {
                /*
                 * Set the flags.
                 */
                dev->flags |= IFF_UP;

                /*
                 * Enable NET_DMA
                 */
                net_dmaengine_get();

                /*
                 * Initialize multicasting status
                 */
                dev_set_rx_mode(dev);

                /*
                 * Wakeup transmit queue engine
                 */
                dev_activate(dev);
        }

        return ret;
}

/**
 * dev_open - prepare an interface for use.
 * @dev: device to open
 *
 * Takes a device from down to up state. The device's private open
 * function is invoked and then the multicast lists are loaded. Finally
 * the device is moved into the up state and a %NETDEV_UP message is
 * sent to the netdev notifier chain.
 *
 * Calling this function on an active interface is a nop. On a failure
 * a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
        int ret;

        /*
         * Is it already up?
         */
        if (dev->flags & IFF_UP)
                return 0;

        /*
         * Open device
         */
        ret = __dev_open(dev);
        if (ret < 0)
                return ret;

        /*
         * ... and announce new interface.
         */
        rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
        call_netdevice_notifiers(NETDEV_UP, dev);

        return ret;
}
EXPORT_SYMBOL(dev_open);
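
/*
 * Example: bringing an interface up from process context (a sketch;
 * dev_open() must be called under the rtnl semaphore):
 *
 *      rtnl_lock();
 *      err = dev_open(dev);
 *      rtnl_unlock();
 */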

static int __dev_close(struct net_device *dev)
{
        const struct net_device_ops *ops = dev->netdev_ops;

        ASSERT_RTNL();
        might_sleep();

        /*
         * Tell people we are going down, so that they can
         * prepare for death while the device is still operating.
         */
        call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

        clear_bit(__LINK_STATE_START, &dev->state);

        /* Synchronize to scheduled poll. We cannot touch poll list,
         * it can be even on different cpu. So just clear netif_running().
         *
         * dev->stop() will invoke napi_disable() on all of its
         * napi_struct instances on this device.
         */
        smp_mb__after_clear_bit(); /* Commit netif_running(). */

        dev_deactivate(dev);

        /*
         * Call the device specific close. This cannot fail.
         * Only if device is UP
         *
         * We allow it to be called even after a DETACH hot-plug
         * event.
         */
        if (ops->ndo_stop)
                ops->ndo_stop(dev);

        /*
         * Device is now down.
         */

        dev->flags &= ~IFF_UP;

        /*
         * Shutdown NET_DMA
         */
        net_dmaengine_put();

        return 0;
}

/**
 * dev_close - shutdown an interface.
 * @dev: device to shutdown
 *
 * This function moves an active device into down state. A
 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 * chain.
 */
int dev_close(struct net_device *dev)
{
        if (!(dev->flags & IFF_UP))
                return 0;

        __dev_close(dev);

        /*
         * Tell people we are down
         */
        rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
        call_netdevice_notifiers(NETDEV_DOWN, dev);

        return 0;
}
EXPORT_SYMBOL(dev_close);


/**
 * dev_disable_lro - disable Large Receive Offload on a device
 * @dev: device
 *
 * Disable Large Receive Offload (LRO) on a net device.  Must be
 * called under RTNL.  This is needed if received packets may be
 * forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
        if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
            dev->ethtool_ops->set_flags) {
                u32 flags = dev->ethtool_ops->get_flags(dev);
                if (flags & ETH_FLAG_LRO) {
                        flags &= ~ETH_FLAG_LRO;
                        dev->ethtool_ops->set_flags(dev, flags);
                }
        }
        WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);


static int dev_boot_phase = 1;
1316
/*
 * Device change register/unregister. These are not inline or static
 * as we export them to the world.
 */

/**
 * register_netdevice_notifier - register a network notifier block
 * @nb: notifier
 *
 * Register a notifier to be called when network device events occur.
 * The notifier passed is linked into the kernel structures and must
 * not be reused until it has been unregistered. A negative errno code
 * is returned on a failure.
 *
 * When registered, all registration and up events are replayed
 * to the new notifier so that it gets a race-free
 * view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
        struct net_device *dev;
        struct net_device *last;
        struct net *net;
        int err;

        rtnl_lock();
        err = raw_notifier_chain_register(&netdev_chain, nb);
        if (err)
                goto unlock;
        if (dev_boot_phase)
                goto unlock;
        for_each_net(net) {
                for_each_netdev(net, dev) {
                        err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
                        err = notifier_to_errno(err);
                        if (err)
                                goto rollback;

                        if (!(dev->flags & IFF_UP))
                                continue;

                        nb->notifier_call(nb, NETDEV_UP, dev);
                }
        }

unlock:
        rtnl_unlock();
        return err;

rollback:
        last = dev;
        for_each_net(net) {
                for_each_netdev(net, dev) {
                        if (dev == last)
                                break;

                        if (dev->flags & IFF_UP) {
                                nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
                                nb->notifier_call(nb, NETDEV_DOWN, dev);
                        }
                        nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
                        nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
                }
        }

        raw_notifier_chain_unregister(&netdev_chain, nb);
        goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier);
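
/*
 * Example (illustrative sketch, not part of this file): a minimal
 * notifier that logs UP/DOWN transitions. In this kernel the notifier's
 * third argument is the struct net_device itself; the "example_" names
 * below are hypothetical.
 */
#if 0
static int example_netdev_event(struct notifier_block *nb,
                                unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;

        switch (event) {
        case NETDEV_UP:
                printk(KERN_INFO "example: %s is up\n", dev->name);
                break;
        case NETDEV_DOWN:
                printk(KERN_INFO "example: %s is down\n", dev->name);
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block example_netdev_nb = {
        .notifier_call = example_netdev_event,
};

/* module init would call: register_netdevice_notifier(&example_netdev_nb); */
#endif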

/**
 * unregister_netdevice_notifier - unregister a network notifier block
 * @nb: notifier
 *
 * Unregister a notifier previously registered by
 * register_netdevice_notifier(). The notifier is unlinked from the
 * kernel structures and may then be reused. A negative errno code
 * is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
        int err;

        rtnl_lock();
        err = raw_notifier_chain_unregister(&netdev_chain, nb);
        rtnl_unlock();
        return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);

/**
 * call_netdevice_notifiers - call all network notifier blocks
 * @val: value passed unmodified to notifier function
 * @dev: net_device pointer passed unmodified to notifier function
 *
 * Call all network notifier blocks.  Parameters and return value
 * are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
        return raw_notifier_call_chain(&netdev_chain, val, dev);
}

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

void net_enable_timestamp(void)
{
        atomic_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);

void net_disable_timestamp(void)
{
        atomic_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);
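
/*
 * Example (illustrative sketch, not part of this file): users that need
 * rx timestamps bump the counter for the lifetime of their interest and
 * must pair the calls, e.g. from a module's init/exit. Names are
 * hypothetical.
 */
#if 0
static int __init example_sniffer_init(void)
{
        net_enable_timestamp();         /* rx skbs now get timestamped */
        return 0;
}

static void __exit example_sniffer_exit(void)
{
        net_disable_timestamp();        /* drop our reference */
}
#endif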

static inline void net_timestamp(struct sk_buff *skb)
{
        if (atomic_read(&netstamp_needed))
                __net_timestamp(skb);
        else
                skb->tstamp.tv64 = 0;
}

/**
 * dev_forward_skb - loopback an skb to another netif
 *
 * @dev: destination network device
 * @skb: buffer to forward
 *
 * return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped)
 *
 * dev_forward_skb can be used for injecting an skb from the
 * start_xmit function of one device into the receive queue
 * of another device.
 *
 * The receiving device may be in another namespace, so
 * we have to clear all information in the skb that could
 * impact namespace isolation.
 */
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
        skb_orphan(skb);

        if (!(dev->flags & IFF_UP))
                return NET_RX_DROP;

        if (skb->len > (dev->mtu + dev->hard_header_len))
                return NET_RX_DROP;

        skb_set_dev(skb, dev);
        skb->tstamp.tv64 = 0;
        skb->pkt_type = PACKET_HOST;
        skb->protocol = eth_type_trans(skb, dev);
        return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
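
/*
 * Example (illustrative sketch, not part of this file): a veth-style
 * pairing device can hand frames straight from its transmit routine to
 * its peer's receive path. "example_priv" and its "peer" field are
 * assumptions for illustration.
 */
#if 0
static netdev_tx_t example_pair_xmit(struct sk_buff *skb,
                                     struct net_device *dev)
{
        struct example_priv *priv = netdev_priv(dev);

        /* dev_forward_skb() scrubs namespace state and calls netif_rx() */
        dev_forward_skb(priv->peer, skb);
        return NETDEV_TX_OK;
}
#endif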

/*
 * Support routine. Sends outgoing frames to any network
 * taps currently in use.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
        struct packet_type *ptype;

#ifdef CONFIG_NET_CLS_ACT
        if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
                net_timestamp(skb);
#else
        net_timestamp(skb);
#endif

        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                /* Never send packets back to the socket
                 * they originated from - MvS (miquels@drinkel.ow.org)
                 */
                if ((ptype->dev == dev || !ptype->dev) &&
                    (ptype->af_packet_priv == NULL ||
                     (struct sock *)ptype->af_packet_priv != skb->sk)) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (!skb2)
                                break;

                        /* skb->nh should be correctly
                         * set by the sender, so the second statement is
                         * just protection against buggy protocols.
                         */
                        skb_reset_mac_header(skb2);

                        if (skb_network_header(skb2) < skb2->data ||
                            skb2->network_header > skb2->tail) {
                                if (net_ratelimit())
                                        printk(KERN_CRIT "protocol %04x is "
                                               "buggy, dev %s\n",
                                               skb2->protocol, dev->name);
                                skb_reset_network_header(skb2);
                        }

                        skb2->transport_header = skb2->network_header;
                        skb2->pkt_type = PACKET_OUTGOING;
                        ptype->func(skb2, skb->dev, ptype, skb->dev);
                }
        }
        rcu_read_unlock();
}


static inline void __netif_reschedule(struct Qdisc *q)
{
        struct softnet_data *sd;
        unsigned long flags;

        local_irq_save(flags);
        sd = &__get_cpu_var(softnet_data);
        q->next_sched = sd->output_queue;
        sd->output_queue = q;
        raise_softirq_irqoff(NET_TX_SOFTIRQ);
        local_irq_restore(flags);
}

void __netif_schedule(struct Qdisc *q)
{
        if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
                __netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);

void dev_kfree_skb_irq(struct sk_buff *skb)
{
        if (atomic_dec_and_test(&skb->users)) {
                struct softnet_data *sd;
                unsigned long flags;

                local_irq_save(flags);
                sd = &__get_cpu_var(softnet_data);
                skb->next = sd->completion_queue;
                sd->completion_queue = skb;
                raise_softirq_irqoff(NET_TX_SOFTIRQ);
                local_irq_restore(flags);
        }
}
EXPORT_SYMBOL(dev_kfree_skb_irq);

void dev_kfree_skb_any(struct sk_buff *skb)
{
        if (in_irq() || irqs_disabled())
                dev_kfree_skb_irq(skb);
        else
                dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
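
/*
 * Example (illustrative sketch, not part of this file): a driver's tx
 * completion handler may run in hardirq context, so it frees skbs with
 * dev_kfree_skb_any() rather than dev_kfree_skb(). The ring layout is
 * hypothetical.
 */
#if 0
static void example_clean_tx_ring(struct example_ring *ring)
{
        while (ring->clean != ring->next_to_use) {
                struct sk_buff *skb = ring->bufs[ring->clean].skb;

                if (skb)
                        dev_kfree_skb_any(skb); /* safe in any context */
                ring->bufs[ring->clean].skb = NULL;
                ring->clean = (ring->clean + 1) % ring->count;
        }
}
#endif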


/**
 * netif_device_detach - mark device as removed
 * @dev: network device
 *
 * Mark device as removed from the system and therefore no longer available.
 */
void netif_device_detach(struct net_device *dev)
{
        if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
            netif_running(dev)) {
                netif_tx_stop_all_queues(dev);
        }
}
EXPORT_SYMBOL(netif_device_detach);

/**
 * netif_device_attach - mark device as attached
 * @dev: network device
 *
 * Mark device as attached to the system and restart it if needed.
 */
void netif_device_attach(struct net_device *dev)
{
        if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
            netif_running(dev)) {
                netif_tx_wake_all_queues(dev);
                __netdev_watchdog_up(dev);
        }
}
EXPORT_SYMBOL(netif_device_attach);
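
/*
 * Example (illustrative sketch, not part of this file): the classic
 * callers are driver suspend/resume hooks, which detach the netdev
 * before powering the hardware down and attach it again afterwards.
 * The PCI driver skeleton below is an assumption for illustration.
 */
#if 0
static int example_suspend(struct pci_dev *pdev, pm_message_t state)
{
        struct net_device *dev = pci_get_drvdata(pdev);

        netif_device_detach(dev);       /* stop all tx queues */
        /* ... save state, power the device down ... */
        return 0;
}

static int example_resume(struct pci_dev *pdev)
{
        struct net_device *dev = pci_get_drvdata(pdev);

        /* ... power up, restore state ... */
        netif_device_attach(dev);       /* restart queues + watchdog */
        return 0;
}
#endif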

static bool can_checksum_protocol(unsigned long features, __be16 protocol)
{
        return ((features & NETIF_F_GEN_CSUM) ||
                ((features & NETIF_F_IP_CSUM) &&
                 protocol == htons(ETH_P_IP)) ||
                ((features & NETIF_F_IPV6_CSUM) &&
                 protocol == htons(ETH_P_IPV6)) ||
                ((features & NETIF_F_FCOE_CRC) &&
                 protocol == htons(ETH_P_FCOE)));
}

static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
{
        if (can_checksum_protocol(dev->features, skb->protocol))
                return true;

        if (skb->protocol == htons(ETH_P_8021Q)) {
                struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
                if (can_checksum_protocol(dev->features & dev->vlan_features,
                                          veh->h_vlan_encapsulated_proto))
                        return true;
        }

        return false;
}

/**
 * skb_set_dev - assign a new device to a buffer
 * @skb: buffer for the new device
 * @dev: network device
 *
 * If an skb is owned by a device already, we have to reset
 * all data private to the namespace a device belongs to
 * before assigning it a new device.
 */
#ifdef CONFIG_NET_NS
void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
{
        skb_dst_drop(skb);
        if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
                secpath_reset(skb);
                nf_reset(skb);
                skb_init_secmark(skb);
                skb->mark = 0;
                skb->priority = 0;
                skb->nf_trace = 0;
                skb->ipvs_property = 0;
#ifdef CONFIG_NET_SCHED
                skb->tc_index = 0;
#endif
        }
        skb->dev = dev;
}
EXPORT_SYMBOL(skb_set_dev);
#endif /* CONFIG_NET_NS */

/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
        __wsum csum;
        int ret = 0, offset;

        if (skb->ip_summed == CHECKSUM_COMPLETE)
                goto out_set_summed;

        if (unlikely(skb_shinfo(skb)->gso_size)) {
                /* Let GSO fix up the checksum. */
                goto out_set_summed;
        }

        offset = skb->csum_start - skb_headroom(skb);
        BUG_ON(offset >= skb_headlen(skb));
        csum = skb_checksum(skb, offset, skb->len - offset, 0);

        offset += skb->csum_offset;
        BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

        if (skb_cloned(skb) &&
            !skb_clone_writable(skb, offset + sizeof(__sum16))) {
                ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
                if (ret)
                        goto out;
        }

        *(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
        skb->ip_summed = CHECKSUM_NONE;
out:
        return ret;
}
EXPORT_SYMBOL(skb_checksum_help);
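
/*
 * Example (illustrative sketch, not part of this file): a driver whose
 * hardware cannot checksum a given protocol can fall back to software
 * checksumming in its xmit path; the capability test and surrounding
 * driver code are assumptions.
 */
#if 0
static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            !example_hw_can_csum(skb)) {        /* hypothetical test */
                if (skb_checksum_help(skb)) {
                        dev_kfree_skb_any(skb);
                        return NETDEV_TX_OK;
                }
        }
        /* ... hand the now fully checksummed skb to the hardware ... */
        return NETDEV_TX_OK;
}
#endif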

/**
 * skb_gso_segment - Perform segmentation on skb.
 * @skb: buffer to segment
 * @features: features for the output path (see dev->features)
 *
 * This function segments the given skb and returns a list of segments.
 *
 * It may return NULL if the skb requires no segmentation.  This is
 * only possible when GSO is used for verifying header integrity.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
        struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
        struct packet_type *ptype;
        __be16 type = skb->protocol;
        int err;

        skb_reset_mac_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;
        __skb_pull(skb, skb->mac_len);

        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
                struct net_device *dev = skb->dev;
                struct ethtool_drvinfo info = {};

                if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
                        dev->ethtool_ops->get_drvinfo(dev, &info);

                WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
                        "ip_summed=%d",
                     info.driver, dev ? dev->features : 0L,
                     skb->sk ? skb->sk->sk_route_caps : 0L,
                     skb->len, skb->data_len, skb->ip_summed);

                if (skb_header_cloned(skb) &&
                    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
                        return ERR_PTR(err);
        }

        rcu_read_lock();
        list_for_each_entry_rcu(ptype,
                        &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
                if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
                        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
                                err = ptype->gso_send_check(skb);
                                segs = ERR_PTR(err);
                                if (err || skb_gso_ok(skb, features))
                                        break;
                                __skb_push(skb, (skb->data -
                                                 skb_network_header(skb)));
                        }
                        segs = ptype->gso_segment(skb, features);
                        break;
                }
        }
        rcu_read_unlock();

        __skb_push(skb, skb->data - skb_mac_header(skb));

        return segs;
}
EXPORT_SYMBOL(skb_gso_segment);

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
        if (net_ratelimit()) {
                printk(KERN_ERR "%s: hw csum failure.\n",
                        dev ? dev->name : "<unknown>");
                dump_stack();
        }
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

/* Actually, we should eliminate this check as soon as we know that:
 * 1. an IOMMU is present and can map all of the machine's memory.
 * 2. no high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
        int i;

        if (dev->features & NETIF_F_HIGHDMA)
                return 0;

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
                if (PageHighMem(skb_shinfo(skb)->frags[i].page))
                        return 1;

#endif
        return 0;
}

struct dev_gso_cb {
        void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

static void dev_gso_skb_destructor(struct sk_buff *skb)
{
        struct dev_gso_cb *cb;

        do {
                struct sk_buff *nskb = skb->next;

                skb->next = nskb->next;
                nskb->next = NULL;
                kfree_skb(nskb);
        } while (skb->next);

        cb = DEV_GSO_CB(skb);
        if (cb->destructor)
                cb->destructor(skb);
}

/**
 * dev_gso_segment - Perform emulated hardware segmentation on skb.
 * @skb: buffer to segment
 *
 * This function segments the given skb and stores the list of segments
 * in skb->next.
 */
static int dev_gso_segment(struct sk_buff *skb)
{
        struct net_device *dev = skb->dev;
        struct sk_buff *segs;
        int features = dev->features & ~(illegal_highdma(dev, skb) ?
                                         NETIF_F_SG : 0);

        segs = skb_gso_segment(skb, features);

        /* Verifying header integrity only. */
        if (!segs)
                return 0;

        if (IS_ERR(segs))
                return PTR_ERR(segs);

        skb->next = segs;
        DEV_GSO_CB(skb)->destructor = skb->destructor;
        skb->destructor = dev_gso_skb_destructor;

        return 0;
}

int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                        struct netdev_queue *txq)
{
        const struct net_device_ops *ops = dev->netdev_ops;
        int rc = NETDEV_TX_OK;

        if (likely(!skb->next)) {
                if (!list_empty(&ptype_all))
                        dev_queue_xmit_nit(skb, dev);

                if (netif_needs_gso(dev, skb)) {
                        if (unlikely(dev_gso_segment(skb)))
                                goto out_kfree_skb;
                        if (skb->next)
                                goto gso;
                }

                /*
                 * If the device doesn't need skb->dst, release it right now
                 * while it's hot in this CPU's cache.
                 */
                if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
                        skb_dst_drop(skb);

                rc = ops->ndo_start_xmit(skb, dev);
                if (rc == NETDEV_TX_OK)
                        txq_trans_update(txq);
                /*
                 * TODO: if skb_orphan() was called by
                 * dev->hard_start_xmit() (for example, the unmodified
                 * igb driver does that; bnx2 doesn't), then
                 * skb_tx_software_timestamp() will be unable to send
                 * back the time stamp.
                 *
                 * How can this be prevented? Always create another
                 * reference to the socket before calling
                 * dev->hard_start_xmit()? Prevent that skb_orphan()
                 * does anything in dev->hard_start_xmit() by clearing
                 * the skb destructor before the call and restoring it
                 * afterwards, then doing the skb_orphan() ourselves?
                 */
                return rc;
        }

gso:
        do {
                struct sk_buff *nskb = skb->next;

                skb->next = nskb->next;
                nskb->next = NULL;

                /*
                 * If the device doesn't need nskb->dst, release it right now
                 * while it's hot in this CPU's cache.
                 */
                if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
                        skb_dst_drop(nskb);

                rc = ops->ndo_start_xmit(nskb, dev);
                if (unlikely(rc != NETDEV_TX_OK)) {
                        if (rc & ~NETDEV_TX_MASK)
                                goto out_kfree_gso_skb;
                        nskb->next = skb->next;
                        skb->next = nskb;
                        return rc;
                }
                txq_trans_update(txq);
                if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
                        return NETDEV_TX_BUSY;
        } while (skb->next);

out_kfree_gso_skb:
        if (likely(skb->next == NULL))
                skb->destructor = DEV_GSO_CB(skb)->destructor;
out_kfree_skb:
        kfree_skb(skb);
        return rc;
}

static u32 skb_tx_hashrnd;

u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
{
        u32 hash;

        if (skb_rx_queue_recorded(skb)) {
                hash = skb_get_rx_queue(skb);
                while (unlikely(hash >= dev->real_num_tx_queues))
                        hash -= dev->real_num_tx_queues;
                return hash;
        }

        if (skb->sk && skb->sk->sk_hash)
                hash = skb->sk->sk_hash;
        else
                hash = skb->protocol;

        hash = jhash_1word(hash, skb_tx_hashrnd);

        return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
EXPORT_SYMBOL(skb_tx_hash);
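
/*
 * Example (illustrative sketch, not part of this file): a multiqueue
 * driver that wants the stack's default flow spreading, but with one
 * queue reserved for high-priority traffic, might wrap skb_tx_hash()
 * in its ndo_select_queue(). The priority test is an assumption.
 */
#if 0
static u16 example_select_queue(struct net_device *dev, struct sk_buff *skb)
{
        if (skb->priority == TC_PRIO_CONTROL)
                return dev->real_num_tx_queues - 1;     /* reserved queue */

        return skb_tx_hash(dev, skb);
}
#endif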

static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
        if (unlikely(queue_index >= dev->real_num_tx_queues)) {
                if (net_ratelimit()) {
                        WARN(1, "%s selects TX queue %d, but "
                             "real number of TX queues is %d\n",
                             dev->name, queue_index,
                             dev->real_num_tx_queues);
                }
                return 0;
        }
        return queue_index;
}

static struct netdev_queue *dev_pick_tx(struct net_device *dev,
                                        struct sk_buff *skb)
{
        u16 queue_index;
        struct sock *sk = skb->sk;

        if (sk_tx_queue_recorded(sk)) {
                queue_index = sk_tx_queue_get(sk);
        } else {
                const struct net_device_ops *ops = dev->netdev_ops;

                if (ops->ndo_select_queue) {
                        queue_index = ops->ndo_select_queue(dev, skb);
                        queue_index = dev_cap_txqueue(dev, queue_index);
                } else {
                        queue_index = 0;
                        if (dev->real_num_tx_queues > 1)
                                queue_index = skb_tx_hash(dev, skb);

                        if (sk) {
                                struct dst_entry *dst = rcu_dereference(sk->sk_dst_cache);

                                if (dst && skb_dst(skb) == dst)
                                        sk_tx_queue_set(sk, queue_index);
                        }
                }
        }

        skb_set_queue_mapping(skb, queue_index);
        return netdev_get_tx_queue(dev, queue_index);
}

static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                                 struct net_device *dev,
                                 struct netdev_queue *txq)
{
        spinlock_t *root_lock = qdisc_lock(q);
        int rc;

        spin_lock(root_lock);
        if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
                kfree_skb(skb);
                rc = NET_XMIT_DROP;
        } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
                   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
                /*
                 * This is a work-conserving queue; there are no old skbs
                 * waiting to be sent out; and the qdisc is not running -
                 * xmit the skb directly.
                 */
                __qdisc_update_bstats(q, skb->len);
                if (sch_direct_xmit(skb, q, dev, txq, root_lock))
                        __qdisc_run(q);
                else
                        clear_bit(__QDISC_STATE_RUNNING, &q->state);

                rc = NET_XMIT_SUCCESS;
        } else {
                rc = qdisc_enqueue_root(skb, q);
                qdisc_run(q);
        }
        spin_unlock(root_lock);

        return rc;
}

/*
 * Returns true if either:
 *	1. skb has frag_list and the device doesn't support FRAGLIST, or
 *	2. skb is fragmented and the device does not support SG, or if
 *	   at least one of the fragments is in highmem and the device
 *	   does not support DMA from it.
 */
static inline int skb_needs_linearize(struct sk_buff *skb,
                                      struct net_device *dev)
{
        return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
               (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
                                              illegal_highdma(dev, skb)));
}

/**
 * dev_queue_xmit - transmit a buffer
 * @skb: buffer to transmit
 *
 * Queue a buffer for transmission to a network device. The caller must
 * have set the device and priority and built the buffer before calling
 * this function. The function can be called from an interrupt.
 *
 * A negative errno code is returned on a failure. A success does not
 * guarantee the frame will be transmitted as it may be dropped due
 * to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      I notice this method can also return errors from the queue disciplines,
 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *      be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 */
int dev_queue_xmit(struct sk_buff *skb)
{
        struct net_device *dev = skb->dev;
        struct netdev_queue *txq;
        struct Qdisc *q;
        int rc = -ENOMEM;

        /* GSO will handle the following emulations directly. */
        if (netif_needs_gso(dev, skb))
                goto gso;

        /* Convert a paged skb to linear, if required */
        if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
                goto out_kfree_skb;

        /* If the packet is not checksummed and the device does not
         * support checksumming for this protocol, complete
         * checksumming here.
         */
        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                skb_set_transport_header(skb, skb->csum_start -
                                              skb_headroom(skb));
                if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
                        goto out_kfree_skb;
        }

gso:
        /* Disable soft irqs for various locks below. Also
         * stops preemption for RCU.
         */
        rcu_read_lock_bh();

        txq = dev_pick_tx(dev, skb);
        q = rcu_dereference_bh(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
        if (q->enqueue) {
                rc = __dev_xmit_skb(skb, q, dev, txq);
                goto out;
        }

        /* The device has no queue. Common case for software devices:
         * loopback, all sorts of tunnels...
         *
         * Really, it is unlikely that netif_tx_lock protection is necessary
         * here. (f.e. loopback and IP tunnels are clean, ignoring statistics
         * counters.)
         * However, it is possible that they rely on the protection
         * made by us here.
         *
         * Check this and take the lock. It is not prone to deadlocks.
         * Either way, the noqueue qdisc case is even simpler 8)
         */
        if (dev->flags & IFF_UP) {
                int cpu = smp_processor_id(); /* ok because BHs are off */

                if (txq->xmit_lock_owner != cpu) {

                        HARD_TX_LOCK(dev, txq, cpu);

                        if (!netif_tx_queue_stopped(txq)) {
                                rc = dev_hard_start_xmit(skb, dev, txq);
                                if (dev_xmit_complete(rc)) {
                                        HARD_TX_UNLOCK(dev, txq);
                                        goto out;
                                }
                        }
                        HARD_TX_UNLOCK(dev, txq);
                        if (net_ratelimit())
                                printk(KERN_CRIT "Virtual device %s asks to "
                                       "queue packet!\n", dev->name);
                } else {
                        /* Recursion is detected! It is possible,
                         * unfortunately */
                        if (net_ratelimit())
                                printk(KERN_CRIT "Dead loop on virtual device "
                                       "%s, fix it urgently!\n", dev->name);
                }
        }

        rc = -ENETDOWN;
        rcu_read_unlock_bh();

out_kfree_skb:
        kfree_skb(skb);
        return rc;
out:
        rcu_read_unlock_bh();
        return rc;
}
EXPORT_SYMBOL(dev_queue_xmit);
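
/*
 * Example (illustrative sketch, not part of this file): a module that
 * emits a raw Ethernet frame builds the skb, points skb->dev at the
 * egress device, and hands it to dev_queue_xmit(). The payload,
 * broadcast addressing, and experimental ethertype are assumptions.
 */
#if 0
static int example_send_frame(struct net_device *dev,
                              const void *payload, unsigned int len)
{
        struct sk_buff *skb;

        skb = alloc_skb(len + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
        if (!skb)
                return -ENOMEM;

        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb_reset_network_header(skb);
        memcpy(skb_put(skb, len), payload, len);

        skb->dev = dev;
        skb->protocol = htons(ETH_P_802_EX1);   /* experimental ethertype */
        if (dev_hard_header(skb, dev, ETH_P_802_EX1, dev->broadcast,
                            dev->dev_addr, skb->len) < 0) {
                kfree_skb(skb);
                return -EINVAL;
        }

        return dev_queue_xmit(skb);     /* consumes the skb */
}
#endif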


/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog __read_mostly = 1000;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64;            /* old backlog weight */

DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };


/**
 * netif_rx - post buffer to the network code
 * @skb: buffer to post
 *
 * This function receives a packet from a device driver and queues it for
 * the upper (protocol) levels to process.  It always succeeds. The buffer
 * may be dropped during processing for congestion control or by the
 * protocol layers.
 *
 * return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
        struct softnet_data *queue;
        unsigned long flags;

        /* if netpoll wants it, pretend we never saw it */
        if (netpoll_rx(skb))
                return NET_RX_DROP;

        if (!skb->tstamp.tv64)
                net_timestamp(skb);

        /*
         * The code is rearranged so that the path is shortest
         * when the CPU is congested, but it still keeps working.
         */
        local_irq_save(flags);
        queue = &__get_cpu_var(softnet_data);

        __get_cpu_var(netdev_rx_stat).total++;
        if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
                if (queue->input_pkt_queue.qlen) {
enqueue:
                        __skb_queue_tail(&queue->input_pkt_queue, skb);
                        local_irq_restore(flags);
                        return NET_RX_SUCCESS;
                }

                napi_schedule(&queue->backlog);
                goto enqueue;
        }

        __get_cpu_var(netdev_rx_stat).dropped++;
        local_irq_restore(flags);

        kfree_skb(skb);
        return NET_RX_DROP;
}
EXPORT_SYMBOL(netif_rx);
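
/*
 * Example (illustrative sketch, not part of this file): a non-NAPI
 * driver's interrupt handler pulls a frame off the hardware, tags its
 * protocol with eth_type_trans(), and queues it with netif_rx(). The
 * hardware fetch helper is hypothetical.
 */
#if 0
static irqreturn_t example_isr(int irq, void *dev_id)
{
        struct net_device *dev = dev_id;
        struct sk_buff *skb;

        while ((skb = example_hw_fetch_frame(dev)) != NULL) {
                skb->protocol = eth_type_trans(skb, dev);
                netif_rx(skb);          /* queue for softirq processing */
        }
        return IRQ_HANDLED;
}
#endif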

int netif_rx_ni(struct sk_buff *skb)
{
        int err;

        preempt_disable();
        err = netif_rx(skb);
        if (local_softirq_pending())
                do_softirq();
        preempt_enable();

        return err;
}
EXPORT_SYMBOL(netif_rx_ni);

static void net_tx_action(struct softirq_action *h)
{
        struct softnet_data *sd = &__get_cpu_var(softnet_data);

        if (sd->completion_queue) {
                struct sk_buff *clist;

                local_irq_disable();
                clist = sd->completion_queue;
                sd->completion_queue = NULL;
                local_irq_enable();

                while (clist) {
                        struct sk_buff *skb = clist;
                        clist = clist->next;

                        WARN_ON(atomic_read(&skb->users));
                        __kfree_skb(skb);
                }
        }

        if (sd->output_queue) {
                struct Qdisc *head;

                local_irq_disable();
                head = sd->output_queue;
                sd->output_queue = NULL;
                local_irq_enable();

                while (head) {
                        struct Qdisc *q = head;
                        spinlock_t *root_lock;

                        head = head->next_sched;

                        root_lock = qdisc_lock(q);
                        if (spin_trylock(root_lock)) {
                                smp_mb__before_clear_bit();
                                clear_bit(__QDISC_STATE_SCHED,
                                          &q->state);
                                qdisc_run(q);
                                spin_unlock(root_lock);
                        } else {
                                if (!test_bit(__QDISC_STATE_DEACTIVATED,
                                              &q->state)) {
                                        __netif_reschedule(q);
                                } else {
                                        smp_mb__before_clear_bit();
                                        clear_bit(__QDISC_STATE_SCHED,
                                                  &q->state);
                                }
                        }
                }
        }
}

static inline int deliver_skb(struct sk_buff *skb,
                              struct packet_type *pt_prev,
                              struct net_device *orig_dev)
{
        atomic_inc(&skb->users);
        return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)

#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
/* This hook is defined here for ATM LANE */
int (*br_fdb_test_addr_hook)(struct net_device *dev,
                             unsigned char *addr) __read_mostly;
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif

/*
 * If the bridge module is loaded, call the bridging hook.
 * Returns NULL if the packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
                                        struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(br_handle_frame_hook);

static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
                                            struct packet_type **pt_prev, int *ret,
                                            struct net_device *orig_dev)
{
        struct net_bridge_port *port;

        if (skb->pkt_type == PACKET_LOOPBACK ||
            (port = rcu_dereference(skb->dev->br_port)) == NULL)
                return skb;

        if (*pt_prev) {
                *ret = deliver_skb(skb, *pt_prev, orig_dev);
                *pt_prev = NULL;
        }

        return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif

#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
                                             struct packet_type **pt_prev,
                                             int *ret,
                                             struct net_device *orig_dev)
{
        if (skb->dev->macvlan_port == NULL)
                return skb;

        if (*pt_prev) {
                *ret = deliver_skb(skb, *pt_prev, orig_dev);
                *pt_prev = NULL;
        }
        return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif

#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for a few useless
 * instructions (a compare and 2 extra stores) when it is not
 * configured but CONFIG_NET_CLS_ACT is.
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 */
static int ing_filter(struct sk_buff *skb)
{
        struct net_device *dev = skb->dev;
        u32 ttl = G_TC_RTTL(skb->tc_verd);
        struct netdev_queue *rxq;
        int result = TC_ACT_OK;
        struct Qdisc *q;

        if (MAX_RED_LOOP < ttl++) {
                printk(KERN_WARNING
                       "Redir loop detected Dropping packet (%d->%d)\n",
                       skb->skb_iif, dev->ifindex);
                return TC_ACT_SHOT;
        }

        skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);

        rxq = &dev->rx_queue;

        q = rxq->qdisc;
        if (q != &noop_qdisc) {
                spin_lock(qdisc_lock(q));
                if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
                        result = qdisc_enqueue_root(skb, q);
                spin_unlock(qdisc_lock(q));
        }

        return result;
}

static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                                         struct packet_type **pt_prev,
                                         int *ret, struct net_device *orig_dev)
{
        if (skb->dev->rx_queue.qdisc == &noop_qdisc)
                goto out;

        if (*pt_prev) {
                *ret = deliver_skb(skb, *pt_prev, orig_dev);
                *pt_prev = NULL;
        } else {
                /* Huh? Why does turning on AF_PACKET affect this? */
                skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
        }

        switch (ing_filter(skb)) {
        case TC_ACT_SHOT:
        case TC_ACT_STOLEN:
                kfree_skb(skb);
                return NULL;
        }

out:
        skb->tc_verd = 0;
        return skb;
}
#endif

/*
 * netif_nit_deliver - deliver received packets to network taps
 * @skb: buffer
 *
 * This function is used to deliver incoming packets to network
 * taps. It should be used when the normal netif_receive_skb path
 * is bypassed, for example because of VLAN acceleration.
 */
void netif_nit_deliver(struct sk_buff *skb)
{
        struct packet_type *ptype;

        if (list_empty(&ptype_all))
                return;

        skb_reset_network_header(skb);
        skb_reset_transport_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;

        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                if (!ptype->dev || ptype->dev == skb->dev)
                        deliver_skb(skb, ptype, skb->dev);
        }
        rcu_read_unlock();
}

/**
 * netif_receive_skb - process receive buffer from network
 * @skb: buffer to process
 *
 * netif_receive_skb() is the main receive data processing function.
 * It always succeeds. The buffer may be dropped during processing
 * for congestion control or by the protocol layers.
 *
 * This function may only be called from softirq context and interrupts
 * should be enabled.
 *
 * Return values (usually ignored):
 * NET_RX_SUCCESS: no congestion
 * NET_RX_DROP: packet was dropped
 */
int netif_receive_skb(struct sk_buff *skb)
{
        struct packet_type *ptype, *pt_prev;
        struct net_device *orig_dev;
        struct net_device *master;
        struct net_device *null_or_orig;
        struct net_device *null_or_bond;
        int ret = NET_RX_DROP;
        __be16 type;

        if (!skb->tstamp.tv64)
                net_timestamp(skb);

        if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
                return NET_RX_SUCCESS;

        /* if we've gotten here through NAPI, check netpoll */
        if (netpoll_receive_skb(skb))
                return NET_RX_DROP;

        if (!skb->skb_iif)
                skb->skb_iif = skb->dev->ifindex;

        null_or_orig = NULL;
        orig_dev = skb->dev;
        master = ACCESS_ONCE(orig_dev->master);
        if (master) {
                if (skb_bond_should_drop(skb, master))
                        null_or_orig = orig_dev; /* deliver only exact match */
                else
                        skb->dev = master;
        }

        __get_cpu_var(netdev_rx_stat).total++;

        skb_reset_network_header(skb);
        skb_reset_transport_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;

        pt_prev = NULL;

        rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
        if (skb->tc_verd & TC_NCLS) {
                skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
                goto ncls;
        }
#endif

        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
                    ptype->dev == orig_dev) {
                        if (pt_prev)
                                ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = ptype;
                }
        }

#ifdef CONFIG_NET_CLS_ACT
        skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
        if (!skb)
                goto out;
ncls:
#endif

        skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
        if (!skb)
                goto out;
        skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
        if (!skb)
                goto out;

        /*
         * Make sure frames received on VLAN interfaces stacked on
         * bonding interfaces still make their way to any base bonding
         * device that may have registered for a specific ptype.  The
         * handler may have to adjust skb->dev and orig_dev.
         */
        null_or_bond = NULL;
        if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
            (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
                null_or_bond = vlan_dev_real_dev(skb->dev);
        }

        type = skb->protocol;
        list_for_each_entry_rcu(ptype,
                        &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
                if (ptype->type == type && (ptype->dev == null_or_orig ||
                     ptype->dev == skb->dev || ptype->dev == orig_dev ||
                     ptype->dev == null_or_bond)) {
                        if (pt_prev)
                                ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = ptype;
                }
        }

        if (pt_prev) {
                ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
        } else {
                kfree_skb(skb);
                /* Jamal, now you will not be able to escape explaining
                 * to me how you were going to use this. :-)
                 */
                ret = NET_RX_DROP;
        }

out:
        rcu_read_unlock();
        return ret;
}
EXPORT_SYMBOL(netif_receive_skb);
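
/*
 * Example (illustrative sketch, not part of this file): a NAPI driver's
 * poll routine feeds received frames to netif_receive_skb() from softirq
 * context until it exhausts its budget. The adapter structure and the
 * ring-walking helpers are hypothetical.
 */
#if 0
static int example_poll(struct napi_struct *napi, int budget)
{
        struct example_adapter *adap = container_of(napi,
                                                    struct example_adapter,
                                                    napi);
        struct sk_buff *skb;
        int work_done = 0;

        while (work_done < budget &&
               (skb = example_hw_next_rx(adap)) != NULL) {
                skb->protocol = eth_type_trans(skb, adap->netdev);
                netif_receive_skb(skb);
                work_done++;
        }

        if (work_done < budget) {
                napi_complete(napi);
                example_hw_enable_irq(adap);
        }
        return work_done;
}
#endif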

/* Network device is going away, flush any packets still pending */
static void flush_backlog(void *arg)
{
        struct net_device *dev = arg;
        struct softnet_data *queue = &__get_cpu_var(softnet_data);
        struct sk_buff *skb, *tmp;

        skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
                if (skb->dev == dev) {
                        __skb_unlink(skb, &queue->input_pkt_queue);
                        kfree_skb(skb);
                }
}

static int napi_gro_complete(struct sk_buff *skb)
{
        struct packet_type *ptype;
        __be16 type = skb->protocol;
        struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
        int err = -ENOENT;

        if (NAPI_GRO_CB(skb)->count == 1) {
                skb_shinfo(skb)->gso_size = 0;
                goto out;
        }

        rcu_read_lock();
        list_for_each_entry_rcu(ptype, head, list) {
                if (ptype->type != type || ptype->dev || !ptype->gro_complete)
                        continue;

                err = ptype->gro_complete(skb);
                break;
        }
        rcu_read_unlock();

        if (err) {
                WARN_ON(&ptype->list == head);
                kfree_skb(skb);
                return NET_RX_SUCCESS;
        }

out:
        return netif_receive_skb(skb);
}

static void napi_gro_flush(struct napi_struct *napi)
{
        struct sk_buff *skb, *next;

        for (skb = napi->gro_list; skb; skb = next) {
                next = skb->next;
                skb->next = NULL;
                napi_gro_complete(skb);
        }

        napi->gro_count = 0;
        napi->gro_list = NULL;
}
Herbert Xud565b0a2008-12-15 23:38:52 -08002659
Ben Hutchings5b252f02009-10-29 07:17:09 +00002660enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xud565b0a2008-12-15 23:38:52 -08002661{
2662 struct sk_buff **pp = NULL;
2663 struct packet_type *ptype;
2664 __be16 type = skb->protocol;
2665 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
Herbert Xu0da2afd52008-12-26 14:57:42 -08002666 int same_flow;
Herbert Xud565b0a2008-12-15 23:38:52 -08002667 int mac_len;
Ben Hutchings5b252f02009-10-29 07:17:09 +00002668 enum gro_result ret;
Herbert Xud565b0a2008-12-15 23:38:52 -08002669
2670 if (!(skb->dev->features & NETIF_F_GRO))
2671 goto normal;
2672
David S. Miller4cf704f2009-06-09 00:18:51 -07002673 if (skb_is_gso(skb) || skb_has_frags(skb))
Herbert Xuf17f5c92009-01-14 14:36:12 -08002674 goto normal;
2675
Herbert Xud565b0a2008-12-15 23:38:52 -08002676 rcu_read_lock();
2677 list_for_each_entry_rcu(ptype, head, list) {
Herbert Xud565b0a2008-12-15 23:38:52 -08002678 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2679 continue;
2680
Herbert Xu86911732009-01-29 14:19:50 +00002681 skb_set_network_header(skb, skb_gro_offset(skb));
Herbert Xud565b0a2008-12-15 23:38:52 -08002682 mac_len = skb->network_header - skb->mac_header;
2683 skb->mac_len = mac_len;
2684 NAPI_GRO_CB(skb)->same_flow = 0;
2685 NAPI_GRO_CB(skb)->flush = 0;
Herbert Xu5d38a072009-01-04 16:13:40 -08002686 NAPI_GRO_CB(skb)->free = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002687
Herbert Xud565b0a2008-12-15 23:38:52 -08002688 pp = ptype->gro_receive(&napi->gro_list, skb);
2689 break;
2690 }
2691 rcu_read_unlock();
2692
2693 if (&ptype->list == head)
2694 goto normal;
2695
Herbert Xu0da2afd52008-12-26 14:57:42 -08002696 same_flow = NAPI_GRO_CB(skb)->same_flow;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002697 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
Herbert Xu0da2afd52008-12-26 14:57:42 -08002698
Herbert Xud565b0a2008-12-15 23:38:52 -08002699 if (pp) {
2700 struct sk_buff *nskb = *pp;
2701
2702 *pp = nskb->next;
2703 nskb->next = NULL;
2704 napi_gro_complete(nskb);
Herbert Xu4ae55442009-02-08 18:00:36 +00002705 napi->gro_count--;
Herbert Xud565b0a2008-12-15 23:38:52 -08002706 }
2707
Herbert Xu0da2afd52008-12-26 14:57:42 -08002708 if (same_flow)
Herbert Xud565b0a2008-12-15 23:38:52 -08002709 goto ok;
2710
Herbert Xu4ae55442009-02-08 18:00:36 +00002711 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
Herbert Xud565b0a2008-12-15 23:38:52 -08002712 goto normal;
Herbert Xud565b0a2008-12-15 23:38:52 -08002713
Herbert Xu4ae55442009-02-08 18:00:36 +00002714 napi->gro_count++;
Herbert Xud565b0a2008-12-15 23:38:52 -08002715 NAPI_GRO_CB(skb)->count = 1;
Herbert Xu86911732009-01-29 14:19:50 +00002716 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08002717 skb->next = napi->gro_list;
2718 napi->gro_list = skb;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002719 ret = GRO_HELD;
Herbert Xud565b0a2008-12-15 23:38:52 -08002720
Herbert Xuad0f9902009-02-01 01:24:55 -08002721pull:
Herbert Xucb189782009-05-26 18:50:31 +00002722 if (skb_headlen(skb) < skb_gro_offset(skb)) {
2723 int grow = skb_gro_offset(skb) - skb_headlen(skb);
2724
2725 BUG_ON(skb->end - skb->tail < grow);
2726
2727 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
2728
2729 skb->tail += grow;
2730 skb->data_len -= grow;
2731
2732 skb_shinfo(skb)->frags[0].page_offset += grow;
2733 skb_shinfo(skb)->frags[0].size -= grow;
2734
2735 if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
2736 put_page(skb_shinfo(skb)->frags[0].page);
2737 memmove(skb_shinfo(skb)->frags,
2738 skb_shinfo(skb)->frags + 1,
2739 --skb_shinfo(skb)->nr_frags);
2740 }
Herbert Xuad0f9902009-02-01 01:24:55 -08002741 }
2742
Herbert Xud565b0a2008-12-15 23:38:52 -08002743ok:
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002744 return ret;
Herbert Xud565b0a2008-12-15 23:38:52 -08002745
2746normal:
Herbert Xuad0f9902009-02-01 01:24:55 -08002747 ret = GRO_NORMAL;
2748 goto pull;
Herbert Xu5d38a072009-01-04 16:13:40 -08002749}
Herbert Xu96e93ea2009-01-06 10:49:34 -08002750EXPORT_SYMBOL(dev_gro_receive);
2751
Ben Hutchings5b252f02009-10-29 07:17:09 +00002752static gro_result_t
2753__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xu96e93ea2009-01-06 10:49:34 -08002754{
2755 struct sk_buff *p;
2756
Herbert Xud1c76af2009-03-16 10:50:02 -07002757 if (netpoll_rx_on(skb))
2758 return GRO_NORMAL;
2759
Herbert Xu96e93ea2009-01-06 10:49:34 -08002760 for (p = napi->gro_list; p; p = p->next) {
Joe Perchesf64f9e72009-11-29 16:55:45 -08002761 NAPI_GRO_CB(p)->same_flow =
2762 (p->dev == skb->dev) &&
2763 !compare_ether_header(skb_mac_header(p),
2764 skb_gro_mac_header(skb));
Herbert Xu96e93ea2009-01-06 10:49:34 -08002765 NAPI_GRO_CB(p)->flush = 0;
2766 }
2767
2768 return dev_gro_receive(napi, skb);
2769}
Herbert Xu5d38a072009-01-04 16:13:40 -08002770
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002771gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
Herbert Xu5d38a072009-01-04 16:13:40 -08002772{
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002773 switch (ret) {
2774 case GRO_NORMAL:
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002775 if (netif_receive_skb(skb))
2776 ret = GRO_DROP;
2777 break;
Herbert Xu5d38a072009-01-04 16:13:40 -08002778
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002779 case GRO_DROP:
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002780 case GRO_MERGED_FREE:
Herbert Xu5d38a072009-01-04 16:13:40 -08002781 kfree_skb(skb);
2782 break;
Ben Hutchings5b252f02009-10-29 07:17:09 +00002783
2784 case GRO_HELD:
2785 case GRO_MERGED:
2786 break;
Herbert Xu5d38a072009-01-04 16:13:40 -08002787 }
2788
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002789 return ret;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002790}
2791EXPORT_SYMBOL(napi_skb_finish);
2792
Herbert Xu78a478d2009-05-26 18:50:21 +00002793void skb_gro_reset_offset(struct sk_buff *skb)
2794{
2795 NAPI_GRO_CB(skb)->data_offset = 0;
2796 NAPI_GRO_CB(skb)->frag0 = NULL;
Herbert Xu74895942009-05-26 18:50:27 +00002797 NAPI_GRO_CB(skb)->frag0_len = 0;
Herbert Xu78a478d2009-05-26 18:50:21 +00002798
Herbert Xu78d3fd02009-05-26 18:50:23 +00002799 if (skb->mac_header == skb->tail &&
Herbert Xu74895942009-05-26 18:50:27 +00002800 !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
Herbert Xu78a478d2009-05-26 18:50:21 +00002801 NAPI_GRO_CB(skb)->frag0 =
2802 page_address(skb_shinfo(skb)->frags[0].page) +
2803 skb_shinfo(skb)->frags[0].page_offset;
Herbert Xu74895942009-05-26 18:50:27 +00002804 NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
2805 }
Herbert Xu78a478d2009-05-26 18:50:21 +00002806}
2807EXPORT_SYMBOL(skb_gro_reset_offset);
2808
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002809gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002810{
Herbert Xu86911732009-01-29 14:19:50 +00002811 skb_gro_reset_offset(skb);
2812
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002813 return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08002814}
2815EXPORT_SYMBOL(napi_gro_receive);
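/*
 * Sketch only: converting a poll loop like the one above from
 * netif_receive_skb() to GRO is a drop-in change in the driver; the
 * priv pointer is an assumed driver-private structure.
 *
 *	skb->protocol = eth_type_trans(skb, priv->netdev);
 *	napi_gro_receive(&priv->napi, skb);
 *
 * dev_gro_receive() falls back to the normal path unless the device
 * advertises NETIF_F_GRO, so drivers can call this unconditionally.
 */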
2816
Herbert Xu96e93ea2009-01-06 10:49:34 -08002817void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2818{
Herbert Xu96e93ea2009-01-06 10:49:34 -08002819 __skb_pull(skb, skb_headlen(skb));
2820 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2821
2822 napi->skb = skb;
2823}
2824EXPORT_SYMBOL(napi_reuse_skb);
2825
Herbert Xu76620aa2009-04-16 02:02:07 -07002826struct sk_buff *napi_get_frags(struct napi_struct *napi)
Herbert Xu5d38a072009-01-04 16:13:40 -08002827{
Herbert Xu5d38a072009-01-04 16:13:40 -08002828 struct sk_buff *skb = napi->skb;
Herbert Xu5d38a072009-01-04 16:13:40 -08002829
2830 if (!skb) {
Eric Dumazet89d71a62009-10-13 05:34:20 +00002831 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
2832 if (skb)
2833 napi->skb = skb;
Herbert Xu5d38a072009-01-04 16:13:40 -08002834 }
Herbert Xu96e93ea2009-01-06 10:49:34 -08002835 return skb;
2836}
Herbert Xu76620aa2009-04-16 02:02:07 -07002837EXPORT_SYMBOL(napi_get_frags);
Herbert Xu96e93ea2009-01-06 10:49:34 -08002838
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002839gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
2840 gro_result_t ret)
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002841{
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002842 switch (ret) {
2843 case GRO_NORMAL:
Herbert Xu86911732009-01-29 14:19:50 +00002844 case GRO_HELD:
Ajit Khapardee76b69c2010-02-16 20:25:43 +00002845 skb->protocol = eth_type_trans(skb, skb->dev);
Herbert Xu86911732009-01-29 14:19:50 +00002846
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002847 if (ret == GRO_HELD)
2848 skb_gro_pull(skb, -ETH_HLEN);
2849 else if (netif_receive_skb(skb))
2850 ret = GRO_DROP;
Herbert Xu86911732009-01-29 14:19:50 +00002851 break;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002852
2853 case GRO_DROP:
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002854 case GRO_MERGED_FREE:
2855 napi_reuse_skb(napi, skb);
2856 break;
Ben Hutchings5b252f02009-10-29 07:17:09 +00002857
2858 case GRO_MERGED:
2859 break;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002860 }
2861
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002862 return ret;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002863}
2864EXPORT_SYMBOL(napi_frags_finish);
2865
Herbert Xu76620aa2009-04-16 02:02:07 -07002866struct sk_buff *napi_frags_skb(struct napi_struct *napi)
Herbert Xu96e93ea2009-01-06 10:49:34 -08002867{
Herbert Xu76620aa2009-04-16 02:02:07 -07002868 struct sk_buff *skb = napi->skb;
2869 struct ethhdr *eth;
Herbert Xua5b1cf22009-05-26 18:50:28 +00002870 unsigned int hlen;
2871 unsigned int off;
Herbert Xu76620aa2009-04-16 02:02:07 -07002872
2873 napi->skb = NULL;
2874
2875 skb_reset_mac_header(skb);
2876 skb_gro_reset_offset(skb);
2877
Herbert Xua5b1cf22009-05-26 18:50:28 +00002878 off = skb_gro_offset(skb);
2879 hlen = off + sizeof(*eth);
2880 eth = skb_gro_header_fast(skb, off);
2881 if (skb_gro_header_hard(skb, hlen)) {
2882 eth = skb_gro_header_slow(skb, hlen, off);
2883 if (unlikely(!eth)) {
2884 napi_reuse_skb(napi, skb);
2885 skb = NULL;
2886 goto out;
2887 }
Herbert Xu76620aa2009-04-16 02:02:07 -07002888 }
2889
2890 skb_gro_pull(skb, sizeof(*eth));
2891
2892 /*
2893 * This works because the only protocols we care about don't require
2894 * special handling. We'll fix it up properly at the end.
2895 */
2896 skb->protocol = eth->h_proto;
2897
2898out:
2899 return skb;
2900}
2901EXPORT_SYMBOL(napi_frags_skb);
2902
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002903gro_result_t napi_gro_frags(struct napi_struct *napi)
Herbert Xu76620aa2009-04-16 02:02:07 -07002904{
2905 struct sk_buff *skb = napi_frags_skb(napi);
Herbert Xu96e93ea2009-01-06 10:49:34 -08002906
2907 if (!skb)
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002908 return GRO_DROP;
Herbert Xu96e93ea2009-01-06 10:49:34 -08002909
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002910 return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
Herbert Xu5d38a072009-01-04 16:13:40 -08002911}
2912EXPORT_SYMBOL(napi_gro_frags);
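/*
 * Hedged sketch of the frags interface, with assumed driver variables:
 * a driver that receives directly into pages borrows napi->skb,
 * attaches the page and hands it back without copying headers itself.
 *
 *	struct sk_buff *skb = napi_get_frags(&priv->napi);
 *
 *	if (!skb)
 *		return;
 *	skb_fill_page_desc(skb, 0, page, offset, len);
 *	skb->len += len;
 *	skb->data_len += len;
 *	skb->truesize += len;
 *	napi_gro_frags(&priv->napi);
 *
 * napi_frags_skb() above then pulls the Ethernet header out of frag 0
 * and sets skb->protocol before the packet reaches dev_gro_receive().
 */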
2913
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002914static int process_backlog(struct napi_struct *napi, int quota)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002915{
2916 int work = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002917 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2918 unsigned long start_time = jiffies;
2919
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002920 napi->weight = weight_p;
2921 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002922 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002923
2924 local_irq_disable();
2925 skb = __skb_dequeue(&queue->input_pkt_queue);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002926 if (!skb) {
Herbert Xu8f1ead22009-03-26 00:59:10 -07002927 __napi_complete(napi);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002928 local_irq_enable();
Herbert Xu8f1ead22009-03-26 00:59:10 -07002929 break;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002930 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002931 local_irq_enable();
2932
Herbert Xu8f1ead22009-03-26 00:59:10 -07002933 netif_receive_skb(skb);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002934 } while (++work < quota && jiffies == start_time);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002935
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002936 return work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002937}
2938
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002939/**
2940 * __napi_schedule - schedule for receive
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07002941 * @n: entry to schedule
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002942 *
2943 * The entry's receive function will be scheduled to run
2944 */
Harvey Harrisonb5606c22008-02-13 15:03:16 -08002945void __napi_schedule(struct napi_struct *n)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002946{
2947 unsigned long flags;
2948
2949 local_irq_save(flags);
2950 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2951 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2952 local_irq_restore(flags);
2953}
2954EXPORT_SYMBOL(__napi_schedule);
2955
Herbert Xud565b0a2008-12-15 23:38:52 -08002956void __napi_complete(struct napi_struct *n)
2957{
2958 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2959 BUG_ON(n->gro_list);
2960
2961 list_del(&n->poll_list);
2962 smp_mb__before_clear_bit();
2963 clear_bit(NAPI_STATE_SCHED, &n->state);
2964}
2965EXPORT_SYMBOL(__napi_complete);
2966
2967void napi_complete(struct napi_struct *n)
2968{
2969 unsigned long flags;
2970
2971 /*
2972 * don't let napi dequeue from the cpu poll list
2973 * just in case it's running on a different cpu
2974 */
2975 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2976 return;
2977
2978 napi_gro_flush(n);
2979 local_irq_save(flags);
2980 __napi_complete(n);
2981 local_irq_restore(flags);
2982}
2983EXPORT_SYMBOL(napi_complete);
2984
2985void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2986 int (*poll)(struct napi_struct *, int), int weight)
2987{
2988 INIT_LIST_HEAD(&napi->poll_list);
Herbert Xu4ae55442009-02-08 18:00:36 +00002989 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002990 napi->gro_list = NULL;
Herbert Xu5d38a072009-01-04 16:13:40 -08002991 napi->skb = NULL;
Herbert Xud565b0a2008-12-15 23:38:52 -08002992 napi->poll = poll;
2993 napi->weight = weight;
2994 list_add(&napi->dev_list, &dev->napi_list);
Herbert Xud565b0a2008-12-15 23:38:52 -08002995 napi->dev = dev;
Herbert Xu5d38a072009-01-04 16:13:40 -08002996#ifdef CONFIG_NETPOLL
Herbert Xud565b0a2008-12-15 23:38:52 -08002997 spin_lock_init(&napi->poll_lock);
2998 napi->poll_owner = -1;
2999#endif
3000 set_bit(NAPI_STATE_SCHED, &napi->state);
3001}
3002EXPORT_SYMBOL(netif_napi_add);
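/*
 * An end-to-end sketch of the NAPI contract implemented here; the
 * foo_* driver names are assumptions.  The context is registered at
 * probe time, scheduled from the interrupt handler, and polled by
 * net_rx_action() until it completes:
 *
 *	netif_napi_add(netdev, &priv->napi, foo_poll, 64);
 *	napi_enable(&priv->napi);
 *
 *	static irqreturn_t foo_intr(int irq, void *data)
 *	{
 *		struct foo_priv *priv = data;
 *
 *		foo_disable_rx_irq(priv);
 *		napi_schedule(&priv->napi);
 *		return IRQ_HANDLED;
 *	}
 *
 * The poll routine re-enables the device interrupt only after calling
 * napi_complete(), mirroring the NAPI_STATE_SCHED handshake that
 * net_rx_action() and __napi_complete() enforce.
 */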
3003
3004void netif_napi_del(struct napi_struct *napi)
3005{
3006 struct sk_buff *skb, *next;
3007
Peter P Waskiewicz Jrd7b06632008-12-26 01:35:35 -08003008 list_del_init(&napi->dev_list);
Herbert Xu76620aa2009-04-16 02:02:07 -07003009 napi_free_frags(napi);
Herbert Xud565b0a2008-12-15 23:38:52 -08003010
3011 for (skb = napi->gro_list; skb; skb = next) {
3012 next = skb->next;
3013 skb->next = NULL;
3014 kfree_skb(skb);
3015 }
3016
3017 napi->gro_list = NULL;
Herbert Xu4ae55442009-02-08 18:00:36 +00003018 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08003019}
3020EXPORT_SYMBOL(netif_napi_del);
3021
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003022
Linus Torvalds1da177e2005-04-16 15:20:36 -07003023static void net_rx_action(struct softirq_action *h)
3024{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003025 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
Stephen Hemminger24f8b232008-11-03 17:14:38 -08003026 unsigned long time_limit = jiffies + 2;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07003027 int budget = netdev_budget;
Matt Mackall53fb95d2005-08-11 19:27:43 -07003028 void *have;
3029
Linus Torvalds1da177e2005-04-16 15:20:36 -07003030 local_irq_disable();
3031
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003032 while (!list_empty(list)) {
3033 struct napi_struct *n;
3034 int work, weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003035
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003036 /* If the softirq window is exhausted then punt.
Stephen Hemminger24f8b232008-11-03 17:14:38 -08003037 * Allow this to run for 2 jiffies, which allows
3038 * an average latency of 1.5/HZ.
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003039 */
Stephen Hemminger24f8b232008-11-03 17:14:38 -08003040 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003041 goto softnet_break;
3042
3043 local_irq_enable();
3044
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003045 /* Even though interrupts have been re-enabled, this
3046 * access is safe because interrupts can only add new
3047 * entries to the tail of this list, and only ->poll()
3048 * calls can remove this head entry from the list.
3049 */
stephen hemmingere5e26d72010-02-24 14:01:38 +00003050 n = list_first_entry(list, struct napi_struct, poll_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003051
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003052 have = netpoll_poll_lock(n);
3053
3054 weight = n->weight;
3055
David S. Miller0a7606c2007-10-29 21:28:47 -07003056 /* This NAPI_STATE_SCHED test is for avoiding a race
3057 * with netpoll's poll_napi(). Only the entity which
3058 * obtains the lock and sees NAPI_STATE_SCHED set will
3059 * actually make the ->poll() call. Therefore we avoid
3060 * accidentally calling ->poll() when NAPI is not scheduled.
3061 */
3062 work = 0;
Neil Horman4ea7e382009-05-21 07:36:08 +00003063 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
David S. Miller0a7606c2007-10-29 21:28:47 -07003064 work = n->poll(n, weight);
Neil Horman4ea7e382009-05-21 07:36:08 +00003065 trace_napi_poll(n);
3066 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003067
3068 WARN_ON_ONCE(work > weight);
3069
3070 budget -= work;
3071
3072 local_irq_disable();
3073
3074 /* Drivers must not modify the NAPI state if they
3075 * consume the entire weight. In such cases this code
3076 * still "owns" the NAPI instance and therefore can
3077 * move the instance around on the list at-will.
3078 */
David S. Millerfed17f32008-01-07 21:00:40 -08003079 if (unlikely(work == weight)) {
Herbert Xuff780cd2009-06-26 19:27:04 -07003080 if (unlikely(napi_disable_pending(n))) {
3081 local_irq_enable();
3082 napi_complete(n);
3083 local_irq_disable();
3084 } else
David S. Millerfed17f32008-01-07 21:00:40 -08003085 list_move_tail(&n->poll_list, list);
3086 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003087
3088 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003089 }
3090out:
Shannon Nelson515e06c2007-06-23 23:09:23 -07003091 local_irq_enable();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003092
Chris Leechdb217332006-06-17 21:24:58 -07003093#ifdef CONFIG_NET_DMA
3094 /*
3095 * There may not be any more sk_buffs coming right now, so push
3096 * any pending DMA copies to hardware
3097 */
Dan Williams2ba05622009-01-06 11:38:14 -07003098 dma_issue_pending_all();
Chris Leechdb217332006-06-17 21:24:58 -07003099#endif
Stephen Hemmingerbea33482007-10-03 16:41:36 -07003100
Linus Torvalds1da177e2005-04-16 15:20:36 -07003101 return;
3102
3103softnet_break:
3104 __get_cpu_var(netdev_rx_stat).time_squeeze++;
3105 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3106 goto out;
3107}
3108
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003109static gifconf_func_t *gifconf_list[NPROTO];
Linus Torvalds1da177e2005-04-16 15:20:36 -07003110
3111/**
3112 * register_gifconf - register a SIOCGIF handler
3113 * @family: Address family
3114 * @gifconf: Function handler
3115 *
3116 * Register protocol dependent address dumping routines. The handler
3117 * that is passed must not be freed or reused until it has been replaced
3118 * by another handler.
3119 */
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003120int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003121{
3122 if (family >= NPROTO)
3123 return -EINVAL;
3124 gifconf_list[family] = gifconf;
3125 return 0;
3126}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003127EXPORT_SYMBOL(register_gifconf);
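/*
 * Sketch of how an address family hooks in, modeled on IPv4 (devinet.c
 * registers inet_gifconf; treat the exact call site as an assumption):
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 *
 * dev_ifconf() below then invokes the handler once per device, passing
 * a NULL buffer when the caller is only sizing the result.
 */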
Linus Torvalds1da177e2005-04-16 15:20:36 -07003128
3129
3130/*
3131 * Map an interface index to its name (SIOCGIFNAME)
3132 */
3133
3134/*
3135 * We need this ioctl for efficient implementation of the
3136 * if_indextoname() function required by the IPv6 API. Without
3137 * it, we would have to search all the interfaces to find a
3138 * match. --pb
3139 */
3140
Eric W. Biederman881d9662007-09-17 11:56:21 -07003141static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003142{
3143 struct net_device *dev;
3144 struct ifreq ifr;
3145
3146 /*
3147 * Fetch the caller's info block.
3148 */
3149
3150 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3151 return -EFAULT;
3152
Eric Dumazetfb699dfd2009-10-19 19:18:49 +00003153 rcu_read_lock();
3154 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003155 if (!dev) {
Eric Dumazetfb699dfd2009-10-19 19:18:49 +00003156 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003157 return -ENODEV;
3158 }
3159
3160 strcpy(ifr.ifr_name, dev->name);
Eric Dumazetfb699dfd2009-10-19 19:18:49 +00003161 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003162
3163 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
3164 return -EFAULT;
3165 return 0;
3166}
3167
3168/*
3169 * Perform a SIOCGIFCONF call. This structure will change
3170 * size eventually, and there is nothing I can do about it.
3171 * Thus we will need a 'compatibility mode'.
3172 */
3173
Eric W. Biederman881d9662007-09-17 11:56:21 -07003174static int dev_ifconf(struct net *net, char __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003175{
3176 struct ifconf ifc;
3177 struct net_device *dev;
3178 char __user *pos;
3179 int len;
3180 int total;
3181 int i;
3182
3183 /*
3184 * Fetch the caller's info block.
3185 */
3186
3187 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
3188 return -EFAULT;
3189
3190 pos = ifc.ifc_buf;
3191 len = ifc.ifc_len;
3192
3193 /*
3194 * Loop over the interfaces, and write an info block for each.
3195 */
3196
3197 total = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003198 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003199 for (i = 0; i < NPROTO; i++) {
3200 if (gifconf_list[i]) {
3201 int done;
3202 if (!pos)
3203 done = gifconf_list[i](dev, NULL, 0);
3204 else
3205 done = gifconf_list[i](dev, pos + total,
3206 len - total);
3207 if (done < 0)
3208 return -EFAULT;
3209 total += done;
3210 }
3211 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003212 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003213
3214 /*
3215 * All done. Write the updated control block back to the caller.
3216 */
3217 ifc.ifc_len = total;
3218
3219 /*
3220 * Both BSD and Solaris return 0 here, so we do too.
3221 */
3222 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
3223}
3224
3225#ifdef CONFIG_PROC_FS
3226/*
3227 * This is invoked by the /proc filesystem handler to display a device
3228 * in detail.
3229 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003230void *dev_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003231 __acquires(RCU)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003232{
Denis V. Luneve372c412007-11-19 22:31:54 -08003233 struct net *net = seq_file_net(seq);
Pavel Emelianov7562f872007-05-03 15:13:45 -07003234 loff_t off;
3235 struct net_device *dev;
3236
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003237 rcu_read_lock();
Pavel Emelianov7562f872007-05-03 15:13:45 -07003238 if (!*pos)
3239 return SEQ_START_TOKEN;
3240
3241 off = 1;
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003242 for_each_netdev_rcu(net, dev)
Pavel Emelianov7562f872007-05-03 15:13:45 -07003243 if (off++ == *pos)
3244 return dev;
3245
3246 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003247}
3248
3249void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3250{
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003251 struct net_device *dev = (v == SEQ_START_TOKEN) ?
3252 first_net_device(seq_file_net(seq)) :
3253 next_net_device((struct net_device *)v);
3254
Linus Torvalds1da177e2005-04-16 15:20:36 -07003255 ++*pos;
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003256 return rcu_dereference(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003257}
3258
3259void dev_seq_stop(struct seq_file *seq, void *v)
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003260 __releases(RCU)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003261{
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003262 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003263}
3264
3265static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3266{
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08003267 const struct net_device_stats *stats = dev_get_stats(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003268
Jesper Dangaard Brouer2d13baf2010-01-05 05:50:52 +00003269 seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
Rusty Russell5a1b5892007-04-28 21:04:03 -07003270 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
3271 dev->name, stats->rx_bytes, stats->rx_packets,
3272 stats->rx_errors,
3273 stats->rx_dropped + stats->rx_missed_errors,
3274 stats->rx_fifo_errors,
3275 stats->rx_length_errors + stats->rx_over_errors +
3276 stats->rx_crc_errors + stats->rx_frame_errors,
3277 stats->rx_compressed, stats->multicast,
3278 stats->tx_bytes, stats->tx_packets,
3279 stats->tx_errors, stats->tx_dropped,
3280 stats->tx_fifo_errors, stats->collisions,
3281 stats->tx_carrier_errors +
3282 stats->tx_aborted_errors +
3283 stats->tx_window_errors +
3284 stats->tx_heartbeat_errors,
3285 stats->tx_compressed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003286}
3287
3288/*
3289 * Called from the PROCfs module. This now uses the new arbitrary sized
3290 * /proc/net interface to create /proc/net/dev
3291 */
3292static int dev_seq_show(struct seq_file *seq, void *v)
3293{
3294 if (v == SEQ_START_TOKEN)
3295 seq_puts(seq, "Inter-| Receive "
3296 " | Transmit\n"
3297 " face |bytes packets errs drop fifo frame "
3298 "compressed multicast|bytes packets errs "
3299 "drop fifo colls carrier compressed\n");
3300 else
3301 dev_seq_printf_stats(seq, v);
3302 return 0;
3303}
3304
3305static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3306{
3307 struct netif_rx_stats *rc = NULL;
3308
Mike Travis0c0b0ac2008-05-02 16:43:08 -07003309 while (*pos < nr_cpu_ids)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003310 if (cpu_online(*pos)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003311 rc = &per_cpu(netdev_rx_stat, *pos);
3312 break;
3313 } else
3314 ++*pos;
3315 return rc;
3316}
3317
3318static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3319{
3320 return softnet_get_online(pos);
3321}
3322
3323static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3324{
3325 ++*pos;
3326 return softnet_get_online(pos);
3327}
3328
3329static void softnet_seq_stop(struct seq_file *seq, void *v)
3330{
3331}
3332
3333static int softnet_seq_show(struct seq_file *seq, void *v)
3334{
3335 struct netif_rx_stats *s = v;
3336
3337 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
Stephen Hemminger31aa02c2005-06-23 20:12:48 -07003338 s->total, s->dropped, s->time_squeeze, 0,
Stephen Hemmingerc1ebcdb2005-06-23 20:08:59 -07003339 0, 0, 0, 0, /* was fastroute */
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003340 s->cpu_collision);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003341 return 0;
3342}
3343
Stephen Hemmingerf6908082007-03-12 14:34:29 -07003344static const struct seq_operations dev_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003345 .start = dev_seq_start,
3346 .next = dev_seq_next,
3347 .stop = dev_seq_stop,
3348 .show = dev_seq_show,
3349};
3350
3351static int dev_seq_open(struct inode *inode, struct file *file)
3352{
Denis V. Luneve372c412007-11-19 22:31:54 -08003353 return seq_open_net(inode, file, &dev_seq_ops,
3354 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003355}
3356
Arjan van de Ven9a321442007-02-12 00:55:35 -08003357static const struct file_operations dev_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003358 .owner = THIS_MODULE,
3359 .open = dev_seq_open,
3360 .read = seq_read,
3361 .llseek = seq_lseek,
Denis V. Luneve372c412007-11-19 22:31:54 -08003362 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003363};
3364
Stephen Hemmingerf6908082007-03-12 14:34:29 -07003365static const struct seq_operations softnet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003366 .start = softnet_seq_start,
3367 .next = softnet_seq_next,
3368 .stop = softnet_seq_stop,
3369 .show = softnet_seq_show,
3370};
3371
3372static int softnet_seq_open(struct inode *inode, struct file *file)
3373{
3374 return seq_open(file, &softnet_seq_ops);
3375}
3376
Arjan van de Ven9a321442007-02-12 00:55:35 -08003377static const struct file_operations softnet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003378 .owner = THIS_MODULE,
3379 .open = softnet_seq_open,
3380 .read = seq_read,
3381 .llseek = seq_lseek,
3382 .release = seq_release,
3383};
3384
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003385static void *ptype_get_idx(loff_t pos)
3386{
3387 struct packet_type *pt = NULL;
3388 loff_t i = 0;
3389 int t;
3390
3391 list_for_each_entry_rcu(pt, &ptype_all, list) {
3392 if (i == pos)
3393 return pt;
3394 ++i;
3395 }
3396
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003397 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003398 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3399 if (i == pos)
3400 return pt;
3401 ++i;
3402 }
3403 }
3404 return NULL;
3405}
3406
3407static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemminger72348a42008-01-21 02:27:29 -08003408 __acquires(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003409{
3410 rcu_read_lock();
3411 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3412}
3413
3414static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3415{
3416 struct packet_type *pt;
3417 struct list_head *nxt;
3418 int hash;
3419
3420 ++*pos;
3421 if (v == SEQ_START_TOKEN)
3422 return ptype_get_idx(0);
3423
3424 pt = v;
3425 nxt = pt->list.next;
3426 if (pt->type == htons(ETH_P_ALL)) {
3427 if (nxt != &ptype_all)
3428 goto found;
3429 hash = 0;
3430 nxt = ptype_base[0].next;
3431 } else
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003432 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003433
3434 while (nxt == &ptype_base[hash]) {
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003435 if (++hash >= PTYPE_HASH_SIZE)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003436 return NULL;
3437 nxt = ptype_base[hash].next;
3438 }
3439found:
3440 return list_entry(nxt, struct packet_type, list);
3441}
3442
3443static void ptype_seq_stop(struct seq_file *seq, void *v)
Stephen Hemminger72348a42008-01-21 02:27:29 -08003444 __releases(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003445{
3446 rcu_read_unlock();
3447}
3448
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003449static int ptype_seq_show(struct seq_file *seq, void *v)
3450{
3451 struct packet_type *pt = v;
3452
3453 if (v == SEQ_START_TOKEN)
3454 seq_puts(seq, "Type Device Function\n");
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09003455 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003456 if (pt->type == htons(ETH_P_ALL))
3457 seq_puts(seq, "ALL ");
3458 else
3459 seq_printf(seq, "%04x", ntohs(pt->type));
3460
Alexey Dobriyan908cd2d2008-11-16 19:50:35 -08003461 seq_printf(seq, " %-8s %pF\n",
3462 pt->dev ? pt->dev->name : "", pt->func);
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003463 }
3464
3465 return 0;
3466}
3467
3468static const struct seq_operations ptype_seq_ops = {
3469 .start = ptype_seq_start,
3470 .next = ptype_seq_next,
3471 .stop = ptype_seq_stop,
3472 .show = ptype_seq_show,
3473};
3474
3475static int ptype_seq_open(struct inode *inode, struct file *file)
3476{
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07003477 return seq_open_net(inode, file, &ptype_seq_ops,
3478 sizeof(struct seq_net_private));
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003479}
3480
3481static const struct file_operations ptype_seq_fops = {
3482 .owner = THIS_MODULE,
3483 .open = ptype_seq_open,
3484 .read = seq_read,
3485 .llseek = seq_lseek,
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07003486 .release = seq_release_net,
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003487};
3488
3489
Pavel Emelyanov46650792007-10-08 20:38:39 -07003490static int __net_init dev_proc_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003491{
3492 int rc = -ENOMEM;
3493
Eric W. Biederman881d9662007-09-17 11:56:21 -07003494 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003495 goto out;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003496 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003497 goto out_dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003498 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003499 goto out_softnet;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003500
Eric W. Biederman881d9662007-09-17 11:56:21 -07003501 if (wext_proc_init(net))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003502 goto out_ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003503 rc = 0;
3504out:
3505 return rc;
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003506out_ptype:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003507 proc_net_remove(net, "ptype");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003508out_softnet:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003509 proc_net_remove(net, "softnet_stat");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003510out_dev:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003511 proc_net_remove(net, "dev");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003512 goto out;
3513}
Eric W. Biederman881d9662007-09-17 11:56:21 -07003514
Pavel Emelyanov46650792007-10-08 20:38:39 -07003515static void __net_exit dev_proc_net_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07003516{
3517 wext_proc_exit(net);
3518
3519 proc_net_remove(net, "ptype");
3520 proc_net_remove(net, "softnet_stat");
3521 proc_net_remove(net, "dev");
3522}
3523
Denis V. Lunev022cbae2007-11-13 03:23:50 -08003524static struct pernet_operations __net_initdata dev_proc_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07003525 .init = dev_proc_net_init,
3526 .exit = dev_proc_net_exit,
3527};
3528
3529static int __init dev_proc_init(void)
3530{
3531 return register_pernet_subsys(&dev_proc_ops);
3532}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003533#else
3534#define dev_proc_init() 0
3535#endif /* CONFIG_PROC_FS */
3536
3537
3538/**
3539 * netdev_set_master - set up master/slave pair
3540 * @slave: slave device
3541 * @master: new master device
3542 *
3543 * Changes the master device of the slave. Pass %NULL to break the
3544 * bonding. The caller must hold the RTNL semaphore. On a failure
3545 * a negative errno code is returned. On success the reference counts
3546 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3547 * function returns zero.
3548 */
3549int netdev_set_master(struct net_device *slave, struct net_device *master)
3550{
3551 struct net_device *old = slave->master;
3552
3553 ASSERT_RTNL();
3554
3555 if (master) {
3556 if (old)
3557 return -EBUSY;
3558 dev_hold(master);
3559 }
3560
3561 slave->master = master;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003562
Linus Torvalds1da177e2005-04-16 15:20:36 -07003563 synchronize_net();
3564
3565 if (old)
3566 dev_put(old);
3567
3568 if (master)
3569 slave->flags |= IFF_SLAVE;
3570 else
3571 slave->flags &= ~IFF_SLAVE;
3572
3573 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3574 return 0;
3575}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003576EXPORT_SYMBOL(netdev_set_master);
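/*
 * Usage sketch in the bonding style; the device variables are assumed.
 * Enslave under the RTNL and break the pairing by passing NULL:
 *
 *	rtnl_lock();
 *	err = netdev_set_master(slave_dev, bond_dev);
 *	...
 *	netdev_set_master(slave_dev, NULL);
 *	rtnl_unlock();
 */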
Linus Torvalds1da177e2005-04-16 15:20:36 -07003577
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003578static void dev_change_rx_flags(struct net_device *dev, int flags)
3579{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003580 const struct net_device_ops *ops = dev->netdev_ops;
3581
3582 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3583 ops->ndo_change_rx_flags(dev, flags);
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003584}
3585
Wang Chendad9b332008-06-18 01:48:28 -07003586static int __dev_set_promiscuity(struct net_device *dev, int inc)
Patrick McHardy4417da62007-06-27 01:28:10 -07003587{
3588 unsigned short old_flags = dev->flags;
David Howells8192b0c2008-11-14 10:39:10 +11003589 uid_t uid;
3590 gid_t gid;
Patrick McHardy4417da62007-06-27 01:28:10 -07003591
Patrick McHardy24023452007-07-14 18:51:31 -07003592 ASSERT_RTNL();
3593
Wang Chendad9b332008-06-18 01:48:28 -07003594 dev->flags |= IFF_PROMISC;
3595 dev->promiscuity += inc;
3596 if (dev->promiscuity == 0) {
3597 /*
3598 * Avoid overflow.
3599 * If inc causes overflow, leave promiscuity untouched and return an error.
3600 */
3601 if (inc < 0)
3602 dev->flags &= ~IFF_PROMISC;
3603 else {
3604 dev->promiscuity -= inc;
3605 printk(KERN_WARNING "%s: promiscuity touches roof, "
3606 "set promiscuity failed, promiscuity feature "
3607 "of device might be broken.\n", dev->name);
3608 return -EOVERFLOW;
3609 }
3610 }
Patrick McHardy4417da62007-06-27 01:28:10 -07003611 if (dev->flags != old_flags) {
3612 printk(KERN_INFO "device %s %s promiscuous mode\n",
3613 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3614 "left");
David Howells8192b0c2008-11-14 10:39:10 +11003615 if (audit_enabled) {
3616 current_uid_gid(&uid, &gid);
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05003617 audit_log(current->audit_context, GFP_ATOMIC,
3618 AUDIT_ANOM_PROMISCUOUS,
3619 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3620 dev->name, (dev->flags & IFF_PROMISC),
3621 (old_flags & IFF_PROMISC),
3622 audit_get_loginuid(current),
David Howells8192b0c2008-11-14 10:39:10 +11003623 uid, gid,
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05003624 audit_get_sessionid(current));
David Howells8192b0c2008-11-14 10:39:10 +11003625 }
Patrick McHardy24023452007-07-14 18:51:31 -07003626
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003627 dev_change_rx_flags(dev, IFF_PROMISC);
Patrick McHardy4417da62007-06-27 01:28:10 -07003628 }
Wang Chendad9b332008-06-18 01:48:28 -07003629 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003630}
3631
Linus Torvalds1da177e2005-04-16 15:20:36 -07003632/**
3633 * dev_set_promiscuity - update promiscuity count on a device
3634 * @dev: device
3635 * @inc: modifier
3636 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07003637 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07003638 * remains above zero the interface remains promiscuous. Once it hits zero
3639 * the device reverts back to normal filtering operation. A negative inc
3640 * value is used to drop promiscuity on the device.
Wang Chendad9b332008-06-18 01:48:28 -07003641 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003642 */
Wang Chendad9b332008-06-18 01:48:28 -07003643int dev_set_promiscuity(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003644{
3645 unsigned short old_flags = dev->flags;
Wang Chendad9b332008-06-18 01:48:28 -07003646 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003647
Wang Chendad9b332008-06-18 01:48:28 -07003648 err = __dev_set_promiscuity(dev, inc);
Patrick McHardy4b5a6982008-07-06 15:49:08 -07003649 if (err < 0)
Wang Chendad9b332008-06-18 01:48:28 -07003650 return err;
Patrick McHardy4417da62007-06-27 01:28:10 -07003651 if (dev->flags != old_flags)
3652 dev_set_rx_mode(dev);
Wang Chendad9b332008-06-18 01:48:28 -07003653 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003654}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003655EXPORT_SYMBOL(dev_set_promiscuity);
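/*
 * Usage sketch: a packet-capture style user takes and later drops one
 * promiscuity reference, so nested users cannot turn the mode off
 * under each other.  Error handling is elided.
 *
 *	rtnl_lock();
 *	err = dev_set_promiscuity(dev, 1);
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	dev_set_promiscuity(dev, -1);
 *	rtnl_unlock();
 *
 * dev_set_allmulti() below follows the same counted pattern for
 * all-multicast reception.
 */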
Linus Torvalds1da177e2005-04-16 15:20:36 -07003656
3657/**
3658 * dev_set_allmulti - update allmulti count on a device
3659 * @dev: device
3660 * @inc: modifier
3661 *
3662 * Add or remove reception of all multicast frames to a device. While the
3663 * count in the device remains above zero the interface remains listening
3664 * to all multicast frames. Once it hits zero the device reverts back to normal
3665 * filtering operation. A negative @inc value is used to drop the counter
3666 * when releasing a resource needing all multicasts.
Wang Chendad9b332008-06-18 01:48:28 -07003667 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003668 */
3669
Wang Chendad9b332008-06-18 01:48:28 -07003670int dev_set_allmulti(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003671{
3672 unsigned short old_flags = dev->flags;
3673
Patrick McHardy24023452007-07-14 18:51:31 -07003674 ASSERT_RTNL();
3675
Linus Torvalds1da177e2005-04-16 15:20:36 -07003676 dev->flags |= IFF_ALLMULTI;
Wang Chendad9b332008-06-18 01:48:28 -07003677 dev->allmulti += inc;
3678 if (dev->allmulti == 0) {
3679 /*
3680 * Avoid overflow.
3681 * If inc causes overflow, untouch allmulti and return error.
3682 */
3683 if (inc < 0)
3684 dev->flags &= ~IFF_ALLMULTI;
3685 else {
3686 dev->allmulti -= inc;
3687 printk(KERN_WARNING "%s: allmulti touches roof, "
3688 "set allmulti failed, allmulti feature of "
3689 "device might be broken.\n", dev->name);
3690 return -EOVERFLOW;
3691 }
3692 }
Patrick McHardy24023452007-07-14 18:51:31 -07003693 if (dev->flags ^ old_flags) {
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003694 dev_change_rx_flags(dev, IFF_ALLMULTI);
Patrick McHardy4417da62007-06-27 01:28:10 -07003695 dev_set_rx_mode(dev);
Patrick McHardy24023452007-07-14 18:51:31 -07003696 }
Wang Chendad9b332008-06-18 01:48:28 -07003697 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003698}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003699EXPORT_SYMBOL(dev_set_allmulti);
Patrick McHardy4417da62007-06-27 01:28:10 -07003700
3701/*
3702 * Upload unicast and multicast address lists to device and
3703 * configure RX filtering. When the device doesn't support unicast
Joe Perches53ccaae2007-12-20 14:02:06 -08003704 * filtering it is put in promiscuous mode while unicast addresses
Patrick McHardy4417da62007-06-27 01:28:10 -07003705 * are present.
3706 */
3707void __dev_set_rx_mode(struct net_device *dev)
3708{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003709 const struct net_device_ops *ops = dev->netdev_ops;
3710
Patrick McHardy4417da62007-06-27 01:28:10 -07003711 /* dev_open will call this function so the list will stay sane. */
3712 if (!(dev->flags&IFF_UP))
3713 return;
3714
3715 if (!netif_device_present(dev))
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +09003716 return;
Patrick McHardy4417da62007-06-27 01:28:10 -07003717
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003718 if (ops->ndo_set_rx_mode)
3719 ops->ndo_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003720 else {
3721 /* Unicast address changes may only happen under the rtnl,
3722 * therefore calling __dev_set_promiscuity here is safe.
3723 */
Jiri Pirko32e7bfc2010-01-25 13:36:10 -08003724 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
Patrick McHardy4417da62007-06-27 01:28:10 -07003725 __dev_set_promiscuity(dev, 1);
3726 dev->uc_promisc = 1;
Jiri Pirko32e7bfc2010-01-25 13:36:10 -08003727 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
Patrick McHardy4417da62007-06-27 01:28:10 -07003728 __dev_set_promiscuity(dev, -1);
3729 dev->uc_promisc = 0;
3730 }
3731
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003732 if (ops->ndo_set_multicast_list)
3733 ops->ndo_set_multicast_list(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003734 }
3735}
3736
3737void dev_set_rx_mode(struct net_device *dev)
3738{
David S. Millerb9e40852008-07-15 00:15:08 -07003739 netif_addr_lock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003740 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003741 netif_addr_unlock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003742}
3743
Jiri Pirkof001fde2009-05-05 02:48:28 +00003744/* hw addresses list handling functions */
3745
Jiri Pirko31278e72009-06-17 01:12:19 +00003746static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3747 int addr_len, unsigned char addr_type)
Jiri Pirkof001fde2009-05-05 02:48:28 +00003748{
3749 struct netdev_hw_addr *ha;
3750 int alloc_size;
3751
3752 if (addr_len > MAX_ADDR_LEN)
3753 return -EINVAL;
3754
Jiri Pirko31278e72009-06-17 01:12:19 +00003755 list_for_each_entry(ha, &list->list, list) {
Jiri Pirkoccffad252009-05-22 23:22:17 +00003756 if (!memcmp(ha->addr, addr, addr_len) &&
3757 ha->type == addr_type) {
3758 ha->refcount++;
3759 return 0;
3760 }
3761 }
3762
3763
Jiri Pirkof001fde2009-05-05 02:48:28 +00003764 alloc_size = sizeof(*ha);
3765 if (alloc_size < L1_CACHE_BYTES)
3766 alloc_size = L1_CACHE_BYTES;
3767 ha = kmalloc(alloc_size, GFP_ATOMIC);
3768 if (!ha)
3769 return -ENOMEM;
3770 memcpy(ha->addr, addr, addr_len);
3771 ha->type = addr_type;
Jiri Pirkoccffad252009-05-22 23:22:17 +00003772 ha->refcount = 1;
3773 ha->synced = false;
Jiri Pirko31278e72009-06-17 01:12:19 +00003774 list_add_tail_rcu(&ha->list, &list->list);
3775 list->count++;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003776 return 0;
3777}
3778
3779static void ha_rcu_free(struct rcu_head *head)
3780{
3781 struct netdev_hw_addr *ha;
3782
3783 ha = container_of(head, struct netdev_hw_addr, rcu_head);
3784 kfree(ha);
3785}
3786
Jiri Pirko31278e72009-06-17 01:12:19 +00003787static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3788 int addr_len, unsigned char addr_type)
Jiri Pirkof001fde2009-05-05 02:48:28 +00003789{
3790 struct netdev_hw_addr *ha;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003791
Jiri Pirko31278e72009-06-17 01:12:19 +00003792 list_for_each_entry(ha, &list->list, list) {
Jiri Pirkoccffad252009-05-22 23:22:17 +00003793 if (!memcmp(ha->addr, addr, addr_len) &&
Jiri Pirkof001fde2009-05-05 02:48:28 +00003794 (ha->type == addr_type || !addr_type)) {
Jiri Pirkoccffad252009-05-22 23:22:17 +00003795 if (--ha->refcount)
3796 return 0;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003797 list_del_rcu(&ha->list);
3798 call_rcu(&ha->rcu_head, ha_rcu_free);
Jiri Pirko31278e72009-06-17 01:12:19 +00003799 list->count--;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003800 return 0;
3801 }
3802 }
3803 return -ENOENT;
3804}
3805
Jiri Pirko31278e72009-06-17 01:12:19 +00003806static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3807 struct netdev_hw_addr_list *from_list,
3808 int addr_len,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003809 unsigned char addr_type)
Jiri Pirkof001fde2009-05-05 02:48:28 +00003810{
3811 int err;
3812 struct netdev_hw_addr *ha, *ha2;
3813 unsigned char type;
3814
Jiri Pirko31278e72009-06-17 01:12:19 +00003815 list_for_each_entry(ha, &from_list->list, list) {
Jiri Pirkof001fde2009-05-05 02:48:28 +00003816 type = addr_type ? addr_type : ha->type;
Jiri Pirko31278e72009-06-17 01:12:19 +00003817 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003818 if (err)
3819 goto unroll;
3820 }
3821 return 0;
3822
3823unroll:
Jiri Pirko31278e72009-06-17 01:12:19 +00003824 list_for_each_entry(ha2, &from_list->list, list) {
Jiri Pirkof001fde2009-05-05 02:48:28 +00003825 if (ha2 == ha)
3826 break;
3827 type = addr_type ? addr_type : ha2->type;
Jiri Pirko31278e72009-06-17 01:12:19 +00003828 __hw_addr_del(to_list, ha2->addr, addr_len, type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003829 }
3830 return err;
3831}
3832
Jiri Pirko31278e72009-06-17 01:12:19 +00003833static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3834 struct netdev_hw_addr_list *from_list,
3835 int addr_len,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003836 unsigned char addr_type)
Jiri Pirkof001fde2009-05-05 02:48:28 +00003837{
3838 struct netdev_hw_addr *ha;
3839 unsigned char type;
3840
Jiri Pirko31278e72009-06-17 01:12:19 +00003841 list_for_each_entry(ha, &from_list->list, list) {
Jiri Pirkof001fde2009-05-05 02:48:28 +00003842 type = addr_type ? addr_type : ha->type;
Jiri Pirko31278e72009-06-17 01:12:19 +00003843 __hw_addr_del(to_list, ha->addr, addr_len, type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003844 }
3845}
3846
Jiri Pirko31278e72009-06-17 01:12:19 +00003847static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3848 struct netdev_hw_addr_list *from_list,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003849 int addr_len)
3850{
3851 int err = 0;
3852 struct netdev_hw_addr *ha, *tmp;
3853
Jiri Pirko31278e72009-06-17 01:12:19 +00003854 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
Jiri Pirkoccffad252009-05-22 23:22:17 +00003855 if (!ha->synced) {
Jiri Pirko31278e72009-06-17 01:12:19 +00003856 err = __hw_addr_add(to_list, ha->addr,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003857 addr_len, ha->type);
3858 if (err)
3859 break;
3860 ha->synced = true;
3861 ha->refcount++;
3862 } else if (ha->refcount == 1) {
Jiri Pirko31278e72009-06-17 01:12:19 +00003863 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3864 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
Jiri Pirkoccffad252009-05-22 23:22:17 +00003865 }
3866 }
3867 return err;
3868}
3869
Jiri Pirko31278e72009-06-17 01:12:19 +00003870static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3871 struct netdev_hw_addr_list *from_list,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003872 int addr_len)
3873{
3874 struct netdev_hw_addr *ha, *tmp;
3875
Jiri Pirko31278e72009-06-17 01:12:19 +00003876 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
Jiri Pirkoccffad252009-05-22 23:22:17 +00003877 if (ha->synced) {
Jiri Pirko31278e72009-06-17 01:12:19 +00003878 __hw_addr_del(to_list, ha->addr,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003879 addr_len, ha->type);
3880 ha->synced = false;
Jiri Pirko31278e72009-06-17 01:12:19 +00003881 __hw_addr_del(from_list, ha->addr,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003882 addr_len, ha->type);
3883 }
3884 }
3885}
3886
Jiri Pirko31278e72009-06-17 01:12:19 +00003887static void __hw_addr_flush(struct netdev_hw_addr_list *list)
Jiri Pirkof001fde2009-05-05 02:48:28 +00003888{
3889 struct netdev_hw_addr *ha, *tmp;
3890
Jiri Pirko31278e72009-06-17 01:12:19 +00003891 list_for_each_entry_safe(ha, tmp, &list->list, list) {
Jiri Pirkof001fde2009-05-05 02:48:28 +00003892 list_del_rcu(&ha->list);
3893 call_rcu(&ha->rcu_head, ha_rcu_free);
3894 }
Jiri Pirko31278e72009-06-17 01:12:19 +00003895 list->count = 0;
3896}
3897
3898static void __hw_addr_init(struct netdev_hw_addr_list *list)
3899{
3900 INIT_LIST_HEAD(&list->list);
3901 list->count = 0;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003902}
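/*
 * The helpers above implement a refcounted address list: adding an
 * existing (address, type) pair only bumps ha->refcount, and deletion
 * frees the entry via RCU only when that count drops to zero.  A
 * worked example, assuming the list starts empty:
 *
 *	__hw_addr_add(list, addr, len, type);	list->count 1, refcount 1
 *	__hw_addr_add(list, addr, len, type);	list->count 1, refcount 2
 *	__hw_addr_del(list, addr, len, type);	list->count 1, refcount 1
 *	__hw_addr_del(list, addr, len, type);	list->count 0, entry freed
 *
 * __hw_addr_sync() and __hw_addr_unsync() build on this: syncing takes
 * an extra reference and sets ha->synced so a later unsync knows which
 * entries in the destination list it owns.
 */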
3903
3904/* Device addresses handling functions */
3905
3906static void dev_addr_flush(struct net_device *dev)
3907{
3908 /* rtnl_mutex must be held here */
3909
Jiri Pirko31278e72009-06-17 01:12:19 +00003910 __hw_addr_flush(&dev->dev_addrs);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003911 dev->dev_addr = NULL;
3912}
3913
3914static int dev_addr_init(struct net_device *dev)
3915{
3916 unsigned char addr[MAX_ADDR_LEN];
3917 struct netdev_hw_addr *ha;
3918 int err;
3919
3920 /* rtnl_mutex must be held here */
3921
Jiri Pirko31278e72009-06-17 01:12:19 +00003922 __hw_addr_init(&dev->dev_addrs);
Eric Dumazet0c279222009-06-08 03:49:24 +00003923 memset(addr, 0, sizeof(addr));
Jiri Pirko31278e72009-06-17 01:12:19 +00003924 err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
Jiri Pirkof001fde2009-05-05 02:48:28 +00003925 NETDEV_HW_ADDR_T_LAN);
3926 if (!err) {
3927 /*
3928 * Get the first (previously created) address from the list
3929 * and set dev_addr pointer to this location.
3930 */
Jiri Pirko31278e72009-06-17 01:12:19 +00003931 ha = list_first_entry(&dev->dev_addrs.list,
Jiri Pirkof001fde2009-05-05 02:48:28 +00003932 struct netdev_hw_addr, list);
3933 dev->dev_addr = ha->addr;
3934 }
3935 return err;
3936}
3937
3938/**
3939 * dev_addr_add - Add a device address
3940 * @dev: device
3941 * @addr: address to add
3942 * @addr_type: address type
3943 *
3944 * Add a device address to the device or increase the reference count if
3945 * it already exists.
3946 *
3947 * The caller must hold the rtnl_mutex.
3948 */
3949int dev_addr_add(struct net_device *dev, unsigned char *addr,
3950 unsigned char addr_type)
3951{
3952 int err;
3953
3954 ASSERT_RTNL();
3955
Jiri Pirko31278e72009-06-17 01:12:19 +00003956 err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003957 if (!err)
3958 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3959 return err;
3960}
3961EXPORT_SYMBOL(dev_addr_add);
3962
3963/**
3964 * dev_addr_del - Release a device address.
3965 * @dev: device
3966 * @addr: address to delete
3967 * @addr_type: address type
3968 *
3969 * Release reference to a device address and remove it from the device
3970 * if the reference count drops to zero.
3971 *
3972 * The caller must hold the rtnl_mutex.
3973 */
3974int dev_addr_del(struct net_device *dev, unsigned char *addr,
3975 unsigned char addr_type)
3976{
3977 int err;
Jiri Pirkoccffad252009-05-22 23:22:17 +00003978 struct netdev_hw_addr *ha;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003979
3980 ASSERT_RTNL();
3981
Jiri Pirkoccffad252009-05-22 23:22:17 +00003982 /*
3983 * We can not remove the first address from the list because
3984 * dev->dev_addr points to that.
3985 */
Jiri Pirko31278e72009-06-17 01:12:19 +00003986 ha = list_first_entry(&dev->dev_addrs.list,
3987 struct netdev_hw_addr, list);
Jiri Pirkoccffad252009-05-22 23:22:17 +00003988 if (ha->addr == dev->dev_addr && ha->refcount == 1)
3989 return -ENOENT;
3990
Jiri Pirko31278e72009-06-17 01:12:19 +00003991 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003992 addr_type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003993 if (!err)
3994 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3995 return err;
3996}
3997EXPORT_SYMBOL(dev_addr_del);
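/*
 * Usage sketch for the pair above; the device, address and type are
 * assumptions:
 *
 *	rtnl_lock();
 *	err = dev_addr_add(dev, addr, NETDEV_HW_ADDR_T_SAN);
 *	...
 *	err = dev_addr_del(dev, addr, NETDEV_HW_ADDR_T_SAN);
 *	rtnl_unlock();
 *
 * Note the asymmetry enforced above: the first list entry backs
 * dev->dev_addr itself, so dev_addr_del() refuses to drop it while it
 * holds its last reference.
 */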
3998
3999/**
4000 * dev_addr_add_multiple - Add device addresses from another device
4001 * @to_dev: device to which addresses will be added
4002 * @from_dev: device from which addresses will be added
4003 * @addr_type: address type - 0 means type will be used from from_dev
4004 *
4005 * Add the device addresses of one device to another.
4006 *
4007 * The caller must hold the rtnl_mutex.
4008 */
4009int dev_addr_add_multiple(struct net_device *to_dev,
4010 struct net_device *from_dev,
4011 unsigned char addr_type)
4012{
4013 int err;
4014
4015 ASSERT_RTNL();
4016
4017 if (from_dev->addr_len != to_dev->addr_len)
4018 return -EINVAL;
Jiri Pirko31278e72009-06-17 01:12:19 +00004019 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
Jiri Pirkoccffad252009-05-22 23:22:17 +00004020 to_dev->addr_len, addr_type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00004021 if (!err)
4022 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4023 return err;
4024}
4025EXPORT_SYMBOL(dev_addr_add_multiple);
4026
4027/**
4028 * dev_addr_del_multiple - Delete device addresses by another device
4029 * @to_dev: device where the addresses will be deleted
4030 * @from_dev: device supplying the addresses to be deleted
4031 * @addr_type: address type - 0 means type will be used from from_dev
4032 *
4033 * Deletes the addresses in @to_dev that appear in @from_dev's list.
4034 *
4035 * The caller must hold the rtnl_mutex.
4036 */
4037int dev_addr_del_multiple(struct net_device *to_dev,
4038 struct net_device *from_dev,
4039 unsigned char addr_type)
4040{
4041 ASSERT_RTNL();
4042
4043 if (from_dev->addr_len != to_dev->addr_len)
4044 return -EINVAL;
Jiri Pirko31278e72009-06-17 01:12:19 +00004045 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
Jiri Pirkoccffad252009-05-22 23:22:17 +00004046 to_dev->addr_len, addr_type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00004047 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4048 return 0;
4049}
4050EXPORT_SYMBOL(dev_addr_del_multiple);
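
/*
 * Example usage (an illustrative sketch): a master device mirroring a
 * slave's address list while enslaved, bonding-style.  The function name
 * is hypothetical; addr_type 0 reuses each address's own type, as the
 * kernel-doc above says.
 */
static int __maybe_unused example_mirror_slave_addrs(struct net_device *master,
                                                     struct net_device *slave)
{
        int err;

        ASSERT_RTNL();

        err = dev_addr_add_multiple(master, slave, 0);
        if (err)
                return err;

        /* ... on release, drop the mirrored addresses again ... */
        return dev_addr_del_multiple(master, slave, 0);
}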
4051
Jiri Pirko31278e72009-06-17 01:12:19 +00004052/* multicast addresses handling functions */
Jiri Pirkof001fde2009-05-05 02:48:28 +00004053
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004054int __dev_addr_delete(struct dev_addr_list **list, int *count,
4055 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07004056{
4057 struct dev_addr_list *da;
4058
4059 for (; (da = *list) != NULL; list = &da->next) {
4060 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4061 alen == da->da_addrlen) {
4062 if (glbl) {
4063 int old_glbl = da->da_gusers;
4064 da->da_gusers = 0;
4065 if (old_glbl == 0)
4066 break;
4067 }
4068 if (--da->da_users)
4069 return 0;
4070
4071 *list = da->next;
4072 kfree(da);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004073 (*count)--;
Patrick McHardybf742482007-06-27 01:26:19 -07004074 return 0;
4075 }
4076 }
4077 return -ENOENT;
4078}
4079
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004080int __dev_addr_add(struct dev_addr_list **list, int *count,
4081 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07004082{
4083 struct dev_addr_list *da;
4084
4085 for (da = *list; da != NULL; da = da->next) {
4086 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4087 da->da_addrlen == alen) {
4088 if (glbl) {
4089 int old_glbl = da->da_gusers;
4090 da->da_gusers = 1;
4091 if (old_glbl)
4092 return 0;
4093 }
4094 da->da_users++;
4095 return 0;
4096 }
4097 }
4098
Jorge Boncompte [DTI2]12aa3432008-02-19 14:17:04 -08004099 da = kzalloc(sizeof(*da), GFP_ATOMIC);
Patrick McHardybf742482007-06-27 01:26:19 -07004100 if (da == NULL)
4101 return -ENOMEM;
4102 memcpy(da->da_addr, addr, alen);
4103 da->da_addrlen = alen;
4104 da->da_users = 1;
4105 da->da_gusers = glbl ? 1 : 0;
4106 da->next = *list;
4107 *list = da;
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004108 (*count)++;
Patrick McHardybf742482007-06-27 01:26:19 -07004109 return 0;
4110}
4111
Patrick McHardy4417da62007-06-27 01:28:10 -07004112/**
4113 * dev_unicast_delete - Release secondary unicast address.
4114 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07004115 * @addr: address to delete
Patrick McHardy4417da62007-06-27 01:28:10 -07004116 *
4117 * Release reference to a secondary unicast address and remove it
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07004118 * from the device if the reference count drops to zero.
Patrick McHardy4417da62007-06-27 01:28:10 -07004119 *
4120 * The caller must hold the rtnl_mutex.
4121 */
Jiri Pirkoccffad252009-05-22 23:22:17 +00004122int dev_unicast_delete(struct net_device *dev, void *addr)
Patrick McHardy4417da62007-06-27 01:28:10 -07004123{
4124 int err;
4125
4126 ASSERT_RTNL();
4127
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004128 netif_addr_lock_bh(dev);
Jiri Pirko31278e72009-06-17 01:12:19 +00004129 err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
4130 NETDEV_HW_ADDR_T_UNICAST);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004131 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07004132 __dev_set_rx_mode(dev);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004133 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07004134 return err;
4135}
4136EXPORT_SYMBOL(dev_unicast_delete);
4137
4138/**
4139 * dev_unicast_add - add a secondary unicast address
4140 * @dev: device
Wang Chen5dbaec52008-06-27 19:35:16 -07004141 * @addr: address to add
Patrick McHardy4417da62007-06-27 01:28:10 -07004142 *
4143 * Add a secondary unicast address to the device or increase
4144 * the reference count if it already exists.
4145 *
4146 * The caller must hold the rtnl_mutex.
4147 */
Jiri Pirkoccffad252009-05-22 23:22:17 +00004148int dev_unicast_add(struct net_device *dev, void *addr)
Patrick McHardy4417da62007-06-27 01:28:10 -07004149{
4150 int err;
4151
4152 ASSERT_RTNL();
4153
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004154 netif_addr_lock_bh(dev);
Jiri Pirko31278e72009-06-17 01:12:19 +00004155 err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
4156 NETDEV_HW_ADDR_T_UNICAST);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004157 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07004158 __dev_set_rx_mode(dev);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004159 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07004160 return err;
4161}
4162EXPORT_SYMBOL(dev_unicast_add);
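
/*
 * Example usage (an illustrative sketch): adding and removing a secondary
 * unicast address from a driver running under the RTNL.  The MAC value is
 * an arbitrary locally administered address chosen for the example.
 */
static int __maybe_unused example_secondary_unicast(struct net_device *dev)
{
        unsigned char mac[ETH_ALEN] = { 0x02, 0x11, 0x22, 0x33, 0x44, 0x55 };
        int err;

        ASSERT_RTNL();

        err = dev_unicast_add(dev, mac);
        if (err)
                return err;

        /* ... frames to this address are now accepted ... */

        return dev_unicast_delete(dev, mac);
}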
4163
Chris Leeche83a2ea2008-01-31 16:53:23 -08004164int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
4165 struct dev_addr_list **from, int *from_count)
4166{
4167 struct dev_addr_list *da, *next;
4168 int err = 0;
4169
4170 da = *from;
4171 while (da != NULL) {
4172 next = da->next;
4173 if (!da->da_synced) {
4174 err = __dev_addr_add(to, to_count,
4175 da->da_addr, da->da_addrlen, 0);
4176 if (err < 0)
4177 break;
4178 da->da_synced = 1;
4179 da->da_users++;
4180 } else if (da->da_users == 1) {
4181 __dev_addr_delete(to, to_count,
4182 da->da_addr, da->da_addrlen, 0);
4183 __dev_addr_delete(from, from_count,
4184 da->da_addr, da->da_addrlen, 0);
4185 }
4186 da = next;
4187 }
4188 return err;
4189}
Johannes Bergc4029082009-06-17 17:43:30 +02004190EXPORT_SYMBOL_GPL(__dev_addr_sync);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004191
4192void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
4193 struct dev_addr_list **from, int *from_count)
4194{
4195 struct dev_addr_list *da, *next;
4196
4197 da = *from;
4198 while (da != NULL) {
4199 next = da->next;
4200 if (da->da_synced) {
4201 __dev_addr_delete(to, to_count,
4202 da->da_addr, da->da_addrlen, 0);
4203 da->da_synced = 0;
4204 __dev_addr_delete(from, from_count,
4205 da->da_addr, da->da_addrlen, 0);
4206 }
4207 da = next;
4208 }
4209}
Johannes Bergc4029082009-06-17 17:43:30 +02004210EXPORT_SYMBOL_GPL(__dev_addr_unsync);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004211
4212/**
4213 * dev_unicast_sync - Synchronize device's unicast list to another device
4214 * @to: destination device
4215 * @from: source device
4216 *
4217 * Add newly added addresses to the destination device and release
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004218 * addresses that have no users left. The source device must be
4219 * locked by netif_addr_lock_bh.
Chris Leeche83a2ea2008-01-31 16:53:23 -08004220 *
4221 * This function is intended to be called from the dev->set_rx_mode
4222 * function of layered software devices.
4223 */
4224int dev_unicast_sync(struct net_device *to, struct net_device *from)
4225{
4226 int err = 0;
4227
Jiri Pirkoccffad252009-05-22 23:22:17 +00004228 if (to->addr_len != from->addr_len)
4229 return -EINVAL;
4230
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004231 netif_addr_lock_bh(to);
Jiri Pirko31278e72009-06-17 01:12:19 +00004232 err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004233 if (!err)
4234 __dev_set_rx_mode(to);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004235 netif_addr_unlock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004236 return err;
4237}
4238EXPORT_SYMBOL(dev_unicast_sync);
4239
4240/**
Randy Dunlapbc2cda12008-02-13 15:03:25 -08004241 * dev_unicast_unsync - Remove synchronized addresses from the destination device
Chris Leeche83a2ea2008-01-31 16:53:23 -08004242 * @to: destination device
4243 * @from: source device
4244 *
4245 * Remove all addresses that were added to the destination device by
4246 * dev_unicast_sync(). This function is intended to be called from the
4247 * dev->stop function of layered software devices.
4248 */
4249void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4250{
Jiri Pirkoccffad252009-05-22 23:22:17 +00004251 if (to->addr_len != from->addr_len)
4252 return;
4253
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004254 netif_addr_lock_bh(from);
4255 netif_addr_lock(to);
Jiri Pirko31278e72009-06-17 01:12:19 +00004256 __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004257 __dev_set_rx_mode(to);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004258 netif_addr_unlock(to);
4259 netif_addr_unlock_bh(from);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004260}
4261EXPORT_SYMBOL(dev_unicast_unsync);
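
/*
 * Example usage (an illustrative sketch): the pattern used by layered
 * drivers such as VLAN or macvlan.  Keeping the lower device in ml_priv
 * and the hook names are assumptions for the example; a real driver wires
 * these up through its netdev_ops as set_rx_mode and stop.
 */
static void __maybe_unused example_upper_set_rx_mode(struct net_device *dev)
{
        struct net_device *lowerdev = dev->ml_priv;     /* assumed layout */

        dev_unicast_sync(lowerdev, dev);
}

static int __maybe_unused example_upper_stop(struct net_device *dev)
{
        struct net_device *lowerdev = dev->ml_priv;     /* assumed layout */

        dev_unicast_unsync(lowerdev, dev);
        return 0;
}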
4262
Jiri Pirkoccffad252009-05-22 23:22:17 +00004263static void dev_unicast_flush(struct net_device *dev)
4264{
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004265 netif_addr_lock_bh(dev);
Jiri Pirko31278e72009-06-17 01:12:19 +00004266 __hw_addr_flush(&dev->uc);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004267 netif_addr_unlock_bh(dev);
Jiri Pirkoccffad252009-05-22 23:22:17 +00004268}
4269
4270static void dev_unicast_init(struct net_device *dev)
4271{
Jiri Pirko31278e72009-06-17 01:12:19 +00004272 __hw_addr_init(&dev->uc);
Jiri Pirkoccffad252009-05-22 23:22:17 +00004273}
4274
4275
Denis Cheng12972622007-07-18 02:12:56 -07004276static void __dev_addr_discard(struct dev_addr_list **list)
4277{
4278 struct dev_addr_list *tmp;
4279
4280 while (*list != NULL) {
4281 tmp = *list;
4282 *list = tmp->next;
4283 if (tmp->da_users > tmp->da_gusers)
4284 printk("__dev_addr_discard: address leakage! "
4285 "da_users=%d\n", tmp->da_users);
4286 kfree(tmp);
4287 }
4288}
4289
Denis Cheng26cc2522007-07-18 02:12:03 -07004290static void dev_addr_discard(struct net_device *dev)
Patrick McHardy4417da62007-06-27 01:28:10 -07004291{
David S. Millerb9e40852008-07-15 00:15:08 -07004292 netif_addr_lock_bh(dev);
Denis Cheng26cc2522007-07-18 02:12:03 -07004293
Denis Cheng456ad752007-07-18 02:10:54 -07004294 __dev_addr_discard(&dev->mc_list);
Jiri Pirko4cd24ea2010-02-08 04:30:35 +00004295 netdev_mc_count(dev) = 0;
Denis Cheng26cc2522007-07-18 02:12:03 -07004296
David S. Millerb9e40852008-07-15 00:15:08 -07004297 netif_addr_unlock_bh(dev);
Denis Cheng456ad752007-07-18 02:10:54 -07004298}
4299
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004300/**
4301 * dev_get_flags - get flags reported to userspace
4302 * @dev: device
4303 *
4304 * Get the combination of flag bits exported through APIs to userspace.
4305 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004306unsigned dev_get_flags(const struct net_device *dev)
4307{
4308 unsigned flags;
4309
4310 flags = (dev->flags & ~(IFF_PROMISC |
4311 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08004312 IFF_RUNNING |
4313 IFF_LOWER_UP |
4314 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07004315 (dev->gflags & (IFF_PROMISC |
4316 IFF_ALLMULTI));
4317
Stefan Rompfb00055a2006-03-20 17:09:11 -08004318 if (netif_running(dev)) {
4319 if (netif_oper_up(dev))
4320 flags |= IFF_RUNNING;
4321 if (netif_carrier_ok(dev))
4322 flags |= IFF_LOWER_UP;
4323 if (netif_dormant(dev))
4324 flags |= IFF_DORMANT;
4325 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004326
4327 return flags;
4328}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004329EXPORT_SYMBOL(dev_get_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004330
Patrick McHardybd380812010-02-26 06:34:53 +00004331int __dev_change_flags(struct net_device *dev, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004332{
Linus Torvalds1da177e2005-04-16 15:20:36 -07004333 int old_flags = dev->flags;
Patrick McHardybd380812010-02-26 06:34:53 +00004334 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004335
Patrick McHardy24023452007-07-14 18:51:31 -07004336 ASSERT_RTNL();
4337
Linus Torvalds1da177e2005-04-16 15:20:36 -07004338 /*
4339 * Set the flags on our device.
4340 */
4341
4342 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4343 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4344 IFF_AUTOMEDIA)) |
4345 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4346 IFF_ALLMULTI));
4347
4348 /*
4349	 * Load in the correct multicast list now that the flags have changed.
4350 */
4351
Patrick McHardyb6c40d62008-10-07 15:26:48 -07004352 if ((old_flags ^ flags) & IFF_MULTICAST)
4353 dev_change_rx_flags(dev, IFF_MULTICAST);
Patrick McHardy24023452007-07-14 18:51:31 -07004354
Patrick McHardy4417da62007-06-27 01:28:10 -07004355 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004356
4357 /*
4358	 * Have we downed the interface? We handle IFF_UP ourselves
4359 * according to user attempts to set it, rather than blindly
4360 * setting it.
4361 */
4362
4363 ret = 0;
4364 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
Patrick McHardybd380812010-02-26 06:34:53 +00004365 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004366
4367 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07004368 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004369 }
4370
Linus Torvalds1da177e2005-04-16 15:20:36 -07004371 if ((flags ^ dev->gflags) & IFF_PROMISC) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004372 int inc = (flags & IFF_PROMISC) ? 1 : -1;
4373
Linus Torvalds1da177e2005-04-16 15:20:36 -07004374 dev->gflags ^= IFF_PROMISC;
4375 dev_set_promiscuity(dev, inc);
4376 }
4377
4378 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4379	   is important. Some (broken) drivers set IFF_PROMISC when
4380	   IFF_ALLMULTI is requested, without asking us and without reporting it.
4381 */
4382 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004383 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4384
Linus Torvalds1da177e2005-04-16 15:20:36 -07004385 dev->gflags ^= IFF_ALLMULTI;
4386 dev_set_allmulti(dev, inc);
4387 }
4388
Patrick McHardybd380812010-02-26 06:34:53 +00004389 return ret;
4390}
4391
4392void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4393{
4394 unsigned int changes = dev->flags ^ old_flags;
4395
4396 if (changes & IFF_UP) {
4397 if (dev->flags & IFF_UP)
4398 call_netdevice_notifiers(NETDEV_UP, dev);
4399 else
4400 call_netdevice_notifiers(NETDEV_DOWN, dev);
4401 }
4402
4403 if (dev->flags & IFF_UP &&
4404 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4405 call_netdevice_notifiers(NETDEV_CHANGE, dev);
4406}
4407
4408/**
4409 * dev_change_flags - change device settings
4410 * @dev: device
4411 * @flags: device state flags
4412 *
4413 * Change device settings based on the supplied state flags. The
4414 * flags are given in the userspace-exported format.
4415 */
4416int dev_change_flags(struct net_device *dev, unsigned flags)
4417{
4418 int ret, changes;
4419 int old_flags = dev->flags;
4420
4421 ret = __dev_change_flags(dev, flags);
4422 if (ret < 0)
4423 return ret;
4424
4425 changes = old_flags ^ dev->flags;
Thomas Graf7c355f52007-06-05 16:03:03 -07004426 if (changes)
4427 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004428
Patrick McHardybd380812010-02-26 06:34:53 +00004429 __dev_notify_flags(dev, old_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004430 return ret;
4431}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004432EXPORT_SYMBOL(dev_change_flags);
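
/*
 * Example usage (an illustrative sketch): an in-kernel caller bringing an
 * interface administratively up, the same way the SIOCSIFFLAGS ioctl path
 * below does it.  The function name is hypothetical.
 */
static int __maybe_unused example_bring_up(struct net_device *dev)
{
        ASSERT_RTNL();

        return dev_change_flags(dev, dev->flags | IFF_UP);
}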
Linus Torvalds1da177e2005-04-16 15:20:36 -07004433
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004434/**
4435 * dev_set_mtu - Change maximum transfer unit
4436 * @dev: device
4437 * @new_mtu: new transfer unit
4438 *
4439 * Change the maximum transfer size of the network device.
4440 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004441int dev_set_mtu(struct net_device *dev, int new_mtu)
4442{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004443 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004444 int err;
4445
4446 if (new_mtu == dev->mtu)
4447 return 0;
4448
4449 /* MTU must be positive. */
4450 if (new_mtu < 0)
4451 return -EINVAL;
4452
4453 if (!netif_device_present(dev))
4454 return -ENODEV;
4455
4456 err = 0;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004457 if (ops->ndo_change_mtu)
4458 err = ops->ndo_change_mtu(dev, new_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004459 else
4460 dev->mtu = new_mtu;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004461
Linus Torvalds1da177e2005-04-16 15:20:36 -07004462 if (!err && dev->flags & IFF_UP)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004463 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004464 return err;
4465}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004466EXPORT_SYMBOL(dev_set_mtu);
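
/*
 * Example usage (an illustrative sketch): switching a device to a jumbo
 * MTU under the RTNL.  9000 is just a common jumbo-frame value; whether
 * the driver accepts it is up to its ndo_change_mtu.
 */
static int __maybe_unused example_enable_jumbo(struct net_device *dev)
{
        ASSERT_RTNL();

        return dev_set_mtu(dev, 9000);
}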
Linus Torvalds1da177e2005-04-16 15:20:36 -07004467
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004468/**
4469 * dev_set_mac_address - Change Media Access Control Address
4470 * @dev: device
4471 * @sa: new address
4472 *
4473 * Change the hardware (MAC) address of the device
4474 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004475int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4476{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004477 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004478 int err;
4479
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004480 if (!ops->ndo_set_mac_address)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004481 return -EOPNOTSUPP;
4482 if (sa->sa_family != dev->type)
4483 return -EINVAL;
4484 if (!netif_device_present(dev))
4485 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004486 err = ops->ndo_set_mac_address(dev, sa);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004487 if (!err)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004488 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004489 return err;
4490}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004491EXPORT_SYMBOL(dev_set_mac_address);
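
/*
 * Example usage (an illustrative sketch): wrapping a raw MAC address in
 * the struct sockaddr that dev_set_mac_address() expects.  sa_family must
 * match dev->type, as the check above enforces; this assumes
 * dev->addr_len fits in sa.sa_data, which holds for Ethernet.
 */
static int __maybe_unused example_set_mac(struct net_device *dev,
                                          const unsigned char *mac)
{
        struct sockaddr sa;

        sa.sa_family = dev->type;
        memcpy(sa.sa_data, mac, dev->addr_len);

        return dev_set_mac_address(dev, &sa);
}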
Linus Torvalds1da177e2005-04-16 15:20:36 -07004492
4493/*
Eric Dumazet3710bec2009-11-01 19:42:09 +00004494 * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07004495 */
Jeff Garzik14e3e072007-10-08 00:06:32 -07004496static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004497{
4498 int err;
Eric Dumazet3710bec2009-11-01 19:42:09 +00004499 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004500
4501 if (!dev)
4502 return -ENODEV;
4503
4504 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004505 case SIOCGIFFLAGS: /* Get interface flags */
4506 ifr->ifr_flags = (short) dev_get_flags(dev);
4507 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004508
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004509 case SIOCGIFMETRIC: /* Get the metric on the interface
4510 (currently unused) */
4511 ifr->ifr_metric = 0;
4512 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004513
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004514 case SIOCGIFMTU: /* Get the MTU of a device */
4515 ifr->ifr_mtu = dev->mtu;
4516 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004517
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004518 case SIOCGIFHWADDR:
4519 if (!dev->addr_len)
4520 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4521 else
4522 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4523 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4524 ifr->ifr_hwaddr.sa_family = dev->type;
4525 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004526
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004527 case SIOCGIFSLAVE:
4528 err = -EINVAL;
4529 break;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004530
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004531 case SIOCGIFMAP:
4532 ifr->ifr_map.mem_start = dev->mem_start;
4533 ifr->ifr_map.mem_end = dev->mem_end;
4534 ifr->ifr_map.base_addr = dev->base_addr;
4535 ifr->ifr_map.irq = dev->irq;
4536 ifr->ifr_map.dma = dev->dma;
4537 ifr->ifr_map.port = dev->if_port;
4538 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004539
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004540 case SIOCGIFINDEX:
4541 ifr->ifr_ifindex = dev->ifindex;
4542 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004543
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004544 case SIOCGIFTXQLEN:
4545 ifr->ifr_qlen = dev->tx_queue_len;
4546 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004547
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004548 default:
4549 /* dev_ioctl() should ensure this case
4550 * is never reached
4551 */
4552 WARN_ON(1);
4553 err = -EINVAL;
4554 break;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004555
4556 }
4557 return err;
4558}
4559
4560/*
4561 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
4562 */
4563static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4564{
4565 int err;
4566 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08004567 const struct net_device_ops *ops;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004568
4569 if (!dev)
4570 return -ENODEV;
4571
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08004572 ops = dev->netdev_ops;
4573
Jeff Garzik14e3e072007-10-08 00:06:32 -07004574 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004575 case SIOCSIFFLAGS: /* Set interface flags */
4576 return dev_change_flags(dev, ifr->ifr_flags);
Jeff Garzik14e3e072007-10-08 00:06:32 -07004577
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004578 case SIOCSIFMETRIC: /* Set the metric on the interface
4579 (currently unused) */
4580 return -EOPNOTSUPP;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004581
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004582 case SIOCSIFMTU: /* Set the MTU of a device */
4583 return dev_set_mtu(dev, ifr->ifr_mtu);
Jeff Garzik14e3e072007-10-08 00:06:32 -07004584
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004585 case SIOCSIFHWADDR:
4586 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004587
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004588 case SIOCSIFHWBROADCAST:
4589 if (ifr->ifr_hwaddr.sa_family != dev->type)
4590 return -EINVAL;
4591 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4592 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4593 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4594 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004595
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004596 case SIOCSIFMAP:
4597 if (ops->ndo_set_config) {
4598 if (!netif_device_present(dev))
4599 return -ENODEV;
4600 return ops->ndo_set_config(dev, &ifr->ifr_map);
4601 }
4602 return -EOPNOTSUPP;
4603
4604 case SIOCADDMULTI:
4605 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4606 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4607 return -EINVAL;
4608 if (!netif_device_present(dev))
4609 return -ENODEV;
4610 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4611 dev->addr_len, 1);
4612
4613 case SIOCDELMULTI:
4614 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4615 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4616 return -EINVAL;
4617 if (!netif_device_present(dev))
4618 return -ENODEV;
4619 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4620 dev->addr_len, 1);
4621
4622 case SIOCSIFTXQLEN:
4623 if (ifr->ifr_qlen < 0)
4624 return -EINVAL;
4625 dev->tx_queue_len = ifr->ifr_qlen;
4626 return 0;
4627
4628 case SIOCSIFNAME:
4629 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4630 return dev_change_name(dev, ifr->ifr_newname);
4631
4632 /*
4633 * Unknown or private ioctl
4634 */
4635 default:
4636 if ((cmd >= SIOCDEVPRIVATE &&
4637 cmd <= SIOCDEVPRIVATE + 15) ||
4638 cmd == SIOCBONDENSLAVE ||
4639 cmd == SIOCBONDRELEASE ||
4640 cmd == SIOCBONDSETHWADDR ||
4641 cmd == SIOCBONDSLAVEINFOQUERY ||
4642 cmd == SIOCBONDINFOQUERY ||
4643 cmd == SIOCBONDCHANGEACTIVE ||
4644 cmd == SIOCGMIIPHY ||
4645 cmd == SIOCGMIIREG ||
4646 cmd == SIOCSMIIREG ||
4647 cmd == SIOCBRADDIF ||
4648 cmd == SIOCBRDELIF ||
4649 cmd == SIOCSHWTSTAMP ||
4650 cmd == SIOCWANDEV) {
4651 err = -EOPNOTSUPP;
4652 if (ops->ndo_do_ioctl) {
4653 if (netif_device_present(dev))
4654 err = ops->ndo_do_ioctl(dev, ifr, cmd);
4655 else
4656 err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004657 }
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004658 } else
4659 err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004660
4661 }
4662 return err;
4663}
4664
4665/*
4666 * This function handles all "interface"-type I/O control requests. The actual
4667 * 'doing' part of this is dev_ifsioc above.
4668 */
4669
4670/**
4671 * dev_ioctl - network device ioctl
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004672 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07004673 * @cmd: command to issue
4674 * @arg: pointer to a struct ifreq in user space
4675 *
4676 * Issue ioctl functions to devices. This is normally called by the
4677 * user space syscall interfaces but can sometimes be useful for
4678 * other purposes. The return value is the return from the syscall if
4679 * positive or a negative errno code on error.
4680 */
4681
Eric W. Biederman881d9662007-09-17 11:56:21 -07004682int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004683{
4684 struct ifreq ifr;
4685 int ret;
4686 char *colon;
4687
4688 /* One special case: SIOCGIFCONF takes ifconf argument
4689 and requires shared lock, because it sleeps writing
4690 to user space.
4691 */
4692
4693 if (cmd == SIOCGIFCONF) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004694 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004695 ret = dev_ifconf(net, (char __user *) arg);
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004696 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004697 return ret;
4698 }
4699 if (cmd == SIOCGIFNAME)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004700 return dev_ifname(net, (struct ifreq __user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004701
4702 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4703 return -EFAULT;
4704
4705 ifr.ifr_name[IFNAMSIZ-1] = 0;
4706
4707 colon = strchr(ifr.ifr_name, ':');
4708 if (colon)
4709 *colon = 0;
4710
4711 /*
4712 * See which interface the caller is talking about.
4713 */
4714
4715 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004716 /*
4717 * These ioctl calls:
4718 * - can be done by all.
4719 * - atomic and do not require locking.
4720 * - return a value
4721 */
4722 case SIOCGIFFLAGS:
4723 case SIOCGIFMETRIC:
4724 case SIOCGIFMTU:
4725 case SIOCGIFHWADDR:
4726 case SIOCGIFSLAVE:
4727 case SIOCGIFMAP:
4728 case SIOCGIFINDEX:
4729 case SIOCGIFTXQLEN:
4730 dev_load(net, ifr.ifr_name);
Eric Dumazet3710bec2009-11-01 19:42:09 +00004731 rcu_read_lock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004732 ret = dev_ifsioc_locked(net, &ifr, cmd);
Eric Dumazet3710bec2009-11-01 19:42:09 +00004733 rcu_read_unlock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004734 if (!ret) {
4735 if (colon)
4736 *colon = ':';
4737 if (copy_to_user(arg, &ifr,
4738 sizeof(struct ifreq)))
4739 ret = -EFAULT;
4740 }
4741 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004742
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004743 case SIOCETHTOOL:
4744 dev_load(net, ifr.ifr_name);
4745 rtnl_lock();
4746 ret = dev_ethtool(net, &ifr);
4747 rtnl_unlock();
4748 if (!ret) {
4749 if (colon)
4750 *colon = ':';
4751 if (copy_to_user(arg, &ifr,
4752 sizeof(struct ifreq)))
4753 ret = -EFAULT;
4754 }
4755 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004756
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004757 /*
4758 * These ioctl calls:
4759 * - require superuser power.
4760 * - require strict serialization.
4761 * - return a value
4762 */
4763 case SIOCGMIIPHY:
4764 case SIOCGMIIREG:
4765 case SIOCSIFNAME:
4766 if (!capable(CAP_NET_ADMIN))
4767 return -EPERM;
4768 dev_load(net, ifr.ifr_name);
4769 rtnl_lock();
4770 ret = dev_ifsioc(net, &ifr, cmd);
4771 rtnl_unlock();
4772 if (!ret) {
4773 if (colon)
4774 *colon = ':';
4775 if (copy_to_user(arg, &ifr,
4776 sizeof(struct ifreq)))
4777 ret = -EFAULT;
4778 }
4779 return ret;
4780
4781 /*
4782 * These ioctl calls:
4783 * - require superuser power.
4784 * - require strict serialization.
4785 * - do not return a value
4786 */
4787 case SIOCSIFFLAGS:
4788 case SIOCSIFMETRIC:
4789 case SIOCSIFMTU:
4790 case SIOCSIFMAP:
4791 case SIOCSIFHWADDR:
4792 case SIOCSIFSLAVE:
4793 case SIOCADDMULTI:
4794 case SIOCDELMULTI:
4795 case SIOCSIFHWBROADCAST:
4796 case SIOCSIFTXQLEN:
4797 case SIOCSMIIREG:
4798 case SIOCBONDENSLAVE:
4799 case SIOCBONDRELEASE:
4800 case SIOCBONDSETHWADDR:
4801 case SIOCBONDCHANGEACTIVE:
4802 case SIOCBRADDIF:
4803 case SIOCBRDELIF:
4804 case SIOCSHWTSTAMP:
4805 if (!capable(CAP_NET_ADMIN))
4806 return -EPERM;
4807 /* fall through */
4808 case SIOCBONDSLAVEINFOQUERY:
4809 case SIOCBONDINFOQUERY:
4810 dev_load(net, ifr.ifr_name);
4811 rtnl_lock();
4812 ret = dev_ifsioc(net, &ifr, cmd);
4813 rtnl_unlock();
4814 return ret;
4815
4816 case SIOCGIFMEM:
4817 /* Get the per device memory space. We can add this but
4818 * currently do not support it */
4819 case SIOCSIFMEM:
4820 /* Set the per device memory buffer space.
4821 * Not applicable in our case */
4822 case SIOCSIFLINK:
4823 return -EINVAL;
4824
4825 /*
4826 * Unknown or private ioctl.
4827 */
4828 default:
4829 if (cmd == SIOCWANDEV ||
4830 (cmd >= SIOCDEVPRIVATE &&
4831 cmd <= SIOCDEVPRIVATE + 15)) {
Eric W. Biederman881d9662007-09-17 11:56:21 -07004832 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004833 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004834 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004835 rtnl_unlock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004836 if (!ret && copy_to_user(arg, &ifr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004837 sizeof(struct ifreq)))
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004838 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004839 return ret;
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004840 }
4841 /* Take care of Wireless Extensions */
4842 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4843 return wext_handle_ioctl(net, &ifr, cmd, arg);
4844 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004845 }
4846}
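
/*
 * Example (an illustrative sketch of the user-space side): the kind of
 * request that reaches dev_ioctl() above.  This is ordinary application
 * code, shown only as a comment; it needs <sys/ioctl.h> and <net/if.h>
 * plus a socket of any family:
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *		printf("eth0 mtu: %d\n", ifr.ifr_mtu);
 */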
4847
4848
4849/**
4850 * dev_new_index - allocate an ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004851 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07004852 *
4853 * Returns a suitable unique value for a new device interface
4854 * number. The caller must hold the rtnl semaphore or the
4855 * dev_base_lock to be sure it remains unique.
4856 */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004857static int dev_new_index(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004858{
4859 static int ifindex;
4860 for (;;) {
4861 if (++ifindex <= 0)
4862 ifindex = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004863 if (!__dev_get_by_index(net, ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004864 return ifindex;
4865 }
4866}
4867
Linus Torvalds1da177e2005-04-16 15:20:36 -07004868/* Delayed registration/unregistration */
Denis Cheng3b5b34f2007-12-07 00:49:17 -08004869static LIST_HEAD(net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004870
Stephen Hemminger6f05f622007-03-08 20:46:03 -08004871static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004872{
Linus Torvalds1da177e2005-04-16 15:20:36 -07004873 list_add_tail(&dev->todo_list, &net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004874}
4875
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004876static void rollback_registered_many(struct list_head *head)
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004877{
Krishna Kumare93737b2009-12-08 22:26:02 +00004878 struct net_device *dev, *tmp;
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004879
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004880 BUG_ON(dev_boot_phase);
4881 ASSERT_RTNL();
4882
Krishna Kumare93737b2009-12-08 22:26:02 +00004883 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004884		/* Some devices call unregister without ever having
Krishna Kumare93737b2009-12-08 22:26:02 +00004885		 * registered, as part of initialization unwind. Remove
4886		 * those devices and proceed with the remaining ones.
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004887 */
4888 if (dev->reg_state == NETREG_UNINITIALIZED) {
4889 pr_debug("unregister_netdevice: device %s/%p never "
4890 "was registered\n", dev->name, dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004891
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004892 WARN_ON(1);
Krishna Kumare93737b2009-12-08 22:26:02 +00004893 list_del(&dev->unreg_list);
4894 continue;
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004895 }
4896
4897 BUG_ON(dev->reg_state != NETREG_REGISTERED);
4898
4899 /* If device is running, close it first. */
4900 dev_close(dev);
4901
4902 /* And unlink it from device chain. */
4903 unlist_netdevice(dev);
4904
4905 dev->reg_state = NETREG_UNREGISTERING;
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004906 }
4907
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004908 synchronize_net();
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004909
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004910 list_for_each_entry(dev, head, unreg_list) {
4911 /* Shutdown queueing discipline. */
4912 dev_shutdown(dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004913
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004914
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004915 /* Notify protocols, that we are about to destroy
4916 this device. They should clean all the things.
4917 */
4918 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4919
Patrick McHardya2835762010-02-26 06:34:51 +00004920 if (!dev->rtnl_link_ops ||
4921 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
4922 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
4923
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004924 /*
4925 * Flush the unicast and multicast chains
4926 */
4927 dev_unicast_flush(dev);
4928 dev_addr_discard(dev);
4929
4930 if (dev->netdev_ops->ndo_uninit)
4931 dev->netdev_ops->ndo_uninit(dev);
4932
4933 /* Notifier chain MUST detach us from master device. */
4934 WARN_ON(dev->master);
4935
4936 /* Remove entries from kobject tree */
4937 netdev_unregister_kobject(dev);
4938 }
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004939
Eric W. Biedermana5ee1552009-11-29 15:45:58 +00004940 /* Process any work delayed until the end of the batch */
stephen hemmingere5e26d72010-02-24 14:01:38 +00004941 dev = list_first_entry(head, struct net_device, unreg_list);
Eric W. Biedermana5ee1552009-11-29 15:45:58 +00004942 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
4943
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004944 synchronize_net();
4945
Eric W. Biedermana5ee1552009-11-29 15:45:58 +00004946 list_for_each_entry(dev, head, unreg_list)
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004947 dev_put(dev);
4948}
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004949
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004950static void rollback_registered(struct net_device *dev)
4951{
4952 LIST_HEAD(single);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004953
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004954 list_add(&dev->unreg_list, &single);
4955 rollback_registered_many(&single);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004956}
4957
David S. Millere8a04642008-07-17 00:34:19 -07004958static void __netdev_init_queue_locks_one(struct net_device *dev,
4959 struct netdev_queue *dev_queue,
4960 void *_unused)
David S. Millerc773e842008-07-08 23:13:53 -07004961{
4962 spin_lock_init(&dev_queue->_xmit_lock);
David S. Millercf508b12008-07-22 14:16:42 -07004963 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
David S. Millerc773e842008-07-08 23:13:53 -07004964 dev_queue->xmit_lock_owner = -1;
4965}
4966
4967static void netdev_init_queue_locks(struct net_device *dev)
4968{
David S. Millere8a04642008-07-17 00:34:19 -07004969 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4970 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
David S. Millerc773e842008-07-08 23:13:53 -07004971}
4972
Herbert Xub63365a2008-10-23 01:11:29 -07004973unsigned long netdev_fix_features(unsigned long features, const char *name)
4974{
4975 /* Fix illegal SG+CSUM combinations. */
4976 if ((features & NETIF_F_SG) &&
4977 !(features & NETIF_F_ALL_CSUM)) {
4978 if (name)
4979 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4980 "checksum feature.\n", name);
4981 features &= ~NETIF_F_SG;
4982 }
4983
4984 /* TSO requires that SG is present as well. */
4985 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4986 if (name)
4987 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4988 "SG feature.\n", name);
4989 features &= ~NETIF_F_TSO;
4990 }
4991
4992 if (features & NETIF_F_UFO) {
4993 if (!(features & NETIF_F_GEN_CSUM)) {
4994 if (name)
4995 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4996 "since no NETIF_F_HW_CSUM feature.\n",
4997 name);
4998 features &= ~NETIF_F_UFO;
4999 }
5000
5001 if (!(features & NETIF_F_SG)) {
5002 if (name)
5003 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
5004 "since no NETIF_F_SG feature.\n", name);
5005 features &= ~NETIF_F_UFO;
5006 }
5007 }
5008
5009 return features;
5010}
5011EXPORT_SYMBOL(netdev_fix_features);
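
/*
 * Example usage (an illustrative sketch): a driver sanitizing the feature
 * set it is about to advertise, mirroring what register_netdevice() does
 * further down.  The flags chosen here are arbitrary.
 */
static void __maybe_unused example_init_features(struct net_device *dev)
{
        dev->features |= NETIF_F_SG | NETIF_F_TSO;
        dev->features = netdev_fix_features(dev->features, dev->name);
}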
5012
Linus Torvalds1da177e2005-04-16 15:20:36 -07005013/**
Patrick Mullaneyfc4a7482009-12-03 15:59:22 -08005014 * netif_stacked_transfer_operstate - transfer operstate
5015 * @rootdev: the root or lower level device to transfer state from
5016 * @dev: the device to transfer operstate to
5017 *
5018 * Transfer operational state from root to device. This is normally
5019 * called when a stacking relationship exists between the root
5020 * device and the device (a leaf device).
5021 */
5022void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5023 struct net_device *dev)
5024{
5025 if (rootdev->operstate == IF_OPER_DORMANT)
5026 netif_dormant_on(dev);
5027 else
5028 netif_dormant_off(dev);
5029
5030 if (netif_carrier_ok(rootdev)) {
5031 if (!netif_carrier_ok(dev))
5032 netif_carrier_on(dev);
5033 } else {
5034 if (netif_carrier_ok(dev))
5035 netif_carrier_off(dev);
5036 }
5037}
5038EXPORT_SYMBOL(netif_stacked_transfer_operstate);
5039
5040/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07005041 * register_netdevice - register a network device
5042 * @dev: device to register
5043 *
5044 * Take a completed network device structure and add it to the kernel
5045 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5046 * chain. 0 is returned on success. A negative errno code is returned
5047 * on a failure to set up the device, or if the name is a duplicate.
5048 *
5049 * Callers must hold the rtnl semaphore. You may want
5050 * register_netdev() instead of this.
5051 *
5052 * BUGS:
5053 * The locking appears insufficient to guarantee two parallel registers
5054 * will not get the same name.
5055 */
5056
5057int register_netdevice(struct net_device *dev)
5058{
Linus Torvalds1da177e2005-04-16 15:20:36 -07005059 int ret;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005060 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005061
5062 BUG_ON(dev_boot_phase);
5063 ASSERT_RTNL();
5064
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005065 might_sleep();
5066
Linus Torvalds1da177e2005-04-16 15:20:36 -07005067 /* When net_device's are persistent, this will be fatal. */
5068 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005069 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005070
David S. Millerf1f28aa2008-07-15 00:08:33 -07005071 spin_lock_init(&dev->addr_list_lock);
David S. Millercf508b12008-07-22 14:16:42 -07005072 netdev_set_addr_lockdep_class(dev);
David S. Millerc773e842008-07-08 23:13:53 -07005073 netdev_init_queue_locks(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005074
Linus Torvalds1da177e2005-04-16 15:20:36 -07005075 dev->iflink = -1;
5076
5077 /* Init, if this function is available */
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005078 if (dev->netdev_ops->ndo_init) {
5079 ret = dev->netdev_ops->ndo_init(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005080 if (ret) {
5081 if (ret > 0)
5082 ret = -EIO;
Adrian Bunk90833aa2006-11-13 16:02:22 -08005083 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005084 }
5085 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005086
Octavian Purdilad9031022009-11-18 02:36:59 +00005087 ret = dev_get_valid_name(net, dev->name, dev->name, 0);
5088 if (ret)
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07005089 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005090
Eric W. Biederman881d9662007-09-17 11:56:21 -07005091 dev->ifindex = dev_new_index(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005092 if (dev->iflink == -1)
5093 dev->iflink = dev->ifindex;
5094
Stephen Hemmingerd212f872007-06-27 00:47:37 -07005095 /* Fix illegal checksum combinations */
5096 if ((dev->features & NETIF_F_HW_CSUM) &&
5097 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5098 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
5099 dev->name);
5100 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5101 }
5102
5103 if ((dev->features & NETIF_F_NO_CSUM) &&
5104 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5105 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
5106 dev->name);
5107 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5108 }
5109
Herbert Xub63365a2008-10-23 01:11:29 -07005110 dev->features = netdev_fix_features(dev->features, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005111
Lennert Buytenheke5a4a722008-08-03 01:23:10 -07005112 /* Enable software GSO if SG is supported. */
5113 if (dev->features & NETIF_F_SG)
5114 dev->features |= NETIF_F_GSO;
5115
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07005116 netdev_initialize_kobject(dev);
Johannes Berg7ffbe3f2009-10-02 05:15:27 +00005117
5118 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5119 ret = notifier_to_errno(ret);
5120 if (ret)
5121 goto err_uninit;
5122
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005123 ret = netdev_register_kobject(dev);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005124 if (ret)
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07005125 goto err_uninit;
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005126 dev->reg_state = NETREG_REGISTERED;
5127
Linus Torvalds1da177e2005-04-16 15:20:36 -07005128 /*
5129 * Default initial state at registry is that the
5130 * device is present.
5131 */
5132
5133 set_bit(__LINK_STATE_PRESENT, &dev->state);
5134
Linus Torvalds1da177e2005-04-16 15:20:36 -07005135 dev_init_scheduler(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005136 dev_hold(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005137 list_netdevice(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005138
5139 /* Notify protocols, that a new device appeared. */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07005140 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07005141 ret = notifier_to_errno(ret);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07005142 if (ret) {
5143 rollback_registered(dev);
5144 dev->reg_state = NETREG_UNREGISTERED;
5145 }
Eric W. Biedermand90a9092009-12-12 22:11:15 +00005146 /*
5147 * Prevent userspace races by waiting until the network
5148 * device is fully setup before sending notifications.
5149 */
Patrick McHardya2835762010-02-26 06:34:51 +00005150 if (!dev->rtnl_link_ops ||
5151 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5152 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005153
5154out:
5155 return ret;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07005156
5157err_uninit:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08005158 if (dev->netdev_ops->ndo_uninit)
5159 dev->netdev_ops->ndo_uninit(dev);
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07005160 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005161}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005162EXPORT_SYMBOL(register_netdevice);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005163
5164/**
Benjamin Herrenschmidt937f1ba2009-01-14 21:05:05 -08005165 * init_dummy_netdev - init a dummy network device for NAPI
5166 * @dev: device to init
5167 *
5168 * This takes a network device structure and initializes the minimum
5169 * number of fields so it can be used to schedule NAPI polls without
5170 * registering a full blown interface. This is to be used by drivers
5171 * that need to tie several hardware interfaces to a single NAPI
5172 * poll scheduler due to HW limitations.
5173 */
5174int init_dummy_netdev(struct net_device *dev)
5175{
5176 /* Clear everything. Note we don't initialize spinlocks
5177	 * as they aren't supposed to be taken
5178 * NAPI code and this dummy netdev is supposed to be
5179 * only ever used for NAPI polls
5180 */
5181 memset(dev, 0, sizeof(struct net_device));
5182
5183 /* make sure we BUG if trying to hit standard
5184 * register/unregister code path
5185 */
5186 dev->reg_state = NETREG_DUMMY;
5187
5188 /* initialize the ref count */
5189 atomic_set(&dev->refcnt, 1);
5190
5191 /* NAPI wants this */
5192 INIT_LIST_HEAD(&dev->napi_list);
5193
5194 /* a dummy interface is started by default */
5195 set_bit(__LINK_STATE_PRESENT, &dev->state);
5196 set_bit(__LINK_STATE_START, &dev->state);
5197
5198 return 0;
5199}
5200EXPORT_SYMBOL_GPL(init_dummy_netdev);
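
/*
 * Example usage (an illustrative sketch): a driver with one wire but
 * several hardware contexts hanging its NAPI instance off a dummy netdev.
 * The adapter structure, function names and NAPI weight of 64 are all
 * hypothetical.
 */
struct example_adapter {
        struct net_device backing_dev;  /* dummy, never registered */
        struct napi_struct napi;
};

static int example_poll(struct napi_struct *napi, int budget)
{
        /* ... clean the hardware ring here, honouring the budget ... */
        napi_complete(napi);
        return 0;
}

static void __maybe_unused example_setup_napi(struct example_adapter *ad)
{
        init_dummy_netdev(&ad->backing_dev);
        netif_napi_add(&ad->backing_dev, &ad->napi, example_poll, 64);
}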
5201
5202
5203/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07005204 * register_netdev - register a network device
5205 * @dev: device to register
5206 *
5207 * Take a completed network device structure and add it to the kernel
5208 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5209 * chain. 0 is returned on success. A negative errno code is returned
5210 * on a failure to set up the device, or if the name is a duplicate.
5211 *
Borislav Petkov38b4da32007-04-20 22:14:10 -07005212 * This is a wrapper around register_netdevice that takes the rtnl semaphore
Linus Torvalds1da177e2005-04-16 15:20:36 -07005213 * and expands the device name if you passed a format string to
5214 * alloc_netdev.
5215 */
5216int register_netdev(struct net_device *dev)
5217{
5218 int err;
5219
5220 rtnl_lock();
5221
5222 /*
5223 * If the name is a format string the caller wants us to do a
5224 * name allocation.
5225 */
5226 if (strchr(dev->name, '%')) {
5227 err = dev_alloc_name(dev, dev->name);
5228 if (err < 0)
5229 goto out;
5230 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005231
Linus Torvalds1da177e2005-04-16 15:20:36 -07005232 err = register_netdevice(dev);
5233out:
5234 rtnl_unlock();
5235 return err;
5236}
5237EXPORT_SYMBOL(register_netdev);
5238
5239/*
5240 * netdev_wait_allrefs - wait until all references are gone.
5241 *
5242 * This is called when unregistering network devices.
5243 *
5244 * Any protocol or device that holds a reference should register
5245 * for netdevice notification, and cleanup and put back the
5246 * reference if they receive an UNREGISTER event.
5247 * We can get stuck here if buggy protocols don't correctly
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005248 * call dev_put.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005249 */
5250static void netdev_wait_allrefs(struct net_device *dev)
5251{
5252 unsigned long rebroadcast_time, warning_time;
5253
Eric Dumazete014deb2009-11-17 05:59:21 +00005254 linkwatch_forget_dev(dev);
5255
Linus Torvalds1da177e2005-04-16 15:20:36 -07005256 rebroadcast_time = warning_time = jiffies;
5257 while (atomic_read(&dev->refcnt) != 0) {
5258 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08005259 rtnl_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005260
5261 /* Rebroadcast unregister notification */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07005262 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
Eric W. Biedermana5ee1552009-11-29 15:45:58 +00005263 /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
Octavian Purdila395264d2009-11-16 13:49:35 +00005264			 * should have already handled it the first time */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005265
5266 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5267 &dev->state)) {
5268 /* We must not have linkwatch events
5269 * pending on unregister. If this
5270 * happens, we simply run the queue
5271 * unscheduled, resulting in a noop
5272 * for this device.
5273 */
5274 linkwatch_run_queue();
5275 }
5276
Stephen Hemminger6756ae42006-03-20 22:23:58 -08005277 __rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005278
5279 rebroadcast_time = jiffies;
5280 }
5281
5282 msleep(250);
5283
5284 if (time_after(jiffies, warning_time + 10 * HZ)) {
5285 printk(KERN_EMERG "unregister_netdevice: "
5286 "waiting for %s to become free. Usage "
5287 "count = %d\n",
5288 dev->name, atomic_read(&dev->refcnt));
5289 warning_time = jiffies;
5290 }
5291 }
5292}
5293
5294/* The sequence is:
5295 *
5296 * rtnl_lock();
5297 * ...
5298 * register_netdevice(x1);
5299 * register_netdevice(x2);
5300 * ...
5301 * unregister_netdevice(y1);
5302 * unregister_netdevice(y2);
5303 * ...
5304 * rtnl_unlock();
5305 * free_netdev(y1);
5306 * free_netdev(y2);
5307 *
Herbert Xu58ec3b42008-10-07 15:50:03 -07005308 * We are invoked by rtnl_unlock().
Linus Torvalds1da177e2005-04-16 15:20:36 -07005309 * This allows us to deal with problems:
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005310 * 1) We can delete sysfs objects which invoke hotplug
Linus Torvalds1da177e2005-04-16 15:20:36 -07005311 * without deadlocking with linkwatch via keventd.
5312 * 2) Since we run with the RTNL semaphore not held, we can sleep
5313 * safely in order to wait for the netdev refcnt to drop to zero.
Herbert Xu58ec3b42008-10-07 15:50:03 -07005314 *
5315 * We must not return until all unregister events added during
5316 * the interval the lock was held have been completed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005317 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005318void netdev_run_todo(void)
5319{
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005320 struct list_head list;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005321
Linus Torvalds1da177e2005-04-16 15:20:36 -07005322 /* Snapshot list, allow later requests */
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005323 list_replace_init(&net_todo_list, &list);
Herbert Xu58ec3b42008-10-07 15:50:03 -07005324
5325 __rtnl_unlock();
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005326
Linus Torvalds1da177e2005-04-16 15:20:36 -07005327 while (!list_empty(&list)) {
5328 struct net_device *dev
stephen hemmingere5e26d72010-02-24 14:01:38 +00005329 = list_first_entry(&list, struct net_device, todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005330 list_del(&dev->todo_list);
5331
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005332 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005333 printk(KERN_ERR "network todo '%s' but state %d\n",
5334 dev->name, dev->reg_state);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005335 dump_stack();
5336 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005337 }
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005338
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005339 dev->reg_state = NETREG_UNREGISTERED;
5340
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07005341 on_each_cpu(flush_backlog, dev, 1);
5342
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005343 netdev_wait_allrefs(dev);
5344
5345 /* paranoia */
5346 BUG_ON(atomic_read(&dev->refcnt));
Ilpo Järvinen547b7922008-07-25 21:43:18 -07005347 WARN_ON(dev->ip_ptr);
5348 WARN_ON(dev->ip6_ptr);
5349 WARN_ON(dev->dn_ptr);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005350
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005351 if (dev->destructor)
5352 dev->destructor(dev);
Stephen Hemminger9093bbb2007-05-19 15:39:25 -07005353
5354 /* Free network device */
5355 kobject_put(&dev->dev.kobj);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005356 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005357}
5358
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005359/**
Eric Dumazetd83345a2009-11-16 03:36:51 +00005360 * dev_txq_stats_fold - fold tx_queues stats
5361 * @dev: device to get statistics from
5362 * @stats: struct net_device_stats to hold results
5363 */
5364void dev_txq_stats_fold(const struct net_device *dev,
5365 struct net_device_stats *stats)
5366{
5367 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5368 unsigned int i;
5369 struct netdev_queue *txq;
5370
5371 for (i = 0; i < dev->num_tx_queues; i++) {
5372 txq = netdev_get_tx_queue(dev, i);
5373 tx_bytes += txq->tx_bytes;
5374 tx_packets += txq->tx_packets;
5375 tx_dropped += txq->tx_dropped;
5376 }
5377 if (tx_bytes || tx_packets || tx_dropped) {
5378 stats->tx_bytes = tx_bytes;
5379 stats->tx_packets = tx_packets;
5380 stats->tx_dropped = tx_dropped;
5381 }
5382}
5383EXPORT_SYMBOL(dev_txq_stats_fold);
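
/*
 * Example usage (an illustrative sketch): a multiqueue driver that counts
 * in its netdev_queues can implement ndo_get_stats as a plain fold into
 * the default stats block, much like dev_get_stats() does below.  The
 * function name is hypothetical.
 */
static struct net_device_stats *__maybe_unused
example_get_stats(struct net_device *dev)
{
        dev_txq_stats_fold(dev, &dev->stats);
        return &dev->stats;
}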
5384
5385/**
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005386 * dev_get_stats - get network device statistics
5387 * @dev: device to get statistics from
5388 *
5389 * Get network statistics from device. The device driver may provide
5390 * its own method by setting dev->netdev_ops->get_stats; otherwise
5391 * the internal statistics structure is used.
5392 */
5393const struct net_device_stats *dev_get_stats(struct net_device *dev)
Eric Dumazet7004bf22009-05-18 00:34:33 +00005394{
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005395 const struct net_device_ops *ops = dev->netdev_ops;
5396
5397 if (ops->ndo_get_stats)
5398 return ops->ndo_get_stats(dev);
Eric Dumazet7004bf22009-05-18 00:34:33 +00005399
Eric Dumazetd83345a2009-11-16 03:36:51 +00005400 dev_txq_stats_fold(dev, &dev->stats);
5401 return &dev->stats;
Rusty Russellc45d2862007-03-28 14:29:08 -07005402}
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005403EXPORT_SYMBOL(dev_get_stats);
Rusty Russellc45d2862007-03-28 14:29:08 -07005404
David S. Millerdc2b4842008-07-08 17:18:23 -07005405static void netdev_init_one_queue(struct net_device *dev,
David S. Millere8a04642008-07-17 00:34:19 -07005406 struct netdev_queue *queue,
5407 void *_unused)
David S. Millerdc2b4842008-07-08 17:18:23 -07005408{
David S. Millerdc2b4842008-07-08 17:18:23 -07005409 queue->dev = dev;
5410}
5411
David S. Millerbb949fb2008-07-08 16:55:56 -07005412static void netdev_init_queues(struct net_device *dev)
5413{
David S. Millere8a04642008-07-17 00:34:19 -07005414 netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5415 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
David S. Millerc3f26a22008-07-31 16:58:50 -07005416 spin_lock_init(&dev->tx_global_lock);
David S. Millerbb949fb2008-07-08 16:55:56 -07005417}
5418
Linus Torvalds1da177e2005-04-16 15:20:36 -07005419/**
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005420 * alloc_netdev_mq - allocate network device
Linus Torvalds1da177e2005-04-16 15:20:36 -07005421 * @sizeof_priv: size of private data to allocate space for
5422 * @name: device name format string
5423 * @setup: callback to initialize device
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005424 * @queue_count: the number of subqueues to allocate
Linus Torvalds1da177e2005-04-16 15:20:36 -07005425 *
5426 * Allocates a struct net_device with private data area for driver use
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005427 * and performs basic initialization. Also allocates subqueue structs
5428 * for each queue on the device at the end of the netdevice.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005429 */
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005430struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5431 void (*setup)(struct net_device *), unsigned int queue_count)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005432{
David S. Millere8a04642008-07-17 00:34:19 -07005433 struct netdev_queue *tx;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005434 struct net_device *dev;
Stephen Hemminger79439862008-07-21 13:28:44 -07005435 size_t alloc_size;
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005436 struct net_device *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005437
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07005438 BUG_ON(strlen(name) >= sizeof(dev->name));
5439
David S. Millerfd2ea0a2008-07-17 01:56:23 -07005440 alloc_size = sizeof(struct net_device);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07005441 if (sizeof_priv) {
5442 /* ensure 32-byte alignment of private area */
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005443 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07005444 alloc_size += sizeof_priv;
5445 }
5446 /* ensure 32-byte alignment of whole construct */
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005447 alloc_size += NETDEV_ALIGN - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005448
Paolo 'Blaisorblade' Giarrusso31380de2006-04-06 22:38:28 -07005449 p = kzalloc(alloc_size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005450 if (!p) {
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07005451 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07005452 return NULL;
5453 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005454
Stephen Hemminger79439862008-07-21 13:28:44 -07005455 tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
David S. Millere8a04642008-07-17 00:34:19 -07005456 if (!tx) {
5457 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5458 "tx qdiscs.\n");
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00005459 goto free_p;
David S. Millere8a04642008-07-17 00:34:19 -07005460 }
5461
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005462 dev = PTR_ALIGN(p, NETDEV_ALIGN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005463 dev->padded = (char *)dev - (char *)p;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00005464
5465 if (dev_addr_init(dev))
5466 goto free_tx;
5467
Jiri Pirkoccffad252009-05-22 23:22:17 +00005468 dev_unicast_init(dev);
5469
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09005470 dev_net_set(dev, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005471
David S. Millere8a04642008-07-17 00:34:19 -07005472 dev->_tx = tx;
5473 dev->num_tx_queues = queue_count;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07005474 dev->real_num_tx_queues = queue_count;
David S. Millere8a04642008-07-17 00:34:19 -07005475
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07005476 dev->gso_max_size = GSO_MAX_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005477
David S. Millerbb949fb2008-07-08 16:55:56 -07005478 netdev_init_queues(dev);
5479
Peter P Waskiewicz Jr15682bc2010-02-10 20:03:05 -08005480 INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
5481 dev->ethtool_ntuple_list.count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08005482 INIT_LIST_HEAD(&dev->napi_list);
Eric W. Biederman9fdce092009-10-30 14:51:13 +00005483 INIT_LIST_HEAD(&dev->unreg_list);
Eric Dumazete014deb2009-11-17 05:59:21 +00005484 INIT_LIST_HEAD(&dev->link_watch_list);
Eric Dumazet93f154b2009-05-18 22:19:19 -07005485 dev->priv_flags = IFF_XMIT_DST_RELEASE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005486 setup(dev);
5487 strcpy(dev->name, name);
5488 return dev;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00005489
5490free_tx:
5491 kfree(tx);
5492
5493free_p:
5494 kfree(p);
5495 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005496}
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005497EXPORT_SYMBOL(alloc_netdev_mq);
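/*
 * Example usage (illustrative sketch, not part of the original file):
 * a hypothetical driver bringing up a four-queue Ethernet device.
 * The names my_priv, my_setup and the "myeth%d" template are made up;
 * ether_setup() comes from <linux/etherdevice.h>.
 *
 *	static void my_setup(struct net_device *dev)
 *	{
 *		ether_setup(dev);
 *	}
 *
 *	dev = alloc_netdev_mq(sizeof(struct my_priv), "myeth%d",
 *			      my_setup, 4);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 */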
Linus Torvalds1da177e2005-04-16 15:20:36 -07005498
5499/**
5500 * free_netdev - free network device
5501 * @dev: device
5502 *
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005503 * This function does the last stage of destroying an allocated device
5504 * interface. The reference to the device object is released.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005505 * If this is the last reference then it will be freed.
5506 */
5507void free_netdev(struct net_device *dev)
5508{
Herbert Xud565b0a2008-12-15 23:38:52 -08005509 struct napi_struct *p, *n;
5510
Denis V. Lunevf3005d72008-04-16 02:02:18 -07005511 release_net(dev_net(dev));
5512
David S. Millere8a04642008-07-17 00:34:19 -07005513 kfree(dev->_tx);
5514
Jiri Pirkof001fde2009-05-05 02:48:28 +00005515 /* Flush device addresses */
5516 dev_addr_flush(dev);
5517
Peter P Waskiewicz Jr15682bc2010-02-10 20:03:05 -08005518 /* Clear ethtool n-tuple list */
5519 ethtool_ntuple_flush(dev);
5520
Herbert Xud565b0a2008-12-15 23:38:52 -08005521 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5522 netif_napi_del(p);
5523
Stephen Hemminger3041a062006-05-26 13:25:24 -07005524 /* Compatibility with error handling in drivers */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005525 if (dev->reg_state == NETREG_UNINITIALIZED) {
5526 kfree((char *)dev - dev->padded);
5527 return;
5528 }
5529
5530 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5531 dev->reg_state = NETREG_RELEASED;
5532
Greg Kroah-Hartman43cb76d2002-04-09 12:14:34 -07005533 /* will free via device release */
5534 put_device(&dev->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005535}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005536EXPORT_SYMBOL(free_netdev);
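/*
 * Sketch (my_hw_probe is hypothetical): free_netdev() is legal on a
 * device that was never registered -- the NETREG_UNINITIALIZED branch
 * above -- or on one whose unregistration has completed, but never on
 * a device that is still registered.
 *
 *	dev = alloc_netdev_mq(sizeof(struct my_priv), "myeth%d",
 *			      my_setup, 1);
 *	if (dev && my_hw_probe(dev) < 0) {
 *		free_netdev(dev);
 *		dev = NULL;
 *	}
 */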
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005537
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005538/**
5539 * synchronize_net - Synchronize with packet receive processing
5540 *
5541 * Wait for packets currently being received to be done.
5542 * Does not block later packets from starting.
5543 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005544void synchronize_net(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005545{
5546 might_sleep();
Paul E. McKenneyfbd568a3e2005-05-01 08:59:04 -07005547 synchronize_rcu();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005548}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005549EXPORT_SYMBOL(synchronize_net);
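/*
 * Typical caller pattern (sketch; my_handler is a hypothetical pointer
 * that the receive path dereferences under rcu_read_lock()): unpublish
 * the pointer, let in-flight receives drain, then free.
 *
 *	old = my_handler;
 *	rcu_assign_pointer(my_handler, NULL);
 *	synchronize_net();
 *	kfree(old);
 */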
Linus Torvalds1da177e2005-04-16 15:20:36 -07005550
5551/**
Eric Dumazet44a08732009-10-27 07:03:04 +00005552 * unregister_netdevice_queue - remove device from the kernel
Linus Torvalds1da177e2005-04-16 15:20:36 -07005553 * @dev: device
Eric Dumazet44a08732009-10-27 07:03:04 +00005554 * @head: list
Jaswinder Singh Rajput6ebfbc02009-11-22 20:43:13 -08005555 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07005556 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08005557 * from the kernel tables.
Eric Dumazet44a08732009-10-27 07:03:04 +00005558 * If @head is not NULL, the device is queued to be unregistered later.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005559 *
5560 * Callers must hold the rtnl semaphore. You may want
5561 * unregister_netdev() instead of this.
5562 */
5563
Eric Dumazet44a08732009-10-27 07:03:04 +00005564void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005565{
Herbert Xua6620712007-12-12 19:21:56 -08005566 ASSERT_RTNL();
5567
Eric Dumazet44a08732009-10-27 07:03:04 +00005568 if (head) {
Eric W. Biederman9fdce092009-10-30 14:51:13 +00005569 list_move_tail(&dev->unreg_list, head);
Eric Dumazet44a08732009-10-27 07:03:04 +00005570 } else {
5571 rollback_registered(dev);
5572 /* Finish processing unregister after unlock */
5573 net_set_todo(dev);
5574 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005575}
Eric Dumazet44a08732009-10-27 07:03:04 +00005576EXPORT_SYMBOL(unregister_netdevice_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005577
5578/**
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005579 * unregister_netdevice_many - unregister many devices
5580 * @head: list of devices
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005581 */
5582void unregister_netdevice_many(struct list_head *head)
5583{
5584 struct net_device *dev;
5585
5586 if (!list_empty(head)) {
5587 rollback_registered_many(head);
5588 list_for_each_entry(dev, head, unreg_list)
5589 net_set_todo(dev);
5590 }
5591}
Eric Dumazet63c80992009-10-27 07:06:49 +00005592EXPORT_SYMBOL(unregister_netdevice_many);
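/*
 * Batching sketch (is_my_dev() is hypothetical): queue matching
 * devices with unregister_netdevice_queue() and tear them all down in
 * one pass, so the rcu grace periods in rollback_registered_many() are
 * shared instead of paid per device.
 *
 *	LIST_HEAD(kill_list);
 *
 *	rtnl_lock();
 *	for_each_netdev(net, d)
 *		if (is_my_dev(d))
 *			unregister_netdevice_queue(d, &kill_list);
 *	unregister_netdevice_many(&kill_list);
 *	rtnl_unlock();
 */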
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005593
5594/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07005595 * unregister_netdev - remove device from the kernel
5596 * @dev: device
5597 *
5598 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08005599 * from the kernel tables.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005600 *
5601 * This is just a wrapper for unregister_netdevice that takes
5602 * the rtnl semaphore. In general you want to use this and not
5603 * unregister_netdevice.
5604 */
5605void unregister_netdev(struct net_device *dev)
5606{
5607 rtnl_lock();
5608 unregister_netdevice(dev);
5609 rtnl_unlock();
5610}
Linus Torvalds1da177e2005-04-16 15:20:36 -07005611EXPORT_SYMBOL(unregister_netdev);
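/*
 * Canonical module-exit pairing (sketch; my_dev is hypothetical).
 * unregister_netdev() takes the rtnl lock itself and the pending
 * unregistration work runs at rtnl_unlock(), so free_netdev() is safe
 * immediately afterwards:
 *
 *	static void __exit my_exit(void)
 *	{
 *		unregister_netdev(my_dev);
 *		free_netdev(my_dev);
 *	}
 */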
5612
Eric W. Biedermance286d32007-09-12 13:53:49 +02005613/**
5614 * dev_change_net_namespace - move device to different network namespace
5615 * @dev: device
5616 * @net: network namespace
5617 * @pat: If not NULL, a name pattern to try if the current device name
5618 * is already taken in the destination network namespace.
5619 *
5620 * This function shuts down a device interface and moves it
5621 * to a new network namespace. On success 0 is returned, on
5622 * failure a negative errno code is returned.
5623 *
5624 * Callers must hold the rtnl semaphore.
5625 */
5626
5627int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5628{
Eric W. Biedermance286d32007-09-12 13:53:49 +02005629 int err;
5630
5631 ASSERT_RTNL();
5632
5633 /* Don't allow namespace local devices to be moved. */
5634 err = -EINVAL;
5635 if (dev->features & NETIF_F_NETNS_LOCAL)
5636 goto out;
5637
Eric W. Biederman38918452008-10-27 17:51:47 -07005638#ifdef CONFIG_SYSFS
5639 /* Don't allow real devices to be moved when sysfs
5640 * is enabled.
5641 */
5642 err = -EINVAL;
5643 if (dev->dev.parent)
5644 goto out;
5645#endif
5646
Eric W. Biedermance286d32007-09-12 13:53:49 +02005647 /* Ensure the device has been registered */
5648 err = -EINVAL;
5649 if (dev->reg_state != NETREG_REGISTERED)
5650 goto out;
5651
5652 /* Get out if there is nothing to do */
5653 err = 0;
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09005654 if (net_eq(dev_net(dev), net))
Eric W. Biedermance286d32007-09-12 13:53:49 +02005655 goto out;
5656
5657 /* Pick the destination device name, and ensure
5658 * we can use it in the destination network namespace.
5659 */
5660 err = -EEXIST;
Octavian Purdilad9031022009-11-18 02:36:59 +00005661 if (__dev_get_by_name(net, dev->name)) {
Eric W. Biedermance286d32007-09-12 13:53:49 +02005662 /* We get here if we can't use the current device name */
5663 if (!pat)
5664 goto out;
Octavian Purdilad9031022009-11-18 02:36:59 +00005665 if (dev_get_valid_name(net, pat, dev->name, 1))
Eric W. Biedermance286d32007-09-12 13:53:49 +02005666 goto out;
5667 }
5668
5669 /*
5670 * And now a mini version of register_netdevice and unregister_netdevice.
5671 */
5672
5673 /* If device is running close it first. */
Pavel Emelyanov9b772652007-10-10 02:49:09 -07005674 dev_close(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005675
5676 /* And unlink it from device chain */
5677 err = -ENODEV;
5678 unlist_netdevice(dev);
5679
5680 synchronize_net();
5681
5682 /* Shutdown queueing discipline. */
5683 dev_shutdown(dev);
5684
5685 /* Notify protocols that we are about to destroy
5686 this device. They should clean up all their state.
5687 */
5688 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
Eric W. Biedermana5ee1552009-11-29 15:45:58 +00005689 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005690
5691 /*
5692 * Flush the unicast and multicast chains
5693 */
Jiri Pirkoccffad252009-05-22 23:22:17 +00005694 dev_unicast_flush(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005695 dev_addr_discard(dev);
5696
Eric W. Biederman38918452008-10-27 17:51:47 -07005697 netdev_unregister_kobject(dev);
5698
Eric W. Biedermance286d32007-09-12 13:53:49 +02005699 /* Actually switch the network namespace */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09005700 dev_net_set(dev, net);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005701
Eric W. Biedermance286d32007-09-12 13:53:49 +02005702 /* If there is an ifindex conflict assign a new one */
5703 if (__dev_get_by_index(net, dev->ifindex)) {
5704 int iflink = (dev->iflink == dev->ifindex);
5705 dev->ifindex = dev_new_index(net);
5706 if (iflink)
5707 dev->iflink = dev->ifindex;
5708 }
5709
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005710 /* Fixup kobjects */
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07005711 err = netdev_register_kobject(dev);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005712 WARN_ON(err);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005713
5714 /* Add the device back in the hashes */
5715 list_netdevice(dev);
5716
5717 /* Notify protocols that a new device appeared. */
5718 call_netdevice_notifiers(NETDEV_REGISTER, dev);
5719
Eric W. Biedermand90a9092009-12-12 22:11:15 +00005720 /*
5721 * Prevent userspace races by waiting until the network
5722 * device is fully setup before sending notifications.
5723 */
5724 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5725
Eric W. Biedermance286d32007-09-12 13:53:49 +02005726 synchronize_net();
5727 err = 0;
5728out:
5729 return err;
5730}
Johannes Berg463d0182009-07-14 00:33:35 +02005731EXPORT_SYMBOL_GPL(dev_change_net_namespace);
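/*
 * Sketch of moving a device into the namespace of a given process,
 * roughly what rtnetlink does for IFLA_NET_NS_PID (pid is a
 * hypothetical variable, error handling trimmed):
 *
 *	struct net *peer = get_net_ns_by_pid(pid);
 *
 *	if (!IS_ERR(peer)) {
 *		rtnl_lock();
 *		err = dev_change_net_namespace(dev, peer, "eth%d");
 *		rtnl_unlock();
 *		put_net(peer);
 *	}
 */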
Eric W. Biedermance286d32007-09-12 13:53:49 +02005732
Linus Torvalds1da177e2005-04-16 15:20:36 -07005733static int dev_cpu_callback(struct notifier_block *nfb,
5734 unsigned long action,
5735 void *ocpu)
5736{
5737 struct sk_buff **list_skb;
David S. Miller37437bb2008-07-16 02:15:04 -07005738 struct Qdisc **list_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005739 struct sk_buff *skb;
5740 unsigned int cpu, oldcpu = (unsigned long)ocpu;
5741 struct softnet_data *sd, *oldsd;
5742
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07005743 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005744 return NOTIFY_OK;
5745
5746 local_irq_disable();
5747 cpu = smp_processor_id();
5748 sd = &per_cpu(softnet_data, cpu);
5749 oldsd = &per_cpu(softnet_data, oldcpu);
5750
5751 /* Find end of our completion_queue. */
5752 list_skb = &sd->completion_queue;
5753 while (*list_skb)
5754 list_skb = &(*list_skb)->next;
5755 /* Append completion queue from offline CPU. */
5756 *list_skb = oldsd->completion_queue;
5757 oldsd->completion_queue = NULL;
5758
5759 /* Find end of our output_queue. */
5760 list_net = &sd->output_queue;
5761 while (*list_net)
5762 list_net = &(*list_net)->next_sched;
5763 /* Append output queue from offline CPU. */
5764 *list_net = oldsd->output_queue;
5765 oldsd->output_queue = NULL;
5766
5767 raise_softirq_irqoff(NET_TX_SOFTIRQ);
5768 local_irq_enable();
5769
5770 /* Process offline CPU's input_pkt_queue */
5771 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5772 netif_rx(skb);
5773
5774 return NOTIFY_OK;
5775}
Linus Torvalds1da177e2005-04-16 15:20:36 -07005776
5777
Herbert Xu7f353bf2007-08-10 15:47:58 -07005778/**
Herbert Xub63365a2008-10-23 01:11:29 -07005779 * netdev_increment_features - increment feature set by one
5780 * @all: current feature set
5781 * @one: new feature set
5782 * @mask: mask feature set
Herbert Xu7f353bf2007-08-10 15:47:58 -07005783 *
5784 * Computes a new feature set after adding a device with feature set
Herbert Xub63365a2008-10-23 01:11:29 -07005785 * @one to the master device with current feature set @all. Will not
5786 * enable anything that is off in @mask. Returns the new feature set.
Herbert Xu7f353bf2007-08-10 15:47:58 -07005787 */
Herbert Xub63365a2008-10-23 01:11:29 -07005788unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5789 unsigned long mask)
Herbert Xu7f353bf2007-08-10 15:47:58 -07005790{
Herbert Xub63365a2008-10-23 01:11:29 -07005791 /* If device needs checksumming, downgrade to it. */
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005792 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
Herbert Xub63365a2008-10-23 01:11:29 -07005793 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5794 else if (mask & NETIF_F_ALL_CSUM) {
5795 /* If one device supports v4/v6 checksumming, set for all. */
5796 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5797 !(all & NETIF_F_GEN_CSUM)) {
5798 all &= ~NETIF_F_ALL_CSUM;
5799 all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5800 }
Herbert Xu7f353bf2007-08-10 15:47:58 -07005801
Herbert Xub63365a2008-10-23 01:11:29 -07005802 /* If one device supports hw checksumming, set for all. */
5803 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5804 all &= ~NETIF_F_ALL_CSUM;
5805 all |= NETIF_F_HW_CSUM;
5806 }
5807 }
Herbert Xu7f353bf2007-08-10 15:47:58 -07005808
Herbert Xub63365a2008-10-23 01:11:29 -07005809 one |= NETIF_F_ALL_CSUM;
Herbert Xu7f353bf2007-08-10 15:47:58 -07005810
Herbert Xub63365a2008-10-23 01:11:29 -07005811 one |= all & NETIF_F_ONE_FOR_ALL;
Sridhar Samudralad9f59502009-10-07 12:24:25 +00005812 all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
Herbert Xub63365a2008-10-23 01:11:29 -07005813 all |= one & mask & NETIF_F_ONE_FOR_ALL;
Herbert Xu7f353bf2007-08-10 15:47:58 -07005814
5815 return all;
5816}
Herbert Xub63365a2008-10-23 01:11:29 -07005817EXPORT_SYMBOL(netdev_increment_features);
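/*
 * Sketch in the spirit of the bonding driver's feature computation
 * (struct my_slave and slave_list are hypothetical):
 *
 *	unsigned long all = master->features & ~NETIF_F_ONE_FOR_ALL;
 *	struct my_slave *s;
 *
 *	list_for_each_entry(s, &slave_list, list)
 *		all = netdev_increment_features(all, s->dev->features,
 *						NETIF_F_ONE_FOR_ALL);
 *	master->features = all;
 */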
Herbert Xu7f353bf2007-08-10 15:47:58 -07005818
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005819static struct hlist_head *netdev_create_hash(void)
5820{
5821 int i;
5822 struct hlist_head *hash;
5823
5824 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5825 if (hash != NULL)
5826 for (i = 0; i < NETDEV_HASHENTRIES; i++)
5827 INIT_HLIST_HEAD(&hash[i]);
5828
5829 return hash;
5830}
5831
Eric W. Biederman881d9662007-09-17 11:56:21 -07005832/* Initialize per network namespace state */
Pavel Emelyanov46650792007-10-08 20:38:39 -07005833static int __net_init netdev_init(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07005834{
Eric W. Biederman881d9662007-09-17 11:56:21 -07005835 INIT_LIST_HEAD(&net->dev_base_head);
Eric W. Biederman881d9662007-09-17 11:56:21 -07005836
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005837 net->dev_name_head = netdev_create_hash();
5838 if (net->dev_name_head == NULL)
5839 goto err_name;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005840
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005841 net->dev_index_head = netdev_create_hash();
5842 if (net->dev_index_head == NULL)
5843 goto err_idx;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005844
5845 return 0;
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005846
5847err_idx:
5848 kfree(net->dev_name_head);
5849err_name:
5850 return -ENOMEM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005851}
5852
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005853/**
5854 * netdev_drivername - network driver for the device
5855 * @dev: network device
5856 * @buffer: buffer for resulting name
5857 * @len: size of buffer
5858 *
5859 * Determine network driver for device.
5860 */
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07005861char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
Arjan van de Ven6579e572008-07-21 13:31:48 -07005862{
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07005863 const struct device_driver *driver;
5864 const struct device *parent;
Arjan van de Ven6579e572008-07-21 13:31:48 -07005865
5866 if (len <= 0 || !buffer)
5867 return buffer;
5868 buffer[0] = 0;
5869
5870 parent = dev->dev.parent;
5871
5872 if (!parent)
5873 return buffer;
5874
5875 driver = parent->driver;
5876 if (driver && driver->name)
5877 strlcpy(buffer, driver->name, len);
5878 return buffer;
5879}
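/*
 * Usage sketch in the style of the tx-watchdog message (buffer size
 * arbitrary):
 *
 *	char drivername[64];
 *
 *	printk(KERN_INFO "%s is driven by %s\n", dev->name,
 *	       netdev_drivername(dev, drivername, sizeof(drivername)));
 */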
5880
Pavel Emelyanov46650792007-10-08 20:38:39 -07005881static void __net_exit netdev_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07005882{
5883 kfree(net->dev_name_head);
5884 kfree(net->dev_index_head);
5885}
5886
Denis V. Lunev022cbae2007-11-13 03:23:50 -08005887static struct pernet_operations __net_initdata netdev_net_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07005888 .init = netdev_init,
5889 .exit = netdev_exit,
5890};
5891
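/*
 * A subsystem with per-namespace state registers the same way
 * (minimal sketch; my_net_init/my_net_exit are hypothetical and would
 * normally allocate and free state hung off @net):
 *
 *	static int __net_init my_net_init(struct net *net)
 *	{
 *		return 0;
 *	}
 *
 *	static void __net_exit my_net_exit(struct net *net)
 *	{
 *	}
 *
 *	static struct pernet_operations my_net_ops = {
 *		.init = my_net_init,
 *		.exit = my_net_exit,
 *	};
 *
 *	err = register_pernet_subsys(&my_net_ops);
 */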
Pavel Emelyanov46650792007-10-08 20:38:39 -07005892static void __net_exit default_device_exit(struct net *net)
Eric W. Biedermance286d32007-09-12 13:53:49 +02005893{
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00005894 struct net_device *dev, *aux;
Eric W. Biedermance286d32007-09-12 13:53:49 +02005895 /*
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00005896 * Push all migratable network devices back to the
Eric W. Biedermance286d32007-09-12 13:53:49 +02005897 * initial network namespace
5898 */
5899 rtnl_lock();
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00005900 for_each_netdev_safe(net, dev, aux) {
Eric W. Biedermance286d32007-09-12 13:53:49 +02005901 int err;
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005902 char fb_name[IFNAMSIZ];
Eric W. Biedermance286d32007-09-12 13:53:49 +02005903
5904 /* Ignore unmovable devices (e.g. loopback) */
5905 if (dev->features & NETIF_F_NETNS_LOCAL)
5906 continue;
5907
Eric W. Biedermane008b5f2009-11-29 22:25:30 +00005908 /* Leave virtual devices for the generic cleanup */
5909 if (dev->rtnl_link_ops)
5910 continue;
Eric W. Biedermand0c082c2008-11-05 15:59:38 -08005911
Eric W. Biedermance286d32007-09-12 13:53:49 +02005912 /* Push remaining network devices to init_net */
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005913 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5914 err = dev_change_net_namespace(dev, &init_net, fb_name);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005915 if (err) {
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005916 printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
Eric W. Biedermance286d32007-09-12 13:53:49 +02005917 __func__, dev->name, err);
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005918 BUG();
Eric W. Biedermance286d32007-09-12 13:53:49 +02005919 }
5920 }
5921 rtnl_unlock();
5922}
5923
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00005924static void __net_exit default_device_exit_batch(struct list_head *net_list)
5925{
5926 /* At exit all network devices must be removed from a network
5927 * namespace. Do this in the reverse order of registration.
5928 * Do this across as many network namespaces as possible to
5929 * improve batching efficiency.
5930 */
5931 struct net_device *dev;
5932 struct net *net;
5933 LIST_HEAD(dev_kill_list);
5934
5935 rtnl_lock();
5936 list_for_each_entry(net, net_list, exit_list) {
5937 for_each_netdev_reverse(net, dev) {
5938 if (dev->rtnl_link_ops)
5939 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
5940 else
5941 unregister_netdevice_queue(dev, &dev_kill_list);
5942 }
5943 }
5944 unregister_netdevice_many(&dev_kill_list);
5945 rtnl_unlock();
5946}
5947
Denis V. Lunev022cbae2007-11-13 03:23:50 -08005948static struct pernet_operations __net_initdata default_device_ops = {
Eric W. Biedermance286d32007-09-12 13:53:49 +02005949 .exit = default_device_exit,
Eric W. Biederman04dc7f6b2009-12-03 02:29:04 +00005950 .exit_batch = default_device_exit_batch,
Eric W. Biedermance286d32007-09-12 13:53:49 +02005951};
5952
Linus Torvalds1da177e2005-04-16 15:20:36 -07005953/*
5954 * Initialize the DEV module. At boot time this walks the device list and
5955 * unhooks any devices that fail to initialise (normally hardware not
5956 * present) and leaves us with a valid list of present and active devices.
5957 *
5958 */
5959
5960/*
5961 * This is called single threaded during boot, so no need
5962 * to take the rtnl semaphore.
5963 */
5964static int __init net_dev_init(void)
5965{
5966 int i, rc = -ENOMEM;
5967
5968 BUG_ON(!dev_boot_phase);
5969
Linus Torvalds1da177e2005-04-16 15:20:36 -07005970 if (dev_proc_init())
5971 goto out;
5972
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005973 if (netdev_kobject_init())
Linus Torvalds1da177e2005-04-16 15:20:36 -07005974 goto out;
5975
5976 INIT_LIST_HEAD(&ptype_all);
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08005977 for (i = 0; i < PTYPE_HASH_SIZE; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005978 INIT_LIST_HEAD(&ptype_base[i]);
5979
Eric W. Biederman881d9662007-09-17 11:56:21 -07005980 if (register_pernet_subsys(&netdev_net_ops))
5981 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005982
5983 /*
5984 * Initialise the packet receive queues.
5985 */
5986
KAMEZAWA Hiroyuki6f912042006-04-10 22:52:50 -07005987 for_each_possible_cpu(i) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005988 struct softnet_data *queue;
5989
5990 queue = &per_cpu(softnet_data, i);
5991 skb_queue_head_init(&queue->input_pkt_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005992 queue->completion_queue = NULL;
5993 INIT_LIST_HEAD(&queue->poll_list);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07005994
5995 queue->backlog.poll = process_backlog;
5996 queue->backlog.weight = weight_p;
Herbert Xud565b0a2008-12-15 23:38:52 -08005997 queue->backlog.gro_list = NULL;
Herbert Xu4ae55442009-02-08 18:00:36 +00005998 queue->backlog.gro_count = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005999 }
6000
Linus Torvalds1da177e2005-04-16 15:20:36 -07006001 dev_boot_phase = 0;
6002
Eric W. Biederman505d4f72008-11-07 22:54:20 -08006003 /* The loopback device is special: if any other network device
6004 * is present in a network namespace, the loopback device must
6005 * be present too. Since we now dynamically allocate and free the
6006 * loopback device, ensure this invariant is maintained by
6007 * keeping the loopback device as the first device on the
6008 * list of network devices, so that it is the first device
6009 * that appears and the last network device that
6010 * disappears.
6011 */
6012 if (register_pernet_device(&loopback_net_ops))
6013 goto out;
6014
6015 if (register_pernet_device(&default_device_ops))
6016 goto out;
6017
Carlos R. Mafra962cf362008-05-15 11:15:37 -03006018 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
6019 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -07006020
6021 hotcpu_notifier(dev_cpu_callback, 0);
6022 dst_init();
6023 dev_mcast_init();
6024 rc = 0;
6025out:
6026 return rc;
6027}
6028
6029subsys_initcall(net_dev_init);
6030
Krishna Kumare88721f2009-02-18 17:55:02 -08006031static int __init initialize_hashrnd(void)
6032{
6033 get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
6034 return 0;
6035}
6036
6037late_initcall_sync(initialize_hashrnd);
6038