blob: 4615e9a443aa6314a23112816485bc4885c5853d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
Jesper Juhl02c30a82005-05-05 16:16:16 -070010 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
44 * call a packet.
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
73 */
74
75#include <asm/uaccess.h>
76#include <asm/system.h>
77#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080078#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070079#include <linux/cpu.h>
80#include <linux/types.h>
81#include <linux/kernel.h>
82#include <linux/sched.h>
Arjan van de Ven4a3e2f72006-03-20 22:33:17 -080083#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070084#include <linux/string.h>
85#include <linux/mm.h>
86#include <linux/socket.h>
87#include <linux/sockios.h>
88#include <linux/errno.h>
89#include <linux/interrupt.h>
90#include <linux/if_ether.h>
91#include <linux/netdevice.h>
92#include <linux/etherdevice.h>
Ben Hutchings0187bdf2008-06-19 16:15:47 -070093#include <linux/ethtool.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070094#include <linux/notifier.h>
95#include <linux/skbuff.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020096#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070097#include <net/sock.h>
98#include <linux/rtnetlink.h>
99#include <linux/proc_fs.h>
100#include <linux/seq_file.h>
101#include <linux/stat.h>
102#include <linux/if_bridge.h>
Patrick McHardyb863ceb2007-07-14 18:55:06 -0700103#include <linux/if_macvlan.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104#include <net/dst.h>
105#include <net/pkt_sched.h>
106#include <net/checksum.h>
107#include <linux/highmem.h>
108#include <linux/init.h>
109#include <linux/kmod.h>
110#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111#include <linux/netpoll.h>
112#include <linux/rcupdate.h>
113#include <linux/delay.h>
Johannes Berg295f4a12007-04-26 20:43:56 -0700114#include <net/wext.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#include <net/iw_handler.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116#include <asm/current.h>
Steve Grubb5bdb9882005-12-03 08:39:35 -0500117#include <linux/audit.h>
Chris Leechdb217332006-06-17 21:24:58 -0700118#include <linux/dmaengine.h>
Herbert Xuf6a78bf2006-06-22 02:57:17 -0700119#include <linux/err.h>
David S. Millerc7fa9d12006-08-15 16:34:13 -0700120#include <linux/ctype.h>
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700121#include <linux/if_arp.h>
Ben Hutchings6de329e2008-06-16 17:02:28 -0700122#include <linux/if_vlan.h>
David S. Miller8f0f2222008-07-15 03:47:03 -0700123#include <linux/ip.h>
Alexander Duyckad55dca2008-09-20 22:05:50 -0700124#include <net/ip.h>
David S. Miller8f0f2222008-07-15 03:47:03 -0700125#include <linux/ipv6.h>
126#include <linux/in.h>
David S. Millerb6b2fed2008-07-21 09:48:06 -0700127#include <linux/jhash.h>
128#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129
Pavel Emelyanov342709e2007-10-23 21:14:45 -0700130#include "net-sysfs.h"
131
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132/*
133 * The list of packet types we will receive (as opposed to discard)
134 * and the routines to invoke.
135 *
136 * Why 16. Because with 16 the only overlap we get on a hash of the
137 * low nibble of the protocol value is RARP/SNAP/X.25.
138 *
139 * NOTE: That is no longer true with the addition of VLAN tags. Not
140 * sure which should go first, but I bet it won't make much
141 * difference if we are running VLANs. The good news is that
142 * this protocol won't be in the list unless compiled in, so
Stephen Hemminger3041a062006-05-26 13:25:24 -0700143 * the average user (w/out VLANs) will not be adversely affected.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144 * --BLG
145 *
146 * 0800 IP
147 * 8100 802.1Q VLAN
148 * 0001 802.3
149 * 0002 AX.25
150 * 0004 802.2
151 * 8035 RARP
152 * 0005 SNAP
153 * 0805 X.25
154 * 0806 ARP
155 * 8137 IPX
156 * 0009 Localtalk
157 * 86DD IPv6
158 */
159
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +0800160#define PTYPE_HASH_SIZE (16)
161#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
162
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163static DEFINE_SPINLOCK(ptype_lock);
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +0800164static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -0700165static struct list_head ptype_all __read_mostly; /* Taps */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166
Chris Leechdb217332006-06-17 21:24:58 -0700167#ifdef CONFIG_NET_DMA
Dan Williamsd379b012007-07-09 11:56:42 -0700168struct net_dma {
169 struct dma_client client;
170 spinlock_t lock;
171 cpumask_t channel_mask;
Mike Travis0c0b0ac2008-05-02 16:43:08 -0700172 struct dma_chan **channels;
Dan Williamsd379b012007-07-09 11:56:42 -0700173};
174
175static enum dma_state_client
176netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
177 enum dma_state state);
178
179static struct net_dma net_dma = {
180 .client = {
181 .event_callback = netdev_dma_event,
182 },
183};
Chris Leechdb217332006-06-17 21:24:58 -0700184#endif
185
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186/*
Pavel Emelianov7562f872007-05-03 15:13:45 -0700187 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188 * semaphore.
189 *
190 * Pure readers hold dev_base_lock for reading.
191 *
192 * Writers must hold the rtnl semaphore while they loop through the
Pavel Emelianov7562f872007-05-03 15:13:45 -0700193 * dev_base_head list, and hold dev_base_lock for writing when they do the
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 * actual updates. This allows pure readers to access the list even
195 * while a writer is preparing to update it.
196 *
197 * To put it another way, dev_base_lock is held for writing only to
198 * protect against pure readers; the rtnl semaphore provides the
199 * protection against other writers.
200 *
201 * See, for example usages, register_netdevice() and
202 * unregister_netdevice(), which must be called with the rtnl
203 * semaphore held.
204 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205DEFINE_RWLOCK(dev_base_lock);
206
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207EXPORT_SYMBOL(dev_base_lock);
208
209#define NETDEV_HASHBITS 8
Eric W. Biederman881d9662007-09-17 11:56:21 -0700210#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211
Eric W. Biederman881d9662007-09-17 11:56:21 -0700212static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213{
214 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
Eric W. Biederman881d9662007-09-17 11:56:21 -0700215 return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216}
217
Eric W. Biederman881d9662007-09-17 11:56:21 -0700218static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219{
Eric W. Biederman881d9662007-09-17 11:56:21 -0700220 return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221}
222
Eric W. Biedermance286d32007-09-12 13:53:49 +0200223/* Device list insertion */
224static int list_netdevice(struct net_device *dev)
225{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900226 struct net *net = dev_net(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +0200227
228 ASSERT_RTNL();
229
230 write_lock_bh(&dev_base_lock);
231 list_add_tail(&dev->dev_list, &net->dev_base_head);
232 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
233 hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
234 write_unlock_bh(&dev_base_lock);
235 return 0;
236}
237
238/* Device list removal */
239static void unlist_netdevice(struct net_device *dev)
240{
241 ASSERT_RTNL();
242
243 /* Unlink dev from the device chain */
244 write_lock_bh(&dev_base_lock);
245 list_del(&dev->dev_list);
246 hlist_del(&dev->name_hlist);
247 hlist_del(&dev->index_hlist);
248 write_unlock_bh(&dev_base_lock);
249}
250
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251/*
252 * Our notifier list
253 */
254
Alan Sternf07d5b92006-05-09 15:23:03 -0700255static RAW_NOTIFIER_HEAD(netdev_chain);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256
257/*
258 * Device drivers call our routines to queue packets here. We empty the
259 * queue in the local softnet handler.
260 */
Stephen Hemmingerbea33482007-10-03 16:41:36 -0700261
262DEFINE_PER_CPU(struct softnet_data, softnet_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263
David S. Millercf508b12008-07-22 14:16:42 -0700264#ifdef CONFIG_LOCKDEP
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700265/*
David S. Millerc773e842008-07-08 23:13:53 -0700266 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700267 * according to dev->type
268 */
269static const unsigned short netdev_lock_type[] =
270 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
271 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
272 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
273 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
274 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
275 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
276 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
277 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
278 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
279 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
280 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
281 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
282 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
283 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
284 ARPHRD_NONE};
285
286static const char *netdev_lock_name[] =
287 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
288 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
289 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
290 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
291 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
292 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
293 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
294 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
295 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
296 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
297 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
298 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
299 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
300 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
301 "_xmit_NONE"};
302
303static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
David S. Millercf508b12008-07-22 14:16:42 -0700304static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700305
306static inline unsigned short netdev_lock_pos(unsigned short dev_type)
307{
308 int i;
309
310 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
311 if (netdev_lock_type[i] == dev_type)
312 return i;
313 /* the last key is used by default */
314 return ARRAY_SIZE(netdev_lock_type) - 1;
315}
316
David S. Millercf508b12008-07-22 14:16:42 -0700317static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
318 unsigned short dev_type)
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700319{
320 int i;
321
322 i = netdev_lock_pos(dev_type);
323 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
324 netdev_lock_name[i]);
325}
David S. Millercf508b12008-07-22 14:16:42 -0700326
327static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
328{
329 int i;
330
331 i = netdev_lock_pos(dev->type);
332 lockdep_set_class_and_name(&dev->addr_list_lock,
333 &netdev_addr_lock_key[i],
334 netdev_lock_name[i]);
335}
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700336#else
David S. Millercf508b12008-07-22 14:16:42 -0700337static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
338 unsigned short dev_type)
339{
340}
341static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
Jarek Poplawski723e98b2007-05-15 22:46:18 -0700342{
343}
344#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345
346/*******************************************************************************
347
348 Protocol management and registration routines
349
350*******************************************************************************/
351
352/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 * Add a protocol ID to the list. Now that the input handler is
354 * smarter we can dispense with all the messy stuff that used to be
355 * here.
356 *
357 * BEWARE!!! Protocol handlers, mangling input packets,
358 * MUST BE last in hash buckets and checking protocol handlers
359 * MUST start from promiscuous ptype_all chain in net_bh.
360 * It is true now, do not change it.
361 * Explanation follows: if protocol handler, mangling packet, will
362 * be the first on list, it is not able to sense, that packet
363 * is cloned and should be copied-on-write, so that it will
364 * change it and subsequent readers will get broken packet.
365 * --ANK (980803)
366 */
367
368/**
369 * dev_add_pack - add packet handler
370 * @pt: packet type declaration
371 *
372 * Add a protocol handler to the networking stack. The passed &packet_type
373 * is linked into kernel lists and may not be freed until it has been
374 * removed from the kernel lists.
375 *
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900376 * This call does not sleep therefore it can not
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 * guarantee all CPU's that are in middle of receiving packets
378 * will see the new packet type (until the next received packet).
379 */
380
381void dev_add_pack(struct packet_type *pt)
382{
383 int hash;
384
385 spin_lock_bh(&ptype_lock);
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700386 if (pt->type == htons(ETH_P_ALL))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 list_add_rcu(&pt->list, &ptype_all);
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700388 else {
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +0800389 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 list_add_rcu(&pt->list, &ptype_base[hash]);
391 }
392 spin_unlock_bh(&ptype_lock);
393}
394
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395/**
396 * __dev_remove_pack - remove packet handler
397 * @pt: packet type declaration
398 *
399 * Remove a protocol handler that was previously added to the kernel
400 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
401 * from the kernel lists and can be freed or reused once this function
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900402 * returns.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403 *
404 * The packet type might still be in use by receivers
405 * and must not be freed until after all the CPU's have gone
406 * through a quiescent state.
407 */
408void __dev_remove_pack(struct packet_type *pt)
409{
410 struct list_head *head;
411 struct packet_type *pt1;
412
413 spin_lock_bh(&ptype_lock);
414
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700415 if (pt->type == htons(ETH_P_ALL))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 head = &ptype_all;
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700417 else
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +0800418 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
420 list_for_each_entry(pt1, head, list) {
421 if (pt == pt1) {
422 list_del_rcu(&pt->list);
423 goto out;
424 }
425 }
426
427 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
428out:
429 spin_unlock_bh(&ptype_lock);
430}
431/**
432 * dev_remove_pack - remove packet handler
433 * @pt: packet type declaration
434 *
435 * Remove a protocol handler that was previously added to the kernel
436 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
437 * from the kernel lists and can be freed or reused once this function
438 * returns.
439 *
440 * This call sleeps to guarantee that no CPU is looking at the packet
441 * type after return.
442 */
443void dev_remove_pack(struct packet_type *pt)
444{
445 __dev_remove_pack(pt);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900446
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 synchronize_net();
448}
449
450/******************************************************************************
451
452 Device Boot-time Settings Routines
453
454*******************************************************************************/
455
456/* Boot time configuration table */
457static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
458
459/**
460 * netdev_boot_setup_add - add new setup entry
461 * @name: name of the device
462 * @map: configured settings for the device
463 *
464 * Adds new setup entry to the dev_boot_setup list. The function
465 * returns 0 on error and 1 on success. This is a generic routine to
466 * all netdevices.
467 */
468static int netdev_boot_setup_add(char *name, struct ifmap *map)
469{
470 struct netdev_boot_setup *s;
471 int i;
472
473 s = dev_boot_setup;
474 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
475 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
476 memset(s[i].name, 0, sizeof(s[i].name));
Wang Chen93b3cff2008-07-01 19:57:19 -0700477 strlcpy(s[i].name, name, IFNAMSIZ);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478 memcpy(&s[i].map, map, sizeof(s[i].map));
479 break;
480 }
481 }
482
483 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
484}
485
486/**
487 * netdev_boot_setup_check - check boot time settings
488 * @dev: the netdevice
489 *
490 * Check boot time settings for the device.
491 * The found settings are set for the device to be used
492 * later in the device probing.
493 * Returns 0 if no settings found, 1 if they are.
494 */
495int netdev_boot_setup_check(struct net_device *dev)
496{
497 struct netdev_boot_setup *s = dev_boot_setup;
498 int i;
499
500 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
501 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
Wang Chen93b3cff2008-07-01 19:57:19 -0700502 !strcmp(dev->name, s[i].name)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 dev->irq = s[i].map.irq;
504 dev->base_addr = s[i].map.base_addr;
505 dev->mem_start = s[i].map.mem_start;
506 dev->mem_end = s[i].map.mem_end;
507 return 1;
508 }
509 }
510 return 0;
511}
512
513
514/**
515 * netdev_boot_base - get address from boot time settings
516 * @prefix: prefix for network device
517 * @unit: id for network device
518 *
519 * Check boot time settings for the base address of device.
520 * The found settings are set for the device to be used
521 * later in the device probing.
522 * Returns 0 if no settings found.
523 */
524unsigned long netdev_boot_base(const char *prefix, int unit)
525{
526 const struct netdev_boot_setup *s = dev_boot_setup;
527 char name[IFNAMSIZ];
528 int i;
529
530 sprintf(name, "%s%d", prefix, unit);
531
532 /*
533 * If device already registered then return base of 1
534 * to indicate not to probe for this interface
535 */
Eric W. Biederman881d9662007-09-17 11:56:21 -0700536 if (__dev_get_by_name(&init_net, name))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 return 1;
538
539 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
540 if (!strcmp(name, s[i].name))
541 return s[i].map.base_addr;
542 return 0;
543}
544
545/*
546 * Saves at boot time configured settings for any netdevice.
547 */
548int __init netdev_boot_setup(char *str)
549{
550 int ints[5];
551 struct ifmap map;
552
553 str = get_options(str, ARRAY_SIZE(ints), ints);
554 if (!str || !*str)
555 return 0;
556
557 /* Save settings */
558 memset(&map, 0, sizeof(map));
559 if (ints[0] > 0)
560 map.irq = ints[1];
561 if (ints[0] > 1)
562 map.base_addr = ints[2];
563 if (ints[0] > 2)
564 map.mem_start = ints[3];
565 if (ints[0] > 3)
566 map.mem_end = ints[4];
567
568 /* Add new entry to the list */
569 return netdev_boot_setup_add(str, &map);
570}
571
572__setup("netdev=", netdev_boot_setup);
573
574/*******************************************************************************
575
576 Device Interface Subroutines
577
578*******************************************************************************/
579
580/**
581 * __dev_get_by_name - find a device by its name
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700582 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 * @name: name to find
584 *
585 * Find an interface by name. Must be called under RTNL semaphore
586 * or @dev_base_lock. If the name is found a pointer to the device
587 * is returned. If the name is not found then %NULL is returned. The
588 * reference counters are not incremented so the caller must be
589 * careful with locks.
590 */
591
Eric W. Biederman881d9662007-09-17 11:56:21 -0700592struct net_device *__dev_get_by_name(struct net *net, const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593{
594 struct hlist_node *p;
595
Eric W. Biederman881d9662007-09-17 11:56:21 -0700596 hlist_for_each(p, dev_name_hash(net, name)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 struct net_device *dev
598 = hlist_entry(p, struct net_device, name_hlist);
599 if (!strncmp(dev->name, name, IFNAMSIZ))
600 return dev;
601 }
602 return NULL;
603}
604
605/**
606 * dev_get_by_name - find a device by its name
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700607 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 * @name: name to find
609 *
610 * Find an interface by name. This can be called from any
611 * context and does its own locking. The returned handle has
612 * the usage count incremented and the caller must use dev_put() to
613 * release it when it is no longer needed. %NULL is returned if no
614 * matching device is found.
615 */
616
Eric W. Biederman881d9662007-09-17 11:56:21 -0700617struct net_device *dev_get_by_name(struct net *net, const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618{
619 struct net_device *dev;
620
621 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700622 dev = __dev_get_by_name(net, name);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 if (dev)
624 dev_hold(dev);
625 read_unlock(&dev_base_lock);
626 return dev;
627}
628
629/**
630 * __dev_get_by_index - find a device by its ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700631 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 * @ifindex: index of device
633 *
634 * Search for an interface by index. Returns %NULL if the device
635 * is not found or a pointer to the device. The device has not
636 * had its reference counter increased so the caller must be careful
637 * about locking. The caller must hold either the RTNL semaphore
638 * or @dev_base_lock.
639 */
640
Eric W. Biederman881d9662007-09-17 11:56:21 -0700641struct net_device *__dev_get_by_index(struct net *net, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642{
643 struct hlist_node *p;
644
Eric W. Biederman881d9662007-09-17 11:56:21 -0700645 hlist_for_each(p, dev_index_hash(net, ifindex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646 struct net_device *dev
647 = hlist_entry(p, struct net_device, index_hlist);
648 if (dev->ifindex == ifindex)
649 return dev;
650 }
651 return NULL;
652}
653
654
655/**
656 * dev_get_by_index - find a device by its ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700657 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658 * @ifindex: index of device
659 *
660 * Search for an interface by index. Returns NULL if the device
661 * is not found or a pointer to the device. The device returned has
662 * had a reference added and the pointer is safe until the user calls
663 * dev_put to indicate they have finished with it.
664 */
665
Eric W. Biederman881d9662007-09-17 11:56:21 -0700666struct net_device *dev_get_by_index(struct net *net, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667{
668 struct net_device *dev;
669
670 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700671 dev = __dev_get_by_index(net, ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672 if (dev)
673 dev_hold(dev);
674 read_unlock(&dev_base_lock);
675 return dev;
676}
677
678/**
679 * dev_getbyhwaddr - find a device by its hardware address
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700680 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 * @type: media type of device
682 * @ha: hardware address
683 *
684 * Search for an interface by MAC address. Returns NULL if the device
685 * is not found or a pointer to the device. The caller must hold the
686 * rtnl semaphore. The returned device has not had its ref count increased
687 * and the caller must therefore be careful about locking
688 *
689 * BUGS:
690 * If the API was consistent this would be __dev_get_by_hwaddr
691 */
692
Eric W. Biederman881d9662007-09-17 11:56:21 -0700693struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694{
695 struct net_device *dev;
696
697 ASSERT_RTNL();
698
Denis V. Lunev81103a52007-12-12 10:47:38 -0800699 for_each_netdev(net, dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700 if (dev->type == type &&
701 !memcmp(dev->dev_addr, ha, dev->addr_len))
Pavel Emelianov7562f872007-05-03 15:13:45 -0700702 return dev;
703
704 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705}
706
Jochen Friedrichcf309e32005-09-22 04:44:55 -0300707EXPORT_SYMBOL(dev_getbyhwaddr);
708
Eric W. Biederman881d9662007-09-17 11:56:21 -0700709struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
Patrick McHardy4e9cac22007-05-03 03:28:13 -0700710{
711 struct net_device *dev;
712
713 ASSERT_RTNL();
Eric W. Biederman881d9662007-09-17 11:56:21 -0700714 for_each_netdev(net, dev)
Patrick McHardy4e9cac22007-05-03 03:28:13 -0700715 if (dev->type == type)
Pavel Emelianov7562f872007-05-03 15:13:45 -0700716 return dev;
717
718 return NULL;
Patrick McHardy4e9cac22007-05-03 03:28:13 -0700719}
720
721EXPORT_SYMBOL(__dev_getfirstbyhwtype);
722
Eric W. Biederman881d9662007-09-17 11:56:21 -0700723struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724{
725 struct net_device *dev;
726
727 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -0700728 dev = __dev_getfirstbyhwtype(net, type);
Patrick McHardy4e9cac22007-05-03 03:28:13 -0700729 if (dev)
730 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 rtnl_unlock();
732 return dev;
733}
734
735EXPORT_SYMBOL(dev_getfirstbyhwtype);
736
737/**
738 * dev_get_by_flags - find any device with given flags
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700739 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 * @if_flags: IFF_* values
741 * @mask: bitmask of bits in if_flags to check
742 *
743 * Search for any interface with the given flags. Returns NULL if a device
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900744 * is not found or a pointer to the device. The device returned has
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745 * had a reference added and the pointer is safe until the user calls
746 * dev_put to indicate they have finished with it.
747 */
748
Eric W. Biederman881d9662007-09-17 11:56:21 -0700749struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750{
Pavel Emelianov7562f872007-05-03 15:13:45 -0700751 struct net_device *dev, *ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752
Pavel Emelianov7562f872007-05-03 15:13:45 -0700753 ret = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700755 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 if (((dev->flags ^ if_flags) & mask) == 0) {
757 dev_hold(dev);
Pavel Emelianov7562f872007-05-03 15:13:45 -0700758 ret = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 break;
760 }
761 }
762 read_unlock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -0700763 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764}
765
766/**
767 * dev_valid_name - check if name is okay for network device
768 * @name: name string
769 *
770 * Network device names need to be valid file names to
David S. Millerc7fa9d12006-08-15 16:34:13 -0700771 * to allow sysfs to work. We also disallow any kind of
772 * whitespace.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773 */
Mitch Williamsc2373ee2005-11-09 10:34:45 -0800774int dev_valid_name(const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775{
David S. Millerc7fa9d12006-08-15 16:34:13 -0700776 if (*name == '\0')
777 return 0;
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -0700778 if (strlen(name) >= IFNAMSIZ)
779 return 0;
David S. Millerc7fa9d12006-08-15 16:34:13 -0700780 if (!strcmp(name, ".") || !strcmp(name, ".."))
781 return 0;
782
783 while (*name) {
784 if (*name == '/' || isspace(*name))
785 return 0;
786 name++;
787 }
788 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789}
790
/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;	/* one bit per unit in a single page */
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			/* run the pattern backwards: pull the unit number
			 * out of every existing device name that matches
			 */
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		/* lowest unused unit number wins */
		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}
855
Eric W. Biedermanb267b172007-09-12 13:48:45 +0200856/**
857 * dev_alloc_name - allocate a name for a device
858 * @dev: device
859 * @name: name format string
860 *
861 * Passed a format string - eg "lt%d" it will try and find a suitable
862 * id. It scans list of devices to build up a free map, then chooses
863 * the first empty slot. The caller must hold the dev_base or rtnl lock
864 * while allocating the name and adding the device in order to avoid
865 * duplicates.
866 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
867 * Returns the number of the unit assigned or a negative errno code.
868 */
869
870int dev_alloc_name(struct net_device *dev, const char *name)
871{
872 char buf[IFNAMSIZ];
873 struct net *net;
874 int ret;
875
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900876 BUG_ON(!dev_net(dev));
877 net = dev_net(dev);
Eric W. Biedermanb267b172007-09-12 13:48:45 +0200878 ret = __dev_alloc_name(net, name, buf);
879 if (ret >= 0)
880 strlcpy(dev->name, buf, IFNAMSIZ);
881 return ret;
882}
883
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884
885/**
886 * dev_change_name - change name of a device
887 * @dev: device
888 * @newname: name (or format string) must be at least IFNAMSIZ
889 *
890 * Change name of a device, can pass format strings "eth%d".
891 * for wildcarding.
892 */
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -0700893int dev_change_name(struct net_device *dev, const char *newname)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894{
Herbert Xufcc5a032007-07-30 17:03:38 -0700895 char oldname[IFNAMSIZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896 int err = 0;
Herbert Xufcc5a032007-07-30 17:03:38 -0700897 int ret;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700898 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899
900 ASSERT_RTNL();
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900901 BUG_ON(!dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900903 net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 if (dev->flags & IFF_UP)
905 return -EBUSY;
906
907 if (!dev_valid_name(newname))
908 return -EINVAL;
909
Stephen Hemmingerc8d90dc2007-10-26 03:53:42 -0700910 if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
911 return 0;
912
Herbert Xufcc5a032007-07-30 17:03:38 -0700913 memcpy(oldname, dev->name, IFNAMSIZ);
914
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 if (strchr(newname, '%')) {
916 err = dev_alloc_name(dev, newname);
917 if (err < 0)
918 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 }
Eric W. Biederman881d9662007-09-17 11:56:21 -0700920 else if (__dev_get_by_name(net, newname))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 return -EEXIST;
922 else
923 strlcpy(dev->name, newname, IFNAMSIZ);
924
Herbert Xufcc5a032007-07-30 17:03:38 -0700925rollback:
Eric W. Biederman38918452008-10-27 17:51:47 -0700926 /* For now only devices in the initial network namespace
927 * are in sysfs.
928 */
929 if (net == &init_net) {
930 ret = device_rename(&dev->dev, dev->name);
931 if (ret) {
932 memcpy(dev->name, oldname, IFNAMSIZ);
933 return ret;
934 }
Stephen Hemmingerdcc99772008-05-14 22:33:38 -0700935 }
Herbert Xu7f988ea2007-07-30 16:35:46 -0700936
937 write_lock_bh(&dev_base_lock);
Eric W. Biederman92749822007-04-03 00:07:30 -0600938 hlist_del(&dev->name_hlist);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700939 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
Herbert Xu7f988ea2007-07-30 16:35:46 -0700940 write_unlock_bh(&dev_base_lock);
941
Pavel Emelyanov056925a2007-09-16 15:42:43 -0700942 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -0700943 ret = notifier_to_errno(ret);
944
945 if (ret) {
946 if (err) {
947 printk(KERN_ERR
948 "%s: name change rollback failed: %d.\n",
949 dev->name, ret);
950 } else {
951 err = ret;
952 memcpy(dev->name, oldname, IFNAMSIZ);
953 goto rollback;
954 }
955 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956
957 return err;
958}
959
960/**
Stephen Hemminger0b815a12008-09-22 21:28:11 -0700961 * dev_set_alias - change ifalias of a device
962 * @dev: device
963 * @alias: name up to IFALIASZ
Stephen Hemmingerf0db2752008-09-30 02:23:58 -0700964 * @len: limit of bytes to copy from info
Stephen Hemminger0b815a12008-09-22 21:28:11 -0700965 *
966 * Set ifalias for a device,
967 */
968int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
969{
970 ASSERT_RTNL();
971
972 if (len >= IFALIASZ)
973 return -EINVAL;
974
Oliver Hartkopp96ca4a22008-09-23 21:23:19 -0700975 if (!len) {
976 if (dev->ifalias) {
977 kfree(dev->ifalias);
978 dev->ifalias = NULL;
979 }
980 return 0;
981 }
982
Stephen Hemminger0b815a12008-09-22 21:28:11 -0700983 dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
984 if (!dev->ifalias)
985 return -ENOMEM;
986
987 strlcpy(dev->ifalias, alias, len+1);
988 return len;
989}
990
991
/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.  Fires the
 *	%NETDEV_FEAT_CHANGE event down the netdev notifier chain.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
1003
1004/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 * netdev_state_change - device changes state
1006 * @dev: device to cause notification
1007 *
1008 * Called to indicate a device has changed state. This function calls
1009 * the notifier chains for netdev_chain and sends a NEWLINK message
1010 * to the routing socket.
1011 */
1012void netdev_state_change(struct net_device *dev)
1013{
1014 if (dev->flags & IFF_UP) {
Pavel Emelyanov056925a2007-09-16 15:42:43 -07001015 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1017 }
1018}
1019
/**
 *	netdev_bonding_change - bonding failover notification
 *	@dev: device on which the failover occurred
 *
 *	Fires the %NETDEV_BONDING_FAILOVER event down the netdev
 *	notifier chain.
 */
void netdev_bonding_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);
1025
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026/**
1027 * dev_load - load a network module
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07001028 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 * @name: name of interface
1030 *
1031 * If a network interface is not present and the process has suitable
1032 * privileges this function loads the module. If module loading is not
1033 * available in this kernel then it becomes a nop.
1034 */
1035
Eric W. Biederman881d9662007-09-17 11:56:21 -07001036void dev_load(struct net *net, const char *name)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037{
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001038 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039
1040 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -07001041 dev = __dev_get_by_name(net, name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042 read_unlock(&dev_base_lock);
1043
1044 if (!dev && capable(CAP_SYS_MODULE))
1045 request_module("%s", name);
1046}
1047
/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret = 0;

	ASSERT_RTNL();

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method.  __LINK_STATE_START is set
	 *	before ndo_open so the driver sees netif_running() as true
	 *	inside its open callback.
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	/* let the driver veto its current hardware address first */
	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	/*
	 *	If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);	/* undo on failure */
	else {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return ret;
}
1121
/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare to death, when device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of it's
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	return 0;
}
1182
1183
Ben Hutchings0187bdf2008-06-19 16:15:47 -07001184/**
1185 * dev_disable_lro - disable Large Receive Offload on a device
1186 * @dev: device
1187 *
1188 * Disable Large Receive Offload (LRO) on a net device. Must be
1189 * called under RTNL. This is needed if received packets may be
1190 * forwarded to another interface.
1191 */
1192void dev_disable_lro(struct net_device *dev)
1193{
1194 if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1195 dev->ethtool_ops->set_flags) {
1196 u32 flags = dev->ethtool_ops->get_flags(dev);
1197 if (flags & ETH_FLAG_LRO) {
1198 flags &= ~ETH_FLAG_LRO;
1199 dev->ethtool_ops->set_flags(dev, flags);
1200 }
1201 }
1202 WARN_ON(dev->features & NETIF_F_LRO);
1203}
1204EXPORT_SYMBOL(dev_disable_lro);
1205
1206
Eric W. Biederman881d9662007-09-17 11:56:21 -07001207static int dev_boot_phase = 1;
1208
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209/*
1210 * Device change register/unregister. These are not inline or static
1211 * as we export them to the world.
1212 */
1213
/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 * 	When registered all registration and up events are replayed
 *	to the new notifier to allow device to have a race free
 *	view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;	/* nothing to replay yet */
	for_each_net(net) {
		for_each_netdev(net, dev) {
			/* replay REGISTER (and UP, if up) for every
			 * pre-existing device
			 */
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	/* unwind the replayed events on every device we already told,
	 * stopping at the one that failed
	 */
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				break;
			/* NOTE(review): this 'break' only exits the inner
			 * loop, so namespaces after the failing one would
			 * still receive unwind events they were never
			 * registered for — verify against later upstream
			 * fixes.
			 */

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}

	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
1277
1278/**
1279 * unregister_netdevice_notifier - unregister a network notifier block
1280 * @nb: notifier
1281 *
1282 * Unregister a notifier previously registered by
1283 * register_netdevice_notifier(). The notifier is unlinked into the
1284 * kernel structures and may then be reused. A negative errno code
1285 * is returned on a failure.
1286 */
1287
1288int unregister_netdevice_notifier(struct notifier_block *nb)
1289{
Herbert Xu9f514952006-03-25 01:24:25 -08001290 int err;
1291
1292 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001293 err = raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xu9f514952006-03-25 01:24:25 -08001294 rtnl_unlock();
1295 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001296}
1297
/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *      @val: value passed unmodified to notifier function
 *      @dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}
1311
/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

/* Register one more consumer of receive timestamps. */
void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}

/* Drop one consumer of receive timestamps. */
void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}
1324
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001325static inline void net_timestamp(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326{
1327 if (atomic_read(&netstamp_needed))
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001328 __net_timestamp(skb);
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001329 else
1330 skb->tstamp.tv64 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331}
1332
/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

	net_timestamp(skb);

	/* walk the ETH_P_ALL tap list under RCU; each tap gets its own
	 * clone of the frame
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;	/* out of memory: stop tapping */

			/* skb->nh should be correctly
			   set by sender, so that the second statement is
			   just protection against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype, skb->dev);
		}
	}
	rcu_read_unlock();
}
1378
Denis Vlasenko56079432006-03-29 15:57:29 -08001379
/* Push @q onto this CPU's softnet output_queue and raise the TX softirq.
 * IRQs are disabled while the per-CPU list head is edited.
 */
static inline void __netif_reschedule(struct Qdisc *q)
{
	struct softnet_data *sd;
	unsigned long flags;

	local_irq_save(flags);
	sd = &__get_cpu_var(softnet_data);
	q->next_sched = sd->output_queue;
	sd->output_queue = q;
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}
1392
/* Schedule @q for transmit processing unless it is already scheduled;
 * the __QDISC_STATE_SCHED bit is the "already queued" latch.
 */
void __netif_schedule(struct Qdisc *q)
{
	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
		__netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);
1399
/* Release a reference to @skb from hard-IRQ context.  When the last
 * reference drops, the skb cannot be freed here; it is queued on this
 * CPU's completion_queue and the TX softirq is raised to free it later.
 */
void dev_kfree_skb_irq(struct sk_buff *skb)
{
	if (atomic_dec_and_test(&skb->users)) {
		struct softnet_data *sd;
		unsigned long flags;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		skb->next = sd->completion_queue;
		sd->completion_queue = skb;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);
Denis Vlasenko56079432006-03-29 15:57:29 -08001415
/* Free @skb from any context: defer to dev_kfree_skb_irq() when in
 * hard-IRQ context or with interrupts disabled, otherwise free directly.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1424
1425
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001426/**
1427 * netif_device_detach - mark device as removed
1428 * @dev: network device
1429 *
1430 * Mark device as removed from system and therefore no longer available.
1431 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001432void netif_device_detach(struct net_device *dev)
1433{
1434 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1435 netif_running(dev)) {
1436 netif_stop_queue(dev);
1437 }
1438}
1439EXPORT_SYMBOL(netif_device_detach);
1440
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001441/**
1442 * netif_device_attach - mark device as attached
1443 * @dev: network device
1444 *
1445 * Mark device as attached from system and restart if needed.
1446 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001447void netif_device_attach(struct net_device *dev)
1448{
1449 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1450 netif_running(dev)) {
1451 netif_wake_queue(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001452 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001453 }
1454}
1455EXPORT_SYMBOL(netif_device_attach);
1456
Ben Hutchings6de329e2008-06-16 17:02:28 -07001457static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1458{
1459 return ((features & NETIF_F_GEN_CSUM) ||
1460 ((features & NETIF_F_IP_CSUM) &&
1461 protocol == htons(ETH_P_IP)) ||
1462 ((features & NETIF_F_IPV6_CSUM) &&
1463 protocol == htons(ETH_P_IPV6)));
1464}
1465
1466static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1467{
1468 if (can_checksum_protocol(dev->features, skb->protocol))
1469 return true;
1470
1471 if (skb->protocol == htons(ETH_P_8021Q)) {
1472 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1473 if (can_checksum_protocol(dev->features & dev->vlan_features,
1474 veh->h_vlan_encapsulated_proto))
1475 return true;
1476 }
1477
1478 return false;
1479}
Denis Vlasenko56079432006-03-29 15:57:29 -08001480
/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum. */
		goto out_set_summed;
	}

	/* offset of the region to checksum, from the start of skb->data */
	offset = skb->csum_start - skb_headroom(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	/* advance to the checksum field itself */
	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	/* a shared clone must be made private before we write into it */
	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}
1518
/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	BUG_ON(skb_shinfo(skb)->frag_list);

	/* strip the link-layer header before handing to the protocol */
	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	/* look up the GSO handler for this ethertype under RCU */
	rcu_read_lock();
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	/* restore the link-layer header pulled above */
	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}

EXPORT_SYMBOL(skb_gso_segment);
1572
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
/*
 * netdev_rx_csum_fault - log a hardware receive-checksum failure
 * @dev: device that delivered the bad frame (may be NULL)
 *
 * Rate-limited diagnostic: prints the device name and a stack trace so
 * the offending driver path can be identified.  Compiled out entirely
 * when CONFIG_BUG is disabled.
 */
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		printk(KERN_ERR "%s: hw csum failure.\n",
			dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1585
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

/*
 * Return 1 when @skb holds a highmem fragment that @dev cannot DMA
 * from (no NETIF_F_HIGHDMA), 0 otherwise.  Always 0 without
 * CONFIG_HIGHMEM, where no page can be in high memory.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	if (!(dev->features & NETIF_F_HIGHDMA)) {
		int frag;

		for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
			if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
				return 1;
		}
	}
#endif
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606
/* Per-skb state stashed in skb->cb while a GSO segment list is queued
 * on skb->next: remembers the original destructor so it can be restored
 * (see dev_gso_skb_destructor / dev_hard_start_xmit).
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* Accessor for the control block above. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1612
/*
 * Destructor installed on a GSO-segmented skb: frees every pending
 * segment still chained on skb->next, then invokes the skb's original
 * destructor saved in DEV_GSO_CB().
 */
static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	do {
		struct sk_buff *nskb = skb->next;

		/* Unlink the segment before freeing it. */
		skb->next = nskb->next;
		nskb->next = NULL;
		kfree_skb(nskb);
	} while (skb->next);

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}
1629
/**
 *	dev_gso_segment - Perform emulated hardware segmentation on skb.
 *	@skb: buffer to segment
 *
 *	This function segments the given skb and stores the list of segments
 *	in skb->next.  Returns 0 on success (including the header-check-only
 *	case where no list is produced) or a negative errno from
 *	skb_gso_segment().
 */
static int dev_gso_segment(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct sk_buff *segs;
	/* Mask out NETIF_F_SG if any fragment is in highmem the device
	 * cannot DMA from, forcing linear segments.
	 */
	int features = dev->features & ~(illegal_highdma(dev, skb) ?
					 NETIF_F_SG : 0);

	segs = skb_gso_segment(skb, features);

	/* Verifying header integrity only. */
	if (!segs)
		return 0;

	if (IS_ERR(segs))
		return PTR_ERR(segs);

	/* Chain the segments and hijack the destructor so pending
	 * segments are freed if the skb is dropped mid-transmit.
	 */
	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}
1659
/*
 * Hand one skb (or a GSO segment list) to the driver's ndo_start_xmit.
 * @txq is the already-selected and locked-by-caller tx queue.
 *
 * For a GSO skb that the device cannot handle, the skb is segmented
 * here and each segment is transmitted individually.  If the driver
 * rejects a segment, the remaining segments are re-chained onto
 * skb->next and the driver's return code (e.g. NETDEV_TX_BUSY) is
 * propagated so the caller can requeue.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	prefetch(&dev->netdev_ops->ndo_start_xmit);
	if (likely(!skb->next)) {
		/* Tap the packet for AF_PACKET listeners first. */
		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		if (netif_needs_gso(dev, skb)) {
			if (unlikely(dev_gso_segment(skb)))
				goto out_kfree_skb;
			if (skb->next)
				goto gso;
		}

		return ops->ndo_start_xmit(skb, dev);
	}

gso:
	/* Transmit each segment; on failure re-link the unsent tail. */
	do {
		struct sk_buff *nskb = skb->next;
		int rc;

		skb->next = nskb->next;
		nskb->next = NULL;
		rc = ops->ndo_start_xmit(nskb, dev);
		if (unlikely(rc)) {
			/* Driver kept/rejected nskb: put it back at the
			 * head of the pending list and report.
			 */
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		/* Stop early if the queue filled up mid-list. */
		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

	/* All segments sent: restore the original destructor. */
	skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
	kfree_skb(skb);
	return 0;
}
1703
/* Random key for the default tx queue hash; lazily seeded on first use.
 * NOTE(review): the lazy init below is not atomic — two CPUs racing here
 * could each reseed, which at worst perturbs early flow-to-queue mapping.
 * Presumably considered harmless; confirm before relying on stability.
 */
static u32 simple_tx_hashrnd;
static int simple_tx_hashrnd_initialized = 0;

/*
 * Default tx queue selector: hash the flow (addresses + ports where the
 * L4 protocol carries them) to a queue index in
 * [0, dev->real_num_tx_queues).  Returns 0 for non-IP protocols.
 */
static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
{
	u32 addr1, addr2, ports;
	u32 hash, ihl;
	u8 ip_proto = 0;

	if (unlikely(!simple_tx_hashrnd_initialized)) {
		get_random_bytes(&simple_tx_hashrnd, 4);
		simple_tx_hashrnd_initialized = 1;
	}

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		/* Only the first fragment carries the L4 header; leave
		 * ip_proto 0 for later fragments so ports are ignored.
		 */
		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
			ip_proto = ip_hdr(skb)->protocol;
		addr1 = ip_hdr(skb)->saddr;
		addr2 = ip_hdr(skb)->daddr;
		ihl = ip_hdr(skb)->ihl;
		break;
	case htons(ETH_P_IPV6):
		ip_proto = ipv6_hdr(skb)->nexthdr;
		/* Use the low 32 bits of each IPv6 address. */
		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
		ihl = (40 >> 2);	/* fixed IPv6 header, in 32-bit words */
		break;
	default:
		return 0;
	}


	switch (ip_proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
	case IPPROTO_DCCP:
	case IPPROTO_ESP:
	case IPPROTO_AH:
	case IPPROTO_SCTP:
	case IPPROTO_UDPLITE:
		/* Grab src+dst port as one 32-bit word right after the IP
		 * header.  NOTE(review): raw load assumes the L4 header is
		 * present in the linear area — verify callers guarantee it.
		 */
		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
		break;

	default:
		ports = 0;
		break;
	}

	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);

	/* Scale the 32-bit hash onto the queue range without division. */
	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
1757
David S. Millere8a04642008-07-17 00:34:19 -07001758static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1759 struct sk_buff *skb)
1760{
Stephen Hemminger00829822008-11-20 20:14:53 -08001761 const struct net_device_ops *ops = dev->netdev_ops;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001762 u16 queue_index = 0;
1763
Stephen Hemminger00829822008-11-20 20:14:53 -08001764 if (ops->ndo_select_queue)
1765 queue_index = ops->ndo_select_queue(dev, skb);
David S. Miller8f0f2222008-07-15 03:47:03 -07001766 else if (dev->real_num_tx_queues > 1)
1767 queue_index = simple_tx_hash(dev, skb);
David S. Millereae792b2008-07-15 03:03:33 -07001768
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001769 skb_set_queue_mapping(skb, queue_index);
1770 return netdev_get_tx_queue(dev, queue_index);
David S. Millere8a04642008-07-17 00:34:19 -07001771}
1772
/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      I notice this method can also return errors from the queue disciplines,
 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *      be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 */
int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	/* Linearize if the skb has a fragment list the device can't take. */
	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		skb_set_transport_header(skb, skb->csum_start -
					      skb_headroom(skb));
		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
			goto out_kfree_skb;
	}

gso:
	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	/* Pick the tx queue, then its qdisc (RCU-protected pointer). */
	txq = dev_pick_tx(dev, skb);
	q = rcu_dereference(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Real qdisc: enqueue and run it under the root lock,
		 * unless the qdisc has been deactivated (device going
		 * down), in which case the packet is dropped.
		 */
		spinlock_t *root_lock = qdisc_lock(q);

		spin_lock(root_lock);

		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
			kfree_skb(skb);
			rc = NET_XMIT_DROP;
		} else {
			rc = qdisc_enqueue_root(skb, q);
			qdisc_run(q);
		}
		spin_unlock(root_lock);

		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and shot the lock. It is not prone from deadlocks.
	   Either shot noqueue qdisc, it is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		/* xmit_lock_owner == cpu means we are being called
		 * recursively from our own transmit path — deadlock.
		 */
		if (txq->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, txq, cpu);

			if (!netif_tx_queue_stopped(txq)) {
				rc = 0;
				if (!dev_hard_start_xmit(skb, dev, txq)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev, txq);
			/* Queueless device stopped its queue: nowhere to
			 * put the packet; drop it below.
			 */
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	rc = -ENETDOWN;
	rcu_read_unlock_bh();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
1911
1912
1913/*=======================================================================
1914 Receiver routines
1915 =======================================================================*/
1916
/* Receive-path tunables (exposed via sysctl elsewhere). */
int netdev_max_backlog __read_mostly = 1000;	/* per-CPU input queue cap */
int netdev_budget __read_mostly = 300;		/* softirq packet budget */
int weight_p __read_mostly = 64;            /* old backlog weight */

/* Per-CPU receive statistics (total / dropped counters). */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1922
1923
/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds. The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP     (packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
	struct softnet_data *queue;
	unsigned long flags;

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	/* Stamp arrival time if the driver did not. */
	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	/*
	 * The code is rearranged so that the path is the most
	 * short when CPU is congested, but is still operating.
	 */
	local_irq_save(flags);
	queue = &__get_cpu_var(softnet_data);

	__get_cpu_var(netdev_rx_stat).total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
			/* Backlog already scheduled: just append. */
enqueue:
			__skb_queue_tail(&queue->input_pkt_queue, skb);
			local_irq_restore(flags);
			return NET_RX_SUCCESS;
		}

		/* First packet on an empty queue: kick the backlog NAPI
		 * poller, then enqueue.
		 */
		napi_schedule(&queue->backlog);
		goto enqueue;
	}

	/* Queue over limit: drop and account. */
	__get_cpu_var(netdev_rx_stat).dropped++;
	local_irq_restore(flags);

	kfree_skb(skb);
	return NET_RX_DROP;
}
1977
/*
 * netif_rx_ni - netif_rx() for non-interrupt (process) context.
 *
 * Disables preemption around netif_rx() and, since no hard-irq exit
 * will do it for us, runs any softirq raised by the enqueue before
 * returning.  Returns the netif_rx() result.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int err;

	preempt_disable();
	err = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return err;
}

EXPORT_SYMBOL(netif_rx_ni);
1992
/*
 * NET_TX_SOFTIRQ handler: frees skbs queued for deferred release on
 * this CPU's completion_queue, then runs every qdisc scheduled on its
 * output_queue.  Both per-CPU lists are detached under IRQs-off so
 * drivers can append to them from interrupt context.
 */
static void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		/* Detach the whole list atomically w.r.t. interrupts. */
		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			/* Only refcount-zero skbs belong here. */
			WARN_ON(atomic_read(&skb->users));
			__kfree_skb(skb);
		}
	}

	if (sd->output_queue) {
		struct Qdisc *head;

		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		local_irq_enable();

		while (head) {
			struct Qdisc *q = head;
			spinlock_t *root_lock;

			head = head->next_sched;

			root_lock = qdisc_lock(q);
			if (spin_trylock(root_lock)) {
				/* Clear SCHED before running so a
				 * concurrent wakeup can re-schedule us.
				 */
				smp_mb__before_clear_bit();
				clear_bit(__QDISC_STATE_SCHED,
					  &q->state);
				qdisc_run(q);
				spin_unlock(root_lock);
			} else {
				/* Lock contended: re-schedule unless the
				 * qdisc is being deactivated, in which
				 * case just drop the SCHED bit.
				 */
				if (!test_bit(__QDISC_STATE_DEACTIVATED,
					      &q->state)) {
					__netif_reschedule(q);
				} else {
					smp_mb__before_clear_bit();
					clear_bit(__QDISC_STATE_SCHED,
						  &q->state);
				}
			}
		}
	}
}
2048
/*
 * Hand @skb to one packet_type handler, taking an extra reference
 * because the handler consumes one.  @orig_dev is the device the
 * packet originally arrived on.  Returns the handler's verdict.
 */
static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
2056
2057#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
Stephen Hemminger6229e362007-03-21 13:38:47 -07002058/* These hooks defined here for ATM */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059struct net_bridge;
2060struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2061 unsigned char *addr);
Stephen Hemminger6229e362007-03-21 13:38:47 -07002062void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063
Stephen Hemminger6229e362007-03-21 13:38:47 -07002064/*
2065 * If bridge module is loaded call bridging hook.
2066 * returns NULL if packet was consumed.
2067 */
2068struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2069 struct sk_buff *skb) __read_mostly;
2070static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2071 struct packet_type **pt_prev, int *ret,
2072 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073{
2074 struct net_bridge_port *port;
2075
Stephen Hemminger6229e362007-03-21 13:38:47 -07002076 if (skb->pkt_type == PACKET_LOOPBACK ||
2077 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2078 return skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079
2080 if (*pt_prev) {
Stephen Hemminger6229e362007-03-21 13:38:47 -07002081 *ret = deliver_skb(skb, *pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082 *pt_prev = NULL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002083 }
2084
Stephen Hemminger6229e362007-03-21 13:38:47 -07002085 return br_handle_frame_hook(port, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002086}
2087#else
Stephen Hemminger6229e362007-03-21 13:38:47 -07002088#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002089#endif
2090
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
/* Filled in by the macvlan module when loaded. */
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/*
 * Divert @skb to the macvlan module when its input device hosts
 * macvlan instances.  Mirrors handle_bridge(): flush the pending
 * pt_prev delivery, then let the hook consume or return the skb.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	if (skb->dev->macvlan_port == NULL)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}
	return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif
2112
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
 *
 */
/*
 * Run @skb through the device's ingress qdisc and return the traffic-
 * control verdict (TC_ACT_OK, TC_ACT_SHOT, ...).  Loops through the
 * ingress path are bounded by the round-trip TTL in tc_verd.
 */
static int ing_filter(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	u32 ttl = G_TC_RTTL(skb->tc_verd);
	struct netdev_queue *rxq;
	int result = TC_ACT_OK;
	struct Qdisc *q;

	/* Redirect loop guard: shoot packets that bounced too often. */
	if (MAX_RED_LOOP < ttl++) {
		printk(KERN_WARNING
		       "Redir loop detected Dropping packet (%d->%d)\n",
		       skb->iif, dev->ifindex);
		return TC_ACT_SHOT;
	}

	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);

	rxq = &dev->rx_queue;

	q = rxq->qdisc;
	/* noop_qdisc means no ingress policy is installed. */
	if (q != &noop_qdisc) {
		spin_lock(qdisc_lock(q));
		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
			result = qdisc_enqueue_root(skb, q);
		spin_unlock(qdisc_lock(q));
	}

	return result;
}

/*
 * Ingress classification wrapper for netif_receive_skb(): flushes the
 * pending pt_prev delivery, applies ing_filter(), and frees the skb on
 * a SHOT/STOLEN verdict (returning NULL to stop processing).
 */
static inline struct sk_buff *handle_ing(struct sk_buff *skb,
					 struct packet_type **pt_prev,
					 int *ret, struct net_device *orig_dev)
{
	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
		goto out;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	} else {
		/* Huh? Why does turning on AF_PACKET affect this? */
		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
	}

	switch (ing_filter(skb)) {
	case TC_ACT_SHOT:
	case TC_ACT_STOLEN:
		kfree_skb(skb);
		return NULL;
	}

out:
	skb->tc_verd = 0;
	return skb;
}
#endif
2180
/*
 * netif_nit_deliver - deliver received packets to network taps
 * @skb: buffer
 *
 * This function is used to deliver incoming packets to network
 * taps. It should be used when the normal netif_receive_skb path
 * is bypassed, for example because of VLAN acceleration.
 */
void netif_nit_deliver(struct sk_buff *skb)
{
	struct packet_type *ptype;

	/* Fast exit when nobody is tapping. */
	if (list_empty(&ptype_all))
		return;

	/* Set up header offsets the way the taps expect. */
	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Wildcard taps and taps bound to this device. */
		if (!ptype->dev || ptype->dev == skb->dev)
			deliver_skb(skb, ptype, skb->dev);
	}
	rcu_read_unlock();
}
2207
/**
 *	netif_receive_skb - process receive buffer from network
 *	@skb: buffer to process
 *
 *	netif_receive_skb() is the main receive data processing function.
 *	It always succeeds. The buffer may be dropped during processing
 *	for congestion control or by the protocol layers.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 *
 *	Return values (usually ignored):
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */
int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	struct net_device *null_or_orig;
	int ret = NET_RX_DROP;
	__be16 type;

	/* Hardware-accelerated VLAN frames take the vlan path. */
	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
		return NET_RX_SUCCESS;

	/* if we've gotten here through NAPI, check netpoll */
	if (netpoll_receive_skb(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	/* Record the input interface before bonding may rewrite skb->dev. */
	if (!skb->iif)
		skb->iif = skb->dev->ifindex;

	/* Bonding: either deliver only to exact-match handlers (frames the
	 * bond policy says to drop) or substitute the master device.
	 */
	null_or_orig = NULL;
	orig_dev = skb->dev;
	if (orig_dev->master) {
		if (skb_bond_should_drop(skb))
			null_or_orig = orig_dev; /* deliver only exact match */
		else
			skb->dev = orig_dev->master;
	}

	__get_cpu_var(netdev_rx_stat).total++;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;

	pt_prev = NULL;

	rcu_read_lock();

	/* Don't receive packets in an exiting network namespace */
	if (!net_alive(dev_net(skb->dev))) {
		kfree_skb(skb);
		goto out;
	}

#ifdef CONFIG_NET_CLS_ACT
	/* TC_NCLS: packet already classified; skip ingress filtering. */
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	/* All-protocol taps (e.g. AF_PACKET).  pt_prev defers each
	 * delivery one step so the final handler can take the skb
	 * without an extra clone/reference.
	 */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
		    ptype->dev == orig_dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif

	/* Bridge and macvlan may consume the skb entirely. */
	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;

	/* Protocol-specific handlers, hashed by ethertype. */
	type = skb->protocol;
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		/* Last handler gets the skb itself (no extra reference). */
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}
2325
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07002326/* Network device is going away, flush any packets still pending */
2327static void flush_backlog(void *arg)
2328{
2329 struct net_device *dev = arg;
2330 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2331 struct sk_buff *skb, *tmp;
2332
2333 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2334 if (skb->dev == dev) {
2335 __skb_unlink(skb, &queue->input_pkt_queue);
2336 kfree_skb(skb);
2337 }
2338}
2339
/*
 * NAPI poll callback for the per-CPU backlog queue used by non-NAPI
 * drivers (packets queued via netif_rx()). Dequeues up to @quota
 * packets and hands each to netif_receive_skb(); returns the number
 * of packets actually processed.
 */
static int process_backlog(struct napi_struct *napi, int quota)
{
	int work = 0;
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;

	napi->weight = weight_p;
	do {
		struct sk_buff *skb;

		/* input_pkt_queue is also touched from interrupt
		 * context, so dequeue with local irqs disabled. */
		local_irq_disable();
		skb = __skb_dequeue(&queue->input_pkt_queue);
		if (!skb) {
			/* Queue drained: clear our scheduled state while
			 * irqs are still off, then stop polling. */
			__napi_complete(napi);
			local_irq_enable();
			break;
		}
		local_irq_enable();

		netif_receive_skb(skb);
		/* Stop early when the jiffy ticks over to bound latency. */
	} while (++work < quota && jiffies == start_time);

	return work;
}
2364
/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run.
 * Adds @n to this CPU's poll list and raises NET_RX_SOFTIRQ; the
 * softirq handler (net_rx_action) will invoke @n->poll later.
 * NOTE(review): callers appear to be expected to have claimed
 * NAPI_STATE_SCHED already — no state test is done here; confirm
 * against napi_schedule().
 */
void __napi_schedule(struct napi_struct *n)
{
	unsigned long flags;

	/* irqs off: the poll list is shared with interrupt context. */
	local_irq_save(flags);
	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__napi_schedule);
2381
2382
/*
 * NET_RX_SOFTIRQ handler: repeatedly takes the NAPI instance at the
 * head of this CPU's poll list and calls its ->poll() until either the
 * list is empty or the softirq budget/time window is exhausted.
 */
static void net_rx_action(struct softirq_action *h)
{
	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
	unsigned long time_limit = jiffies + 2;
	int budget = netdev_budget;
	void *have;

	local_irq_disable();

	while (!list_empty(list)) {
		struct napi_struct *n;
		int work, weight;

		/* If the softirq window is exhausted then punt.
		 * Allow this to run for 2 jiffies, which gives an
		 * average latency of 1.5/HZ.
		 */
		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
			goto softnet_break;

		local_irq_enable();

		/* Even though interrupts have been re-enabled, this
		 * access is safe because interrupts can only add new
		 * entries to the tail of this list, and only ->poll()
		 * calls can remove this head entry from the list.
		 */
		n = list_entry(list->next, struct napi_struct, poll_list);

		have = netpoll_poll_lock(n);

		weight = n->weight;

		/* This NAPI_STATE_SCHED test is for avoiding a race
		 * with netpoll's poll_napi(). Only the entity which
		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call. Therefore we avoid
		 * accidently calling ->poll() when NAPI is not scheduled.
		 */
		work = 0;
		if (test_bit(NAPI_STATE_SCHED, &n->state))
			work = n->poll(n, weight);

		/* A poll routine must never report more work than its weight. */
		WARN_ON_ONCE(work > weight);

		budget -= work;

		local_irq_disable();

		/* Drivers must not modify the NAPI state if they
		 * consume the entire weight. In such cases this code
		 * still "owns" the NAPI instance and therefore can
		 * move the instance around on the list at-will.
		 */
		if (unlikely(work == weight)) {
			if (unlikely(napi_disable_pending(n)))
				__napi_complete(n);
			else
				list_move_tail(&n->poll_list, list);
		}

		netpoll_poll_unlock(have);
	}
out:
	local_irq_enable();

#ifdef CONFIG_NET_DMA
	/*
	 * There may not be any more sk_buffs coming right now, so push
	 * any pending DMA copies to hardware
	 */
	if (!cpus_empty(net_dma.channel_mask)) {
		int chan_idx;
		for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
			struct dma_chan *chan = net_dma.channels[chan_idx];
			if (chan)
				dma_async_memcpy_issue_pending(chan);
		}
	}
#endif

	return;

softnet_break:
	/* Ran out of budget/time with work left: account the squeeze and
	 * re-raise the softirq so we get called again. */
	__get_cpu_var(netdev_rx_stat).time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}
2471
/* Per-address-family SIOCGIFCONF handlers, indexed by family. */
static gifconf_func_t * gifconf_list [NPROTO];

/**
 * register_gifconf - register a SIOCGIF handler
 * @family: Address family
 * @gifconf: Function handler
 *
 * Register protocol dependent address dumping routines. The handler
 * that is passed must not be freed or reused until it has been replaced
 * by another handler.
 *
 * Returns 0 on success or -EINVAL if @family is out of range.
 */
int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
	if (family >= NPROTO)
		return -EINVAL;
	gifconf_list[family] = gifconf;
	return 0;
}
2490
2491
2492/*
2493 * Map an interface index to its name (SIOCGIFNAME)
2494 */
2495
2496/*
2497 * We need this ioctl for efficient implementation of the
2498 * if_indextoname() function required by the IPv6 API. Without
2499 * it, we would have to search all the interfaces to find a
2500 * match. --pb
2501 */
2502
Eric W. Biederman881d9662007-09-17 11:56:21 -07002503static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504{
2505 struct net_device *dev;
2506 struct ifreq ifr;
2507
2508 /*
2509 * Fetch the caller's info block.
2510 */
2511
2512 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2513 return -EFAULT;
2514
2515 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -07002516 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002517 if (!dev) {
2518 read_unlock(&dev_base_lock);
2519 return -ENODEV;
2520 }
2521
2522 strcpy(ifr.ifr_name, dev->name);
2523 read_unlock(&dev_base_lock);
2524
2525 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2526 return -EFAULT;
2527 return 0;
2528}
2529
2530/*
2531 * Perform a SIOCGIFCONF call. This structure will change
2532 * size eventually, and there is nothing I can do about it.
2533 * Thus we will need a 'compatibility mode'.
2534 */
2535
/*
 * SIOCGIFCONF: write one info block per interface/family into the
 * caller's buffer. A NULL ifc_buf means "size probe": handlers are
 * called with a NULL destination and only the total length is returned.
 */
static int dev_ifconf(struct net *net, char __user *arg)
{
	struct ifconf ifc;
	struct net_device *dev;
	char __user *pos;
	int len;
	int total;
	int i;

	/*
	 * Fetch the caller's info block.
	 */

	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
		return -EFAULT;

	pos = ifc.ifc_buf;
	len = ifc.ifc_len;

	/*
	 * Loop over the interfaces, and write an info block for each.
	 */

	total = 0;
	for_each_netdev(net, dev) {
		for (i = 0; i < NPROTO; i++) {
			if (gifconf_list[i]) {
				int done;
				if (!pos)
					done = gifconf_list[i](dev, NULL, 0);
				else
					done = gifconf_list[i](dev, pos + total,
							       len - total);
				/* Handlers report copy failures as < 0. */
				if (done < 0)
					return -EFAULT;
				total += done;
			}
		}
	}

	/*
	 * All done. Write the updated control block back to the caller.
	 */
	ifc.ifc_len = total;

	/*
	 * Both BSD and Solaris return 0 here, so we do too.
	 */
	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
}
2586
2587#ifdef CONFIG_PROC_FS
2588/*
2589 * This is invoked by the /proc filesystem handler to display a device
2590 * in detail.
2591 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592void *dev_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002593 __acquires(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594{
Denis V. Luneve372c412007-11-19 22:31:54 -08002595 struct net *net = seq_file_net(seq);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002596 loff_t off;
2597 struct net_device *dev;
2598
Linus Torvalds1da177e2005-04-16 15:20:36 -07002599 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002600 if (!*pos)
2601 return SEQ_START_TOKEN;
2602
2603 off = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002604 for_each_netdev(net, dev)
Pavel Emelianov7562f872007-05-03 15:13:45 -07002605 if (off++ == *pos)
2606 return dev;
2607
2608 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002609}
2610
2611void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2612{
Denis V. Luneve372c412007-11-19 22:31:54 -08002613 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 ++*pos;
Pavel Emelianov7562f872007-05-03 15:13:45 -07002615 return v == SEQ_START_TOKEN ?
Eric W. Biederman881d9662007-09-17 11:56:21 -07002616 first_net_device(net) : next_net_device((struct net_device *)v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002617}
2618
/* seq_file .stop for /proc/net/dev: drop the lock taken in .start. */
void dev_seq_stop(struct seq_file *seq, void *v)
	__releases(dev_base_lock)
{
	read_unlock(&dev_base_lock);
}
2624
/*
 * Emit one /proc/net/dev line for @dev. Several driver-level error
 * counters are folded into the classic "drop", "frame" and "carrier"
 * columns to keep the legacy column layout.
 */
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
	const struct net_device_stats *stats = dev_get_stats(dev);

	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
		   dev->name, stats->rx_bytes, stats->rx_packets,
		   stats->rx_errors,
		   stats->rx_dropped + stats->rx_missed_errors,
		   stats->rx_fifo_errors,
		   stats->rx_length_errors + stats->rx_over_errors +
		    stats->rx_crc_errors + stats->rx_frame_errors,
		   stats->rx_compressed, stats->multicast,
		   stats->tx_bytes, stats->tx_packets,
		   stats->tx_errors, stats->tx_dropped,
		   stats->tx_fifo_errors, stats->collisions,
		   stats->tx_carrier_errors +
		    stats->tx_aborted_errors +
		    stats->tx_window_errors +
		    stats->tx_heartbeat_errors,
		   stats->tx_compressed);
}
2647
2648/*
2649 * Called from the PROCfs module. This now uses the new arbitrary sized
2650 * /proc/net interface to create /proc/net/dev
2651 */
2652static int dev_seq_show(struct seq_file *seq, void *v)
2653{
2654 if (v == SEQ_START_TOKEN)
2655 seq_puts(seq, "Inter-| Receive "
2656 " | Transmit\n"
2657 " face |bytes packets errs drop fifo frame "
2658 "compressed multicast|bytes packets errs "
2659 "drop fifo colls carrier compressed\n");
2660 else
2661 dev_seq_printf_stats(seq, v);
2662 return 0;
2663}
2664
2665static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2666{
2667 struct netif_rx_stats *rc = NULL;
2668
Mike Travis0c0b0ac2008-05-02 16:43:08 -07002669 while (*pos < nr_cpu_ids)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002670 if (cpu_online(*pos)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002671 rc = &per_cpu(netdev_rx_stat, *pos);
2672 break;
2673 } else
2674 ++*pos;
2675 return rc;
2676}
2677
/* seq_file .start for /proc/net/softnet_stat: one record per online CPU. */
static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
{
	return softnet_get_online(pos);
}

/* seq_file .next: step past the current CPU and find the next online one. */
static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return softnet_get_online(pos);
}

/* seq_file .stop: nothing to release — no lock is held across iteration. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2692
/*
 * Emit one /proc/net/softnet_stat row for a CPU. Several columns are
 * hard-wired to zero to preserve the historical layout (the old
 * fastroute counters and an unused slot).
 */
static int softnet_seq_show(struct seq_file *seq, void *v)
{
	struct netif_rx_stats *s = v;

	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
		   s->total, s->dropped, s->time_squeeze, 0,
		   0, 0, 0, 0, /* was fastroute */
		   s->cpu_collision );
	return 0;
}
2703
/* Iterator ops for /proc/net/dev. */
static const struct seq_operations dev_seq_ops = {
	.start = dev_seq_start,
	.next  = dev_seq_next,
	.stop  = dev_seq_stop,
	.show  = dev_seq_show,
};

/* Open /proc/net/dev with per-net-namespace private state. */
static int dev_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &dev_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations dev_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = dev_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

/* Iterator ops for /proc/net/softnet_stat. */
static const struct seq_operations softnet_seq_ops = {
	.start = softnet_seq_start,
	.next  = softnet_seq_next,
	.stop  = softnet_seq_stop,
	.show  = softnet_seq_show,
};

/* softnet stats are global (per-CPU, not per-netns): plain seq_open. */
static int softnet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &softnet_seq_ops);
}

static const struct file_operations softnet_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = softnet_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2744
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002745static void *ptype_get_idx(loff_t pos)
2746{
2747 struct packet_type *pt = NULL;
2748 loff_t i = 0;
2749 int t;
2750
2751 list_for_each_entry_rcu(pt, &ptype_all, list) {
2752 if (i == pos)
2753 return pt;
2754 ++i;
2755 }
2756
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002757 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002758 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2759 if (i == pos)
2760 return pt;
2761 ++i;
2762 }
2763 }
2764 return NULL;
2765}
2766
/* seq_file .start for /proc/net/ptype: RCU-protect the ptype lists.
 * Position 0 is the header token; position N maps to entry N-1. */
static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
}
2773
/*
 * seq_file .next for /proc/net/ptype: advance within the current list,
 * falling through from ptype_all into ptype_base[0..PTYPE_HASH_SIZE-1],
 * skipping empty buckets. Returns NULL at the end.
 */
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct packet_type *pt;
	struct list_head *nxt;
	int hash;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ptype_get_idx(0);

	pt = v;
	nxt = pt->list.next;
	if (pt->type == htons(ETH_P_ALL)) {
		/* Still inside ptype_all unless we hit its list head. */
		if (nxt != &ptype_all)
			goto found;
		hash = 0;
		nxt = ptype_base[0].next;
	} else
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;

	/* Skip over empty hash buckets until a real entry (or the end). */
	while (nxt == &ptype_base[hash]) {
		if (++hash >= PTYPE_HASH_SIZE)
			return NULL;
		nxt = ptype_base[hash].next;
	}
found:
	return list_entry(nxt, struct packet_type, list);
}
2802
/* seq_file .stop for /proc/net/ptype: end the RCU read-side section. */
static void ptype_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
2808
/*
 * seq_file .show for /proc/net/ptype: print a header for the start
 * token; otherwise print type/device/handler for entries that are
 * either device-less or belong to this seq_file's network namespace.
 */
static int ptype_seq_show(struct seq_file *seq, void *v)
{
	struct packet_type *pt = v;

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Type Device Function\n");
	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
		if (pt->type == htons(ETH_P_ALL))
			seq_puts(seq, "ALL ");
		else
			seq_printf(seq, "%04x", ntohs(pt->type));

		/* %pF resolves the handler pointer to a symbol name. */
		seq_printf(seq, " %-8s %pF\n",
			   pt->dev ? pt->dev->name : "", pt->func);
	}

	return 0;
}
2827
/* Iterator ops for /proc/net/ptype. */
static const struct seq_operations ptype_seq_ops = {
	.start = ptype_seq_start,
	.next  = ptype_seq_next,
	.stop  = ptype_seq_stop,
	.show  = ptype_seq_show,
};

/* Open /proc/net/ptype with per-net-namespace private state. */
static int ptype_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ptype_seq_ops,
			sizeof(struct seq_net_private));
}

static const struct file_operations ptype_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = ptype_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
2848
2849
/*
 * Per-net-namespace setup of /proc/net/{dev,softnet_stat,ptype} plus
 * the wireless-extensions proc entries. On any failure the entries
 * created so far are torn down via the fall-through label chain below
 * (each label removes one entry, then falls into the next).
 */
static int __net_init dev_proc_net_init(struct net *net)
{
	int rc = -ENOMEM;

	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
		goto out;
	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
		goto out_dev;
	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
		goto out_softnet;

	if (wext_proc_init(net))
		goto out_ptype;
	rc = 0;
out:
	return rc;
out_ptype:
	proc_net_remove(net, "ptype");
out_softnet:
	proc_net_remove(net, "softnet_stat");
out_dev:
	proc_net_remove(net, "dev");
	goto out;
}
Eric W. Biederman881d9662007-09-17 11:56:21 -07002874
/* Per-net-namespace teardown: mirror of dev_proc_net_init. */
static void __net_exit dev_proc_net_exit(struct net *net)
{
	wext_proc_exit(net);

	proc_net_remove(net, "ptype");
	proc_net_remove(net, "softnet_stat");
	proc_net_remove(net, "dev");
}

static struct pernet_operations __net_initdata dev_proc_ops = {
	.init = dev_proc_net_init,
	.exit = dev_proc_net_exit,
};

/* Register the proc entries for every present and future namespace. */
static int __init dev_proc_init(void)
{
	return register_pernet_subsys(&dev_proc_ops);
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002893#else
2894#define dev_proc_init() 0
2895#endif /* CONFIG_PROC_FS */
2896
2897
/**
 * netdev_set_master - set up master/slave pair
 * @slave: slave device
 * @master: new master device
 *
 * Changes the master device of the slave. Pass %NULL to break the
 * bonding. The caller must hold the RTNL semaphore. On a failure
 * a negative errno code is returned. On success the reference counts
 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
 * function returns zero.
 */
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
	struct net_device *old = slave->master;

	ASSERT_RTNL();

	if (master) {
		/* Refuse to re-enslave an already-enslaved device. */
		if (old)
			return -EBUSY;
		dev_hold(master);
	}

	slave->master = master;

	/* Wait for concurrent readers to observe the new master before
	 * the old one's reference is dropped. */
	synchronize_net();

	if (old)
		dev_put(old);

	if (master)
		slave->flags |= IFF_SLAVE;
	else
		slave->flags &= ~IFF_SLAVE;

	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
	return 0;
}
2936
Patrick McHardyb6c40d62008-10-07 15:26:48 -07002937static void dev_change_rx_flags(struct net_device *dev, int flags)
2938{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08002939 const struct net_device_ops *ops = dev->netdev_ops;
2940
2941 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
2942 ops->ndo_change_rx_flags(dev, flags);
Patrick McHardyb6c40d62008-10-07 15:26:48 -07002943}
2944
/*
 * Adjust dev->promiscuity by @inc and keep IFF_PROMISC in sync.
 * A counter that lands exactly on zero either means "last reference
 * dropped" (inc < 0: clear the flag) or "counter overflowed"
 * (inc > 0: undo and fail with -EOVERFLOW). Flag transitions are
 * logged, audited, and pushed to the driver. Caller holds RTNL.
 */
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	ASSERT_RTNL();

	dev->flags |= IFF_PROMISC;
	dev->promiscuity += inc;
	if (dev->promiscuity == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch promisc and return error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_PROMISC;
		else {
			dev->promiscuity -= inc;
			printk(KERN_WARNING "%s: promiscuity touches roof, "
				"set promiscuity failed, promiscuity feature "
				"of device might be broken.\n", dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags != old_flags) {
		printk(KERN_INFO "device %s %s promiscuous mode\n",
		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
							       "left");
		/* Promiscuous-mode transitions are security relevant. */
		if (audit_enabled)
			audit_log(current->audit_context, GFP_ATOMIC,
				AUDIT_ANOM_PROMISCUOUS,
				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
				dev->name, (dev->flags & IFF_PROMISC),
				(old_flags & IFF_PROMISC),
				audit_get_loginuid(current),
				current->uid, current->gid,
				audit_get_sessionid(current));

		dev_change_rx_flags(dev, IFF_PROMISC);
	}
	return 0;
}
2986
Linus Torvalds1da177e2005-04-16 15:20:36 -07002987/**
2988 * dev_set_promiscuity - update promiscuity count on a device
2989 * @dev: device
2990 * @inc: modifier
2991 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07002992 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07002993 * remains above zero the interface remains promiscuous. Once it hits zero
2994 * the device reverts back to normal filtering operation. A negative inc
2995 * value is used to drop promiscuity on the device.
Wang Chendad9b332008-06-18 01:48:28 -07002996 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002997 */
Wang Chendad9b332008-06-18 01:48:28 -07002998int dev_set_promiscuity(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002999{
3000 unsigned short old_flags = dev->flags;
Wang Chendad9b332008-06-18 01:48:28 -07003001 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003002
Wang Chendad9b332008-06-18 01:48:28 -07003003 err = __dev_set_promiscuity(dev, inc);
Patrick McHardy4b5a6982008-07-06 15:49:08 -07003004 if (err < 0)
Wang Chendad9b332008-06-18 01:48:28 -07003005 return err;
Patrick McHardy4417da62007-06-27 01:28:10 -07003006 if (dev->flags != old_flags)
3007 dev_set_rx_mode(dev);
Wang Chendad9b332008-06-18 01:48:28 -07003008 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003009}
3010
/**
 * dev_set_allmulti - update allmulti count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove reception of all multicast frames to a device. While the
 * count in the device remains above zero the interface remains listening
 * to all interfaces. Once it hits zero the device reverts back to normal
 * filtering operation. A negative @inc value is used to drop the counter
 * when releasing a resource needing all multicasts.
 * Return 0 if successful or a negative errno code on error.
 * Caller must hold RTNL.
 */

int dev_set_allmulti(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	ASSERT_RTNL();

	dev->flags |= IFF_ALLMULTI;
	dev->allmulti += inc;
	if (dev->allmulti == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch allmulti and return error.
		 * (Counter at zero with inc < 0 means the last reference
		 * was dropped, so the flag is simply cleared.)
		 */
		if (inc < 0)
			dev->flags &= ~IFF_ALLMULTI;
		else {
			dev->allmulti -= inc;
			printk(KERN_WARNING "%s: allmulti touches roof, "
				"set allmulti failed, allmulti feature of "
				"device might be broken.\n", dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags ^ old_flags) {
		/* Flag flipped: tell the driver and resync RX filtering. */
		dev_change_rx_flags(dev, IFF_ALLMULTI);
		dev_set_rx_mode(dev);
	}
	return 0;
}
3053
/*
 * Upload unicast and multicast address lists to device and
 * configure RX filtering. When the device doesn't support unicast
 * filtering it is put in promiscuous mode while unicast addresses
 * are present.
 * NOTE(review): callers appear to hold dev->addr_list_lock (see
 * dev_set_rx_mode below) — confirm for any new call site.
 */
void __dev_set_rx_mode(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	/* dev_open will call this function so the list will stay sane. */
	if (!(dev->flags&IFF_UP))
		return;

	if (!netif_device_present(dev))
		return;

	if (ops->ndo_set_rx_mode)
		ops->ndo_set_rx_mode(dev);
	else {
		/* Unicast addresses changes may only happen under the rtnl,
		 * therefore calling __dev_set_promiscuity here is safe.
		 */
		if (dev->uc_count > 0 && !dev->uc_promisc) {
			/* First unicast address on a device without a
			 * dedicated filter: force promiscuous mode. */
			__dev_set_promiscuity(dev, 1);
			dev->uc_promisc = 1;
		} else if (dev->uc_count == 0 && dev->uc_promisc) {
			/* Last unicast address gone: drop our reference. */
			__dev_set_promiscuity(dev, -1);
			dev->uc_promisc = 0;
		}

		if (ops->ndo_set_multicast_list)
			ops->ndo_set_multicast_list(dev);
	}
}
3089
/* Locked wrapper around __dev_set_rx_mode. */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
}
3096
/*
 * Drop one reference to address @addr/@alen in the list at *@list.
 * The entry is unlinked and freed only when its refcount hits zero;
 * for a global delete (@glbl) a still-global entry keeps its users.
 * *@count tracks the number of entries in the list.
 * Returns 0 on success, -ENOENT if the address is not present.
 */
int __dev_addr_delete(struct dev_addr_list **list, int *count,
		      void *addr, int alen, int glbl)
{
	struct dev_addr_list *da;

	/* Walk via pointer-to-pointer so unlinking needs no prev node. */
	for (; (da = *list) != NULL; list = &da->next) {
		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
		    alen == da->da_addrlen) {
			if (glbl) {
				int old_glbl = da->da_gusers;
				da->da_gusers = 0;
				if (old_glbl == 0)
					break;
			}
			if (--da->da_users)
				return 0;

			*list = da->next;
			kfree(da);
			(*count)--;
			return 0;
		}
	}
	return -ENOENT;
}
3122
/*
 * Add address @addr/@alen to the list at *@list, or take another
 * reference if it is already present (a repeated global add is a
 * no-op). New entries are pushed at the head; *@count tracks the
 * number of entries. Returns 0 on success or -ENOMEM.
 */
int __dev_addr_add(struct dev_addr_list **list, int *count,
		   void *addr, int alen, int glbl)
{
	struct dev_addr_list *da;

	for (da = *list; da != NULL; da = da->next) {
		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
		    da->da_addrlen == alen) {
			if (glbl) {
				int old_glbl = da->da_gusers;
				da->da_gusers = 1;
				if (old_glbl)
					return 0;
			}
			da->da_users++;
			return 0;
		}
	}

	/* GFP_ATOMIC: may be called with the address list lock held. */
	da = kzalloc(sizeof(*da), GFP_ATOMIC);
	if (da == NULL)
		return -ENOMEM;
	memcpy(da->da_addr, addr, alen);
	da->da_addrlen = alen;
	da->da_users = 1;
	da->da_gusers = glbl ? 1 : 0;
	da->next = *list;
	*list = da;
	(*count)++;
	return 0;
}
3154
Patrick McHardy4417da62007-06-27 01:28:10 -07003155/**
3156 * dev_unicast_delete - Release secondary unicast address.
3157 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003158 * @addr: address to delete
3159 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07003160 *
3161 * Release reference to a secondary unicast address and remove it
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003162 * from the device if the reference count drops to zero.
Patrick McHardy4417da62007-06-27 01:28:10 -07003163 *
3164 * The caller must hold the rtnl_mutex.
3165 */
3166int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3167{
3168 int err;
3169
3170 ASSERT_RTNL();
3171
David S. Millerb9e40852008-07-15 00:15:08 -07003172 netif_addr_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003173 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3174 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07003175 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003176 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003177 return err;
3178}
3179EXPORT_SYMBOL(dev_unicast_delete);
3180
/**
 *	dev_unicast_add		- add a secondary unicast address
 *	@dev: device
 *	@addr: address to add
 *	@alen: length of @addr
 *
 *	Add a secondary unicast address to the device or increase
 *	the reference count if it already exists.
 *
 *	The caller must hold the rtnl_mutex.
 *
 *	Returns 0 on success or -ENOMEM when allocation fails.
 */
int dev_unicast_add(struct net_device *dev, void *addr, int alen)
{
	int err;

	ASSERT_RTNL();

	netif_addr_lock_bh(dev);
	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
	if (!err)
		/* List changed: push the new filter state to the device. */
		__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
	return err;
}
EXPORT_SYMBOL(dev_unicast_add);
3206
Chris Leeche83a2ea2008-01-31 16:53:23 -08003207int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3208 struct dev_addr_list **from, int *from_count)
3209{
3210 struct dev_addr_list *da, *next;
3211 int err = 0;
3212
3213 da = *from;
3214 while (da != NULL) {
3215 next = da->next;
3216 if (!da->da_synced) {
3217 err = __dev_addr_add(to, to_count,
3218 da->da_addr, da->da_addrlen, 0);
3219 if (err < 0)
3220 break;
3221 da->da_synced = 1;
3222 da->da_users++;
3223 } else if (da->da_users == 1) {
3224 __dev_addr_delete(to, to_count,
3225 da->da_addr, da->da_addrlen, 0);
3226 __dev_addr_delete(from, from_count,
3227 da->da_addr, da->da_addrlen, 0);
3228 }
3229 da = next;
3230 }
3231 return err;
3232}
3233
3234void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3235 struct dev_addr_list **from, int *from_count)
3236{
3237 struct dev_addr_list *da, *next;
3238
3239 da = *from;
3240 while (da != NULL) {
3241 next = da->next;
3242 if (da->da_synced) {
3243 __dev_addr_delete(to, to_count,
3244 da->da_addr, da->da_addrlen, 0);
3245 da->da_synced = 0;
3246 __dev_addr_delete(from, from_count,
3247 da->da_addr, da->da_addrlen, 0);
3248 }
3249 da = next;
3250 }
3251}
3252
/**
 *	dev_unicast_sync - Synchronize device's unicast list to another device
 *	@to: destination device
 *	@from: source device
 *
 *	Add newly added addresses to the destination device and release
 *	addresses that have no users left.
 *
 *	The destination list is protected here by @to's address-list
 *	lock (BHs disabled).  NOTE(review): the old comment said the
 *	source must be locked by netif_tx_lock_bh; the code does not
 *	lock @from — the caller must keep @from's list stable. Confirm
 *	against callers.
 *
 *	This function is intended to be called from the dev->set_rx_mode
 *	function of layered software devices.
 */
int dev_unicast_sync(struct net_device *to, struct net_device *from)
{
	int err = 0;

	netif_addr_lock_bh(to);
	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
			      &from->uc_list, &from->uc_count);
	if (!err)
		/* Destination list changed: refresh its HW filter. */
		__dev_set_rx_mode(to);
	netif_addr_unlock_bh(to);
	return err;
}
EXPORT_SYMBOL(dev_unicast_sync);
3278
/**
 *	dev_unicast_unsync - Remove synchronized addresses from the destination device
 *	@to: destination device
 *	@from: source device
 *
 *	Remove all addresses that were added to the destination device by
 *	dev_unicast_sync(). This function is intended to be called from the
 *	dev->stop function of layered software devices.
 */
void dev_unicast_unsync(struct net_device *to, struct net_device *from)
{
	/*
	 * Both lists are modified: take @from's lock with BHs disabled,
	 * then nest @to's plain lock inside it (fixed order avoids an
	 * AB-BA deadlock with other lock takers).
	 */
	netif_addr_lock_bh(from);
	netif_addr_lock(to);

	__dev_addr_unsync(&to->uc_list, &to->uc_count,
			  &from->uc_list, &from->uc_count);
	/* Destination list shrank: refresh its HW filter. */
	__dev_set_rx_mode(to);

	netif_addr_unlock(to);
	netif_addr_unlock_bh(from);
}
EXPORT_SYMBOL(dev_unicast_unsync);
3301
Denis Cheng12972622007-07-18 02:12:56 -07003302static void __dev_addr_discard(struct dev_addr_list **list)
3303{
3304 struct dev_addr_list *tmp;
3305
3306 while (*list != NULL) {
3307 tmp = *list;
3308 *list = tmp->next;
3309 if (tmp->da_users > tmp->da_gusers)
3310 printk("__dev_addr_discard: address leakage! "
3311 "da_users=%d\n", tmp->da_users);
3312 kfree(tmp);
3313 }
3314}
3315
Denis Cheng26cc2522007-07-18 02:12:03 -07003316static void dev_addr_discard(struct net_device *dev)
Patrick McHardy4417da62007-06-27 01:28:10 -07003317{
David S. Millerb9e40852008-07-15 00:15:08 -07003318 netif_addr_lock_bh(dev);
Denis Cheng26cc2522007-07-18 02:12:03 -07003319
Patrick McHardy4417da62007-06-27 01:28:10 -07003320 __dev_addr_discard(&dev->uc_list);
3321 dev->uc_count = 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003322
Denis Cheng456ad752007-07-18 02:10:54 -07003323 __dev_addr_discard(&dev->mc_list);
3324 dev->mc_count = 0;
Denis Cheng26cc2522007-07-18 02:12:03 -07003325
David S. Millerb9e40852008-07-15 00:15:08 -07003326 netif_addr_unlock_bh(dev);
Denis Cheng456ad752007-07-18 02:10:54 -07003327}
3328
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003329/**
3330 * dev_get_flags - get flags reported to userspace
3331 * @dev: device
3332 *
3333 * Get the combination of flag bits exported through APIs to userspace.
3334 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003335unsigned dev_get_flags(const struct net_device *dev)
3336{
3337 unsigned flags;
3338
3339 flags = (dev->flags & ~(IFF_PROMISC |
3340 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08003341 IFF_RUNNING |
3342 IFF_LOWER_UP |
3343 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07003344 (dev->gflags & (IFF_PROMISC |
3345 IFF_ALLMULTI));
3346
Stefan Rompfb00055a2006-03-20 17:09:11 -08003347 if (netif_running(dev)) {
3348 if (netif_oper_up(dev))
3349 flags |= IFF_RUNNING;
3350 if (netif_carrier_ok(dev))
3351 flags |= IFF_LOWER_UP;
3352 if (netif_dormant(dev))
3353 flags |= IFF_DORMANT;
3354 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003355
3356 return flags;
3357}
3358
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003359/**
3360 * dev_change_flags - change device settings
3361 * @dev: device
3362 * @flags: device state flags
3363 *
3364 * Change settings on device based state flags. The flags are
3365 * in the userspace exported format.
3366 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003367int dev_change_flags(struct net_device *dev, unsigned flags)
3368{
Thomas Graf7c355f52007-06-05 16:03:03 -07003369 int ret, changes;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003370 int old_flags = dev->flags;
3371
Patrick McHardy24023452007-07-14 18:51:31 -07003372 ASSERT_RTNL();
3373
Linus Torvalds1da177e2005-04-16 15:20:36 -07003374 /*
3375 * Set the flags on our device.
3376 */
3377
3378 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3379 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3380 IFF_AUTOMEDIA)) |
3381 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3382 IFF_ALLMULTI));
3383
3384 /*
3385 * Load in the correct multicast list now the flags have changed.
3386 */
3387
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003388 if ((old_flags ^ flags) & IFF_MULTICAST)
3389 dev_change_rx_flags(dev, IFF_MULTICAST);
Patrick McHardy24023452007-07-14 18:51:31 -07003390
Patrick McHardy4417da62007-06-27 01:28:10 -07003391 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003392
3393 /*
3394 * Have we downed the interface. We handle IFF_UP ourselves
3395 * according to user attempts to set it, rather than blindly
3396 * setting it.
3397 */
3398
3399 ret = 0;
3400 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
3401 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3402
3403 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07003404 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003405 }
3406
3407 if (dev->flags & IFF_UP &&
3408 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3409 IFF_VOLATILE)))
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003410 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003411
3412 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3413 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3414 dev->gflags ^= IFF_PROMISC;
3415 dev_set_promiscuity(dev, inc);
3416 }
3417
3418 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3419 is important. Some (broken) drivers set IFF_PROMISC, when
3420 IFF_ALLMULTI is requested not asking us and not reporting.
3421 */
3422 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3423 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3424 dev->gflags ^= IFF_ALLMULTI;
3425 dev_set_allmulti(dev, inc);
3426 }
3427
Thomas Graf7c355f52007-06-05 16:03:03 -07003428 /* Exclude state transition flags, already notified */
3429 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3430 if (changes)
3431 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003432
3433 return ret;
3434}
3435
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003436/**
3437 * dev_set_mtu - Change maximum transfer unit
3438 * @dev: device
3439 * @new_mtu: new transfer unit
3440 *
3441 * Change the maximum transfer size of the network device.
3442 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003443int dev_set_mtu(struct net_device *dev, int new_mtu)
3444{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003445 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003446 int err;
3447
3448 if (new_mtu == dev->mtu)
3449 return 0;
3450
3451 /* MTU must be positive. */
3452 if (new_mtu < 0)
3453 return -EINVAL;
3454
3455 if (!netif_device_present(dev))
3456 return -ENODEV;
3457
3458 err = 0;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003459 if (ops->ndo_change_mtu)
3460 err = ops->ndo_change_mtu(dev, new_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003461 else
3462 dev->mtu = new_mtu;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003463
Linus Torvalds1da177e2005-04-16 15:20:36 -07003464 if (!err && dev->flags & IFF_UP)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003465 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003466 return err;
3467}
3468
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003469/**
3470 * dev_set_mac_address - Change Media Access Control Address
3471 * @dev: device
3472 * @sa: new address
3473 *
3474 * Change the hardware (MAC) address of the device
3475 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003476int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3477{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003478 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003479 int err;
3480
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003481 if (!ops->ndo_set_mac_address)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003482 return -EOPNOTSUPP;
3483 if (sa->sa_family != dev->type)
3484 return -EINVAL;
3485 if (!netif_device_present(dev))
3486 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003487 err = ops->ndo_set_mac_address(dev, sa);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003488 if (!err)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003489 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003490 return err;
3491}
3492
/*
 *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock).
 *	Handles only the "get" ioctls that neither sleep nor modify the
 *	device; anything else is routed through dev_ifsioc() under RTNL.
 */
static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);

	if (!dev)
		return -ENODEV;

	switch (cmd) {
	case SIOCGIFFLAGS:	/* Get interface flags */
		ifr->ifr_flags = dev_get_flags(dev);
		return 0;

	case SIOCGIFMETRIC:	/* Get the metric on the interface
				   (currently unused) */
		ifr->ifr_metric = 0;
		return 0;

	case SIOCGIFMTU:	/* Get the MTU of a device */
		ifr->ifr_mtu = dev->mtu;
		return 0;

	case SIOCGIFHWADDR:
		/* Devices without an address report a zeroed buffer. */
		if (!dev->addr_len)
			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
		else
			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
		ifr->ifr_hwaddr.sa_family = dev->type;
		return 0;

	case SIOCGIFSLAVE:
		err = -EINVAL;
		break;

	case SIOCGIFMAP:
		ifr->ifr_map.mem_start = dev->mem_start;
		ifr->ifr_map.mem_end = dev->mem_end;
		ifr->ifr_map.base_addr = dev->base_addr;
		ifr->ifr_map.irq = dev->irq;
		ifr->ifr_map.dma = dev->dma;
		ifr->ifr_map.port = dev->if_port;
		return 0;

	case SIOCGIFINDEX:
		ifr->ifr_ifindex = dev->ifindex;
		return 0;

	case SIOCGIFTXQLEN:
		ifr->ifr_qlen = dev->tx_queue_len;
		return 0;

	default:
		/* dev_ioctl() should ensure this case
		 * is never reached
		 */
		WARN_ON(1);
		err = -EINVAL;
		break;

	}
	return err;
}
3559
3560/*
3561 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
3562 */
3563static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3564{
3565 int err;
3566 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003567 const struct net_device_ops *ops = dev->netdev_ops;
Jeff Garzik14e3e072007-10-08 00:06:32 -07003568
3569 if (!dev)
3570 return -ENODEV;
3571
3572 switch (cmd) {
3573 case SIOCSIFFLAGS: /* Set interface flags */
3574 return dev_change_flags(dev, ifr->ifr_flags);
3575
3576 case SIOCSIFMETRIC: /* Set the metric on the interface
3577 (currently unused) */
3578 return -EOPNOTSUPP;
3579
3580 case SIOCSIFMTU: /* Set the MTU of a device */
3581 return dev_set_mtu(dev, ifr->ifr_mtu);
3582
Linus Torvalds1da177e2005-04-16 15:20:36 -07003583 case SIOCSIFHWADDR:
3584 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3585
3586 case SIOCSIFHWBROADCAST:
3587 if (ifr->ifr_hwaddr.sa_family != dev->type)
3588 return -EINVAL;
3589 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3590 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003591 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003592 return 0;
3593
Linus Torvalds1da177e2005-04-16 15:20:36 -07003594 case SIOCSIFMAP:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003595 if (ops->ndo_set_config) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003596 if (!netif_device_present(dev))
3597 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003598 return ops->ndo_set_config(dev, &ifr->ifr_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003599 }
3600 return -EOPNOTSUPP;
3601
3602 case SIOCADDMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003603 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003604 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3605 return -EINVAL;
3606 if (!netif_device_present(dev))
3607 return -ENODEV;
3608 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3609 dev->addr_len, 1);
3610
3611 case SIOCDELMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003612 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003613 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3614 return -EINVAL;
3615 if (!netif_device_present(dev))
3616 return -ENODEV;
3617 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3618 dev->addr_len, 1);
3619
Linus Torvalds1da177e2005-04-16 15:20:36 -07003620 case SIOCSIFTXQLEN:
3621 if (ifr->ifr_qlen < 0)
3622 return -EINVAL;
3623 dev->tx_queue_len = ifr->ifr_qlen;
3624 return 0;
3625
3626 case SIOCSIFNAME:
3627 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3628 return dev_change_name(dev, ifr->ifr_newname);
3629
3630 /*
3631 * Unknown or private ioctl
3632 */
3633
3634 default:
3635 if ((cmd >= SIOCDEVPRIVATE &&
3636 cmd <= SIOCDEVPRIVATE + 15) ||
3637 cmd == SIOCBONDENSLAVE ||
3638 cmd == SIOCBONDRELEASE ||
3639 cmd == SIOCBONDSETHWADDR ||
3640 cmd == SIOCBONDSLAVEINFOQUERY ||
3641 cmd == SIOCBONDINFOQUERY ||
3642 cmd == SIOCBONDCHANGEACTIVE ||
3643 cmd == SIOCGMIIPHY ||
3644 cmd == SIOCGMIIREG ||
3645 cmd == SIOCSMIIREG ||
3646 cmd == SIOCBRADDIF ||
3647 cmd == SIOCBRDELIF ||
3648 cmd == SIOCWANDEV) {
3649 err = -EOPNOTSUPP;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003650 if (ops->ndo_do_ioctl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003651 if (netif_device_present(dev))
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003652 err = ops->ndo_do_ioctl(dev, ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003653 else
3654 err = -ENODEV;
3655 }
3656 } else
3657 err = -EINVAL;
3658
3659 }
3660 return err;
3661}
3662
/*
 *	This function handles all "interface"-type I/O control requests. The actual
 *	'doing' part of this is dev_ifsioc above.
 */

/**
 *	dev_ioctl	-	network device ioctl
 *	@net: the applicable net namespace
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 */

int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes ifconf argument
	   and requires shared lock, because it sleeps writing
	   to user space.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_lock();
		ret = dev_ifconf(net, (char __user *) arg);
		rtnl_unlock();
		return ret;
	}
	if (cmd == SIOCGIFNAME)
		return dev_ifname(net, (struct ifreq __user *)arg);

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	ifr.ifr_name[IFNAMSIZ-1] = 0;

	/* Strip an alias suffix ("eth0:1" -> "eth0"); it is restored
	 * before copying results back to userspace. */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */

	switch (cmd) {
	/*
	 *	These ioctl calls:
	 *	- can be done by all.
	 *	- atomic and do not require locking.
	 *	- return a value
	 */
	case SIOCGIFFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHWADDR:
	case SIOCGIFSLAVE:
	case SIOCGIFMAP:
	case SIOCGIFINDEX:
	case SIOCGIFTXQLEN:
		dev_load(net, ifr.ifr_name);
		read_lock(&dev_base_lock);
		ret = dev_ifsioc_locked(net, &ifr, cmd);
		read_unlock(&dev_base_lock);
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	case SIOCETHTOOL:
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ethtool(net, &ifr);
		rtnl_unlock();
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	/*
	 *	These ioctl calls:
	 *	- require superuser power.
	 *	- require strict serialization.
	 *	- return a value
	 */
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSIFNAME:
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ifsioc(net, &ifr, cmd);
		rtnl_unlock();
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	/*
	 *	These ioctl calls:
	 *	- require superuser power.
	 *	- require strict serialization.
	 *	- do not return a value
	 */
	case SIOCSIFFLAGS:
	case SIOCSIFMETRIC:
	case SIOCSIFMTU:
	case SIOCSIFMAP:
	case SIOCSIFHWADDR:
	case SIOCSIFSLAVE:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFHWBROADCAST:
	case SIOCSIFTXQLEN:
	case SIOCSMIIREG:
	case SIOCBONDENSLAVE:
	case SIOCBONDRELEASE:
	case SIOCBONDSETHWADDR:
	case SIOCBONDCHANGEACTIVE:
	case SIOCBRADDIF:
	case SIOCBRDELIF:
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		/* fall through */
	case SIOCBONDSLAVEINFOQUERY:
	case SIOCBONDINFOQUERY:
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ifsioc(net, &ifr, cmd);
		rtnl_unlock();
		return ret;

	case SIOCGIFMEM:
		/* Get the per device memory space. We can add this but
		 * currently do not support it */
	case SIOCSIFMEM:
		/* Set the per device memory buffer space.
		 * Not applicable in our case */
	case SIOCSIFLINK:
		return -EINVAL;

	/*
	 *	Unknown or private ioctl.
	 */
	default:
		if (cmd == SIOCWANDEV ||
		    (cmd >= SIOCDEVPRIVATE &&
		     cmd <= SIOCDEVPRIVATE + 15)) {
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(net, &ifr, cmd);
			rtnl_unlock();
			if (!ret && copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
				ret = -EFAULT;
			return ret;
		}
		/* Take care of Wireless Extensions */
		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
			return wext_handle_ioctl(net, &ifr, cmd, arg);
		return -EINVAL;
	}
}
3844
3845
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;

	/* Advance the counter (wrapping back to 1 on overflow) until an
	 * index not currently in use in this namespace is found. */
	do {
		if (++ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(net, ifindex));

	return ifindex;
}
3864
Linus Torvalds1da177e2005-04-16 15:20:36 -07003865/* Delayed registration/unregisteration */
Denis Cheng3b5b34f2007-12-07 00:49:17 -08003866static LIST_HEAD(net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003867
Stephen Hemminger6f05f622007-03-08 20:46:03 -08003868static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003869{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003870 list_add_tail(&dev->todo_list, &net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003871}
3872
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07003873static void rollback_registered(struct net_device *dev)
3874{
3875 BUG_ON(dev_boot_phase);
3876 ASSERT_RTNL();
3877
3878 /* Some devices call without registering for initialization unwind. */
3879 if (dev->reg_state == NETREG_UNINITIALIZED) {
3880 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3881 "was registered\n", dev->name, dev);
3882
3883 WARN_ON(1);
3884 return;
3885 }
3886
3887 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3888
3889 /* If device is running, close it first. */
3890 dev_close(dev);
3891
3892 /* And unlink it from device chain. */
3893 unlist_netdevice(dev);
3894
3895 dev->reg_state = NETREG_UNREGISTERING;
3896
3897 synchronize_net();
3898
3899 /* Shutdown queueing discipline. */
3900 dev_shutdown(dev);
3901
3902
3903 /* Notify protocols, that we are about to destroy
3904 this device. They should clean all the things.
3905 */
3906 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3907
3908 /*
3909 * Flush the unicast and multicast chains
3910 */
3911 dev_addr_discard(dev);
3912
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003913 if (dev->netdev_ops->ndo_uninit)
3914 dev->netdev_ops->ndo_uninit(dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07003915
3916 /* Notifier chain MUST detach us from master device. */
Ilpo Järvinen547b7922008-07-25 21:43:18 -07003917 WARN_ON(dev->master);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07003918
3919 /* Remove entries from kobject tree */
3920 netdev_unregister_kobject(dev);
3921
3922 synchronize_net();
3923
3924 dev_put(dev);
3925}
3926
David S. Millere8a04642008-07-17 00:34:19 -07003927static void __netdev_init_queue_locks_one(struct net_device *dev,
3928 struct netdev_queue *dev_queue,
3929 void *_unused)
David S. Millerc773e842008-07-08 23:13:53 -07003930{
3931 spin_lock_init(&dev_queue->_xmit_lock);
David S. Millercf508b12008-07-22 14:16:42 -07003932 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
David S. Millerc773e842008-07-08 23:13:53 -07003933 dev_queue->xmit_lock_owner = -1;
3934}
3935
/*
 * Initialise the xmit locks of every TX queue plus the device's
 * single RX queue.  Called during register_netdevice().
 */
static void netdev_init_queue_locks(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
}
3941
Herbert Xub63365a2008-10-23 01:11:29 -07003942unsigned long netdev_fix_features(unsigned long features, const char *name)
3943{
3944 /* Fix illegal SG+CSUM combinations. */
3945 if ((features & NETIF_F_SG) &&
3946 !(features & NETIF_F_ALL_CSUM)) {
3947 if (name)
3948 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
3949 "checksum feature.\n", name);
3950 features &= ~NETIF_F_SG;
3951 }
3952
3953 /* TSO requires that SG is present as well. */
3954 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
3955 if (name)
3956 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
3957 "SG feature.\n", name);
3958 features &= ~NETIF_F_TSO;
3959 }
3960
3961 if (features & NETIF_F_UFO) {
3962 if (!(features & NETIF_F_GEN_CSUM)) {
3963 if (name)
3964 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3965 "since no NETIF_F_HW_CSUM feature.\n",
3966 name);
3967 features &= ~NETIF_F_UFO;
3968 }
3969
3970 if (!(features & NETIF_F_SG)) {
3971 if (name)
3972 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3973 "since no NETIF_F_SG feature.\n", name);
3974 features &= ~NETIF_F_UFO;
3975 }
3976 }
3977
3978 return features;
3979}
3980EXPORT_SYMBOL(netdev_fix_features);
3981
/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */

int register_netdevice(struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *p;
	int ret;
	struct net *net = dev_net(dev);

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	/* Registration can sleep (kobject setup, driver init, notifiers). */
	might_sleep();

	/* When net_device's are persistent, this will be fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!net);

	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);
	netdev_init_queue_locks(dev);

	dev->iflink = -1;

#ifdef CONFIG_COMPAT_NET_DEV_OPS
	/* Netdevice_ops API compatibility support.
	 * This is temporary until all network devices are converted.
	 */
	if (dev->netdev_ops) {
		const struct net_device_ops *ops = dev->netdev_ops;

		/* Mirror the ops table into the legacy per-device function
		 * pointers so unconverted call sites keep working.
		 */
		dev->init = ops->ndo_init;
		dev->uninit = ops->ndo_uninit;
		dev->open = ops->ndo_open;
		dev->change_rx_flags = ops->ndo_change_rx_flags;
		dev->set_rx_mode = ops->ndo_set_rx_mode;
		dev->set_multicast_list = ops->ndo_set_multicast_list;
		dev->set_mac_address = ops->ndo_set_mac_address;
		dev->validate_addr = ops->ndo_validate_addr;
		dev->do_ioctl = ops->ndo_do_ioctl;
		dev->set_config = ops->ndo_set_config;
		dev->change_mtu = ops->ndo_change_mtu;
		dev->tx_timeout = ops->ndo_tx_timeout;
		dev->get_stats = ops->ndo_get_stats;
		dev->vlan_rx_register = ops->ndo_vlan_rx_register;
		dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
		dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
#ifdef CONFIG_NET_POLL_CONTROLLER
		dev->poll_controller = ops->ndo_poll_controller;
#endif
	} else {
		char drivername[64];
		pr_info("%s (%s): not using net_device_ops yet\n",
			dev->name, netdev_drivername(dev, drivername, 64));

		/* This works only because net_device_ops and the
		   compatibility structure are the same. */
		dev->netdev_ops = (void *) &(dev->init);
	}
#endif

	/* Init, if this function is available */
	if (dev->netdev_ops->ndo_init) {
		ret = dev->netdev_ops->ndo_init(dev);
		if (ret) {
			/* Drivers should return a negative errno;
			 * map any stray positive value to -EIO.
			 */
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	if (!dev_valid_name(dev->name)) {
		ret = -EINVAL;
		goto err_uninit;
	}

	dev->ifindex = dev_new_index(net);
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/* Check for existence of name */
	head = dev_name_hash(net, dev->name);
	hlist_for_each(p, head) {
		struct net_device *d
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
			ret = -EEXIST;
			goto err_uninit;
		}
	}

	/* Fix illegal checksum combinations */
	if ((dev->features & NETIF_F_HW_CSUM) &&
	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
	}

	if ((dev->features & NETIF_F_NO_CSUM) &&
	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
	}

	dev->features = netdev_fix_features(dev->features, dev->name);

	/* Enable software GSO if SG is supported. */
	if (dev->features & NETIF_F_SG)
		dev->features |= NETIF_F_GSO;

	netdev_initialize_kobject(dev);
	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	dev_init_scheduler(dev);
	dev_hold(dev);	/* reference released from netdev_run_todo() path */
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret) {
		/* A notifier refused the device: undo everything done
		 * above and report the error to the caller.
		 */
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}

out:
	return ret;

err_uninit:
	/* Undo a successful ->ndo_init() before failing. */
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
	goto out;
}
4142
4143/**
4144 * register_netdev - register a network device
4145 * @dev: device to register
4146 *
4147 * Take a completed network device structure and add it to the kernel
4148 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4149 * chain. 0 is returned on success. A negative errno code is returned
4150 * on a failure to set up the device, or if the name is a duplicate.
4151 *
Borislav Petkov38b4da32007-04-20 22:14:10 -07004152 * This is a wrapper around register_netdevice that takes the rtnl semaphore
Linus Torvalds1da177e2005-04-16 15:20:36 -07004153 * and expands the device name if you passed a format string to
4154 * alloc_netdev.
4155 */
4156int register_netdev(struct net_device *dev)
4157{
4158 int err;
4159
4160 rtnl_lock();
4161
4162 /*
4163 * If the name is a format string the caller wants us to do a
4164 * name allocation.
4165 */
4166 if (strchr(dev->name, '%')) {
4167 err = dev_alloc_name(dev, dev->name);
4168 if (err < 0)
4169 goto out;
4170 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004171
Linus Torvalds1da177e2005-04-16 15:20:36 -07004172 err = register_netdevice(dev);
4173out:
4174 rtnl_unlock();
4175 return err;
4176}
4177EXPORT_SYMBOL(register_netdev);
4178
/*
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	/* Poll until every dev_hold() has been matched by a dev_put(). */
	while (atomic_read(&dev->refcnt) != 0) {
		/* Once a second, re-send NETDEV_UNREGISTER to nudge
		 * refcount holders into releasing their references.
		 */
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		/* Complain every 10 seconds so a leaked reference shows
		 * up in the log instead of hanging silently.
		 */
		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}
4229
/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * We must not return until all unregister events added during
 * the interval the lock was held have been completed.
 */
void netdev_run_todo(void)
{
	struct list_head list;

	/* Snapshot list, allow later requests */
	list_replace_init(&net_todo_list, &list);

	/* Drop the RTNL mutex acquired by our caller (rtnl_unlock());
	 * the refcount wait below must not run with it held.
	 */
	__rtnl_unlock();

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		/* Drop skbs still sitting in per-CPU backlog queues
		 * that reference this device.
		 */
		on_each_cpu(flush_backlog, dev, 1);

		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		WARN_ON(dev->ip_ptr);
		WARN_ON(dev->ip6_ptr);
		WARN_ON(dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}
}
4294
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08004295/**
4296 * dev_get_stats - get network device statistics
4297 * @dev: device to get statistics from
4298 *
4299 * Get network statistics from device. The device driver may provide
4300 * its own method by setting dev->netdev_ops->get_stats; otherwise
4301 * the internal statistics structure is used.
4302 */
4303const struct net_device_stats *dev_get_stats(struct net_device *dev)
4304 {
4305 const struct net_device_ops *ops = dev->netdev_ops;
4306
4307 if (ops->ndo_get_stats)
4308 return ops->ndo_get_stats(dev);
4309 else
4310 return &dev->stats;
Rusty Russellc45d2862007-03-28 14:29:08 -07004311}
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08004312EXPORT_SYMBOL(dev_get_stats);
Rusty Russellc45d2862007-03-28 14:29:08 -07004313
/* Point one RX/TX queue back at its owning device.  Matches the
 * netdev_for_each_tx_queue() callback signature; @_unused is ignored.
 */
static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue,
				  void *_unused)
{
	queue->dev = dev;
}
4320
/* Initialize the single RX queue, every TX queue and the global TX
 * lock of a freshly allocated device.
 */
static void netdev_init_queues(struct net_device *dev)
{
	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
	spin_lock_init(&dev->tx_global_lock);
}
4327
/**
 *	alloc_netdev_mq - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@setup:		callback to initialize device
 *	@queue_count:	the number of subqueues to allocate
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization.  Also allocates subqueue structs
 *	for each queue on the device at the end of the netdevice.
 */
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *), unsigned int queue_count)
{
	struct netdev_queue *tx;
	struct net_device *dev;
	size_t alloc_size;
	void *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure 32-byte alignment of private area */
		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
		alloc_size += sizeof_priv;
	}
	/* ensure 32-byte alignment of whole construct */
	alloc_size += NETDEV_ALIGN_CONST;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
		return NULL;
	}

	/* TX queues live in a separate allocation, one entry per subqueue. */
	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
	if (!tx) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate "
		       "tx qdiscs.\n");
		kfree(p);
		return NULL;
	}

	/* Align the net_device inside the padded allocation; ->padded
	 * records the offset so free_netdev() can recover the start of
	 * the original allocation.
	 */
	dev = (struct net_device *)
		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
	dev->padded = (char *)dev - (char *)p;
	dev_net_set(dev, &init_net);

	dev->_tx = tx;
	dev->num_tx_queues = queue_count;
	dev->real_num_tx_queues = queue_count;

	if (sizeof_priv) {
		/* Private area starts at the 32-byte aligned offset just
		 * past struct net_device.
		 */
		dev->priv = ((char *)dev +
			     ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
			      & ~NETDEV_ALIGN_CONST));
	}

	dev->gso_max_size = GSO_MAX_SIZE;

	netdev_init_queues(dev);

	netpoll_netdev_init(dev);
	setup(dev);
	strcpy(dev->name, name);
	return dev;
}
EXPORT_SYMBOL(alloc_netdev_mq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004397
/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released.
 *	If this is the last reference then it will be freed.
 */
void free_netdev(struct net_device *dev)
{
	release_net(dev_net(dev));

	kfree(dev->_tx);

	/* Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		/* Never registered: no kobject to release, just undo the
		 * padded allocation made by alloc_netdev_mq().
		 */
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
}
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004424
/**
 *	synchronize_net -  Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */
void synchronize_net(void)
{
	might_sleep();
	/* Receive paths run inside RCU read-side critical sections, so a
	 * full grace period guarantees all in-flight receives are done.
	 */
	synchronize_rcu();
}
4436
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Close the device, unlink it and fire NETDEV_UNREGISTER;
	 * the final reference drop happens later via the todo list.
	 */
	rollback_registered(dev);
	/* Finish processing unregister after unlock */
	net_set_todo(dev);
}
4456
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	/* rtnl_unlock() also runs netdev_run_todo(), which completes
	 * the unregister (waits for references, frees device state).
	 */
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
4476
/**
 *	dev_change_net_namespace - move device to different nethost namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: If not NULL name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.
 */

int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	char buf[IFNAMSIZ];
	const char *destname;
	int err;

	ASSERT_RTNL();

	/* Don't allow namespace local devices to be moved. */
	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

#ifdef CONFIG_SYSFS
	/* Don't allow real devices to be moved when sysfs
	 * is enabled.
	 */
	err = -EINVAL;
	if (dev->dev.parent)
		goto out;
#endif

	/* Ensure the device has been registered */
	err = -EINVAL;
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	/* Get out if there is nothing to do */
	err = 0;
	if (net_eq(dev_net(dev), net))
		goto out;

	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
	err = -EEXIST;
	destname = dev->name;
	if (__dev_get_by_name(net, destname)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (!dev_valid_name(pat))
			goto out;
		if (strchr(pat, '%')) {
			if (__dev_alloc_name(net, pat, buf) < 0)
				goto out;
			destname = buf;
		} else
			destname = pat;
		if (__dev_get_by_name(net, destname))
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice unregister_netdevice.
	 */

	/* If device is running close it first. */
	dev_close(dev);

	/* And unlink it from device chain */
	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_addr_discard(dev);

	netdev_unregister_kobject(dev);

	/* Actually switch the network namespace */
	dev_net_set(dev, net);

	/* Assign the new device name */
	if (destname != dev->name)
		strcpy(dev->name, destname);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex)) {
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	/* Fixup kobjects */
	err = netdev_register_kobject(dev);
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	synchronize_net();
	err = 0;
out:
	return err;
}
4602
/* CPU hotplug notifier: when a CPU dies, splice its pending softnet
 * work (skb completion queue, qdisc output queue and input backlog)
 * onto the CPU running this callback so nothing is stranded.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct Qdisc **list_net;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	/* Only the post-mortem of a dead CPU is interesting here. */
	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Find end of our output_queue. */
	list_net = &sd->output_queue;
	while (*list_net)
		list_net = &(*list_net)->next_sched;
	/* Append output queue from offline CPU. */
	*list_net = oldsd->output_queue;
	oldsd->output_queue = NULL;

	/* Kick the TX softirq so the adopted queues get processed. */
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
		netif_rx(skb);

	return NOTIFY_OK;
}
Chris Leechdb217332006-06-17 21:24:58 -07004647#ifdef CONFIG_NET_DMA
/**
 * net_dma_rebalance - try to maintain one DMA channel per CPU
 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
 *
 * This is called when the number of channels allocated to the net_dma client
 * changes.  The net_dma client tries to have one DMA channel per CPU.
 */

static void net_dma_rebalance(struct net_dma *net_dma)
{
	unsigned int cpu, i, n, chan_idx;
	struct dma_chan *chan;

	/* No channels at all: clear every online CPU's channel pointer. */
	if (cpus_empty(net_dma->channel_mask)) {
		for_each_online_cpu(cpu)
			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
		return;
	}

	i = 0;
	cpu = first_cpu(cpu_online_map);

	for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
		chan = net_dma->channels[chan_idx];

		/* Each channel serves an even share of the online CPUs,
		 * with the first (ncpus % nchans) channels taking one
		 * extra CPU to cover the remainder.
		 */
		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
		   + (i < (num_online_cpus() %
			cpus_weight(net_dma->channel_mask)) ? 1 : 0));

		while(n) {
			per_cpu(softnet_data, cpu).net_dma = chan;
			cpu = next_cpu(cpu, cpu_online_map);
			n--;
		}
		i++;
	}
}
4685
/**
 * netdev_dma_event - event callback for the net_dma_client
 * @client: should always be net_dma_client
 * @chan: DMA channel for the event
 * @state: DMA state to be handled
 *
 * Tracks channel arrivals and departures in net_dma->channels[] and
 * rebalances the per-CPU channel assignment after each change.
 * Returns DMA_ACK when the channel was claimed/released, DMA_DUP
 * (no action) otherwise.
 */
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state)
{
	int i, found = 0, pos = -1;
	struct net_dma *net_dma =
		container_of(client, struct net_dma, client);
	enum dma_state_client ack = DMA_DUP; /* default: take no action */

	spin_lock(&net_dma->lock);
	switch (state) {
	case DMA_RESOURCE_AVAILABLE:
		/* Scan for a duplicate while remembering the first
		 * free slot in case we need to insert.
		 */
		for (i = 0; i < nr_cpu_ids; i++)
			if (net_dma->channels[i] == chan) {
				found = 1;
				break;
			} else if (net_dma->channels[i] == NULL && pos < 0)
				pos = i;

		if (!found && pos >= 0) {
			ack = DMA_ACK;
			net_dma->channels[pos] = chan;
			cpu_set(pos, net_dma->channel_mask);
			net_dma_rebalance(net_dma);
		}
		break;
	case DMA_RESOURCE_REMOVED:
		for (i = 0; i < nr_cpu_ids; i++)
			if (net_dma->channels[i] == chan) {
				found = 1;
				pos = i;
				break;
			}

		if (found) {
			ack = DMA_ACK;
			cpu_clear(pos, net_dma->channel_mask);
			/* i == pos here: the loop above exited via break. */
			net_dma->channels[i] = NULL;
			net_dma_rebalance(net_dma);
		}
		break;
	default:
		break;
	}
	spin_unlock(&net_dma->lock);

	return ack;
}
4740
4741/**
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004742 * netdev_dma_register - register the networking subsystem as a DMA client
Chris Leechdb217332006-06-17 21:24:58 -07004743 */
4744static int __init netdev_dma_register(void)
4745{
Mike Travis0c0b0ac2008-05-02 16:43:08 -07004746 net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
4747 GFP_KERNEL);
4748 if (unlikely(!net_dma.channels)) {
4749 printk(KERN_NOTICE
4750 "netdev_dma: no memory for net_dma.channels\n");
4751 return -ENOMEM;
4752 }
Dan Williamsd379b012007-07-09 11:56:42 -07004753 spin_lock_init(&net_dma.lock);
4754 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4755 dma_async_client_register(&net_dma.client);
4756 dma_async_client_chan_request(&net_dma.client);
Chris Leechdb217332006-06-17 21:24:58 -07004757 return 0;
4758}
4759
4760#else
4761static int __init netdev_dma_register(void) { return -ENODEV; }
4762#endif /* CONFIG_NET_DMA */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004763
/**
 * netdev_increment_features - increment feature set by one
 * @all: current feature set
 * @one: new feature set
 * @mask: mask feature set
 *
 * Computes a new feature set after adding a device with feature set
 * @one to the master device with current feature set @all.  Will not
 * enable anything that is off in @mask. Returns the new feature set.
 */
unsigned long netdev_increment_features(unsigned long all, unsigned long one,
					unsigned long mask)
{
	/* If device needs checksumming, downgrade to it. */
	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
	else if (mask & NETIF_F_ALL_CSUM) {
		/* If one device supports v4/v6 checksumming, set for all. */
		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
		    !(all & NETIF_F_GEN_CSUM)) {
			all &= ~NETIF_F_ALL_CSUM;
			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
		}

		/* If one device supports hw checksumming, set for all. */
		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
			all &= ~NETIF_F_ALL_CSUM;
			all |= NETIF_F_HW_CSUM;
		}
	}

	/* Treat the new device as fully checksum-capable for the merge
	 * below; its real checksum bits were folded into @all above.
	 */
	one |= NETIF_F_ALL_CSUM;

	/* ONE_FOR_ALL features stay on if any device has them; all other
	 * features must be supported by every device to survive.
	 */
	one |= all & NETIF_F_ONE_FOR_ALL;
	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
	all |= one & mask & NETIF_F_ONE_FOR_ALL;

	return all;
}
EXPORT_SYMBOL(netdev_increment_features);
Herbert Xu7f353bf2007-08-10 15:47:58 -07004804
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004805static struct hlist_head *netdev_create_hash(void)
4806{
4807 int i;
4808 struct hlist_head *hash;
4809
4810 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4811 if (hash != NULL)
4812 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4813 INIT_HLIST_HEAD(&hash[i]);
4814
4815 return hash;
4816}
4817
Eric W. Biederman881d9662007-09-17 11:56:21 -07004818/* Initialize per network namespace state */
Pavel Emelyanov46650792007-10-08 20:38:39 -07004819static int __net_init netdev_init(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004820{
Eric W. Biederman881d9662007-09-17 11:56:21 -07004821 INIT_LIST_HEAD(&net->dev_base_head);
Eric W. Biederman881d9662007-09-17 11:56:21 -07004822
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004823 net->dev_name_head = netdev_create_hash();
4824 if (net->dev_name_head == NULL)
4825 goto err_name;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004826
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004827 net->dev_index_head = netdev_create_hash();
4828 if (net->dev_index_head == NULL)
4829 goto err_idx;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004830
4831 return 0;
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004832
4833err_idx:
4834 kfree(net->dev_name_head);
4835err_name:
4836 return -ENOMEM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004837}
4838
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004839/**
4840 * netdev_drivername - network driver for the device
4841 * @dev: network device
4842 * @buffer: buffer for resulting name
4843 * @len: size of buffer
4844 *
4845 * Determine network driver for device.
4846 */
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07004847char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
Arjan van de Ven6579e572008-07-21 13:31:48 -07004848{
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07004849 const struct device_driver *driver;
4850 const struct device *parent;
Arjan van de Ven6579e572008-07-21 13:31:48 -07004851
4852 if (len <= 0 || !buffer)
4853 return buffer;
4854 buffer[0] = 0;
4855
4856 parent = dev->dev.parent;
4857
4858 if (!parent)
4859 return buffer;
4860
4861 driver = parent->driver;
4862 if (driver && driver->name)
4863 strlcpy(buffer, driver->name, len);
4864 return buffer;
4865}
4866
/* Per-namespace teardown: free the name/index hash tables allocated by
 * netdev_init(). */
static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}
4872
/* Pernet operations: set up / tear down the per-namespace device hashes
 * whenever a network namespace is created or destroyed. */
static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};
4877
Pavel Emelyanov46650792007-10-08 20:38:39 -07004878static void __net_exit default_device_exit(struct net *net)
Eric W. Biedermance286d32007-09-12 13:53:49 +02004879{
4880 struct net_device *dev, *next;
4881 /*
4882 * Push all migratable of the network devices back to the
4883 * initial network namespace
4884 */
4885 rtnl_lock();
4886 for_each_netdev_safe(net, dev, next) {
4887 int err;
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004888 char fb_name[IFNAMSIZ];
Eric W. Biedermance286d32007-09-12 13:53:49 +02004889
4890 /* Ignore unmoveable devices (i.e. loopback) */
4891 if (dev->features & NETIF_F_NETNS_LOCAL)
4892 continue;
4893
Eric W. Biedermand0c082c2008-11-05 15:59:38 -08004894 /* Delete virtual devices */
4895 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
4896 dev->rtnl_link_ops->dellink(dev);
4897 continue;
4898 }
4899
Eric W. Biedermance286d32007-09-12 13:53:49 +02004900 /* Push remaing network devices to init_net */
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004901 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4902 err = dev_change_net_namespace(dev, &init_net, fb_name);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004903 if (err) {
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004904 printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
Eric W. Biedermance286d32007-09-12 13:53:49 +02004905 __func__, dev->name, err);
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004906 BUG();
Eric W. Biedermance286d32007-09-12 13:53:49 +02004907 }
4908 }
4909 rtnl_unlock();
4910}
4911
/* Pernet operations: on namespace exit, evacuate or delete the network
 * devices the namespace still contains. */
static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
};
4915
Linus Torvalds1da177e2005-04-16 15:20:36 -07004916/*
4917 * Initialize the DEV module. At boot time this walks the device list and
4918 * unhooks any devices that fail to initialise (normally hardware not
4919 * present) and leaves us with a valid list of present and active devices.
4920 *
4921 */
4922
4923/*
4924 * This is called single threaded during boot, so no need
4925 * to take the rtnl semaphore.
4926 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	/* Must run before any device has been registered. */
	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	/* Protocol handler lists: one catch-all list plus a small hash. */
	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */

	for_each_possible_cpu(i) {
		struct softnet_data *queue;

		queue = &per_cpu(softnet_data, i);
		skb_queue_head_init(&queue->input_pkt_queue);
		queue->completion_queue = NULL;
		INIT_LIST_HEAD(&queue->poll_list);

		/* Hook up the per-cpu backlog poll handler. */
		queue->backlog.poll = process_backlog;
		queue->backlog.weight = weight_p;
	}

	dev_boot_phase = 0;

	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must
	 * be present too.  Since we now dynamically allocate and free
	 * the loopback device, maintain this invariant by keeping the
	 * loopback device first on the list of network devices -- the
	 * first device that appears and the last one that disappears.
	 * Hence loopback_net_ops is registered before default_device_ops.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	/* Best effort: the return value is deliberately ignored, since
	 * networking works without a DMA engine. */
	netdev_dma_register();

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	/* Register a CPU hotplug notifier (dev_cpu_callback). */
	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
	rc = 0;
out:
	/* NOTE(review): earlier registrations are not unwound on failure;
	 * presumably a failure here is fatal to boot anyway -- confirm. */
	return rc;
}
4991
4992subsys_initcall(net_dev_init);
4993
4994EXPORT_SYMBOL(__dev_get_by_index);
4995EXPORT_SYMBOL(__dev_get_by_name);
4996EXPORT_SYMBOL(__dev_remove_pack);
Mitch Williamsc2373ee2005-11-09 10:34:45 -08004997EXPORT_SYMBOL(dev_valid_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004998EXPORT_SYMBOL(dev_add_pack);
4999EXPORT_SYMBOL(dev_alloc_name);
5000EXPORT_SYMBOL(dev_close);
5001EXPORT_SYMBOL(dev_get_by_flags);
5002EXPORT_SYMBOL(dev_get_by_index);
5003EXPORT_SYMBOL(dev_get_by_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005004EXPORT_SYMBOL(dev_open);
5005EXPORT_SYMBOL(dev_queue_xmit);
5006EXPORT_SYMBOL(dev_remove_pack);
5007EXPORT_SYMBOL(dev_set_allmulti);
5008EXPORT_SYMBOL(dev_set_promiscuity);
5009EXPORT_SYMBOL(dev_change_flags);
5010EXPORT_SYMBOL(dev_set_mtu);
5011EXPORT_SYMBOL(dev_set_mac_address);
5012EXPORT_SYMBOL(free_netdev);
5013EXPORT_SYMBOL(netdev_boot_setup_check);
5014EXPORT_SYMBOL(netdev_set_master);
5015EXPORT_SYMBOL(netdev_state_change);
5016EXPORT_SYMBOL(netif_receive_skb);
5017EXPORT_SYMBOL(netif_rx);
5018EXPORT_SYMBOL(register_gifconf);
5019EXPORT_SYMBOL(register_netdevice);
5020EXPORT_SYMBOL(register_netdevice_notifier);
5021EXPORT_SYMBOL(skb_checksum_help);
5022EXPORT_SYMBOL(synchronize_net);
5023EXPORT_SYMBOL(unregister_netdevice);
5024EXPORT_SYMBOL(unregister_netdevice_notifier);
5025EXPORT_SYMBOL(net_enable_timestamp);
5026EXPORT_SYMBOL(net_disable_timestamp);
5027EXPORT_SYMBOL(dev_get_flags);
5028
5029#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
5030EXPORT_SYMBOL(br_handle_frame_hook);
5031EXPORT_SYMBOL(br_fdb_get_hook);
5032EXPORT_SYMBOL(br_fdb_put_hook);
5033#endif
5034
Linus Torvalds1da177e2005-04-16 15:20:36 -07005035EXPORT_SYMBOL(dev_load);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005036
5037EXPORT_PER_CPU_SYMBOL(softnet_data);