Blame - net/core/dev.c - kernel/msm-4.9

blob: 3156df699f014569e208785901ab36d335f3b795 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* NET3 Protocol independent device support routines.
				3	*
				4	* This program is free software; you can redistribute it and/or
				5	* modify it under the terms of the GNU General Public License
				6	* as published by the Free Software Foundation; either version
				7	* 2 of the License, or (at your option) any later version.
				8	*
				9	* Derived from the non IP parts of dev.c 1.0.19
Jesper Juhl	02c30a8	2005-05-05 16:16:16 -0700	[diff] [blame]	10	* Authors: Ross Biro
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	11	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
				12	* Mark Evans, <evansmp@uhura.aston.ac.uk>
				13	*
				14	* Additional Authors:
				15	* Florian la Roche <rzsfl@rz.uni-sb.de>
				16	* Alan Cox <gw4pts@gw4pts.ampr.org>
				17	* David Hinds <dahinds@users.sourceforge.net>
				18	* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
				19	* Adam Sulmicki <adam@cfar.umd.edu>
				20	* Pekka Riikonen <priikone@poesidon.pspt.fi>
				21	*
				22	* Changes:
				23	* D.J. Barrow : Fixed bug where dev->refcnt gets set
				24	* to 2 if register_netdev gets called
				25	* before net_dev_init & also removed a
				26	* few lines of code in the process.
				27	* Alan Cox : device private ioctl copies fields back.
				28	* Alan Cox : Transmit queue code does relevant
				29	* stunts to keep the queue safe.
				30	* Alan Cox : Fixed double lock.
				31	* Alan Cox : Fixed promisc NULL pointer trap
				32	* ???????? : Support the full private ioctl range
				33	* Alan Cox : Moved ioctl permission check into
				34	* drivers
				35	* Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
				36	* Alan Cox : 100 backlog just doesn't cut it when
				37	* you start doing multicast video 8)
				38	* Alan Cox : Rewrote net_bh and list manager.
				39	* Alan Cox : Fix ETH_P_ALL echoback lengths.
				40	* Alan Cox : Took out transmit every packet pass
				41	* Saved a few bytes in the ioctl handler
				42	* Alan Cox : Network driver sets packet type before
				43	* calling netif_rx. Saves a function
				44	* call a packet.
				45	* Alan Cox : Hashed net_bh()
				46	* Richard Kooijman: Timestamp fixes.
				47	* Alan Cox : Wrong field in SIOCGIFDSTADDR
				48	* Alan Cox : Device lock protection.
				49	* Alan Cox : Fixed nasty side effect of device close
				50	* changes.
				51	* Rudi Cilibrasi : Pass the right thing to
				52	* set_mac_address()
				53	* Dave Miller : 32bit quantity for the device lock to
				54	* make it work out on a Sparc.
				55	* Bjorn Ekwall : Added KERNELD hack.
				56	* Alan Cox : Cleaned up the backlog initialise.
				57	* Craig Metz : SIOCGIFCONF fix if space for under
				58	* 1 device.
				59	* Thomas Bogendoerfer : Return ENODEV for dev_open, if there
				60	* is no device open function.
				61	* Andi Kleen : Fix error reporting for SIOCGIFCONF
				62	* Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
				63	* Cyrus Durgin : Cleaned for KMOD
				64	* Adam Sulmicki : Bug Fix : Network Device Unload
				65	* A network device unload needs to purge
				66	* the backlog queue.
				67	* Paul Rusty Russell : SIOCSIFNAME
				68	* Pekka Riikonen : Netdev boot-time settings code
				69	* Andrew Morton : Make unregister_netdevice wait
				70	* indefinitely on dev->refcnt
				71	* J Hadi Salim : - Backlog queue sampling
				72	* - netif_rx() feedback
				73	*/
				74
				75	#include <asm/uaccess.h>
				76	#include <asm/system.h>
				77	#include <linux/bitops.h>
				78	#include <linux/config.h>
				79	#include <linux/cpu.h>
				80	#include <linux/types.h>
				81	#include <linux/kernel.h>
				82	#include <linux/sched.h>
				83	#include <linux/string.h>
				84	#include <linux/mm.h>
				85	#include <linux/socket.h>
				86	#include <linux/sockios.h>
				87	#include <linux/errno.h>
				88	#include <linux/interrupt.h>
				89	#include <linux/if_ether.h>
				90	#include <linux/netdevice.h>
				91	#include <linux/etherdevice.h>
				92	#include <linux/notifier.h>
				93	#include <linux/skbuff.h>
				94	#include <net/sock.h>
				95	#include <linux/rtnetlink.h>
				96	#include <linux/proc_fs.h>
				97	#include <linux/seq_file.h>
				98	#include <linux/stat.h>
				99	#include <linux/if_bridge.h>
				100	#include <linux/divert.h>
				101	#include <net/dst.h>
				102	#include <net/pkt_sched.h>
				103	#include <net/checksum.h>
				104	#include <linux/highmem.h>
				105	#include <linux/init.h>
				106	#include <linux/kmod.h>
				107	#include <linux/module.h>
				108	#include <linux/kallsyms.h>
				109	#include <linux/netpoll.h>
				110	#include <linux/rcupdate.h>
				111	#include <linux/delay.h>
				112	#ifdef CONFIG_NET_RADIO
				113	#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
				114	#include <net/iw_handler.h>
				115	#endif /* CONFIG_NET_RADIO */
				116	#include <asm/current.h>
				117
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	118	/*
				119	* The list of packet types we will receive (as opposed to discard)
				120	* and the routines to invoke.
				121	*
				122	* Why 16. Because with 16 the only overlap we get on a hash of the
				123	* low nibble of the protocol value is RARP/SNAP/X.25.
				124	*
				125	* NOTE: That is no longer true with the addition of VLAN tags. Not
				126	* sure which should go first, but I bet it won't make much
				127	* difference if we are running VLANs. The good news is that
				128	* this protocol won't be in the list unless compiled in, so
				129	* the average user (w/out VLANs) will not be adversely affected.
				130	* --BLG
				131	*
				132	* 0800 IP
				133	* 8100 802.1Q VLAN
				134	* 0001 802.3
				135	* 0002 AX.25
				136	* 0004 802.2
				137	* 8035 RARP
				138	* 0005 SNAP
				139	* 0805 X.25
				140	* 0806 ARP
				141	* 8137 IPX
				142	* 0009 Localtalk
				143	* 86DD IPv6
				144	*/
				145
				146	static DEFINE_SPINLOCK(ptype_lock);
				147	static struct list_head ptype_base[16]; /* 16 way hashed list */
				148	static struct list_head ptype_all; /* Taps */
				149
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	150	/*
				151	* The @dev_base list is protected by @dev_base_lock and the rtnl
				152	* semaphore.
				153	*
				154	* Pure readers hold dev_base_lock for reading.
				155	*
				156	* Writers must hold the rtnl semaphore while they loop through the
				157	* dev_base list, and hold dev_base_lock for writing when they do the
				158	* actual updates. This allows pure readers to access the list even
				159	* while a writer is preparing to update it.
				160	*
				161	* To put it another way, dev_base_lock is held for writing only to
				162	* protect against pure readers; the rtnl semaphore provides the
				163	* protection against other writers.
				164	*
				165	* See, for example usages, register_netdevice() and
				166	* unregister_netdevice(), which must be called with the rtnl
				167	* semaphore held.
				168	*/
				169	struct net_device *dev_base;
				170	static struct net_device **dev_tail = &dev_base;
				171	DEFINE_RWLOCK(dev_base_lock);
				172
				173	EXPORT_SYMBOL(dev_base);
				174	EXPORT_SYMBOL(dev_base_lock);
				175
				176	#define NETDEV_HASHBITS 8
				177	static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
				178	static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
				179
				180	static inline struct hlist_head dev_name_hash(const char name)
				181	{
				182	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
				183	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
				184	}
				185
				186	static inline struct hlist_head *dev_index_hash(int ifindex)
				187	{
				188	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
				189	}
				190
				191	/*
				192	* Our notifier list
				193	*/
				194
				195	static struct notifier_block *netdev_chain;
				196
				197	/*
				198	* Device drivers call our routines to queue packets here. We empty the
				199	* queue in the local softnet handler.
				200	*/
				201	DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
				202
				203	#ifdef CONFIG_SYSFS
				204	extern int netdev_sysfs_init(void);
				205	extern int netdev_register_sysfs(struct net_device *);
				206	extern void netdev_unregister_sysfs(struct net_device *);
				207	#else
				208	#define netdev_sysfs_init() (0)
				209	#define netdev_register_sysfs(dev) (0)
				210	#define netdev_unregister_sysfs(dev) do { } while(0)
				211	#endif
				212
				213
				214	/*******************************************************************************
				215
				216	Protocol management and registration routines
				217
				218	*******************************************************************************/
				219
				220	/*
				221	* For efficiency
				222	*/
				223
				224	int netdev_nit;
				225
				226	/*
				227	* Add a protocol ID to the list. Now that the input handler is
				228	* smarter we can dispense with all the messy stuff that used to be
				229	* here.
				230	*
				231	* BEWARE!!! Protocol handlers, mangling input packets,
				232	* MUST BE last in hash buckets and checking protocol handlers
				233	* MUST start from promiscuous ptype_all chain in net_bh.
				234	* It is true now, do not change it.
				235	* Explanation follows: if protocol handler, mangling packet, will
				236	* be the first on list, it is not able to sense, that packet
				237	* is cloned and should be copied-on-write, so that it will
				238	* change it and subsequent readers will get broken packet.
				239	* --ANK (980803)
				240	*/
				241
				242	/**
				243	* dev_add_pack - add packet handler
				244	* @pt: packet type declaration
				245	*
				246	* Add a protocol handler to the networking stack. The passed &packet_type
				247	* is linked into kernel lists and may not be freed until it has been
				248	* removed from the kernel lists.
				249	*
				250	* This call does not sleep therefore it can not
				251	* guarantee all CPU's that are in middle of receiving packets
				252	* will see the new packet type (until the next received packet).
				253	*/
				254
				255	void dev_add_pack(struct packet_type *pt)
				256	{
				257	int hash;
				258
				259	spin_lock_bh(&ptype_lock);
				260	if (pt->type == htons(ETH_P_ALL)) {
				261	netdev_nit++;
				262	list_add_rcu(&pt->list, &ptype_all);
				263	} else {
				264	hash = ntohs(pt->type) & 15;
				265	list_add_rcu(&pt->list, &ptype_base[hash]);
				266	}
				267	spin_unlock_bh(&ptype_lock);
				268	}
				269
				270	extern void linkwatch_run_queue(void);
				271
				272
				273
				274	/**
				275	* __dev_remove_pack - remove packet handler
				276	* @pt: packet type declaration
				277	*
				278	* Remove a protocol handler that was previously added to the kernel
				279	* protocol handlers by dev_add_pack(). The passed &packet_type is removed
				280	* from the kernel lists and can be freed or reused once this function
				281	* returns.
				282	*
				283	* The packet type might still be in use by receivers
				284	* and must not be freed until after all the CPU's have gone
				285	* through a quiescent state.
				286	*/
				287	void __dev_remove_pack(struct packet_type *pt)
				288	{
				289	struct list_head *head;
				290	struct packet_type *pt1;
				291
				292	spin_lock_bh(&ptype_lock);
				293
				294	if (pt->type == htons(ETH_P_ALL)) {
				295	netdev_nit--;
				296	head = &ptype_all;
				297	} else
				298	head = &ptype_base[ntohs(pt->type) & 15];
				299
				300	list_for_each_entry(pt1, head, list) {
				301	if (pt == pt1) {
				302	list_del_rcu(&pt->list);
				303	goto out;
				304	}
				305	}
				306
				307	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
				308	out:
				309	spin_unlock_bh(&ptype_lock);
				310	}
				311	/**
				312	* dev_remove_pack - remove packet handler
				313	* @pt: packet type declaration
				314	*
				315	* Remove a protocol handler that was previously added to the kernel
				316	* protocol handlers by dev_add_pack(). The passed &packet_type is removed
				317	* from the kernel lists and can be freed or reused once this function
				318	* returns.
				319	*
				320	* This call sleeps to guarantee that no CPU is looking at the packet
				321	* type after return.
				322	*/
				323	void dev_remove_pack(struct packet_type *pt)
				324	{
				325	__dev_remove_pack(pt);
				326
				327	synchronize_net();
				328	}
				329
				330	/******************************************************************************
				331
				332	Device Boot-time Settings Routines
				333
				334	*******************************************************************************/
				335
				336	/* Boot time configuration table */
				337	static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
				338
				339	/**
				340	* netdev_boot_setup_add - add new setup entry
				341	* @name: name of the device
				342	* @map: configured settings for the device
				343	*
				344	* Adds new setup entry to the dev_boot_setup list. The function
				345	* returns 0 on error and 1 on success. This is a generic routine to
				346	* all netdevices.
				347	*/
				348	static int netdev_boot_setup_add(char name, struct ifmap map)
				349	{
				350	struct netdev_boot_setup *s;
				351	int i;
				352
				353	s = dev_boot_setup;
				354	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
				355	if (s[i].name[0] == '\0' \|\| s[i].name[0] == ' ') {
				356	memset(s[i].name, 0, sizeof(s[i].name));
				357	strcpy(s[i].name, name);
				358	memcpy(&s[i].map, map, sizeof(s[i].map));
				359	break;
				360	}
				361	}
				362
				363	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
				364	}
				365
				366	/**
				367	* netdev_boot_setup_check - check boot time settings
				368	* @dev: the netdevice
				369	*
				370	* Check boot time settings for the device.
				371	* The found settings are set for the device to be used
				372	* later in the device probing.
				373	* Returns 0 if no settings found, 1 if they are.
				374	*/
				375	int netdev_boot_setup_check(struct net_device *dev)
				376	{
				377	struct netdev_boot_setup *s = dev_boot_setup;
				378	int i;
				379
				380	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
				381	if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
				382	!strncmp(dev->name, s[i].name, strlen(s[i].name))) {
				383	dev->irq = s[i].map.irq;
				384	dev->base_addr = s[i].map.base_addr;
				385	dev->mem_start = s[i].map.mem_start;
				386	dev->mem_end = s[i].map.mem_end;
				387	return 1;
				388	}
				389	}
				390	return 0;
				391	}
				392
				393
				394	/**
				395	* netdev_boot_base - get address from boot time settings
				396	* @prefix: prefix for network device
				397	* @unit: id for network device
				398	*
				399	* Check boot time settings for the base address of device.
				400	* The found settings are set for the device to be used
				401	* later in the device probing.
				402	* Returns 0 if no settings found.
				403	*/
				404	unsigned long netdev_boot_base(const char *prefix, int unit)
				405	{
				406	const struct netdev_boot_setup *s = dev_boot_setup;
				407	char name[IFNAMSIZ];
				408	int i;
				409
				410	sprintf(name, "%s%d", prefix, unit);
				411
				412	/*
				413	* If device already registered then return base of 1
				414	* to indicate not to probe for this interface
				415	*/
				416	if (__dev_get_by_name(name))
				417	return 1;
				418
				419	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
				420	if (!strcmp(name, s[i].name))
				421	return s[i].map.base_addr;
				422	return 0;
				423	}
				424
				425	/*
				426	* Saves at boot time configured settings for any netdevice.
				427	*/
				428	int __init netdev_boot_setup(char *str)
				429	{
				430	int ints[5];
				431	struct ifmap map;
				432
				433	str = get_options(str, ARRAY_SIZE(ints), ints);
				434	if (!str \|\| !*str)
				435	return 0;
				436
				437	/* Save settings */
				438	memset(&map, 0, sizeof(map));
				439	if (ints[0] > 0)
				440	map.irq = ints[1];
				441	if (ints[0] > 1)
				442	map.base_addr = ints[2];
				443	if (ints[0] > 2)
				444	map.mem_start = ints[3];
				445	if (ints[0] > 3)
				446	map.mem_end = ints[4];
				447
				448	/* Add new entry to the list */
				449	return netdev_boot_setup_add(str, &map);
				450	}
				451
				452	__setup("netdev=", netdev_boot_setup);
				453
				454	/*******************************************************************************
				455
				456	Device Interface Subroutines
				457
				458	*******************************************************************************/
				459
				460	/**
				461	* __dev_get_by_name - find a device by its name
				462	* @name: name to find
				463	*
				464	* Find an interface by name. Must be called under RTNL semaphore
				465	* or @dev_base_lock. If the name is found a pointer to the device
				466	* is returned. If the name is not found then %NULL is returned. The
				467	* reference counters are not incremented so the caller must be
				468	* careful with locks.
				469	*/
				470
				471	struct net_device __dev_get_by_name(const char name)
				472	{
				473	struct hlist_node *p;
				474
				475	hlist_for_each(p, dev_name_hash(name)) {
				476	struct net_device *dev
				477	= hlist_entry(p, struct net_device, name_hlist);
				478	if (!strncmp(dev->name, name, IFNAMSIZ))
				479	return dev;
				480	}
				481	return NULL;
				482	}
				483
				484	/**
				485	* dev_get_by_name - find a device by its name
				486	* @name: name to find
				487	*
				488	* Find an interface by name. This can be called from any
				489	* context and does its own locking. The returned handle has
				490	* the usage count incremented and the caller must use dev_put() to
				491	* release it when it is no longer needed. %NULL is returned if no
				492	* matching device is found.
				493	*/
				494
				495	struct net_device dev_get_by_name(const char name)
				496	{
				497	struct net_device *dev;
				498
				499	read_lock(&dev_base_lock);
				500	dev = __dev_get_by_name(name);
				501	if (dev)
				502	dev_hold(dev);
				503	read_unlock(&dev_base_lock);
				504	return dev;
				505	}
				506
				507	/**
				508	* __dev_get_by_index - find a device by its ifindex
				509	* @ifindex: index of device
				510	*
				511	* Search for an interface by index. Returns %NULL if the device
				512	* is not found or a pointer to the device. The device has not
				513	* had its reference counter increased so the caller must be careful
				514	* about locking. The caller must hold either the RTNL semaphore
				515	* or @dev_base_lock.
				516	*/
				517
				518	struct net_device *__dev_get_by_index(int ifindex)
				519	{
				520	struct hlist_node *p;
				521
				522	hlist_for_each(p, dev_index_hash(ifindex)) {
				523	struct net_device *dev
				524	= hlist_entry(p, struct net_device, index_hlist);
				525	if (dev->ifindex == ifindex)
				526	return dev;
				527	}
				528	return NULL;
				529	}
				530
				531
				532	/**
				533	* dev_get_by_index - find a device by its ifindex
				534	* @ifindex: index of device
				535	*
				536	* Search for an interface by index. Returns NULL if the device
				537	* is not found or a pointer to the device. The device returned has
				538	* had a reference added and the pointer is safe until the user calls
				539	* dev_put to indicate they have finished with it.
				540	*/
				541
				542	struct net_device *dev_get_by_index(int ifindex)
				543	{
				544	struct net_device *dev;
				545
				546	read_lock(&dev_base_lock);
				547	dev = __dev_get_by_index(ifindex);
				548	if (dev)
				549	dev_hold(dev);
				550	read_unlock(&dev_base_lock);
				551	return dev;
				552	}
				553
				554	/**
				555	* dev_getbyhwaddr - find a device by its hardware address
				556	* @type: media type of device
				557	* @ha: hardware address
				558	*
				559	* Search for an interface by MAC address. Returns NULL if the device
				560	* is not found or a pointer to the device. The caller must hold the
				561	* rtnl semaphore. The returned device has not had its ref count increased
				562	* and the caller must therefore be careful about locking
				563	*
				564	* BUGS:
				565	* If the API was consistent this would be __dev_get_by_hwaddr
				566	*/
				567
				568	struct net_device dev_getbyhwaddr(unsigned short type, char ha)
				569	{
				570	struct net_device *dev;
				571
				572	ASSERT_RTNL();
				573
				574	for (dev = dev_base; dev; dev = dev->next)
				575	if (dev->type == type &&
				576	!memcmp(dev->dev_addr, ha, dev->addr_len))
				577	break;
				578	return dev;
				579	}
				580
				581	struct net_device *dev_getfirstbyhwtype(unsigned short type)
				582	{
				583	struct net_device *dev;
				584
				585	rtnl_lock();
				586	for (dev = dev_base; dev; dev = dev->next) {
				587	if (dev->type == type) {
				588	dev_hold(dev);
				589	break;
				590	}
				591	}
				592	rtnl_unlock();
				593	return dev;
				594	}
				595
				596	EXPORT_SYMBOL(dev_getfirstbyhwtype);
				597
				598	/**
				599	* dev_get_by_flags - find any device with given flags
				600	* @if_flags: IFF_* values
				601	* @mask: bitmask of bits in if_flags to check
				602	*
				603	* Search for any interface with the given flags. Returns NULL if a device
				604	* is not found or a pointer to the device. The device returned has
				605	* had a reference added and the pointer is safe until the user calls
				606	* dev_put to indicate they have finished with it.
				607	*/
				608
				609	struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
				610	{
				611	struct net_device *dev;
				612
				613	read_lock(&dev_base_lock);
				614	for (dev = dev_base; dev != NULL; dev = dev->next) {
				615	if (((dev->flags ^ if_flags) & mask) == 0) {
				616	dev_hold(dev);
				617	break;
				618	}
				619	}
				620	read_unlock(&dev_base_lock);
				621	return dev;
				622	}
				623
				624	/**
				625	* dev_valid_name - check if name is okay for network device
				626	* @name: name string
				627	*
				628	* Network device names need to be valid file names to
				629	* to allow sysfs to work
				630	*/
				631	static int dev_valid_name(const char *name)
				632	{
				633	return !(*name == '\0'
				634	\|\| !strcmp(name, ".")
				635	\|\| !strcmp(name, "..")
				636	\|\| strchr(name, '/'));
				637	}
				638
				639	/**
				640	* dev_alloc_name - allocate a name for a device
				641	* @dev: device
				642	* @name: name format string
				643	*
				644	* Passed a format string - eg "lt%d" it will try and find a suitable
				645	* id. Not efficient for many devices, not called a lot. The caller
				646	* must hold the dev_base or rtnl lock while allocating the name and
				647	* adding the device in order to avoid duplicates. Returns the number
				648	* of the unit assigned or a negative errno code.
				649	*/
				650
				651	int dev_alloc_name(struct net_device dev, const char name)
				652	{
				653	int i = 0;
				654	char buf[IFNAMSIZ];
				655	const char *p;
				656	const int max_netdevices = 8*PAGE_SIZE;
				657	long *inuse;
				658	struct net_device *d;
				659
				660	p = strnchr(name, IFNAMSIZ-1, '%');
				661	if (p) {
				662	/*
				663	* Verify the string as this thing may have come from
				664	* the user. There must be either one "%d" and no other "%"
				665	* characters.
				666	*/
				667	if (p[1] != 'd' \|\| strchr(p + 2, '%'))
				668	return -EINVAL;
				669
				670	/* Use one page as a bit array of possible slots */
				671	inuse = (long *) get_zeroed_page(GFP_ATOMIC);
				672	if (!inuse)
				673	return -ENOMEM;
				674
				675	for (d = dev_base; d; d = d->next) {
				676	if (!sscanf(d->name, name, &i))
				677	continue;
				678	if (i < 0 \|\| i >= max_netdevices)
				679	continue;
				680
				681	/* avoid cases where sscanf is not exact inverse of printf */
				682	snprintf(buf, sizeof(buf), name, i);
				683	if (!strncmp(buf, d->name, IFNAMSIZ))
				684	set_bit(i, inuse);
				685	}
				686
				687	i = find_first_zero_bit(inuse, max_netdevices);
				688	free_page((unsigned long) inuse);
				689	}
				690
				691	snprintf(buf, sizeof(buf), name, i);
				692	if (!__dev_get_by_name(buf)) {
				693	strlcpy(dev->name, buf, IFNAMSIZ);
				694	return i;
				695	}
				696
				697	/* It is possible to run out of possible slots
				698	* when the name is long and there isn't enough space left
				699	* for the digits, or if all bits are used.
				700	*/
				701	return -ENFILE;
				702	}
				703
				704
				705	/**
				706	* dev_change_name - change name of a device
				707	* @dev: device
				708	* @newname: name (or format string) must be at least IFNAMSIZ
				709	*
				710	* Change name of a device, can pass format strings "eth%d".
				711	* for wildcarding.
				712	*/
				713	int dev_change_name(struct net_device dev, char newname)
				714	{
				715	int err = 0;
				716
				717	ASSERT_RTNL();
				718
				719	if (dev->flags & IFF_UP)
				720	return -EBUSY;
				721
				722	if (!dev_valid_name(newname))
				723	return -EINVAL;
				724
				725	if (strchr(newname, '%')) {
				726	err = dev_alloc_name(dev, newname);
				727	if (err < 0)
				728	return err;
				729	strcpy(newname, dev->name);
				730	}
				731	else if (__dev_get_by_name(newname))
				732	return -EEXIST;
				733	else
				734	strlcpy(dev->name, newname, IFNAMSIZ);
				735
				736	err = class_device_rename(&dev->class_dev, dev->name);
				737	if (!err) {
				738	hlist_del(&dev->name_hlist);
				739	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
				740	notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
				741	}
				742
				743	return err;
				744	}
				745
				746	/**
Stephen Hemminger	d8a33ac	2005-05-29 14:13:47 -0700	[diff] [blame]	747	* netdev_features_change - device changes fatures
				748	* @dev: device to cause notification
				749	*
				750	* Called to indicate a device has changed features.
				751	*/
				752	void netdev_features_change(struct net_device *dev)
				753	{
				754	notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
				755	}
				756	EXPORT_SYMBOL(netdev_features_change);
				757
				758	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	759	* netdev_state_change - device changes state
				760	* @dev: device to cause notification
				761	*
				762	* Called to indicate a device has changed state. This function calls
				763	* the notifier chains for netdev_chain and sends a NEWLINK message
				764	* to the routing socket.
				765	*/
				766	void netdev_state_change(struct net_device *dev)
				767	{
				768	if (dev->flags & IFF_UP) {
				769	notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
				770	rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
				771	}
				772	}
				773
				774	/**
				775	* dev_load - load a network module
				776	* @name: name of interface
				777	*
				778	* If a network interface is not present and the process has suitable
				779	* privileges this function loads the module. If module loading is not
				780	* available in this kernel then it becomes a nop.
				781	*/
				782
				783	void dev_load(const char *name)
				784	{
				785	struct net_device *dev;
				786
				787	read_lock(&dev_base_lock);
				788	dev = __dev_get_by_name(name);
				789	read_unlock(&dev_base_lock);
				790
				791	if (!dev && capable(CAP_SYS_MODULE))
				792	request_module("%s", name);
				793	}
				794
				795	static int default_rebuild_header(struct sk_buff *skb)
				796	{
				797	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
				798	skb->dev ? skb->dev->name : "NULL!!!");
				799	kfree_skb(skb);
				800	return 1;
				801	}
				802
				803
				804	/**
				805	* dev_open - prepare an interface for use.
				806	* @dev: device to open
				807	*
				808	* Takes a device from down to up state. The device's private open
				809	* function is invoked and then the multicast lists are loaded. Finally
				810	* the device is moved into the up state and a %NETDEV_UP message is
				811	* sent to the netdev notifier chain.
				812	*
				813	* Calling this function on an active interface is a nop. On a failure
				814	* a negative errno code is returned.
				815	*/
				816	int dev_open(struct net_device *dev)
				817	{
				818	int ret = 0;
				819
				820	/*
				821	* Is it already up?
				822	*/
				823
				824	if (dev->flags & IFF_UP)
				825	return 0;
				826
				827	/*
				828	* Is it even present?
				829	*/
				830	if (!netif_device_present(dev))
				831	return -ENODEV;
				832
				833	/*
				834	* Call device private open method
				835	*/
				836	set_bit(__LINK_STATE_START, &dev->state);
				837	if (dev->open) {
				838	ret = dev->open(dev);
				839	if (ret)
				840	clear_bit(__LINK_STATE_START, &dev->state);
				841	}
				842
				843	/*
				844	* If it went open OK then:
				845	*/
				846
				847	if (!ret) {
				848	/*
				849	* Set the flags.
				850	*/
				851	dev->flags \|= IFF_UP;
				852
				853	/*
				854	* Initialize multicasting status
				855	*/
				856	dev_mc_upload(dev);
				857
				858	/*
				859	* Wakeup transmit queue engine
				860	*/
				861	dev_activate(dev);
				862
				863	/*
				864	* ... and announce new interface.
				865	*/
				866	notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
				867	}
				868	return ret;
				869	}
				870
				871	/**
				872	* dev_close - shutdown an interface.
				873	* @dev: device to shutdown
				874	*
				875	* This function moves an active device into down state. A
				876	* %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
				877	* is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
				878	* chain.
				879	*/
				880	int dev_close(struct net_device *dev)
				881	{
				882	if (!(dev->flags & IFF_UP))
				883	return 0;
				884
				885	/*
				886	* Tell people we are going down, so that they can
				887	* prepare to death, when device is still operating.
				888	*/
				889	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
				890
				891	dev_deactivate(dev);
				892
				893	clear_bit(__LINK_STATE_START, &dev->state);
				894
				895	/* Synchronize to scheduled poll. We cannot touch poll list,
				896	* it can be even on different cpu. So just clear netif_running(),
				897	* and wait when poll really will happen. Actually, the best place
				898	* for this is inside dev->stop() after device stopped its irq
				899	* engine, but this requires more changes in devices. */
				900
				901	smp_mb__after_clear_bit(); /* Commit netif_running(). */
				902	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
				903	/* No hurry. */
				904	current->state = TASK_INTERRUPTIBLE;
				905	schedule_timeout(1);
				906	}
				907
				908	/*
				909	* Call the device specific close. This cannot fail.
				910	* Only if device is UP
				911	*
				912	* We allow it to be called even after a DETACH hot-plug
				913	* event.
				914	*/
				915	if (dev->stop)
				916	dev->stop(dev);
				917
				918	/*
				919	* Device is now down.
				920	*/
				921
				922	dev->flags &= ~IFF_UP;
				923
				924	/*
				925	* Tell people we are down
				926	*/
				927	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
				928
				929	return 0;
				930	}
				931
				932
				933	/*
				934	* Device change register/unregister. These are not inline or static
				935	* as we export them to the world.
				936	*/
				937
				938	/**
				939	* register_netdevice_notifier - register a network notifier block
				940	* @nb: notifier
				941	*
				942	* Register a notifier to be called when network device events occur.
				943	* The notifier passed is linked into the kernel structures and must
				944	* not be reused until it has been unregistered. A negative errno code
				945	* is returned on a failure.
				946	*
				947	* When registered all registration and up events are replayed
				948	* to the new notifier to allow device to have a race free
				949	* view of the network device list.
				950	*/
				951
				952	int register_netdevice_notifier(struct notifier_block *nb)
				953	{
				954	struct net_device *dev;
				955	int err;
				956
				957	rtnl_lock();
				958	err = notifier_chain_register(&netdev_chain, nb);
				959	if (!err) {
				960	for (dev = dev_base; dev; dev = dev->next) {
				961	nb->notifier_call(nb, NETDEV_REGISTER, dev);
				962
				963	if (dev->flags & IFF_UP)
				964	nb->notifier_call(nb, NETDEV_UP, dev);
				965	}
				966	}
				967	rtnl_unlock();
				968	return err;
				969	}
				970
				971	/**
				972	* unregister_netdevice_notifier - unregister a network notifier block
				973	* @nb: notifier
				974	*
				975	* Unregister a notifier previously registered by
				976	* register_netdevice_notifier(). The notifier is unlinked from the
				977	* kernel structures and may then be reused. A negative errno code
				978	* is returned on a failure.
				979	*/
				980
				981	int unregister_netdevice_notifier(struct notifier_block *nb)
				982	{
				983	return notifier_chain_unregister(&netdev_chain, nb);
				984	}
				985
				986	/**
				987	* call_netdevice_notifiers - call all network notifier blocks
				988	* @val: value passed unmodified to notifier function
				989	* @v: pointer passed unmodified to notifier function
				990	*
				991	* Call all network notifier blocks. Parameters and return value
				992	* are as for notifier_call_chain().
				993	*/
				994
				995	int call_netdevice_notifiers(unsigned long val, void *v)
				996	{
				997	return notifier_call_chain(&netdev_chain, val, v);
				998	}
				999
				1000	/* When > 0 there are consumers of rx skb time stamps */
				1001	static atomic_t netstamp_needed = ATOMIC_INIT(0);
				1002
				1003	void net_enable_timestamp(void)
				1004	{
				1005	atomic_inc(&netstamp_needed);
				1006	}
				1007
				1008	void net_disable_timestamp(void)
				1009	{
				1010	atomic_dec(&netstamp_needed);
				1011	}
				1012
				1013	static inline void net_timestamp(struct timeval *stamp)
				1014	{
				1015	if (atomic_read(&netstamp_needed))
				1016	do_gettimeofday(stamp);
				1017	else {
				1018	stamp->tv_sec = 0;
				1019	stamp->tv_usec = 0;
				1020	}
				1021	}
				1022
				1023	/*
				1024	* Support routine. Sends outgoing frames to any network
				1025	* taps currently in use.
				1026	*/
				1027
				1028	void dev_queue_xmit_nit(struct sk_buff skb, struct net_device dev)
				1029	{
				1030	struct packet_type *ptype;
				1031	net_timestamp(&skb->stamp);
				1032
				1033	rcu_read_lock();
				1034	list_for_each_entry_rcu(ptype, &ptype_all, list) {
				1035	/* Never send packets back to the socket
				1036	* they originated from - MvS (miquels@drinkel.ow.org)
				1037	*/
				1038	if ((ptype->dev == dev \|\| !ptype->dev) &&
				1039	(ptype->af_packet_priv == NULL \|\|
				1040	(struct sock *)ptype->af_packet_priv != skb->sk)) {
				1041	struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
				1042	if (!skb2)
				1043	break;
				1044
				1045	/* skb->nh should be correctly
				1046	set by sender, so that the second statement is
				1047	just protection against buggy protocols.
				1048	*/
				1049	skb2->mac.raw = skb2->data;
				1050
				1051	if (skb2->nh.raw < skb2->data \|\|
				1052	skb2->nh.raw > skb2->tail) {
				1053	if (net_ratelimit())
				1054	printk(KERN_CRIT "protocol %04x is "
				1055	"buggy, dev %s\n",
				1056	skb2->protocol, dev->name);
				1057	skb2->nh.raw = skb2->data;
				1058	}
				1059
				1060	skb2->h.raw = skb2->nh.raw;
				1061	skb2->pkt_type = PACKET_OUTGOING;
				1062	ptype->func(skb2, skb->dev, ptype);
				1063	}
				1064	}
				1065	rcu_read_unlock();
				1066	}
				1067
				1068	/*
				1069	* Invalidate hardware checksum when packet is to be mangled, and
				1070	* complete checksum manually on outgoing path.
				1071	*/
				1072	int skb_checksum_help(struct sk_buff *skb, int inward)
				1073	{
				1074	unsigned int csum;
				1075	int ret = 0, offset = skb->h.raw - skb->data;
				1076
				1077	if (inward) {
				1078	skb->ip_summed = CHECKSUM_NONE;
				1079	goto out;
				1080	}
				1081
				1082	if (skb_cloned(skb)) {
				1083	ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
				1084	if (ret)
				1085	goto out;
				1086	}
				1087
				1088	if (offset > (int)skb->len)
				1089	BUG();
				1090	csum = skb_checksum(skb, offset, skb->len-offset, 0);
				1091
				1092	offset = skb->tail - skb->h.raw;
				1093	if (offset <= 0)
				1094	BUG();
				1095	if (skb->csum + 2 > offset)
				1096	BUG();
				1097
				1098	(u16)(skb->h.raw + skb->csum) = csum_fold(csum);
				1099	skb->ip_summed = CHECKSUM_NONE;
				1100	out:
				1101	return ret;
				1102	}
				1103
				1104	#ifdef CONFIG_HIGHMEM
				1105	/* Actually, we should eliminate this check as soon as we know, that:
				1106	* 1. IOMMU is present and allows to map all the memory.
				1107	* 2. No high memory really exists on this machine.
				1108	*/
				1109
				1110	static inline int illegal_highdma(struct net_device dev, struct sk_buff skb)
				1111	{
				1112	int i;
				1113
				1114	if (dev->features & NETIF_F_HIGHDMA)
				1115	return 0;
				1116
				1117	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
				1118	if (PageHighMem(skb_shinfo(skb)->frags[i].page))
				1119	return 1;
				1120
				1121	return 0;
				1122	}
				1123	#else
				1124	#define illegal_highdma(dev, skb) (0)
				1125	#endif
				1126
				1127	extern void skb_release_data(struct sk_buff *);
				1128
				1129	/* Keep head the same: replace data */
				1130	int __skb_linearize(struct sk_buff *skb, int gfp_mask)
				1131	{
				1132	unsigned int size;
				1133	u8 *data;
				1134	long offset;
				1135	struct skb_shared_info *ninfo;
				1136	int headerlen = skb->data - skb->head;
				1137	int expand = (skb->tail + skb->data_len) - skb->end;
				1138
				1139	if (skb_shared(skb))
				1140	BUG();
				1141
				1142	if (expand <= 0)
				1143	expand = 0;
				1144
				1145	size = skb->end - skb->head + expand;
				1146	size = SKB_DATA_ALIGN(size);
				1147	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
				1148	if (!data)
				1149	return -ENOMEM;
				1150
				1151	/* Copy entire thing */
				1152	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
				1153	BUG();
				1154
				1155	/* Set up shinfo */
				1156	ninfo = (struct skb_shared_info*)(data + size);
				1157	atomic_set(&ninfo->dataref, 1);
				1158	ninfo->tso_size = skb_shinfo(skb)->tso_size;
				1159	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
				1160	ninfo->nr_frags = 0;
				1161	ninfo->frag_list = NULL;
				1162
				1163	/* Offset between the two in bytes */
				1164	offset = data - skb->head;
				1165
				1166	/* Free old data. */
				1167	skb_release_data(skb);
				1168
				1169	skb->head = data;
				1170	skb->end = data + size;
				1171
				1172	/* Set up new pointers */
				1173	skb->h.raw += offset;
				1174	skb->nh.raw += offset;
				1175	skb->mac.raw += offset;
				1176	skb->tail += offset;
				1177	skb->data += offset;
				1178
				1179	/* We are no longer a clone, even if we were. */
				1180	skb->cloned = 0;
				1181
				1182	skb->tail += skb->data_len;
				1183	skb->data_len = 0;
				1184	return 0;
				1185	}
				1186
				1187	#define HARD_TX_LOCK(dev, cpu) { \
				1188	if ((dev->features & NETIF_F_LLTX) == 0) { \
				1189	spin_lock(&dev->xmit_lock); \
				1190	dev->xmit_lock_owner = cpu; \
				1191	} \
				1192	}
				1193
				1194	#define HARD_TX_UNLOCK(dev) { \
				1195	if ((dev->features & NETIF_F_LLTX) == 0) { \
				1196	dev->xmit_lock_owner = -1; \
				1197	spin_unlock(&dev->xmit_lock); \
				1198	} \
				1199	}
				1200
				1201	/**
				1202	* dev_queue_xmit - transmit a buffer
				1203	* @skb: buffer to transmit
				1204	*
				1205	* Queue a buffer for transmission to a network device. The caller must
				1206	* have set the device and priority and built the buffer before calling
				1207	* this function. The function can be called from an interrupt.
				1208	*
				1209	* A negative errno code is returned on a failure. A success does not
				1210	* guarantee the frame will be transmitted as it may be dropped due
				1211	* to congestion or traffic shaping.
Ben Greear	af19136	2005-04-24 20:12:36 -0700	[diff] [blame]	1212	*
				1213	* -----------------------------------------------------------------------------------
				1214	* I notice this method can also return errors from the queue disciplines,
				1215	* including NET_XMIT_DROP, which is a positive value. So, errors can also
				1216	* be positive.
				1217	*
				1218	* Regardless of the return value, the skb is consumed, so it is currently
				1219	* difficult to retry a send to this method. (You can bump the ref count
				1220	* before sending to hold a reference for retry if you are careful.)
				1221	*
				1222	* When calling this method, interrupts MUST be enabled. This is because
				1223	* the BH enable code must have IRQs enabled so that it will not deadlock.
				1224	* --BLG
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1225	*/
				1226
				1227	int dev_queue_xmit(struct sk_buff *skb)
				1228	{
				1229	struct net_device *dev = skb->dev;
				1230	struct Qdisc *q;
				1231	int rc = -ENOMEM;
				1232
				1233	if (skb_shinfo(skb)->frag_list &&
				1234	!(dev->features & NETIF_F_FRAGLIST) &&
				1235	__skb_linearize(skb, GFP_ATOMIC))
				1236	goto out_kfree_skb;
				1237
				1238	/* Fragmented skb is linearized if device does not support SG,
				1239	* or if at least one of fragments is in highmem and device
				1240	* does not support DMA from it.
				1241	*/
				1242	if (skb_shinfo(skb)->nr_frags &&
				1243	(!(dev->features & NETIF_F_SG) \|\| illegal_highdma(dev, skb)) &&
				1244	__skb_linearize(skb, GFP_ATOMIC))
				1245	goto out_kfree_skb;
				1246
				1247	/* If packet is not checksummed and device does not support
				1248	* checksumming for this protocol, complete checksumming here.
				1249	*/
				1250	if (skb->ip_summed == CHECKSUM_HW &&
				1251	(!(dev->features & (NETIF_F_HW_CSUM \| NETIF_F_NO_CSUM)) &&
				1252	(!(dev->features & NETIF_F_IP_CSUM) \|\|
				1253	skb->protocol != htons(ETH_P_IP))))
				1254	if (skb_checksum_help(skb, 0))
				1255	goto out_kfree_skb;
				1256
				1257	/* Disable soft irqs for various locks below. Also
				1258	* stops preemption for RCU.
				1259	*/
				1260	local_bh_disable();
				1261
				1262	/* Updates of qdisc are serialized by queue_lock.
				1263	* The struct Qdisc which is pointed to by qdisc is now a
				1264	* rcu structure - it may be accessed without acquiring
				1265	* a lock (but the structure may be stale.) The freeing of the
				1266	* qdisc will be deferred until it's known that there are no
				1267	* more references to it.
				1268	*
				1269	* If the qdisc has an enqueue function, we still need to
				1270	* hold the queue_lock before calling it, since queue_lock
				1271	* also serializes access to the device queue.
				1272	*/
				1273
				1274	q = rcu_dereference(dev->qdisc);
				1275	#ifdef CONFIG_NET_CLS_ACT
				1276	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
				1277	#endif
				1278	if (q->enqueue) {
				1279	/* Grab device queue */
				1280	spin_lock(&dev->queue_lock);
				1281
				1282	rc = q->enqueue(skb, q);
				1283
				1284	qdisc_run(dev);
				1285
				1286	spin_unlock(&dev->queue_lock);
				1287	rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
				1288	goto out;
				1289	}
				1290
				1291	/* The device has no queue. Common case for software devices:
				1292	loopback, all the sorts of tunnels...
				1293
				1294	Really, it is unlikely that xmit_lock protection is necessary here.
				1295	(f.e. loopback and IP tunnels are clean ignoring statistics
				1296	counters.)
				1297	However, it is possible, that they rely on protection
				1298	made by us here.
				1299
				1300	Check this and shot the lock. It is not prone from deadlocks.
				1301	Either shot noqueue qdisc, it is even simpler 8)
				1302	*/
				1303	if (dev->flags & IFF_UP) {
				1304	int cpu = smp_processor_id(); /* ok because BHs are off */
				1305
				1306	if (dev->xmit_lock_owner != cpu) {
				1307
				1308	HARD_TX_LOCK(dev, cpu);
				1309
				1310	if (!netif_queue_stopped(dev)) {
				1311	if (netdev_nit)
				1312	dev_queue_xmit_nit(skb, dev);
				1313
				1314	rc = 0;
				1315	if (!dev->hard_start_xmit(skb, dev)) {
				1316	HARD_TX_UNLOCK(dev);
				1317	goto out;
				1318	}
				1319	}
				1320	HARD_TX_UNLOCK(dev);
				1321	if (net_ratelimit())
				1322	printk(KERN_CRIT "Virtual device %s asks to "
				1323	"queue packet!\n", dev->name);
				1324	} else {
				1325	/* Recursion is detected! It is possible,
				1326	* unfortunately */
				1327	if (net_ratelimit())
				1328	printk(KERN_CRIT "Dead loop on virtual device "
				1329	"%s, fix it urgently!\n", dev->name);
				1330	}
				1331	}
				1332
				1333	rc = -ENETDOWN;
				1334	local_bh_enable();
				1335
				1336	out_kfree_skb:
				1337	kfree_skb(skb);
				1338	return rc;
				1339	out:
				1340	local_bh_enable();
				1341	return rc;
				1342	}
				1343
				1344
				1345	/*=======================================================================
				1346	Receiver routines
				1347	=======================================================================*/
				1348
				1349	int netdev_max_backlog = 300;
				1350	int weight_p = 64; /* old backlog weight */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1351
				1352	DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
				1353
				1354
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1355	/**
				1356	* netif_rx - post buffer to the network code
				1357	* @skb: buffer to post
				1358	*
				1359	* This function receives a packet from a device driver and queues it for
				1360	* the upper (protocol) levels to process. It always succeeds. The buffer
				1361	* may be dropped during processing for congestion control or by the
				1362	* protocol layers.
				1363	*
				1364	* return values:
				1365	* NET_RX_SUCCESS (no congestion)
				1366	* NET_RX_CN_LOW (low congestion)
				1367	* NET_RX_CN_MOD (moderate congestion)
				1368	* NET_RX_CN_HIGH (high congestion)
				1369	* NET_RX_DROP (packet was dropped)
				1370	*
				1371	*/
				1372
				1373	int netif_rx(struct sk_buff *skb)
				1374	{
				1375	int this_cpu;
				1376	struct softnet_data *queue;
				1377	unsigned long flags;
				1378
				1379	/* if netpoll wants it, pretend we never saw it */
				1380	if (netpoll_rx(skb))
				1381	return NET_RX_DROP;
				1382
				1383	if (!skb->stamp.tv_sec)
				1384	net_timestamp(&skb->stamp);
				1385
				1386	/*
				1387	* The code is rearranged so that the path is the most
				1388	* short when CPU is congested, but is still operating.
				1389	*/
				1390	local_irq_save(flags);
				1391	this_cpu = smp_processor_id();
				1392	queue = &__get_cpu_var(softnet_data);
				1393
				1394	__get_cpu_var(netdev_rx_stat).total++;
				1395	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
				1396	if (queue->input_pkt_queue.qlen) {
				1397	if (queue->throttle)
				1398	goto drop;
				1399
				1400	enqueue:
				1401	dev_hold(skb->dev);
				1402	__skb_queue_tail(&queue->input_pkt_queue, skb);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1403	local_irq_restore(flags);
Stephen Hemminger	34008d8	2005-06-23 20:10:00 -0700	[diff] [blame^]	1404	return NET_RX_SUCCESS;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1405	}
				1406
				1407	if (queue->throttle)
				1408	queue->throttle = 0;
				1409
				1410	netif_rx_schedule(&queue->backlog_dev);
				1411	goto enqueue;
				1412	}
				1413
				1414	if (!queue->throttle) {
				1415	queue->throttle = 1;
				1416	__get_cpu_var(netdev_rx_stat).throttled++;
				1417	}
				1418
				1419	drop:
				1420	__get_cpu_var(netdev_rx_stat).dropped++;
				1421	local_irq_restore(flags);
				1422
				1423	kfree_skb(skb);
				1424	return NET_RX_DROP;
				1425	}
				1426
				1427	int netif_rx_ni(struct sk_buff *skb)
				1428	{
				1429	int err;
				1430
				1431	preempt_disable();
				1432	err = netif_rx(skb);
				1433	if (local_softirq_pending())
				1434	do_softirq();
				1435	preempt_enable();
				1436
				1437	return err;
				1438	}
				1439
				1440	EXPORT_SYMBOL(netif_rx_ni);
				1441
				1442	static __inline__ void skb_bond(struct sk_buff *skb)
				1443	{
				1444	struct net_device *dev = skb->dev;
				1445
				1446	if (dev->master) {
				1447	skb->real_dev = skb->dev;
				1448	skb->dev = dev->master;
				1449	}
				1450	}
				1451
				1452	static void net_tx_action(struct softirq_action *h)
				1453	{
				1454	struct softnet_data *sd = &__get_cpu_var(softnet_data);
				1455
				1456	if (sd->completion_queue) {
				1457	struct sk_buff *clist;
				1458
				1459	local_irq_disable();
				1460	clist = sd->completion_queue;
				1461	sd->completion_queue = NULL;
				1462	local_irq_enable();
				1463
				1464	while (clist) {
				1465	struct sk_buff *skb = clist;
				1466	clist = clist->next;
				1467
				1468	BUG_TRAP(!atomic_read(&skb->users));
				1469	__kfree_skb(skb);
				1470	}
				1471	}
				1472
				1473	if (sd->output_queue) {
				1474	struct net_device *head;
				1475
				1476	local_irq_disable();
				1477	head = sd->output_queue;
				1478	sd->output_queue = NULL;
				1479	local_irq_enable();
				1480
				1481	while (head) {
				1482	struct net_device *dev = head;
				1483	head = head->next_sched;
				1484
				1485	smp_mb__before_clear_bit();
				1486	clear_bit(__LINK_STATE_SCHED, &dev->state);
				1487
				1488	if (spin_trylock(&dev->queue_lock)) {
				1489	qdisc_run(dev);
				1490	spin_unlock(&dev->queue_lock);
				1491	} else {
				1492	netif_schedule(dev);
				1493	}
				1494	}
				1495	}
				1496	}
				1497
				1498	static __inline__ int deliver_skb(struct sk_buff *skb,
				1499	struct packet_type *pt_prev)
				1500	{
				1501	atomic_inc(&skb->users);
				1502	return pt_prev->func(skb, skb->dev, pt_prev);
				1503	}
				1504
				1505	#if defined(CONFIG_BRIDGE) \|\| defined (CONFIG_BRIDGE_MODULE)
				1506	int (br_handle_frame_hook)(struct net_bridge_port p, struct sk_buff **pskb);
				1507	struct net_bridge;
				1508	struct net_bridge_fdb_entry (br_fdb_get_hook)(struct net_bridge *br,
				1509	unsigned char *addr);
				1510	void (br_fdb_put_hook)(struct net_bridge_fdb_entry ent);
				1511
				1512	static __inline__ int handle_bridge(struct sk_buff **pskb,
				1513	struct packet_type *pt_prev, int ret)
				1514	{
				1515	struct net_bridge_port *port;
				1516
				1517	if ((*pskb)->pkt_type == PACKET_LOOPBACK \|\|
				1518	(port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
				1519	return 0;
				1520
				1521	if (*pt_prev) {
				1522	ret = deliver_skb(pskb, *pt_prev);
				1523	*pt_prev = NULL;
				1524	}
				1525
				1526	return br_handle_frame_hook(port, pskb);
				1527	}
				1528	#else
				1529	#define handle_bridge(skb, pt_prev, ret) (0)
				1530	#endif
				1531
				1532	#ifdef CONFIG_NET_CLS_ACT
				1533	/* TODO: Maybe we should just force sch_ingress to be compiled in
				1534	* when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
				1535	* a compare and 2 stores extra right now if we dont have it on
				1536	* but have CONFIG_NET_CLS_ACT
				1537	* NOTE: This doesnt stop any functionality; if you dont have
				1538	* the ingress scheduler, you just cant add policies on ingress.
				1539	*
				1540	*/
				1541	static int ing_filter(struct sk_buff *skb)
				1542	{
				1543	struct Qdisc *q;
				1544	struct net_device *dev = skb->dev;
				1545	int result = TC_ACT_OK;
				1546
				1547	if (dev->qdisc_ingress) {
				1548	__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
				1549	if (MAX_RED_LOOP < ttl++) {
				1550	printk("Redir loop detected Dropping packet (%s->%s)\n",
				1551	skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
				1552	return TC_ACT_SHOT;
				1553	}
				1554
				1555	skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
				1556
				1557	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
				1558	if (NULL == skb->input_dev) {
				1559	skb->input_dev = skb->dev;
				1560	printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name);
				1561	}
				1562	spin_lock(&dev->ingress_lock);
				1563	if ((q = dev->qdisc_ingress) != NULL)
				1564	result = q->enqueue(skb, q);
				1565	spin_unlock(&dev->ingress_lock);
				1566
				1567	}
				1568
				1569	return result;
				1570	}
				1571	#endif
				1572
				1573	int netif_receive_skb(struct sk_buff *skb)
				1574	{
				1575	struct packet_type ptype, pt_prev;
				1576	int ret = NET_RX_DROP;
				1577	unsigned short type;
				1578
				1579	/* if we've gotten here through NAPI, check netpoll */
				1580	if (skb->dev->poll && netpoll_rx(skb))
				1581	return NET_RX_DROP;
				1582
				1583	if (!skb->stamp.tv_sec)
				1584	net_timestamp(&skb->stamp);
				1585
				1586	skb_bond(skb);
				1587
				1588	__get_cpu_var(netdev_rx_stat).total++;
				1589
				1590	skb->h.raw = skb->nh.raw = skb->data;
				1591	skb->mac_len = skb->nh.raw - skb->mac.raw;
				1592
				1593	pt_prev = NULL;
				1594
				1595	rcu_read_lock();
				1596
				1597	#ifdef CONFIG_NET_CLS_ACT
				1598	if (skb->tc_verd & TC_NCLS) {
				1599	skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
				1600	goto ncls;
				1601	}
				1602	#endif
				1603
				1604	list_for_each_entry_rcu(ptype, &ptype_all, list) {
				1605	if (!ptype->dev \|\| ptype->dev == skb->dev) {
				1606	if (pt_prev)
				1607	ret = deliver_skb(skb, pt_prev);
				1608	pt_prev = ptype;
				1609	}
				1610	}
				1611
				1612	#ifdef CONFIG_NET_CLS_ACT
				1613	if (pt_prev) {
				1614	ret = deliver_skb(skb, pt_prev);
				1615	pt_prev = NULL; /* noone else should process this after*/
				1616	} else {
				1617	skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
				1618	}
				1619
				1620	ret = ing_filter(skb);
				1621
				1622	if (ret == TC_ACT_SHOT \|\| (ret == TC_ACT_STOLEN)) {
				1623	kfree_skb(skb);
				1624	goto out;
				1625	}
				1626
				1627	skb->tc_verd = 0;
				1628	ncls:
				1629	#endif
				1630
				1631	handle_diverter(skb);
				1632
				1633	if (handle_bridge(&skb, &pt_prev, &ret))
				1634	goto out;
				1635
				1636	type = skb->protocol;
				1637	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
				1638	if (ptype->type == type &&
				1639	(!ptype->dev \|\| ptype->dev == skb->dev)) {
				1640	if (pt_prev)
				1641	ret = deliver_skb(skb, pt_prev);
				1642	pt_prev = ptype;
				1643	}
				1644	}
				1645
				1646	if (pt_prev) {
				1647	ret = pt_prev->func(skb, skb->dev, pt_prev);
				1648	} else {
				1649	kfree_skb(skb);
				1650	/* Jamal, now you will not able to escape explaining
				1651	* me how you were going to use this. :-)
				1652	*/
				1653	ret = NET_RX_DROP;
				1654	}
				1655
				1656	out:
				1657	rcu_read_unlock();
				1658	return ret;
				1659	}
				1660
				1661	static int process_backlog(struct net_device backlog_dev, int budget)
				1662	{
				1663	int work = 0;
				1664	int quota = min(backlog_dev->quota, *budget);
				1665	struct softnet_data *queue = &__get_cpu_var(softnet_data);
				1666	unsigned long start_time = jiffies;
				1667
Stephen Hemminger	e387660	2005-06-08 14:56:01 -0700	[diff] [blame]	1668	backlog_dev->weight = weight_p;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1669	for (;;) {
				1670	struct sk_buff *skb;
				1671	struct net_device *dev;
				1672
				1673	local_irq_disable();
				1674	skb = __skb_dequeue(&queue->input_pkt_queue);
				1675	if (!skb)
				1676	goto job_done;
				1677	local_irq_enable();
				1678
				1679	dev = skb->dev;
				1680
				1681	netif_receive_skb(skb);
				1682
				1683	dev_put(dev);
				1684
				1685	work++;
				1686
				1687	if (work >= quota \|\| jiffies - start_time > 1)
				1688	break;
				1689
				1690	}
				1691
				1692	backlog_dev->quota -= work;
				1693	*budget -= work;
				1694	return -1;
				1695
				1696	job_done:
				1697	backlog_dev->quota -= work;
				1698	*budget -= work;
				1699
				1700	list_del(&backlog_dev->poll_list);
				1701	smp_mb__before_clear_bit();
				1702	netif_poll_enable(backlog_dev);
				1703
				1704	if (queue->throttle)
				1705	queue->throttle = 0;
				1706	local_irq_enable();
				1707	return 0;
				1708	}
				1709
				1710	static void net_rx_action(struct softirq_action *h)
				1711	{
				1712	struct softnet_data *queue = &__get_cpu_var(softnet_data);
				1713	unsigned long start_time = jiffies;
				1714	int budget = netdev_max_backlog;
				1715
				1716
				1717	local_irq_disable();
				1718
				1719	while (!list_empty(&queue->poll_list)) {
				1720	struct net_device *dev;
				1721
				1722	if (budget <= 0 \|\| jiffies - start_time > 1)
				1723	goto softnet_break;
				1724
				1725	local_irq_enable();
				1726
				1727	dev = list_entry(queue->poll_list.next,
				1728	struct net_device, poll_list);
				1729	netpoll_poll_lock(dev);
				1730
				1731	if (dev->quota <= 0 \|\| dev->poll(dev, &budget)) {
				1732	netpoll_poll_unlock(dev);
				1733	local_irq_disable();
				1734	list_del(&dev->poll_list);
				1735	list_add_tail(&dev->poll_list, &queue->poll_list);
				1736	if (dev->quota < 0)
				1737	dev->quota += dev->weight;
				1738	else
				1739	dev->quota = dev->weight;
				1740	} else {
				1741	netpoll_poll_unlock(dev);
				1742	dev_put(dev);
				1743	local_irq_disable();
				1744	}
				1745	}
				1746	out:
				1747	local_irq_enable();
				1748	return;
				1749
				1750	softnet_break:
				1751	__get_cpu_var(netdev_rx_stat).time_squeeze++;
				1752	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
				1753	goto out;
				1754	}
				1755
				1756	static gifconf_func_t * gifconf_list [NPROTO];
				1757
				1758	/**
				1759	* register_gifconf - register a SIOCGIF handler
				1760	* @family: Address family
				1761	* @gifconf: Function handler
				1762	*
				1763	* Register protocol dependent address dumping routines. The handler
				1764	* that is passed must not be freed or reused until it has been replaced
				1765	* by another handler.
				1766	*/
				1767	int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
				1768	{
				1769	if (family >= NPROTO)
				1770	return -EINVAL;
				1771	gifconf_list[family] = gifconf;
				1772	return 0;
				1773	}
				1774
				1775
				1776	/*
				1777	* Map an interface index to its name (SIOCGIFNAME)
				1778	*/
				1779
				1780	/*
				1781	* We need this ioctl for efficient implementation of the
				1782	* if_indextoname() function required by the IPv6 API. Without
				1783	* it, we would have to search all the interfaces to find a
				1784	* match. --pb
				1785	*/
				1786
				1787	static int dev_ifname(struct ifreq __user *arg)
				1788	{
				1789	struct net_device *dev;
				1790	struct ifreq ifr;
				1791
				1792	/*
				1793	* Fetch the caller's info block.
				1794	*/
				1795
				1796	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
				1797	return -EFAULT;
				1798
				1799	read_lock(&dev_base_lock);
				1800	dev = __dev_get_by_index(ifr.ifr_ifindex);
				1801	if (!dev) {
				1802	read_unlock(&dev_base_lock);
				1803	return -ENODEV;
				1804	}
				1805
				1806	strcpy(ifr.ifr_name, dev->name);
				1807	read_unlock(&dev_base_lock);
				1808
				1809	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
				1810	return -EFAULT;
				1811	return 0;
				1812	}
				1813
				1814	/*
				1815	* Perform a SIOCGIFCONF call. This structure will change
				1816	* size eventually, and there is nothing I can do about it.
				1817	* Thus we will need a 'compatibility mode'.
				1818	*/
				1819
				1820	static int dev_ifconf(char __user *arg)
				1821	{
				1822	struct ifconf ifc;
				1823	struct net_device *dev;
				1824	char __user *pos;
				1825	int len;
				1826	int total;
				1827	int i;
				1828
				1829	/*
				1830	* Fetch the caller's info block.
				1831	*/
				1832
				1833	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
				1834	return -EFAULT;
				1835
				1836	pos = ifc.ifc_buf;
				1837	len = ifc.ifc_len;
				1838
				1839	/*
				1840	* Loop over the interfaces, and write an info block for each.
				1841	*/
				1842
				1843	total = 0;
				1844	for (dev = dev_base; dev; dev = dev->next) {
				1845	for (i = 0; i < NPROTO; i++) {
				1846	if (gifconf_list[i]) {
				1847	int done;
				1848	if (!pos)
				1849	done = gifconf_list[i](dev, NULL, 0);
				1850	else
				1851	done = gifconf_list[i](dev, pos + total,
				1852	len - total);
				1853	if (done < 0)
				1854	return -EFAULT;
				1855	total += done;
				1856	}
				1857	}
				1858	}
				1859
				1860	/*
				1861	* All done. Write the updated control block back to the caller.
				1862	*/
				1863	ifc.ifc_len = total;
				1864
				1865	/*
				1866	* Both BSD and Solaris return 0 here, so we do too.
				1867	*/
				1868	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
				1869	}
				1870
				1871	#ifdef CONFIG_PROC_FS
				1872	/*
				1873	* This is invoked by the /proc filesystem handler to display a device
				1874	* in detail.
				1875	*/
				1876	static __inline__ struct net_device *dev_get_idx(loff_t pos)
				1877	{
				1878	struct net_device *dev;
				1879	loff_t i;
				1880
				1881	for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
				1882
				1883	return i == pos ? dev : NULL;
				1884	}
				1885
				1886	void dev_seq_start(struct seq_file seq, loff_t *pos)
				1887	{
				1888	read_lock(&dev_base_lock);
				1889	return pos ? dev_get_idx(pos - 1) : SEQ_START_TOKEN;
				1890	}
				1891
				1892	void dev_seq_next(struct seq_file seq, void v, loff_t pos)
				1893	{
				1894	++*pos;
				1895	return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
				1896	}
				1897
				1898	void dev_seq_stop(struct seq_file seq, void v)
				1899	{
				1900	read_unlock(&dev_base_lock);
				1901	}
				1902
				1903	static void dev_seq_printf_stats(struct seq_file seq, struct net_device dev)
				1904	{
				1905	if (dev->get_stats) {
				1906	struct net_device_stats *stats = dev->get_stats(dev);
				1907
				1908	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
				1909	"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
				1910	dev->name, stats->rx_bytes, stats->rx_packets,
				1911	stats->rx_errors,
				1912	stats->rx_dropped + stats->rx_missed_errors,
				1913	stats->rx_fifo_errors,
				1914	stats->rx_length_errors + stats->rx_over_errors +
				1915	stats->rx_crc_errors + stats->rx_frame_errors,
				1916	stats->rx_compressed, stats->multicast,
				1917	stats->tx_bytes, stats->tx_packets,
				1918	stats->tx_errors, stats->tx_dropped,
				1919	stats->tx_fifo_errors, stats->collisions,
				1920	stats->tx_carrier_errors +
				1921	stats->tx_aborted_errors +
				1922	stats->tx_window_errors +
				1923	stats->tx_heartbeat_errors,
				1924	stats->tx_compressed);
				1925	} else
				1926	seq_printf(seq, "%6s: No statistics available.\n", dev->name);
				1927	}
				1928
				1929	/*
				1930	* Called from the PROCfs module. This now uses the new arbitrary sized
				1931	* /proc/net interface to create /proc/net/dev
				1932	*/
				1933	static int dev_seq_show(struct seq_file seq, void v)
				1934	{
				1935	if (v == SEQ_START_TOKEN)
				1936	seq_puts(seq, "Inter-\| Receive "
				1937	" \| Transmit\n"
				1938	" face \|bytes packets errs drop fifo frame "
				1939	"compressed multicast\|bytes packets errs "
				1940	"drop fifo colls carrier compressed\n");
				1941	else
				1942	dev_seq_printf_stats(seq, v);
				1943	return 0;
				1944	}
				1945
				1946	static struct netif_rx_stats softnet_get_online(loff_t pos)
				1947	{
				1948	struct netif_rx_stats *rc = NULL;
				1949
				1950	while (*pos < NR_CPUS)
				1951	if (cpu_online(*pos)) {
				1952	rc = &per_cpu(netdev_rx_stat, *pos);
				1953	break;
				1954	} else
				1955	++*pos;
				1956	return rc;
				1957	}
				1958
				1959	static void softnet_seq_start(struct seq_file seq, loff_t *pos)
				1960	{
				1961	return softnet_get_online(pos);
				1962	}
				1963
				1964	static void softnet_seq_next(struct seq_file seq, void v, loff_t pos)
				1965	{
				1966	++*pos;
				1967	return softnet_get_online(pos);
				1968	}
				1969
				1970	static void softnet_seq_stop(struct seq_file seq, void v)
				1971	{
				1972	}
				1973
				1974	static int softnet_seq_show(struct seq_file seq, void v)
				1975	{
				1976	struct netif_rx_stats *s = v;
				1977
				1978	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
				1979	s->total, s->dropped, s->time_squeeze, s->throttled,
Stephen Hemminger	c1ebcdb	2005-06-23 20:08:59 -0700	[diff] [blame]	1980	0, 0, 0, 0, /* was fastroute */
				1981	s->cpu_collision );
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1982	return 0;
				1983	}
				1984
				1985	static struct seq_operations dev_seq_ops = {
				1986	.start = dev_seq_start,
				1987	.next = dev_seq_next,
				1988	.stop = dev_seq_stop,
				1989	.show = dev_seq_show,
				1990	};
				1991
				1992	static int dev_seq_open(struct inode inode, struct file file)
				1993	{
				1994	return seq_open(file, &dev_seq_ops);
				1995	}
				1996
				1997	static struct file_operations dev_seq_fops = {
				1998	.owner = THIS_MODULE,
				1999	.open = dev_seq_open,
				2000	.read = seq_read,
				2001	.llseek = seq_lseek,
				2002	.release = seq_release,
				2003	};
				2004
				2005	static struct seq_operations softnet_seq_ops = {
				2006	.start = softnet_seq_start,
				2007	.next = softnet_seq_next,
				2008	.stop = softnet_seq_stop,
				2009	.show = softnet_seq_show,
				2010	};
				2011
				2012	static int softnet_seq_open(struct inode inode, struct file file)
				2013	{
				2014	return seq_open(file, &softnet_seq_ops);
				2015	}
				2016
				2017	static struct file_operations softnet_seq_fops = {
				2018	.owner = THIS_MODULE,
				2019	.open = softnet_seq_open,
				2020	.read = seq_read,
				2021	.llseek = seq_lseek,
				2022	.release = seq_release,
				2023	};
				2024
				/*
				 * wireless_proc_init() is an external function when WIRELESS_EXT is
				 * defined; otherwise it is stubbed out as a constant 0.
				 */
				#ifndef WIRELESS_EXT
				#define wireless_proc_init() 0
				#else
				extern int wireless_proc_init(void);
				#endif
				2030
				2031	static int __init dev_proc_init(void)
				2032	{
				2033	int rc = -ENOMEM;
				2034
				2035	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
				2036	goto out;
				2037	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
				2038	goto out_dev;
				2039	if (wireless_proc_init())
				2040	goto out_softnet;
				2041	rc = 0;
				2042	out:
				2043	return rc;
				2044	out_softnet:
				2045	proc_net_remove("softnet_stat");
				2046	out_dev:
				2047	proc_net_remove("dev");
				2048	goto out;
				2049	}
				2050	#else
				2051	#define dev_proc_init() 0
				2052	#endif /* CONFIG_PROC_FS */
				2053
				2054
				2055	/**
				2056	* netdev_set_master - set up master/slave pair
				2057	* @slave: slave device
				2058	* @master: new master device
				2059	*
				2060	* Changes the master device of the slave. Pass %NULL to break the
				2061	* bonding. The caller must hold the RTNL semaphore. On a failure
				2062	* a negative errno code is returned. On success the reference counts
				2063	* are adjusted, %RTM_NEWLINK is sent to the routing socket and the
				2064	* function returns zero.
				2065	*/
				2066	int netdev_set_master(struct net_device slave, struct net_device master)
				2067	{
				2068	struct net_device *old = slave->master;
				2069
				2070	ASSERT_RTNL();
				2071
				2072	if (master) {
				2073	if (old)
				2074	return -EBUSY;
				2075	dev_hold(master);
				2076	}
				2077
				2078	slave->master = master;
				2079
				2080	synchronize_net();
				2081
				2082	if (old)
				2083	dev_put(old);
				2084
				2085	if (master)
				2086	slave->flags \|= IFF_SLAVE;
				2087	else
				2088	slave->flags &= ~IFF_SLAVE;
				2089
				2090	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
				2091	return 0;
				2092	}
				2093
				2094	/**
				2095	* dev_set_promiscuity - update promiscuity count on a device
				2096	* @dev: device
				2097	* @inc: modifier
				2098	*
				2099	* Add or remove promsicuity from a device. While the count in the device
				2100	* remains above zero the interface remains promiscuous. Once it hits zero
				2101	* the device reverts back to normal filtering operation. A negative inc
				2102	* value is used to drop promiscuity on the device.
				2103	*/
				2104	void dev_set_promiscuity(struct net_device *dev, int inc)
				2105	{
				2106	unsigned short old_flags = dev->flags;
				2107
				2108	dev->flags \|= IFF_PROMISC;
				2109	if ((dev->promiscuity += inc) == 0)
				2110	dev->flags &= ~IFF_PROMISC;
				2111	if (dev->flags ^ old_flags) {
				2112	dev_mc_upload(dev);
				2113	printk(KERN_INFO "device %s %s promiscuous mode\n",
				2114	dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
				2115	"left");
				2116	}
				2117	}
				2118
				2119	/**
				2120	* dev_set_allmulti - update allmulti count on a device
				2121	* @dev: device
				2122	* @inc: modifier
				2123	*
				2124	* Add or remove reception of all multicast frames to a device. While the
				2125	* count in the device remains above zero the interface remains listening
				2126	* to all interfaces. Once it hits zero the device reverts back to normal
				2127	* filtering operation. A negative @inc value is used to drop the counter
				2128	* when releasing a resource needing all multicasts.
				2129	*/
				2130
				2131	void dev_set_allmulti(struct net_device *dev, int inc)
				2132	{
				2133	unsigned short old_flags = dev->flags;
				2134
				2135	dev->flags \|= IFF_ALLMULTI;
				2136	if ((dev->allmulti += inc) == 0)
				2137	dev->flags &= ~IFF_ALLMULTI;
				2138	if (dev->flags ^ old_flags)
				2139	dev_mc_upload(dev);
				2140	}
				2141
				2142	unsigned dev_get_flags(const struct net_device *dev)
				2143	{
				2144	unsigned flags;
				2145
				2146	flags = (dev->flags & ~(IFF_PROMISC \|
				2147	IFF_ALLMULTI \|
				2148	IFF_RUNNING)) \|
				2149	(dev->gflags & (IFF_PROMISC \|
				2150	IFF_ALLMULTI));
				2151
				2152	if (netif_running(dev) && netif_carrier_ok(dev))
				2153	flags \|= IFF_RUNNING;
				2154
				2155	return flags;
				2156	}
				2157
				2158	int dev_change_flags(struct net_device *dev, unsigned flags)
				2159	{
				2160	int ret;
				2161	int old_flags = dev->flags;
				2162
				2163	/*
				2164	* Set the flags on our device.
				2165	*/
				2166
				2167	dev->flags = (flags & (IFF_DEBUG \| IFF_NOTRAILERS \| IFF_NOARP \|
				2168	IFF_DYNAMIC \| IFF_MULTICAST \| IFF_PORTSEL \|
				2169	IFF_AUTOMEDIA)) \|
				2170	(dev->flags & (IFF_UP \| IFF_VOLATILE \| IFF_PROMISC \|
				2171	IFF_ALLMULTI));
				2172
				2173	/*
				2174	* Load in the correct multicast list now the flags have changed.
				2175	*/
				2176
				2177	dev_mc_upload(dev);
				2178
				2179	/*
				2180	* Have we downed the interface. We handle IFF_UP ourselves
				2181	* according to user attempts to set it, rather than blindly
				2182	* setting it.
				2183	*/
				2184
				2185	ret = 0;
				2186	if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
				2187	ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
				2188
				2189	if (!ret)
				2190	dev_mc_upload(dev);
				2191	}
				2192
				2193	if (dev->flags & IFF_UP &&
				2194	((old_flags ^ dev->flags) &~ (IFF_UP \| IFF_PROMISC \| IFF_ALLMULTI \|
				2195	IFF_VOLATILE)))
				2196	notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
				2197
				2198	if ((flags ^ dev->gflags) & IFF_PROMISC) {
				2199	int inc = (flags & IFF_PROMISC) ? +1 : -1;
				2200	dev->gflags ^= IFF_PROMISC;
				2201	dev_set_promiscuity(dev, inc);
				2202	}
				2203
				2204	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
				2205	is important. Some (broken) drivers set IFF_PROMISC, when
				2206	IFF_ALLMULTI is requested not asking us and not reporting.
				2207	*/
				2208	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
				2209	int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
				2210	dev->gflags ^= IFF_ALLMULTI;
				2211	dev_set_allmulti(dev, inc);
				2212	}
				2213
				2214	if (old_flags ^ dev->flags)
				2215	rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
				2216
				2217	return ret;
				2218	}
				2219
				2220	int dev_set_mtu(struct net_device *dev, int new_mtu)
				2221	{
				2222	int err;
				2223
				2224	if (new_mtu == dev->mtu)
				2225	return 0;
				2226
				2227	/* MTU must be positive. */
				2228	if (new_mtu < 0)
				2229	return -EINVAL;
				2230
				2231	if (!netif_device_present(dev))
				2232	return -ENODEV;
				2233
				2234	err = 0;
				2235	if (dev->change_mtu)
				2236	err = dev->change_mtu(dev, new_mtu);
				2237	else
				2238	dev->mtu = new_mtu;
				2239	if (!err && dev->flags & IFF_UP)
				2240	notifier_call_chain(&netdev_chain,
				2241	NETDEV_CHANGEMTU, dev);
				2242	return err;
				2243	}
				2244
				2245	int dev_set_mac_address(struct net_device dev, struct sockaddr sa)
				2246	{
				2247	int err;
				2248
				2249	if (!dev->set_mac_address)
				2250	return -EOPNOTSUPP;
				2251	if (sa->sa_family != dev->type)
				2252	return -EINVAL;
				2253	if (!netif_device_present(dev))
				2254	return -ENODEV;
				2255	err = dev->set_mac_address(dev, sa);
				2256	if (!err)
				2257	notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
				2258	return err;
				2259	}
				2260
				2261	/*
				2262	* Perform the SIOCxIFxxx calls.
				2263	*/
				2264	static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
				2265	{
				2266	int err;
				2267	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
				2268
				2269	if (!dev)
				2270	return -ENODEV;
				2271
				2272	switch (cmd) {
				2273	case SIOCGIFFLAGS: /* Get interface flags */
				2274	ifr->ifr_flags = dev_get_flags(dev);
				2275	return 0;
				2276
				2277	case SIOCSIFFLAGS: /* Set interface flags */
				2278	return dev_change_flags(dev, ifr->ifr_flags);
				2279
				2280	case SIOCGIFMETRIC: /* Get the metric on the interface
				2281	(currently unused) */
				2282	ifr->ifr_metric = 0;
				2283	return 0;
				2284
				2285	case SIOCSIFMETRIC: /* Set the metric on the interface
				2286	(currently unused) */
				2287	return -EOPNOTSUPP;
				2288
				2289	case SIOCGIFMTU: /* Get the MTU of a device */
				2290	ifr->ifr_mtu = dev->mtu;
				2291	return 0;
				2292
				2293	case SIOCSIFMTU: /* Set the MTU of a device */
				2294	return dev_set_mtu(dev, ifr->ifr_mtu);
				2295
				2296	case SIOCGIFHWADDR:
				2297	if (!dev->addr_len)
				2298	memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
				2299	else
				2300	memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
				2301	min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
				2302	ifr->ifr_hwaddr.sa_family = dev->type;
				2303	return 0;
				2304
				2305	case SIOCSIFHWADDR:
				2306	return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
				2307
				2308	case SIOCSIFHWBROADCAST:
				2309	if (ifr->ifr_hwaddr.sa_family != dev->type)
				2310	return -EINVAL;
				2311	memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
				2312	min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
				2313	notifier_call_chain(&netdev_chain,
				2314	NETDEV_CHANGEADDR, dev);
				2315	return 0;
				2316
				2317	case SIOCGIFMAP:
				2318	ifr->ifr_map.mem_start = dev->mem_start;
				2319	ifr->ifr_map.mem_end = dev->mem_end;
				2320	ifr->ifr_map.base_addr = dev->base_addr;
				2321	ifr->ifr_map.irq = dev->irq;
				2322	ifr->ifr_map.dma = dev->dma;
				2323	ifr->ifr_map.port = dev->if_port;
				2324	return 0;
				2325
				2326	case SIOCSIFMAP:
				2327	if (dev->set_config) {
				2328	if (!netif_device_present(dev))
				2329	return -ENODEV;
				2330	return dev->set_config(dev, &ifr->ifr_map);
				2331	}
				2332	return -EOPNOTSUPP;
				2333
				2334	case SIOCADDMULTI:
				2335	if (!dev->set_multicast_list \|\|
				2336	ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				2337	return -EINVAL;
				2338	if (!netif_device_present(dev))
				2339	return -ENODEV;
				2340	return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
				2341	dev->addr_len, 1);
				2342
				2343	case SIOCDELMULTI:
				2344	if (!dev->set_multicast_list \|\|
				2345	ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				2346	return -EINVAL;
				2347	if (!netif_device_present(dev))
				2348	return -ENODEV;
				2349	return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
				2350	dev->addr_len, 1);
				2351
				2352	case SIOCGIFINDEX:
				2353	ifr->ifr_ifindex = dev->ifindex;
				2354	return 0;
				2355
				2356	case SIOCGIFTXQLEN:
				2357	ifr->ifr_qlen = dev->tx_queue_len;
				2358	return 0;
				2359
				2360	case SIOCSIFTXQLEN:
				2361	if (ifr->ifr_qlen < 0)
				2362	return -EINVAL;
				2363	dev->tx_queue_len = ifr->ifr_qlen;
				2364	return 0;
				2365
				2366	case SIOCSIFNAME:
				2367	ifr->ifr_newname[IFNAMSIZ-1] = '\0';
				2368	return dev_change_name(dev, ifr->ifr_newname);
				2369
				2370	/*
				2371	* Unknown or private ioctl
				2372	*/
				2373
				2374	default:
				2375	if ((cmd >= SIOCDEVPRIVATE &&
				2376	cmd <= SIOCDEVPRIVATE + 15) \|\|
				2377	cmd == SIOCBONDENSLAVE \|\|
				2378	cmd == SIOCBONDRELEASE \|\|
				2379	cmd == SIOCBONDSETHWADDR \|\|
				2380	cmd == SIOCBONDSLAVEINFOQUERY \|\|
				2381	cmd == SIOCBONDINFOQUERY \|\|
				2382	cmd == SIOCBONDCHANGEACTIVE \|\|
				2383	cmd == SIOCGMIIPHY \|\|
				2384	cmd == SIOCGMIIREG \|\|
				2385	cmd == SIOCSMIIREG \|\|
				2386	cmd == SIOCBRADDIF \|\|
				2387	cmd == SIOCBRDELIF \|\|
				2388	cmd == SIOCWANDEV) {
				2389	err = -EOPNOTSUPP;
				2390	if (dev->do_ioctl) {
				2391	if (netif_device_present(dev))
				2392	err = dev->do_ioctl(dev, ifr,
				2393	cmd);
				2394	else
				2395	err = -ENODEV;
				2396	}
				2397	} else
				2398	err = -EINVAL;
				2399
				2400	}
				2401	return err;
				2402	}
				2403
				2404	/*
				2405	* This function handles all "interface"-type I/O control requests. The actual
				2406	* 'doing' part of this is dev_ifsioc above.
				2407	*/
				2408
				2409	/**
				2410	* dev_ioctl - network device ioctl
				2411	* @cmd: command to issue
				2412	* @arg: pointer to a struct ifreq in user space
				2413	*
				2414	* Issue ioctl functions to devices. This is normally called by the
				2415	* user space syscall interfaces but can sometimes be useful for
				2416	* other purposes. The return value is the return from the syscall if
				2417	* positive or a negative errno code on error.
				2418	*/
				2419
				2420	int dev_ioctl(unsigned int cmd, void __user *arg)
				2421	{
				2422	struct ifreq ifr;
				2423	int ret;
				2424	char *colon;
				2425
				2426	/* One special case: SIOCGIFCONF takes ifconf argument
				2427	and requires shared lock, because it sleeps writing
				2428	to user space.
				2429	*/
				2430
				2431	if (cmd == SIOCGIFCONF) {
				2432	rtnl_shlock();
				2433	ret = dev_ifconf((char __user *) arg);
				2434	rtnl_shunlock();
				2435	return ret;
				2436	}
				2437	if (cmd == SIOCGIFNAME)
				2438	return dev_ifname((struct ifreq __user *)arg);
				2439
				2440	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
				2441	return -EFAULT;
				2442
				2443	ifr.ifr_name[IFNAMSIZ-1] = 0;
				2444
				2445	colon = strchr(ifr.ifr_name, ':');
				2446	if (colon)
				2447	*colon = 0;
				2448
				2449	/*
				2450	* See which interface the caller is talking about.
				2451	*/
				2452
				2453	switch (cmd) {
				2454	/*
				2455	* These ioctl calls:
				2456	* - can be done by all.
				2457	* - atomic and do not require locking.
				2458	* - return a value
				2459	*/
				2460	case SIOCGIFFLAGS:
				2461	case SIOCGIFMETRIC:
				2462	case SIOCGIFMTU:
				2463	case SIOCGIFHWADDR:
				2464	case SIOCGIFSLAVE:
				2465	case SIOCGIFMAP:
				2466	case SIOCGIFINDEX:
				2467	case SIOCGIFTXQLEN:
				2468	dev_load(ifr.ifr_name);
				2469	read_lock(&dev_base_lock);
				2470	ret = dev_ifsioc(&ifr, cmd);
				2471	read_unlock(&dev_base_lock);
				2472	if (!ret) {
				2473	if (colon)
				2474	*colon = ':';
				2475	if (copy_to_user(arg, &ifr,
				2476	sizeof(struct ifreq)))
				2477	ret = -EFAULT;
				2478	}
				2479	return ret;
				2480
				2481	case SIOCETHTOOL:
				2482	dev_load(ifr.ifr_name);
				2483	rtnl_lock();
				2484	ret = dev_ethtool(&ifr);
				2485	rtnl_unlock();
				2486	if (!ret) {
				2487	if (colon)
				2488	*colon = ':';
				2489	if (copy_to_user(arg, &ifr,
				2490	sizeof(struct ifreq)))
				2491	ret = -EFAULT;
				2492	}
				2493	return ret;
				2494
				2495	/*
				2496	* These ioctl calls:
				2497	* - require superuser power.
				2498	* - require strict serialization.
				2499	* - return a value
				2500	*/
				2501	case SIOCGMIIPHY:
				2502	case SIOCGMIIREG:
				2503	case SIOCSIFNAME:
				2504	if (!capable(CAP_NET_ADMIN))
				2505	return -EPERM;
				2506	dev_load(ifr.ifr_name);
				2507	rtnl_lock();
				2508	ret = dev_ifsioc(&ifr, cmd);
				2509	rtnl_unlock();
				2510	if (!ret) {
				2511	if (colon)
				2512	*colon = ':';
				2513	if (copy_to_user(arg, &ifr,
				2514	sizeof(struct ifreq)))
				2515	ret = -EFAULT;
				2516	}
				2517	return ret;
				2518
				2519	/*
				2520	* These ioctl calls:
				2521	* - require superuser power.
				2522	* - require strict serialization.
				2523	* - do not return a value
				2524	*/
				2525	case SIOCSIFFLAGS:
				2526	case SIOCSIFMETRIC:
				2527	case SIOCSIFMTU:
				2528	case SIOCSIFMAP:
				2529	case SIOCSIFHWADDR:
				2530	case SIOCSIFSLAVE:
				2531	case SIOCADDMULTI:
				2532	case SIOCDELMULTI:
				2533	case SIOCSIFHWBROADCAST:
				2534	case SIOCSIFTXQLEN:
				2535	case SIOCSMIIREG:
				2536	case SIOCBONDENSLAVE:
				2537	case SIOCBONDRELEASE:
				2538	case SIOCBONDSETHWADDR:
				2539	case SIOCBONDSLAVEINFOQUERY:
				2540	case SIOCBONDINFOQUERY:
				2541	case SIOCBONDCHANGEACTIVE:
				2542	case SIOCBRADDIF:
				2543	case SIOCBRDELIF:
				2544	if (!capable(CAP_NET_ADMIN))
				2545	return -EPERM;
				2546	dev_load(ifr.ifr_name);
				2547	rtnl_lock();
				2548	ret = dev_ifsioc(&ifr, cmd);
				2549	rtnl_unlock();
				2550	return ret;
				2551
				2552	case SIOCGIFMEM:
				2553	/* Get the per device memory space. We can add this but
				2554	* currently do not support it */
				2555	case SIOCSIFMEM:
				2556	/* Set the per device memory buffer space.
				2557	* Not applicable in our case */
				2558	case SIOCSIFLINK:
				2559	return -EINVAL;
				2560
				2561	/*
				2562	* Unknown or private ioctl.
				2563	*/
				2564	default:
				2565	if (cmd == SIOCWANDEV \|\|
				2566	(cmd >= SIOCDEVPRIVATE &&
				2567	cmd <= SIOCDEVPRIVATE + 15)) {
				2568	dev_load(ifr.ifr_name);
				2569	rtnl_lock();
				2570	ret = dev_ifsioc(&ifr, cmd);
				2571	rtnl_unlock();
				2572	if (!ret && copy_to_user(arg, &ifr,
				2573	sizeof(struct ifreq)))
				2574	ret = -EFAULT;
				2575	return ret;
				2576	}
				2577	#ifdef WIRELESS_EXT
				2578	/* Take care of Wireless Extensions */
				2579	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
				2580	/* If command is `set a parameter', or
				2581	* `get the encoding parameters', check if
				2582	* the user has the right to do it */
				2583	if (IW_IS_SET(cmd) \|\| cmd == SIOCGIWENCODE) {
				2584	if (!capable(CAP_NET_ADMIN))
				2585	return -EPERM;
				2586	}
				2587	dev_load(ifr.ifr_name);
				2588	rtnl_lock();
				2589	/* Follow me in net/core/wireless.c */
				2590	ret = wireless_process_ioctl(&ifr, cmd);
				2591	rtnl_unlock();
				2592	if (IW_IS_GET(cmd) &&
				2593	copy_to_user(arg, &ifr,
				2594	sizeof(struct ifreq)))
				2595	ret = -EFAULT;
				2596	return ret;
				2597	}
				2598	#endif /* WIRELESS_EXT */
				2599	return -EINVAL;
				2600	}
				2601	}
				2602
				2603
				/**
				 *	dev_new_index	-	allocate an ifindex
				 *
				 *	Hands out the next positive interface index that no existing
				 *	device is using.  The counter wraps back to 1 instead of going
				 *	to zero or below.  The caller must hold the rtnl semaphore or
				 *	the dev_base_lock to be sure the value remains unique.
				 */
				static int dev_new_index(void)
				{
					static int ifindex;

					do {
						ifindex++;
						if (ifindex <= 0)
							ifindex = 1;
					} while (__dev_get_by_index(ifindex));

					return ifindex;
				}
				2621
				/* Starts out set; register_netdevice() BUG()s while it is non-zero. */
				static int dev_boot_phase = 1;

				/* Delayed registration/unregistration */
				static DEFINE_SPINLOCK(net_todo_list_lock);
				static LIST_HEAD(net_todo_list);
				2627
				2628	static inline void net_set_todo(struct net_device *dev)
				2629	{
				2630	spin_lock(&net_todo_list_lock);
				2631	list_add_tail(&dev->todo_list, &net_todo_list);
				2632	spin_unlock(&net_todo_list_lock);
				2633	}
				2634
				2635	/**
				2636	* register_netdevice - register a network device
				2637	* @dev: device to register
				2638	*
				2639	* Take a completed network device structure and add it to the kernel
				2640	* interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
				2641	* chain. 0 is returned on success. A negative errno code is returned
				2642	* on a failure to set up the device, or if the name is a duplicate.
				2643	*
				2644	* Callers must hold the rtnl semaphore. You may want
				2645	* register_netdev() instead of this.
				2646	*
				2647	* BUGS:
				2648	* The locking appears insufficient to guarantee two parallel registers
				2649	* will not get the same name.
				2650	*/
				2651
				2652	int register_netdevice(struct net_device *dev)
				2653	{
				2654	struct hlist_head *head;
				2655	struct hlist_node *p;
				2656	int ret;
				2657
				2658	BUG_ON(dev_boot_phase);
				2659	ASSERT_RTNL();
				2660
				2661	/* When net_device's are persistent, this will be fatal. */
				2662	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
				2663
				2664	spin_lock_init(&dev->queue_lock);
				2665	spin_lock_init(&dev->xmit_lock);
				2666	dev->xmit_lock_owner = -1;
				2667	#ifdef CONFIG_NET_CLS_ACT
				2668	spin_lock_init(&dev->ingress_lock);
				2669	#endif
				2670
				2671	ret = alloc_divert_blk(dev);
				2672	if (ret)
				2673	goto out;
				2674
				2675	dev->iflink = -1;
				2676
				2677	/* Init, if this function is available */
				2678	if (dev->init) {
				2679	ret = dev->init(dev);
				2680	if (ret) {
				2681	if (ret > 0)
				2682	ret = -EIO;
				2683	goto out_err;
				2684	}
				2685	}
				2686
				2687	if (!dev_valid_name(dev->name)) {
				2688	ret = -EINVAL;
				2689	goto out_err;
				2690	}
				2691
				2692	dev->ifindex = dev_new_index();
				2693	if (dev->iflink == -1)
				2694	dev->iflink = dev->ifindex;
				2695
				2696	/* Check for existence of name */
				2697	head = dev_name_hash(dev->name);
				2698	hlist_for_each(p, head) {
				2699	struct net_device *d
				2700	= hlist_entry(p, struct net_device, name_hlist);
				2701	if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
				2702	ret = -EEXIST;
				2703	goto out_err;
				2704	}
				2705	}
				2706
				2707	/* Fix illegal SG+CSUM combinations. */
				2708	if ((dev->features & NETIF_F_SG) &&
				2709	!(dev->features & (NETIF_F_IP_CSUM \|
				2710	NETIF_F_NO_CSUM \|
				2711	NETIF_F_HW_CSUM))) {
				2712	printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
				2713	dev->name);
				2714	dev->features &= ~NETIF_F_SG;
				2715	}
				2716
				2717	/* TSO requires that SG is present as well. */
				2718	if ((dev->features & NETIF_F_TSO) &&
				2719	!(dev->features & NETIF_F_SG)) {
				2720	printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
				2721	dev->name);
				2722	dev->features &= ~NETIF_F_TSO;
				2723	}
				2724
				2725	/*
				2726	* nil rebuild_header routine,
				2727	* that should be never called and used as just bug trap.
				2728	*/
				2729
				2730	if (!dev->rebuild_header)
				2731	dev->rebuild_header = default_rebuild_header;
				2732
				2733	/*
				2734	* Default initial state at registry is that the
				2735	* device is present.
				2736	*/
				2737
				2738	set_bit(__LINK_STATE_PRESENT, &dev->state);
				2739
				2740	dev->next = NULL;
				2741	dev_init_scheduler(dev);
				2742	write_lock_bh(&dev_base_lock);
				2743	*dev_tail = dev;
				2744	dev_tail = &dev->next;
				2745	hlist_add_head(&dev->name_hlist, head);
				2746	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
				2747	dev_hold(dev);
				2748	dev->reg_state = NETREG_REGISTERING;
				2749	write_unlock_bh(&dev_base_lock);
				2750
				2751	/* Notify protocols, that a new device appeared. */
				2752	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
				2753
				2754	/* Finish registration after unlock */
				2755	net_set_todo(dev);
				2756	ret = 0;
				2757
				2758	out:
				2759	return ret;
				2760	out_err:
				2761	free_divert_blk(dev);
				2762	goto out;
				2763	}
				2764
				2765	/**
				2766	* register_netdev - register a network device
				2767	* @dev: device to register
				2768	*
				2769	* Take a completed network device structure and add it to the kernel
				2770	* interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
				2771	* chain. 0 is returned on success. A negative errno code is returned
				2772	* on a failure to set up the device, or if the name is a duplicate.
				2773	*
				2774	* This is a wrapper around register_netdev that takes the rtnl semaphore
				2775	* and expands the device name if you passed a format string to
				2776	* alloc_netdev.
				2777	*/
				2778	int register_netdev(struct net_device *dev)
				2779	{
				2780	int err;
				2781
				2782	rtnl_lock();
				2783
				2784	/*
				2785	* If the name is a format string the caller wants us to do a
				2786	* name allocation.
				2787	*/
				2788	if (strchr(dev->name, '%')) {
				2789	err = dev_alloc_name(dev, dev->name);
				2790	if (err < 0)
				2791	goto out;
				2792	}
				2793
				2794	/*
				2795	* Back compatibility hook. Kill this one in 2.5
				2796	*/
				2797	if (dev->name[0] == 0 \|\| dev->name[0] == ' ') {
				2798	err = dev_alloc_name(dev, "eth%d");
				2799	if (err < 0)
				2800	goto out;
				2801	}
				2802
				2803	err = register_netdevice(dev);
				2804	out:
				2805	rtnl_unlock();
				2806	return err;
				2807	}
				2808	EXPORT_SYMBOL(register_netdev);
				2809
				2810	/*
				2811	* netdev_wait_allrefs - wait until all references are gone.
				2812	*
				2813	* This is called when unregistering network devices.
				2814	*
				2815	* Any protocol or device that holds a reference should register
				2816	* for netdevice notification, and cleanup and put back the
				2817	* reference if they receive an UNREGISTER event.
				2818	* We can get stuck here if buggy protocols don't correctly
				2819	* call dev_put.
				2820	*/
				2821	static void netdev_wait_allrefs(struct net_device *dev)
				2822	{
				2823	unsigned long rebroadcast_time, warning_time;
				2824
				2825	rebroadcast_time = warning_time = jiffies;
				2826	while (atomic_read(&dev->refcnt) != 0) {
				2827	if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
				2828	rtnl_shlock();
				2829
				2830	/* Rebroadcast unregister notification */
				2831	notifier_call_chain(&netdev_chain,
				2832	NETDEV_UNREGISTER, dev);
				2833
				2834	if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				2835	&dev->state)) {
				2836	/* We must not have linkwatch events
				2837	* pending on unregister. If this
				2838	* happens, we simply run the queue
				2839	* unscheduled, resulting in a noop
				2840	* for this device.
				2841	*/
				2842	linkwatch_run_queue();
				2843	}
				2844
				2845	rtnl_shunlock();
				2846
				2847	rebroadcast_time = jiffies;
				2848	}
				2849
				2850	msleep(250);
				2851
				2852	if (time_after(jiffies, warning_time + 10 * HZ)) {
				2853	printk(KERN_EMERG "unregister_netdevice: "
				2854	"waiting for %s to become free. Usage "
				2855	"count = %d\n",
				2856	dev->name, atomic_read(&dev->refcnt));
				2857	warning_time = jiffies;
				2858	}
				2859	}
				2860	}
				2861
				2862	/* The sequence is:
				2863	*
				2864	* rtnl_lock();
				2865	* ...
				2866	* register_netdevice(x1);
				2867	* register_netdevice(x2);
				2868	* ...
				2869	* unregister_netdevice(y1);
				2870	* unregister_netdevice(y2);
				2871	* ...
				2872	* rtnl_unlock();
				2873	* free_netdev(y1);
				2874	* free_netdev(y2);
				2875	*
				2876	* We are invoked by rtnl_unlock() after it drops the semaphore.
				2877	* This allows us to deal with problems:
				2878	* 1) We can create/delete sysfs objects which invoke hotplug
				2879	* without deadlocking with linkwatch via keventd.
				2880	* 2) Since we run with the RTNL semaphore not held, we can sleep
				2881	* safely in order to wait for the netdev refcnt to drop to zero.
				2882	*/
				2883	static DECLARE_MUTEX(net_todo_run_mutex);
				2884	void netdev_run_todo(void)
				2885	{
				2886	struct list_head list = LIST_HEAD_INIT(list);
				2887	int err;
				2888
				2889
				2890	/* Need to guard against multiple cpu's getting out of order. */
				2891	down(&net_todo_run_mutex);
				2892
				2893	/* Not safe to do outside the semaphore. We must not return
				2894	* until all unregister events invoked by the local processor
				2895	* have been completed (either by this todo run, or one on
				2896	* another cpu).
				2897	*/
				2898	if (list_empty(&net_todo_list))
				2899	goto out;
				2900
				2901	/* Snapshot list, allow later requests */
				2902	spin_lock(&net_todo_list_lock);
				2903	list_splice_init(&net_todo_list, &list);
				2904	spin_unlock(&net_todo_list_lock);
				2905
				2906	while (!list_empty(&list)) {
				2907	struct net_device *dev
				2908	= list_entry(list.next, struct net_device, todo_list);
				2909	list_del(&dev->todo_list);
				2910
				2911	switch(dev->reg_state) {
				2912	case NETREG_REGISTERING:
				2913	err = netdev_register_sysfs(dev);
				2914	if (err)
				2915	printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
				2916	dev->name, err);
				2917	dev->reg_state = NETREG_REGISTERED;
				2918	break;
				2919
				2920	case NETREG_UNREGISTERING:
				2921	netdev_unregister_sysfs(dev);
				2922	dev->reg_state = NETREG_UNREGISTERED;
				2923
				2924	netdev_wait_allrefs(dev);
				2925
				2926	/* paranoia */
				2927	BUG_ON(atomic_read(&dev->refcnt));
				2928	BUG_TRAP(!dev->ip_ptr);
				2929	BUG_TRAP(!dev->ip6_ptr);
				2930	BUG_TRAP(!dev->dn_ptr);
				2931
				2932
				2933	/* It must be the very last action,
				2934	* after this 'dev' may point to freed up memory.
				2935	*/
				2936	if (dev->destructor)
				2937	dev->destructor(dev);
				2938	break;
				2939
				2940	default:
				2941	printk(KERN_ERR "network todo '%s' but state %d\n",
				2942	dev->name, dev->reg_state);
				2943	break;
				2944	}
				2945	}
				2946
				2947	out:
				2948	up(&net_todo_run_mutex);
				2949	}
				2950
				2951	/**
				2952	* alloc_netdev - allocate network device
				2953	* @sizeof_priv: size of private data to allocate space for
				2954	* @name: device name format string
				2955	* @setup: callback to initialize device
				2956	*
				2957	* Allocates a struct net_device with private data area for driver use
				2958	* and performs basic initialization.
				2959	*/
				2960	struct net_device alloc_netdev(int sizeof_priv, const char name,
				2961	void (setup)(struct net_device ))
				2962	{
				2963	void *p;
				2964	struct net_device *dev;
				2965	int alloc_size;
				2966
				2967	/* ensure 32-byte alignment of both the device and private area */
				2968	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
				2969	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
				2970
				2971	p = kmalloc(alloc_size, GFP_KERNEL);
				2972	if (!p) {
				2973	printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
				2974	return NULL;
				2975	}
				2976	memset(p, 0, alloc_size);
				2977
				2978	dev = (struct net_device *)
				2979	(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
				2980	dev->padded = (char )dev - (char )p;
				2981
				2982	if (sizeof_priv)
				2983	dev->priv = netdev_priv(dev);
				2984
				2985	setup(dev);
				2986	strcpy(dev->name, name);
				2987	return dev;
				2988	}
				2989	EXPORT_SYMBOL(alloc_netdev);
				2990
				2991	/**
				2992	* free_netdev - free network device
				2993	* @dev: device
				2994	*
				2995	* This function does the last stage of destroying an allocated device
				2996	* interface. The reference to the device object is released.
				2997	* If this is the last reference then it will be freed.
				2998	*/
				2999	void free_netdev(struct net_device *dev)
				3000	{
				3001	#ifdef CONFIG_SYSFS
				3002	/* Compatiablity with error handling in drivers */
				3003	if (dev->reg_state == NETREG_UNINITIALIZED) {
				3004	kfree((char *)dev - dev->padded);
				3005	return;
				3006	}
				3007
				3008	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
				3009	dev->reg_state = NETREG_RELEASED;
				3010
				3011	/* will free via class release */
				3012	class_device_put(&dev->class_dev);
				3013	#else
				3014	kfree((char *)dev - dev->padded);
				3015	#endif
				3016	}
				3017
				/*
				 * Synchronize with packet receive processing.
				 *
				 * Blocks in synchronize_rcu(); the might_sleep() annotation flags
				 * callers that invoke this from a context that must not sleep.
				 */
				void synchronize_net(void)
				{
					might_sleep();
					synchronize_rcu();
				}
				3024
				3025	/**
				3026	* unregister_netdevice - remove device from the kernel
				3027	* @dev: device
				3028	*
				3029	* This function shuts down a device interface and removes it
				3030	* from the kernel tables. On success 0 is returned, on a failure
				3031	* a negative errno code is returned.
				3032	*
				3033	* Callers must hold the rtnl semaphore. You may want
				3034	* unregister_netdev() instead of this.
				3035	*/
				3036
				3037	int unregister_netdevice(struct net_device *dev)
				3038	{
				3039	struct net_device d, *dp;
				3040
				3041	BUG_ON(dev_boot_phase);
				3042	ASSERT_RTNL();
				3043
				3044	/* Some devices call without registering for initialization unwind. */
				3045	if (dev->reg_state == NETREG_UNINITIALIZED) {
				3046	printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
				3047	"was registered\n", dev->name, dev);
				3048	return -ENODEV;
				3049	}
				3050
				3051	BUG_ON(dev->reg_state != NETREG_REGISTERED);
				3052
				3053	/* If device is running, close it first. */
				3054	if (dev->flags & IFF_UP)
				3055	dev_close(dev);
				3056
				3057	/* And unlink it from device chain. */
				3058	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
				3059	if (d == dev) {
				3060	write_lock_bh(&dev_base_lock);
				3061	hlist_del(&dev->name_hlist);
				3062	hlist_del(&dev->index_hlist);
				3063	if (dev_tail == &dev->next)
				3064	dev_tail = dp;
				3065	*dp = d->next;
				3066	write_unlock_bh(&dev_base_lock);
				3067	break;
				3068	}
				3069	}
				3070	if (!d) {
				3071	printk(KERN_ERR "unregister net_device: '%s' not found\n",
				3072	dev->name);
				3073	return -ENODEV;
				3074	}
				3075
				3076	dev->reg_state = NETREG_UNREGISTERING;
				3077
				3078	synchronize_net();
				3079
				3080	/* Shutdown queueing discipline. */
				3081	dev_shutdown(dev);
				3082
				3083
				3084	/* Notify protocols, that we are about to destroy
				3085	this device. They should clean all the things.
				3086	*/
				3087	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
				3088
				3089	/*
				3090	* Flush the multicast chain
				3091	*/
				3092	dev_mc_discard(dev);
				3093
				3094	if (dev->uninit)
				3095	dev->uninit(dev);
				3096
				3097	/* Notifier chain MUST detach us from master device. */
				3098	BUG_TRAP(!dev->master);
				3099
				3100	free_divert_blk(dev);
				3101
				3102	/* Finish processing unregister after unlock */
				3103	net_set_todo(dev);
				3104
				3105	synchronize_net();
				3106
				3107	dev_put(dev);
				3108	return 0;
				3109	}
				3110
				/**
				 *	unregister_netdev - remove device from the kernel
				 *	@dev: device
				 *
				 *	This function shuts down a device interface and removes it
				 *	from the kernel tables.
				 *
				 *	This is just a wrapper for unregister_netdevice that takes
				 *	the rtnl semaphore.  In general you want to use this and not
				 *	unregister_netdevice.
				 *
				 *	Nothing is returned: the status code of unregister_netdevice()
				 *	is discarded, so callers cannot observe a failure through this
				 *	wrapper.
				 */
				void unregister_netdev(struct net_device *dev)
				{
					rtnl_lock();
					unregister_netdevice(dev);
					rtnl_unlock();
				}

				EXPORT_SYMBOL(unregister_netdev);
				3131
				#ifdef CONFIG_HOTPLUG_CPU
				/*
				 * CPU hotplug notifier.  On CPU_DEAD, move the dead CPU's pending
				 * completion and output queues onto the CPU running this callback
				 * and feed its queued input packets back through netif_rx().
				 * Every other action is ignored.
				 *
				 * @nfb:    notifier block (unused here)
				 * @action: hotplug event; only CPU_DEAD is handled
				 * @ocpu:   number of the affected CPU, passed as a pointer-sized value
				 *
				 * Always returns NOTIFY_OK.
				 */
				static int dev_cpu_callback(struct notifier_block *nfb,
							    unsigned long action,
							    void *ocpu)
				{
					struct sk_buff **list_skb;
					struct net_device **list_net;
					struct sk_buff *skb;
					unsigned int cpu, oldcpu = (unsigned long)ocpu;
					struct softnet_data *sd, *oldsd;

					if (action != CPU_DEAD)
						return NOTIFY_OK;

					/* Both per-CPU queue sets are edited with local interrupts off. */
					local_irq_disable();
					cpu = smp_processor_id();
					sd = &per_cpu(softnet_data, cpu);
					oldsd = &per_cpu(softnet_data, oldcpu);

					/* Find end of our completion_queue. */
					list_skb = &sd->completion_queue;
					while (*list_skb)
						list_skb = &(*list_skb)->next;
					/* Append completion queue from offline CPU. */
					*list_skb = oldsd->completion_queue;
					oldsd->completion_queue = NULL;

					/* Find end of our output_queue. */
					list_net = &sd->output_queue;
					while (*list_net)
						list_net = &(*list_net)->next_sched;
					/* Append output queue from offline CPU. */
					*list_net = oldsd->output_queue;
					oldsd->output_queue = NULL;

					/* Make sure the TX softirq runs to process what was just appended. */
					raise_softirq_irqoff(NET_TX_SOFTIRQ);
					local_irq_enable();

					/* Process offline CPU's input_pkt_queue */
					while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
						netif_rx(skb);

					return NOTIFY_OK;
				}
				#endif /* CONFIG_HOTPLUG_CPU */
				3177
				3178
				3179	/*
				3180	* Initialize the DEV module. At boot time this walks the device list and
				3181	* unhooks any devices that fail to initialise (normally hardware not
				3182	* present) and leaves us with a valid list of present and active devices.
				3183	*
				3184	*/
				3185
				3186	/*
				3187	* This is called single threaded during boot, so no need
				3188	* to take the rtnl semaphore.
				3189	*/
				3190	static int __init net_dev_init(void)
				3191	{
				3192	int i, rc = -ENOMEM;
				3193
				3194	BUG_ON(!dev_boot_phase);
				3195
				3196	net_random_init();
				3197
				3198	if (dev_proc_init())
				3199	goto out;
				3200
				3201	if (netdev_sysfs_init())
				3202	goto out;
				3203
				3204	INIT_LIST_HEAD(&ptype_all);
				3205	for (i = 0; i < 16; i++)
				3206	INIT_LIST_HEAD(&ptype_base[i]);
				3207
				3208	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
				3209	INIT_HLIST_HEAD(&dev_name_head[i]);
				3210
				3211	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
				3212	INIT_HLIST_HEAD(&dev_index_head[i]);
				3213
				3214	/*
				3215	* Initialise the packet receive queues.
				3216	*/
				3217
				3218	for (i = 0; i < NR_CPUS; i++) {
				3219	struct softnet_data *queue;
				3220
				3221	queue = &per_cpu(softnet_data, i);
				3222	skb_queue_head_init(&queue->input_pkt_queue);
				3223	queue->throttle = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3224	queue->completion_queue = NULL;
				3225	INIT_LIST_HEAD(&queue->poll_list);
				3226	set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
				3227	queue->backlog_dev.weight = weight_p;
				3228	queue->backlog_dev.poll = process_backlog;
				3229	atomic_set(&queue->backlog_dev.refcnt, 1);
				3230	}
				3231
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3232	dev_boot_phase = 0;
				3233
				3234	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
				3235	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
				3236
				3237	hotcpu_notifier(dev_cpu_callback, 0);
				3238	dst_init();
				3239	dev_mcast_init();
				3240	rc = 0;
				3241	out:
				3242	return rc;
				3243	}
				3244
				3245	subsys_initcall(net_dev_init);
				3246
				/* Symbols defined in this file that are exported to the rest of the kernel. */
				EXPORT_SYMBOL(__dev_get_by_index);
				EXPORT_SYMBOL(__dev_get_by_name);
				EXPORT_SYMBOL(__dev_remove_pack);
				EXPORT_SYMBOL(__skb_linearize);
				EXPORT_SYMBOL(dev_add_pack);
				EXPORT_SYMBOL(dev_alloc_name);
				EXPORT_SYMBOL(dev_close);
				EXPORT_SYMBOL(dev_get_by_flags);
				EXPORT_SYMBOL(dev_get_by_index);
				EXPORT_SYMBOL(dev_get_by_name);
				EXPORT_SYMBOL(dev_ioctl);
				EXPORT_SYMBOL(dev_open);
				EXPORT_SYMBOL(dev_queue_xmit);
				EXPORT_SYMBOL(dev_remove_pack);
				EXPORT_SYMBOL(dev_set_allmulti);
				EXPORT_SYMBOL(dev_set_promiscuity);
				EXPORT_SYMBOL(dev_change_flags);
				EXPORT_SYMBOL(dev_set_mtu);
				EXPORT_SYMBOL(dev_set_mac_address);
				EXPORT_SYMBOL(free_netdev);
				EXPORT_SYMBOL(netdev_boot_setup_check);
				EXPORT_SYMBOL(netdev_set_master);
				EXPORT_SYMBOL(netdev_state_change);
				EXPORT_SYMBOL(netif_receive_skb);
				EXPORT_SYMBOL(netif_rx);
				EXPORT_SYMBOL(register_gifconf);
				EXPORT_SYMBOL(register_netdevice);
				EXPORT_SYMBOL(register_netdevice_notifier);
				EXPORT_SYMBOL(skb_checksum_help);
				EXPORT_SYMBOL(synchronize_net);
				EXPORT_SYMBOL(unregister_netdevice);
				EXPORT_SYMBOL(unregister_netdevice_notifier);
				EXPORT_SYMBOL(net_enable_timestamp);
				EXPORT_SYMBOL(net_disable_timestamp);
				EXPORT_SYMBOL(dev_get_flags);

				/* Bridge hooks are exported when bridging is built in or built as a module. */
				#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
				EXPORT_SYMBOL(br_handle_frame_hook);
				EXPORT_SYMBOL(br_fdb_get_hook);
				EXPORT_SYMBOL(br_fdb_put_hook);
				#endif

				#ifdef CONFIG_KMOD
				EXPORT_SYMBOL(dev_load);
				#endif

				EXPORT_PER_CPU_SYMBOL(softnet_data);