blob: e787d215115217492ab31bda0ee7df2010bfe82e [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080022#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/types.h>
24#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020032#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070034#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/if_arp.h>
36#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070038#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070048#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
Linus Torvalds1da177e2005-04-16 15:20:36 -070050#ifndef CONFIG_IP_MULTIPLE_TABLES
51
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -080052static int __net_init fib4_rules_init(struct net *net)
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080053{
Denis V. Lunev93456b62008-01-10 03:23:38 -080054 struct fib_table *local_table, *main_table;
55
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080056 local_table = fib_hash_table(RT_TABLE_LOCAL);
Denis V. Lunev93456b62008-01-10 03:23:38 -080057 if (local_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080058 return -ENOMEM;
59
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080060 main_table = fib_hash_table(RT_TABLE_MAIN);
Denis V. Lunev93456b62008-01-10 03:23:38 -080061 if (main_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080062 goto fail;
63
Denis V. Lunev93456b62008-01-10 03:23:38 -080064 hlist_add_head_rcu(&local_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080065 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
Denis V. Lunev93456b62008-01-10 03:23:38 -080066 hlist_add_head_rcu(&main_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080067 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080068 return 0;
69
70fail:
Denis V. Lunev93456b62008-01-10 03:23:38 -080071 kfree(local_table);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080072 return -ENOMEM;
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080073}
Linus Torvalds1da177e2005-04-16 15:20:36 -070074#else
75
Denis V. Lunev8ad49422008-01-10 03:24:11 -080076struct fib_table *fib_new_table(struct net *net, u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070077{
78 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070079 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070080
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070081 if (id == 0)
82 id = RT_TABLE_MAIN;
Denis V. Lunev8ad49422008-01-10 03:24:11 -080083 tb = fib_get_table(net, id);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070084 if (tb)
85 return tb;
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080086
87 tb = fib_hash_table(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -070088 if (!tb)
89 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070090 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080091 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070092 return tb;
93}
94
Denis V. Lunev8ad49422008-01-10 03:24:11 -080095struct fib_table *fib_get_table(struct net *net, u32 id)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070096{
97 struct fib_table *tb;
98 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080099 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700100 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700102 if (id == 0)
103 id = RT_TABLE_MAIN;
104 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800105
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700106 rcu_read_lock();
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800107 head = &net->ipv4.fib_table_hash[h];
108 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700109 if (tb->tb_id == id) {
110 rcu_read_unlock();
111 return tb;
112 }
113 }
114 rcu_read_unlock();
115 return NULL;
116}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117#endif /* CONFIG_IP_MULTIPLE_TABLES */
118
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800119static void fib_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120{
121 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700123 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800124 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700125 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700127 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800128 head = &net->ipv4.fib_table_hash[h];
129 hlist_for_each_entry(tb, node, head, tb_hlist)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700130 flushed += tb->tb_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132
133 if (flushed)
134 rt_cache_flush(-1);
135}
136
137/*
138 * Find the first device with a given source address.
139 */
140
Al Viro60cad5d2006-09-26 22:17:09 -0700141struct net_device * ip_dev_find(__be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142{
143 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
144 struct fib_result res;
145 struct net_device *dev = NULL;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700146 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147
148#ifdef CONFIG_IP_MULTIPLE_TABLES
149 res.r = NULL;
150#endif
151
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800152 local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700153 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154 return NULL;
155 if (res.type != RTN_LOCAL)
156 goto out;
157 dev = FIB_RES_DEV(res);
158
159 if (dev)
160 dev_hold(dev);
161out:
162 fib_res_put(&res);
163 return dev;
164}
165
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800166/*
167 * Find address type as if only "dev" was present in the system. If
168 * on_dev is NULL then all interfaces are taken into consideration.
169 */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800170static inline unsigned __inet_dev_addr_type(struct net *net,
171 const struct net_device *dev,
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800172 __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173{
174 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
175 struct fib_result res;
176 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700177 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178
Joe Perchesf97c1e02007-12-16 13:45:43 -0800179 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 return RTN_BROADCAST;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800181 if (ipv4_is_multicast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 return RTN_MULTICAST;
183
184#ifdef CONFIG_IP_MULTIPLE_TABLES
185 res.r = NULL;
186#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900187
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800188 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700189 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190 ret = RTN_UNICAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700191 if (!local_table->tb_lookup(local_table, &fl, &res)) {
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800192 if (!dev || dev == res.fi->fib_dev)
193 ret = res.type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 fib_res_put(&res);
195 }
196 }
197 return ret;
198}
199
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800200unsigned int inet_addr_type(struct net *net, __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800201{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800202 return __inet_dev_addr_type(net, NULL, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800203}
204
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800205unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
206 __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800207{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800208 return __inet_dev_addr_type(net, dev, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800209}
210
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211/* Given (packet source, input interface) and optional (dst, oif, tos):
212 - (main) check, that source is valid i.e. not broadcast or our local
213 address.
214 - figure out what "logical" interface this packet arrived
215 and calculate "specific destination" address.
216 - check, that packet arrived from expected physical interface.
217 */
218
Al Virod9c9df82006-09-26 21:28:14 -0700219int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
220 struct net_device *dev, __be32 *spec_dst, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221{
222 struct in_device *in_dev;
223 struct flowi fl = { .nl_u = { .ip4_u =
224 { .daddr = src,
225 .saddr = dst,
226 .tos = tos } },
227 .iif = oif };
228 struct fib_result res;
229 int no_addr, rpf;
230 int ret;
231
232 no_addr = rpf = 0;
233 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700234 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 if (in_dev) {
236 no_addr = in_dev->ifa_list == NULL;
237 rpf = IN_DEV_RPFILTER(in_dev);
238 }
239 rcu_read_unlock();
240
241 if (in_dev == NULL)
242 goto e_inval;
243
244 if (fib_lookup(&fl, &res))
245 goto last_resort;
246 if (res.type != RTN_UNICAST)
247 goto e_inval_res;
248 *spec_dst = FIB_RES_PREFSRC(res);
249 fib_combine_itag(itag, &res);
250#ifdef CONFIG_IP_ROUTE_MULTIPATH
251 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
252#else
253 if (FIB_RES_DEV(res) == dev)
254#endif
255 {
256 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
257 fib_res_put(&res);
258 return ret;
259 }
260 fib_res_put(&res);
261 if (no_addr)
262 goto last_resort;
263 if (rpf)
264 goto e_inval;
265 fl.oif = dev->ifindex;
266
267 ret = 0;
268 if (fib_lookup(&fl, &res) == 0) {
269 if (res.type == RTN_UNICAST) {
270 *spec_dst = FIB_RES_PREFSRC(res);
271 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
272 }
273 fib_res_put(&res);
274 }
275 return ret;
276
277last_resort:
278 if (rpf)
279 goto e_inval;
280 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
281 *itag = 0;
282 return 0;
283
284e_inval_res:
285 fib_res_put(&res);
286e_inval:
287 return -EINVAL;
288}
289
Al Viro81f7bf62006-09-27 18:40:00 -0700290static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700291{
292 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
293}
294
295static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
296{
297 struct nlattr *nla;
298
299 nla = (struct nlattr *) ((char *) mx + len);
300 nla->nla_type = type;
301 nla->nla_len = nla_attr_size(4);
302 *(u32 *) nla_data(nla) = value;
303
304 return len + nla_total_size(4);
305}
306
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800307static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
Thomas Graf4e902c52006-08-17 18:14:52 -0700308 struct fib_config *cfg)
309{
Al Viro6d85c102006-09-26 22:15:46 -0700310 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700311 int plen;
312
313 memset(cfg, 0, sizeof(*cfg));
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800314 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700315
316 if (rt->rt_dst.sa_family != AF_INET)
317 return -EAFNOSUPPORT;
318
319 /*
320 * Check mask for validity:
321 * a) it must be contiguous.
322 * b) destination must have all host bits clear.
323 * c) if application forgot to set correct family (AF_INET),
324 * reject request unless it is absolutely clear i.e.
325 * both family and mask are zero.
326 */
327 plen = 32;
328 addr = sk_extract_addr(&rt->rt_dst);
329 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700330 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700331
332 if (rt->rt_genmask.sa_family != AF_INET) {
333 if (mask || rt->rt_genmask.sa_family)
334 return -EAFNOSUPPORT;
335 }
336
337 if (bad_mask(mask, addr))
338 return -EINVAL;
339
340 plen = inet_mask_len(mask);
341 }
342
343 cfg->fc_dst_len = plen;
344 cfg->fc_dst = addr;
345
346 if (cmd != SIOCDELRT) {
347 cfg->fc_nlflags = NLM_F_CREATE;
348 cfg->fc_protocol = RTPROT_BOOT;
349 }
350
351 if (rt->rt_metric)
352 cfg->fc_priority = rt->rt_metric - 1;
353
354 if (rt->rt_flags & RTF_REJECT) {
355 cfg->fc_scope = RT_SCOPE_HOST;
356 cfg->fc_type = RTN_UNREACHABLE;
357 return 0;
358 }
359
360 cfg->fc_scope = RT_SCOPE_NOWHERE;
361 cfg->fc_type = RTN_UNICAST;
362
363 if (rt->rt_dev) {
364 char *colon;
365 struct net_device *dev;
366 char devname[IFNAMSIZ];
367
368 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
369 return -EFAULT;
370
371 devname[IFNAMSIZ-1] = 0;
372 colon = strchr(devname, ':');
373 if (colon)
374 *colon = 0;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800375 dev = __dev_get_by_name(net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700376 if (!dev)
377 return -ENODEV;
378 cfg->fc_oif = dev->ifindex;
379 if (colon) {
380 struct in_ifaddr *ifa;
381 struct in_device *in_dev = __in_dev_get_rtnl(dev);
382 if (!in_dev)
383 return -ENODEV;
384 *colon = ':';
385 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
386 if (strcmp(ifa->ifa_label, devname) == 0)
387 break;
388 if (ifa == NULL)
389 return -ENODEV;
390 cfg->fc_prefsrc = ifa->ifa_local;
391 }
392 }
393
394 addr = sk_extract_addr(&rt->rt_gateway);
395 if (rt->rt_gateway.sa_family == AF_INET && addr) {
396 cfg->fc_gw = addr;
397 if (rt->rt_flags & RTF_GATEWAY &&
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800398 inet_addr_type(net, addr) == RTN_UNICAST)
Thomas Graf4e902c52006-08-17 18:14:52 -0700399 cfg->fc_scope = RT_SCOPE_UNIVERSE;
400 }
401
402 if (cmd == SIOCDELRT)
403 return 0;
404
405 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
406 return -EINVAL;
407
408 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
409 cfg->fc_scope = RT_SCOPE_LINK;
410
411 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
412 struct nlattr *mx;
413 int len = 0;
414
415 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900416 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700417 return -ENOMEM;
418
419 if (rt->rt_flags & RTF_MTU)
420 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
421
422 if (rt->rt_flags & RTF_WINDOW)
423 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
424
425 if (rt->rt_flags & RTF_IRTT)
426 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
427
428 cfg->fc_mx = mx;
429 cfg->fc_mx_len = len;
430 }
431
432 return 0;
433}
434
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435/*
436 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
437 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900438
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800439int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440{
Thomas Graf4e902c52006-08-17 18:14:52 -0700441 struct fib_config cfg;
442 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444
445 switch (cmd) {
446 case SIOCADDRT: /* Add a route */
447 case SIOCDELRT: /* Delete a route */
448 if (!capable(CAP_NET_ADMIN))
449 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700450
451 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700453
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 rtnl_lock();
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800455 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700457 struct fib_table *tb;
458
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 if (cmd == SIOCDELRT) {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800460 tb = fib_get_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700462 err = tb->tb_delete(tb, &cfg);
463 else
464 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 } else {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800466 tb = fib_new_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700468 err = tb->tb_insert(tb, &cfg);
469 else
470 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700472
473 /* allocated by rtentry_to_fib_config() */
474 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 }
476 rtnl_unlock();
477 return err;
478 }
479 return -EINVAL;
480}
481
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700482const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700483 [RTA_DST] = { .type = NLA_U32 },
484 [RTA_SRC] = { .type = NLA_U32 },
485 [RTA_IIF] = { .type = NLA_U32 },
486 [RTA_OIF] = { .type = NLA_U32 },
487 [RTA_GATEWAY] = { .type = NLA_U32 },
488 [RTA_PRIORITY] = { .type = NLA_U32 },
489 [RTA_PREFSRC] = { .type = NLA_U32 },
490 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700491 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700492 [RTA_PROTOINFO] = { .type = NLA_U32 },
493 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700494};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800496static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
497 struct nlmsghdr *nlh, struct fib_config *cfg)
Thomas Graf4e902c52006-08-17 18:14:52 -0700498{
499 struct nlattr *attr;
500 int err, remaining;
501 struct rtmsg *rtm;
502
503 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
504 if (err < 0)
505 goto errout;
506
507 memset(cfg, 0, sizeof(*cfg));
508
509 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700510 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700511 cfg->fc_tos = rtm->rtm_tos;
512 cfg->fc_table = rtm->rtm_table;
513 cfg->fc_protocol = rtm->rtm_protocol;
514 cfg->fc_scope = rtm->rtm_scope;
515 cfg->fc_type = rtm->rtm_type;
516 cfg->fc_flags = rtm->rtm_flags;
517 cfg->fc_nlflags = nlh->nlmsg_flags;
518
519 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
520 cfg->fc_nlinfo.nlh = nlh;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800521 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700522
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700523 if (cfg->fc_type > RTN_MAX) {
524 err = -EINVAL;
525 goto errout;
526 }
527
Thomas Graf4e902c52006-08-17 18:14:52 -0700528 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200529 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700530 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700531 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700532 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700533 case RTA_OIF:
534 cfg->fc_oif = nla_get_u32(attr);
535 break;
536 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700537 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700538 break;
539 case RTA_PRIORITY:
540 cfg->fc_priority = nla_get_u32(attr);
541 break;
542 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700543 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700544 break;
545 case RTA_METRICS:
546 cfg->fc_mx = nla_data(attr);
547 cfg->fc_mx_len = nla_len(attr);
548 break;
549 case RTA_MULTIPATH:
550 cfg->fc_mp = nla_data(attr);
551 cfg->fc_mp_len = nla_len(attr);
552 break;
553 case RTA_FLOW:
554 cfg->fc_flow = nla_get_u32(attr);
555 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700556 case RTA_TABLE:
557 cfg->fc_table = nla_get_u32(attr);
558 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559 }
560 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700561
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700563errout:
564 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565}
566
Thomas Graf63f34442007-03-22 11:55:17 -0700567static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100569 struct net *net = skb->sk->sk_net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700570 struct fib_config cfg;
571 struct fib_table *tb;
572 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800574 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700575 if (err < 0)
576 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800578 tb = fib_get_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700579 if (tb == NULL) {
580 err = -ESRCH;
581 goto errout;
582 }
583
584 err = tb->tb_delete(tb, &cfg);
585errout:
586 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587}
588
Thomas Graf63f34442007-03-22 11:55:17 -0700589static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100591 struct net *net = skb->sk->sk_net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700592 struct fib_config cfg;
593 struct fib_table *tb;
594 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800596 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700597 if (err < 0)
598 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800600 tb = fib_new_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700601 if (tb == NULL) {
602 err = -ENOBUFS;
603 goto errout;
604 }
605
606 err = tb->tb_insert(tb, &cfg);
607errout:
608 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609}
610
Thomas Graf63f34442007-03-22 11:55:17 -0700611static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100613 struct net *net = skb->sk->sk_net;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700614 unsigned int h, s_h;
615 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700617 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800618 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700619 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620
Thomas Grafbe403ea2006-08-17 18:15:17 -0700621 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
622 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 return ip_rt_dump(skb, cb);
624
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700625 s_h = cb->args[0];
626 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700628 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
629 e = 0;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800630 head = &net->ipv4.fib_table_hash[h];
631 hlist_for_each_entry(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700632 if (e < s_e)
633 goto next;
634 if (dumped)
635 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900636 2 * sizeof(cb->args[0]));
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700637 if (tb->tb_dump(tb, skb, cb) < 0)
638 goto out;
639 dumped = 1;
640next:
641 e++;
642 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700644out:
645 cb->args[1] = e;
646 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647
648 return skb->len;
649}
650
651/* Prepare and feed intra-kernel routing request.
652 Really, it should be netlink message, but :-( netlink
653 can be not configured, so that we feed it directly
654 to fib engine. It is legal, because all events occur
655 only when netlink is already locked.
656 */
657
Al Viro81f7bf62006-09-27 18:40:00 -0700658static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659{
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800660 struct net *net = ifa->ifa_dev->dev->nd_net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700661 struct fib_table *tb;
662 struct fib_config cfg = {
663 .fc_protocol = RTPROT_KERNEL,
664 .fc_type = type,
665 .fc_dst = dst,
666 .fc_dst_len = dst_len,
667 .fc_prefsrc = ifa->ifa_local,
668 .fc_oif = ifa->ifa_dev->dev->ifindex,
669 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800670 .fc_nlinfo = {
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800671 .nl_net = net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800672 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700673 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674
675 if (type == RTN_UNICAST)
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800676 tb = fib_new_table(net, RT_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 else
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800678 tb = fib_new_table(net, RT_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679
680 if (tb == NULL)
681 return;
682
Thomas Graf4e902c52006-08-17 18:14:52 -0700683 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
Thomas Graf4e902c52006-08-17 18:14:52 -0700685 if (type != RTN_LOCAL)
686 cfg.fc_scope = RT_SCOPE_LINK;
687 else
688 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
690 if (cmd == RTM_NEWROUTE)
Thomas Graf4e902c52006-08-17 18:14:52 -0700691 tb->tb_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 else
Thomas Graf4e902c52006-08-17 18:14:52 -0700693 tb->tb_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694}
695
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800696void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697{
698 struct in_device *in_dev = ifa->ifa_dev;
699 struct net_device *dev = in_dev->dev;
700 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700701 __be32 mask = ifa->ifa_mask;
702 __be32 addr = ifa->ifa_local;
703 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704
705 if (ifa->ifa_flags&IFA_F_SECONDARY) {
706 prim = inet_ifa_byprefix(in_dev, prefix, mask);
707 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800708 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 return;
710 }
711 }
712
713 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
714
715 if (!(dev->flags&IFF_UP))
716 return;
717
718 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700719 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
721
Joe Perchesf97c1e02007-12-16 13:45:43 -0800722 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 (prefix != addr || ifa->ifa_prefixlen < 32)) {
724 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
725 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
726
727 /* Add network specific broadcasts, when it takes a sense */
728 if (ifa->ifa_prefixlen < 31) {
729 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
730 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
731 }
732 }
733}
734
735static void fib_del_ifaddr(struct in_ifaddr *ifa)
736{
737 struct in_device *in_dev = ifa->ifa_dev;
738 struct net_device *dev = in_dev->dev;
739 struct in_ifaddr *ifa1;
740 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700741 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
742 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743#define LOCAL_OK 1
744#define BRD_OK 2
745#define BRD0_OK 4
746#define BRD1_OK 8
747 unsigned ok = 0;
748
749 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
750 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
751 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
752 else {
753 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
754 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800755 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 return;
757 }
758 }
759
760 /* Deletion is more complicated than add.
761 We should take care of not to delete too much :-)
762
763 Scan address list to be sure that addresses are really gone.
764 */
765
766 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
767 if (ifa->ifa_local == ifa1->ifa_local)
768 ok |= LOCAL_OK;
769 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
770 ok |= BRD_OK;
771 if (brd == ifa1->ifa_broadcast)
772 ok |= BRD1_OK;
773 if (any == ifa1->ifa_broadcast)
774 ok |= BRD0_OK;
775 }
776
777 if (!(ok&BRD_OK))
778 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
779 if (!(ok&BRD1_OK))
780 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
781 if (!(ok&BRD0_OK))
782 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
783 if (!(ok&LOCAL_OK)) {
784 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
785
786 /* Check, that this local address finally disappeared. */
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800787 if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 /* And the last, but not the least thing.
789 We must flush stray FIB entries.
790
791 First of all, we scan fib_info list searching
792 for stray nexthop entries, then ignite fib_flush.
793 */
794 if (fib_sync_down(ifa->ifa_local, NULL, 0))
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800795 fib_flush(dev->nd_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796 }
797 }
798#undef LOCAL_OK
799#undef BRD_OK
800#undef BRD0_OK
801#undef BRD1_OK
802}
803
Robert Olsson246955f2005-06-20 13:36:39 -0700804static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
805{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900806
Robert Olsson246955f2005-06-20 13:36:39 -0700807 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800808 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800809 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700810 .tos = frn->fl_tos,
811 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700812
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700813#ifdef CONFIG_IP_MULTIPLE_TABLES
814 res.r = NULL;
815#endif
816
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700817 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700818 if (tb) {
819 local_bh_disable();
820
821 frn->tb_id = tb->tb_id;
822 frn->err = tb->tb_lookup(tb, &fl, &res);
823
824 if (!frn->err) {
825 frn->prefixlen = res.prefixlen;
826 frn->nh_sel = res.nh_sel;
827 frn->type = res.type;
828 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700829 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700830 }
831 local_bh_enable();
832 }
833}
834
David S. Miller28f7b0362007-10-10 21:32:39 -0700835static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700836{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800837 struct net *net;
Robert Olsson246955f2005-06-20 13:36:39 -0700838 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700839 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700840 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700841 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700842
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800843 net = skb->sk->sk_net;
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700844 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800845 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
Denis V. Lunevd883a032007-12-21 02:01:53 -0800846 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
Thomas Grafea865752005-12-01 14:30:00 -0800847 return;
Denis V. Lunevd883a032007-12-21 02:01:53 -0800848
849 skb = skb_clone(skb, GFP_KERNEL);
850 if (skb == NULL)
851 return;
852 nlh = nlmsg_hdr(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900853
Robert Olsson246955f2005-06-20 13:36:39 -0700854 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800855 tb = fib_get_table(net, frn->tb_id_in);
Robert Olsson246955f2005-06-20 13:36:39 -0700856
857 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900858
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700859 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700860 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700861 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800862 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900863}
Robert Olsson246955f2005-06-20 13:36:39 -0700864
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800865static int nl_fib_lookup_init(struct net *net)
Robert Olsson246955f2005-06-20 13:36:39 -0700866{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800867 struct sock *sk;
868 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
869 nl_fib_input, NULL, THIS_MODULE);
870 if (sk == NULL)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800871 return -EAFNOSUPPORT;
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800872 /* Don't hold an extra reference on the namespace */
873 put_net(sk->sk_net);
874 net->ipv4.fibnl = sk;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800875 return 0;
876}
877
878static void nl_fib_lookup_exit(struct net *net)
879{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800880 /* At the last minute lie and say this is a socket for the
881 * initial network namespace. So the socket will be safe to free.
882 */
883 net->ipv4.fibnl->sk_net = get_net(&init_net);
Denis V. Lunevb7c6ba62008-01-28 14:41:19 -0800884 netlink_kernel_release(net->ipv4.fibnl);
Robert Olsson246955f2005-06-20 13:36:39 -0700885}
886
Linus Torvalds1da177e2005-04-16 15:20:36 -0700887static void fib_disable_ip(struct net_device *dev, int force)
888{
889 if (fib_sync_down(0, dev, force))
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800890 fib_flush(dev->nd_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700891 rt_cache_flush(0);
892 arp_ifdown(dev);
893}
894
895static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
896{
897 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
898
899 switch (event) {
900 case NETDEV_UP:
901 fib_add_ifaddr(ifa);
902#ifdef CONFIG_IP_ROUTE_MULTIPATH
903 fib_sync_up(ifa->ifa_dev->dev);
904#endif
905 rt_cache_flush(-1);
906 break;
907 case NETDEV_DOWN:
908 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700909 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 /* Last address was deleted from this interface.
911 Disable IP.
912 */
913 fib_disable_ip(ifa->ifa_dev->dev, 1);
914 } else {
915 rt_cache_flush(-1);
916 }
917 break;
918 }
919 return NOTIFY_DONE;
920}
921
922static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
923{
924 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700925 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926
927 if (event == NETDEV_UNREGISTER) {
928 fib_disable_ip(dev, 2);
929 return NOTIFY_DONE;
930 }
931
932 if (!in_dev)
933 return NOTIFY_DONE;
934
935 switch (event) {
936 case NETDEV_UP:
937 for_ifa(in_dev) {
938 fib_add_ifaddr(ifa);
939 } endfor_ifa(in_dev);
940#ifdef CONFIG_IP_ROUTE_MULTIPATH
941 fib_sync_up(dev);
942#endif
943 rt_cache_flush(-1);
944 break;
945 case NETDEV_DOWN:
946 fib_disable_ip(dev, 0);
947 break;
948 case NETDEV_CHANGEMTU:
949 case NETDEV_CHANGE:
950 rt_cache_flush(0);
951 break;
952 }
953 return NOTIFY_DONE;
954}
955
956static struct notifier_block fib_inetaddr_notifier = {
957 .notifier_call =fib_inetaddr_event,
958};
959
960static struct notifier_block fib_netdev_notifier = {
961 .notifier_call =fib_netdev_event,
962};
963
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800964static int __net_init ip_fib_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965{
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700966 unsigned int i;
967
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800968 net->ipv4.fib_table_hash = kzalloc(
969 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
970 if (net->ipv4.fib_table_hash == NULL)
971 return -ENOMEM;
972
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700973 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800974 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -0800975
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800976 return fib4_rules_init(net);
977}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800979static void __net_exit ip_fib_net_exit(struct net *net)
980{
981 unsigned int i;
Thomas Graf63f34442007-03-22 11:55:17 -0700982
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800983#ifdef CONFIG_IP_MULTIPLE_TABLES
984 fib4_rules_exit(net);
985#endif
986
987 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
988 struct fib_table *tb;
989 struct hlist_head *head;
990 struct hlist_node *node, *tmp;
991
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800992 head = &net->ipv4.fib_table_hash[i];
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800993 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
994 hlist_del(node);
995 tb->tb_flush(tb);
996 kfree(tb);
997 }
998 }
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800999 kfree(net->ipv4.fib_table_hash);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001000}
1001
1002static int __net_init fib_net_init(struct net *net)
1003{
1004 int error;
1005
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001006 error = ip_fib_net_init(net);
1007 if (error < 0)
1008 goto out;
1009 error = nl_fib_lookup_init(net);
1010 if (error < 0)
1011 goto out_nlfl;
1012 error = fib_proc_init(net);
1013 if (error < 0)
1014 goto out_proc;
1015out:
1016 return error;
1017
1018out_proc:
1019 nl_fib_lookup_exit(net);
1020out_nlfl:
1021 ip_fib_net_exit(net);
1022 goto out;
1023}
1024
1025static void __net_exit fib_net_exit(struct net *net)
1026{
1027 fib_proc_exit(net);
1028 nl_fib_lookup_exit(net);
1029 ip_fib_net_exit(net);
1030}
1031
1032static struct pernet_operations fib_net_ops = {
1033 .init = fib_net_init,
1034 .exit = fib_net_exit,
1035};
1036
1037void __init ip_fib_init(void)
1038{
Thomas Graf63f34442007-03-22 11:55:17 -07001039 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1040 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1041 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001042
1043 register_pernet_subsys(&fib_net_ops);
1044 register_netdevice_notifier(&fib_netdev_notifier);
1045 register_inetaddr_notifier(&fib_inetaddr_notifier);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001046
1047 fib_hash_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048}
1049
/* Symbols exported from this file. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);