blob: e2f950592566ac93267dbd20bc85473d4a759d48 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080020#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/types.h>
22#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020030#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070032#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/if_arp.h>
34#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070036#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070043#include <net/arp.h>
44#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070045#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
Linus Torvalds1da177e2005-04-16 15:20:36 -070047#ifndef CONFIG_IP_MULTIPLE_TABLES
48
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -080049static int __net_init fib4_rules_init(struct net *net)
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080050{
Denis V. Lunev93456b62008-01-10 03:23:38 -080051 struct fib_table *local_table, *main_table;
52
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080053 local_table = fib_hash_table(RT_TABLE_LOCAL);
Denis V. Lunev93456b62008-01-10 03:23:38 -080054 if (local_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080055 return -ENOMEM;
56
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080057 main_table = fib_hash_table(RT_TABLE_MAIN);
Denis V. Lunev93456b62008-01-10 03:23:38 -080058 if (main_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080059 goto fail;
60
Denis V. Lunev93456b62008-01-10 03:23:38 -080061 hlist_add_head_rcu(&local_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080062 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
Denis V. Lunev93456b62008-01-10 03:23:38 -080063 hlist_add_head_rcu(&main_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080064 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080065 return 0;
66
67fail:
Denis V. Lunev93456b62008-01-10 03:23:38 -080068 kfree(local_table);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080069 return -ENOMEM;
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080070}
Linus Torvalds1da177e2005-04-16 15:20:36 -070071#else
72
Denis V. Lunev8ad49422008-01-10 03:24:11 -080073struct fib_table *fib_new_table(struct net *net, u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070074{
75 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070076 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070077
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070078 if (id == 0)
79 id = RT_TABLE_MAIN;
Denis V. Lunev8ad49422008-01-10 03:24:11 -080080 tb = fib_get_table(net, id);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070081 if (tb)
82 return tb;
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080083
84 tb = fib_hash_table(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 if (!tb)
86 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070087 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080088 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070089 return tb;
90}
91
Denis V. Lunev8ad49422008-01-10 03:24:11 -080092struct fib_table *fib_get_table(struct net *net, u32 id)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070093{
94 struct fib_table *tb;
95 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080096 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070097 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070098
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070099 if (id == 0)
100 id = RT_TABLE_MAIN;
101 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800102
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700103 rcu_read_lock();
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800104 head = &net->ipv4.fib_table_hash[h];
105 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700106 if (tb->tb_id == id) {
107 rcu_read_unlock();
108 return tb;
109 }
110 }
111 rcu_read_unlock();
112 return NULL;
113}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114#endif /* CONFIG_IP_MULTIPLE_TABLES */
115
Denis V. Lunev010278e2008-01-22 22:04:04 -0800116void fib_select_default(struct net *net,
117 const struct flowi *flp, struct fib_result *res)
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800118{
119 struct fib_table *tb;
120 int table = RT_TABLE_MAIN;
121#ifdef CONFIG_IP_MULTIPLE_TABLES
122 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123 return;
124 table = res->r->table;
125#endif
Denis V. Lunev010278e2008-01-22 22:04:04 -0800126 tb = fib_get_table(net, table);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800127 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
128 tb->tb_select_default(tb, flp, res);
129}
130
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800131static void fib_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132{
133 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700135 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800136 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700137 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700139 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800140 head = &net->ipv4.fib_table_hash[h];
141 hlist_for_each_entry(tb, node, head, tb_hlist)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700142 flushed += tb->tb_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144
145 if (flushed)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700146 rt_cache_flush(net, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147}
148
149/*
150 * Find the first device with a given source address.
151 */
152
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800153struct net_device * ip_dev_find(struct net *net, __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154{
155 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156 struct fib_result res;
157 struct net_device *dev = NULL;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700158 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161 res.r = NULL;
162#endif
163
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800164 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700165 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 return NULL;
167 if (res.type != RTN_LOCAL)
168 goto out;
169 dev = FIB_RES_DEV(res);
170
171 if (dev)
172 dev_hold(dev);
173out:
174 fib_res_put(&res);
175 return dev;
176}
177
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800178/*
179 * Find address type as if only "dev" was present in the system. If
180 * on_dev is NULL then all interfaces are taken into consideration.
181 */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800182static inline unsigned __inet_dev_addr_type(struct net *net,
183 const struct net_device *dev,
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800184 __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185{
186 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187 struct fib_result res;
188 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700189 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190
Jan Engelhardt1e637c72008-01-21 03:18:08 -0800191 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192 return RTN_BROADCAST;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800193 if (ipv4_is_multicast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 return RTN_MULTICAST;
195
196#ifdef CONFIG_IP_MULTIPLE_TABLES
197 res.r = NULL;
198#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900199
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800200 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700201 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202 ret = RTN_UNICAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700203 if (!local_table->tb_lookup(local_table, &fl, &res)) {
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800204 if (!dev || dev == res.fi->fib_dev)
205 ret = res.type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 fib_res_put(&res);
207 }
208 }
209 return ret;
210}
211
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800212unsigned int inet_addr_type(struct net *net, __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800213{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800214 return __inet_dev_addr_type(net, NULL, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800215}
216
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800217unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218 __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800219{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800220 return __inet_dev_addr_type(net, dev, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800221}
222
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223/* Given (packet source, input interface) and optional (dst, oif, tos):
224 - (main) check, that source is valid i.e. not broadcast or our local
225 address.
226 - figure out what "logical" interface this packet arrived
227 and calculate "specific destination" address.
228 - check, that packet arrived from expected physical interface.
229 */
230
Al Virod9c9df82006-09-26 21:28:14 -0700231int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
232 struct net_device *dev, __be32 *spec_dst, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233{
234 struct in_device *in_dev;
235 struct flowi fl = { .nl_u = { .ip4_u =
236 { .daddr = src,
237 .saddr = dst,
238 .tos = tos } },
239 .iif = oif };
240 struct fib_result res;
241 int no_addr, rpf;
242 int ret;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800243 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244
245 no_addr = rpf = 0;
246 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700247 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 if (in_dev) {
249 no_addr = in_dev->ifa_list == NULL;
250 rpf = IN_DEV_RPFILTER(in_dev);
251 }
252 rcu_read_unlock();
253
254 if (in_dev == NULL)
255 goto e_inval;
256
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900257 net = dev_net(dev);
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800258 if (fib_lookup(net, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 goto last_resort;
260 if (res.type != RTN_UNICAST)
261 goto e_inval_res;
262 *spec_dst = FIB_RES_PREFSRC(res);
263 fib_combine_itag(itag, &res);
264#ifdef CONFIG_IP_ROUTE_MULTIPATH
265 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
266#else
267 if (FIB_RES_DEV(res) == dev)
268#endif
269 {
270 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
271 fib_res_put(&res);
272 return ret;
273 }
274 fib_res_put(&res);
275 if (no_addr)
276 goto last_resort;
Stephen Hemmingerc1cf8422009-02-20 08:25:36 +0000277 if (rpf == 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 goto e_inval;
279 fl.oif = dev->ifindex;
280
281 ret = 0;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800282 if (fib_lookup(net, &fl, &res) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 if (res.type == RTN_UNICAST) {
284 *spec_dst = FIB_RES_PREFSRC(res);
285 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
286 }
287 fib_res_put(&res);
288 }
289 return ret;
290
291last_resort:
292 if (rpf)
293 goto e_inval;
294 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
295 *itag = 0;
296 return 0;
297
298e_inval_res:
299 fib_res_put(&res);
300e_inval:
301 return -EINVAL;
302}
303
Al Viro81f7bf62006-09-27 18:40:00 -0700304static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700305{
306 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
307}
308
309static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
310{
311 struct nlattr *nla;
312
313 nla = (struct nlattr *) ((char *) mx + len);
314 nla->nla_type = type;
315 nla->nla_len = nla_attr_size(4);
316 *(u32 *) nla_data(nla) = value;
317
318 return len + nla_total_size(4);
319}
320
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800321static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
Thomas Graf4e902c52006-08-17 18:14:52 -0700322 struct fib_config *cfg)
323{
Al Viro6d85c102006-09-26 22:15:46 -0700324 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700325 int plen;
326
327 memset(cfg, 0, sizeof(*cfg));
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800328 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700329
330 if (rt->rt_dst.sa_family != AF_INET)
331 return -EAFNOSUPPORT;
332
333 /*
334 * Check mask for validity:
335 * a) it must be contiguous.
336 * b) destination must have all host bits clear.
337 * c) if application forgot to set correct family (AF_INET),
338 * reject request unless it is absolutely clear i.e.
339 * both family and mask are zero.
340 */
341 plen = 32;
342 addr = sk_extract_addr(&rt->rt_dst);
343 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700344 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700345
346 if (rt->rt_genmask.sa_family != AF_INET) {
347 if (mask || rt->rt_genmask.sa_family)
348 return -EAFNOSUPPORT;
349 }
350
351 if (bad_mask(mask, addr))
352 return -EINVAL;
353
354 plen = inet_mask_len(mask);
355 }
356
357 cfg->fc_dst_len = plen;
358 cfg->fc_dst = addr;
359
360 if (cmd != SIOCDELRT) {
361 cfg->fc_nlflags = NLM_F_CREATE;
362 cfg->fc_protocol = RTPROT_BOOT;
363 }
364
365 if (rt->rt_metric)
366 cfg->fc_priority = rt->rt_metric - 1;
367
368 if (rt->rt_flags & RTF_REJECT) {
369 cfg->fc_scope = RT_SCOPE_HOST;
370 cfg->fc_type = RTN_UNREACHABLE;
371 return 0;
372 }
373
374 cfg->fc_scope = RT_SCOPE_NOWHERE;
375 cfg->fc_type = RTN_UNICAST;
376
377 if (rt->rt_dev) {
378 char *colon;
379 struct net_device *dev;
380 char devname[IFNAMSIZ];
381
382 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
383 return -EFAULT;
384
385 devname[IFNAMSIZ-1] = 0;
386 colon = strchr(devname, ':');
387 if (colon)
388 *colon = 0;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800389 dev = __dev_get_by_name(net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700390 if (!dev)
391 return -ENODEV;
392 cfg->fc_oif = dev->ifindex;
393 if (colon) {
394 struct in_ifaddr *ifa;
395 struct in_device *in_dev = __in_dev_get_rtnl(dev);
396 if (!in_dev)
397 return -ENODEV;
398 *colon = ':';
399 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
400 if (strcmp(ifa->ifa_label, devname) == 0)
401 break;
402 if (ifa == NULL)
403 return -ENODEV;
404 cfg->fc_prefsrc = ifa->ifa_local;
405 }
406 }
407
408 addr = sk_extract_addr(&rt->rt_gateway);
409 if (rt->rt_gateway.sa_family == AF_INET && addr) {
410 cfg->fc_gw = addr;
411 if (rt->rt_flags & RTF_GATEWAY &&
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800412 inet_addr_type(net, addr) == RTN_UNICAST)
Thomas Graf4e902c52006-08-17 18:14:52 -0700413 cfg->fc_scope = RT_SCOPE_UNIVERSE;
414 }
415
416 if (cmd == SIOCDELRT)
417 return 0;
418
419 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
420 return -EINVAL;
421
422 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
423 cfg->fc_scope = RT_SCOPE_LINK;
424
425 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
426 struct nlattr *mx;
427 int len = 0;
428
429 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900430 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700431 return -ENOMEM;
432
433 if (rt->rt_flags & RTF_MTU)
434 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
435
436 if (rt->rt_flags & RTF_WINDOW)
437 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
438
439 if (rt->rt_flags & RTF_IRTT)
440 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
441
442 cfg->fc_mx = mx;
443 cfg->fc_mx_len = len;
444 }
445
446 return 0;
447}
448
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449/*
450 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
451 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900452
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800453int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454{
Thomas Graf4e902c52006-08-17 18:14:52 -0700455 struct fib_config cfg;
456 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458
459 switch (cmd) {
460 case SIOCADDRT: /* Add a route */
461 case SIOCDELRT: /* Delete a route */
462 if (!capable(CAP_NET_ADMIN))
463 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700464
465 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700467
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 rtnl_lock();
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800469 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700471 struct fib_table *tb;
472
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 if (cmd == SIOCDELRT) {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800474 tb = fib_get_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700476 err = tb->tb_delete(tb, &cfg);
477 else
478 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 } else {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800480 tb = fib_new_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700482 err = tb->tb_insert(tb, &cfg);
483 else
484 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700486
487 /* allocated by rtentry_to_fib_config() */
488 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489 }
490 rtnl_unlock();
491 return err;
492 }
493 return -EINVAL;
494}
495
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700496const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700497 [RTA_DST] = { .type = NLA_U32 },
498 [RTA_SRC] = { .type = NLA_U32 },
499 [RTA_IIF] = { .type = NLA_U32 },
500 [RTA_OIF] = { .type = NLA_U32 },
501 [RTA_GATEWAY] = { .type = NLA_U32 },
502 [RTA_PRIORITY] = { .type = NLA_U32 },
503 [RTA_PREFSRC] = { .type = NLA_U32 },
504 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700505 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700506 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700507};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800509static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
510 struct nlmsghdr *nlh, struct fib_config *cfg)
Thomas Graf4e902c52006-08-17 18:14:52 -0700511{
512 struct nlattr *attr;
513 int err, remaining;
514 struct rtmsg *rtm;
515
516 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
517 if (err < 0)
518 goto errout;
519
520 memset(cfg, 0, sizeof(*cfg));
521
522 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700523 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700524 cfg->fc_tos = rtm->rtm_tos;
525 cfg->fc_table = rtm->rtm_table;
526 cfg->fc_protocol = rtm->rtm_protocol;
527 cfg->fc_scope = rtm->rtm_scope;
528 cfg->fc_type = rtm->rtm_type;
529 cfg->fc_flags = rtm->rtm_flags;
530 cfg->fc_nlflags = nlh->nlmsg_flags;
531
532 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
533 cfg->fc_nlinfo.nlh = nlh;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800534 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700535
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700536 if (cfg->fc_type > RTN_MAX) {
537 err = -EINVAL;
538 goto errout;
539 }
540
Thomas Graf4e902c52006-08-17 18:14:52 -0700541 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200542 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700543 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700544 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700545 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700546 case RTA_OIF:
547 cfg->fc_oif = nla_get_u32(attr);
548 break;
549 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700550 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700551 break;
552 case RTA_PRIORITY:
553 cfg->fc_priority = nla_get_u32(attr);
554 break;
555 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700556 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700557 break;
558 case RTA_METRICS:
559 cfg->fc_mx = nla_data(attr);
560 cfg->fc_mx_len = nla_len(attr);
561 break;
562 case RTA_MULTIPATH:
563 cfg->fc_mp = nla_data(attr);
564 cfg->fc_mp_len = nla_len(attr);
565 break;
566 case RTA_FLOW:
567 cfg->fc_flow = nla_get_u32(attr);
568 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700569 case RTA_TABLE:
570 cfg->fc_table = nla_get_u32(attr);
571 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 }
573 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700574
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700576errout:
577 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578}
579
Jianjun Kong6ed25332008-11-03 00:25:16 -0800580static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900582 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700583 struct fib_config cfg;
584 struct fib_table *tb;
585 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800587 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700588 if (err < 0)
589 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800591 tb = fib_get_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700592 if (tb == NULL) {
593 err = -ESRCH;
594 goto errout;
595 }
596
597 err = tb->tb_delete(tb, &cfg);
598errout:
599 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600}
601
Jianjun Kong6ed25332008-11-03 00:25:16 -0800602static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900604 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700605 struct fib_config cfg;
606 struct fib_table *tb;
607 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800609 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700610 if (err < 0)
611 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800613 tb = fib_new_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700614 if (tb == NULL) {
615 err = -ENOBUFS;
616 goto errout;
617 }
618
619 err = tb->tb_insert(tb, &cfg);
620errout:
621 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622}
623
Thomas Graf63f34442007-03-22 11:55:17 -0700624static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900626 struct net *net = sock_net(skb->sk);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700627 unsigned int h, s_h;
628 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700630 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800631 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700632 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
Thomas Grafbe403ea2006-08-17 18:15:17 -0700634 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
635 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 return ip_rt_dump(skb, cb);
637
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700638 s_h = cb->args[0];
639 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700641 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
642 e = 0;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800643 head = &net->ipv4.fib_table_hash[h];
644 hlist_for_each_entry(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700645 if (e < s_e)
646 goto next;
647 if (dumped)
648 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900649 2 * sizeof(cb->args[0]));
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700650 if (tb->tb_dump(tb, skb, cb) < 0)
651 goto out;
652 dumped = 1;
653next:
654 e++;
655 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700657out:
658 cb->args[1] = e;
659 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660
661 return skb->len;
662}
663
664/* Prepare and feed intra-kernel routing request.
665 Really, it should be netlink message, but :-( netlink
666 can be not configured, so that we feed it directly
667 to fib engine. It is legal, because all events occur
668 only when netlink is already locked.
669 */
670
Al Viro81f7bf62006-09-27 18:40:00 -0700671static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900673 struct net *net = dev_net(ifa->ifa_dev->dev);
Thomas Graf4e902c52006-08-17 18:14:52 -0700674 struct fib_table *tb;
675 struct fib_config cfg = {
676 .fc_protocol = RTPROT_KERNEL,
677 .fc_type = type,
678 .fc_dst = dst,
679 .fc_dst_len = dst_len,
680 .fc_prefsrc = ifa->ifa_local,
681 .fc_oif = ifa->ifa_dev->dev->ifindex,
682 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800683 .fc_nlinfo = {
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800684 .nl_net = net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800685 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700686 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687
688 if (type == RTN_UNICAST)
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800689 tb = fib_new_table(net, RT_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 else
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800691 tb = fib_new_table(net, RT_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692
693 if (tb == NULL)
694 return;
695
Thomas Graf4e902c52006-08-17 18:14:52 -0700696 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697
Thomas Graf4e902c52006-08-17 18:14:52 -0700698 if (type != RTN_LOCAL)
699 cfg.fc_scope = RT_SCOPE_LINK;
700 else
701 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702
703 if (cmd == RTM_NEWROUTE)
Thomas Graf4e902c52006-08-17 18:14:52 -0700704 tb->tb_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 else
Thomas Graf4e902c52006-08-17 18:14:52 -0700706 tb->tb_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707}
708
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800709void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710{
711 struct in_device *in_dev = ifa->ifa_dev;
712 struct net_device *dev = in_dev->dev;
713 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700714 __be32 mask = ifa->ifa_mask;
715 __be32 addr = ifa->ifa_local;
716 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
718 if (ifa->ifa_flags&IFA_F_SECONDARY) {
719 prim = inet_ifa_byprefix(in_dev, prefix, mask);
720 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800721 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 return;
723 }
724 }
725
726 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
727
728 if (!(dev->flags&IFF_UP))
729 return;
730
731 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700732 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
734
Joe Perchesf97c1e02007-12-16 13:45:43 -0800735 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736 (prefix != addr || ifa->ifa_prefixlen < 32)) {
737 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
738 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
739
740 /* Add network specific broadcasts, when it takes a sense */
741 if (ifa->ifa_prefixlen < 31) {
742 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
743 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
744 }
745 }
746}
747
748static void fib_del_ifaddr(struct in_ifaddr *ifa)
749{
750 struct in_device *in_dev = ifa->ifa_dev;
751 struct net_device *dev = in_dev->dev;
752 struct in_ifaddr *ifa1;
753 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700754 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
755 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756#define LOCAL_OK 1
757#define BRD_OK 2
758#define BRD0_OK 4
759#define BRD1_OK 8
760 unsigned ok = 0;
761
762 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
763 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
764 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
765 else {
766 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
767 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800768 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769 return;
770 }
771 }
772
773 /* Deletion is more complicated than add.
774 We should take care of not to delete too much :-)
775
776 Scan address list to be sure that addresses are really gone.
777 */
778
779 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
780 if (ifa->ifa_local == ifa1->ifa_local)
781 ok |= LOCAL_OK;
782 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
783 ok |= BRD_OK;
784 if (brd == ifa1->ifa_broadcast)
785 ok |= BRD1_OK;
786 if (any == ifa1->ifa_broadcast)
787 ok |= BRD0_OK;
788 }
789
790 if (!(ok&BRD_OK))
791 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
792 if (!(ok&BRD1_OK))
793 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
794 if (!(ok&BRD0_OK))
795 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
796 if (!(ok&LOCAL_OK)) {
797 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
798
799 /* Check, that this local address finally disappeared. */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900800 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801 /* And the last, but not the least thing.
802 We must flush stray FIB entries.
803
804 First of all, we scan fib_info list searching
805 for stray nexthop entries, then ignite fib_flush.
806 */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900807 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
808 fib_flush(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 }
810 }
811#undef LOCAL_OK
812#undef BRD_OK
813#undef BRD0_OK
814#undef BRD1_OK
815}
816
Robert Olsson246955f2005-06-20 13:36:39 -0700817static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
818{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900819
Robert Olsson246955f2005-06-20 13:36:39 -0700820 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800821 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800822 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700823 .tos = frn->fl_tos,
824 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700825
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700826#ifdef CONFIG_IP_MULTIPLE_TABLES
827 res.r = NULL;
828#endif
829
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700830 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700831 if (tb) {
832 local_bh_disable();
833
834 frn->tb_id = tb->tb_id;
835 frn->err = tb->tb_lookup(tb, &fl, &res);
836
837 if (!frn->err) {
838 frn->prefixlen = res.prefixlen;
839 frn->nh_sel = res.nh_sel;
840 frn->type = res.type;
841 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700842 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700843 }
844 local_bh_enable();
845 }
846}
847
David S. Miller28f7b0362007-10-10 21:32:39 -0700848static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700849{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800850 struct net *net;
Robert Olsson246955f2005-06-20 13:36:39 -0700851 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700852 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700853 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700854 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700855
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900856 net = sock_net(skb->sk);
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700857 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800858 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
Denis V. Lunevd883a032007-12-21 02:01:53 -0800859 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
Thomas Grafea865752005-12-01 14:30:00 -0800860 return;
Denis V. Lunevd883a032007-12-21 02:01:53 -0800861
862 skb = skb_clone(skb, GFP_KERNEL);
863 if (skb == NULL)
864 return;
865 nlh = nlmsg_hdr(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900866
Robert Olsson246955f2005-06-20 13:36:39 -0700867 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800868 tb = fib_get_table(net, frn->tb_id_in);
Robert Olsson246955f2005-06-20 13:36:39 -0700869
870 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900871
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700872 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700873 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700874 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800875 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900876}
Robert Olsson246955f2005-06-20 13:36:39 -0700877
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800878static int nl_fib_lookup_init(struct net *net)
Robert Olsson246955f2005-06-20 13:36:39 -0700879{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800880 struct sock *sk;
881 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
882 nl_fib_input, NULL, THIS_MODULE);
883 if (sk == NULL)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800884 return -EAFNOSUPPORT;
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800885 net->ipv4.fibnl = sk;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800886 return 0;
887}
888
889static void nl_fib_lookup_exit(struct net *net)
890{
Denis V. Lunevb7c6ba62008-01-28 14:41:19 -0800891 netlink_kernel_release(net->ipv4.fibnl);
Denis V. Lunev775516b2008-01-18 23:55:19 -0800892 net->ipv4.fibnl = NULL;
Robert Olsson246955f2005-06-20 13:36:39 -0700893}
894
/*
 * Tear down IPv4 routing state for @dev.
 *
 * @force is passed through to fib_sync_down_dev(); the FIB is flushed
 * only when that call returns non-zero.  The route cache is then flushed
 * with a delay argument of 0 and arp_ifdown() is called for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);

	rt_cache_flush(net, 0);
	arp_ifdown(dev);
}
902
903static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
904{
Jianjun Kong6ed25332008-11-03 00:25:16 -0800905 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700906 struct net_device *dev = ifa->ifa_dev->dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907
908 switch (event) {
909 case NETDEV_UP:
910 fib_add_ifaddr(ifa);
911#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700912 fib_sync_up(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700914 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 break;
916 case NETDEV_DOWN:
917 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700918 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 /* Last address was deleted from this interface.
920 Disable IP.
921 */
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700922 fib_disable_ip(dev, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 } else {
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700924 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 }
926 break;
927 }
928 return NOTIFY_DONE;
929}
930
931static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
932{
933 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700934 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935
936 if (event == NETDEV_UNREGISTER) {
937 fib_disable_ip(dev, 2);
938 return NOTIFY_DONE;
939 }
940
941 if (!in_dev)
942 return NOTIFY_DONE;
943
944 switch (event) {
945 case NETDEV_UP:
946 for_ifa(in_dev) {
947 fib_add_ifaddr(ifa);
948 } endfor_ifa(in_dev);
949#ifdef CONFIG_IP_ROUTE_MULTIPATH
950 fib_sync_up(dev);
951#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700952 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 break;
954 case NETDEV_DOWN:
955 fib_disable_ip(dev, 0);
956 break;
957 case NETDEV_CHANGEMTU:
958 case NETDEV_CHANGE:
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700959 rt_cache_flush(dev_net(dev), 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 break;
961 }
962 return NOTIFY_DONE;
963}
964
965static struct notifier_block fib_inetaddr_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800966 .notifier_call = fib_inetaddr_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967};
968
969static struct notifier_block fib_netdev_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800970 .notifier_call = fib_netdev_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971};
972
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800973static int __net_init ip_fib_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974{
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -0800975 int err;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700976 unsigned int i;
977
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800978 net->ipv4.fib_table_hash = kzalloc(
979 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
980 if (net->ipv4.fib_table_hash == NULL)
981 return -ENOMEM;
982
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700983 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800984 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -0800985
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -0800986 err = fib4_rules_init(net);
987 if (err < 0)
988 goto fail;
989 return 0;
990
991fail:
992 kfree(net->ipv4.fib_table_hash);
993 return err;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800994}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800996static void __net_exit ip_fib_net_exit(struct net *net)
997{
998 unsigned int i;
Thomas Graf63f34442007-03-22 11:55:17 -0700999
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001000#ifdef CONFIG_IP_MULTIPLE_TABLES
1001 fib4_rules_exit(net);
1002#endif
1003
1004 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1005 struct fib_table *tb;
1006 struct hlist_head *head;
1007 struct hlist_node *node, *tmp;
1008
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001009 head = &net->ipv4.fib_table_hash[i];
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001010 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1011 hlist_del(node);
1012 tb->tb_flush(tb);
1013 kfree(tb);
1014 }
1015 }
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001016 kfree(net->ipv4.fib_table_hash);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001017}
1018
1019static int __net_init fib_net_init(struct net *net)
1020{
1021 int error;
1022
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001023 error = ip_fib_net_init(net);
1024 if (error < 0)
1025 goto out;
1026 error = nl_fib_lookup_init(net);
1027 if (error < 0)
1028 goto out_nlfl;
1029 error = fib_proc_init(net);
1030 if (error < 0)
1031 goto out_proc;
1032out:
1033 return error;
1034
1035out_proc:
1036 nl_fib_lookup_exit(net);
1037out_nlfl:
1038 ip_fib_net_exit(net);
1039 goto out;
1040}
1041
1042static void __net_exit fib_net_exit(struct net *net)
1043{
1044 fib_proc_exit(net);
1045 nl_fib_lookup_exit(net);
1046 ip_fib_net_exit(net);
1047}
1048
1049static struct pernet_operations fib_net_ops = {
1050 .init = fib_net_init,
1051 .exit = fib_net_exit,
1052};
1053
1054void __init ip_fib_init(void)
1055{
Thomas Graf63f34442007-03-22 11:55:17 -07001056 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1057 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1058 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001059
1060 register_pernet_subsys(&fib_net_ops);
1061 register_netdevice_notifier(&fib_netdev_notifier);
1062 register_inetaddr_notifier(&fib_inetaddr_notifier);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001063
1064 fib_hash_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065}
1066
/* Symbols exported with EXPORT_SYMBOL(). */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);