blob: 82dbf711d6d0ef2a704e6a636131242f379011c8 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080020#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/types.h>
22#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020030#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070032#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/if_arp.h>
34#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070036#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070043#include <net/arp.h>
44#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070045#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
Linus Torvalds1da177e2005-04-16 15:20:36 -070047#ifndef CONFIG_IP_MULTIPLE_TABLES
48
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -080049static int __net_init fib4_rules_init(struct net *net)
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080050{
Denis V. Lunev93456b62008-01-10 03:23:38 -080051 struct fib_table *local_table, *main_table;
52
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080053 local_table = fib_hash_table(RT_TABLE_LOCAL);
Denis V. Lunev93456b62008-01-10 03:23:38 -080054 if (local_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080055 return -ENOMEM;
56
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080057 main_table = fib_hash_table(RT_TABLE_MAIN);
Denis V. Lunev93456b62008-01-10 03:23:38 -080058 if (main_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080059 goto fail;
60
Denis V. Lunev93456b62008-01-10 03:23:38 -080061 hlist_add_head_rcu(&local_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080062 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
Denis V. Lunev93456b62008-01-10 03:23:38 -080063 hlist_add_head_rcu(&main_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080064 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080065 return 0;
66
67fail:
Denis V. Lunev93456b62008-01-10 03:23:38 -080068 kfree(local_table);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080069 return -ENOMEM;
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080070}
Linus Torvalds1da177e2005-04-16 15:20:36 -070071#else
72
Denis V. Lunev8ad49422008-01-10 03:24:11 -080073struct fib_table *fib_new_table(struct net *net, u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070074{
75 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070076 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070077
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070078 if (id == 0)
79 id = RT_TABLE_MAIN;
Denis V. Lunev8ad49422008-01-10 03:24:11 -080080 tb = fib_get_table(net, id);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070081 if (tb)
82 return tb;
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080083
84 tb = fib_hash_table(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 if (!tb)
86 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070087 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080088 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070089 return tb;
90}
91
Denis V. Lunev8ad49422008-01-10 03:24:11 -080092struct fib_table *fib_get_table(struct net *net, u32 id)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070093{
94 struct fib_table *tb;
95 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080096 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070097 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070098
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070099 if (id == 0)
100 id = RT_TABLE_MAIN;
101 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800102
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700103 rcu_read_lock();
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800104 head = &net->ipv4.fib_table_hash[h];
105 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700106 if (tb->tb_id == id) {
107 rcu_read_unlock();
108 return tb;
109 }
110 }
111 rcu_read_unlock();
112 return NULL;
113}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114#endif /* CONFIG_IP_MULTIPLE_TABLES */
115
Denis V. Lunev010278e2008-01-22 22:04:04 -0800116void fib_select_default(struct net *net,
117 const struct flowi *flp, struct fib_result *res)
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800118{
119 struct fib_table *tb;
120 int table = RT_TABLE_MAIN;
121#ifdef CONFIG_IP_MULTIPLE_TABLES
122 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123 return;
124 table = res->r->table;
125#endif
Denis V. Lunev010278e2008-01-22 22:04:04 -0800126 tb = fib_get_table(net, table);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800127 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000128 fib_table_select_default(tb, flp, res);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800129}
130
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800131static void fib_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132{
133 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700135 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800136 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700137 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700139 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800140 head = &net->ipv4.fib_table_hash[h];
141 hlist_for_each_entry(tb, node, head, tb_hlist)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000142 flushed += fib_table_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144
145 if (flushed)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700146 rt_cache_flush(net, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147}
148
149/*
150 * Find the first device with a given source address.
151 */
152
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800153struct net_device * ip_dev_find(struct net *net, __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154{
155 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156 struct fib_result res;
157 struct net_device *dev = NULL;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700158 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161 res.r = NULL;
162#endif
163
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800164 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000165 if (!local_table || fib_table_lookup(local_table, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 return NULL;
167 if (res.type != RTN_LOCAL)
168 goto out;
169 dev = FIB_RES_DEV(res);
170
171 if (dev)
172 dev_hold(dev);
173out:
174 fib_res_put(&res);
175 return dev;
176}
177
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800178/*
179 * Find address type as if only "dev" was present in the system. If
180 * on_dev is NULL then all interfaces are taken into consideration.
181 */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800182static inline unsigned __inet_dev_addr_type(struct net *net,
183 const struct net_device *dev,
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800184 __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185{
186 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187 struct fib_result res;
188 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700189 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190
Jan Engelhardt1e637c72008-01-21 03:18:08 -0800191 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192 return RTN_BROADCAST;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800193 if (ipv4_is_multicast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 return RTN_MULTICAST;
195
196#ifdef CONFIG_IP_MULTIPLE_TABLES
197 res.r = NULL;
198#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900199
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800200 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700201 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202 ret = RTN_UNICAST;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000203 if (!fib_table_lookup(local_table, &fl, &res)) {
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800204 if (!dev || dev == res.fi->fib_dev)
205 ret = res.type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 fib_res_put(&res);
207 }
208 }
209 return ret;
210}
211
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800212unsigned int inet_addr_type(struct net *net, __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800213{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800214 return __inet_dev_addr_type(net, NULL, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800215}
216
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800217unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218 __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800219{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800220 return __inet_dev_addr_type(net, dev, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800221}
222
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223/* Given (packet source, input interface) and optional (dst, oif, tos):
224 - (main) check, that source is valid i.e. not broadcast or our local
225 address.
226 - figure out what "logical" interface this packet arrived
227 and calculate "specific destination" address.
228 - check, that packet arrived from expected physical interface.
229 */
230
Al Virod9c9df82006-09-26 21:28:14 -0700231int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
jamalb0c110c2009-10-18 02:12:33 +0000232 struct net_device *dev, __be32 *spec_dst,
233 u32 *itag, u32 mark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234{
235 struct in_device *in_dev;
236 struct flowi fl = { .nl_u = { .ip4_u =
237 { .daddr = src,
238 .saddr = dst,
239 .tos = tos } },
jamalb0c110c2009-10-18 02:12:33 +0000240 .mark = mark,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 .iif = oif };
jamalb0c110c2009-10-18 02:12:33 +0000242
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 struct fib_result res;
Patrick McHardy8153a102009-12-03 01:25:58 +0000244 int no_addr, rpf, accept_local;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 int ret;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800246 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247
Patrick McHardy8153a102009-12-03 01:25:58 +0000248 no_addr = rpf = accept_local = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700250 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 if (in_dev) {
252 no_addr = in_dev->ifa_list == NULL;
253 rpf = IN_DEV_RPFILTER(in_dev);
Patrick McHardy8153a102009-12-03 01:25:58 +0000254 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
Jamal Hadi Salim28f6aee2009-12-25 17:30:22 -0800255 if (mark && !IN_DEV_SRC_VMARK(in_dev))
256 fl.mark = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 }
258 rcu_read_unlock();
259
260 if (in_dev == NULL)
261 goto e_inval;
262
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900263 net = dev_net(dev);
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800264 if (fib_lookup(net, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 goto last_resort;
Patrick McHardy8153a102009-12-03 01:25:58 +0000266 if (res.type != RTN_UNICAST) {
267 if (res.type != RTN_LOCAL || !accept_local)
268 goto e_inval_res;
269 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700270 *spec_dst = FIB_RES_PREFSRC(res);
271 fib_combine_itag(itag, &res);
272#ifdef CONFIG_IP_ROUTE_MULTIPATH
273 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
274#else
275 if (FIB_RES_DEV(res) == dev)
276#endif
277 {
278 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
279 fib_res_put(&res);
280 return ret;
281 }
282 fib_res_put(&res);
283 if (no_addr)
284 goto last_resort;
Stephen Hemmingerc1cf8422009-02-20 08:25:36 +0000285 if (rpf == 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 goto e_inval;
287 fl.oif = dev->ifindex;
288
289 ret = 0;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800290 if (fib_lookup(net, &fl, &res) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 if (res.type == RTN_UNICAST) {
292 *spec_dst = FIB_RES_PREFSRC(res);
293 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
294 }
295 fib_res_put(&res);
296 }
297 return ret;
298
299last_resort:
300 if (rpf)
301 goto e_inval;
302 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
303 *itag = 0;
304 return 0;
305
306e_inval_res:
307 fib_res_put(&res);
308e_inval:
309 return -EINVAL;
310}
311
Al Viro81f7bf62006-09-27 18:40:00 -0700312static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700313{
314 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
315}
316
317static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
318{
319 struct nlattr *nla;
320
321 nla = (struct nlattr *) ((char *) mx + len);
322 nla->nla_type = type;
323 nla->nla_len = nla_attr_size(4);
324 *(u32 *) nla_data(nla) = value;
325
326 return len + nla_total_size(4);
327}
328
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800329static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
Thomas Graf4e902c52006-08-17 18:14:52 -0700330 struct fib_config *cfg)
331{
Al Viro6d85c102006-09-26 22:15:46 -0700332 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700333 int plen;
334
335 memset(cfg, 0, sizeof(*cfg));
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800336 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700337
338 if (rt->rt_dst.sa_family != AF_INET)
339 return -EAFNOSUPPORT;
340
341 /*
342 * Check mask for validity:
343 * a) it must be contiguous.
344 * b) destination must have all host bits clear.
345 * c) if application forgot to set correct family (AF_INET),
346 * reject request unless it is absolutely clear i.e.
347 * both family and mask are zero.
348 */
349 plen = 32;
350 addr = sk_extract_addr(&rt->rt_dst);
351 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700352 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700353
354 if (rt->rt_genmask.sa_family != AF_INET) {
355 if (mask || rt->rt_genmask.sa_family)
356 return -EAFNOSUPPORT;
357 }
358
359 if (bad_mask(mask, addr))
360 return -EINVAL;
361
362 plen = inet_mask_len(mask);
363 }
364
365 cfg->fc_dst_len = plen;
366 cfg->fc_dst = addr;
367
368 if (cmd != SIOCDELRT) {
369 cfg->fc_nlflags = NLM_F_CREATE;
370 cfg->fc_protocol = RTPROT_BOOT;
371 }
372
373 if (rt->rt_metric)
374 cfg->fc_priority = rt->rt_metric - 1;
375
376 if (rt->rt_flags & RTF_REJECT) {
377 cfg->fc_scope = RT_SCOPE_HOST;
378 cfg->fc_type = RTN_UNREACHABLE;
379 return 0;
380 }
381
382 cfg->fc_scope = RT_SCOPE_NOWHERE;
383 cfg->fc_type = RTN_UNICAST;
384
385 if (rt->rt_dev) {
386 char *colon;
387 struct net_device *dev;
388 char devname[IFNAMSIZ];
389
390 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
391 return -EFAULT;
392
393 devname[IFNAMSIZ-1] = 0;
394 colon = strchr(devname, ':');
395 if (colon)
396 *colon = 0;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800397 dev = __dev_get_by_name(net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700398 if (!dev)
399 return -ENODEV;
400 cfg->fc_oif = dev->ifindex;
401 if (colon) {
402 struct in_ifaddr *ifa;
403 struct in_device *in_dev = __in_dev_get_rtnl(dev);
404 if (!in_dev)
405 return -ENODEV;
406 *colon = ':';
407 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
408 if (strcmp(ifa->ifa_label, devname) == 0)
409 break;
410 if (ifa == NULL)
411 return -ENODEV;
412 cfg->fc_prefsrc = ifa->ifa_local;
413 }
414 }
415
416 addr = sk_extract_addr(&rt->rt_gateway);
417 if (rt->rt_gateway.sa_family == AF_INET && addr) {
418 cfg->fc_gw = addr;
419 if (rt->rt_flags & RTF_GATEWAY &&
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800420 inet_addr_type(net, addr) == RTN_UNICAST)
Thomas Graf4e902c52006-08-17 18:14:52 -0700421 cfg->fc_scope = RT_SCOPE_UNIVERSE;
422 }
423
424 if (cmd == SIOCDELRT)
425 return 0;
426
427 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
428 return -EINVAL;
429
430 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
431 cfg->fc_scope = RT_SCOPE_LINK;
432
433 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
434 struct nlattr *mx;
435 int len = 0;
436
437 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900438 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700439 return -ENOMEM;
440
441 if (rt->rt_flags & RTF_MTU)
442 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
443
444 if (rt->rt_flags & RTF_WINDOW)
445 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
446
447 if (rt->rt_flags & RTF_IRTT)
448 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
449
450 cfg->fc_mx = mx;
451 cfg->fc_mx_len = len;
452 }
453
454 return 0;
455}
456
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457/*
458 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
459 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900460
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800461int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462{
Thomas Graf4e902c52006-08-17 18:14:52 -0700463 struct fib_config cfg;
464 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466
467 switch (cmd) {
468 case SIOCADDRT: /* Add a route */
469 case SIOCDELRT: /* Delete a route */
470 if (!capable(CAP_NET_ADMIN))
471 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700472
473 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700475
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 rtnl_lock();
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800477 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700479 struct fib_table *tb;
480
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 if (cmd == SIOCDELRT) {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800482 tb = fib_get_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000484 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700485 else
486 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 } else {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800488 tb = fib_new_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000490 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700491 else
492 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700494
495 /* allocated by rtentry_to_fib_config() */
496 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497 }
498 rtnl_unlock();
499 return err;
500 }
501 return -EINVAL;
502}
503
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700504const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700505 [RTA_DST] = { .type = NLA_U32 },
506 [RTA_SRC] = { .type = NLA_U32 },
507 [RTA_IIF] = { .type = NLA_U32 },
508 [RTA_OIF] = { .type = NLA_U32 },
509 [RTA_GATEWAY] = { .type = NLA_U32 },
510 [RTA_PRIORITY] = { .type = NLA_U32 },
511 [RTA_PREFSRC] = { .type = NLA_U32 },
512 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700513 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700514 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700515};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800517static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
518 struct nlmsghdr *nlh, struct fib_config *cfg)
Thomas Graf4e902c52006-08-17 18:14:52 -0700519{
520 struct nlattr *attr;
521 int err, remaining;
522 struct rtmsg *rtm;
523
524 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
525 if (err < 0)
526 goto errout;
527
528 memset(cfg, 0, sizeof(*cfg));
529
530 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700531 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700532 cfg->fc_tos = rtm->rtm_tos;
533 cfg->fc_table = rtm->rtm_table;
534 cfg->fc_protocol = rtm->rtm_protocol;
535 cfg->fc_scope = rtm->rtm_scope;
536 cfg->fc_type = rtm->rtm_type;
537 cfg->fc_flags = rtm->rtm_flags;
538 cfg->fc_nlflags = nlh->nlmsg_flags;
539
540 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
541 cfg->fc_nlinfo.nlh = nlh;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800542 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700543
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700544 if (cfg->fc_type > RTN_MAX) {
545 err = -EINVAL;
546 goto errout;
547 }
548
Thomas Graf4e902c52006-08-17 18:14:52 -0700549 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200550 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700551 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700552 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700553 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700554 case RTA_OIF:
555 cfg->fc_oif = nla_get_u32(attr);
556 break;
557 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700558 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700559 break;
560 case RTA_PRIORITY:
561 cfg->fc_priority = nla_get_u32(attr);
562 break;
563 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700564 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700565 break;
566 case RTA_METRICS:
567 cfg->fc_mx = nla_data(attr);
568 cfg->fc_mx_len = nla_len(attr);
569 break;
570 case RTA_MULTIPATH:
571 cfg->fc_mp = nla_data(attr);
572 cfg->fc_mp_len = nla_len(attr);
573 break;
574 case RTA_FLOW:
575 cfg->fc_flow = nla_get_u32(attr);
576 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700577 case RTA_TABLE:
578 cfg->fc_table = nla_get_u32(attr);
579 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580 }
581 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700582
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700584errout:
585 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586}
587
Jianjun Kong6ed25332008-11-03 00:25:16 -0800588static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900590 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700591 struct fib_config cfg;
592 struct fib_table *tb;
593 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800595 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700596 if (err < 0)
597 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800599 tb = fib_get_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700600 if (tb == NULL) {
601 err = -ESRCH;
602 goto errout;
603 }
604
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000605 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700606errout:
607 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608}
609
Jianjun Kong6ed25332008-11-03 00:25:16 -0800610static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900612 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700613 struct fib_config cfg;
614 struct fib_table *tb;
615 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800617 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700618 if (err < 0)
619 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800621 tb = fib_new_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700622 if (tb == NULL) {
623 err = -ENOBUFS;
624 goto errout;
625 }
626
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000627 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700628errout:
629 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630}
631
Thomas Graf63f34442007-03-22 11:55:17 -0700632static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900634 struct net *net = sock_net(skb->sk);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700635 unsigned int h, s_h;
636 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700638 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800639 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700640 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641
Thomas Grafbe403ea2006-08-17 18:15:17 -0700642 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
643 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 return ip_rt_dump(skb, cb);
645
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700646 s_h = cb->args[0];
647 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700649 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
650 e = 0;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800651 head = &net->ipv4.fib_table_hash[h];
652 hlist_for_each_entry(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700653 if (e < s_e)
654 goto next;
655 if (dumped)
656 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900657 2 * sizeof(cb->args[0]));
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000658 if (fib_table_dump(tb, skb, cb) < 0)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700659 goto out;
660 dumped = 1;
661next:
662 e++;
663 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700665out:
666 cb->args[1] = e;
667 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668
669 return skb->len;
670}
671
672/* Prepare and feed intra-kernel routing request.
673 Really, it should be netlink message, but :-( netlink
674 can be not configured, so that we feed it directly
675 to fib engine. It is legal, because all events occur
676 only when netlink is already locked.
677 */
678
Al Viro81f7bf62006-09-27 18:40:00 -0700679static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900681 struct net *net = dev_net(ifa->ifa_dev->dev);
Thomas Graf4e902c52006-08-17 18:14:52 -0700682 struct fib_table *tb;
683 struct fib_config cfg = {
684 .fc_protocol = RTPROT_KERNEL,
685 .fc_type = type,
686 .fc_dst = dst,
687 .fc_dst_len = dst_len,
688 .fc_prefsrc = ifa->ifa_local,
689 .fc_oif = ifa->ifa_dev->dev->ifindex,
690 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800691 .fc_nlinfo = {
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800692 .nl_net = net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800693 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700694 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695
696 if (type == RTN_UNICAST)
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800697 tb = fib_new_table(net, RT_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 else
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800699 tb = fib_new_table(net, RT_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700
701 if (tb == NULL)
702 return;
703
Thomas Graf4e902c52006-08-17 18:14:52 -0700704 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705
Thomas Graf4e902c52006-08-17 18:14:52 -0700706 if (type != RTN_LOCAL)
707 cfg.fc_scope = RT_SCOPE_LINK;
708 else
709 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710
711 if (cmd == RTM_NEWROUTE)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000712 fib_table_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 else
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000714 fib_table_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715}
716
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800717void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718{
719 struct in_device *in_dev = ifa->ifa_dev;
720 struct net_device *dev = in_dev->dev;
721 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700722 __be32 mask = ifa->ifa_mask;
723 __be32 addr = ifa->ifa_local;
724 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725
726 if (ifa->ifa_flags&IFA_F_SECONDARY) {
727 prim = inet_ifa_byprefix(in_dev, prefix, mask);
728 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800729 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 return;
731 }
732 }
733
734 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
735
736 if (!(dev->flags&IFF_UP))
737 return;
738
739 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700740 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
742
Joe Perchesf97c1e02007-12-16 13:45:43 -0800743 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 (prefix != addr || ifa->ifa_prefixlen < 32)) {
745 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
746 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
747
748 /* Add network specific broadcasts, when it takes a sense */
749 if (ifa->ifa_prefixlen < 31) {
750 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
751 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
752 }
753 }
754}
755
756static void fib_del_ifaddr(struct in_ifaddr *ifa)
757{
758 struct in_device *in_dev = ifa->ifa_dev;
759 struct net_device *dev = in_dev->dev;
760 struct in_ifaddr *ifa1;
761 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700762 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
763 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764#define LOCAL_OK 1
765#define BRD_OK 2
766#define BRD0_OK 4
767#define BRD1_OK 8
768 unsigned ok = 0;
769
770 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
771 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
772 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
773 else {
774 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
775 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800776 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 return;
778 }
779 }
780
781 /* Deletion is more complicated than add.
782 We should take care of not to delete too much :-)
783
784 Scan address list to be sure that addresses are really gone.
785 */
786
787 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
788 if (ifa->ifa_local == ifa1->ifa_local)
789 ok |= LOCAL_OK;
790 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
791 ok |= BRD_OK;
792 if (brd == ifa1->ifa_broadcast)
793 ok |= BRD1_OK;
794 if (any == ifa1->ifa_broadcast)
795 ok |= BRD0_OK;
796 }
797
798 if (!(ok&BRD_OK))
799 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
800 if (!(ok&BRD1_OK))
801 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
802 if (!(ok&BRD0_OK))
803 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
804 if (!(ok&LOCAL_OK)) {
805 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
806
807 /* Check, that this local address finally disappeared. */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900808 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 /* And the last, but not the least thing.
810 We must flush stray FIB entries.
811
812 First of all, we scan fib_info list searching
813 for stray nexthop entries, then ignite fib_flush.
814 */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900815 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
816 fib_flush(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 }
818 }
819#undef LOCAL_OK
820#undef BRD_OK
821#undef BRD0_OK
822#undef BRD1_OK
823}
824
Robert Olsson246955f2005-06-20 13:36:39 -0700825static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
826{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900827
Robert Olsson246955f2005-06-20 13:36:39 -0700828 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800829 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800830 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700831 .tos = frn->fl_tos,
832 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700833
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700834#ifdef CONFIG_IP_MULTIPLE_TABLES
835 res.r = NULL;
836#endif
837
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700838 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700839 if (tb) {
840 local_bh_disable();
841
842 frn->tb_id = tb->tb_id;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000843 frn->err = fib_table_lookup(tb, &fl, &res);
Robert Olsson246955f2005-06-20 13:36:39 -0700844
845 if (!frn->err) {
846 frn->prefixlen = res.prefixlen;
847 frn->nh_sel = res.nh_sel;
848 frn->type = res.type;
849 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700850 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700851 }
852 local_bh_enable();
853 }
854}
855
David S. Miller28f7b0362007-10-10 21:32:39 -0700856static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700857{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800858 struct net *net;
Robert Olsson246955f2005-06-20 13:36:39 -0700859 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700860 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700861 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700862 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700863
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900864 net = sock_net(skb->sk);
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700865 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800866 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
Denis V. Lunevd883a032007-12-21 02:01:53 -0800867 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
Thomas Grafea865752005-12-01 14:30:00 -0800868 return;
Denis V. Lunevd883a032007-12-21 02:01:53 -0800869
870 skb = skb_clone(skb, GFP_KERNEL);
871 if (skb == NULL)
872 return;
873 nlh = nlmsg_hdr(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900874
Robert Olsson246955f2005-06-20 13:36:39 -0700875 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800876 tb = fib_get_table(net, frn->tb_id_in);
Robert Olsson246955f2005-06-20 13:36:39 -0700877
878 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900879
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700880 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700881 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700882 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800883 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900884}
Robert Olsson246955f2005-06-20 13:36:39 -0700885
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800886static int nl_fib_lookup_init(struct net *net)
Robert Olsson246955f2005-06-20 13:36:39 -0700887{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800888 struct sock *sk;
889 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
890 nl_fib_input, NULL, THIS_MODULE);
891 if (sk == NULL)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800892 return -EAFNOSUPPORT;
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800893 net->ipv4.fibnl = sk;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800894 return 0;
895}
896
897static void nl_fib_lookup_exit(struct net *net)
898{
Denis V. Lunevb7c6ba62008-01-28 14:41:19 -0800899 netlink_kernel_release(net->ipv4.fibnl);
Denis V. Lunev775516b2008-01-18 23:55:19 -0800900 net->ipv4.fibnl = NULL;
Robert Olsson246955f2005-06-20 13:36:39 -0700901}
902
/*
 * Tear down IPv4 routing state for @dev.
 *
 * @force is passed through to fib_sync_down_dev(); when that reports a
 * nonzero result the namespace's FIB is flushed.  The route cache is then
 * flushed with @delay, and ARP is told that the device went down.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);

	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
910
911static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
912{
Jianjun Kong6ed25332008-11-03 00:25:16 -0800913 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700914 struct net_device *dev = ifa->ifa_dev->dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915
916 switch (event) {
917 case NETDEV_UP:
918 fib_add_ifaddr(ifa);
919#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700920 fib_sync_up(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700922 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 break;
924 case NETDEV_DOWN:
925 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700926 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927 /* Last address was deleted from this interface.
928 Disable IP.
929 */
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000930 fib_disable_ip(dev, 1, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 } else {
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700932 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 }
934 break;
935 }
936 return NOTIFY_DONE;
937}
938
939static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
940{
941 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700942 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943
944 if (event == NETDEV_UNREGISTER) {
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000945 fib_disable_ip(dev, 2, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 return NOTIFY_DONE;
947 }
948
949 if (!in_dev)
950 return NOTIFY_DONE;
951
952 switch (event) {
953 case NETDEV_UP:
954 for_ifa(in_dev) {
955 fib_add_ifaddr(ifa);
956 } endfor_ifa(in_dev);
957#ifdef CONFIG_IP_ROUTE_MULTIPATH
958 fib_sync_up(dev);
959#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700960 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961 break;
962 case NETDEV_DOWN:
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000963 fib_disable_ip(dev, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 break;
965 case NETDEV_CHANGEMTU:
966 case NETDEV_CHANGE:
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700967 rt_cache_flush(dev_net(dev), 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 break;
Eric W. Biedermana5ee1552009-11-29 15:45:58 +0000969 case NETDEV_UNREGISTER_BATCH:
970 rt_cache_flush_batch();
971 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 }
973 return NOTIFY_DONE;
974}
975
976static struct notifier_block fib_inetaddr_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800977 .notifier_call = fib_inetaddr_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978};
979
980static struct notifier_block fib_netdev_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800981 .notifier_call = fib_netdev_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982};
983
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800984static int __net_init ip_fib_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985{
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -0800986 int err;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700987 unsigned int i;
988
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800989 net->ipv4.fib_table_hash = kzalloc(
990 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
991 if (net->ipv4.fib_table_hash == NULL)
992 return -ENOMEM;
993
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700994 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800995 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -0800996
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -0800997 err = fib4_rules_init(net);
998 if (err < 0)
999 goto fail;
1000 return 0;
1001
1002fail:
1003 kfree(net->ipv4.fib_table_hash);
1004 return err;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001005}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001007static void __net_exit ip_fib_net_exit(struct net *net)
1008{
1009 unsigned int i;
Thomas Graf63f34442007-03-22 11:55:17 -07001010
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001011#ifdef CONFIG_IP_MULTIPLE_TABLES
1012 fib4_rules_exit(net);
1013#endif
1014
1015 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1016 struct fib_table *tb;
1017 struct hlist_head *head;
1018 struct hlist_node *node, *tmp;
1019
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001020 head = &net->ipv4.fib_table_hash[i];
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001021 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1022 hlist_del(node);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001023 fib_table_flush(tb);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001024 kfree(tb);
1025 }
1026 }
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001027 kfree(net->ipv4.fib_table_hash);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001028}
1029
1030static int __net_init fib_net_init(struct net *net)
1031{
1032 int error;
1033
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001034 error = ip_fib_net_init(net);
1035 if (error < 0)
1036 goto out;
1037 error = nl_fib_lookup_init(net);
1038 if (error < 0)
1039 goto out_nlfl;
1040 error = fib_proc_init(net);
1041 if (error < 0)
1042 goto out_proc;
1043out:
1044 return error;
1045
1046out_proc:
1047 nl_fib_lookup_exit(net);
1048out_nlfl:
1049 ip_fib_net_exit(net);
1050 goto out;
1051}
1052
1053static void __net_exit fib_net_exit(struct net *net)
1054{
1055 fib_proc_exit(net);
1056 nl_fib_lookup_exit(net);
1057 ip_fib_net_exit(net);
1058}
1059
1060static struct pernet_operations fib_net_ops = {
1061 .init = fib_net_init,
1062 .exit = fib_net_exit,
1063};
1064
1065void __init ip_fib_init(void)
1066{
Thomas Graf63f34442007-03-22 11:55:17 -07001067 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1068 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1069 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001070
1071 register_pernet_subsys(&fib_net_ops);
1072 register_netdevice_notifier(&fib_netdev_notifier);
1073 register_inetaddr_notifier(&fib_inetaddr_notifier);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001074
1075 fib_hash_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076}
1077
/* Symbols exported from this file for use by other kernel code. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);