blob: a43968918350244a057e6f3364727d6a2aa7baf2 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: FIB frontend.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080020#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/types.h>
22#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020030#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070032#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/if_arp.h>
34#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070036#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090037#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/arp.h>
45#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070046#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#ifndef CONFIG_IP_MULTIPLE_TABLES
49
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -080050static int __net_init fib4_rules_init(struct net *net)
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080051{
Denis V. Lunev93456b62008-01-10 03:23:38 -080052 struct fib_table *local_table, *main_table;
53
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080054 local_table = fib_hash_table(RT_TABLE_LOCAL);
Denis V. Lunev93456b62008-01-10 03:23:38 -080055 if (local_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080056 return -ENOMEM;
57
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080058 main_table = fib_hash_table(RT_TABLE_MAIN);
Denis V. Lunev93456b62008-01-10 03:23:38 -080059 if (main_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080060 goto fail;
61
Denis V. Lunev93456b62008-01-10 03:23:38 -080062 hlist_add_head_rcu(&local_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080063 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
Denis V. Lunev93456b62008-01-10 03:23:38 -080064 hlist_add_head_rcu(&main_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080065 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080066 return 0;
67
68fail:
Denis V. Lunev93456b62008-01-10 03:23:38 -080069 kfree(local_table);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080070 return -ENOMEM;
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080071}
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#else
73
Denis V. Lunev8ad49422008-01-10 03:24:11 -080074struct fib_table *fib_new_table(struct net *net, u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070075{
76 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070077 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070079 if (id == 0)
80 id = RT_TABLE_MAIN;
Denis V. Lunev8ad49422008-01-10 03:24:11 -080081 tb = fib_get_table(net, id);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070082 if (tb)
83 return tb;
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080084
85 tb = fib_hash_table(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 if (!tb)
87 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070088 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080089 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 return tb;
91}
92
Denis V. Lunev8ad49422008-01-10 03:24:11 -080093struct fib_table *fib_get_table(struct net *net, u32 id)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070094{
95 struct fib_table *tb;
96 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080097 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070098 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070099
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800103
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700104 rcu_read_lock();
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
Denis V. Lunev010278e2008-01-22 22:04:04 -0800117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
Denis V. Lunev010278e2008-01-22 22:04:04 -0800127 tb = fib_get_table(net, table);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000129 fib_table_select_default(tb, flp, res);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800130}
131
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800132static void fib_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133{
134 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700136 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800137 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700138 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000143 flushed += fib_table_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145
146 if (flushed)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700147 rt_cache_flush(net, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148}
149
150/*
151 * Find the first device with a given source address.
152 */
153
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800154struct net_device * ip_dev_find(struct net *net, __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700159 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
171
172 if (dev)
173 dev_hold(dev);
174out:
175 fib_res_put(&res);
176 return dev;
177}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000178EXPORT_SYMBOL(ip_dev_find);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800180/*
181 * Find address type as if only "dev" was present in the system. If
182 * on_dev is NULL then all interfaces are taken into consideration.
183 */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800184static inline unsigned __inet_dev_addr_type(struct net *net,
185 const struct net_device *dev,
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800186 __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187{
188 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
189 struct fib_result res;
190 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700191 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192
Jan Engelhardt1e637c72008-01-21 03:18:08 -0800193 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 return RTN_BROADCAST;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800195 if (ipv4_is_multicast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 return RTN_MULTICAST;
197
198#ifdef CONFIG_IP_MULTIPLE_TABLES
199 res.r = NULL;
200#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900201
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800202 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700203 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204 ret = RTN_UNICAST;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000205 if (!fib_table_lookup(local_table, &fl, &res)) {
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800206 if (!dev || dev == res.fi->fib_dev)
207 ret = res.type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 fib_res_put(&res);
209 }
210 }
211 return ret;
212}
213
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800214unsigned int inet_addr_type(struct net *net, __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800215{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800216 return __inet_dev_addr_type(net, NULL, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800217}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000218EXPORT_SYMBOL(inet_addr_type);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800219
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800222{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800223 return __inet_dev_addr_type(net, dev, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800224}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000225EXPORT_SYMBOL(inet_dev_addr_type);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800226
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227/* Given (packet source, input interface) and optional (dst, oif, tos):
228 - (main) check, that source is valid i.e. not broadcast or our local
229 address.
230 - figure out what "logical" interface this packet arrived
231 and calculate "specific destination" address.
232 - check, that packet arrived from expected physical interface.
233 */
234
Al Virod9c9df82006-09-26 21:28:14 -0700235int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
jamalb0c110c2009-10-18 02:12:33 +0000236 struct net_device *dev, __be32 *spec_dst,
237 u32 *itag, u32 mark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238{
239 struct in_device *in_dev;
240 struct flowi fl = { .nl_u = { .ip4_u =
241 { .daddr = src,
242 .saddr = dst,
243 .tos = tos } },
jamalb0c110c2009-10-18 02:12:33 +0000244 .mark = mark,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 .iif = oif };
jamalb0c110c2009-10-18 02:12:33 +0000246
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247 struct fib_result res;
Patrick McHardy8153a102009-12-03 01:25:58 +0000248 int no_addr, rpf, accept_local;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 int ret;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800250 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251
Patrick McHardy8153a102009-12-03 01:25:58 +0000252 no_addr = rpf = accept_local = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700254 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 if (in_dev) {
256 no_addr = in_dev->ifa_list == NULL;
257 rpf = IN_DEV_RPFILTER(in_dev);
Patrick McHardy8153a102009-12-03 01:25:58 +0000258 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
Jamal Hadi Salim28f6aee2009-12-25 17:30:22 -0800259 if (mark && !IN_DEV_SRC_VMARK(in_dev))
260 fl.mark = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 }
262 rcu_read_unlock();
263
264 if (in_dev == NULL)
265 goto e_inval;
266
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900267 net = dev_net(dev);
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800268 if (fib_lookup(net, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 goto last_resort;
Patrick McHardy8153a102009-12-03 01:25:58 +0000270 if (res.type != RTN_UNICAST) {
271 if (res.type != RTN_LOCAL || !accept_local)
272 goto e_inval_res;
273 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 *spec_dst = FIB_RES_PREFSRC(res);
275 fib_combine_itag(itag, &res);
276#ifdef CONFIG_IP_ROUTE_MULTIPATH
277 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
278#else
279 if (FIB_RES_DEV(res) == dev)
280#endif
281 {
282 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
283 fib_res_put(&res);
284 return ret;
285 }
286 fib_res_put(&res);
287 if (no_addr)
288 goto last_resort;
Stephen Hemmingerc1cf8422009-02-20 08:25:36 +0000289 if (rpf == 1)
Eric Dumazetb5f7e752010-06-02 12:05:27 +0000290 goto e_rpf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 fl.oif = dev->ifindex;
292
293 ret = 0;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800294 if (fib_lookup(net, &fl, &res) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 if (res.type == RTN_UNICAST) {
296 *spec_dst = FIB_RES_PREFSRC(res);
297 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
298 }
299 fib_res_put(&res);
300 }
301 return ret;
302
303last_resort:
304 if (rpf)
Eric Dumazetb5f7e752010-06-02 12:05:27 +0000305 goto e_rpf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
307 *itag = 0;
308 return 0;
309
310e_inval_res:
311 fib_res_put(&res);
312e_inval:
313 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +0000314e_rpf:
315 return -EXDEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316}
317
Al Viro81f7bf62006-09-27 18:40:00 -0700318static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700319{
320 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
321}
322
323static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
324{
325 struct nlattr *nla;
326
327 nla = (struct nlattr *) ((char *) mx + len);
328 nla->nla_type = type;
329 nla->nla_len = nla_attr_size(4);
330 *(u32 *) nla_data(nla) = value;
331
332 return len + nla_total_size(4);
333}
334
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800335static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
Thomas Graf4e902c52006-08-17 18:14:52 -0700336 struct fib_config *cfg)
337{
Al Viro6d85c102006-09-26 22:15:46 -0700338 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700339 int plen;
340
341 memset(cfg, 0, sizeof(*cfg));
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800342 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700343
344 if (rt->rt_dst.sa_family != AF_INET)
345 return -EAFNOSUPPORT;
346
347 /*
348 * Check mask for validity:
349 * a) it must be contiguous.
350 * b) destination must have all host bits clear.
351 * c) if application forgot to set correct family (AF_INET),
352 * reject request unless it is absolutely clear i.e.
353 * both family and mask are zero.
354 */
355 plen = 32;
356 addr = sk_extract_addr(&rt->rt_dst);
357 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700358 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700359
360 if (rt->rt_genmask.sa_family != AF_INET) {
361 if (mask || rt->rt_genmask.sa_family)
362 return -EAFNOSUPPORT;
363 }
364
365 if (bad_mask(mask, addr))
366 return -EINVAL;
367
368 plen = inet_mask_len(mask);
369 }
370
371 cfg->fc_dst_len = plen;
372 cfg->fc_dst = addr;
373
374 if (cmd != SIOCDELRT) {
375 cfg->fc_nlflags = NLM_F_CREATE;
376 cfg->fc_protocol = RTPROT_BOOT;
377 }
378
379 if (rt->rt_metric)
380 cfg->fc_priority = rt->rt_metric - 1;
381
382 if (rt->rt_flags & RTF_REJECT) {
383 cfg->fc_scope = RT_SCOPE_HOST;
384 cfg->fc_type = RTN_UNREACHABLE;
385 return 0;
386 }
387
388 cfg->fc_scope = RT_SCOPE_NOWHERE;
389 cfg->fc_type = RTN_UNICAST;
390
391 if (rt->rt_dev) {
392 char *colon;
393 struct net_device *dev;
394 char devname[IFNAMSIZ];
395
396 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
397 return -EFAULT;
398
399 devname[IFNAMSIZ-1] = 0;
400 colon = strchr(devname, ':');
401 if (colon)
402 *colon = 0;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800403 dev = __dev_get_by_name(net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700404 if (!dev)
405 return -ENODEV;
406 cfg->fc_oif = dev->ifindex;
407 if (colon) {
408 struct in_ifaddr *ifa;
409 struct in_device *in_dev = __in_dev_get_rtnl(dev);
410 if (!in_dev)
411 return -ENODEV;
412 *colon = ':';
413 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
414 if (strcmp(ifa->ifa_label, devname) == 0)
415 break;
416 if (ifa == NULL)
417 return -ENODEV;
418 cfg->fc_prefsrc = ifa->ifa_local;
419 }
420 }
421
422 addr = sk_extract_addr(&rt->rt_gateway);
423 if (rt->rt_gateway.sa_family == AF_INET && addr) {
424 cfg->fc_gw = addr;
425 if (rt->rt_flags & RTF_GATEWAY &&
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800426 inet_addr_type(net, addr) == RTN_UNICAST)
Thomas Graf4e902c52006-08-17 18:14:52 -0700427 cfg->fc_scope = RT_SCOPE_UNIVERSE;
428 }
429
430 if (cmd == SIOCDELRT)
431 return 0;
432
433 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
434 return -EINVAL;
435
436 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
437 cfg->fc_scope = RT_SCOPE_LINK;
438
439 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
440 struct nlattr *mx;
441 int len = 0;
442
443 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900444 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700445 return -ENOMEM;
446
447 if (rt->rt_flags & RTF_MTU)
448 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
449
450 if (rt->rt_flags & RTF_WINDOW)
451 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
452
453 if (rt->rt_flags & RTF_IRTT)
454 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
455
456 cfg->fc_mx = mx;
457 cfg->fc_mx_len = len;
458 }
459
460 return 0;
461}
462
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463/*
464 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
465 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900466
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800467int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468{
Thomas Graf4e902c52006-08-17 18:14:52 -0700469 struct fib_config cfg;
470 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472
473 switch (cmd) {
474 case SIOCADDRT: /* Add a route */
475 case SIOCDELRT: /* Delete a route */
476 if (!capable(CAP_NET_ADMIN))
477 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700478
479 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700480 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700481
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 rtnl_lock();
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800483 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700485 struct fib_table *tb;
486
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 if (cmd == SIOCDELRT) {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800488 tb = fib_get_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000490 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700491 else
492 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 } else {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800494 tb = fib_new_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000496 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700497 else
498 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700500
501 /* allocated by rtentry_to_fib_config() */
502 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 }
504 rtnl_unlock();
505 return err;
506 }
507 return -EINVAL;
508}
509
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700510const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700511 [RTA_DST] = { .type = NLA_U32 },
512 [RTA_SRC] = { .type = NLA_U32 },
513 [RTA_IIF] = { .type = NLA_U32 },
514 [RTA_OIF] = { .type = NLA_U32 },
515 [RTA_GATEWAY] = { .type = NLA_U32 },
516 [RTA_PRIORITY] = { .type = NLA_U32 },
517 [RTA_PREFSRC] = { .type = NLA_U32 },
518 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700519 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700520 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700521};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800523static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
524 struct nlmsghdr *nlh, struct fib_config *cfg)
Thomas Graf4e902c52006-08-17 18:14:52 -0700525{
526 struct nlattr *attr;
527 int err, remaining;
528 struct rtmsg *rtm;
529
530 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
531 if (err < 0)
532 goto errout;
533
534 memset(cfg, 0, sizeof(*cfg));
535
536 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700537 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700538 cfg->fc_tos = rtm->rtm_tos;
539 cfg->fc_table = rtm->rtm_table;
540 cfg->fc_protocol = rtm->rtm_protocol;
541 cfg->fc_scope = rtm->rtm_scope;
542 cfg->fc_type = rtm->rtm_type;
543 cfg->fc_flags = rtm->rtm_flags;
544 cfg->fc_nlflags = nlh->nlmsg_flags;
545
546 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
547 cfg->fc_nlinfo.nlh = nlh;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800548 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700549
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700550 if (cfg->fc_type > RTN_MAX) {
551 err = -EINVAL;
552 goto errout;
553 }
554
Thomas Graf4e902c52006-08-17 18:14:52 -0700555 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200556 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700557 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700558 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700559 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700560 case RTA_OIF:
561 cfg->fc_oif = nla_get_u32(attr);
562 break;
563 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700564 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700565 break;
566 case RTA_PRIORITY:
567 cfg->fc_priority = nla_get_u32(attr);
568 break;
569 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700570 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700571 break;
572 case RTA_METRICS:
573 cfg->fc_mx = nla_data(attr);
574 cfg->fc_mx_len = nla_len(attr);
575 break;
576 case RTA_MULTIPATH:
577 cfg->fc_mp = nla_data(attr);
578 cfg->fc_mp_len = nla_len(attr);
579 break;
580 case RTA_FLOW:
581 cfg->fc_flow = nla_get_u32(attr);
582 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700583 case RTA_TABLE:
584 cfg->fc_table = nla_get_u32(attr);
585 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 }
587 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700588
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700590errout:
591 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592}
593
Jianjun Kong6ed25332008-11-03 00:25:16 -0800594static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900596 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700597 struct fib_config cfg;
598 struct fib_table *tb;
599 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800601 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700602 if (err < 0)
603 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800605 tb = fib_get_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700606 if (tb == NULL) {
607 err = -ESRCH;
608 goto errout;
609 }
610
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000611 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700612errout:
613 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614}
615
Jianjun Kong6ed25332008-11-03 00:25:16 -0800616static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900618 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700619 struct fib_config cfg;
620 struct fib_table *tb;
621 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800623 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700624 if (err < 0)
625 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800627 tb = fib_new_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700628 if (tb == NULL) {
629 err = -ENOBUFS;
630 goto errout;
631 }
632
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000633 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700634errout:
635 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636}
637
Thomas Graf63f34442007-03-22 11:55:17 -0700638static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900640 struct net *net = sock_net(skb->sk);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700641 unsigned int h, s_h;
642 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700644 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800645 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700646 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647
Thomas Grafbe403ea2006-08-17 18:15:17 -0700648 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
649 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650 return ip_rt_dump(skb, cb);
651
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700652 s_h = cb->args[0];
653 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700655 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
656 e = 0;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800657 head = &net->ipv4.fib_table_hash[h];
658 hlist_for_each_entry(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700659 if (e < s_e)
660 goto next;
661 if (dumped)
662 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900663 2 * sizeof(cb->args[0]));
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000664 if (fib_table_dump(tb, skb, cb) < 0)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700665 goto out;
666 dumped = 1;
667next:
668 e++;
669 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700671out:
672 cb->args[1] = e;
673 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674
675 return skb->len;
676}
677
678/* Prepare and feed intra-kernel routing request.
679 Really, it should be netlink message, but :-( netlink
680 can be not configured, so that we feed it directly
681 to fib engine. It is legal, because all events occur
682 only when netlink is already locked.
683 */
684
Al Viro81f7bf62006-09-27 18:40:00 -0700685static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900687 struct net *net = dev_net(ifa->ifa_dev->dev);
Thomas Graf4e902c52006-08-17 18:14:52 -0700688 struct fib_table *tb;
689 struct fib_config cfg = {
690 .fc_protocol = RTPROT_KERNEL,
691 .fc_type = type,
692 .fc_dst = dst,
693 .fc_dst_len = dst_len,
694 .fc_prefsrc = ifa->ifa_local,
695 .fc_oif = ifa->ifa_dev->dev->ifindex,
696 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800697 .fc_nlinfo = {
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800698 .nl_net = net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800699 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700700 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701
702 if (type == RTN_UNICAST)
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800703 tb = fib_new_table(net, RT_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 else
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800705 tb = fib_new_table(net, RT_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706
707 if (tb == NULL)
708 return;
709
Thomas Graf4e902c52006-08-17 18:14:52 -0700710 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711
Thomas Graf4e902c52006-08-17 18:14:52 -0700712 if (type != RTN_LOCAL)
713 cfg.fc_scope = RT_SCOPE_LINK;
714 else
715 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716
717 if (cmd == RTM_NEWROUTE)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000718 fib_table_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 else
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000720 fib_table_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721}
722
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800723void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724{
725 struct in_device *in_dev = ifa->ifa_dev;
726 struct net_device *dev = in_dev->dev;
727 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700728 __be32 mask = ifa->ifa_mask;
729 __be32 addr = ifa->ifa_local;
730 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731
732 if (ifa->ifa_flags&IFA_F_SECONDARY) {
733 prim = inet_ifa_byprefix(in_dev, prefix, mask);
734 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800735 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736 return;
737 }
738 }
739
740 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
741
742 if (!(dev->flags&IFF_UP))
743 return;
744
745 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700746 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
748
Joe Perchesf97c1e02007-12-16 13:45:43 -0800749 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 (prefix != addr || ifa->ifa_prefixlen < 32)) {
751 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
752 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
753
754 /* Add network specific broadcasts, when it takes a sense */
755 if (ifa->ifa_prefixlen < 31) {
756 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
757 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
758 }
759 }
760}
761
762static void fib_del_ifaddr(struct in_ifaddr *ifa)
763{
764 struct in_device *in_dev = ifa->ifa_dev;
765 struct net_device *dev = in_dev->dev;
766 struct in_ifaddr *ifa1;
767 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700768 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
769 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770#define LOCAL_OK 1
771#define BRD_OK 2
772#define BRD0_OK 4
773#define BRD1_OK 8
774 unsigned ok = 0;
775
776 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
777 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
778 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
779 else {
780 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
781 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800782 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 return;
784 }
785 }
786
787 /* Deletion is more complicated than add.
788 We should take care of not to delete too much :-)
789
790 Scan address list to be sure that addresses are really gone.
791 */
792
793 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
794 if (ifa->ifa_local == ifa1->ifa_local)
795 ok |= LOCAL_OK;
796 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
797 ok |= BRD_OK;
798 if (brd == ifa1->ifa_broadcast)
799 ok |= BRD1_OK;
800 if (any == ifa1->ifa_broadcast)
801 ok |= BRD0_OK;
802 }
803
804 if (!(ok&BRD_OK))
805 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
806 if (!(ok&BRD1_OK))
807 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
808 if (!(ok&BRD0_OK))
809 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
810 if (!(ok&LOCAL_OK)) {
811 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
812
813 /* Check, that this local address finally disappeared. */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900814 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 /* And the last, but not the least thing.
816 We must flush stray FIB entries.
817
818 First of all, we scan fib_info list searching
819 for stray nexthop entries, then ignite fib_flush.
820 */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900821 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
822 fib_flush(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 }
824 }
825#undef LOCAL_OK
826#undef BRD_OK
827#undef BRD0_OK
828#undef BRD1_OK
829}
830
Robert Olsson246955f2005-06-20 13:36:39 -0700831static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
832{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900833
Robert Olsson246955f2005-06-20 13:36:39 -0700834 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800835 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800836 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700837 .tos = frn->fl_tos,
838 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700839
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700840#ifdef CONFIG_IP_MULTIPLE_TABLES
841 res.r = NULL;
842#endif
843
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700844 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700845 if (tb) {
846 local_bh_disable();
847
848 frn->tb_id = tb->tb_id;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000849 frn->err = fib_table_lookup(tb, &fl, &res);
Robert Olsson246955f2005-06-20 13:36:39 -0700850
851 if (!frn->err) {
852 frn->prefixlen = res.prefixlen;
853 frn->nh_sel = res.nh_sel;
854 frn->type = res.type;
855 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700856 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700857 }
858 local_bh_enable();
859 }
860}
861
David S. Miller28f7b0362007-10-10 21:32:39 -0700862static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700863{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800864 struct net *net;
Robert Olsson246955f2005-06-20 13:36:39 -0700865 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700866 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700867 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700868 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700869
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900870 net = sock_net(skb->sk);
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700871 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800872 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
Denis V. Lunevd883a032007-12-21 02:01:53 -0800873 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
Thomas Grafea865752005-12-01 14:30:00 -0800874 return;
Denis V. Lunevd883a032007-12-21 02:01:53 -0800875
876 skb = skb_clone(skb, GFP_KERNEL);
877 if (skb == NULL)
878 return;
879 nlh = nlmsg_hdr(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900880
Robert Olsson246955f2005-06-20 13:36:39 -0700881 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800882 tb = fib_get_table(net, frn->tb_id_in);
Robert Olsson246955f2005-06-20 13:36:39 -0700883
884 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900885
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700886 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700887 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700888 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800889 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900890}
Robert Olsson246955f2005-06-20 13:36:39 -0700891
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +0000892static int __net_init nl_fib_lookup_init(struct net *net)
Robert Olsson246955f2005-06-20 13:36:39 -0700893{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800894 struct sock *sk;
895 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
896 nl_fib_input, NULL, THIS_MODULE);
897 if (sk == NULL)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800898 return -EAFNOSUPPORT;
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800899 net->ipv4.fibnl = sk;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800900 return 0;
901}
902
903static void nl_fib_lookup_exit(struct net *net)
904{
Denis V. Lunevb7c6ba62008-01-28 14:41:19 -0800905 netlink_kernel_release(net->ipv4.fibnl);
Denis V. Lunev775516b2008-01-18 23:55:19 -0800906 net->ipv4.fibnl = NULL;
Robert Olsson246955f2005-06-20 13:36:39 -0700907}
908
/*
 * Tear down IPv4 routing state for @dev: sync the FIB down for the
 * device (flushing the tables when that reports a non-zero result),
 * flush the route cache with the given @delay and take ARP down.
 *
 * @force is passed through unchanged to fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	const int need_flush = fib_sync_down_dev(dev, force);

	if (need_flush)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
916
917static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
918{
Jianjun Kong6ed25332008-11-03 00:25:16 -0800919 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700920 struct net_device *dev = ifa->ifa_dev->dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921
922 switch (event) {
923 case NETDEV_UP:
924 fib_add_ifaddr(ifa);
925#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700926 fib_sync_up(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700928 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 break;
930 case NETDEV_DOWN:
931 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700932 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 /* Last address was deleted from this interface.
934 Disable IP.
935 */
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000936 fib_disable_ip(dev, 1, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 } else {
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700938 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 }
940 break;
941 }
942 return NOTIFY_DONE;
943}
944
945static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
946{
947 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700948 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949
950 if (event == NETDEV_UNREGISTER) {
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000951 fib_disable_ip(dev, 2, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 return NOTIFY_DONE;
953 }
954
955 if (!in_dev)
956 return NOTIFY_DONE;
957
958 switch (event) {
959 case NETDEV_UP:
960 for_ifa(in_dev) {
961 fib_add_ifaddr(ifa);
962 } endfor_ifa(in_dev);
963#ifdef CONFIG_IP_ROUTE_MULTIPATH
964 fib_sync_up(dev);
965#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700966 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 break;
968 case NETDEV_DOWN:
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000969 fib_disable_ip(dev, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 break;
971 case NETDEV_CHANGEMTU:
972 case NETDEV_CHANGE:
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700973 rt_cache_flush(dev_net(dev), 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 break;
Eric W. Biedermana5ee1552009-11-29 15:45:58 +0000975 case NETDEV_UNREGISTER_BATCH:
976 rt_cache_flush_batch();
977 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 }
979 return NOTIFY_DONE;
980}
981
982static struct notifier_block fib_inetaddr_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800983 .notifier_call = fib_inetaddr_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984};
985
986static struct notifier_block fib_netdev_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800987 .notifier_call = fib_netdev_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988};
989
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800990static int __net_init ip_fib_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991{
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -0800992 int err;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700993 unsigned int i;
994
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800995 net->ipv4.fib_table_hash = kzalloc(
996 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
997 if (net->ipv4.fib_table_hash == NULL)
998 return -ENOMEM;
999
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001000 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001001 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -08001002
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -08001003 err = fib4_rules_init(net);
1004 if (err < 0)
1005 goto fail;
1006 return 0;
1007
1008fail:
1009 kfree(net->ipv4.fib_table_hash);
1010 return err;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001011}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001013static void ip_fib_net_exit(struct net *net)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001014{
1015 unsigned int i;
Thomas Graf63f34442007-03-22 11:55:17 -07001016
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001017#ifdef CONFIG_IP_MULTIPLE_TABLES
1018 fib4_rules_exit(net);
1019#endif
1020
1021 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1022 struct fib_table *tb;
1023 struct hlist_head *head;
1024 struct hlist_node *node, *tmp;
1025
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001026 head = &net->ipv4.fib_table_hash[i];
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001027 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1028 hlist_del(node);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001029 fib_table_flush(tb);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001030 kfree(tb);
1031 }
1032 }
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001033 kfree(net->ipv4.fib_table_hash);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001034}
1035
1036static int __net_init fib_net_init(struct net *net)
1037{
1038 int error;
1039
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001040 error = ip_fib_net_init(net);
1041 if (error < 0)
1042 goto out;
1043 error = nl_fib_lookup_init(net);
1044 if (error < 0)
1045 goto out_nlfl;
1046 error = fib_proc_init(net);
1047 if (error < 0)
1048 goto out_proc;
1049out:
1050 return error;
1051
1052out_proc:
1053 nl_fib_lookup_exit(net);
1054out_nlfl:
1055 ip_fib_net_exit(net);
1056 goto out;
1057}
1058
1059static void __net_exit fib_net_exit(struct net *net)
1060{
1061 fib_proc_exit(net);
1062 nl_fib_lookup_exit(net);
1063 ip_fib_net_exit(net);
1064}
1065
1066static struct pernet_operations fib_net_ops = {
1067 .init = fib_net_init,
1068 .exit = fib_net_exit,
1069};
1070
1071void __init ip_fib_init(void)
1072{
Thomas Graf63f34442007-03-22 11:55:17 -07001073 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1074 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1075 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001076
1077 register_pernet_subsys(&fib_net_ops);
1078 register_netdevice_notifier(&fib_netdev_notifier);
1079 register_inetaddr_notifier(&fib_inetaddr_notifier);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001080
1081 fib_hash_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082}