blob: 4f0ed458c883658c37265fe6537bde2ac8b88429 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080020#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/types.h>
22#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020030#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070032#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/if_arp.h>
34#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070036#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090037#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/arp.h>
45#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070046#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#ifndef CONFIG_IP_MULTIPLE_TABLES
49
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -080050static int __net_init fib4_rules_init(struct net *net)
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080051{
Denis V. Lunev93456b62008-01-10 03:23:38 -080052 struct fib_table *local_table, *main_table;
53
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080054 local_table = fib_hash_table(RT_TABLE_LOCAL);
Denis V. Lunev93456b62008-01-10 03:23:38 -080055 if (local_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080056 return -ENOMEM;
57
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080058 main_table = fib_hash_table(RT_TABLE_MAIN);
Denis V. Lunev93456b62008-01-10 03:23:38 -080059 if (main_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080060 goto fail;
61
Denis V. Lunev93456b62008-01-10 03:23:38 -080062 hlist_add_head_rcu(&local_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080063 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
Denis V. Lunev93456b62008-01-10 03:23:38 -080064 hlist_add_head_rcu(&main_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080065 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080066 return 0;
67
68fail:
Denis V. Lunev93456b62008-01-10 03:23:38 -080069 kfree(local_table);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080070 return -ENOMEM;
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080071}
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#else
73
Denis V. Lunev8ad49422008-01-10 03:24:11 -080074struct fib_table *fib_new_table(struct net *net, u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070075{
76 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070077 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070079 if (id == 0)
80 id = RT_TABLE_MAIN;
Denis V. Lunev8ad49422008-01-10 03:24:11 -080081 tb = fib_get_table(net, id);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070082 if (tb)
83 return tb;
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080084
85 tb = fib_hash_table(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 if (!tb)
87 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070088 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080089 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 return tb;
91}
92
Denis V. Lunev8ad49422008-01-10 03:24:11 -080093struct fib_table *fib_get_table(struct net *net, u32 id)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070094{
95 struct fib_table *tb;
96 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080097 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070098 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070099
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800103
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700104 rcu_read_lock();
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
Denis V. Lunev010278e2008-01-22 22:04:04 -0800117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
Denis V. Lunev010278e2008-01-22 22:04:04 -0800127 tb = fib_get_table(net, table);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000129 fib_table_select_default(tb, flp, res);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800130}
131
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800132static void fib_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133{
134 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700136 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800137 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700138 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000143 flushed += fib_table_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145
146 if (flushed)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700147 rt_cache_flush(net, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148}
149
150/*
151 * Find the first device with a given source address.
152 */
153
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800154struct net_device * ip_dev_find(struct net *net, __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700159 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
171
172 if (dev)
173 dev_hold(dev);
174out:
175 fib_res_put(&res);
176 return dev;
177}
178
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800179/*
180 * Find address type as if only "dev" was present in the system. If
181 * on_dev is NULL then all interfaces are taken into consideration.
182 */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800183static inline unsigned __inet_dev_addr_type(struct net *net,
184 const struct net_device *dev,
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800185 __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186{
187 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
188 struct fib_result res;
189 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700190 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191
Jan Engelhardt1e637c72008-01-21 03:18:08 -0800192 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 return RTN_BROADCAST;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800194 if (ipv4_is_multicast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195 return RTN_MULTICAST;
196
197#ifdef CONFIG_IP_MULTIPLE_TABLES
198 res.r = NULL;
199#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900200
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800201 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700202 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203 ret = RTN_UNICAST;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000204 if (!fib_table_lookup(local_table, &fl, &res)) {
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800205 if (!dev || dev == res.fi->fib_dev)
206 ret = res.type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 fib_res_put(&res);
208 }
209 }
210 return ret;
211}
212
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800213unsigned int inet_addr_type(struct net *net, __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800214{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800215 return __inet_dev_addr_type(net, NULL, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800216}
217
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
219 __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800220{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800221 return __inet_dev_addr_type(net, dev, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800222}
223
/* Given (packet source, input interface) and optional (dst, oif, tos):
 * - (main) check that the source is valid, i.e. not a broadcast or one of
 *   our local addresses.
 * - figure out which "logical" interface this packet arrived on
 *   and calculate the "specific destination" address.
 * - check that the packet arrived from the expected physical interface.
 */
231
Al Virod9c9df82006-09-26 21:28:14 -0700232int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
jamalb0c110c2009-10-18 02:12:33 +0000233 struct net_device *dev, __be32 *spec_dst,
234 u32 *itag, u32 mark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235{
236 struct in_device *in_dev;
237 struct flowi fl = { .nl_u = { .ip4_u =
238 { .daddr = src,
239 .saddr = dst,
240 .tos = tos } },
jamalb0c110c2009-10-18 02:12:33 +0000241 .mark = mark,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 .iif = oif };
jamalb0c110c2009-10-18 02:12:33 +0000243
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 struct fib_result res;
Patrick McHardy8153a102009-12-03 01:25:58 +0000245 int no_addr, rpf, accept_local;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 int ret;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800247 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248
Patrick McHardy8153a102009-12-03 01:25:58 +0000249 no_addr = rpf = accept_local = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700251 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 if (in_dev) {
253 no_addr = in_dev->ifa_list == NULL;
254 rpf = IN_DEV_RPFILTER(in_dev);
Patrick McHardy8153a102009-12-03 01:25:58 +0000255 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
Jamal Hadi Salim28f6aee2009-12-25 17:30:22 -0800256 if (mark && !IN_DEV_SRC_VMARK(in_dev))
257 fl.mark = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 }
259 rcu_read_unlock();
260
261 if (in_dev == NULL)
262 goto e_inval;
263
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900264 net = dev_net(dev);
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800265 if (fib_lookup(net, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 goto last_resort;
Patrick McHardy8153a102009-12-03 01:25:58 +0000267 if (res.type != RTN_UNICAST) {
268 if (res.type != RTN_LOCAL || !accept_local)
269 goto e_inval_res;
270 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 *spec_dst = FIB_RES_PREFSRC(res);
272 fib_combine_itag(itag, &res);
273#ifdef CONFIG_IP_ROUTE_MULTIPATH
274 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
275#else
276 if (FIB_RES_DEV(res) == dev)
277#endif
278 {
279 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
280 fib_res_put(&res);
281 return ret;
282 }
283 fib_res_put(&res);
284 if (no_addr)
285 goto last_resort;
Stephen Hemmingerc1cf8422009-02-20 08:25:36 +0000286 if (rpf == 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 goto e_inval;
288 fl.oif = dev->ifindex;
289
290 ret = 0;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800291 if (fib_lookup(net, &fl, &res) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 if (res.type == RTN_UNICAST) {
293 *spec_dst = FIB_RES_PREFSRC(res);
294 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
295 }
296 fib_res_put(&res);
297 }
298 return ret;
299
300last_resort:
301 if (rpf)
302 goto e_inval;
303 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
304 *itag = 0;
305 return 0;
306
307e_inval_res:
308 fib_res_put(&res);
309e_inval:
310 return -EINVAL;
311}
312
Al Viro81f7bf62006-09-27 18:40:00 -0700313static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700314{
315 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
316}
317
318static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
319{
320 struct nlattr *nla;
321
322 nla = (struct nlattr *) ((char *) mx + len);
323 nla->nla_type = type;
324 nla->nla_len = nla_attr_size(4);
325 *(u32 *) nla_data(nla) = value;
326
327 return len + nla_total_size(4);
328}
329
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800330static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
Thomas Graf4e902c52006-08-17 18:14:52 -0700331 struct fib_config *cfg)
332{
Al Viro6d85c102006-09-26 22:15:46 -0700333 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700334 int plen;
335
336 memset(cfg, 0, sizeof(*cfg));
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800337 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700338
339 if (rt->rt_dst.sa_family != AF_INET)
340 return -EAFNOSUPPORT;
341
342 /*
343 * Check mask for validity:
344 * a) it must be contiguous.
345 * b) destination must have all host bits clear.
346 * c) if application forgot to set correct family (AF_INET),
347 * reject request unless it is absolutely clear i.e.
348 * both family and mask are zero.
349 */
350 plen = 32;
351 addr = sk_extract_addr(&rt->rt_dst);
352 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700353 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700354
355 if (rt->rt_genmask.sa_family != AF_INET) {
356 if (mask || rt->rt_genmask.sa_family)
357 return -EAFNOSUPPORT;
358 }
359
360 if (bad_mask(mask, addr))
361 return -EINVAL;
362
363 plen = inet_mask_len(mask);
364 }
365
366 cfg->fc_dst_len = plen;
367 cfg->fc_dst = addr;
368
369 if (cmd != SIOCDELRT) {
370 cfg->fc_nlflags = NLM_F_CREATE;
371 cfg->fc_protocol = RTPROT_BOOT;
372 }
373
374 if (rt->rt_metric)
375 cfg->fc_priority = rt->rt_metric - 1;
376
377 if (rt->rt_flags & RTF_REJECT) {
378 cfg->fc_scope = RT_SCOPE_HOST;
379 cfg->fc_type = RTN_UNREACHABLE;
380 return 0;
381 }
382
383 cfg->fc_scope = RT_SCOPE_NOWHERE;
384 cfg->fc_type = RTN_UNICAST;
385
386 if (rt->rt_dev) {
387 char *colon;
388 struct net_device *dev;
389 char devname[IFNAMSIZ];
390
391 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
392 return -EFAULT;
393
394 devname[IFNAMSIZ-1] = 0;
395 colon = strchr(devname, ':');
396 if (colon)
397 *colon = 0;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800398 dev = __dev_get_by_name(net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700399 if (!dev)
400 return -ENODEV;
401 cfg->fc_oif = dev->ifindex;
402 if (colon) {
403 struct in_ifaddr *ifa;
404 struct in_device *in_dev = __in_dev_get_rtnl(dev);
405 if (!in_dev)
406 return -ENODEV;
407 *colon = ':';
408 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
409 if (strcmp(ifa->ifa_label, devname) == 0)
410 break;
411 if (ifa == NULL)
412 return -ENODEV;
413 cfg->fc_prefsrc = ifa->ifa_local;
414 }
415 }
416
417 addr = sk_extract_addr(&rt->rt_gateway);
418 if (rt->rt_gateway.sa_family == AF_INET && addr) {
419 cfg->fc_gw = addr;
420 if (rt->rt_flags & RTF_GATEWAY &&
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800421 inet_addr_type(net, addr) == RTN_UNICAST)
Thomas Graf4e902c52006-08-17 18:14:52 -0700422 cfg->fc_scope = RT_SCOPE_UNIVERSE;
423 }
424
425 if (cmd == SIOCDELRT)
426 return 0;
427
428 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
429 return -EINVAL;
430
431 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
432 cfg->fc_scope = RT_SCOPE_LINK;
433
434 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
435 struct nlattr *mx;
436 int len = 0;
437
438 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900439 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700440 return -ENOMEM;
441
442 if (rt->rt_flags & RTF_MTU)
443 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
444
445 if (rt->rt_flags & RTF_WINDOW)
446 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
447
448 if (rt->rt_flags & RTF_IRTT)
449 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
450
451 cfg->fc_mx = mx;
452 cfg->fc_mx_len = len;
453 }
454
455 return 0;
456}
457
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458/*
459 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
460 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900461
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800462int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463{
Thomas Graf4e902c52006-08-17 18:14:52 -0700464 struct fib_config cfg;
465 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467
468 switch (cmd) {
469 case SIOCADDRT: /* Add a route */
470 case SIOCDELRT: /* Delete a route */
471 if (!capable(CAP_NET_ADMIN))
472 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700473
474 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700476
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477 rtnl_lock();
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800478 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700480 struct fib_table *tb;
481
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 if (cmd == SIOCDELRT) {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800483 tb = fib_get_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000485 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700486 else
487 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488 } else {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800489 tb = fib_new_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000491 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700492 else
493 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700495
496 /* allocated by rtentry_to_fib_config() */
497 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 }
499 rtnl_unlock();
500 return err;
501 }
502 return -EINVAL;
503}
504
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700505const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700506 [RTA_DST] = { .type = NLA_U32 },
507 [RTA_SRC] = { .type = NLA_U32 },
508 [RTA_IIF] = { .type = NLA_U32 },
509 [RTA_OIF] = { .type = NLA_U32 },
510 [RTA_GATEWAY] = { .type = NLA_U32 },
511 [RTA_PRIORITY] = { .type = NLA_U32 },
512 [RTA_PREFSRC] = { .type = NLA_U32 },
513 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700514 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700515 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700516};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800518static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
519 struct nlmsghdr *nlh, struct fib_config *cfg)
Thomas Graf4e902c52006-08-17 18:14:52 -0700520{
521 struct nlattr *attr;
522 int err, remaining;
523 struct rtmsg *rtm;
524
525 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
526 if (err < 0)
527 goto errout;
528
529 memset(cfg, 0, sizeof(*cfg));
530
531 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700532 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700533 cfg->fc_tos = rtm->rtm_tos;
534 cfg->fc_table = rtm->rtm_table;
535 cfg->fc_protocol = rtm->rtm_protocol;
536 cfg->fc_scope = rtm->rtm_scope;
537 cfg->fc_type = rtm->rtm_type;
538 cfg->fc_flags = rtm->rtm_flags;
539 cfg->fc_nlflags = nlh->nlmsg_flags;
540
541 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
542 cfg->fc_nlinfo.nlh = nlh;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800543 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700544
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700545 if (cfg->fc_type > RTN_MAX) {
546 err = -EINVAL;
547 goto errout;
548 }
549
Thomas Graf4e902c52006-08-17 18:14:52 -0700550 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200551 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700552 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700553 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700554 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700555 case RTA_OIF:
556 cfg->fc_oif = nla_get_u32(attr);
557 break;
558 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700559 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700560 break;
561 case RTA_PRIORITY:
562 cfg->fc_priority = nla_get_u32(attr);
563 break;
564 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700565 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700566 break;
567 case RTA_METRICS:
568 cfg->fc_mx = nla_data(attr);
569 cfg->fc_mx_len = nla_len(attr);
570 break;
571 case RTA_MULTIPATH:
572 cfg->fc_mp = nla_data(attr);
573 cfg->fc_mp_len = nla_len(attr);
574 break;
575 case RTA_FLOW:
576 cfg->fc_flow = nla_get_u32(attr);
577 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700578 case RTA_TABLE:
579 cfg->fc_table = nla_get_u32(attr);
580 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 }
582 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700583
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700585errout:
586 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587}
588
Jianjun Kong6ed25332008-11-03 00:25:16 -0800589static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900591 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700592 struct fib_config cfg;
593 struct fib_table *tb;
594 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800596 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700597 if (err < 0)
598 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800600 tb = fib_get_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700601 if (tb == NULL) {
602 err = -ESRCH;
603 goto errout;
604 }
605
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000606 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700607errout:
608 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609}
610
Jianjun Kong6ed25332008-11-03 00:25:16 -0800611static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900613 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700614 struct fib_config cfg;
615 struct fib_table *tb;
616 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800618 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700619 if (err < 0)
620 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800622 tb = fib_new_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700623 if (tb == NULL) {
624 err = -ENOBUFS;
625 goto errout;
626 }
627
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000628 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700629errout:
630 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631}
632
Thomas Graf63f34442007-03-22 11:55:17 -0700633static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900635 struct net *net = sock_net(skb->sk);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700636 unsigned int h, s_h;
637 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700639 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800640 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700641 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
Thomas Grafbe403ea2006-08-17 18:15:17 -0700643 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
644 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 return ip_rt_dump(skb, cb);
646
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700647 s_h = cb->args[0];
648 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700650 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
651 e = 0;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800652 head = &net->ipv4.fib_table_hash[h];
653 hlist_for_each_entry(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700654 if (e < s_e)
655 goto next;
656 if (dumped)
657 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900658 2 * sizeof(cb->args[0]));
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000659 if (fib_table_dump(tb, skb, cb) < 0)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700660 goto out;
661 dumped = 1;
662next:
663 e++;
664 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700666out:
667 cb->args[1] = e;
668 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669
670 return skb->len;
671}
672
/* Prepare and feed an intra-kernel routing request.
 * Really, it should be a netlink message, but :-( netlink
 * may not be configured, so we feed it directly
 * to the fib engine. This is legal because all events occur
 * only when netlink is already locked.
 */
679
Al Viro81f7bf62006-09-27 18:40:00 -0700680static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900682 struct net *net = dev_net(ifa->ifa_dev->dev);
Thomas Graf4e902c52006-08-17 18:14:52 -0700683 struct fib_table *tb;
684 struct fib_config cfg = {
685 .fc_protocol = RTPROT_KERNEL,
686 .fc_type = type,
687 .fc_dst = dst,
688 .fc_dst_len = dst_len,
689 .fc_prefsrc = ifa->ifa_local,
690 .fc_oif = ifa->ifa_dev->dev->ifindex,
691 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800692 .fc_nlinfo = {
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800693 .nl_net = net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800694 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700695 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696
697 if (type == RTN_UNICAST)
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800698 tb = fib_new_table(net, RT_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699 else
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800700 tb = fib_new_table(net, RT_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701
702 if (tb == NULL)
703 return;
704
Thomas Graf4e902c52006-08-17 18:14:52 -0700705 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706
Thomas Graf4e902c52006-08-17 18:14:52 -0700707 if (type != RTN_LOCAL)
708 cfg.fc_scope = RT_SCOPE_LINK;
709 else
710 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711
712 if (cmd == RTM_NEWROUTE)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000713 fib_table_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 else
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000715 fib_table_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716}
717
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800718void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719{
720 struct in_device *in_dev = ifa->ifa_dev;
721 struct net_device *dev = in_dev->dev;
722 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700723 __be32 mask = ifa->ifa_mask;
724 __be32 addr = ifa->ifa_local;
725 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726
727 if (ifa->ifa_flags&IFA_F_SECONDARY) {
728 prim = inet_ifa_byprefix(in_dev, prefix, mask);
729 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800730 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 return;
732 }
733 }
734
735 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
736
737 if (!(dev->flags&IFF_UP))
738 return;
739
740 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700741 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
743
Joe Perchesf97c1e02007-12-16 13:45:43 -0800744 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745 (prefix != addr || ifa->ifa_prefixlen < 32)) {
746 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
747 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
748
749 /* Add network specific broadcasts, when it takes a sense */
750 if (ifa->ifa_prefixlen < 31) {
751 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
752 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
753 }
754 }
755}
756
757static void fib_del_ifaddr(struct in_ifaddr *ifa)
758{
759 struct in_device *in_dev = ifa->ifa_dev;
760 struct net_device *dev = in_dev->dev;
761 struct in_ifaddr *ifa1;
762 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700763 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
764 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765#define LOCAL_OK 1
766#define BRD_OK 2
767#define BRD0_OK 4
768#define BRD1_OK 8
769 unsigned ok = 0;
770
771 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
772 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
773 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
774 else {
775 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
776 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800777 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778 return;
779 }
780 }
781
782 /* Deletion is more complicated than add.
783 We should take care of not to delete too much :-)
784
785 Scan address list to be sure that addresses are really gone.
786 */
787
788 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
789 if (ifa->ifa_local == ifa1->ifa_local)
790 ok |= LOCAL_OK;
791 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
792 ok |= BRD_OK;
793 if (brd == ifa1->ifa_broadcast)
794 ok |= BRD1_OK;
795 if (any == ifa1->ifa_broadcast)
796 ok |= BRD0_OK;
797 }
798
799 if (!(ok&BRD_OK))
800 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
801 if (!(ok&BRD1_OK))
802 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
803 if (!(ok&BRD0_OK))
804 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
805 if (!(ok&LOCAL_OK)) {
806 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
807
808 /* Check, that this local address finally disappeared. */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900809 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 /* And the last, but not the least thing.
811 We must flush stray FIB entries.
812
813 First of all, we scan fib_info list searching
814 for stray nexthop entries, then ignite fib_flush.
815 */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900816 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
817 fib_flush(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818 }
819 }
820#undef LOCAL_OK
821#undef BRD_OK
822#undef BRD0_OK
823#undef BRD1_OK
824}
825
Robert Olsson246955f2005-06-20 13:36:39 -0700826static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
827{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900828
Robert Olsson246955f2005-06-20 13:36:39 -0700829 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800830 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800831 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700832 .tos = frn->fl_tos,
833 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700834
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700835#ifdef CONFIG_IP_MULTIPLE_TABLES
836 res.r = NULL;
837#endif
838
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700839 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700840 if (tb) {
841 local_bh_disable();
842
843 frn->tb_id = tb->tb_id;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000844 frn->err = fib_table_lookup(tb, &fl, &res);
Robert Olsson246955f2005-06-20 13:36:39 -0700845
846 if (!frn->err) {
847 frn->prefixlen = res.prefixlen;
848 frn->nh_sel = res.nh_sel;
849 frn->type = res.type;
850 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700851 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700852 }
853 local_bh_enable();
854 }
855}
856
David S. Miller28f7b0362007-10-10 21:32:39 -0700857static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700858{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800859 struct net *net;
Robert Olsson246955f2005-06-20 13:36:39 -0700860 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700861 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700862 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700863 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700864
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900865 net = sock_net(skb->sk);
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700866 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800867 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
Denis V. Lunevd883a032007-12-21 02:01:53 -0800868 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
Thomas Grafea865752005-12-01 14:30:00 -0800869 return;
Denis V. Lunevd883a032007-12-21 02:01:53 -0800870
871 skb = skb_clone(skb, GFP_KERNEL);
872 if (skb == NULL)
873 return;
874 nlh = nlmsg_hdr(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900875
Robert Olsson246955f2005-06-20 13:36:39 -0700876 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800877 tb = fib_get_table(net, frn->tb_id_in);
Robert Olsson246955f2005-06-20 13:36:39 -0700878
879 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900880
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700881 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700882 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700883 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800884 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900885}
Robert Olsson246955f2005-06-20 13:36:39 -0700886
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +0000887static int __net_init nl_fib_lookup_init(struct net *net)
Robert Olsson246955f2005-06-20 13:36:39 -0700888{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800889 struct sock *sk;
890 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
891 nl_fib_input, NULL, THIS_MODULE);
892 if (sk == NULL)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800893 return -EAFNOSUPPORT;
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800894 net->ipv4.fibnl = sk;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800895 return 0;
896}
897
898static void nl_fib_lookup_exit(struct net *net)
899{
Denis V. Lunevb7c6ba62008-01-28 14:41:19 -0800900 netlink_kernel_release(net->ipv4.fibnl);
Denis V. Lunev775516b2008-01-18 23:55:19 -0800901 net->ipv4.fibnl = NULL;
Robert Olsson246955f2005-06-20 13:36:39 -0700902}
903
/*
 * fib_disable_ip - take IPv4 routing state down for a device.
 * @dev:   the device.
 * @force: passed straight to fib_sync_down_dev().
 * @delay: passed straight to rt_cache_flush().
 *
 * fib_flush() runs only when fib_sync_down_dev() returns non-zero; the
 * route cache flush and arp_ifdown() always run.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force) != 0)
		fib_flush(net);

	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
911
912static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
913{
Jianjun Kong6ed25332008-11-03 00:25:16 -0800914 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700915 struct net_device *dev = ifa->ifa_dev->dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916
917 switch (event) {
918 case NETDEV_UP:
919 fib_add_ifaddr(ifa);
920#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700921 fib_sync_up(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700923 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924 break;
925 case NETDEV_DOWN:
926 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700927 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928 /* Last address was deleted from this interface.
929 Disable IP.
930 */
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000931 fib_disable_ip(dev, 1, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932 } else {
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700933 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 }
935 break;
936 }
937 return NOTIFY_DONE;
938}
939
940static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
941{
942 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700943 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944
945 if (event == NETDEV_UNREGISTER) {
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000946 fib_disable_ip(dev, 2, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947 return NOTIFY_DONE;
948 }
949
950 if (!in_dev)
951 return NOTIFY_DONE;
952
953 switch (event) {
954 case NETDEV_UP:
955 for_ifa(in_dev) {
956 fib_add_ifaddr(ifa);
957 } endfor_ifa(in_dev);
958#ifdef CONFIG_IP_ROUTE_MULTIPATH
959 fib_sync_up(dev);
960#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700961 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 break;
963 case NETDEV_DOWN:
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000964 fib_disable_ip(dev, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 break;
966 case NETDEV_CHANGEMTU:
967 case NETDEV_CHANGE:
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700968 rt_cache_flush(dev_net(dev), 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 break;
Eric W. Biedermana5ee1552009-11-29 15:45:58 +0000970 case NETDEV_UNREGISTER_BATCH:
971 rt_cache_flush_batch();
972 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973 }
974 return NOTIFY_DONE;
975}
976
977static struct notifier_block fib_inetaddr_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800978 .notifier_call = fib_inetaddr_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979};
980
981static struct notifier_block fib_netdev_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800982 .notifier_call = fib_netdev_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983};
984
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800985static int __net_init ip_fib_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986{
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -0800987 int err;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700988 unsigned int i;
989
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800990 net->ipv4.fib_table_hash = kzalloc(
991 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
992 if (net->ipv4.fib_table_hash == NULL)
993 return -ENOMEM;
994
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700995 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800996 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -0800997
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -0800998 err = fib4_rules_init(net);
999 if (err < 0)
1000 goto fail;
1001 return 0;
1002
1003fail:
1004 kfree(net->ipv4.fib_table_hash);
1005 return err;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001006}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001008static void ip_fib_net_exit(struct net *net)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001009{
1010 unsigned int i;
Thomas Graf63f34442007-03-22 11:55:17 -07001011
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001012#ifdef CONFIG_IP_MULTIPLE_TABLES
1013 fib4_rules_exit(net);
1014#endif
1015
1016 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1017 struct fib_table *tb;
1018 struct hlist_head *head;
1019 struct hlist_node *node, *tmp;
1020
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001021 head = &net->ipv4.fib_table_hash[i];
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001022 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1023 hlist_del(node);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001024 fib_table_flush(tb);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001025 kfree(tb);
1026 }
1027 }
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001028 kfree(net->ipv4.fib_table_hash);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001029}
1030
1031static int __net_init fib_net_init(struct net *net)
1032{
1033 int error;
1034
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001035 error = ip_fib_net_init(net);
1036 if (error < 0)
1037 goto out;
1038 error = nl_fib_lookup_init(net);
1039 if (error < 0)
1040 goto out_nlfl;
1041 error = fib_proc_init(net);
1042 if (error < 0)
1043 goto out_proc;
1044out:
1045 return error;
1046
1047out_proc:
1048 nl_fib_lookup_exit(net);
1049out_nlfl:
1050 ip_fib_net_exit(net);
1051 goto out;
1052}
1053
1054static void __net_exit fib_net_exit(struct net *net)
1055{
1056 fib_proc_exit(net);
1057 nl_fib_lookup_exit(net);
1058 ip_fib_net_exit(net);
1059}
1060
1061static struct pernet_operations fib_net_ops = {
1062 .init = fib_net_init,
1063 .exit = fib_net_exit,
1064};
1065
1066void __init ip_fib_init(void)
1067{
Thomas Graf63f34442007-03-22 11:55:17 -07001068 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1069 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1070 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001071
1072 register_pernet_subsys(&fib_net_ops);
1073 register_netdevice_notifier(&fib_netdev_notifier);
1074 register_inetaddr_notifier(&fib_inetaddr_notifier);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001075
1076 fib_hash_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077}
1078
/* Symbols exported for use by other kernel code. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);