blob: 7d02a9f999fabcebeb61800816d722e6f6c054ff [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080020#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/types.h>
22#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020030#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070032#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/if_arp.h>
34#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070036#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090037#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/arp.h>
45#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070046#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#ifndef CONFIG_IP_MULTIPLE_TABLES
49
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -080050static int __net_init fib4_rules_init(struct net *net)
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080051{
Denis V. Lunev93456b62008-01-10 03:23:38 -080052 struct fib_table *local_table, *main_table;
53
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080054 local_table = fib_hash_table(RT_TABLE_LOCAL);
Denis V. Lunev93456b62008-01-10 03:23:38 -080055 if (local_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080056 return -ENOMEM;
57
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080058 main_table = fib_hash_table(RT_TABLE_MAIN);
Denis V. Lunev93456b62008-01-10 03:23:38 -080059 if (main_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080060 goto fail;
61
Denis V. Lunev93456b62008-01-10 03:23:38 -080062 hlist_add_head_rcu(&local_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080063 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
Denis V. Lunev93456b62008-01-10 03:23:38 -080064 hlist_add_head_rcu(&main_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080065 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080066 return 0;
67
68fail:
Denis V. Lunev93456b62008-01-10 03:23:38 -080069 kfree(local_table);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080070 return -ENOMEM;
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080071}
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#else
73
Denis V. Lunev8ad49422008-01-10 03:24:11 -080074struct fib_table *fib_new_table(struct net *net, u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070075{
76 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070077 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070079 if (id == 0)
80 id = RT_TABLE_MAIN;
Denis V. Lunev8ad49422008-01-10 03:24:11 -080081 tb = fib_get_table(net, id);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070082 if (tb)
83 return tb;
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080084
85 tb = fib_hash_table(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 if (!tb)
87 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070088 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080089 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 return tb;
91}
92
Denis V. Lunev8ad49422008-01-10 03:24:11 -080093struct fib_table *fib_get_table(struct net *net, u32 id)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070094{
95 struct fib_table *tb;
96 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080097 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070098 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070099
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800103
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700104 rcu_read_lock();
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
Denis V. Lunev010278e2008-01-22 22:04:04 -0800117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
Denis V. Lunev010278e2008-01-22 22:04:04 -0800127 tb = fib_get_table(net, table);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000129 fib_table_select_default(tb, flp, res);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800130}
131
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800132static void fib_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133{
134 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700136 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800137 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700138 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000143 flushed += fib_table_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145
146 if (flushed)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700147 rt_cache_flush(net, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148}
149
150/*
151 * Find the first device with a given source address.
152 */
153
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800154struct net_device * ip_dev_find(struct net *net, __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700159 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
171
172 if (dev)
173 dev_hold(dev);
174out:
175 fib_res_put(&res);
176 return dev;
177}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000178EXPORT_SYMBOL(ip_dev_find);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179
/*
 * Find address type as if only "dev" was present in the system. If
 * dev is NULL then all interfaces are taken into consideration.
 */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800184static inline unsigned __inet_dev_addr_type(struct net *net,
185 const struct net_device *dev,
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800186 __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187{
188 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
189 struct fib_result res;
190 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700191 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192
Jan Engelhardt1e637c72008-01-21 03:18:08 -0800193 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 return RTN_BROADCAST;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800195 if (ipv4_is_multicast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 return RTN_MULTICAST;
197
198#ifdef CONFIG_IP_MULTIPLE_TABLES
199 res.r = NULL;
200#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900201
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800202 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700203 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204 ret = RTN_UNICAST;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000205 if (!fib_table_lookup(local_table, &fl, &res)) {
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800206 if (!dev || dev == res.fi->fib_dev)
207 ret = res.type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 fib_res_put(&res);
209 }
210 }
211 return ret;
212}
213
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800214unsigned int inet_addr_type(struct net *net, __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800215{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800216 return __inet_dev_addr_type(net, NULL, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800217}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000218EXPORT_SYMBOL(inet_addr_type);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800219
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800222{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800223 return __inet_dev_addr_type(net, dev, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800224}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000225EXPORT_SYMBOL(inet_dev_addr_type);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800226
/* Given (packet source, input interface) and optional (dst, oif, tos):
 * - (main) check that the source is valid, i.e. not broadcast or one of
 *   our local addresses.
 * - figure out what "logical" interface this packet arrived on
 *   and calculate the "specific destination" address.
 * - check that the packet arrived from the expected physical interface.
 *
 * Returns 0 or 1 on success (1 when the scope of the selected next hop
 * is >= RT_SCOPE_HOST), -EINVAL if @dev has no in_device or the route
 * found for the source has an unacceptable type, and -EXDEV if the
 * reverse-path filter setting rejects the packet.  On every successful
 * return *spec_dst has been written; *itag is updated through
 * fib_combine_itag() or zeroed on the last-resort path.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst,
			u32 *itag, u32 mark)
{
	struct in_device *in_dev;
	/*
	 * Reverse lookup key: the packet's source becomes the lookup
	 * destination and the packet's destination the lookup source.
	 */
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .mark = mark,
			    .iif = oif };

	struct fib_result res;
	int no_addr, rpf, accept_local;
	bool dev_match;
	int ret;
	struct net *net;

	no_addr = rpf = accept_local = 0;
	/* Snapshot the per-device settings under RCU. */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
		/* Ignore the mark unless the device asks for it. */
		if (mark && !IN_DEV_SRC_VMARK(in_dev))
			fl.mark = 0;
	}
	rcu_read_unlock();

	if (in_dev == NULL)
		goto e_inval;

	net = dev_net(dev);
	if (fib_lookup(net, &fl, &res))
		goto last_resort;
	/* Only unicast routes, or local ones when accept_local is set, pass. */
	if (res.type != RTN_UNICAST) {
		if (res.type != RTN_LOCAL || !accept_local)
			goto e_inval_res;
	}
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
	dev_match = false;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* 'ret' is used as the next-hop index here; any next hop may match. */
	for (ret = 0; ret < res.fi->fib_nhs; ret++) {
		struct fib_nh *nh = &res.fi->fib_nh[ret];

		if (nh->nh_dev == dev) {
			dev_match = true;
			break;
		}
	}
#else
	if (FIB_RES_DEV(res) == dev)
		dev_match = true;
#endif
	if (dev_match) {
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	/* The route back to the source does not use the arrival device. */
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	if (rpf == 1)
		goto e_rpf;
	/* Retry the lookup restricted to the arrival device. */
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(net, &fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	/* No usable route: accept only when reverse-path filtering is off. */
	if (rpf)
		goto e_rpf;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
e_rpf:
	return -EXDEV;
}
328
Al Viro81f7bf62006-09-27 18:40:00 -0700329static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700330{
331 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
332}
333
334static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
335{
336 struct nlattr *nla;
337
338 nla = (struct nlattr *) ((char *) mx + len);
339 nla->nla_type = type;
340 nla->nla_len = nla_attr_size(4);
341 *(u32 *) nla_data(nla) = value;
342
343 return len + nla_total_size(4);
344}
345
/*
 * Translate a legacy ioctl route request (struct rtentry) into a
 * struct fib_config.
 *
 * @net: namespace stored in cfg->fc_nlinfo and used for the device and
 *       address-type lookups
 * @cmd: SIOCDELRT selects delete semantics; any other value is handled
 *       as an add
 * @rt:  request to translate; rt->rt_dev, when set, is read with
 *       copy_from_user()
 * @cfg: output, zeroed before anything else is done
 *
 * Returns 0 on success or a negative errno.  For a non-delete request
 * with any of RTF_MTU/RTF_WINDOW/RTF_IRTT set, cfg->fc_mx points to
 * kzalloc()ed memory on return; ip_rt_ioctl() kfree()s it.
 */
static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));
	cfg->fc_nlinfo.nl_net = net;

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 *    reject request unless it is absolutely clear i.e.
	 *    both family and mask are zero.
	 */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	/* Everything except a delete is a create request. */
	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	/* A non-zero ioctl metric is stored as metric - 1. */
	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	/* Reject routes need no device or gateway processing. */
	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	/* RT_SCOPE_NOWHERE acts as "not decided yet"; resolved further down. */
	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		/* Cut the name at ':' so the device lookup sees the part before it. */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(net, devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			/*
			 * Restore the full name and find the address whose
			 * label matches it; that address becomes the
			 * preferred source.
			 */
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(net, addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	/* The remaining checks and the metrics only apply to adds. */
	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		/* Room for at most three 32-bit attributes. */
		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		/* NOTE(review): 40 is presumably IPv4 + TCP header size - confirm. */
		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		/* NOTE(review): the << 3 scaling presumably matches the unit RTAX_RTT expects - confirm. */
		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}
473
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474/*
475 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
476 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900477
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800478int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479{
Thomas Graf4e902c52006-08-17 18:14:52 -0700480 struct fib_config cfg;
481 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483
484 switch (cmd) {
485 case SIOCADDRT: /* Add a route */
486 case SIOCDELRT: /* Delete a route */
487 if (!capable(CAP_NET_ADMIN))
488 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700489
490 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700492
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 rtnl_lock();
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800494 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700496 struct fib_table *tb;
497
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 if (cmd == SIOCDELRT) {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800499 tb = fib_get_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000501 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700502 else
503 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 } else {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800505 tb = fib_new_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000507 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700508 else
509 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700511
512 /* allocated by rtentry_to_fib_config() */
513 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 }
515 rtnl_unlock();
516 return err;
517 }
518 return -EINVAL;
519}
520
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700521const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700522 [RTA_DST] = { .type = NLA_U32 },
523 [RTA_SRC] = { .type = NLA_U32 },
524 [RTA_IIF] = { .type = NLA_U32 },
525 [RTA_OIF] = { .type = NLA_U32 },
526 [RTA_GATEWAY] = { .type = NLA_U32 },
527 [RTA_PRIORITY] = { .type = NLA_U32 },
528 [RTA_PREFSRC] = { .type = NLA_U32 },
529 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700530 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700531 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700532};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800534static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
535 struct nlmsghdr *nlh, struct fib_config *cfg)
Thomas Graf4e902c52006-08-17 18:14:52 -0700536{
537 struct nlattr *attr;
538 int err, remaining;
539 struct rtmsg *rtm;
540
541 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
542 if (err < 0)
543 goto errout;
544
545 memset(cfg, 0, sizeof(*cfg));
546
547 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700548 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700549 cfg->fc_tos = rtm->rtm_tos;
550 cfg->fc_table = rtm->rtm_table;
551 cfg->fc_protocol = rtm->rtm_protocol;
552 cfg->fc_scope = rtm->rtm_scope;
553 cfg->fc_type = rtm->rtm_type;
554 cfg->fc_flags = rtm->rtm_flags;
555 cfg->fc_nlflags = nlh->nlmsg_flags;
556
557 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
558 cfg->fc_nlinfo.nlh = nlh;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800559 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700560
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700561 if (cfg->fc_type > RTN_MAX) {
562 err = -EINVAL;
563 goto errout;
564 }
565
Thomas Graf4e902c52006-08-17 18:14:52 -0700566 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200567 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700568 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700569 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700570 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700571 case RTA_OIF:
572 cfg->fc_oif = nla_get_u32(attr);
573 break;
574 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700575 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700576 break;
577 case RTA_PRIORITY:
578 cfg->fc_priority = nla_get_u32(attr);
579 break;
580 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700581 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700582 break;
583 case RTA_METRICS:
584 cfg->fc_mx = nla_data(attr);
585 cfg->fc_mx_len = nla_len(attr);
586 break;
587 case RTA_MULTIPATH:
588 cfg->fc_mp = nla_data(attr);
589 cfg->fc_mp_len = nla_len(attr);
590 break;
591 case RTA_FLOW:
592 cfg->fc_flow = nla_get_u32(attr);
593 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700594 case RTA_TABLE:
595 cfg->fc_table = nla_get_u32(attr);
596 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 }
598 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700599
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700601errout:
602 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603}
604
Jianjun Kong6ed25332008-11-03 00:25:16 -0800605static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900607 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700608 struct fib_config cfg;
609 struct fib_table *tb;
610 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800612 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700613 if (err < 0)
614 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800616 tb = fib_get_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700617 if (tb == NULL) {
618 err = -ESRCH;
619 goto errout;
620 }
621
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000622 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700623errout:
624 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625}
626
Jianjun Kong6ed25332008-11-03 00:25:16 -0800627static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900629 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700630 struct fib_config cfg;
631 struct fib_table *tb;
632 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800634 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700635 if (err < 0)
636 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800638 tb = fib_new_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700639 if (tb == NULL) {
640 err = -ENOBUFS;
641 goto errout;
642 }
643
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000644 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700645errout:
646 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647}
648
Thomas Graf63f34442007-03-22 11:55:17 -0700649static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900651 struct net *net = sock_net(skb->sk);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700652 unsigned int h, s_h;
653 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700655 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800656 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700657 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658
Thomas Grafbe403ea2006-08-17 18:15:17 -0700659 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
660 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 return ip_rt_dump(skb, cb);
662
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700663 s_h = cb->args[0];
664 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700666 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
667 e = 0;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800668 head = &net->ipv4.fib_table_hash[h];
669 hlist_for_each_entry(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700670 if (e < s_e)
671 goto next;
672 if (dumped)
673 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900674 2 * sizeof(cb->args[0]));
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000675 if (fib_table_dump(tb, skb, cb) < 0)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700676 goto out;
677 dumped = 1;
678next:
679 e++;
680 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700682out:
683 cb->args[1] = e;
684 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685
686 return skb->len;
687}
688
/* Prepare and feed an intra-kernel routing request.
   Really, it should be a netlink message, but :-( netlink
   may not be configured, so we feed it directly
   to the fib engine. This is legal because all events occur
   only when netlink is already locked.
 */
695
Al Viro81f7bf62006-09-27 18:40:00 -0700696static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900698 struct net *net = dev_net(ifa->ifa_dev->dev);
Thomas Graf4e902c52006-08-17 18:14:52 -0700699 struct fib_table *tb;
700 struct fib_config cfg = {
701 .fc_protocol = RTPROT_KERNEL,
702 .fc_type = type,
703 .fc_dst = dst,
704 .fc_dst_len = dst_len,
705 .fc_prefsrc = ifa->ifa_local,
706 .fc_oif = ifa->ifa_dev->dev->ifindex,
707 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800708 .fc_nlinfo = {
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800709 .nl_net = net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800710 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700711 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712
713 if (type == RTN_UNICAST)
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800714 tb = fib_new_table(net, RT_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 else
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800716 tb = fib_new_table(net, RT_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
718 if (tb == NULL)
719 return;
720
Thomas Graf4e902c52006-08-17 18:14:52 -0700721 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722
Thomas Graf4e902c52006-08-17 18:14:52 -0700723 if (type != RTN_LOCAL)
724 cfg.fc_scope = RT_SCOPE_LINK;
725 else
726 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727
728 if (cmd == RTM_NEWROUTE)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000729 fib_table_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 else
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000731 fib_table_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732}
733
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800734void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735{
736 struct in_device *in_dev = ifa->ifa_dev;
737 struct net_device *dev = in_dev->dev;
738 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700739 __be32 mask = ifa->ifa_mask;
740 __be32 addr = ifa->ifa_local;
741 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742
743 if (ifa->ifa_flags&IFA_F_SECONDARY) {
744 prim = inet_ifa_byprefix(in_dev, prefix, mask);
745 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800746 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 return;
748 }
749 }
750
751 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
752
753 if (!(dev->flags&IFF_UP))
754 return;
755
756 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700757 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
759
Joe Perchesf97c1e02007-12-16 13:45:43 -0800760 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 (prefix != addr || ifa->ifa_prefixlen < 32)) {
762 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
763 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
764
765 /* Add network specific broadcasts, when it takes a sense */
766 if (ifa->ifa_prefixlen < 31) {
767 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
768 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
769 }
770 }
771}
772
773static void fib_del_ifaddr(struct in_ifaddr *ifa)
774{
775 struct in_device *in_dev = ifa->ifa_dev;
776 struct net_device *dev = in_dev->dev;
777 struct in_ifaddr *ifa1;
778 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700779 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
780 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781#define LOCAL_OK 1
782#define BRD_OK 2
783#define BRD0_OK 4
784#define BRD1_OK 8
785 unsigned ok = 0;
786
787 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
788 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
789 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
790 else {
791 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
792 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800793 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794 return;
795 }
796 }
797
798 /* Deletion is more complicated than add.
799 We should take care of not to delete too much :-)
800
801 Scan address list to be sure that addresses are really gone.
802 */
803
804 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
805 if (ifa->ifa_local == ifa1->ifa_local)
806 ok |= LOCAL_OK;
807 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
808 ok |= BRD_OK;
809 if (brd == ifa1->ifa_broadcast)
810 ok |= BRD1_OK;
811 if (any == ifa1->ifa_broadcast)
812 ok |= BRD0_OK;
813 }
814
815 if (!(ok&BRD_OK))
816 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
817 if (!(ok&BRD1_OK))
818 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
819 if (!(ok&BRD0_OK))
820 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
821 if (!(ok&LOCAL_OK)) {
822 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
823
824 /* Check, that this local address finally disappeared. */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900825 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 /* And the last, but not the least thing.
827 We must flush stray FIB entries.
828
829 First of all, we scan fib_info list searching
830 for stray nexthop entries, then ignite fib_flush.
831 */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900832 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
833 fib_flush(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 }
835 }
836#undef LOCAL_OK
837#undef BRD_OK
838#undef BRD0_OK
839#undef BRD1_OK
840}
841
Robert Olsson246955f2005-06-20 13:36:39 -0700842static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
843{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900844
Robert Olsson246955f2005-06-20 13:36:39 -0700845 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800846 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800847 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700848 .tos = frn->fl_tos,
849 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700850
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700851#ifdef CONFIG_IP_MULTIPLE_TABLES
852 res.r = NULL;
853#endif
854
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700855 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700856 if (tb) {
857 local_bh_disable();
858
859 frn->tb_id = tb->tb_id;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000860 frn->err = fib_table_lookup(tb, &fl, &res);
Robert Olsson246955f2005-06-20 13:36:39 -0700861
862 if (!frn->err) {
863 frn->prefixlen = res.prefixlen;
864 frn->nh_sel = res.nh_sel;
865 frn->type = res.type;
866 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700867 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700868 }
869 local_bh_enable();
870 }
871}
872
David S. Miller28f7b0362007-10-10 21:32:39 -0700873static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700874{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800875 struct net *net;
Robert Olsson246955f2005-06-20 13:36:39 -0700876 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700877 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700878 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700879 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700880
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900881 net = sock_net(skb->sk);
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700882 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800883 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
Denis V. Lunevd883a032007-12-21 02:01:53 -0800884 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
Thomas Grafea865752005-12-01 14:30:00 -0800885 return;
Denis V. Lunevd883a032007-12-21 02:01:53 -0800886
887 skb = skb_clone(skb, GFP_KERNEL);
888 if (skb == NULL)
889 return;
890 nlh = nlmsg_hdr(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900891
Robert Olsson246955f2005-06-20 13:36:39 -0700892 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800893 tb = fib_get_table(net, frn->tb_id_in);
Robert Olsson246955f2005-06-20 13:36:39 -0700894
895 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900896
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700897 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700898 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700899 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800900 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900901}
Robert Olsson246955f2005-06-20 13:36:39 -0700902
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +0000903static int __net_init nl_fib_lookup_init(struct net *net)
Robert Olsson246955f2005-06-20 13:36:39 -0700904{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800905 struct sock *sk;
906 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
907 nl_fib_input, NULL, THIS_MODULE);
908 if (sk == NULL)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800909 return -EAFNOSUPPORT;
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800910 net->ipv4.fibnl = sk;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800911 return 0;
912}
913
914static void nl_fib_lookup_exit(struct net *net)
915{
Denis V. Lunevb7c6ba62008-01-28 14:41:19 -0800916 netlink_kernel_release(net->ipv4.fibnl);
Denis V. Lunev775516b2008-01-18 23:55:19 -0800917 net->ipv4.fibnl = NULL;
Robert Olsson246955f2005-06-20 13:36:39 -0700918}
919
/*
 * Take IPv4 routing down on @dev: sync down the FIB entries that use the
 * device (flushing the FIB if that reported anything), flush the route
 * cache with the given @delay, and notify ARP that the interface is down.
 *
 * @force is passed through unchanged to fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);

	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
927
928static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
929{
Jianjun Kong6ed25332008-11-03 00:25:16 -0800930 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700931 struct net_device *dev = ifa->ifa_dev->dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932
933 switch (event) {
934 case NETDEV_UP:
935 fib_add_ifaddr(ifa);
936#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700937 fib_sync_up(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700939 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940 break;
941 case NETDEV_DOWN:
942 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700943 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 /* Last address was deleted from this interface.
945 Disable IP.
946 */
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000947 fib_disable_ip(dev, 1, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 } else {
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700949 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 }
951 break;
952 }
953 return NOTIFY_DONE;
954}
955
956static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
957{
958 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700959 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960
961 if (event == NETDEV_UNREGISTER) {
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000962 fib_disable_ip(dev, 2, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 return NOTIFY_DONE;
964 }
965
966 if (!in_dev)
967 return NOTIFY_DONE;
968
969 switch (event) {
970 case NETDEV_UP:
971 for_ifa(in_dev) {
972 fib_add_ifaddr(ifa);
973 } endfor_ifa(in_dev);
974#ifdef CONFIG_IP_ROUTE_MULTIPATH
975 fib_sync_up(dev);
976#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700977 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 break;
979 case NETDEV_DOWN:
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000980 fib_disable_ip(dev, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 break;
982 case NETDEV_CHANGEMTU:
983 case NETDEV_CHANGE:
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700984 rt_cache_flush(dev_net(dev), 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 break;
Eric W. Biedermana5ee1552009-11-29 15:45:58 +0000986 case NETDEV_UNREGISTER_BATCH:
987 rt_cache_flush_batch();
988 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 }
990 return NOTIFY_DONE;
991}
992
993static struct notifier_block fib_inetaddr_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800994 .notifier_call = fib_inetaddr_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995};
996
997static struct notifier_block fib_netdev_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800998 .notifier_call = fib_netdev_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999};
1000
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001001static int __net_init ip_fib_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002{
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -08001003 int err;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001004 unsigned int i;
1005
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001006 net->ipv4.fib_table_hash = kzalloc(
1007 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
1008 if (net->ipv4.fib_table_hash == NULL)
1009 return -ENOMEM;
1010
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001011 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001012 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -08001013
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -08001014 err = fib4_rules_init(net);
1015 if (err < 0)
1016 goto fail;
1017 return 0;
1018
1019fail:
1020 kfree(net->ipv4.fib_table_hash);
1021 return err;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001022}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001024static void ip_fib_net_exit(struct net *net)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001025{
1026 unsigned int i;
Thomas Graf63f34442007-03-22 11:55:17 -07001027
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001028#ifdef CONFIG_IP_MULTIPLE_TABLES
1029 fib4_rules_exit(net);
1030#endif
1031
1032 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1033 struct fib_table *tb;
1034 struct hlist_head *head;
1035 struct hlist_node *node, *tmp;
1036
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001037 head = &net->ipv4.fib_table_hash[i];
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001038 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1039 hlist_del(node);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001040 fib_table_flush(tb);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001041 kfree(tb);
1042 }
1043 }
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001044 kfree(net->ipv4.fib_table_hash);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001045}
1046
1047static int __net_init fib_net_init(struct net *net)
1048{
1049 int error;
1050
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001051 error = ip_fib_net_init(net);
1052 if (error < 0)
1053 goto out;
1054 error = nl_fib_lookup_init(net);
1055 if (error < 0)
1056 goto out_nlfl;
1057 error = fib_proc_init(net);
1058 if (error < 0)
1059 goto out_proc;
1060out:
1061 return error;
1062
1063out_proc:
1064 nl_fib_lookup_exit(net);
1065out_nlfl:
1066 ip_fib_net_exit(net);
1067 goto out;
1068}
1069
1070static void __net_exit fib_net_exit(struct net *net)
1071{
1072 fib_proc_exit(net);
1073 nl_fib_lookup_exit(net);
1074 ip_fib_net_exit(net);
1075}
1076
1077static struct pernet_operations fib_net_ops = {
1078 .init = fib_net_init,
1079 .exit = fib_net_exit,
1080};
1081
1082void __init ip_fib_init(void)
1083{
Thomas Graf63f34442007-03-22 11:55:17 -07001084 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1085 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1086 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001087
1088 register_pernet_subsys(&fib_net_ops);
1089 register_netdevice_notifier(&fib_netdev_notifier);
1090 register_inetaddr_notifier(&fib_inetaddr_notifier);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001091
1092 fib_hash_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093}