blob: 78b514ba1414a8ebd22b0b0a423110b61b6326ce [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080022#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/types.h>
24#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020032#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070034#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/if_arp.h>
36#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070038#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070048#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
50#define FFprint(a...) printk(KERN_DEBUG a)
51
David S. Miller28f7b0362007-10-10 21:32:39 -070052static struct sock *fibnl;
53
Linus Torvalds1da177e2005-04-16 15:20:36 -070054#ifndef CONFIG_IP_MULTIPLE_TABLES
55
Linus Torvalds1da177e2005-04-16 15:20:36 -070056struct fib_table *ip_fib_local_table;
57struct fib_table *ip_fib_main_table;
58
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070059#define FIB_TABLE_HASHSZ 1
60static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
61
Linus Torvalds1da177e2005-04-16 15:20:36 -070062#else
63
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070064#define FIB_TABLE_HASHSZ 256
65static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -070066
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070067struct fib_table *fib_new_table(u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070068{
69 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070070 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070071
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070072 if (id == 0)
73 id = RT_TABLE_MAIN;
74 tb = fib_get_table(id);
75 if (tb)
76 return tb;
Linus Torvalds1da177e2005-04-16 15:20:36 -070077 tb = fib_hash_init(id);
78 if (!tb)
79 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070080 h = id & (FIB_TABLE_HASHSZ - 1);
81 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070082 return tb;
83}
84
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070085struct fib_table *fib_get_table(u32 id)
86{
87 struct fib_table *tb;
88 struct hlist_node *node;
89 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070090
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070091 if (id == 0)
92 id = RT_TABLE_MAIN;
93 h = id & (FIB_TABLE_HASHSZ - 1);
94 rcu_read_lock();
95 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
96 if (tb->tb_id == id) {
97 rcu_read_unlock();
98 return tb;
99 }
100 }
101 rcu_read_unlock();
102 return NULL;
103}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104#endif /* CONFIG_IP_MULTIPLE_TABLES */
105
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106static void fib_flush(void)
107{
108 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700110 struct hlist_node *node;
111 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700113 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
114 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
115 flushed += tb->tb_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117
118 if (flushed)
119 rt_cache_flush(-1);
120}
121
122/*
123 * Find the first device with a given source address.
124 */
125
Al Viro60cad5d2006-09-26 22:17:09 -0700126struct net_device * ip_dev_find(__be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127{
128 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
129 struct fib_result res;
130 struct net_device *dev = NULL;
131
132#ifdef CONFIG_IP_MULTIPLE_TABLES
133 res.r = NULL;
134#endif
135
136 if (!ip_fib_local_table ||
137 ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
138 return NULL;
139 if (res.type != RTN_LOCAL)
140 goto out;
141 dev = FIB_RES_DEV(res);
142
143 if (dev)
144 dev_hold(dev);
145out:
146 fib_res_put(&res);
147 return dev;
148}
149
Al Virofd683222006-09-26 22:17:51 -0700150unsigned inet_addr_type(__be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151{
152 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
153 struct fib_result res;
154 unsigned ret = RTN_BROADCAST;
155
156 if (ZERONET(addr) || BADCLASS(addr))
157 return RTN_BROADCAST;
158 if (MULTICAST(addr))
159 return RTN_MULTICAST;
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900164
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165 if (ip_fib_local_table) {
166 ret = RTN_UNICAST;
167 if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
168 &fl, &res)) {
169 ret = res.type;
170 fib_res_put(&res);
171 }
172 }
173 return ret;
174}
175
176/* Given (packet source, input interface) and optional (dst, oif, tos):
177 - (main) check, that source is valid i.e. not broadcast or our local
178 address.
179 - figure out what "logical" interface this packet arrived
180 and calculate "specific destination" address.
181 - check, that packet arrived from expected physical interface.
182 */
183
Al Virod9c9df82006-09-26 21:28:14 -0700184int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
185 struct net_device *dev, __be32 *spec_dst, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186{
187 struct in_device *in_dev;
188 struct flowi fl = { .nl_u = { .ip4_u =
189 { .daddr = src,
190 .saddr = dst,
191 .tos = tos } },
192 .iif = oif };
193 struct fib_result res;
194 int no_addr, rpf;
195 int ret;
196
197 no_addr = rpf = 0;
198 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700199 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 if (in_dev) {
201 no_addr = in_dev->ifa_list == NULL;
202 rpf = IN_DEV_RPFILTER(in_dev);
203 }
204 rcu_read_unlock();
205
206 if (in_dev == NULL)
207 goto e_inval;
208
209 if (fib_lookup(&fl, &res))
210 goto last_resort;
211 if (res.type != RTN_UNICAST)
212 goto e_inval_res;
213 *spec_dst = FIB_RES_PREFSRC(res);
214 fib_combine_itag(itag, &res);
215#ifdef CONFIG_IP_ROUTE_MULTIPATH
216 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
217#else
218 if (FIB_RES_DEV(res) == dev)
219#endif
220 {
221 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
222 fib_res_put(&res);
223 return ret;
224 }
225 fib_res_put(&res);
226 if (no_addr)
227 goto last_resort;
228 if (rpf)
229 goto e_inval;
230 fl.oif = dev->ifindex;
231
232 ret = 0;
233 if (fib_lookup(&fl, &res) == 0) {
234 if (res.type == RTN_UNICAST) {
235 *spec_dst = FIB_RES_PREFSRC(res);
236 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
237 }
238 fib_res_put(&res);
239 }
240 return ret;
241
242last_resort:
243 if (rpf)
244 goto e_inval;
245 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
246 *itag = 0;
247 return 0;
248
249e_inval_res:
250 fib_res_put(&res);
251e_inval:
252 return -EINVAL;
253}
254
Al Viro81f7bf62006-09-27 18:40:00 -0700255static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700256{
257 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
258}
259
260static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
261{
262 struct nlattr *nla;
263
264 nla = (struct nlattr *) ((char *) mx + len);
265 nla->nla_type = type;
266 nla->nla_len = nla_attr_size(4);
267 *(u32 *) nla_data(nla) = value;
268
269 return len + nla_total_size(4);
270}
271
272static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
273 struct fib_config *cfg)
274{
Al Viro6d85c102006-09-26 22:15:46 -0700275 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700276 int plen;
277
278 memset(cfg, 0, sizeof(*cfg));
279
280 if (rt->rt_dst.sa_family != AF_INET)
281 return -EAFNOSUPPORT;
282
283 /*
284 * Check mask for validity:
285 * a) it must be contiguous.
286 * b) destination must have all host bits clear.
287 * c) if application forgot to set correct family (AF_INET),
288 * reject request unless it is absolutely clear i.e.
289 * both family and mask are zero.
290 */
291 plen = 32;
292 addr = sk_extract_addr(&rt->rt_dst);
293 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700294 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700295
296 if (rt->rt_genmask.sa_family != AF_INET) {
297 if (mask || rt->rt_genmask.sa_family)
298 return -EAFNOSUPPORT;
299 }
300
301 if (bad_mask(mask, addr))
302 return -EINVAL;
303
304 plen = inet_mask_len(mask);
305 }
306
307 cfg->fc_dst_len = plen;
308 cfg->fc_dst = addr;
309
310 if (cmd != SIOCDELRT) {
311 cfg->fc_nlflags = NLM_F_CREATE;
312 cfg->fc_protocol = RTPROT_BOOT;
313 }
314
315 if (rt->rt_metric)
316 cfg->fc_priority = rt->rt_metric - 1;
317
318 if (rt->rt_flags & RTF_REJECT) {
319 cfg->fc_scope = RT_SCOPE_HOST;
320 cfg->fc_type = RTN_UNREACHABLE;
321 return 0;
322 }
323
324 cfg->fc_scope = RT_SCOPE_NOWHERE;
325 cfg->fc_type = RTN_UNICAST;
326
327 if (rt->rt_dev) {
328 char *colon;
329 struct net_device *dev;
330 char devname[IFNAMSIZ];
331
332 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
333 return -EFAULT;
334
335 devname[IFNAMSIZ-1] = 0;
336 colon = strchr(devname, ':');
337 if (colon)
338 *colon = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700339 dev = __dev_get_by_name(&init_net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700340 if (!dev)
341 return -ENODEV;
342 cfg->fc_oif = dev->ifindex;
343 if (colon) {
344 struct in_ifaddr *ifa;
345 struct in_device *in_dev = __in_dev_get_rtnl(dev);
346 if (!in_dev)
347 return -ENODEV;
348 *colon = ':';
349 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
350 if (strcmp(ifa->ifa_label, devname) == 0)
351 break;
352 if (ifa == NULL)
353 return -ENODEV;
354 cfg->fc_prefsrc = ifa->ifa_local;
355 }
356 }
357
358 addr = sk_extract_addr(&rt->rt_gateway);
359 if (rt->rt_gateway.sa_family == AF_INET && addr) {
360 cfg->fc_gw = addr;
361 if (rt->rt_flags & RTF_GATEWAY &&
362 inet_addr_type(addr) == RTN_UNICAST)
363 cfg->fc_scope = RT_SCOPE_UNIVERSE;
364 }
365
366 if (cmd == SIOCDELRT)
367 return 0;
368
369 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
370 return -EINVAL;
371
372 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
373 cfg->fc_scope = RT_SCOPE_LINK;
374
375 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
376 struct nlattr *mx;
377 int len = 0;
378
379 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900380 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700381 return -ENOMEM;
382
383 if (rt->rt_flags & RTF_MTU)
384 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
385
386 if (rt->rt_flags & RTF_WINDOW)
387 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
388
389 if (rt->rt_flags & RTF_IRTT)
390 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
391
392 cfg->fc_mx = mx;
393 cfg->fc_mx_len = len;
394 }
395
396 return 0;
397}
398
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399/*
400 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
401 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900402
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403int ip_rt_ioctl(unsigned int cmd, void __user *arg)
404{
Thomas Graf4e902c52006-08-17 18:14:52 -0700405 struct fib_config cfg;
406 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408
409 switch (cmd) {
410 case SIOCADDRT: /* Add a route */
411 case SIOCDELRT: /* Delete a route */
412 if (!capable(CAP_NET_ADMIN))
413 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700414
415 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700417
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 rtnl_lock();
Thomas Graf4e902c52006-08-17 18:14:52 -0700419 err = rtentry_to_fib_config(cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700421 struct fib_table *tb;
422
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 if (cmd == SIOCDELRT) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700424 tb = fib_get_table(cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700426 err = tb->tb_delete(tb, &cfg);
427 else
428 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 } else {
Thomas Graf4e902c52006-08-17 18:14:52 -0700430 tb = fib_new_table(cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700432 err = tb->tb_insert(tb, &cfg);
433 else
434 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700436
437 /* allocated by rtentry_to_fib_config() */
438 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 }
440 rtnl_unlock();
441 return err;
442 }
443 return -EINVAL;
444}
445
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700446const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700447 [RTA_DST] = { .type = NLA_U32 },
448 [RTA_SRC] = { .type = NLA_U32 },
449 [RTA_IIF] = { .type = NLA_U32 },
450 [RTA_OIF] = { .type = NLA_U32 },
451 [RTA_GATEWAY] = { .type = NLA_U32 },
452 [RTA_PRIORITY] = { .type = NLA_U32 },
453 [RTA_PREFSRC] = { .type = NLA_U32 },
454 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700455 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700456 [RTA_PROTOINFO] = { .type = NLA_U32 },
457 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700458};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459
Thomas Graf4e902c52006-08-17 18:14:52 -0700460static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
461 struct fib_config *cfg)
462{
463 struct nlattr *attr;
464 int err, remaining;
465 struct rtmsg *rtm;
466
467 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
468 if (err < 0)
469 goto errout;
470
471 memset(cfg, 0, sizeof(*cfg));
472
473 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700474 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700475 cfg->fc_tos = rtm->rtm_tos;
476 cfg->fc_table = rtm->rtm_table;
477 cfg->fc_protocol = rtm->rtm_protocol;
478 cfg->fc_scope = rtm->rtm_scope;
479 cfg->fc_type = rtm->rtm_type;
480 cfg->fc_flags = rtm->rtm_flags;
481 cfg->fc_nlflags = nlh->nlmsg_flags;
482
483 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
484 cfg->fc_nlinfo.nlh = nlh;
485
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700486 if (cfg->fc_type > RTN_MAX) {
487 err = -EINVAL;
488 goto errout;
489 }
490
Thomas Graf4e902c52006-08-17 18:14:52 -0700491 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200492 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700493 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700494 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700495 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700496 case RTA_OIF:
497 cfg->fc_oif = nla_get_u32(attr);
498 break;
499 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700500 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700501 break;
502 case RTA_PRIORITY:
503 cfg->fc_priority = nla_get_u32(attr);
504 break;
505 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700506 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700507 break;
508 case RTA_METRICS:
509 cfg->fc_mx = nla_data(attr);
510 cfg->fc_mx_len = nla_len(attr);
511 break;
512 case RTA_MULTIPATH:
513 cfg->fc_mp = nla_data(attr);
514 cfg->fc_mp_len = nla_len(attr);
515 break;
516 case RTA_FLOW:
517 cfg->fc_flow = nla_get_u32(attr);
518 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700519 case RTA_TABLE:
520 cfg->fc_table = nla_get_u32(attr);
521 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 }
523 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700524
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700526errout:
527 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528}
529
Thomas Graf63f34442007-03-22 11:55:17 -0700530static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531{
Thomas Graf4e902c52006-08-17 18:14:52 -0700532 struct fib_config cfg;
533 struct fib_table *tb;
534 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535
Thomas Graf4e902c52006-08-17 18:14:52 -0700536 err = rtm_to_fib_config(skb, nlh, &cfg);
537 if (err < 0)
538 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539
Thomas Graf4e902c52006-08-17 18:14:52 -0700540 tb = fib_get_table(cfg.fc_table);
541 if (tb == NULL) {
542 err = -ESRCH;
543 goto errout;
544 }
545
546 err = tb->tb_delete(tb, &cfg);
547errout:
548 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549}
550
Thomas Graf63f34442007-03-22 11:55:17 -0700551static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552{
Thomas Graf4e902c52006-08-17 18:14:52 -0700553 struct fib_config cfg;
554 struct fib_table *tb;
555 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556
Thomas Graf4e902c52006-08-17 18:14:52 -0700557 err = rtm_to_fib_config(skb, nlh, &cfg);
558 if (err < 0)
559 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560
Thomas Graf4e902c52006-08-17 18:14:52 -0700561 tb = fib_new_table(cfg.fc_table);
562 if (tb == NULL) {
563 err = -ENOBUFS;
564 goto errout;
565 }
566
567 err = tb->tb_insert(tb, &cfg);
568errout:
569 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570}
571
Thomas Graf63f34442007-03-22 11:55:17 -0700572static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573{
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700574 unsigned int h, s_h;
575 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700577 struct hlist_node *node;
578 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579
Thomas Grafbe403ea2006-08-17 18:15:17 -0700580 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
581 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 return ip_rt_dump(skb, cb);
583
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700584 s_h = cb->args[0];
585 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700587 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
588 e = 0;
589 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
590 if (e < s_e)
591 goto next;
592 if (dumped)
593 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900594 2 * sizeof(cb->args[0]));
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700595 if (tb->tb_dump(tb, skb, cb) < 0)
596 goto out;
597 dumped = 1;
598next:
599 e++;
600 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700602out:
603 cb->args[1] = e;
604 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605
606 return skb->len;
607}
608
609/* Prepare and feed intra-kernel routing request.
610 Really, it should be netlink message, but :-( netlink
611 can be not configured, so that we feed it directly
612 to fib engine. It is legal, because all events occur
613 only when netlink is already locked.
614 */
615
Al Viro81f7bf62006-09-27 18:40:00 -0700616static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617{
Thomas Graf4e902c52006-08-17 18:14:52 -0700618 struct fib_table *tb;
619 struct fib_config cfg = {
620 .fc_protocol = RTPROT_KERNEL,
621 .fc_type = type,
622 .fc_dst = dst,
623 .fc_dst_len = dst_len,
624 .fc_prefsrc = ifa->ifa_local,
625 .fc_oif = ifa->ifa_dev->dev->ifindex,
626 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
627 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628
629 if (type == RTN_UNICAST)
630 tb = fib_new_table(RT_TABLE_MAIN);
631 else
632 tb = fib_new_table(RT_TABLE_LOCAL);
633
634 if (tb == NULL)
635 return;
636
Thomas Graf4e902c52006-08-17 18:14:52 -0700637 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
Thomas Graf4e902c52006-08-17 18:14:52 -0700639 if (type != RTN_LOCAL)
640 cfg.fc_scope = RT_SCOPE_LINK;
641 else
642 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643
644 if (cmd == RTM_NEWROUTE)
Thomas Graf4e902c52006-08-17 18:14:52 -0700645 tb->tb_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646 else
Thomas Graf4e902c52006-08-17 18:14:52 -0700647 tb->tb_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648}
649
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800650void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651{
652 struct in_device *in_dev = ifa->ifa_dev;
653 struct net_device *dev = in_dev->dev;
654 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700655 __be32 mask = ifa->ifa_mask;
656 __be32 addr = ifa->ifa_local;
657 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658
659 if (ifa->ifa_flags&IFA_F_SECONDARY) {
660 prim = inet_ifa_byprefix(in_dev, prefix, mask);
661 if (prim == NULL) {
662 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
663 return;
664 }
665 }
666
667 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
668
669 if (!(dev->flags&IFF_UP))
670 return;
671
672 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700673 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
675
676 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
677 (prefix != addr || ifa->ifa_prefixlen < 32)) {
678 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
679 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
680
681 /* Add network specific broadcasts, when it takes a sense */
682 if (ifa->ifa_prefixlen < 31) {
683 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
684 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
685 }
686 }
687}
688
689static void fib_del_ifaddr(struct in_ifaddr *ifa)
690{
691 struct in_device *in_dev = ifa->ifa_dev;
692 struct net_device *dev = in_dev->dev;
693 struct in_ifaddr *ifa1;
694 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700695 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
696 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697#define LOCAL_OK 1
698#define BRD_OK 2
699#define BRD0_OK 4
700#define BRD1_OK 8
701 unsigned ok = 0;
702
703 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
704 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
705 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
706 else {
707 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
708 if (prim == NULL) {
709 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
710 return;
711 }
712 }
713
714 /* Deletion is more complicated than add.
715 We should take care of not to delete too much :-)
716
717 Scan address list to be sure that addresses are really gone.
718 */
719
720 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
721 if (ifa->ifa_local == ifa1->ifa_local)
722 ok |= LOCAL_OK;
723 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
724 ok |= BRD_OK;
725 if (brd == ifa1->ifa_broadcast)
726 ok |= BRD1_OK;
727 if (any == ifa1->ifa_broadcast)
728 ok |= BRD0_OK;
729 }
730
731 if (!(ok&BRD_OK))
732 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
733 if (!(ok&BRD1_OK))
734 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
735 if (!(ok&BRD0_OK))
736 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
737 if (!(ok&LOCAL_OK)) {
738 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
739
740 /* Check, that this local address finally disappeared. */
741 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
742 /* And the last, but not the least thing.
743 We must flush stray FIB entries.
744
745 First of all, we scan fib_info list searching
746 for stray nexthop entries, then ignite fib_flush.
747 */
748 if (fib_sync_down(ifa->ifa_local, NULL, 0))
749 fib_flush();
750 }
751 }
752#undef LOCAL_OK
753#undef BRD_OK
754#undef BRD0_OK
755#undef BRD1_OK
756}
757
Robert Olsson246955f2005-06-20 13:36:39 -0700758static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
759{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900760
Robert Olsson246955f2005-06-20 13:36:39 -0700761 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800762 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800763 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700764 .tos = frn->fl_tos,
765 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700766
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700767#ifdef CONFIG_IP_MULTIPLE_TABLES
768 res.r = NULL;
769#endif
770
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700771 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700772 if (tb) {
773 local_bh_disable();
774
775 frn->tb_id = tb->tb_id;
776 frn->err = tb->tb_lookup(tb, &fl, &res);
777
778 if (!frn->err) {
779 frn->prefixlen = res.prefixlen;
780 frn->nh_sel = res.nh_sel;
781 frn->type = res.type;
782 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700783 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700784 }
785 local_bh_enable();
786 }
787}
788
David S. Miller28f7b0362007-10-10 21:32:39 -0700789static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700790{
Robert Olsson246955f2005-06-20 13:36:39 -0700791 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700792 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700793 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700794 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700795
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700796 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800797 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
798 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
799 kfree_skb(skb);
800 return;
801 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900802
Robert Olsson246955f2005-06-20 13:36:39 -0700803 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
804 tb = fib_get_table(frn->tb_id_in);
805
806 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900807
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700808 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700809 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700810 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunevcd40b7d2007-10-10 21:15:29 -0700811 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900812}
Robert Olsson246955f2005-06-20 13:36:39 -0700813
814static void nl_fib_lookup_init(void)
815{
Denis V. Lunevcd40b7d2007-10-10 21:15:29 -0700816 fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
817 nl_fib_input, NULL, THIS_MODULE);
Robert Olsson246955f2005-06-20 13:36:39 -0700818}
819
/*
 * Take IP down on a device: run fib_sync_down() for the device (followed
 * by fib_flush() when it returns non-zero), then call rt_cache_flush(0)
 * and arp_ifdown().  'force' is passed through to fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
827
828static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
829{
830 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
831
832 switch (event) {
833 case NETDEV_UP:
834 fib_add_ifaddr(ifa);
835#ifdef CONFIG_IP_ROUTE_MULTIPATH
836 fib_sync_up(ifa->ifa_dev->dev);
837#endif
838 rt_cache_flush(-1);
839 break;
840 case NETDEV_DOWN:
841 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700842 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 /* Last address was deleted from this interface.
844 Disable IP.
845 */
846 fib_disable_ip(ifa->ifa_dev->dev, 1);
847 } else {
848 rt_cache_flush(-1);
849 }
850 break;
851 }
852 return NOTIFY_DONE;
853}
854
855static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
856{
857 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700858 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859
Eric W. Biedermane9dc8652007-09-12 13:02:17 +0200860 if (dev->nd_net != &init_net)
861 return NOTIFY_DONE;
862
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863 if (event == NETDEV_UNREGISTER) {
864 fib_disable_ip(dev, 2);
865 return NOTIFY_DONE;
866 }
867
868 if (!in_dev)
869 return NOTIFY_DONE;
870
871 switch (event) {
872 case NETDEV_UP:
873 for_ifa(in_dev) {
874 fib_add_ifaddr(ifa);
875 } endfor_ifa(in_dev);
876#ifdef CONFIG_IP_ROUTE_MULTIPATH
877 fib_sync_up(dev);
878#endif
879 rt_cache_flush(-1);
880 break;
881 case NETDEV_DOWN:
882 fib_disable_ip(dev, 0);
883 break;
884 case NETDEV_CHANGEMTU:
885 case NETDEV_CHANGE:
886 rt_cache_flush(0);
887 break;
888 }
889 return NOTIFY_DONE;
890}
891
892static struct notifier_block fib_inetaddr_notifier = {
893 .notifier_call =fib_inetaddr_event,
894};
895
896static struct notifier_block fib_netdev_notifier = {
897 .notifier_call =fib_netdev_event,
898};
899
900void __init ip_fib_init(void)
901{
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700902 unsigned int i;
903
904 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
905 INIT_HLIST_HEAD(&fib_table_hash[i]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906#ifndef CONFIG_IP_MULTIPLE_TABLES
907 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700908 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700910 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911#else
Thomas Grafe1ef4bf2006-08-04 03:39:22 -0700912 fib4_rules_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913#endif
914
915 register_netdevice_notifier(&fib_netdev_notifier);
916 register_inetaddr_notifier(&fib_inetaddr_notifier);
Robert Olsson246955f2005-06-20 13:36:39 -0700917 nl_fib_lookup_init();
Thomas Graf63f34442007-03-22 11:55:17 -0700918
919 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
920 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
921 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922}
923
/* Symbols exported from this file. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(ip_dev_find);