blob: 60123905dbbf65d4e67a3c7adc86264091a52bfd [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080022#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/types.h>
24#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020032#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070034#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/if_arp.h>
36#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070038#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070048#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
/* Shorthand for a KERN_DEBUG level printk(). */
#define FFprint(a...)	printk(KERN_DEBUG a)

/*
 * Kernel netlink socket for NETLINK_FIB_LOOKUP.  Assigned by
 * nl_fib_lookup_init() and used by nl_fib_input() to send replies.
 */
static struct sock *fibnl;
53
Linus Torvalds1da177e2005-04-16 15:20:36 -070054#ifndef CONFIG_IP_MULTIPLE_TABLES
55
Linus Torvalds1da177e2005-04-16 15:20:36 -070056struct fib_table *ip_fib_local_table;
57struct fib_table *ip_fib_main_table;
58
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070059#define FIB_TABLE_HASHSZ 1
60static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
61
Linus Torvalds1da177e2005-04-16 15:20:36 -070062#else
63
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070064#define FIB_TABLE_HASHSZ 256
65static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -070066
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070067struct fib_table *fib_new_table(u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070068{
69 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070070 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070071
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070072 if (id == 0)
73 id = RT_TABLE_MAIN;
74 tb = fib_get_table(id);
75 if (tb)
76 return tb;
Linus Torvalds1da177e2005-04-16 15:20:36 -070077 tb = fib_hash_init(id);
78 if (!tb)
79 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070080 h = id & (FIB_TABLE_HASHSZ - 1);
81 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070082 return tb;
83}
84
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070085struct fib_table *fib_get_table(u32 id)
86{
87 struct fib_table *tb;
88 struct hlist_node *node;
89 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070090
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070091 if (id == 0)
92 id = RT_TABLE_MAIN;
93 h = id & (FIB_TABLE_HASHSZ - 1);
94 rcu_read_lock();
95 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
96 if (tb->tb_id == id) {
97 rcu_read_unlock();
98 return tb;
99 }
100 }
101 rcu_read_unlock();
102 return NULL;
103}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104#endif /* CONFIG_IP_MULTIPLE_TABLES */
105
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106static void fib_flush(void)
107{
108 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700110 struct hlist_node *node;
111 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700113 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
114 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
115 flushed += tb->tb_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117
118 if (flushed)
119 rt_cache_flush(-1);
120}
121
122/*
123 * Find the first device with a given source address.
124 */
125
Al Viro60cad5d2006-09-26 22:17:09 -0700126struct net_device * ip_dev_find(__be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127{
128 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
129 struct fib_result res;
130 struct net_device *dev = NULL;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700131 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132
133#ifdef CONFIG_IP_MULTIPLE_TABLES
134 res.r = NULL;
135#endif
136
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700137 local_table = fib_get_table(RT_TABLE_LOCAL);
138 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 return NULL;
140 if (res.type != RTN_LOCAL)
141 goto out;
142 dev = FIB_RES_DEV(res);
143
144 if (dev)
145 dev_hold(dev);
146out:
147 fib_res_put(&res);
148 return dev;
149}
150
Al Virofd683222006-09-26 22:17:51 -0700151unsigned inet_addr_type(__be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152{
153 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
154 struct fib_result res;
155 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700156 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157
158 if (ZERONET(addr) || BADCLASS(addr))
159 return RTN_BROADCAST;
160 if (MULTICAST(addr))
161 return RTN_MULTICAST;
162
163#ifdef CONFIG_IP_MULTIPLE_TABLES
164 res.r = NULL;
165#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900166
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700167 local_table = fib_get_table(RT_TABLE_LOCAL);
168 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 ret = RTN_UNICAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700170 if (!local_table->tb_lookup(local_table, &fl, &res)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171 ret = res.type;
172 fib_res_put(&res);
173 }
174 }
175 return ret;
176}
177
178/* Given (packet source, input interface) and optional (dst, oif, tos):
179 - (main) check, that source is valid i.e. not broadcast or our local
180 address.
181 - figure out what "logical" interface this packet arrived
182 and calculate "specific destination" address.
183 - check, that packet arrived from expected physical interface.
184 */
185
Al Virod9c9df82006-09-26 21:28:14 -0700186int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
187 struct net_device *dev, __be32 *spec_dst, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188{
189 struct in_device *in_dev;
190 struct flowi fl = { .nl_u = { .ip4_u =
191 { .daddr = src,
192 .saddr = dst,
193 .tos = tos } },
194 .iif = oif };
195 struct fib_result res;
196 int no_addr, rpf;
197 int ret;
198
199 no_addr = rpf = 0;
200 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700201 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202 if (in_dev) {
203 no_addr = in_dev->ifa_list == NULL;
204 rpf = IN_DEV_RPFILTER(in_dev);
205 }
206 rcu_read_unlock();
207
208 if (in_dev == NULL)
209 goto e_inval;
210
211 if (fib_lookup(&fl, &res))
212 goto last_resort;
213 if (res.type != RTN_UNICAST)
214 goto e_inval_res;
215 *spec_dst = FIB_RES_PREFSRC(res);
216 fib_combine_itag(itag, &res);
217#ifdef CONFIG_IP_ROUTE_MULTIPATH
218 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
219#else
220 if (FIB_RES_DEV(res) == dev)
221#endif
222 {
223 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
224 fib_res_put(&res);
225 return ret;
226 }
227 fib_res_put(&res);
228 if (no_addr)
229 goto last_resort;
230 if (rpf)
231 goto e_inval;
232 fl.oif = dev->ifindex;
233
234 ret = 0;
235 if (fib_lookup(&fl, &res) == 0) {
236 if (res.type == RTN_UNICAST) {
237 *spec_dst = FIB_RES_PREFSRC(res);
238 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
239 }
240 fib_res_put(&res);
241 }
242 return ret;
243
244last_resort:
245 if (rpf)
246 goto e_inval;
247 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
248 *itag = 0;
249 return 0;
250
251e_inval_res:
252 fib_res_put(&res);
253e_inval:
254 return -EINVAL;
255}
256
Al Viro81f7bf62006-09-27 18:40:00 -0700257static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700258{
259 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
260}
261
262static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
263{
264 struct nlattr *nla;
265
266 nla = (struct nlattr *) ((char *) mx + len);
267 nla->nla_type = type;
268 nla->nla_len = nla_attr_size(4);
269 *(u32 *) nla_data(nla) = value;
270
271 return len + nla_total_size(4);
272}
273
274static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
275 struct fib_config *cfg)
276{
Al Viro6d85c102006-09-26 22:15:46 -0700277 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700278 int plen;
279
280 memset(cfg, 0, sizeof(*cfg));
281
282 if (rt->rt_dst.sa_family != AF_INET)
283 return -EAFNOSUPPORT;
284
285 /*
286 * Check mask for validity:
287 * a) it must be contiguous.
288 * b) destination must have all host bits clear.
289 * c) if application forgot to set correct family (AF_INET),
290 * reject request unless it is absolutely clear i.e.
291 * both family and mask are zero.
292 */
293 plen = 32;
294 addr = sk_extract_addr(&rt->rt_dst);
295 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700296 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700297
298 if (rt->rt_genmask.sa_family != AF_INET) {
299 if (mask || rt->rt_genmask.sa_family)
300 return -EAFNOSUPPORT;
301 }
302
303 if (bad_mask(mask, addr))
304 return -EINVAL;
305
306 plen = inet_mask_len(mask);
307 }
308
309 cfg->fc_dst_len = plen;
310 cfg->fc_dst = addr;
311
312 if (cmd != SIOCDELRT) {
313 cfg->fc_nlflags = NLM_F_CREATE;
314 cfg->fc_protocol = RTPROT_BOOT;
315 }
316
317 if (rt->rt_metric)
318 cfg->fc_priority = rt->rt_metric - 1;
319
320 if (rt->rt_flags & RTF_REJECT) {
321 cfg->fc_scope = RT_SCOPE_HOST;
322 cfg->fc_type = RTN_UNREACHABLE;
323 return 0;
324 }
325
326 cfg->fc_scope = RT_SCOPE_NOWHERE;
327 cfg->fc_type = RTN_UNICAST;
328
329 if (rt->rt_dev) {
330 char *colon;
331 struct net_device *dev;
332 char devname[IFNAMSIZ];
333
334 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
335 return -EFAULT;
336
337 devname[IFNAMSIZ-1] = 0;
338 colon = strchr(devname, ':');
339 if (colon)
340 *colon = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700341 dev = __dev_get_by_name(&init_net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700342 if (!dev)
343 return -ENODEV;
344 cfg->fc_oif = dev->ifindex;
345 if (colon) {
346 struct in_ifaddr *ifa;
347 struct in_device *in_dev = __in_dev_get_rtnl(dev);
348 if (!in_dev)
349 return -ENODEV;
350 *colon = ':';
351 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
352 if (strcmp(ifa->ifa_label, devname) == 0)
353 break;
354 if (ifa == NULL)
355 return -ENODEV;
356 cfg->fc_prefsrc = ifa->ifa_local;
357 }
358 }
359
360 addr = sk_extract_addr(&rt->rt_gateway);
361 if (rt->rt_gateway.sa_family == AF_INET && addr) {
362 cfg->fc_gw = addr;
363 if (rt->rt_flags & RTF_GATEWAY &&
364 inet_addr_type(addr) == RTN_UNICAST)
365 cfg->fc_scope = RT_SCOPE_UNIVERSE;
366 }
367
368 if (cmd == SIOCDELRT)
369 return 0;
370
371 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
372 return -EINVAL;
373
374 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
375 cfg->fc_scope = RT_SCOPE_LINK;
376
377 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
378 struct nlattr *mx;
379 int len = 0;
380
381 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900382 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700383 return -ENOMEM;
384
385 if (rt->rt_flags & RTF_MTU)
386 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
387
388 if (rt->rt_flags & RTF_WINDOW)
389 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
390
391 if (rt->rt_flags & RTF_IRTT)
392 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
393
394 cfg->fc_mx = mx;
395 cfg->fc_mx_len = len;
396 }
397
398 return 0;
399}
400
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401/*
402 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
403 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900404
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405int ip_rt_ioctl(unsigned int cmd, void __user *arg)
406{
Thomas Graf4e902c52006-08-17 18:14:52 -0700407 struct fib_config cfg;
408 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410
411 switch (cmd) {
412 case SIOCADDRT: /* Add a route */
413 case SIOCDELRT: /* Delete a route */
414 if (!capable(CAP_NET_ADMIN))
415 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700416
417 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700419
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 rtnl_lock();
Thomas Graf4e902c52006-08-17 18:14:52 -0700421 err = rtentry_to_fib_config(cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700423 struct fib_table *tb;
424
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 if (cmd == SIOCDELRT) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700426 tb = fib_get_table(cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700428 err = tb->tb_delete(tb, &cfg);
429 else
430 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 } else {
Thomas Graf4e902c52006-08-17 18:14:52 -0700432 tb = fib_new_table(cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700434 err = tb->tb_insert(tb, &cfg);
435 else
436 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700438
439 /* allocated by rtentry_to_fib_config() */
440 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 }
442 rtnl_unlock();
443 return err;
444 }
445 return -EINVAL;
446}
447
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700448const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700449 [RTA_DST] = { .type = NLA_U32 },
450 [RTA_SRC] = { .type = NLA_U32 },
451 [RTA_IIF] = { .type = NLA_U32 },
452 [RTA_OIF] = { .type = NLA_U32 },
453 [RTA_GATEWAY] = { .type = NLA_U32 },
454 [RTA_PRIORITY] = { .type = NLA_U32 },
455 [RTA_PREFSRC] = { .type = NLA_U32 },
456 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700457 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700458 [RTA_PROTOINFO] = { .type = NLA_U32 },
459 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700460};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461
Thomas Graf4e902c52006-08-17 18:14:52 -0700462static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
463 struct fib_config *cfg)
464{
465 struct nlattr *attr;
466 int err, remaining;
467 struct rtmsg *rtm;
468
469 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
470 if (err < 0)
471 goto errout;
472
473 memset(cfg, 0, sizeof(*cfg));
474
475 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700476 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700477 cfg->fc_tos = rtm->rtm_tos;
478 cfg->fc_table = rtm->rtm_table;
479 cfg->fc_protocol = rtm->rtm_protocol;
480 cfg->fc_scope = rtm->rtm_scope;
481 cfg->fc_type = rtm->rtm_type;
482 cfg->fc_flags = rtm->rtm_flags;
483 cfg->fc_nlflags = nlh->nlmsg_flags;
484
485 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
486 cfg->fc_nlinfo.nlh = nlh;
487
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700488 if (cfg->fc_type > RTN_MAX) {
489 err = -EINVAL;
490 goto errout;
491 }
492
Thomas Graf4e902c52006-08-17 18:14:52 -0700493 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200494 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700495 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700496 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700497 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700498 case RTA_OIF:
499 cfg->fc_oif = nla_get_u32(attr);
500 break;
501 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700502 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700503 break;
504 case RTA_PRIORITY:
505 cfg->fc_priority = nla_get_u32(attr);
506 break;
507 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700508 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700509 break;
510 case RTA_METRICS:
511 cfg->fc_mx = nla_data(attr);
512 cfg->fc_mx_len = nla_len(attr);
513 break;
514 case RTA_MULTIPATH:
515 cfg->fc_mp = nla_data(attr);
516 cfg->fc_mp_len = nla_len(attr);
517 break;
518 case RTA_FLOW:
519 cfg->fc_flow = nla_get_u32(attr);
520 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700521 case RTA_TABLE:
522 cfg->fc_table = nla_get_u32(attr);
523 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524 }
525 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700526
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700528errout:
529 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530}
531
Thomas Graf63f34442007-03-22 11:55:17 -0700532static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533{
Thomas Graf4e902c52006-08-17 18:14:52 -0700534 struct fib_config cfg;
535 struct fib_table *tb;
536 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537
Thomas Graf4e902c52006-08-17 18:14:52 -0700538 err = rtm_to_fib_config(skb, nlh, &cfg);
539 if (err < 0)
540 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541
Thomas Graf4e902c52006-08-17 18:14:52 -0700542 tb = fib_get_table(cfg.fc_table);
543 if (tb == NULL) {
544 err = -ESRCH;
545 goto errout;
546 }
547
548 err = tb->tb_delete(tb, &cfg);
549errout:
550 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551}
552
Thomas Graf63f34442007-03-22 11:55:17 -0700553static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554{
Thomas Graf4e902c52006-08-17 18:14:52 -0700555 struct fib_config cfg;
556 struct fib_table *tb;
557 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558
Thomas Graf4e902c52006-08-17 18:14:52 -0700559 err = rtm_to_fib_config(skb, nlh, &cfg);
560 if (err < 0)
561 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562
Thomas Graf4e902c52006-08-17 18:14:52 -0700563 tb = fib_new_table(cfg.fc_table);
564 if (tb == NULL) {
565 err = -ENOBUFS;
566 goto errout;
567 }
568
569 err = tb->tb_insert(tb, &cfg);
570errout:
571 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572}
573
Thomas Graf63f34442007-03-22 11:55:17 -0700574static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575{
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700576 unsigned int h, s_h;
577 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700579 struct hlist_node *node;
580 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581
Thomas Grafbe403ea2006-08-17 18:15:17 -0700582 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
583 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 return ip_rt_dump(skb, cb);
585
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700586 s_h = cb->args[0];
587 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700589 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
590 e = 0;
591 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
592 if (e < s_e)
593 goto next;
594 if (dumped)
595 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900596 2 * sizeof(cb->args[0]));
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700597 if (tb->tb_dump(tb, skb, cb) < 0)
598 goto out;
599 dumped = 1;
600next:
601 e++;
602 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700604out:
605 cb->args[1] = e;
606 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607
608 return skb->len;
609}
610
611/* Prepare and feed intra-kernel routing request.
612 Really, it should be netlink message, but :-( netlink
613 can be not configured, so that we feed it directly
614 to fib engine. It is legal, because all events occur
615 only when netlink is already locked.
616 */
617
Al Viro81f7bf62006-09-27 18:40:00 -0700618static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619{
Thomas Graf4e902c52006-08-17 18:14:52 -0700620 struct fib_table *tb;
621 struct fib_config cfg = {
622 .fc_protocol = RTPROT_KERNEL,
623 .fc_type = type,
624 .fc_dst = dst,
625 .fc_dst_len = dst_len,
626 .fc_prefsrc = ifa->ifa_local,
627 .fc_oif = ifa->ifa_dev->dev->ifindex,
628 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
629 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630
631 if (type == RTN_UNICAST)
632 tb = fib_new_table(RT_TABLE_MAIN);
633 else
634 tb = fib_new_table(RT_TABLE_LOCAL);
635
636 if (tb == NULL)
637 return;
638
Thomas Graf4e902c52006-08-17 18:14:52 -0700639 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640
Thomas Graf4e902c52006-08-17 18:14:52 -0700641 if (type != RTN_LOCAL)
642 cfg.fc_scope = RT_SCOPE_LINK;
643 else
644 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645
646 if (cmd == RTM_NEWROUTE)
Thomas Graf4e902c52006-08-17 18:14:52 -0700647 tb->tb_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 else
Thomas Graf4e902c52006-08-17 18:14:52 -0700649 tb->tb_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650}
651
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800652void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653{
654 struct in_device *in_dev = ifa->ifa_dev;
655 struct net_device *dev = in_dev->dev;
656 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700657 __be32 mask = ifa->ifa_mask;
658 __be32 addr = ifa->ifa_local;
659 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660
661 if (ifa->ifa_flags&IFA_F_SECONDARY) {
662 prim = inet_ifa_byprefix(in_dev, prefix, mask);
663 if (prim == NULL) {
664 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
665 return;
666 }
667 }
668
669 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
670
671 if (!(dev->flags&IFF_UP))
672 return;
673
674 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700675 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
677
678 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
679 (prefix != addr || ifa->ifa_prefixlen < 32)) {
680 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
681 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
682
683 /* Add network specific broadcasts, when it takes a sense */
684 if (ifa->ifa_prefixlen < 31) {
685 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
686 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
687 }
688 }
689}
690
691static void fib_del_ifaddr(struct in_ifaddr *ifa)
692{
693 struct in_device *in_dev = ifa->ifa_dev;
694 struct net_device *dev = in_dev->dev;
695 struct in_ifaddr *ifa1;
696 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700697 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
698 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699#define LOCAL_OK 1
700#define BRD_OK 2
701#define BRD0_OK 4
702#define BRD1_OK 8
703 unsigned ok = 0;
704
705 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
706 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
707 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
708 else {
709 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
710 if (prim == NULL) {
711 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
712 return;
713 }
714 }
715
716 /* Deletion is more complicated than add.
717 We should take care of not to delete too much :-)
718
719 Scan address list to be sure that addresses are really gone.
720 */
721
722 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
723 if (ifa->ifa_local == ifa1->ifa_local)
724 ok |= LOCAL_OK;
725 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
726 ok |= BRD_OK;
727 if (brd == ifa1->ifa_broadcast)
728 ok |= BRD1_OK;
729 if (any == ifa1->ifa_broadcast)
730 ok |= BRD0_OK;
731 }
732
733 if (!(ok&BRD_OK))
734 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
735 if (!(ok&BRD1_OK))
736 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
737 if (!(ok&BRD0_OK))
738 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
739 if (!(ok&LOCAL_OK)) {
740 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
741
742 /* Check, that this local address finally disappeared. */
743 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
744 /* And the last, but not the least thing.
745 We must flush stray FIB entries.
746
747 First of all, we scan fib_info list searching
748 for stray nexthop entries, then ignite fib_flush.
749 */
750 if (fib_sync_down(ifa->ifa_local, NULL, 0))
751 fib_flush();
752 }
753 }
754#undef LOCAL_OK
755#undef BRD_OK
756#undef BRD0_OK
757#undef BRD1_OK
758}
759
Robert Olsson246955f2005-06-20 13:36:39 -0700760static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
761{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900762
Robert Olsson246955f2005-06-20 13:36:39 -0700763 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800764 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800765 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700766 .tos = frn->fl_tos,
767 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700768
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700769#ifdef CONFIG_IP_MULTIPLE_TABLES
770 res.r = NULL;
771#endif
772
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700773 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700774 if (tb) {
775 local_bh_disable();
776
777 frn->tb_id = tb->tb_id;
778 frn->err = tb->tb_lookup(tb, &fl, &res);
779
780 if (!frn->err) {
781 frn->prefixlen = res.prefixlen;
782 frn->nh_sel = res.nh_sel;
783 frn->type = res.type;
784 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700785 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700786 }
787 local_bh_enable();
788 }
789}
790
David S. Miller28f7b0362007-10-10 21:32:39 -0700791static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700792{
Robert Olsson246955f2005-06-20 13:36:39 -0700793 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700794 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700795 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700796 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700797
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700798 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800799 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
800 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
801 kfree_skb(skb);
802 return;
803 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900804
Robert Olsson246955f2005-06-20 13:36:39 -0700805 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
806 tb = fib_get_table(frn->tb_id_in);
807
808 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900809
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700810 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700811 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700812 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunevcd40b7d2007-10-10 21:15:29 -0700813 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900814}
Robert Olsson246955f2005-06-20 13:36:39 -0700815
816static void nl_fib_lookup_init(void)
817{
Denis V. Lunevcd40b7d2007-10-10 21:15:29 -0700818 fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
819 nl_fib_input, NULL, THIS_MODULE);
Robert Olsson246955f2005-06-20 13:36:39 -0700820}
821
/*
 * Take IP down on a device: sync the FIB state down for dev (flushing the
 * tables if fib_sync_down() reports anything), then flush the routing
 * cache and the device's ARP entries.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
829
830static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
831{
832 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
833
834 switch (event) {
835 case NETDEV_UP:
836 fib_add_ifaddr(ifa);
837#ifdef CONFIG_IP_ROUTE_MULTIPATH
838 fib_sync_up(ifa->ifa_dev->dev);
839#endif
840 rt_cache_flush(-1);
841 break;
842 case NETDEV_DOWN:
843 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700844 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 /* Last address was deleted from this interface.
846 Disable IP.
847 */
848 fib_disable_ip(ifa->ifa_dev->dev, 1);
849 } else {
850 rt_cache_flush(-1);
851 }
852 break;
853 }
854 return NOTIFY_DONE;
855}
856
857static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
858{
859 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700860 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861
Eric W. Biedermane9dc8652007-09-12 13:02:17 +0200862 if (dev->nd_net != &init_net)
863 return NOTIFY_DONE;
864
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 if (event == NETDEV_UNREGISTER) {
866 fib_disable_ip(dev, 2);
867 return NOTIFY_DONE;
868 }
869
870 if (!in_dev)
871 return NOTIFY_DONE;
872
873 switch (event) {
874 case NETDEV_UP:
875 for_ifa(in_dev) {
876 fib_add_ifaddr(ifa);
877 } endfor_ifa(in_dev);
878#ifdef CONFIG_IP_ROUTE_MULTIPATH
879 fib_sync_up(dev);
880#endif
881 rt_cache_flush(-1);
882 break;
883 case NETDEV_DOWN:
884 fib_disable_ip(dev, 0);
885 break;
886 case NETDEV_CHANGEMTU:
887 case NETDEV_CHANGE:
888 rt_cache_flush(0);
889 break;
890 }
891 return NOTIFY_DONE;
892}
893
894static struct notifier_block fib_inetaddr_notifier = {
895 .notifier_call =fib_inetaddr_event,
896};
897
898static struct notifier_block fib_netdev_notifier = {
899 .notifier_call =fib_netdev_event,
900};
901
902void __init ip_fib_init(void)
903{
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700904 unsigned int i;
905
906 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
907 INIT_HLIST_HEAD(&fib_table_hash[i]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908#ifndef CONFIG_IP_MULTIPLE_TABLES
909 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700910 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700912 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913#else
Thomas Grafe1ef4bf2006-08-04 03:39:22 -0700914 fib4_rules_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915#endif
916
917 register_netdevice_notifier(&fib_netdev_notifier);
918 register_inetaddr_notifier(&fib_inetaddr_notifier);
Robert Olsson246955f2005-06-20 13:36:39 -0700919 nl_fib_lookup_init();
Thomas Graf63f34442007-03-22 11:55:17 -0700920
921 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
922 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
923 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924}
925
/* Symbols made available to modules. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(ip_dev_find);