blob: 5beff2e02751224a69dcacaa39745e05e8e598b0 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020031#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <linux/init.h>
37
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020038#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070045#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070046#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
48#include "fib_lookup.h"
49
Stephen Hemminger832b4c52006-08-29 16:48:09 -070050static DEFINE_SPINLOCK(fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070051static struct hlist_head *fib_info_hash;
52static struct hlist_head *fib_info_laddrhash;
53static unsigned int fib_hash_size;
54static unsigned int fib_info_cnt;
55
56#define DEVINDEX_HASHBITS 8
57#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
58static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
59
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/*
 * Nexthop iteration helpers.
 *
 * Each macro opens a brace scope declaring `nhsel` (the nexthop index) and
 * `nh` (the current nexthop), followed by a for-loop header; the caller
 * supplies the loop body and MUST close the scope with endfor_nexthops().
 * for_nexthops() yields a const nexthop, change_nexthops() a writable one.
 */
#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh * nh;				\
	for (nhsel=0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh * nh;					\
	for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh);		\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Single-nexthop variants: same `nhsel` / `nh` names, but the loop runs
 * exactly once over the first nexthop.
 * Hope, that gcc will optimize it to get rid of dummy loop.
 */

#define for_nexthops(fi) {						\
	int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh;		\
	for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
	for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the brace scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }
83
84
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090085static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070086{
87 int error;
88 u8 scope;
Thomas Grafa0ee18b2007-03-24 20:32:54 -070089} fib_props[RTN_MAX + 1] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090090 {
Linus Torvalds1da177e2005-04-16 15:20:36 -070091 .error = 0,
92 .scope = RT_SCOPE_NOWHERE,
93 }, /* RTN_UNSPEC */
94 {
95 .error = 0,
96 .scope = RT_SCOPE_UNIVERSE,
97 }, /* RTN_UNICAST */
98 {
99 .error = 0,
100 .scope = RT_SCOPE_HOST,
101 }, /* RTN_LOCAL */
102 {
103 .error = 0,
104 .scope = RT_SCOPE_LINK,
105 }, /* RTN_BROADCAST */
106 {
107 .error = 0,
108 .scope = RT_SCOPE_LINK,
109 }, /* RTN_ANYCAST */
110 {
111 .error = 0,
112 .scope = RT_SCOPE_UNIVERSE,
113 }, /* RTN_MULTICAST */
114 {
115 .error = -EINVAL,
116 .scope = RT_SCOPE_UNIVERSE,
117 }, /* RTN_BLACKHOLE */
118 {
119 .error = -EHOSTUNREACH,
120 .scope = RT_SCOPE_UNIVERSE,
121 }, /* RTN_UNREACHABLE */
122 {
123 .error = -EACCES,
124 .scope = RT_SCOPE_UNIVERSE,
125 }, /* RTN_PROHIBIT */
126 {
127 .error = -EAGAIN,
128 .scope = RT_SCOPE_UNIVERSE,
129 }, /* RTN_THROW */
130 {
131 .error = -EINVAL,
132 .scope = RT_SCOPE_NOWHERE,
133 }, /* RTN_NAT */
134 {
135 .error = -EINVAL,
136 .scope = RT_SCOPE_NOWHERE,
137 }, /* RTN_XRESOLVE */
138};
139
140
141/* Release a nexthop info record */
142
143void free_fib_info(struct fib_info *fi)
144{
145 if (fi->fib_dead == 0) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800146 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 return;
148 }
149 change_nexthops(fi) {
150 if (nh->nh_dev)
151 dev_put(nh->nh_dev);
152 nh->nh_dev = NULL;
153 } endfor_nexthops(fi);
154 fib_info_cnt--;
155 kfree(fi);
156}
157
158void fib_release_info(struct fib_info *fi)
159{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700160 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 if (fi && --fi->fib_treeref == 0) {
162 hlist_del(&fi->fib_hash);
163 if (fi->fib_prefsrc)
164 hlist_del(&fi->fib_lhash);
165 change_nexthops(fi) {
166 if (!nh->nh_dev)
167 continue;
168 hlist_del(&nh->nh_hash);
169 } endfor_nexthops(fi)
170 fi->fib_dead = 1;
171 fib_info_put(fi);
172 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700173 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174}
175
176static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177{
178 const struct fib_nh *onh = ofi->fib_nh;
179
180 for_nexthops(fi) {
181 if (nh->nh_oif != onh->nh_oif ||
182 nh->nh_gw != onh->nh_gw ||
183 nh->nh_scope != onh->nh_scope ||
184#ifdef CONFIG_IP_ROUTE_MULTIPATH
185 nh->nh_weight != onh->nh_weight ||
186#endif
187#ifdef CONFIG_NET_CLS_ROUTE
188 nh->nh_tclassid != onh->nh_tclassid ||
189#endif
190 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
191 return -1;
192 onh++;
193 } endfor_nexthops(fi);
194 return 0;
195}
196
David S. Miller88ebc722008-01-12 21:49:01 -0800197static inline unsigned int fib_devindex_hashfn(unsigned int val)
198{
199 unsigned int mask = DEVINDEX_HASHSIZE - 1;
200
201 return (val ^
202 (val >> DEVINDEX_HASHBITS) ^
203 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
204}
205
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
207{
208 unsigned int mask = (fib_hash_size - 1);
209 unsigned int val = fi->fib_nhs;
210
211 val ^= fi->fib_protocol;
Al Viro81f7bf62006-09-27 18:40:00 -0700212 val ^= (__force u32)fi->fib_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 val ^= fi->fib_priority;
David S. Miller88ebc722008-01-12 21:49:01 -0800214 for_nexthops(fi) {
215 val ^= fib_devindex_hashfn(nh->nh_oif);
216 } endfor_nexthops(fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217
218 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
219}
220
221static struct fib_info *fib_find_info(const struct fib_info *nfi)
222{
223 struct hlist_head *head;
224 struct hlist_node *node;
225 struct fib_info *fi;
226 unsigned int hash;
227
228 hash = fib_info_hashfn(nfi);
229 head = &fib_info_hash[hash];
230
231 hlist_for_each_entry(fi, node, head, fib_hash) {
232 if (fi->fib_nhs != nfi->fib_nhs)
233 continue;
234 if (nfi->fib_protocol == fi->fib_protocol &&
235 nfi->fib_prefsrc == fi->fib_prefsrc &&
236 nfi->fib_priority == fi->fib_priority &&
237 memcmp(nfi->fib_metrics, fi->fib_metrics,
238 sizeof(fi->fib_metrics)) == 0 &&
239 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
240 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
241 return fi;
242 }
243
244 return NULL;
245}
246
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247/* Check, that the gateway is already configured.
248 Used only by redirect accept routine.
249 */
250
Al Virod878e72e2006-09-26 22:18:13 -0700251int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252{
253 struct hlist_head *head;
254 struct hlist_node *node;
255 struct fib_nh *nh;
256 unsigned int hash;
257
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700258 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259
260 hash = fib_devindex_hashfn(dev->ifindex);
261 head = &fib_info_devhash[hash];
262 hlist_for_each_entry(nh, node, head, nh_hash) {
263 if (nh->nh_dev == dev &&
264 nh->nh_gw == gw &&
265 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700266 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 return 0;
268 }
269 }
270
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700271 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272
273 return -1;
274}
275
Thomas Graf339bf982006-11-10 14:10:15 -0800276static inline size_t fib_nlmsg_size(struct fib_info *fi)
277{
278 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
279 + nla_total_size(4) /* RTA_TABLE */
280 + nla_total_size(4) /* RTA_DST */
281 + nla_total_size(4) /* RTA_PRIORITY */
282 + nla_total_size(4); /* RTA_PREFSRC */
283
284 /* space for nested metrics */
285 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
286
287 if (fi->fib_nhs) {
288 /* Also handles the special case fib_nhs == 1 */
289
290 /* each nexthop is packed in an attribute */
291 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
292
293 /* may contain flow and gateway attribute */
294 nhsize += 2 * nla_total_size(4);
295
296 /* all nexthops are packed in a nested attribute */
297 payload += nla_total_size(fi->fib_nhs * nhsize);
298 }
299
300 return payload;
301}
302
Al Viro81f7bf62006-09-27 18:40:00 -0700303void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
Milan Kocianb8f55832007-05-23 14:55:06 -0700304 int dst_len, u32 tb_id, struct nl_info *info,
305 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306{
307 struct sk_buff *skb;
Thomas Graf4e902c52006-08-17 18:14:52 -0700308 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700309 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310
Thomas Graf339bf982006-11-10 14:10:15 -0800311 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700312 if (skb == NULL)
313 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314
Thomas Graf4e902c52006-08-17 18:14:52 -0700315 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700316 fa->fa_type, fa->fa_scope, key, dst_len,
Milan Kocianb8f55832007-05-23 14:55:06 -0700317 fa->fa_tos, fa->fa_info, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -0800318 if (err < 0) {
319 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
320 WARN_ON(err == -EMSGSIZE);
321 kfree_skb(skb);
322 goto errout;
323 }
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800324 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
Thomas Graf4e902c52006-08-17 18:14:52 -0700325 info->nlh, GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700326errout:
327 if (err < 0)
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800328 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329}
330
331/* Return the first fib alias matching TOS with
332 * priority less than or equal to PRIO.
333 */
334struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
335{
336 if (fah) {
337 struct fib_alias *fa;
338 list_for_each_entry(fa, fah, fa_list) {
339 if (fa->fa_tos > tos)
340 continue;
341 if (fa->fa_info->fib_priority >= prio ||
342 fa->fa_tos < tos)
343 return fa;
344 }
345 }
346 return NULL;
347}
348
349int fib_detect_death(struct fib_info *fi, int order,
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800350 struct fib_info **last_resort, int *last_idx, int dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351{
352 struct neighbour *n;
353 int state = NUD_NONE;
354
355 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
356 if (n) {
357 state = n->nud_state;
358 neigh_release(n);
359 }
360 if (state==NUD_REACHABLE)
361 return 0;
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800362 if ((state&NUD_VALID) && order != dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 return 0;
364 if ((state&NUD_VALID) ||
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800365 (*last_idx<0 && order > dflt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366 *last_resort = fi;
367 *last_idx = order;
368 }
369 return 1;
370}
371
372#ifdef CONFIG_IP_ROUTE_MULTIPATH
373
/*
 * Count the rtnexthop entries in a multipath attribute of @remaining
 * bytes.  Returns 0 if bytes are left over after the last well-formed
 * entry, since that indicates an invalid nexthop configuration.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
386
Thomas Graf4e902c52006-08-17 18:14:52 -0700387static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
388 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700391 int attrlen;
392
393 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700395
396 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
397 nh->nh_oif = rtnh->rtnh_ifindex;
398 nh->nh_weight = rtnh->rtnh_hops + 1;
399
400 attrlen = rtnh_attrlen(rtnh);
401 if (attrlen > 0) {
402 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
403
404 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700405 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700407 nla = nla_find(attrs, attrlen, RTA_FLOW);
408 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409#endif
410 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700411
412 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700414
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 return 0;
416}
417
418#endif
419
Thomas Graf4e902c52006-08-17 18:14:52 -0700420int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421{
422#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700423 struct rtnexthop *rtnh;
424 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425#endif
426
Thomas Graf4e902c52006-08-17 18:14:52 -0700427 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428 return 1;
429
Thomas Graf4e902c52006-08-17 18:14:52 -0700430 if (cfg->fc_oif || cfg->fc_gw) {
431 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
432 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 return 0;
434 return 1;
435 }
436
437#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700438 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700440
441 rtnh = cfg->fc_mp;
442 remaining = cfg->fc_mp_len;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900443
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700445 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446
Thomas Graf4e902c52006-08-17 18:14:52 -0700447 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700449
450 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700452
453 attrlen = rtnh_attrlen(rtnh);
454 if (attrlen < 0) {
455 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
456
457 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700458 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 return 1;
460#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700461 nla = nla_find(attrs, attrlen, RTA_FLOW);
462 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 return 1;
464#endif
465 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700466
467 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 } endfor_nexthops(fi);
469#endif
470 return 0;
471}
472
473
/*
   Picture
   -------

   Semantics of nexthop is very messy by historical reasons.
   We have to take into account, that:
   a) gateway can be actually local interface address,
      so that gatewayed route is direct.
   b) gateway must be on-link address, possibly
      described not by an ifaddr, but also by a direct route.
   c) If both gateway and interface are specified, they should not
      contradict.
   d) If we use tunnel routes, gateway could be not on-link.

   Attempt to reconcile all of these (alas, self-contradictory) conditions
   results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size
   of code does not increase practically, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is local address,
   "link" is direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   Code is still hairy, but now it is apparently logically
   consistent and very flexible. F.e. as by-product it allows
   independent exterior and interior routing processes
   to co-exist in peace.

   Normally it looks as following.

   {universe prefix}  -> (gw, oif) [scope link]
			  |
			  |-> {link prefix} -> (gw, oif) [scope local]
						|
						|-> {local prefix} (terminal node)
 */
517
Thomas Graf4e902c52006-08-17 18:14:52 -0700518static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
519 struct fib_nh *nh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520{
521 int err;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800522 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523
Denis V. Lunev86167a32008-01-21 17:34:00 -0800524 net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525 if (nh->nh_gw) {
526 struct fib_result res;
527
528#ifdef CONFIG_IP_ROUTE_PERVASIVE
529 if (nh->nh_flags&RTNH_F_PERVASIVE)
530 return 0;
531#endif
532 if (nh->nh_flags&RTNH_F_ONLINK) {
533 struct net_device *dev;
534
Thomas Graf4e902c52006-08-17 18:14:52 -0700535 if (cfg->fc_scope >= RT_SCOPE_LINK)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536 return -EINVAL;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800537 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 return -EINVAL;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800539 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 return -ENODEV;
541 if (!(dev->flags&IFF_UP))
542 return -ENETDOWN;
543 nh->nh_dev = dev;
544 dev_hold(dev);
545 nh->nh_scope = RT_SCOPE_LINK;
546 return 0;
547 }
548 {
Thomas Graf4e902c52006-08-17 18:14:52 -0700549 struct flowi fl = {
550 .nl_u = {
551 .ip4_u = {
552 .daddr = nh->nh_gw,
553 .scope = cfg->fc_scope + 1,
554 },
555 },
556 .oif = nh->nh_oif,
557 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558
559 /* It is not necessary, but requires a bit of thinking */
560 if (fl.fl4_scope < RT_SCOPE_LINK)
561 fl.fl4_scope = RT_SCOPE_LINK;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800562 if ((err = fib_lookup(net, &fl, &res)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563 return err;
564 }
565 err = -EINVAL;
566 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
567 goto out;
568 nh->nh_scope = res.scope;
569 nh->nh_oif = FIB_RES_OIF(res);
570 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
571 goto out;
572 dev_hold(nh->nh_dev);
573 err = -ENETDOWN;
574 if (!(nh->nh_dev->flags & IFF_UP))
575 goto out;
576 err = 0;
577out:
578 fib_res_put(&res);
579 return err;
580 } else {
581 struct in_device *in_dev;
582
583 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
584 return -EINVAL;
585
Denis V. Lunev86167a32008-01-21 17:34:00 -0800586 in_dev = inetdev_by_index(net, nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587 if (in_dev == NULL)
588 return -ENODEV;
589 if (!(in_dev->dev->flags&IFF_UP)) {
590 in_dev_put(in_dev);
591 return -ENETDOWN;
592 }
593 nh->nh_dev = in_dev->dev;
594 dev_hold(nh->nh_dev);
595 nh->nh_scope = RT_SCOPE_HOST;
596 in_dev_put(in_dev);
597 }
598 return 0;
599}
600
Al Viro81f7bf62006-09-27 18:40:00 -0700601static inline unsigned int fib_laddr_hashfn(__be32 val)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602{
603 unsigned int mask = (fib_hash_size - 1);
604
Al Viro81f7bf62006-09-27 18:40:00 -0700605 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606}
607
608static struct hlist_head *fib_hash_alloc(int bytes)
609{
610 if (bytes <= PAGE_SIZE)
Joonwoo Park88f83492007-11-26 23:29:32 +0800611 return kzalloc(bytes, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 else
613 return (struct hlist_head *)
Joonwoo Park88f83492007-11-26 23:29:32 +0800614 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615}
616
617static void fib_hash_free(struct hlist_head *hash, int bytes)
618{
619 if (!hash)
620 return;
621
622 if (bytes <= PAGE_SIZE)
623 kfree(hash);
624 else
625 free_pages((unsigned long) hash, get_order(bytes));
626}
627
628static void fib_hash_move(struct hlist_head *new_info_hash,
629 struct hlist_head *new_laddrhash,
630 unsigned int new_size)
631{
David S. Millerb7656e72005-08-05 04:12:48 -0700632 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700634 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700636 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700637 old_info_hash = fib_info_hash;
638 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639 fib_hash_size = new_size;
640
641 for (i = 0; i < old_size; i++) {
642 struct hlist_head *head = &fib_info_hash[i];
643 struct hlist_node *node, *n;
644 struct fib_info *fi;
645
646 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
647 struct hlist_head *dest;
648 unsigned int new_hash;
649
650 hlist_del(&fi->fib_hash);
651
652 new_hash = fib_info_hashfn(fi);
653 dest = &new_info_hash[new_hash];
654 hlist_add_head(&fi->fib_hash, dest);
655 }
656 }
657 fib_info_hash = new_info_hash;
658
659 for (i = 0; i < old_size; i++) {
660 struct hlist_head *lhead = &fib_info_laddrhash[i];
661 struct hlist_node *node, *n;
662 struct fib_info *fi;
663
664 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
665 struct hlist_head *ldest;
666 unsigned int new_hash;
667
668 hlist_del(&fi->fib_lhash);
669
670 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
671 ldest = &new_laddrhash[new_hash];
672 hlist_add_head(&fi->fib_lhash, ldest);
673 }
674 }
675 fib_info_laddrhash = new_laddrhash;
676
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700677 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700678
679 bytes = old_size * sizeof(struct hlist_head *);
680 fib_hash_free(old_info_hash, bytes);
681 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682}
683
Thomas Graf4e902c52006-08-17 18:14:52 -0700684struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685{
686 int err;
687 struct fib_info *fi = NULL;
688 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 int nhs = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690
691 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700692 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693 goto err_inval;
694
695#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700696 if (cfg->fc_mp) {
697 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 if (nhs == 0)
699 goto err_inval;
700 }
701#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702
703 err = -ENOBUFS;
704 if (fib_info_cnt >= fib_hash_size) {
705 unsigned int new_size = fib_hash_size << 1;
706 struct hlist_head *new_info_hash;
707 struct hlist_head *new_laddrhash;
708 unsigned int bytes;
709
710 if (!new_size)
711 new_size = 1;
712 bytes = new_size * sizeof(struct hlist_head *);
713 new_info_hash = fib_hash_alloc(bytes);
714 new_laddrhash = fib_hash_alloc(bytes);
715 if (!new_info_hash || !new_laddrhash) {
716 fib_hash_free(new_info_hash, bytes);
717 fib_hash_free(new_laddrhash, bytes);
Joonwoo Park88f83492007-11-26 23:29:32 +0800718 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 fib_hash_move(new_info_hash, new_laddrhash, new_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
721 if (!fib_hash_size)
722 goto failure;
723 }
724
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700725 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726 if (fi == NULL)
727 goto failure;
728 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729
Thomas Graf4e902c52006-08-17 18:14:52 -0700730 fi->fib_protocol = cfg->fc_protocol;
731 fi->fib_flags = cfg->fc_flags;
732 fi->fib_priority = cfg->fc_priority;
733 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734
735 fi->fib_nhs = nhs;
736 change_nexthops(fi) {
737 nh->nh_parent = fi;
738 } endfor_nexthops(fi)
739
Thomas Graf4e902c52006-08-17 18:14:52 -0700740 if (cfg->fc_mx) {
741 struct nlattr *nla;
742 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743
Thomas Graf4e902c52006-08-17 18:14:52 -0700744 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200745 int type = nla_type(nla);
Thomas Graf4e902c52006-08-17 18:14:52 -0700746
747 if (type) {
748 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700750 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 }
753 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754
Thomas Graf4e902c52006-08-17 18:14:52 -0700755 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700757 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
758 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700760 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700762 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 goto err_inval;
764#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700765 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 goto err_inval;
767#endif
768#else
769 goto err_inval;
770#endif
771 } else {
772 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700773
774 nh->nh_oif = cfg->fc_oif;
775 nh->nh_gw = cfg->fc_gw;
776 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700778 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780#ifdef CONFIG_IP_ROUTE_MULTIPATH
781 nh->nh_weight = 1;
782#endif
783 }
784
Thomas Graf4e902c52006-08-17 18:14:52 -0700785 if (fib_props[cfg->fc_type].error) {
786 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 goto err_inval;
788 goto link_it;
789 }
790
Thomas Graf4e902c52006-08-17 18:14:52 -0700791 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 goto err_inval;
793
Thomas Graf4e902c52006-08-17 18:14:52 -0700794 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795 struct fib_nh *nh = fi->fib_nh;
796
797 /* Local address is added. */
798 if (nhs != 1 || nh->nh_gw)
799 goto err_inval;
800 nh->nh_scope = RT_SCOPE_NOWHERE;
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800801 nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
802 fi->fib_nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803 err = -ENODEV;
804 if (nh->nh_dev == NULL)
805 goto failure;
806 } else {
807 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700808 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 goto failure;
810 } endfor_nexthops(fi)
811 }
812
813 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700814 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
815 fi->fib_prefsrc != cfg->fc_dst)
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800816 if (inet_addr_type(cfg->fc_nlinfo.nl_net,
817 fi->fib_prefsrc) != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818 goto err_inval;
819 }
820
821link_it:
822 if ((ofi = fib_find_info(fi)) != NULL) {
823 fi->fib_dead = 1;
824 free_fib_info(fi);
825 ofi->fib_treeref++;
826 return ofi;
827 }
828
829 fi->fib_treeref++;
830 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700831 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 hlist_add_head(&fi->fib_hash,
833 &fib_info_hash[fib_info_hashfn(fi)]);
834 if (fi->fib_prefsrc) {
835 struct hlist_head *head;
836
837 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
838 hlist_add_head(&fi->fib_lhash, head);
839 }
840 change_nexthops(fi) {
841 struct hlist_head *head;
842 unsigned int hash;
843
844 if (!nh->nh_dev)
845 continue;
846 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
847 head = &fib_info_devhash[hash];
848 hlist_add_head(&nh->nh_hash, head);
849 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700850 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 return fi;
852
853err_inval:
854 err = -EINVAL;
855
856failure:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900857 if (fi) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858 fi->fib_dead = 1;
859 free_fib_info(fi);
860 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700861
862 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863}
864
Robert Olssone5b43762005-08-25 13:01:03 -0700865/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866int fib_semantic_match(struct list_head *head, const struct flowi *flp,
Al Viro1ef1b8c2006-09-26 22:20:56 -0700867 struct fib_result *res, __be32 zone, __be32 mask,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868 int prefixlen)
869{
870 struct fib_alias *fa;
871 int nh_sel = 0;
872
Robert Olssone5b43762005-08-25 13:01:03 -0700873 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874 int err;
875
876 if (fa->fa_tos &&
877 fa->fa_tos != flp->fl4_tos)
878 continue;
879
880 if (fa->fa_scope < flp->fl4_scope)
881 continue;
882
883 fa->fa_state |= FA_S_ACCESSED;
884
885 err = fib_props[fa->fa_type].error;
886 if (err == 0) {
887 struct fib_info *fi = fa->fa_info;
888
889 if (fi->fib_flags & RTNH_F_DEAD)
890 continue;
891
892 switch (fa->fa_type) {
893 case RTN_UNICAST:
894 case RTN_LOCAL:
895 case RTN_BROADCAST:
896 case RTN_ANYCAST:
897 case RTN_MULTICAST:
898 for_nexthops(fi) {
899 if (nh->nh_flags&RTNH_F_DEAD)
900 continue;
901 if (!flp->oif || flp->oif == nh->nh_oif)
902 break;
903 }
904#ifdef CONFIG_IP_ROUTE_MULTIPATH
905 if (nhsel < fi->fib_nhs) {
906 nh_sel = nhsel;
907 goto out_fill_res;
908 }
909#else
910 if (nhsel < 1) {
911 goto out_fill_res;
912 }
913#endif
914 endfor_nexthops(fi);
915 continue;
916
917 default:
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800918 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
919 fa->fa_type);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920 return -EINVAL;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -0700921 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922 }
923 return err;
924 }
925 return 1;
926
927out_fill_res:
928 res->prefixlen = prefixlen;
929 res->nh_sel = nh_sel;
930 res->type = fa->fa_type;
931 res->scope = fa->fa_scope;
932 res->fi = fa->fa_info;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 atomic_inc(&res->fi->fib_clntref);
934 return 0;
935}
936
937/* Find appropriate source address to this destination */
938
Al Virob83738a2006-09-26 22:14:15 -0700939__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940{
941 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
942}
943
Thomas Grafbe403ea2006-08-17 18:15:17 -0700944int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Al Viro81f7bf62006-09-27 18:40:00 -0700945 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700946 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700948 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950
Thomas Grafbe403ea2006-08-17 18:15:17 -0700951 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
952 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -0800953 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700954
955 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 rtm->rtm_family = AF_INET;
957 rtm->rtm_dst_len = dst_len;
958 rtm->rtm_src_len = 0;
959 rtm->rtm_tos = tos;
960 rtm->rtm_table = tb_id;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700961 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 rtm->rtm_type = type;
963 rtm->rtm_flags = fi->fib_flags;
964 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700966
967 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700968 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700969
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700971 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
972
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700974 goto nla_put_failure;
975
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700977 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700978
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 if (fi->fib_nhs == 1) {
980 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700981 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700982
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700984 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700985#ifdef CONFIG_NET_CLS_ROUTE
986 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700987 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700988#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 }
990#ifdef CONFIG_IP_ROUTE_MULTIPATH
991 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700992 struct rtnexthop *rtnh;
993 struct nlattr *mp;
994
995 mp = nla_nest_start(skb, RTA_MULTIPATH);
996 if (mp == NULL)
997 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998
999 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -07001000 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1001 if (rtnh == NULL)
1002 goto nla_put_failure;
1003
1004 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1005 rtnh->rtnh_hops = nh->nh_weight - 1;
1006 rtnh->rtnh_ifindex = nh->nh_oif;
1007
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -07001009 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001010#ifdef CONFIG_NET_CLS_ROUTE
1011 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001012 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001013#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001014 /* length of rtnetlink header + attributes */
1015 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001017
1018 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 }
1020#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001021 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022
Thomas Grafbe403ea2006-08-17 18:15:17 -07001023nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08001024 nlmsg_cancel(skb, nlh);
1025 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026}
1027
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028/*
1029 Update FIB if:
1030 - local address disappeared -> we must delete all the entries
1031 referring to it.
1032 - device went down -> we must shutdown all nexthops going via it.
1033 */
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001034int fib_sync_down_addr(__be32 local)
1035{
1036 int ret = 0;
1037 unsigned int hash = fib_laddr_hashfn(local);
1038 struct hlist_head *head = &fib_info_laddrhash[hash];
1039 struct hlist_node *node;
1040 struct fib_info *fi;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001042 if (fib_info_laddrhash == NULL || local == 0)
1043 return 0;
1044
1045 hlist_for_each_entry(fi, node, head, fib_lhash) {
1046 if (fi->fib_prefsrc == local) {
1047 fi->fib_flags |= RTNH_F_DEAD;
1048 ret++;
1049 }
1050 }
1051 return ret;
1052}
1053
1054int fib_sync_down_dev(struct net_device *dev, int force)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055{
1056 int ret = 0;
1057 int scope = RT_SCOPE_NOWHERE;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001058 struct fib_info *prev_fi = NULL;
1059 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1060 struct hlist_head *head = &fib_info_devhash[hash];
1061 struct hlist_node *node;
1062 struct fib_nh *nh;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001063
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064 if (force)
1065 scope = -1;
1066
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001067 hlist_for_each_entry(nh, node, head, nh_hash) {
1068 struct fib_info *fi = nh->nh_parent;
1069 int dead;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001071 BUG_ON(!fi->fib_nhs);
1072 if (nh->nh_dev != dev || fi == prev_fi)
1073 continue;
1074 prev_fi = fi;
1075 dead = 0;
1076 change_nexthops(fi) {
1077 if (nh->nh_flags&RTNH_F_DEAD)
1078 dead++;
1079 else if (nh->nh_dev == dev &&
1080 nh->nh_scope != scope) {
1081 nh->nh_flags |= RTNH_F_DEAD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001083 spin_lock_bh(&fib_multipath_lock);
1084 fi->fib_power -= nh->nh_power;
1085 nh->nh_power = 0;
1086 spin_unlock_bh(&fib_multipath_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087#endif
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001088 dead++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 }
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001090#ifdef CONFIG_IP_ROUTE_MULTIPATH
1091 if (force > 1 && nh->nh_dev == dev) {
1092 dead = fi->fib_nhs;
1093 break;
1094 }
1095#endif
1096 } endfor_nexthops(fi)
1097 if (dead == fi->fib_nhs) {
1098 fi->fib_flags |= RTNH_F_DEAD;
1099 ret++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100 }
1101 }
1102
1103 return ret;
1104}
1105
1106#ifdef CONFIG_IP_ROUTE_MULTIPATH
1107
1108/*
1109 Dead device goes up. We wake up dead nexthops.
1110 It takes sense only on multipath routes.
1111 */
1112
1113int fib_sync_up(struct net_device *dev)
1114{
1115 struct fib_info *prev_fi;
1116 unsigned int hash;
1117 struct hlist_head *head;
1118 struct hlist_node *node;
1119 struct fib_nh *nh;
1120 int ret;
1121
1122 if (!(dev->flags&IFF_UP))
1123 return 0;
1124
1125 prev_fi = NULL;
1126 hash = fib_devindex_hashfn(dev->ifindex);
1127 head = &fib_info_devhash[hash];
1128 ret = 0;
1129
1130 hlist_for_each_entry(nh, node, head, nh_hash) {
1131 struct fib_info *fi = nh->nh_parent;
1132 int alive;
1133
1134 BUG_ON(!fi->fib_nhs);
1135 if (nh->nh_dev != dev || fi == prev_fi)
1136 continue;
1137
1138 prev_fi = fi;
1139 alive = 0;
1140 change_nexthops(fi) {
1141 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1142 alive++;
1143 continue;
1144 }
1145 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1146 continue;
Herbert Xue5ed6392005-10-03 14:35:55 -07001147 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148 continue;
1149 alive++;
1150 spin_lock_bh(&fib_multipath_lock);
1151 nh->nh_power = 0;
1152 nh->nh_flags &= ~RTNH_F_DEAD;
1153 spin_unlock_bh(&fib_multipath_lock);
1154 } endfor_nexthops(fi)
1155
1156 if (alive > 0) {
1157 fi->fib_flags &= ~RTNH_F_DEAD;
1158 ret++;
1159 }
1160 }
1161
1162 return ret;
1163}
1164
1165/*
1166 The algorithm is suboptimal, but it provides really
1167 fair weighted route distribution.
1168 */
1169
1170void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1171{
1172 struct fib_info *fi = res->fi;
1173 int w;
1174
1175 spin_lock_bh(&fib_multipath_lock);
1176 if (fi->fib_power <= 0) {
1177 int power = 0;
1178 change_nexthops(fi) {
1179 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1180 power += nh->nh_weight;
1181 nh->nh_power = nh->nh_weight;
1182 }
1183 } endfor_nexthops(fi);
1184 fi->fib_power = power;
1185 if (power <= 0) {
1186 spin_unlock_bh(&fib_multipath_lock);
1187 /* Race condition: route has just become dead. */
1188 res->nh_sel = 0;
1189 return;
1190 }
1191 }
1192
1193
1194 /* w should be random number [0..fi->fib_power-1],
1195 it is pretty bad approximation.
1196 */
1197
1198 w = jiffies % fi->fib_power;
1199
1200 change_nexthops(fi) {
1201 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1202 if ((w -= nh->nh_power) <= 0) {
1203 nh->nh_power--;
1204 fi->fib_power--;
1205 res->nh_sel = nhsel;
1206 spin_unlock_bh(&fib_multipath_lock);
1207 return;
1208 }
1209 }
1210 } endfor_nexthops(fi);
1211
1212 /* Race condition: route has just become dead. */
1213 res->nh_sel = 0;
1214 spin_unlock_bh(&fib_multipath_lock);
1215}
1216#endif