blob: bb94550d95c3e8a2c80b1fc74bed9b9eb1b52a5a [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020031#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <linux/init.h>
37
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020038#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
45#include <net/ip_mp_alg.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070046#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070047#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048
49#include "fib_lookup.h"
50
51#define FSprintk(a...)
52
Stephen Hemminger832b4c52006-08-29 16:48:09 -070053static DEFINE_SPINLOCK(fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070054static struct hlist_head *fib_info_hash;
55static struct hlist_head *fib_info_laddrhash;
56static unsigned int fib_hash_size;
57static unsigned int fib_info_cnt;
58
59#define DEVINDEX_HASHBITS 8
60#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
61static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
62
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/*
 * Nexthop iterators.  Each macro opens a brace scope that declares the
 * loop index "nhsel" and the cursor "nh" and then emits a for-header; the
 * caller supplies the loop body and must close the scope again with
 * endfor_nexthops().  for_nexthops() yields a const cursor,
 * change_nexthops() a writable one.
 */
#define for_nexthops(fi) { int nhsel; const struct fib_nh *nh; \
	for (nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh *nh; \
	for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh); \
	     nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath the loop runs exactly once over the first nexthop.
 * Hope, that gcc will optimize it to get rid of dummy loop.
 */

#define for_nexthops(fi) { int nhsel = 0; \
	const struct fib_nh *nh = (fi)->fib_nh; \
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel = 0; \
	struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh); \
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }
86
87
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090088static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070089{
90 int error;
91 u8 scope;
Thomas Grafa0ee18b2007-03-24 20:32:54 -070092} fib_props[RTN_MAX + 1] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090093 {
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 .error = 0,
95 .scope = RT_SCOPE_NOWHERE,
96 }, /* RTN_UNSPEC */
97 {
98 .error = 0,
99 .scope = RT_SCOPE_UNIVERSE,
100 }, /* RTN_UNICAST */
101 {
102 .error = 0,
103 .scope = RT_SCOPE_HOST,
104 }, /* RTN_LOCAL */
105 {
106 .error = 0,
107 .scope = RT_SCOPE_LINK,
108 }, /* RTN_BROADCAST */
109 {
110 .error = 0,
111 .scope = RT_SCOPE_LINK,
112 }, /* RTN_ANYCAST */
113 {
114 .error = 0,
115 .scope = RT_SCOPE_UNIVERSE,
116 }, /* RTN_MULTICAST */
117 {
118 .error = -EINVAL,
119 .scope = RT_SCOPE_UNIVERSE,
120 }, /* RTN_BLACKHOLE */
121 {
122 .error = -EHOSTUNREACH,
123 .scope = RT_SCOPE_UNIVERSE,
124 }, /* RTN_UNREACHABLE */
125 {
126 .error = -EACCES,
127 .scope = RT_SCOPE_UNIVERSE,
128 }, /* RTN_PROHIBIT */
129 {
130 .error = -EAGAIN,
131 .scope = RT_SCOPE_UNIVERSE,
132 }, /* RTN_THROW */
133 {
134 .error = -EINVAL,
135 .scope = RT_SCOPE_NOWHERE,
136 }, /* RTN_NAT */
137 {
138 .error = -EINVAL,
139 .scope = RT_SCOPE_NOWHERE,
140 }, /* RTN_XRESOLVE */
141};
142
143
144/* Release a nexthop info record */
145
146void free_fib_info(struct fib_info *fi)
147{
148 if (fi->fib_dead == 0) {
149 printk("Freeing alive fib_info %p\n", fi);
150 return;
151 }
152 change_nexthops(fi) {
153 if (nh->nh_dev)
154 dev_put(nh->nh_dev);
155 nh->nh_dev = NULL;
156 } endfor_nexthops(fi);
157 fib_info_cnt--;
158 kfree(fi);
159}
160
161void fib_release_info(struct fib_info *fi)
162{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700163 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 if (fi && --fi->fib_treeref == 0) {
165 hlist_del(&fi->fib_hash);
166 if (fi->fib_prefsrc)
167 hlist_del(&fi->fib_lhash);
168 change_nexthops(fi) {
169 if (!nh->nh_dev)
170 continue;
171 hlist_del(&nh->nh_hash);
172 } endfor_nexthops(fi)
173 fi->fib_dead = 1;
174 fib_info_put(fi);
175 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700176 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177}
178
179static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
180{
181 const struct fib_nh *onh = ofi->fib_nh;
182
183 for_nexthops(fi) {
184 if (nh->nh_oif != onh->nh_oif ||
185 nh->nh_gw != onh->nh_gw ||
186 nh->nh_scope != onh->nh_scope ||
187#ifdef CONFIG_IP_ROUTE_MULTIPATH
188 nh->nh_weight != onh->nh_weight ||
189#endif
190#ifdef CONFIG_NET_CLS_ROUTE
191 nh->nh_tclassid != onh->nh_tclassid ||
192#endif
193 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
194 return -1;
195 onh++;
196 } endfor_nexthops(fi);
197 return 0;
198}
199
200static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
201{
202 unsigned int mask = (fib_hash_size - 1);
203 unsigned int val = fi->fib_nhs;
204
205 val ^= fi->fib_protocol;
Al Viro81f7bf62006-09-27 18:40:00 -0700206 val ^= (__force u32)fi->fib_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 val ^= fi->fib_priority;
208
209 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
210}
211
212static struct fib_info *fib_find_info(const struct fib_info *nfi)
213{
214 struct hlist_head *head;
215 struct hlist_node *node;
216 struct fib_info *fi;
217 unsigned int hash;
218
219 hash = fib_info_hashfn(nfi);
220 head = &fib_info_hash[hash];
221
222 hlist_for_each_entry(fi, node, head, fib_hash) {
223 if (fi->fib_nhs != nfi->fib_nhs)
224 continue;
225 if (nfi->fib_protocol == fi->fib_protocol &&
226 nfi->fib_prefsrc == fi->fib_prefsrc &&
227 nfi->fib_priority == fi->fib_priority &&
228 memcmp(nfi->fib_metrics, fi->fib_metrics,
229 sizeof(fi->fib_metrics)) == 0 &&
230 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
231 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
232 return fi;
233 }
234
235 return NULL;
236}
237
238static inline unsigned int fib_devindex_hashfn(unsigned int val)
239{
240 unsigned int mask = DEVINDEX_HASHSIZE - 1;
241
242 return (val ^
243 (val >> DEVINDEX_HASHBITS) ^
244 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
245}
246
247/* Check, that the gateway is already configured.
248 Used only by redirect accept routine.
249 */
250
Al Virod878e72e2006-09-26 22:18:13 -0700251int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252{
253 struct hlist_head *head;
254 struct hlist_node *node;
255 struct fib_nh *nh;
256 unsigned int hash;
257
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700258 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259
260 hash = fib_devindex_hashfn(dev->ifindex);
261 head = &fib_info_devhash[hash];
262 hlist_for_each_entry(nh, node, head, nh_hash) {
263 if (nh->nh_dev == dev &&
264 nh->nh_gw == gw &&
265 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700266 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 return 0;
268 }
269 }
270
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700271 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272
273 return -1;
274}
275
Thomas Graf339bf982006-11-10 14:10:15 -0800276static inline size_t fib_nlmsg_size(struct fib_info *fi)
277{
278 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
279 + nla_total_size(4) /* RTA_TABLE */
280 + nla_total_size(4) /* RTA_DST */
281 + nla_total_size(4) /* RTA_PRIORITY */
282 + nla_total_size(4); /* RTA_PREFSRC */
283
284 /* space for nested metrics */
285 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
286
287 if (fi->fib_nhs) {
288 /* Also handles the special case fib_nhs == 1 */
289
290 /* each nexthop is packed in an attribute */
291 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
292
293 /* may contain flow and gateway attribute */
294 nhsize += 2 * nla_total_size(4);
295
296 /* all nexthops are packed in a nested attribute */
297 payload += nla_total_size(fi->fib_nhs * nhsize);
298 }
299
300 return payload;
301}
302
Al Viro81f7bf62006-09-27 18:40:00 -0700303void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
Milan Kocianb8f55832007-05-23 14:55:06 -0700304 int dst_len, u32 tb_id, struct nl_info *info,
305 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306{
307 struct sk_buff *skb;
Thomas Graf4e902c52006-08-17 18:14:52 -0700308 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700309 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310
Thomas Graf339bf982006-11-10 14:10:15 -0800311 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700312 if (skb == NULL)
313 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314
Thomas Graf4e902c52006-08-17 18:14:52 -0700315 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700316 fa->fa_type, fa->fa_scope, key, dst_len,
Milan Kocianb8f55832007-05-23 14:55:06 -0700317 fa->fa_tos, fa->fa_info, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -0800318 if (err < 0) {
319 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
320 WARN_ON(err == -EMSGSIZE);
321 kfree_skb(skb);
322 goto errout;
323 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700324 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
325 info->nlh, GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700326errout:
327 if (err < 0)
328 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329}
330
331/* Return the first fib alias matching TOS with
332 * priority less than or equal to PRIO.
333 */
334struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
335{
336 if (fah) {
337 struct fib_alias *fa;
338 list_for_each_entry(fa, fah, fa_list) {
339 if (fa->fa_tos > tos)
340 continue;
341 if (fa->fa_info->fib_priority >= prio ||
342 fa->fa_tos < tos)
343 return fa;
344 }
345 }
346 return NULL;
347}
348
349int fib_detect_death(struct fib_info *fi, int order,
350 struct fib_info **last_resort, int *last_idx, int *dflt)
351{
352 struct neighbour *n;
353 int state = NUD_NONE;
354
355 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
356 if (n) {
357 state = n->nud_state;
358 neigh_release(n);
359 }
360 if (state==NUD_REACHABLE)
361 return 0;
362 if ((state&NUD_VALID) && order != *dflt)
363 return 0;
364 if ((state&NUD_VALID) ||
365 (*last_idx<0 && order > *dflt)) {
366 *last_resort = fi;
367 *last_idx = order;
368 }
369 return 1;
370}
371
372#ifdef CONFIG_IP_ROUTE_MULTIPATH
373
/*
 * Count the rtnexthop entries in a buffer of @remaining bytes starting at
 * @rtnh.  Returns 0 if bytes are left over after the last well-formed
 * entry, since that implies an invalid nexthop configuration.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
386
Thomas Graf4e902c52006-08-17 18:14:52 -0700387static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
388 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700391 int attrlen;
392
393 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700395
396 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
397 nh->nh_oif = rtnh->rtnh_ifindex;
398 nh->nh_weight = rtnh->rtnh_hops + 1;
399
400 attrlen = rtnh_attrlen(rtnh);
401 if (attrlen > 0) {
402 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
403
404 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700405 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700407 nla = nla_find(attrs, attrlen, RTA_FLOW);
408 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409#endif
410 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700411
412 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700414
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 return 0;
416}
417
418#endif
419
Thomas Graf4e902c52006-08-17 18:14:52 -0700420int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421{
422#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700423 struct rtnexthop *rtnh;
424 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425#endif
426
Thomas Graf4e902c52006-08-17 18:14:52 -0700427 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428 return 1;
429
Thomas Graf4e902c52006-08-17 18:14:52 -0700430 if (cfg->fc_oif || cfg->fc_gw) {
431 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
432 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 return 0;
434 return 1;
435 }
436
437#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700438 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700440
441 rtnh = cfg->fc_mp;
442 remaining = cfg->fc_mp_len;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900443
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700445 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446
Thomas Graf4e902c52006-08-17 18:14:52 -0700447 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700449
450 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700452
453 attrlen = rtnh_attrlen(rtnh);
454 if (attrlen < 0) {
455 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
456
457 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700458 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 return 1;
460#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700461 nla = nla_find(attrs, attrlen, RTA_FLOW);
462 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 return 1;
464#endif
465 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700466
467 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 } endfor_nexthops(fi);
469#endif
470 return 0;
471}
472
473
474/*
475 Picture
476 -------
477
   Semantics of nexthop are very messy for historical reasons.
   We have to take into account that:
480 a) gateway can be actually local interface address,
481 so that gatewayed route is direct.
482 b) gateway must be on-link address, possibly
483 described not by an ifaddr, but also by a direct route.
484 c) If both gateway and interface are specified, they should not
485 contradict.
486 d) If we use tunnel routes, gateway could be not on-link.
487
488 Attempt to reconcile all of these (alas, self-contradictory) conditions
489 results in pretty ugly and hairy code with obscure logic.
490
   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
494 Every prefix is assigned a "scope" value: "host" is local address,
495 "link" is direct route,
496 [ ... "site" ... "interior" ... ]
497 and "universe" is true gateway route with global meaning.
498
499 Every prefix refers to a set of "nexthop"s (gw, oif),
500 where gw must have narrower scope. This recursion stops
501 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
502 which means that gw is forced to be on link.
503
   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g., as a by-product it allows
   independent exterior and interior routing processes
   to co-exist in peace.
508
509 Normally it looks as following.
510
511 {universe prefix} -> (gw, oif) [scope link]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900512 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513 |-> {link prefix} -> (gw, oif) [scope local]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900514 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 |-> {local prefix} (terminal node)
516 */
517
Thomas Graf4e902c52006-08-17 18:14:52 -0700518static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
519 struct fib_nh *nh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520{
521 int err;
522
523 if (nh->nh_gw) {
524 struct fib_result res;
525
526#ifdef CONFIG_IP_ROUTE_PERVASIVE
527 if (nh->nh_flags&RTNH_F_PERVASIVE)
528 return 0;
529#endif
530 if (nh->nh_flags&RTNH_F_ONLINK) {
531 struct net_device *dev;
532
Thomas Graf4e902c52006-08-17 18:14:52 -0700533 if (cfg->fc_scope >= RT_SCOPE_LINK)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 return -EINVAL;
535 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
536 return -EINVAL;
537 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
538 return -ENODEV;
539 if (!(dev->flags&IFF_UP))
540 return -ENETDOWN;
541 nh->nh_dev = dev;
542 dev_hold(dev);
543 nh->nh_scope = RT_SCOPE_LINK;
544 return 0;
545 }
546 {
Thomas Graf4e902c52006-08-17 18:14:52 -0700547 struct flowi fl = {
548 .nl_u = {
549 .ip4_u = {
550 .daddr = nh->nh_gw,
551 .scope = cfg->fc_scope + 1,
552 },
553 },
554 .oif = nh->nh_oif,
555 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556
557 /* It is not necessary, but requires a bit of thinking */
558 if (fl.fl4_scope < RT_SCOPE_LINK)
559 fl.fl4_scope = RT_SCOPE_LINK;
560 if ((err = fib_lookup(&fl, &res)) != 0)
561 return err;
562 }
563 err = -EINVAL;
564 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
565 goto out;
566 nh->nh_scope = res.scope;
567 nh->nh_oif = FIB_RES_OIF(res);
568 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
569 goto out;
570 dev_hold(nh->nh_dev);
571 err = -ENETDOWN;
572 if (!(nh->nh_dev->flags & IFF_UP))
573 goto out;
574 err = 0;
575out:
576 fib_res_put(&res);
577 return err;
578 } else {
579 struct in_device *in_dev;
580
581 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
582 return -EINVAL;
583
584 in_dev = inetdev_by_index(nh->nh_oif);
585 if (in_dev == NULL)
586 return -ENODEV;
587 if (!(in_dev->dev->flags&IFF_UP)) {
588 in_dev_put(in_dev);
589 return -ENETDOWN;
590 }
591 nh->nh_dev = in_dev->dev;
592 dev_hold(nh->nh_dev);
593 nh->nh_scope = RT_SCOPE_HOST;
594 in_dev_put(in_dev);
595 }
596 return 0;
597}
598
Al Viro81f7bf62006-09-27 18:40:00 -0700599static inline unsigned int fib_laddr_hashfn(__be32 val)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600{
601 unsigned int mask = (fib_hash_size - 1);
602
Al Viro81f7bf62006-09-27 18:40:00 -0700603 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604}
605
606static struct hlist_head *fib_hash_alloc(int bytes)
607{
608 if (bytes <= PAGE_SIZE)
609 return kmalloc(bytes, GFP_KERNEL);
610 else
611 return (struct hlist_head *)
612 __get_free_pages(GFP_KERNEL, get_order(bytes));
613}
614
615static void fib_hash_free(struct hlist_head *hash, int bytes)
616{
617 if (!hash)
618 return;
619
620 if (bytes <= PAGE_SIZE)
621 kfree(hash);
622 else
623 free_pages((unsigned long) hash, get_order(bytes));
624}
625
626static void fib_hash_move(struct hlist_head *new_info_hash,
627 struct hlist_head *new_laddrhash,
628 unsigned int new_size)
629{
David S. Millerb7656e72005-08-05 04:12:48 -0700630 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700632 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700634 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700635 old_info_hash = fib_info_hash;
636 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 fib_hash_size = new_size;
638
639 for (i = 0; i < old_size; i++) {
640 struct hlist_head *head = &fib_info_hash[i];
641 struct hlist_node *node, *n;
642 struct fib_info *fi;
643
644 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
645 struct hlist_head *dest;
646 unsigned int new_hash;
647
648 hlist_del(&fi->fib_hash);
649
650 new_hash = fib_info_hashfn(fi);
651 dest = &new_info_hash[new_hash];
652 hlist_add_head(&fi->fib_hash, dest);
653 }
654 }
655 fib_info_hash = new_info_hash;
656
657 for (i = 0; i < old_size; i++) {
658 struct hlist_head *lhead = &fib_info_laddrhash[i];
659 struct hlist_node *node, *n;
660 struct fib_info *fi;
661
662 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
663 struct hlist_head *ldest;
664 unsigned int new_hash;
665
666 hlist_del(&fi->fib_lhash);
667
668 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
669 ldest = &new_laddrhash[new_hash];
670 hlist_add_head(&fi->fib_lhash, ldest);
671 }
672 }
673 fib_info_laddrhash = new_laddrhash;
674
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700675 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700676
677 bytes = old_size * sizeof(struct hlist_head *);
678 fib_hash_free(old_info_hash, bytes);
679 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680}
681
Thomas Graf4e902c52006-08-17 18:14:52 -0700682struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683{
684 int err;
685 struct fib_info *fi = NULL;
686 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687 int nhs = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688
689 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700690 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 goto err_inval;
692
693#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700694 if (cfg->fc_mp) {
695 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 if (nhs == 0)
697 goto err_inval;
698 }
699#endif
700#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
Thomas Graf4e902c52006-08-17 18:14:52 -0700701 if (cfg->fc_mp_alg) {
702 if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
703 cfg->fc_mp_alg > IP_MP_ALG_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 goto err_inval;
705 }
706#endif
707
708 err = -ENOBUFS;
709 if (fib_info_cnt >= fib_hash_size) {
710 unsigned int new_size = fib_hash_size << 1;
711 struct hlist_head *new_info_hash;
712 struct hlist_head *new_laddrhash;
713 unsigned int bytes;
714
715 if (!new_size)
716 new_size = 1;
717 bytes = new_size * sizeof(struct hlist_head *);
718 new_info_hash = fib_hash_alloc(bytes);
719 new_laddrhash = fib_hash_alloc(bytes);
720 if (!new_info_hash || !new_laddrhash) {
721 fib_hash_free(new_info_hash, bytes);
722 fib_hash_free(new_laddrhash, bytes);
723 } else {
724 memset(new_info_hash, 0, bytes);
725 memset(new_laddrhash, 0, bytes);
726
727 fib_hash_move(new_info_hash, new_laddrhash, new_size);
728 }
729
730 if (!fib_hash_size)
731 goto failure;
732 }
733
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700734 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 if (fi == NULL)
736 goto failure;
737 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738
Thomas Graf4e902c52006-08-17 18:14:52 -0700739 fi->fib_protocol = cfg->fc_protocol;
740 fi->fib_flags = cfg->fc_flags;
741 fi->fib_priority = cfg->fc_priority;
742 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743
744 fi->fib_nhs = nhs;
745 change_nexthops(fi) {
746 nh->nh_parent = fi;
747 } endfor_nexthops(fi)
748
Thomas Graf4e902c52006-08-17 18:14:52 -0700749 if (cfg->fc_mx) {
750 struct nlattr *nla;
751 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752
Thomas Graf4e902c52006-08-17 18:14:52 -0700753 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
754 int type = nla->nla_type;
755
756 if (type) {
757 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700759 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 }
762 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763
Thomas Graf4e902c52006-08-17 18:14:52 -0700764 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700766 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
767 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700769 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700771 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 goto err_inval;
773#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700774 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775 goto err_inval;
776#endif
777#else
778 goto err_inval;
779#endif
780 } else {
781 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700782
783 nh->nh_oif = cfg->fc_oif;
784 nh->nh_gw = cfg->fc_gw;
785 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700787 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789#ifdef CONFIG_IP_ROUTE_MULTIPATH
790 nh->nh_weight = 1;
791#endif
792 }
793
794#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
Thomas Graf4e902c52006-08-17 18:14:52 -0700795 fi->fib_mp_alg = cfg->fc_mp_alg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796#endif
797
Thomas Graf4e902c52006-08-17 18:14:52 -0700798 if (fib_props[cfg->fc_type].error) {
799 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 goto err_inval;
801 goto link_it;
802 }
803
Thomas Graf4e902c52006-08-17 18:14:52 -0700804 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805 goto err_inval;
806
Thomas Graf4e902c52006-08-17 18:14:52 -0700807 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 struct fib_nh *nh = fi->fib_nh;
809
810 /* Local address is added. */
811 if (nhs != 1 || nh->nh_gw)
812 goto err_inval;
813 nh->nh_scope = RT_SCOPE_NOWHERE;
814 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
815 err = -ENODEV;
816 if (nh->nh_dev == NULL)
817 goto failure;
818 } else {
819 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700820 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821 goto failure;
822 } endfor_nexthops(fi)
823 }
824
825 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700826 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
827 fi->fib_prefsrc != cfg->fc_dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
829 goto err_inval;
830 }
831
832link_it:
833 if ((ofi = fib_find_info(fi)) != NULL) {
834 fi->fib_dead = 1;
835 free_fib_info(fi);
836 ofi->fib_treeref++;
837 return ofi;
838 }
839
840 fi->fib_treeref++;
841 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700842 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 hlist_add_head(&fi->fib_hash,
844 &fib_info_hash[fib_info_hashfn(fi)]);
845 if (fi->fib_prefsrc) {
846 struct hlist_head *head;
847
848 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
849 hlist_add_head(&fi->fib_lhash, head);
850 }
851 change_nexthops(fi) {
852 struct hlist_head *head;
853 unsigned int hash;
854
855 if (!nh->nh_dev)
856 continue;
857 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
858 head = &fib_info_devhash[hash];
859 hlist_add_head(&nh->nh_hash, head);
860 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700861 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862 return fi;
863
864err_inval:
865 err = -EINVAL;
866
867failure:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900868 if (fi) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 fi->fib_dead = 1;
870 free_fib_info(fi);
871 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700872
873 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874}
875
Robert Olssone5b43762005-08-25 13:01:03 -0700876/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877int fib_semantic_match(struct list_head *head, const struct flowi *flp,
Al Viro1ef1b8c2006-09-26 22:20:56 -0700878 struct fib_result *res, __be32 zone, __be32 mask,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 int prefixlen)
880{
881 struct fib_alias *fa;
882 int nh_sel = 0;
883
Robert Olssone5b43762005-08-25 13:01:03 -0700884 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 int err;
886
887 if (fa->fa_tos &&
888 fa->fa_tos != flp->fl4_tos)
889 continue;
890
891 if (fa->fa_scope < flp->fl4_scope)
892 continue;
893
894 fa->fa_state |= FA_S_ACCESSED;
895
896 err = fib_props[fa->fa_type].error;
897 if (err == 0) {
898 struct fib_info *fi = fa->fa_info;
899
900 if (fi->fib_flags & RTNH_F_DEAD)
901 continue;
902
903 switch (fa->fa_type) {
904 case RTN_UNICAST:
905 case RTN_LOCAL:
906 case RTN_BROADCAST:
907 case RTN_ANYCAST:
908 case RTN_MULTICAST:
909 for_nexthops(fi) {
910 if (nh->nh_flags&RTNH_F_DEAD)
911 continue;
912 if (!flp->oif || flp->oif == nh->nh_oif)
913 break;
914 }
915#ifdef CONFIG_IP_ROUTE_MULTIPATH
916 if (nhsel < fi->fib_nhs) {
917 nh_sel = nhsel;
918 goto out_fill_res;
919 }
920#else
921 if (nhsel < 1) {
922 goto out_fill_res;
923 }
924#endif
925 endfor_nexthops(fi);
926 continue;
927
928 default:
929 printk(KERN_DEBUG "impossible 102\n");
930 return -EINVAL;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -0700931 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932 }
933 return err;
934 }
935 return 1;
936
937out_fill_res:
938 res->prefixlen = prefixlen;
939 res->nh_sel = nh_sel;
940 res->type = fa->fa_type;
941 res->scope = fa->fa_scope;
942 res->fi = fa->fa_info;
943#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
944 res->netmask = mask;
Al Viro1e8aa6f2006-09-26 22:21:22 -0700945 res->network = zone & inet_make_mask(prefixlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946#endif
947 atomic_inc(&res->fi->fib_clntref);
948 return 0;
949}
950
951/* Find appropriate source address to this destination */
952
Al Virob83738a2006-09-26 22:14:15 -0700953__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954{
955 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
956}
957
Thomas Grafbe403ea2006-08-17 18:15:17 -0700958int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Al Viro81f7bf62006-09-27 18:40:00 -0700959 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700960 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700962 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964
Thomas Grafbe403ea2006-08-17 18:15:17 -0700965 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
966 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -0800967 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700968
969 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 rtm->rtm_family = AF_INET;
971 rtm->rtm_dst_len = dst_len;
972 rtm->rtm_src_len = 0;
973 rtm->rtm_tos = tos;
974 rtm->rtm_table = tb_id;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700975 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 rtm->rtm_type = type;
977 rtm->rtm_flags = fi->fib_flags;
978 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700980
981 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700982 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700983
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700985 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
986
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700988 goto nla_put_failure;
989
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700991 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700992
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 if (fi->fib_nhs == 1) {
994 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700995 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700996
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700998 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700999#ifdef CONFIG_NET_CLS_ROUTE
1000 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001001 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001002#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 }
1004#ifdef CONFIG_IP_ROUTE_MULTIPATH
1005 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -07001006 struct rtnexthop *rtnh;
1007 struct nlattr *mp;
1008
1009 mp = nla_nest_start(skb, RTA_MULTIPATH);
1010 if (mp == NULL)
1011 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012
1013 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -07001014 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1015 if (rtnh == NULL)
1016 goto nla_put_failure;
1017
1018 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1019 rtnh->rtnh_hops = nh->nh_weight - 1;
1020 rtnh->rtnh_ifindex = nh->nh_oif;
1021
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -07001023 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001024#ifdef CONFIG_NET_CLS_ROUTE
1025 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001026 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001027#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001028 /* length of rtnetlink header + attributes */
1029 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001031
1032 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 }
1034#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001035 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036
Thomas Grafbe403ea2006-08-17 18:15:17 -07001037nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08001038 nlmsg_cancel(skb, nlh);
1039 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040}
1041
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042/*
1043 Update FIB if:
1044 - local address disappeared -> we must delete all the entries
1045 referring to it.
1046 - device went down -> we must shutdown all nexthops going via it.
1047 */
1048
Al Viro81f7bf62006-09-27 18:40:00 -07001049int fib_sync_down(__be32 local, struct net_device *dev, int force)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050{
1051 int ret = 0;
1052 int scope = RT_SCOPE_NOWHERE;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001053
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054 if (force)
1055 scope = -1;
1056
1057 if (local && fib_info_laddrhash) {
1058 unsigned int hash = fib_laddr_hashfn(local);
1059 struct hlist_head *head = &fib_info_laddrhash[hash];
1060 struct hlist_node *node;
1061 struct fib_info *fi;
1062
1063 hlist_for_each_entry(fi, node, head, fib_lhash) {
1064 if (fi->fib_prefsrc == local) {
1065 fi->fib_flags |= RTNH_F_DEAD;
1066 ret++;
1067 }
1068 }
1069 }
1070
1071 if (dev) {
1072 struct fib_info *prev_fi = NULL;
1073 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1074 struct hlist_head *head = &fib_info_devhash[hash];
1075 struct hlist_node *node;
1076 struct fib_nh *nh;
1077
1078 hlist_for_each_entry(nh, node, head, nh_hash) {
1079 struct fib_info *fi = nh->nh_parent;
1080 int dead;
1081
1082 BUG_ON(!fi->fib_nhs);
1083 if (nh->nh_dev != dev || fi == prev_fi)
1084 continue;
1085 prev_fi = fi;
1086 dead = 0;
1087 change_nexthops(fi) {
1088 if (nh->nh_flags&RTNH_F_DEAD)
1089 dead++;
1090 else if (nh->nh_dev == dev &&
1091 nh->nh_scope != scope) {
1092 nh->nh_flags |= RTNH_F_DEAD;
1093#ifdef CONFIG_IP_ROUTE_MULTIPATH
1094 spin_lock_bh(&fib_multipath_lock);
1095 fi->fib_power -= nh->nh_power;
1096 nh->nh_power = 0;
1097 spin_unlock_bh(&fib_multipath_lock);
1098#endif
1099 dead++;
1100 }
1101#ifdef CONFIG_IP_ROUTE_MULTIPATH
1102 if (force > 1 && nh->nh_dev == dev) {
1103 dead = fi->fib_nhs;
1104 break;
1105 }
1106#endif
1107 } endfor_nexthops(fi)
1108 if (dead == fi->fib_nhs) {
1109 fi->fib_flags |= RTNH_F_DEAD;
1110 ret++;
1111 }
1112 }
1113 }
1114
1115 return ret;
1116}
1117
1118#ifdef CONFIG_IP_ROUTE_MULTIPATH
1119
1120/*
1121 Dead device goes up. We wake up dead nexthops.
1122 It takes sense only on multipath routes.
1123 */
1124
1125int fib_sync_up(struct net_device *dev)
1126{
1127 struct fib_info *prev_fi;
1128 unsigned int hash;
1129 struct hlist_head *head;
1130 struct hlist_node *node;
1131 struct fib_nh *nh;
1132 int ret;
1133
1134 if (!(dev->flags&IFF_UP))
1135 return 0;
1136
1137 prev_fi = NULL;
1138 hash = fib_devindex_hashfn(dev->ifindex);
1139 head = &fib_info_devhash[hash];
1140 ret = 0;
1141
1142 hlist_for_each_entry(nh, node, head, nh_hash) {
1143 struct fib_info *fi = nh->nh_parent;
1144 int alive;
1145
1146 BUG_ON(!fi->fib_nhs);
1147 if (nh->nh_dev != dev || fi == prev_fi)
1148 continue;
1149
1150 prev_fi = fi;
1151 alive = 0;
1152 change_nexthops(fi) {
1153 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1154 alive++;
1155 continue;
1156 }
1157 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1158 continue;
Herbert Xue5ed6392005-10-03 14:35:55 -07001159 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160 continue;
1161 alive++;
1162 spin_lock_bh(&fib_multipath_lock);
1163 nh->nh_power = 0;
1164 nh->nh_flags &= ~RTNH_F_DEAD;
1165 spin_unlock_bh(&fib_multipath_lock);
1166 } endfor_nexthops(fi)
1167
1168 if (alive > 0) {
1169 fi->fib_flags &= ~RTNH_F_DEAD;
1170 ret++;
1171 }
1172 }
1173
1174 return ret;
1175}
1176
1177/*
1178 The algorithm is suboptimal, but it provides really
1179 fair weighted route distribution.
1180 */
1181
1182void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1183{
1184 struct fib_info *fi = res->fi;
1185 int w;
1186
1187 spin_lock_bh(&fib_multipath_lock);
1188 if (fi->fib_power <= 0) {
1189 int power = 0;
1190 change_nexthops(fi) {
1191 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1192 power += nh->nh_weight;
1193 nh->nh_power = nh->nh_weight;
1194 }
1195 } endfor_nexthops(fi);
1196 fi->fib_power = power;
1197 if (power <= 0) {
1198 spin_unlock_bh(&fib_multipath_lock);
1199 /* Race condition: route has just become dead. */
1200 res->nh_sel = 0;
1201 return;
1202 }
1203 }
1204
1205
1206 /* w should be random number [0..fi->fib_power-1],
1207 it is pretty bad approximation.
1208 */
1209
1210 w = jiffies % fi->fib_power;
1211
1212 change_nexthops(fi) {
1213 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1214 if ((w -= nh->nh_power) <= 0) {
1215 nh->nh_power--;
1216 fi->fib_power--;
1217 res->nh_sel = nhsel;
1218 spin_unlock_bh(&fib_multipath_lock);
1219 return;
1220 }
1221 }
1222 } endfor_nexthops(fi);
1223
1224 /* Race condition: route has just become dead. */
1225 res->nh_sel = 0;
1226 spin_unlock_bh(&fib_multipath_lock);
1227}
1228#endif