/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/init.h>

#include <net/arp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/ip_mp_alg.h>

#include "fib_lookup.h"

#define FSprintk(a...)

static DEFINE_RWLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_hash_size;
static unsigned int fib_info_cnt;

#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];

#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope that gcc will optimize the dummy loop away. */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }


static const struct
{
	int	error;
	u8	scope;
} fib_props[RTA_MAX + 1] = {
	{
		.error	= 0,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_UNSPEC */
	{
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_UNICAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_HOST,
	},	/* RTN_LOCAL */
	{
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},	/* RTN_BROADCAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},	/* RTN_ANYCAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_MULTICAST */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_BLACKHOLE */
	{
		.error	= -EHOSTUNREACH,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_UNREACHABLE */
	{
		.error	= -EACCES,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_PROHIBIT */
	{
		.error	= -EAGAIN,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_THROW */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_NAT */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_XRESOLVE */
};


/* Release a nexthop info record */

void free_fib_info(struct fib_info *fi)
{
	if (fi->fib_dead == 0) {
		printk("Freeing alive fib_info %p\n", fi);
		return;
	}
	change_nexthops(fi) {
		if (nh->nh_dev)
			dev_put(nh->nh_dev);
		nh->nh_dev = NULL;
	} endfor_nexthops(fi);
	fib_info_cnt--;
	kfree(fi);
}

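/* Drop one tree reference on @fi; when the last one goes away, unhash the
 * record, mark it dead and drop the tree's reference so it can be freed. */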
void fib_release_info(struct fib_info *fi)
{
	write_lock(&fib_info_lock);
	if (fi && --fi->fib_treeref == 0) {
		hlist_del(&fi->fib_hash);
		if (fi->fib_prefsrc)
			hlist_del(&fi->fib_lhash);
		change_nexthops(fi) {
			if (!nh->nh_dev)
				continue;
			hlist_del(&nh->nh_hash);
		} endfor_nexthops(fi)
		fi->fib_dead = 1;
		fib_info_put(fi);
	}
	write_unlock(&fib_info_lock);
}

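/* Compare the nexthop lists of two fib_info records; returns 0 when they
 * describe the same set of nexthops (RTNH_F_DEAD is ignored). */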
static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
{
	const struct fib_nh *onh = ofi->fib_nh;

	for_nexthops(fi) {
		if (nh->nh_oif != onh->nh_oif ||
		    nh->nh_gw != onh->nh_gw ||
		    nh->nh_scope != onh->nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		    nh->nh_weight != onh->nh_weight ||
#endif
#ifdef CONFIG_NET_CLS_ROUTE
		    nh->nh_tclassid != onh->nh_tclassid ||
#endif
		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
			return -1;
		onh++;
	} endfor_nexthops(fi);
	return 0;
}

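/* Hash a fib_info by protocol, preferred source, priority and nexthop count
 * so that identical routes land in the same chain for fib_find_info(). */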
static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
{
	unsigned int mask = (fib_hash_size - 1);
	unsigned int val = fi->fib_nhs;

	val ^= fi->fib_protocol;
	val ^= fi->fib_prefsrc;
	val ^= fi->fib_priority;

	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}

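/* Look up an existing fib_info identical to @nfi so new routes can share it
 * instead of allocating a duplicate. */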
static struct fib_info *fib_find_info(const struct fib_info *nfi)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn(nfi);
	head = &fib_info_hash[hash];

	hlist_for_each_entry(fi, node, head, fib_hash) {
		if (fi->fib_nhs != nfi->fib_nhs)
			continue;
		if (nfi->fib_protocol == fi->fib_protocol &&
		    nfi->fib_prefsrc == fi->fib_prefsrc &&
		    nfi->fib_priority == fi->fib_priority &&
		    memcmp(nfi->fib_metrics, fi->fib_metrics,
			   sizeof(fi->fib_metrics)) == 0 &&
		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
			return fi;
	}

	return NULL;
}

static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
	unsigned int mask = DEVINDEX_HASHSIZE - 1;

	return (val ^
		(val >> DEVINDEX_HASHBITS) ^
		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
}

/* Check that the gateway is already configured.
 * Used only by the redirect accept routine.
 */

int ip_fib_check_default(u32 gw, struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	unsigned int hash;

	read_lock(&fib_info_lock);

	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	hlist_for_each_entry(nh, node, head, nh_hash) {
		if (nh->nh_dev == dev &&
		    nh->nh_gw == gw &&
		    !(nh->nh_flags&RTNH_F_DEAD)) {
			read_unlock(&fib_info_lock);
			return 0;
		}
	}

	read_unlock(&fib_info_lock);

	return -1;
}

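/* Build an RTM_NEWROUTE/RTM_DELROUTE notification for a changed alias and
 * broadcast it to RTNLGRP_IPV4_ROUTE listeners, echoing it back to the
 * requester when NLM_F_ECHO was set. */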
void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
	       int z, int tb_id,
	       struct nlmsghdr *n, struct netlink_skb_parms *req)
{
	struct sk_buff *skb;
	u32 pid = req ? req->pid : n->nlmsg_pid;
	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);

	skb = alloc_skb(size, GFP_KERNEL);
	if (!skb)
		return;

	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
			  fa->fa_type, fa->fa_scope, &key, z,
			  fa->fa_tos,
			  fa->fa_info, 0) < 0) {
		kfree_skb(skb);
		return;
	}
	NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
	if (n->nlmsg_flags&NLM_F_ECHO)
		atomic_inc(&skb->users);
	netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
	if (n->nlmsg_flags&NLM_F_ECHO)
		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
}

/* Return the first fib alias matching TOS with
 * priority less than or equal to PRIO.
 */
struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
{
	if (fah) {
		struct fib_alias *fa;
		list_for_each_entry(fa, fah, fa_list) {
			if (fa->fa_tos > tos)
				continue;
			if (fa->fa_info->fib_priority >= prio ||
			    fa->fa_tos < tos)
				return fa;
		}
	}
	return NULL;
}

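/* Helper for default route selection: check the ARP state of this route's
 * gateway.  Returns 0 if the route still looks usable, 1 if it should be
 * skipped; a possible fallback is remembered in *last_resort / *last_idx. */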
int fib_detect_death(struct fib_info *fi, int order,
		     struct fib_info **last_resort, int *last_idx, int *dflt)
{
	struct neighbour *n;
	int state = NUD_NONE;

	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
	if (n) {
		state = n->nud_state;
		neigh_release(n);
	}
	if (state==NUD_REACHABLE)
		return 0;
	if ((state&NUD_VALID) && order != *dflt)
		return 0;
	if ((state&NUD_VALID) ||
	    (*last_idx<0 && order > *dflt)) {
		*last_resort = fi;
		*last_idx = order;
	}
	return 1;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
{
	while (RTA_OK(attr,attrlen)) {
		if (attr->rta_type == type)
			return *(u32*)RTA_DATA(attr);
		attr = RTA_NEXT(attr, attrlen);
	}
	return 0;
}

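/* Count the nexthops encoded in an RTA_MULTIPATH attribute; returns 0 if the
 * attribute is malformed. */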
static int
fib_count_nexthops(struct rtattr *rta)
{
	int nhs = 0;
	struct rtnexthop *nhp = RTA_DATA(rta);
	int nhlen = RTA_PAYLOAD(rta);

	while (nhlen >= (int)sizeof(struct rtnexthop)) {
		if ((nhlen -= nhp->rtnh_len) < 0)
			return 0;
		nhs++;
		nhp = RTNH_NEXT(nhp);
	};
	return nhs;
}

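/* Fill the fib_nh array of @fi from the rtnexthop records carried in an
 * RTA_MULTIPATH attribute. */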
static int
fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
{
	struct rtnexthop *nhp = RTA_DATA(rta);
	int nhlen = RTA_PAYLOAD(rta);

	change_nexthops(fi) {
		int attrlen = nhlen - sizeof(struct rtnexthop);
		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
			return -EINVAL;
		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
		nh->nh_oif = nhp->rtnh_ifindex;
		nh->nh_weight = nhp->rtnh_hops + 1;
		if (attrlen) {
			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
#ifdef CONFIG_NET_CLS_ROUTE
			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
#endif
		}
		nhp = RTNH_NEXT(nhp);
	} endfor_nexthops(fi);
	return 0;
}

#endif

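/* Check whether the nexthop description in a netlink request matches @fi.
 * Returns 0 on match, 1 on mismatch and -EINVAL for a malformed request. */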
int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
		 struct fib_info *fi)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	struct rtnexthop *nhp;
	int nhlen;
#endif

	if (rta->rta_priority &&
	    *rta->rta_priority != fi->fib_priority)
		return 1;

	if (rta->rta_oif || rta->rta_gw) {
		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
		    (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
			return 0;
		return 1;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (rta->rta_mp == NULL)
		return 0;
	nhp = RTA_DATA(rta->rta_mp);
	nhlen = RTA_PAYLOAD(rta->rta_mp);

	for_nexthops(fi) {
		int attrlen = nhlen - sizeof(struct rtnexthop);
		u32 gw;

		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
			return -EINVAL;
		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
			return 1;
		if (attrlen) {
			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
			if (gw && gw != nh->nh_gw)
				return 1;
#ifdef CONFIG_NET_CLS_ROUTE
			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
			if (gw && gw != nh->nh_tclassid)
				return 1;
#endif
		}
		nhp = RTNH_NEXT(nhp);
	} endfor_nexthops(fi);
#endif
	return 0;
}


/*
   Picture
   -------

   The semantics of nexthops are very messy for historical reasons.
   We have to take into account that:
   a) the gateway can actually be a local interface address,
      so that a gatewayed route is in fact direct.
   b) the gateway must be an on-link address, possibly
      described not by an ifaddr but by a direct route.
   c) if both gateway and interface are specified, they must not
      contradict each other.
   d) if we use tunnel routes, the gateway may not be on-link.

   Attempting to reconcile all of these (alas, self-contradictory)
   conditions results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the amount of code barely
   grows in practice, but the result is much more general.
   Every prefix is assigned a "scope" value: "host" is a local address,
   "link" is a direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is a true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have a narrower scope. This recursion stops
   when gw has LOCAL scope or when "nexthop" is declared ONLINK,
   which means that gw is forced to be on-link.

   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes to coexist
   in peace.

   Normally it looks like this:

   {universe prefix}  -> (gw, oif) [scope link]
			  |
			  |-> {link prefix} -> (gw, oif) [scope local]
						|
						|-> {local prefix} (terminal node)
 */

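/* Resolve and validate one nexthop of a new route: determine the scope and
 * output device of its gateway (via fib_lookup(), or directly when
 * RTNH_F_ONLINK forces a link-scope gateway); a gatewayless nexthop is
 * simply bound to the requested interface. */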
static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
{
	int err;

	if (nh->nh_gw) {
		struct fib_result res;

#ifdef CONFIG_IP_ROUTE_PERVASIVE
		if (nh->nh_flags&RTNH_F_PERVASIVE)
			return 0;
#endif
		if (nh->nh_flags&RTNH_F_ONLINK) {
			struct net_device *dev;

			if (r->rtm_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
				return -EINVAL;
			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
				return -ENODEV;
			if (!(dev->flags&IFF_UP))
				return -ENETDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		{
			struct flowi fl = { .nl_u = { .ip4_u =
						      { .daddr = nh->nh_gw,
							.scope = r->rtm_scope + 1 } },
					    .oif = nh->nh_oif };

			/* It is not necessary, but requires a bit of thinking */
			if (fl.fl4_scope < RT_SCOPE_LINK)
				fl.fl4_scope = RT_SCOPE_LINK;
			if ((err = fib_lookup(&fl, &res)) != 0)
				return err;
		}
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
			goto out;
		dev_hold(nh->nh_dev);
		err = -ENETDOWN;
		if (!(nh->nh_dev->flags & IFF_UP))
			goto out;
		err = 0;
out:
		fib_res_put(&res);
		return err;
	} else {
		struct in_device *in_dev;

		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
			return -EINVAL;

		in_dev = inetdev_by_index(nh->nh_oif);
		if (in_dev == NULL)
			return -ENODEV;
		if (!(in_dev->dev->flags&IFF_UP)) {
			in_dev_put(in_dev);
			return -ENETDOWN;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		in_dev_put(in_dev);
	}
	return 0;
}

static inline unsigned int fib_laddr_hashfn(u32 val)
{
	unsigned int mask = (fib_hash_size - 1);

	return (val ^ (val >> 7) ^ (val >> 14)) & mask;
}

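/* The fib_info hash tables are sized in powers of two; small tables come
 * from kmalloc(), larger ones fall back to whole pages. */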
static struct hlist_head *fib_hash_alloc(int bytes)
{
	if (bytes <= PAGE_SIZE)
		return kmalloc(bytes, GFP_KERNEL);
	else
		return (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(bytes));
}

static void fib_hash_free(struct hlist_head *hash, int bytes)
{
	if (!hash)
		return;

	if (bytes <= PAGE_SIZE)
		kfree(hash);
	else
		free_pages((unsigned long) hash, get_order(bytes));
}

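/* Rehash every fib_info into freshly allocated info and laddr tables; the
 * old tables are freed after the new ones are installed under
 * fib_info_lock. */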
static void fib_hash_move(struct hlist_head *new_info_hash,
			  struct hlist_head *new_laddrhash,
			  unsigned int new_size)
{
	struct hlist_head *old_info_hash, *old_laddrhash;
	unsigned int old_size = fib_hash_size;
	unsigned int i, bytes;

	write_lock(&fib_info_lock);
	old_info_hash = fib_info_hash;
	old_laddrhash = fib_info_laddrhash;
	fib_hash_size = new_size;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *head = &fib_info_hash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
			struct hlist_head *dest;
			unsigned int new_hash;

			hlist_del(&fi->fib_hash);

			new_hash = fib_info_hashfn(fi);
			dest = &new_info_hash[new_hash];
			hlist_add_head(&fi->fib_hash, dest);
		}
	}
	fib_info_hash = new_info_hash;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *lhead = &fib_info_laddrhash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
			struct hlist_head *ldest;
			unsigned int new_hash;

			hlist_del(&fi->fib_lhash);

			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
			ldest = &new_laddrhash[new_hash];
			hlist_add_head(&fi->fib_lhash, ldest);
		}
	}
	fib_info_laddrhash = new_laddrhash;

	write_unlock(&fib_info_lock);

	bytes = old_size * sizeof(struct hlist_head *);
	fib_hash_free(old_info_hash, bytes);
	fib_hash_free(old_laddrhash, bytes);
}

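/* Build a fib_info for a new route from its netlink description: grow the
 * hash tables if needed, parse and validate the nexthops, then either link
 * the new record into the hashes or reuse an existing identical one. */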
struct fib_info *
fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
		const struct nlmsghdr *nlh, int *errp)
{
	int err;
	struct fib_info *fi = NULL;
	struct fib_info *ofi;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	int nhs = 1;
#else
	const int nhs = 1;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	u32 mp_alg = IP_MP_ALG_NONE;
#endif

	/* Fast check to catch the most weird cases */
	if (fib_props[r->rtm_type].scope > r->rtm_scope)
		goto err_inval;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (rta->rta_mp) {
		nhs = fib_count_nexthops(rta->rta_mp);
		if (nhs == 0)
			goto err_inval;
	}
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	if (rta->rta_mp_alg) {
		mp_alg = *rta->rta_mp_alg;

		if (mp_alg < IP_MP_ALG_NONE ||
		    mp_alg > IP_MP_ALG_MAX)
			goto err_inval;
	}
#endif

	err = -ENOBUFS;
	if (fib_info_cnt >= fib_hash_size) {
		unsigned int new_size = fib_hash_size << 1;
		struct hlist_head *new_info_hash;
		struct hlist_head *new_laddrhash;
		unsigned int bytes;

		if (!new_size)
			new_size = 1;
		bytes = new_size * sizeof(struct hlist_head *);
		new_info_hash = fib_hash_alloc(bytes);
		new_laddrhash = fib_hash_alloc(bytes);
		if (!new_info_hash || !new_laddrhash) {
			fib_hash_free(new_info_hash, bytes);
			fib_hash_free(new_laddrhash, bytes);
		} else {
			memset(new_info_hash, 0, bytes);
			memset(new_laddrhash, 0, bytes);

			fib_hash_move(new_info_hash, new_laddrhash, new_size);
		}

		if (!fib_hash_size)
			goto failure;
	}

	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
	if (fi == NULL)
		goto failure;
	fib_info_cnt++;
	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));

	fi->fib_protocol = r->rtm_protocol;

	fi->fib_nhs = nhs;
	change_nexthops(fi) {
		nh->nh_parent = fi;
	} endfor_nexthops(fi)

	fi->fib_flags = r->rtm_flags;
	if (rta->rta_priority)
		fi->fib_priority = *rta->rta_priority;
	if (rta->rta_mx) {
		int attrlen = RTA_PAYLOAD(rta->rta_mx);
		struct rtattr *attr = RTA_DATA(rta->rta_mx);

		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > RTAX_MAX)
					goto err_inval;
				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}
	if (rta->rta_prefsrc)
		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);

	if (rta->rta_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
			goto failure;
		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
			goto err_inval;
		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
			goto err_inval;
#ifdef CONFIG_NET_CLS_ROUTE
		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
			goto err_inval;
#endif
#else
		goto err_inval;
#endif
	} else {
		struct fib_nh *nh = fi->fib_nh;
		if (rta->rta_oif)
			nh->nh_oif = *rta->rta_oif;
		if (rta->rta_gw)
			memcpy(&nh->nh_gw, rta->rta_gw, 4);
#ifdef CONFIG_NET_CLS_ROUTE
		if (rta->rta_flow)
			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
#endif
		nh->nh_flags = r->rtm_flags;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		nh->nh_weight = 1;
#endif
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	fi->fib_mp_alg = mp_alg;
#endif

	if (fib_props[r->rtm_type].error) {
		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
			goto err_inval;
		goto link_it;
	}

	if (r->rtm_scope > RT_SCOPE_HOST)
		goto err_inval;

	if (r->rtm_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* Local address is added. */
		if (nhs != 1 || nh->nh_gw)
			goto err_inval;
		nh->nh_scope = RT_SCOPE_NOWHERE;
		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
		err = -ENODEV;
		if (nh->nh_dev == NULL)
			goto failure;
	} else {
		change_nexthops(fi) {
			if ((err = fib_check_nh(r, fi, nh)) != 0)
				goto failure;
		} endfor_nexthops(fi)
	}

	if (fi->fib_prefsrc) {
		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
				goto err_inval;
	}

link_it:
	if ((ofi = fib_find_info(fi)) != NULL) {
		fi->fib_dead = 1;
		free_fib_info(fi);
		ofi->fib_treeref++;
		return ofi;
	}

	fi->fib_treeref++;
	atomic_inc(&fi->fib_clntref);
	write_lock(&fib_info_lock);
	hlist_add_head(&fi->fib_hash,
		       &fib_info_hash[fib_info_hashfn(fi)]);
	if (fi->fib_prefsrc) {
		struct hlist_head *head;

		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
		hlist_add_head(&fi->fib_lhash, head);
	}
	change_nexthops(fi) {
		struct hlist_head *head;
		unsigned int hash;

		if (!nh->nh_dev)
			continue;
		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
		head = &fib_info_devhash[hash];
		hlist_add_head(&nh->nh_hash, head);
	} endfor_nexthops(fi)
	write_unlock(&fib_info_lock);
	return fi;

err_inval:
	err = -EINVAL;

failure:
	*errp = err;
	if (fi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
	}
	return NULL;
}

/* Note! fib_semantic_match intentionally uses RCU list functions. */
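/* Walk the alias list of a matching prefix and pick the first alias and
 * nexthop usable for this flow; on success *res is filled in and a reference
 * is taken on the fib_info.  Returns 0 on success, 1 if nothing matched, or
 * the route type's error code (e.g. -EHOSTUNREACH for an unreachable route). */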
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
		       struct fib_result *res, __u32 zone, __u32 mask,
		       int prefixlen)
{
	struct fib_alias *fa;
	int nh_sel = 0;

	list_for_each_entry_rcu(fa, head, fa_list) {
		int err;

		if (fa->fa_tos &&
		    fa->fa_tos != flp->fl4_tos)
			continue;

		if (fa->fa_scope < flp->fl4_scope)
			continue;

		fa->fa_state |= FA_S_ACCESSED;

		err = fib_props[fa->fa_type].error;
		if (err == 0) {
			struct fib_info *fi = fa->fa_info;

			if (fi->fib_flags & RTNH_F_DEAD)
				continue;

			switch (fa->fa_type) {
			case RTN_UNICAST:
			case RTN_LOCAL:
			case RTN_BROADCAST:
			case RTN_ANYCAST:
			case RTN_MULTICAST:
				for_nexthops(fi) {
					if (nh->nh_flags&RTNH_F_DEAD)
						continue;
					if (!flp->oif || flp->oif == nh->nh_oif)
						break;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (nhsel < fi->fib_nhs) {
					nh_sel = nhsel;
					goto out_fill_res;
				}
#else
				if (nhsel < 1) {
					goto out_fill_res;
				}
#endif
				endfor_nexthops(fi);
				continue;

			default:
				printk(KERN_DEBUG "impossible 102\n");
				return -EINVAL;
			};
		}
		return err;
	}
	return 1;

out_fill_res:
	res->prefixlen = prefixlen;
	res->nh_sel = nh_sel;
	res->type = fa->fa_type;
	res->scope = fa->fa_scope;
	res->fi = fa->fa_info;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	res->netmask = mask;
	res->network = zone &
		(0xFFFFFFFF >> (32 - prefixlen));
#endif
	atomic_inc(&res->fi->fib_clntref);
	return 0;
}

/* Find an appropriate source address for this destination */

u32 __fib_res_prefsrc(struct fib_result *res)
{
	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
}

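/* Fill an skb with an rtmsg and the attributes describing this route.
 * Returns skb->len on success, or -1 if the skb ran out of room. */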
int
fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
	      struct fib_info *fi, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = tos;
	rtm->rtm_table = tb_id;
	rtm->rtm_type = type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = scope;
	if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 4, dst);
	rtm->rtm_protocol = fi->fib_protocol;
	if (fi->fib_priority)
		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
#ifdef CONFIG_NET_CLS_ROUTE
	if (fi->fib_nh[0].nh_tclassid)
		RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
#endif
	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
		goto rtattr_failure;
	if (fi->fib_prefsrc)
		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
	if (fi->fib_nhs == 1) {
		if (fi->fib_nh->nh_gw)
			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
		if (fi->fib_nh->nh_oif)
			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
	}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (fi->fib_nhs > 1) {
		struct rtnexthop *nhp;
		struct rtattr *mp_head;
		if (skb_tailroom(skb) <= RTA_SPACE(0))
			goto rtattr_failure;
		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));

		for_nexthops(fi) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = nh->nh_flags & 0xFF;
			nhp->rtnh_hops = nh->nh_weight-1;
			nhp->rtnh_ifindex = nh->nh_oif;
			if (nh->nh_gw)
				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
		} endfor_nexthops(fi);
		mp_head->rta_type = RTA_MULTIPATH;
		mp_head->rta_len = skb->tail - (u8*)mp_head;
	}
#endif
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

#ifndef CONFIG_IP_NOSIOCRT

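/* Translate a legacy SIOCADDRT/SIOCDELRT struct rtentry into the
 * rtmsg/kern_rta representation used by the netlink route code. */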
int
fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
		    struct kern_rta *rta, struct rtentry *r)
{
	int plen;
	u32 *ptr;

	memset(rtm, 0, sizeof(*rtm));
	memset(rta, 0, sizeof(*rta));

	if (r->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/* Check mask for validity:
	   a) it must be contiguous.
	   b) destination must have all host bits clear.
	   c) if application forgot to set correct family (AF_INET),
	      reject request unless it is absolutely clear i.e.
	      both family and mask are zero.
	 */
	plen = 32;
	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
	if (!(r->rt_flags&RTF_HOST)) {
		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
		if (r->rt_genmask.sa_family != AF_INET) {
			if (mask || r->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}
		if (bad_mask(mask, *ptr))
			return -EINVAL;
		plen = inet_mask_len(mask);
	}

	nl->nlmsg_flags = NLM_F_REQUEST;
	nl->nlmsg_pid = current->pid;
	nl->nlmsg_seq = 0;
	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
	if (cmd == SIOCDELRT) {
		nl->nlmsg_type = RTM_DELROUTE;
		nl->nlmsg_flags = 0;
	} else {
		nl->nlmsg_type = RTM_NEWROUTE;
		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
		rtm->rtm_protocol = RTPROT_BOOT;
	}

	rtm->rtm_dst_len = plen;
	rta->rta_dst = ptr;

	if (r->rt_metric) {
		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
		rta->rta_priority = (u32*)&r->rt_pad3;
	}
	if (r->rt_flags&RTF_REJECT) {
		rtm->rtm_scope = RT_SCOPE_HOST;
		rtm->rtm_type = RTN_UNREACHABLE;
		return 0;
	}
	rtm->rtm_scope = RT_SCOPE_NOWHERE;
	rtm->rtm_type = RTN_UNICAST;

	if (r->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
			return -EFAULT;
		devname[IFNAMSIZ-1] = 0;
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(devname);
		if (!dev)
			return -ENODEV;
		rta->rta_oif = &dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			rta->rta_prefsrc = &ifa->ifa_local;
		}
	}

	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
		rta->rta_gw = ptr;
		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	}

	if (cmd == SIOCDELRT)
		return 0;

	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
		return -EINVAL;

	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
		rtm->rtm_scope = RT_SCOPE_LINK;

	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
		struct rtattr *rec;
		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;
		rta->rta_mx = mx;
		mx->rta_type = RTA_METRICS;
		mx->rta_len = RTA_LENGTH(0);
		if (r->rt_flags&RTF_MTU) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_ADVMSS;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
		}
		if (r->rt_flags&RTF_WINDOW) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_WINDOW;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			*(u32*)RTA_DATA(rec) = r->rt_window;
		}
		if (r->rt_flags&RTF_IRTT) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_RTT;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
		}
	}
	return 0;
}

#endif

/*
   Update FIB if:
   - local address disappeared -> we must delete all the entries
     referring to it.
   - device went down -> we must shutdown all nexthops going via it.
 */

int fib_sync_down(u32 local, struct net_device *dev, int force)
{
	int ret = 0;
	int scope = RT_SCOPE_NOWHERE;

	if (force)
		scope = -1;

	if (local && fib_info_laddrhash) {
		unsigned int hash = fib_laddr_hashfn(local);
		struct hlist_head *head = &fib_info_laddrhash[hash];
		struct hlist_node *node;
		struct fib_info *fi;

		hlist_for_each_entry(fi, node, head, fib_lhash) {
			if (fi->fib_prefsrc == local) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	if (dev) {
		struct fib_info *prev_fi = NULL;
		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
		struct hlist_head *head = &fib_info_devhash[hash];
		struct hlist_node *node;
		struct fib_nh *nh;

		hlist_for_each_entry(nh, node, head, nh_hash) {
			struct fib_info *fi = nh->nh_parent;
			int dead;

			BUG_ON(!fi->fib_nhs);
			if (nh->nh_dev != dev || fi == prev_fi)
				continue;
			prev_fi = fi;
			dead = 0;
			change_nexthops(fi) {
				if (nh->nh_flags&RTNH_F_DEAD)
					dead++;
				else if (nh->nh_dev == dev &&
					 nh->nh_scope != scope) {
					nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
					spin_lock_bh(&fib_multipath_lock);
					fi->fib_power -= nh->nh_power;
					nh->nh_power = 0;
					spin_unlock_bh(&fib_multipath_lock);
#endif
					dead++;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (force > 1 && nh->nh_dev == dev) {
					dead = fi->fib_nhs;
					break;
				}
#endif
			} endfor_nexthops(fi)
			if (dead == fi->fib_nhs) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	return ret;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
   A dead device comes back up: wake up its dead nexthops.
   This only makes sense for multipath routes.
 */

int fib_sync_up(struct net_device *dev)
{
	struct fib_info *prev_fi;
	unsigned int hash;
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	int ret;

	if (!(dev->flags&IFF_UP))
		return 0;

	prev_fi = NULL;
	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	ret = 0;

	hlist_for_each_entry(nh, node, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int alive;

		BUG_ON(!fi->fib_nhs);
		if (nh->nh_dev != dev || fi == prev_fi)
			continue;

		prev_fi = fi;
		alive = 0;
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				alive++;
				continue;
			}
			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
				continue;
			if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
				continue;
			alive++;
			spin_lock_bh(&fib_multipath_lock);
			nh->nh_power = 0;
			nh->nh_flags &= ~RTNH_F_DEAD;
			spin_unlock_bh(&fib_multipath_lock);
		} endfor_nexthops(fi)

		if (alive > 0) {
			fi->fib_flags &= ~RTNH_F_DEAD;
			ret++;
		}
	}

	return ret;
}

/*
   The algorithm is suboptimal, but it provides really
   fair weighted route distribution.
 */

void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
	struct fib_info *fi = res->fi;
	int w;

	spin_lock_bh(&fib_multipath_lock);
	if (fi->fib_power <= 0) {
		int power = 0;
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				power += nh->nh_weight;
				nh->nh_power = nh->nh_weight;
			}
		} endfor_nexthops(fi);
		fi->fib_power = power;
		if (power <= 0) {
			spin_unlock_bh(&fib_multipath_lock);
			/* Race condition: route has just become dead. */
			res->nh_sel = 0;
			return;
		}
	}


	/* w should be a random number in [0..fi->fib_power-1];
	   jiffies is a pretty bad approximation.
	 */

	w = jiffies % fi->fib_power;

	change_nexthops(fi) {
		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
			if ((w -= nh->nh_power) <= 0) {
				nh->nh_power--;
				fi->fib_power--;
				res->nh_sel = nhsel;
				spin_unlock_bh(&fib_multipath_lock);
				return;
			}
		}
	} endfor_nexthops(fi);

	/* Race condition: route has just become dead. */
	res->nh_sel = 0;
	spin_unlock_bh(&fib_multipath_lock);
}
#endif