/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
17
18#include <linux/config.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/types.h>
23#include <linux/kernel.h>
24#include <linux/jiffies.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
36#include <linux/netlink.h>
37#include <linux/init.h>
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
45#include <net/ip_mp_alg.h>
46
47#include "fib_lookup.h"
48
49#define FSprintk(a...)
50
51static DEFINE_RWLOCK(fib_info_lock);
52static struct hlist_head *fib_info_hash;
53static struct hlist_head *fib_info_laddrhash;
54static unsigned int fib_hash_size;
55static unsigned int fib_info_cnt;
56
57#define DEVINDEX_HASHBITS 8
58#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
59static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
60
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace, declare the loop
 * variables "nhsel" (index) and "nh" (current nexthop; const for
 * for_nexthops, writable for change_nexthops) and start a for loop whose
 * body the caller supplies.  endfor_nexthops(fi) supplies the matching
 * closing brace, so the two must always be used as a pair.
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) {						\
	int nhsel;							\
	const struct fib_nh *nh;					\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nh;						\
	for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh);		\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath the loop runs exactly once over the first nexthop;
 * the hope is that gcc optimizes the dummy loop away.
 */

#define for_nexthops(fi) {						\
	int nhsel = 0;							\
	const struct fib_nh *nh = (fi)->fib_nh;				\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh);		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
84
85
86static struct
87{
88 int error;
89 u8 scope;
90} fib_props[RTA_MAX + 1] = {
91 {
92 .error = 0,
93 .scope = RT_SCOPE_NOWHERE,
94 }, /* RTN_UNSPEC */
95 {
96 .error = 0,
97 .scope = RT_SCOPE_UNIVERSE,
98 }, /* RTN_UNICAST */
99 {
100 .error = 0,
101 .scope = RT_SCOPE_HOST,
102 }, /* RTN_LOCAL */
103 {
104 .error = 0,
105 .scope = RT_SCOPE_LINK,
106 }, /* RTN_BROADCAST */
107 {
108 .error = 0,
109 .scope = RT_SCOPE_LINK,
110 }, /* RTN_ANYCAST */
111 {
112 .error = 0,
113 .scope = RT_SCOPE_UNIVERSE,
114 }, /* RTN_MULTICAST */
115 {
116 .error = -EINVAL,
117 .scope = RT_SCOPE_UNIVERSE,
118 }, /* RTN_BLACKHOLE */
119 {
120 .error = -EHOSTUNREACH,
121 .scope = RT_SCOPE_UNIVERSE,
122 }, /* RTN_UNREACHABLE */
123 {
124 .error = -EACCES,
125 .scope = RT_SCOPE_UNIVERSE,
126 }, /* RTN_PROHIBIT */
127 {
128 .error = -EAGAIN,
129 .scope = RT_SCOPE_UNIVERSE,
130 }, /* RTN_THROW */
131 {
132 .error = -EINVAL,
133 .scope = RT_SCOPE_NOWHERE,
134 }, /* RTN_NAT */
135 {
136 .error = -EINVAL,
137 .scope = RT_SCOPE_NOWHERE,
138 }, /* RTN_XRESOLVE */
139};
140
141
142/* Release a nexthop info record */
143
144void free_fib_info(struct fib_info *fi)
145{
146 if (fi->fib_dead == 0) {
147 printk("Freeing alive fib_info %p\n", fi);
148 return;
149 }
150 change_nexthops(fi) {
151 if (nh->nh_dev)
152 dev_put(nh->nh_dev);
153 nh->nh_dev = NULL;
154 } endfor_nexthops(fi);
155 fib_info_cnt--;
156 kfree(fi);
157}
158
159void fib_release_info(struct fib_info *fi)
160{
161 write_lock(&fib_info_lock);
162 if (fi && --fi->fib_treeref == 0) {
163 hlist_del(&fi->fib_hash);
164 if (fi->fib_prefsrc)
165 hlist_del(&fi->fib_lhash);
166 change_nexthops(fi) {
167 if (!nh->nh_dev)
168 continue;
169 hlist_del(&nh->nh_hash);
170 } endfor_nexthops(fi)
171 fi->fib_dead = 1;
172 fib_info_put(fi);
173 }
174 write_unlock(&fib_info_lock);
175}
176
177static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
178{
179 const struct fib_nh *onh = ofi->fib_nh;
180
181 for_nexthops(fi) {
182 if (nh->nh_oif != onh->nh_oif ||
183 nh->nh_gw != onh->nh_gw ||
184 nh->nh_scope != onh->nh_scope ||
185#ifdef CONFIG_IP_ROUTE_MULTIPATH
186 nh->nh_weight != onh->nh_weight ||
187#endif
188#ifdef CONFIG_NET_CLS_ROUTE
189 nh->nh_tclassid != onh->nh_tclassid ||
190#endif
191 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
192 return -1;
193 onh++;
194 } endfor_nexthops(fi);
195 return 0;
196}
197
198static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
199{
200 unsigned int mask = (fib_hash_size - 1);
201 unsigned int val = fi->fib_nhs;
202
203 val ^= fi->fib_protocol;
204 val ^= fi->fib_prefsrc;
205 val ^= fi->fib_priority;
206
207 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
208}
209
210static struct fib_info *fib_find_info(const struct fib_info *nfi)
211{
212 struct hlist_head *head;
213 struct hlist_node *node;
214 struct fib_info *fi;
215 unsigned int hash;
216
217 hash = fib_info_hashfn(nfi);
218 head = &fib_info_hash[hash];
219
220 hlist_for_each_entry(fi, node, head, fib_hash) {
221 if (fi->fib_nhs != nfi->fib_nhs)
222 continue;
223 if (nfi->fib_protocol == fi->fib_protocol &&
224 nfi->fib_prefsrc == fi->fib_prefsrc &&
225 nfi->fib_priority == fi->fib_priority &&
226 memcmp(nfi->fib_metrics, fi->fib_metrics,
227 sizeof(fi->fib_metrics)) == 0 &&
228 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
229 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
230 return fi;
231 }
232
233 return NULL;
234}
235
236static inline unsigned int fib_devindex_hashfn(unsigned int val)
237{
238 unsigned int mask = DEVINDEX_HASHSIZE - 1;
239
240 return (val ^
241 (val >> DEVINDEX_HASHBITS) ^
242 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
243}
244
245/* Check, that the gateway is already configured.
246 Used only by redirect accept routine.
247 */
248
249int ip_fib_check_default(u32 gw, struct net_device *dev)
250{
251 struct hlist_head *head;
252 struct hlist_node *node;
253 struct fib_nh *nh;
254 unsigned int hash;
255
256 read_lock(&fib_info_lock);
257
258 hash = fib_devindex_hashfn(dev->ifindex);
259 head = &fib_info_devhash[hash];
260 hlist_for_each_entry(nh, node, head, nh_hash) {
261 if (nh->nh_dev == dev &&
262 nh->nh_gw == gw &&
263 !(nh->nh_flags&RTNH_F_DEAD)) {
264 read_unlock(&fib_info_lock);
265 return 0;
266 }
267 }
268
269 read_unlock(&fib_info_lock);
270
271 return -1;
272}
273
274void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
275 int z, int tb_id,
276 struct nlmsghdr *n, struct netlink_skb_parms *req)
277{
278 struct sk_buff *skb;
Jamal Hadi Salim9ed19f32005-06-18 22:55:51 -0700279 u32 pid = req ? req->pid : n->nlmsg_pid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
281
282 skb = alloc_skb(size, GFP_KERNEL);
283 if (!skb)
284 return;
285
286 if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
287 fa->fa_type, fa->fa_scope, &key, z,
288 fa->fa_tos,
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -0700289 fa->fa_info, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290 kfree_skb(skb);
291 return;
292 }
Patrick McHardyac6d4392005-08-14 19:29:52 -0700293 NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 if (n->nlmsg_flags&NLM_F_ECHO)
295 atomic_inc(&skb->users);
Patrick McHardyac6d4392005-08-14 19:29:52 -0700296 netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 if (n->nlmsg_flags&NLM_F_ECHO)
298 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
299}
300
301/* Return the first fib alias matching TOS with
302 * priority less than or equal to PRIO.
303 */
304struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
305{
306 if (fah) {
307 struct fib_alias *fa;
308 list_for_each_entry(fa, fah, fa_list) {
309 if (fa->fa_tos > tos)
310 continue;
311 if (fa->fa_info->fib_priority >= prio ||
312 fa->fa_tos < tos)
313 return fa;
314 }
315 }
316 return NULL;
317}
318
319int fib_detect_death(struct fib_info *fi, int order,
320 struct fib_info **last_resort, int *last_idx, int *dflt)
321{
322 struct neighbour *n;
323 int state = NUD_NONE;
324
325 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
326 if (n) {
327 state = n->nud_state;
328 neigh_release(n);
329 }
330 if (state==NUD_REACHABLE)
331 return 0;
332 if ((state&NUD_VALID) && order != *dflt)
333 return 0;
334 if ((state&NUD_VALID) ||
335 (*last_idx<0 && order > *dflt)) {
336 *last_resort = fi;
337 *last_idx = order;
338 }
339 return 1;
340}
341
342#ifdef CONFIG_IP_ROUTE_MULTIPATH
343
344static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
345{
346 while (RTA_OK(attr,attrlen)) {
347 if (attr->rta_type == type)
348 return *(u32*)RTA_DATA(attr);
349 attr = RTA_NEXT(attr, attrlen);
350 }
351 return 0;
352}
353
/*
 * Count the rtnexthop records packed in the payload of attribute @rta.
 *
 * Returns the number of records, or 0 when a record is malformed: its
 * rtnh_len is smaller than the rtnexthop header itself (a zero length
 * would otherwise never advance and loop forever) or larger than what
 * is left of the payload.
 */
static int
fib_count_nexthops(struct rtattr *rta)
{
	int nhs = 0;
	struct rtnexthop *nhp = RTA_DATA(rta);
	int nhlen = RTA_PAYLOAD(rta);

	while (nhlen >= (int)sizeof(struct rtnexthop)) {
		/* A record can never be shorter than its own header. */
		if (nhp->rtnh_len < sizeof(struct rtnexthop))
			return 0;
		if ((nhlen -= nhp->rtnh_len) < 0)
			return 0;
		nhs++;
		nhp = RTNH_NEXT(nhp);
	}
	return nhs;
}
369
370static int
371fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
372{
373 struct rtnexthop *nhp = RTA_DATA(rta);
374 int nhlen = RTA_PAYLOAD(rta);
375
376 change_nexthops(fi) {
377 int attrlen = nhlen - sizeof(struct rtnexthop);
378 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
379 return -EINVAL;
380 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
381 nh->nh_oif = nhp->rtnh_ifindex;
382 nh->nh_weight = nhp->rtnh_hops + 1;
383 if (attrlen) {
384 nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
385#ifdef CONFIG_NET_CLS_ROUTE
386 nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
387#endif
388 }
389 nhp = RTNH_NEXT(nhp);
390 } endfor_nexthops(fi);
391 return 0;
392}
393
394#endif
395
396int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
397 struct fib_info *fi)
398{
399#ifdef CONFIG_IP_ROUTE_MULTIPATH
400 struct rtnexthop *nhp;
401 int nhlen;
402#endif
403
404 if (rta->rta_priority &&
405 *rta->rta_priority != fi->fib_priority)
406 return 1;
407
408 if (rta->rta_oif || rta->rta_gw) {
409 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
410 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
411 return 0;
412 return 1;
413 }
414
415#ifdef CONFIG_IP_ROUTE_MULTIPATH
416 if (rta->rta_mp == NULL)
417 return 0;
418 nhp = RTA_DATA(rta->rta_mp);
419 nhlen = RTA_PAYLOAD(rta->rta_mp);
420
421 for_nexthops(fi) {
422 int attrlen = nhlen - sizeof(struct rtnexthop);
423 u32 gw;
424
425 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
426 return -EINVAL;
427 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
428 return 1;
429 if (attrlen) {
430 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
431 if (gw && gw != nh->nh_gw)
432 return 1;
433#ifdef CONFIG_NET_CLS_ROUTE
434 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
435 if (gw && gw != nh->nh_tclassid)
436 return 1;
437#endif
438 }
439 nhp = RTNH_NEXT(nhp);
440 } endfor_nexthops(fi);
441#endif
442 return 0;
443}
444
445
446/*
447 Picture
448 -------
449
   The semantics of a nexthop are very messy for historical reasons.
451 We have to take into account, that:
452 a) gateway can be actually local interface address,
453 so that gatewayed route is direct.
454 b) gateway must be on-link address, possibly
455 described not by an ifaddr, but also by a direct route.
456 c) If both gateway and interface are specified, they should not
457 contradict.
458 d) If we use tunnel routes, gateway could be not on-link.
459
460 Attempt to reconcile all of these (alas, self-contradictory) conditions
461 results in pretty ugly and hairy code with obscure logic.
462
   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
466 Every prefix is assigned a "scope" value: "host" is local address,
467 "link" is direct route,
468 [ ... "site" ... "interior" ... ]
469 and "universe" is true gateway route with global meaning.
470
471 Every prefix refers to a set of "nexthop"s (gw, oif),
472 where gw must have narrower scope. This recursion stops
473 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
474 which means that gw is forced to be on link.
475
476 Code is still hairy, but now it is apparently logically
477 consistent and very flexible. F.e. as by-product it allows
478 to co-exists in peace independent exterior and interior
479 routing processes.
480
481 Normally it looks as following.
482
483 {universe prefix} -> (gw, oif) [scope link]
484 |
485 |-> {link prefix} -> (gw, oif) [scope local]
486 |
487 |-> {local prefix} (terminal node)
488 */
489
490static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
491{
492 int err;
493
494 if (nh->nh_gw) {
495 struct fib_result res;
496
497#ifdef CONFIG_IP_ROUTE_PERVASIVE
498 if (nh->nh_flags&RTNH_F_PERVASIVE)
499 return 0;
500#endif
501 if (nh->nh_flags&RTNH_F_ONLINK) {
502 struct net_device *dev;
503
504 if (r->rtm_scope >= RT_SCOPE_LINK)
505 return -EINVAL;
506 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
507 return -EINVAL;
508 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
509 return -ENODEV;
510 if (!(dev->flags&IFF_UP))
511 return -ENETDOWN;
512 nh->nh_dev = dev;
513 dev_hold(dev);
514 nh->nh_scope = RT_SCOPE_LINK;
515 return 0;
516 }
517 {
518 struct flowi fl = { .nl_u = { .ip4_u =
519 { .daddr = nh->nh_gw,
520 .scope = r->rtm_scope + 1 } },
521 .oif = nh->nh_oif };
522
523 /* It is not necessary, but requires a bit of thinking */
524 if (fl.fl4_scope < RT_SCOPE_LINK)
525 fl.fl4_scope = RT_SCOPE_LINK;
526 if ((err = fib_lookup(&fl, &res)) != 0)
527 return err;
528 }
529 err = -EINVAL;
530 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
531 goto out;
532 nh->nh_scope = res.scope;
533 nh->nh_oif = FIB_RES_OIF(res);
534 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
535 goto out;
536 dev_hold(nh->nh_dev);
537 err = -ENETDOWN;
538 if (!(nh->nh_dev->flags & IFF_UP))
539 goto out;
540 err = 0;
541out:
542 fib_res_put(&res);
543 return err;
544 } else {
545 struct in_device *in_dev;
546
547 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
548 return -EINVAL;
549
550 in_dev = inetdev_by_index(nh->nh_oif);
551 if (in_dev == NULL)
552 return -ENODEV;
553 if (!(in_dev->dev->flags&IFF_UP)) {
554 in_dev_put(in_dev);
555 return -ENETDOWN;
556 }
557 nh->nh_dev = in_dev->dev;
558 dev_hold(nh->nh_dev);
559 nh->nh_scope = RT_SCOPE_HOST;
560 in_dev_put(in_dev);
561 }
562 return 0;
563}
564
565static inline unsigned int fib_laddr_hashfn(u32 val)
566{
567 unsigned int mask = (fib_hash_size - 1);
568
569 return (val ^ (val >> 7) ^ (val >> 14)) & mask;
570}
571
572static struct hlist_head *fib_hash_alloc(int bytes)
573{
574 if (bytes <= PAGE_SIZE)
575 return kmalloc(bytes, GFP_KERNEL);
576 else
577 return (struct hlist_head *)
578 __get_free_pages(GFP_KERNEL, get_order(bytes));
579}
580
581static void fib_hash_free(struct hlist_head *hash, int bytes)
582{
583 if (!hash)
584 return;
585
586 if (bytes <= PAGE_SIZE)
587 kfree(hash);
588 else
589 free_pages((unsigned long) hash, get_order(bytes));
590}
591
592static void fib_hash_move(struct hlist_head *new_info_hash,
593 struct hlist_head *new_laddrhash,
594 unsigned int new_size)
595{
David S. Millerb7656e72005-08-05 04:12:48 -0700596 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700598 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599
600 write_lock(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700601 old_info_hash = fib_info_hash;
602 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 fib_hash_size = new_size;
604
605 for (i = 0; i < old_size; i++) {
606 struct hlist_head *head = &fib_info_hash[i];
607 struct hlist_node *node, *n;
608 struct fib_info *fi;
609
610 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
611 struct hlist_head *dest;
612 unsigned int new_hash;
613
614 hlist_del(&fi->fib_hash);
615
616 new_hash = fib_info_hashfn(fi);
617 dest = &new_info_hash[new_hash];
618 hlist_add_head(&fi->fib_hash, dest);
619 }
620 }
621 fib_info_hash = new_info_hash;
622
623 for (i = 0; i < old_size; i++) {
624 struct hlist_head *lhead = &fib_info_laddrhash[i];
625 struct hlist_node *node, *n;
626 struct fib_info *fi;
627
628 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
629 struct hlist_head *ldest;
630 unsigned int new_hash;
631
632 hlist_del(&fi->fib_lhash);
633
634 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
635 ldest = &new_laddrhash[new_hash];
636 hlist_add_head(&fi->fib_lhash, ldest);
637 }
638 }
639 fib_info_laddrhash = new_laddrhash;
640
641 write_unlock(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700642
643 bytes = old_size * sizeof(struct hlist_head *);
644 fib_hash_free(old_info_hash, bytes);
645 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646}
647
648struct fib_info *
649fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
650 const struct nlmsghdr *nlh, int *errp)
651{
652 int err;
653 struct fib_info *fi = NULL;
654 struct fib_info *ofi;
655#ifdef CONFIG_IP_ROUTE_MULTIPATH
656 int nhs = 1;
657#else
658 const int nhs = 1;
659#endif
660#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
661 u32 mp_alg = IP_MP_ALG_NONE;
662#endif
663
664 /* Fast check to catch the most weird cases */
665 if (fib_props[r->rtm_type].scope > r->rtm_scope)
666 goto err_inval;
667
668#ifdef CONFIG_IP_ROUTE_MULTIPATH
669 if (rta->rta_mp) {
670 nhs = fib_count_nexthops(rta->rta_mp);
671 if (nhs == 0)
672 goto err_inval;
673 }
674#endif
675#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
676 if (rta->rta_mp_alg) {
677 mp_alg = *rta->rta_mp_alg;
678
679 if (mp_alg < IP_MP_ALG_NONE ||
680 mp_alg > IP_MP_ALG_MAX)
681 goto err_inval;
682 }
683#endif
684
685 err = -ENOBUFS;
686 if (fib_info_cnt >= fib_hash_size) {
687 unsigned int new_size = fib_hash_size << 1;
688 struct hlist_head *new_info_hash;
689 struct hlist_head *new_laddrhash;
690 unsigned int bytes;
691
692 if (!new_size)
693 new_size = 1;
694 bytes = new_size * sizeof(struct hlist_head *);
695 new_info_hash = fib_hash_alloc(bytes);
696 new_laddrhash = fib_hash_alloc(bytes);
697 if (!new_info_hash || !new_laddrhash) {
698 fib_hash_free(new_info_hash, bytes);
699 fib_hash_free(new_laddrhash, bytes);
700 } else {
701 memset(new_info_hash, 0, bytes);
702 memset(new_laddrhash, 0, bytes);
703
704 fib_hash_move(new_info_hash, new_laddrhash, new_size);
705 }
706
707 if (!fib_hash_size)
708 goto failure;
709 }
710
711 fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
712 if (fi == NULL)
713 goto failure;
714 fib_info_cnt++;
715 memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
716
717 fi->fib_protocol = r->rtm_protocol;
718
719 fi->fib_nhs = nhs;
720 change_nexthops(fi) {
721 nh->nh_parent = fi;
722 } endfor_nexthops(fi)
723
724 fi->fib_flags = r->rtm_flags;
725 if (rta->rta_priority)
726 fi->fib_priority = *rta->rta_priority;
727 if (rta->rta_mx) {
728 int attrlen = RTA_PAYLOAD(rta->rta_mx);
729 struct rtattr *attr = RTA_DATA(rta->rta_mx);
730
731 while (RTA_OK(attr, attrlen)) {
732 unsigned flavor = attr->rta_type;
733 if (flavor) {
734 if (flavor > RTAX_MAX)
735 goto err_inval;
736 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
737 }
738 attr = RTA_NEXT(attr, attrlen);
739 }
740 }
741 if (rta->rta_prefsrc)
742 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
743
744 if (rta->rta_mp) {
745#ifdef CONFIG_IP_ROUTE_MULTIPATH
746 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
747 goto failure;
748 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
749 goto err_inval;
750 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
751 goto err_inval;
752#ifdef CONFIG_NET_CLS_ROUTE
753 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
754 goto err_inval;
755#endif
756#else
757 goto err_inval;
758#endif
759 } else {
760 struct fib_nh *nh = fi->fib_nh;
761 if (rta->rta_oif)
762 nh->nh_oif = *rta->rta_oif;
763 if (rta->rta_gw)
764 memcpy(&nh->nh_gw, rta->rta_gw, 4);
765#ifdef CONFIG_NET_CLS_ROUTE
766 if (rta->rta_flow)
767 memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
768#endif
769 nh->nh_flags = r->rtm_flags;
770#ifdef CONFIG_IP_ROUTE_MULTIPATH
771 nh->nh_weight = 1;
772#endif
773 }
774
775#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
776 fi->fib_mp_alg = mp_alg;
777#endif
778
779 if (fib_props[r->rtm_type].error) {
780 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
781 goto err_inval;
782 goto link_it;
783 }
784
785 if (r->rtm_scope > RT_SCOPE_HOST)
786 goto err_inval;
787
788 if (r->rtm_scope == RT_SCOPE_HOST) {
789 struct fib_nh *nh = fi->fib_nh;
790
791 /* Local address is added. */
792 if (nhs != 1 || nh->nh_gw)
793 goto err_inval;
794 nh->nh_scope = RT_SCOPE_NOWHERE;
795 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
796 err = -ENODEV;
797 if (nh->nh_dev == NULL)
798 goto failure;
799 } else {
800 change_nexthops(fi) {
801 if ((err = fib_check_nh(r, fi, nh)) != 0)
802 goto failure;
803 } endfor_nexthops(fi)
804 }
805
806 if (fi->fib_prefsrc) {
807 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
808 memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
809 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
810 goto err_inval;
811 }
812
813link_it:
814 if ((ofi = fib_find_info(fi)) != NULL) {
815 fi->fib_dead = 1;
816 free_fib_info(fi);
817 ofi->fib_treeref++;
818 return ofi;
819 }
820
821 fi->fib_treeref++;
822 atomic_inc(&fi->fib_clntref);
823 write_lock(&fib_info_lock);
824 hlist_add_head(&fi->fib_hash,
825 &fib_info_hash[fib_info_hashfn(fi)]);
826 if (fi->fib_prefsrc) {
827 struct hlist_head *head;
828
829 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
830 hlist_add_head(&fi->fib_lhash, head);
831 }
832 change_nexthops(fi) {
833 struct hlist_head *head;
834 unsigned int hash;
835
836 if (!nh->nh_dev)
837 continue;
838 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
839 head = &fib_info_devhash[hash];
840 hlist_add_head(&nh->nh_hash, head);
841 } endfor_nexthops(fi)
842 write_unlock(&fib_info_lock);
843 return fi;
844
845err_inval:
846 err = -EINVAL;
847
848failure:
849 *errp = err;
850 if (fi) {
851 fi->fib_dead = 1;
852 free_fib_info(fi);
853 }
854 return NULL;
855}
856
Robert Olssone5b43762005-08-25 13:01:03 -0700857/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858int fib_semantic_match(struct list_head *head, const struct flowi *flp,
859 struct fib_result *res, __u32 zone, __u32 mask,
860 int prefixlen)
861{
862 struct fib_alias *fa;
863 int nh_sel = 0;
864
Robert Olssone5b43762005-08-25 13:01:03 -0700865 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866 int err;
867
868 if (fa->fa_tos &&
869 fa->fa_tos != flp->fl4_tos)
870 continue;
871
872 if (fa->fa_scope < flp->fl4_scope)
873 continue;
874
875 fa->fa_state |= FA_S_ACCESSED;
876
877 err = fib_props[fa->fa_type].error;
878 if (err == 0) {
879 struct fib_info *fi = fa->fa_info;
880
881 if (fi->fib_flags & RTNH_F_DEAD)
882 continue;
883
884 switch (fa->fa_type) {
885 case RTN_UNICAST:
886 case RTN_LOCAL:
887 case RTN_BROADCAST:
888 case RTN_ANYCAST:
889 case RTN_MULTICAST:
890 for_nexthops(fi) {
891 if (nh->nh_flags&RTNH_F_DEAD)
892 continue;
893 if (!flp->oif || flp->oif == nh->nh_oif)
894 break;
895 }
896#ifdef CONFIG_IP_ROUTE_MULTIPATH
897 if (nhsel < fi->fib_nhs) {
898 nh_sel = nhsel;
899 goto out_fill_res;
900 }
901#else
902 if (nhsel < 1) {
903 goto out_fill_res;
904 }
905#endif
906 endfor_nexthops(fi);
907 continue;
908
909 default:
910 printk(KERN_DEBUG "impossible 102\n");
911 return -EINVAL;
912 };
913 }
914 return err;
915 }
916 return 1;
917
918out_fill_res:
919 res->prefixlen = prefixlen;
920 res->nh_sel = nh_sel;
921 res->type = fa->fa_type;
922 res->scope = fa->fa_scope;
923 res->fi = fa->fa_info;
924#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
925 res->netmask = mask;
926 res->network = zone &
927 (0xFFFFFFFF >> (32 - prefixlen));
928#endif
929 atomic_inc(&res->fi->fib_clntref);
930 return 0;
931}
932
933/* Find appropriate source address to this destination */
934
935u32 __fib_res_prefsrc(struct fib_result *res)
936{
937 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
938}
939
940int
941fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
942 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -0700943 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944{
945 struct rtmsg *rtm;
946 struct nlmsghdr *nlh;
947 unsigned char *b = skb->tail;
948
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -0700949 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 rtm = NLMSG_DATA(nlh);
951 rtm->rtm_family = AF_INET;
952 rtm->rtm_dst_len = dst_len;
953 rtm->rtm_src_len = 0;
954 rtm->rtm_tos = tos;
955 rtm->rtm_table = tb_id;
956 rtm->rtm_type = type;
957 rtm->rtm_flags = fi->fib_flags;
958 rtm->rtm_scope = scope;
959 if (rtm->rtm_dst_len)
960 RTA_PUT(skb, RTA_DST, 4, dst);
961 rtm->rtm_protocol = fi->fib_protocol;
962 if (fi->fib_priority)
963 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
964#ifdef CONFIG_NET_CLS_ROUTE
965 if (fi->fib_nh[0].nh_tclassid)
966 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
967#endif
968 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
969 goto rtattr_failure;
970 if (fi->fib_prefsrc)
971 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
972 if (fi->fib_nhs == 1) {
973 if (fi->fib_nh->nh_gw)
974 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
975 if (fi->fib_nh->nh_oif)
976 RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
977 }
978#ifdef CONFIG_IP_ROUTE_MULTIPATH
979 if (fi->fib_nhs > 1) {
980 struct rtnexthop *nhp;
981 struct rtattr *mp_head;
982 if (skb_tailroom(skb) <= RTA_SPACE(0))
983 goto rtattr_failure;
984 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
985
986 for_nexthops(fi) {
987 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
988 goto rtattr_failure;
989 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
990 nhp->rtnh_flags = nh->nh_flags & 0xFF;
991 nhp->rtnh_hops = nh->nh_weight-1;
992 nhp->rtnh_ifindex = nh->nh_oif;
993 if (nh->nh_gw)
994 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
995 nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
996 } endfor_nexthops(fi);
997 mp_head->rta_type = RTA_MULTIPATH;
998 mp_head->rta_len = skb->tail - (u8*)mp_head;
999 }
1000#endif
1001 nlh->nlmsg_len = skb->tail - b;
1002 return skb->len;
1003
1004nlmsg_failure:
1005rtattr_failure:
1006 skb_trim(skb, b - skb->data);
1007 return -1;
1008}
1009
1010#ifndef CONFIG_IP_NOSIOCRT
1011
1012int
1013fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1014 struct kern_rta *rta, struct rtentry *r)
1015{
1016 int plen;
1017 u32 *ptr;
1018
1019 memset(rtm, 0, sizeof(*rtm));
1020 memset(rta, 0, sizeof(*rta));
1021
1022 if (r->rt_dst.sa_family != AF_INET)
1023 return -EAFNOSUPPORT;
1024
1025 /* Check mask for validity:
1026 a) it must be contiguous.
1027 b) destination must have all host bits clear.
1028 c) if application forgot to set correct family (AF_INET),
1029 reject request unless it is absolutely clear i.e.
1030 both family and mask are zero.
1031 */
1032 plen = 32;
1033 ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
1034 if (!(r->rt_flags&RTF_HOST)) {
1035 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
1036 if (r->rt_genmask.sa_family != AF_INET) {
1037 if (mask || r->rt_genmask.sa_family)
1038 return -EAFNOSUPPORT;
1039 }
1040 if (bad_mask(mask, *ptr))
1041 return -EINVAL;
1042 plen = inet_mask_len(mask);
1043 }
1044
1045 nl->nlmsg_flags = NLM_F_REQUEST;
Jamal Hadi Salim9ed19f32005-06-18 22:55:51 -07001046 nl->nlmsg_pid = current->pid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 nl->nlmsg_seq = 0;
1048 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1049 if (cmd == SIOCDELRT) {
1050 nl->nlmsg_type = RTM_DELROUTE;
1051 nl->nlmsg_flags = 0;
1052 } else {
1053 nl->nlmsg_type = RTM_NEWROUTE;
1054 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
1055 rtm->rtm_protocol = RTPROT_BOOT;
1056 }
1057
1058 rtm->rtm_dst_len = plen;
1059 rta->rta_dst = ptr;
1060
1061 if (r->rt_metric) {
1062 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
1063 rta->rta_priority = (u32*)&r->rt_pad3;
1064 }
1065 if (r->rt_flags&RTF_REJECT) {
1066 rtm->rtm_scope = RT_SCOPE_HOST;
1067 rtm->rtm_type = RTN_UNREACHABLE;
1068 return 0;
1069 }
1070 rtm->rtm_scope = RT_SCOPE_NOWHERE;
1071 rtm->rtm_type = RTN_UNICAST;
1072
1073 if (r->rt_dev) {
1074 char *colon;
1075 struct net_device *dev;
1076 char devname[IFNAMSIZ];
1077
1078 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
1079 return -EFAULT;
1080 devname[IFNAMSIZ-1] = 0;
1081 colon = strchr(devname, ':');
1082 if (colon)
1083 *colon = 0;
1084 dev = __dev_get_by_name(devname);
1085 if (!dev)
1086 return -ENODEV;
1087 rta->rta_oif = &dev->ifindex;
1088 if (colon) {
1089 struct in_ifaddr *ifa;
Herbert Xue5ed6392005-10-03 14:35:55 -07001090 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 if (!in_dev)
1092 return -ENODEV;
1093 *colon = ':';
1094 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
1095 if (strcmp(ifa->ifa_label, devname) == 0)
1096 break;
1097 if (ifa == NULL)
1098 return -ENODEV;
1099 rta->rta_prefsrc = &ifa->ifa_local;
1100 }
1101 }
1102
1103 ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
1104 if (r->rt_gateway.sa_family == AF_INET && *ptr) {
1105 rta->rta_gw = ptr;
1106 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
1107 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1108 }
1109
1110 if (cmd == SIOCDELRT)
1111 return 0;
1112
1113 if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
1114 return -EINVAL;
1115
1116 if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
1117 rtm->rtm_scope = RT_SCOPE_LINK;
1118
1119 if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
1120 struct rtattr *rec;
1121 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
1122 if (mx == NULL)
1123 return -ENOMEM;
1124 rta->rta_mx = mx;
1125 mx->rta_type = RTA_METRICS;
1126 mx->rta_len = RTA_LENGTH(0);
1127 if (r->rt_flags&RTF_MTU) {
1128 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1129 rec->rta_type = RTAX_ADVMSS;
1130 rec->rta_len = RTA_LENGTH(4);
1131 mx->rta_len += RTA_LENGTH(4);
1132 *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
1133 }
1134 if (r->rt_flags&RTF_WINDOW) {
1135 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1136 rec->rta_type = RTAX_WINDOW;
1137 rec->rta_len = RTA_LENGTH(4);
1138 mx->rta_len += RTA_LENGTH(4);
1139 *(u32*)RTA_DATA(rec) = r->rt_window;
1140 }
1141 if (r->rt_flags&RTF_IRTT) {
1142 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1143 rec->rta_type = RTAX_RTT;
1144 rec->rta_len = RTA_LENGTH(4);
1145 mx->rta_len += RTA_LENGTH(4);
1146 *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
1147 }
1148 }
1149 return 0;
1150}
1151
1152#endif
1153
1154/*
1155 Update FIB if:
1156 - local address disappeared -> we must delete all the entries
1157 referring to it.
1158 - device went down -> we must shutdown all nexthops going via it.
1159 */
1160
1161int fib_sync_down(u32 local, struct net_device *dev, int force)
1162{
1163 int ret = 0;
1164 int scope = RT_SCOPE_NOWHERE;
1165
1166 if (force)
1167 scope = -1;
1168
1169 if (local && fib_info_laddrhash) {
1170 unsigned int hash = fib_laddr_hashfn(local);
1171 struct hlist_head *head = &fib_info_laddrhash[hash];
1172 struct hlist_node *node;
1173 struct fib_info *fi;
1174
1175 hlist_for_each_entry(fi, node, head, fib_lhash) {
1176 if (fi->fib_prefsrc == local) {
1177 fi->fib_flags |= RTNH_F_DEAD;
1178 ret++;
1179 }
1180 }
1181 }
1182
1183 if (dev) {
1184 struct fib_info *prev_fi = NULL;
1185 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1186 struct hlist_head *head = &fib_info_devhash[hash];
1187 struct hlist_node *node;
1188 struct fib_nh *nh;
1189
1190 hlist_for_each_entry(nh, node, head, nh_hash) {
1191 struct fib_info *fi = nh->nh_parent;
1192 int dead;
1193
1194 BUG_ON(!fi->fib_nhs);
1195 if (nh->nh_dev != dev || fi == prev_fi)
1196 continue;
1197 prev_fi = fi;
1198 dead = 0;
1199 change_nexthops(fi) {
1200 if (nh->nh_flags&RTNH_F_DEAD)
1201 dead++;
1202 else if (nh->nh_dev == dev &&
1203 nh->nh_scope != scope) {
1204 nh->nh_flags |= RTNH_F_DEAD;
1205#ifdef CONFIG_IP_ROUTE_MULTIPATH
1206 spin_lock_bh(&fib_multipath_lock);
1207 fi->fib_power -= nh->nh_power;
1208 nh->nh_power = 0;
1209 spin_unlock_bh(&fib_multipath_lock);
1210#endif
1211 dead++;
1212 }
1213#ifdef CONFIG_IP_ROUTE_MULTIPATH
1214 if (force > 1 && nh->nh_dev == dev) {
1215 dead = fi->fib_nhs;
1216 break;
1217 }
1218#endif
1219 } endfor_nexthops(fi)
1220 if (dead == fi->fib_nhs) {
1221 fi->fib_flags |= RTNH_F_DEAD;
1222 ret++;
1223 }
1224 }
1225 }
1226
1227 return ret;
1228}
1229
1230#ifdef CONFIG_IP_ROUTE_MULTIPATH
1231
1232/*
1233 Dead device goes up. We wake up dead nexthops.
1234 It takes sense only on multipath routes.
1235 */
1236
1237int fib_sync_up(struct net_device *dev)
1238{
1239 struct fib_info *prev_fi;
1240 unsigned int hash;
1241 struct hlist_head *head;
1242 struct hlist_node *node;
1243 struct fib_nh *nh;
1244 int ret;
1245
1246 if (!(dev->flags&IFF_UP))
1247 return 0;
1248
1249 prev_fi = NULL;
1250 hash = fib_devindex_hashfn(dev->ifindex);
1251 head = &fib_info_devhash[hash];
1252 ret = 0;
1253
1254 hlist_for_each_entry(nh, node, head, nh_hash) {
1255 struct fib_info *fi = nh->nh_parent;
1256 int alive;
1257
1258 BUG_ON(!fi->fib_nhs);
1259 if (nh->nh_dev != dev || fi == prev_fi)
1260 continue;
1261
1262 prev_fi = fi;
1263 alive = 0;
1264 change_nexthops(fi) {
1265 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1266 alive++;
1267 continue;
1268 }
1269 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1270 continue;
Herbert Xue5ed6392005-10-03 14:35:55 -07001271 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 continue;
1273 alive++;
1274 spin_lock_bh(&fib_multipath_lock);
1275 nh->nh_power = 0;
1276 nh->nh_flags &= ~RTNH_F_DEAD;
1277 spin_unlock_bh(&fib_multipath_lock);
1278 } endfor_nexthops(fi)
1279
1280 if (alive > 0) {
1281 fi->fib_flags &= ~RTNH_F_DEAD;
1282 ret++;
1283 }
1284 }
1285
1286 return ret;
1287}
1288
1289/*
1290 The algorithm is suboptimal, but it provides really
1291 fair weighted route distribution.
1292 */
1293
1294void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1295{
1296 struct fib_info *fi = res->fi;
1297 int w;
1298
1299 spin_lock_bh(&fib_multipath_lock);
1300 if (fi->fib_power <= 0) {
1301 int power = 0;
1302 change_nexthops(fi) {
1303 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1304 power += nh->nh_weight;
1305 nh->nh_power = nh->nh_weight;
1306 }
1307 } endfor_nexthops(fi);
1308 fi->fib_power = power;
1309 if (power <= 0) {
1310 spin_unlock_bh(&fib_multipath_lock);
1311 /* Race condition: route has just become dead. */
1312 res->nh_sel = 0;
1313 return;
1314 }
1315 }
1316
1317
1318 /* w should be random number [0..fi->fib_power-1],
1319 it is pretty bad approximation.
1320 */
1321
1322 w = jiffies % fi->fib_power;
1323
1324 change_nexthops(fi) {
1325 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1326 if ((w -= nh->nh_power) <= 0) {
1327 nh->nh_power--;
1328 fi->fib_power--;
1329 res->nh_sel = nhsel;
1330 spin_unlock_bh(&fib_multipath_lock);
1331 return;
1332 }
1333 }
1334 } endfor_nexthops(fi);
1335
1336 /* Race condition: route has just become dead. */
1337 res->nh_sel = 0;
1338 spin_unlock_bh(&fib_multipath_lock);
1339}
1340#endif