blob: 3ed920b92fb67fa7fbc85735c268c4e851b348ed [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020031#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <linux/init.h>
37
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020038#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070045#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070046#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
48#include "fib_lookup.h"
49
Stephen Hemminger832b4c52006-08-29 16:48:09 -070050static DEFINE_SPINLOCK(fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070051static struct hlist_head *fib_info_hash;
52static struct hlist_head *fib_info_laddrhash;
53static unsigned int fib_hash_size;
54static unsigned int fib_info_cnt;
55
56#define DEVINDEX_HASHBITS 8
57#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
58static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
59
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace scope that declares
 * "nhsel" (the nexthop index) and "nh" (the current nexthop; const for
 * for_nexthops, writable for change_nexthops) and start a for loop.  The
 * scope MUST be closed with endfor_nexthops(fi).
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh;				\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh *nh;					\
	for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh);		\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath support only the first nexthop is visited; the
 * single-pass loop is kept so that both variants have the same shape
 * (the compiler is expected to optimize the dummy loop away).
 */

#define for_nexthops(fi) {						\
	int nhsel = 0; const struct fib_nh *nh = (fi)->fib_nh;		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0; struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh); \
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
83
84
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090085static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070086{
87 int error;
88 u8 scope;
Thomas Grafa0ee18b2007-03-24 20:32:54 -070089} fib_props[RTN_MAX + 1] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090090 {
Linus Torvalds1da177e2005-04-16 15:20:36 -070091 .error = 0,
92 .scope = RT_SCOPE_NOWHERE,
93 }, /* RTN_UNSPEC */
94 {
95 .error = 0,
96 .scope = RT_SCOPE_UNIVERSE,
97 }, /* RTN_UNICAST */
98 {
99 .error = 0,
100 .scope = RT_SCOPE_HOST,
101 }, /* RTN_LOCAL */
102 {
103 .error = 0,
104 .scope = RT_SCOPE_LINK,
105 }, /* RTN_BROADCAST */
106 {
107 .error = 0,
108 .scope = RT_SCOPE_LINK,
109 }, /* RTN_ANYCAST */
110 {
111 .error = 0,
112 .scope = RT_SCOPE_UNIVERSE,
113 }, /* RTN_MULTICAST */
114 {
115 .error = -EINVAL,
116 .scope = RT_SCOPE_UNIVERSE,
117 }, /* RTN_BLACKHOLE */
118 {
119 .error = -EHOSTUNREACH,
120 .scope = RT_SCOPE_UNIVERSE,
121 }, /* RTN_UNREACHABLE */
122 {
123 .error = -EACCES,
124 .scope = RT_SCOPE_UNIVERSE,
125 }, /* RTN_PROHIBIT */
126 {
127 .error = -EAGAIN,
128 .scope = RT_SCOPE_UNIVERSE,
129 }, /* RTN_THROW */
130 {
131 .error = -EINVAL,
132 .scope = RT_SCOPE_NOWHERE,
133 }, /* RTN_NAT */
134 {
135 .error = -EINVAL,
136 .scope = RT_SCOPE_NOWHERE,
137 }, /* RTN_XRESOLVE */
138};
139
140
141/* Release a nexthop info record */
142
143void free_fib_info(struct fib_info *fi)
144{
145 if (fi->fib_dead == 0) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800146 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 return;
148 }
149 change_nexthops(fi) {
150 if (nh->nh_dev)
151 dev_put(nh->nh_dev);
152 nh->nh_dev = NULL;
153 } endfor_nexthops(fi);
154 fib_info_cnt--;
155 kfree(fi);
156}
157
158void fib_release_info(struct fib_info *fi)
159{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700160 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 if (fi && --fi->fib_treeref == 0) {
162 hlist_del(&fi->fib_hash);
163 if (fi->fib_prefsrc)
164 hlist_del(&fi->fib_lhash);
165 change_nexthops(fi) {
166 if (!nh->nh_dev)
167 continue;
168 hlist_del(&nh->nh_hash);
169 } endfor_nexthops(fi)
170 fi->fib_dead = 1;
171 fib_info_put(fi);
172 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700173 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174}
175
176static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177{
178 const struct fib_nh *onh = ofi->fib_nh;
179
180 for_nexthops(fi) {
181 if (nh->nh_oif != onh->nh_oif ||
182 nh->nh_gw != onh->nh_gw ||
183 nh->nh_scope != onh->nh_scope ||
184#ifdef CONFIG_IP_ROUTE_MULTIPATH
185 nh->nh_weight != onh->nh_weight ||
186#endif
187#ifdef CONFIG_NET_CLS_ROUTE
188 nh->nh_tclassid != onh->nh_tclassid ||
189#endif
190 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
191 return -1;
192 onh++;
193 } endfor_nexthops(fi);
194 return 0;
195}
196
197static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
198{
199 unsigned int mask = (fib_hash_size - 1);
200 unsigned int val = fi->fib_nhs;
201
202 val ^= fi->fib_protocol;
Al Viro81f7bf62006-09-27 18:40:00 -0700203 val ^= (__force u32)fi->fib_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204 val ^= fi->fib_priority;
205
206 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
207}
208
209static struct fib_info *fib_find_info(const struct fib_info *nfi)
210{
211 struct hlist_head *head;
212 struct hlist_node *node;
213 struct fib_info *fi;
214 unsigned int hash;
215
216 hash = fib_info_hashfn(nfi);
217 head = &fib_info_hash[hash];
218
219 hlist_for_each_entry(fi, node, head, fib_hash) {
220 if (fi->fib_nhs != nfi->fib_nhs)
221 continue;
222 if (nfi->fib_protocol == fi->fib_protocol &&
223 nfi->fib_prefsrc == fi->fib_prefsrc &&
224 nfi->fib_priority == fi->fib_priority &&
225 memcmp(nfi->fib_metrics, fi->fib_metrics,
226 sizeof(fi->fib_metrics)) == 0 &&
227 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
228 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
229 return fi;
230 }
231
232 return NULL;
233}
234
235static inline unsigned int fib_devindex_hashfn(unsigned int val)
236{
237 unsigned int mask = DEVINDEX_HASHSIZE - 1;
238
239 return (val ^
240 (val >> DEVINDEX_HASHBITS) ^
241 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
242}
243
244/* Check, that the gateway is already configured.
245 Used only by redirect accept routine.
246 */
247
Al Virod878e72e2006-09-26 22:18:13 -0700248int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249{
250 struct hlist_head *head;
251 struct hlist_node *node;
252 struct fib_nh *nh;
253 unsigned int hash;
254
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700255 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256
257 hash = fib_devindex_hashfn(dev->ifindex);
258 head = &fib_info_devhash[hash];
259 hlist_for_each_entry(nh, node, head, nh_hash) {
260 if (nh->nh_dev == dev &&
261 nh->nh_gw == gw &&
262 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700263 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 return 0;
265 }
266 }
267
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700268 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269
270 return -1;
271}
272
Thomas Graf339bf982006-11-10 14:10:15 -0800273static inline size_t fib_nlmsg_size(struct fib_info *fi)
274{
275 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
276 + nla_total_size(4) /* RTA_TABLE */
277 + nla_total_size(4) /* RTA_DST */
278 + nla_total_size(4) /* RTA_PRIORITY */
279 + nla_total_size(4); /* RTA_PREFSRC */
280
281 /* space for nested metrics */
282 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
283
284 if (fi->fib_nhs) {
285 /* Also handles the special case fib_nhs == 1 */
286
287 /* each nexthop is packed in an attribute */
288 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
289
290 /* may contain flow and gateway attribute */
291 nhsize += 2 * nla_total_size(4);
292
293 /* all nexthops are packed in a nested attribute */
294 payload += nla_total_size(fi->fib_nhs * nhsize);
295 }
296
297 return payload;
298}
299
Al Viro81f7bf62006-09-27 18:40:00 -0700300void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
Milan Kocianb8f55832007-05-23 14:55:06 -0700301 int dst_len, u32 tb_id, struct nl_info *info,
302 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303{
304 struct sk_buff *skb;
Thomas Graf4e902c52006-08-17 18:14:52 -0700305 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700306 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307
Thomas Graf339bf982006-11-10 14:10:15 -0800308 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700309 if (skb == NULL)
310 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311
Thomas Graf4e902c52006-08-17 18:14:52 -0700312 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700313 fa->fa_type, fa->fa_scope, key, dst_len,
Milan Kocianb8f55832007-05-23 14:55:06 -0700314 fa->fa_tos, fa->fa_info, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -0800315 if (err < 0) {
316 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
317 WARN_ON(err == -EMSGSIZE);
318 kfree_skb(skb);
319 goto errout;
320 }
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800321 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
Thomas Graf4e902c52006-08-17 18:14:52 -0700322 info->nlh, GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700323errout:
324 if (err < 0)
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800325 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326}
327
328/* Return the first fib alias matching TOS with
329 * priority less than or equal to PRIO.
330 */
331struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
332{
333 if (fah) {
334 struct fib_alias *fa;
335 list_for_each_entry(fa, fah, fa_list) {
336 if (fa->fa_tos > tos)
337 continue;
338 if (fa->fa_info->fib_priority >= prio ||
339 fa->fa_tos < tos)
340 return fa;
341 }
342 }
343 return NULL;
344}
345
346int fib_detect_death(struct fib_info *fi, int order,
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800347 struct fib_info **last_resort, int *last_idx, int dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348{
349 struct neighbour *n;
350 int state = NUD_NONE;
351
352 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
353 if (n) {
354 state = n->nud_state;
355 neigh_release(n);
356 }
357 if (state==NUD_REACHABLE)
358 return 0;
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800359 if ((state&NUD_VALID) && order != dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360 return 0;
361 if ((state&NUD_VALID) ||
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800362 (*last_idx<0 && order > dflt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 *last_resort = fi;
364 *last_idx = order;
365 }
366 return 1;
367}
368
369#ifdef CONFIG_IP_ROUTE_MULTIPATH
370
/*
 * Count the rtnexthop entries in a buffer of @remaining bytes.
 * Returns 0 if bytes are left over after the last well-formed entry.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
383
Thomas Graf4e902c52006-08-17 18:14:52 -0700384static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
385 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700388 int attrlen;
389
390 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700392
393 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
394 nh->nh_oif = rtnh->rtnh_ifindex;
395 nh->nh_weight = rtnh->rtnh_hops + 1;
396
397 attrlen = rtnh_attrlen(rtnh);
398 if (attrlen > 0) {
399 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
400
401 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700402 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700404 nla = nla_find(attrs, attrlen, RTA_FLOW);
405 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406#endif
407 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700408
409 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700411
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 return 0;
413}
414
415#endif
416
Thomas Graf4e902c52006-08-17 18:14:52 -0700417int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418{
419#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700420 struct rtnexthop *rtnh;
421 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422#endif
423
Thomas Graf4e902c52006-08-17 18:14:52 -0700424 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 return 1;
426
Thomas Graf4e902c52006-08-17 18:14:52 -0700427 if (cfg->fc_oif || cfg->fc_gw) {
428 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
429 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 return 0;
431 return 1;
432 }
433
434#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700435 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700437
438 rtnh = cfg->fc_mp;
439 remaining = cfg->fc_mp_len;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900440
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700442 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443
Thomas Graf4e902c52006-08-17 18:14:52 -0700444 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700446
447 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700449
450 attrlen = rtnh_attrlen(rtnh);
451 if (attrlen < 0) {
452 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
453
454 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700455 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 return 1;
457#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700458 nla = nla_find(attrs, attrlen, RTA_FLOW);
459 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 return 1;
461#endif
462 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700463
464 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 } endfor_nexthops(fi);
466#endif
467 return 0;
468}
469
470
471/*
472 Picture
473 -------
474
   The semantics of a nexthop are very messy for historical reasons.
476 We have to take into account, that:
477 a) gateway can be actually local interface address,
478 so that gatewayed route is direct.
479 b) gateway must be on-link address, possibly
480 described not by an ifaddr, but also by a direct route.
481 c) If both gateway and interface are specified, they should not
482 contradict.
483 d) If we use tunnel routes, gateway could be not on-link.
484
485 Attempt to reconcile all of these (alas, self-contradictory) conditions
486 results in pretty ugly and hairy code with obscure logic.
487
   I chose to generalize it instead, so that the size
489 of code does not increase practically, but it becomes
490 much more general.
491 Every prefix is assigned a "scope" value: "host" is local address,
492 "link" is direct route,
493 [ ... "site" ... "interior" ... ]
494 and "universe" is true gateway route with global meaning.
495
496 Every prefix refers to a set of "nexthop"s (gw, oif),
497 where gw must have narrower scope. This recursion stops
498 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
499 which means that gw is forced to be on link.
500
501 Code is still hairy, but now it is apparently logically
502 consistent and very flexible. F.e. as by-product it allows
503 to co-exists in peace independent exterior and interior
504 routing processes.
505
506 Normally it looks as following.
507
508 {universe prefix} -> (gw, oif) [scope link]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900509 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 |-> {link prefix} -> (gw, oif) [scope local]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900511 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 |-> {local prefix} (terminal node)
513 */
514
Thomas Graf4e902c52006-08-17 18:14:52 -0700515static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
516 struct fib_nh *nh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517{
518 int err;
519
520 if (nh->nh_gw) {
521 struct fib_result res;
522
523#ifdef CONFIG_IP_ROUTE_PERVASIVE
524 if (nh->nh_flags&RTNH_F_PERVASIVE)
525 return 0;
526#endif
527 if (nh->nh_flags&RTNH_F_ONLINK) {
528 struct net_device *dev;
529
Thomas Graf4e902c52006-08-17 18:14:52 -0700530 if (cfg->fc_scope >= RT_SCOPE_LINK)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 return -EINVAL;
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800532 if (inet_addr_type(cfg->fc_nlinfo.nl_net,
533 nh->nh_gw) != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 return -EINVAL;
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800535 if ((dev = __dev_get_by_index(cfg->fc_nlinfo.nl_net,
536 nh->nh_oif)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 return -ENODEV;
538 if (!(dev->flags&IFF_UP))
539 return -ENETDOWN;
540 nh->nh_dev = dev;
541 dev_hold(dev);
542 nh->nh_scope = RT_SCOPE_LINK;
543 return 0;
544 }
545 {
Thomas Graf4e902c52006-08-17 18:14:52 -0700546 struct flowi fl = {
547 .nl_u = {
548 .ip4_u = {
549 .daddr = nh->nh_gw,
550 .scope = cfg->fc_scope + 1,
551 },
552 },
553 .oif = nh->nh_oif,
554 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555
556 /* It is not necessary, but requires a bit of thinking */
557 if (fl.fl4_scope < RT_SCOPE_LINK)
558 fl.fl4_scope = RT_SCOPE_LINK;
559 if ((err = fib_lookup(&fl, &res)) != 0)
560 return err;
561 }
562 err = -EINVAL;
563 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
564 goto out;
565 nh->nh_scope = res.scope;
566 nh->nh_oif = FIB_RES_OIF(res);
567 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
568 goto out;
569 dev_hold(nh->nh_dev);
570 err = -ENETDOWN;
571 if (!(nh->nh_dev->flags & IFF_UP))
572 goto out;
573 err = 0;
574out:
575 fib_res_put(&res);
576 return err;
577 } else {
578 struct in_device *in_dev;
579
580 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
581 return -EINVAL;
582
583 in_dev = inetdev_by_index(nh->nh_oif);
584 if (in_dev == NULL)
585 return -ENODEV;
586 if (!(in_dev->dev->flags&IFF_UP)) {
587 in_dev_put(in_dev);
588 return -ENETDOWN;
589 }
590 nh->nh_dev = in_dev->dev;
591 dev_hold(nh->nh_dev);
592 nh->nh_scope = RT_SCOPE_HOST;
593 in_dev_put(in_dev);
594 }
595 return 0;
596}
597
Al Viro81f7bf62006-09-27 18:40:00 -0700598static inline unsigned int fib_laddr_hashfn(__be32 val)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599{
600 unsigned int mask = (fib_hash_size - 1);
601
Al Viro81f7bf62006-09-27 18:40:00 -0700602 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603}
604
605static struct hlist_head *fib_hash_alloc(int bytes)
606{
607 if (bytes <= PAGE_SIZE)
Joonwoo Park88f83492007-11-26 23:29:32 +0800608 return kzalloc(bytes, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 else
610 return (struct hlist_head *)
Joonwoo Park88f83492007-11-26 23:29:32 +0800611 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612}
613
614static void fib_hash_free(struct hlist_head *hash, int bytes)
615{
616 if (!hash)
617 return;
618
619 if (bytes <= PAGE_SIZE)
620 kfree(hash);
621 else
622 free_pages((unsigned long) hash, get_order(bytes));
623}
624
625static void fib_hash_move(struct hlist_head *new_info_hash,
626 struct hlist_head *new_laddrhash,
627 unsigned int new_size)
628{
David S. Millerb7656e72005-08-05 04:12:48 -0700629 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700631 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700633 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700634 old_info_hash = fib_info_hash;
635 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 fib_hash_size = new_size;
637
638 for (i = 0; i < old_size; i++) {
639 struct hlist_head *head = &fib_info_hash[i];
640 struct hlist_node *node, *n;
641 struct fib_info *fi;
642
643 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
644 struct hlist_head *dest;
645 unsigned int new_hash;
646
647 hlist_del(&fi->fib_hash);
648
649 new_hash = fib_info_hashfn(fi);
650 dest = &new_info_hash[new_hash];
651 hlist_add_head(&fi->fib_hash, dest);
652 }
653 }
654 fib_info_hash = new_info_hash;
655
656 for (i = 0; i < old_size; i++) {
657 struct hlist_head *lhead = &fib_info_laddrhash[i];
658 struct hlist_node *node, *n;
659 struct fib_info *fi;
660
661 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
662 struct hlist_head *ldest;
663 unsigned int new_hash;
664
665 hlist_del(&fi->fib_lhash);
666
667 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
668 ldest = &new_laddrhash[new_hash];
669 hlist_add_head(&fi->fib_lhash, ldest);
670 }
671 }
672 fib_info_laddrhash = new_laddrhash;
673
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700674 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700675
676 bytes = old_size * sizeof(struct hlist_head *);
677 fib_hash_free(old_info_hash, bytes);
678 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679}
680
Thomas Graf4e902c52006-08-17 18:14:52 -0700681struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682{
683 int err;
684 struct fib_info *fi = NULL;
685 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686 int nhs = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687
688 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700689 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 goto err_inval;
691
692#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700693 if (cfg->fc_mp) {
694 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 if (nhs == 0)
696 goto err_inval;
697 }
698#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699
700 err = -ENOBUFS;
701 if (fib_info_cnt >= fib_hash_size) {
702 unsigned int new_size = fib_hash_size << 1;
703 struct hlist_head *new_info_hash;
704 struct hlist_head *new_laddrhash;
705 unsigned int bytes;
706
707 if (!new_size)
708 new_size = 1;
709 bytes = new_size * sizeof(struct hlist_head *);
710 new_info_hash = fib_hash_alloc(bytes);
711 new_laddrhash = fib_hash_alloc(bytes);
712 if (!new_info_hash || !new_laddrhash) {
713 fib_hash_free(new_info_hash, bytes);
714 fib_hash_free(new_laddrhash, bytes);
Joonwoo Park88f83492007-11-26 23:29:32 +0800715 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 fib_hash_move(new_info_hash, new_laddrhash, new_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
718 if (!fib_hash_size)
719 goto failure;
720 }
721
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700722 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 if (fi == NULL)
724 goto failure;
725 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726
Thomas Graf4e902c52006-08-17 18:14:52 -0700727 fi->fib_protocol = cfg->fc_protocol;
728 fi->fib_flags = cfg->fc_flags;
729 fi->fib_priority = cfg->fc_priority;
730 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731
732 fi->fib_nhs = nhs;
733 change_nexthops(fi) {
734 nh->nh_parent = fi;
735 } endfor_nexthops(fi)
736
Thomas Graf4e902c52006-08-17 18:14:52 -0700737 if (cfg->fc_mx) {
738 struct nlattr *nla;
739 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740
Thomas Graf4e902c52006-08-17 18:14:52 -0700741 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200742 int type = nla_type(nla);
Thomas Graf4e902c52006-08-17 18:14:52 -0700743
744 if (type) {
745 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700747 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749 }
750 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751
Thomas Graf4e902c52006-08-17 18:14:52 -0700752 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700754 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
755 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700757 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700759 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 goto err_inval;
761#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700762 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 goto err_inval;
764#endif
765#else
766 goto err_inval;
767#endif
768 } else {
769 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700770
771 nh->nh_oif = cfg->fc_oif;
772 nh->nh_gw = cfg->fc_gw;
773 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700775 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777#ifdef CONFIG_IP_ROUTE_MULTIPATH
778 nh->nh_weight = 1;
779#endif
780 }
781
Thomas Graf4e902c52006-08-17 18:14:52 -0700782 if (fib_props[cfg->fc_type].error) {
783 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784 goto err_inval;
785 goto link_it;
786 }
787
Thomas Graf4e902c52006-08-17 18:14:52 -0700788 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789 goto err_inval;
790
Thomas Graf4e902c52006-08-17 18:14:52 -0700791 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 struct fib_nh *nh = fi->fib_nh;
793
794 /* Local address is added. */
795 if (nhs != 1 || nh->nh_gw)
796 goto err_inval;
797 nh->nh_scope = RT_SCOPE_NOWHERE;
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800798 nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
799 fi->fib_nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 err = -ENODEV;
801 if (nh->nh_dev == NULL)
802 goto failure;
803 } else {
804 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700805 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 goto failure;
807 } endfor_nexthops(fi)
808 }
809
810 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700811 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
812 fi->fib_prefsrc != cfg->fc_dst)
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800813 if (inet_addr_type(cfg->fc_nlinfo.nl_net,
814 fi->fib_prefsrc) != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 goto err_inval;
816 }
817
818link_it:
819 if ((ofi = fib_find_info(fi)) != NULL) {
820 fi->fib_dead = 1;
821 free_fib_info(fi);
822 ofi->fib_treeref++;
823 return ofi;
824 }
825
826 fi->fib_treeref++;
827 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700828 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829 hlist_add_head(&fi->fib_hash,
830 &fib_info_hash[fib_info_hashfn(fi)]);
831 if (fi->fib_prefsrc) {
832 struct hlist_head *head;
833
834 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
835 hlist_add_head(&fi->fib_lhash, head);
836 }
837 change_nexthops(fi) {
838 struct hlist_head *head;
839 unsigned int hash;
840
841 if (!nh->nh_dev)
842 continue;
843 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
844 head = &fib_info_devhash[hash];
845 hlist_add_head(&nh->nh_hash, head);
846 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700847 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 return fi;
849
850err_inval:
851 err = -EINVAL;
852
853failure:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900854 if (fi) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855 fi->fib_dead = 1;
856 free_fib_info(fi);
857 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700858
859 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860}
861
Robert Olssone5b43762005-08-25 13:01:03 -0700862/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863int fib_semantic_match(struct list_head *head, const struct flowi *flp,
Al Viro1ef1b8c2006-09-26 22:20:56 -0700864 struct fib_result *res, __be32 zone, __be32 mask,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 int prefixlen)
866{
867 struct fib_alias *fa;
868 int nh_sel = 0;
869
Robert Olssone5b43762005-08-25 13:01:03 -0700870 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871 int err;
872
873 if (fa->fa_tos &&
874 fa->fa_tos != flp->fl4_tos)
875 continue;
876
877 if (fa->fa_scope < flp->fl4_scope)
878 continue;
879
880 fa->fa_state |= FA_S_ACCESSED;
881
882 err = fib_props[fa->fa_type].error;
883 if (err == 0) {
884 struct fib_info *fi = fa->fa_info;
885
886 if (fi->fib_flags & RTNH_F_DEAD)
887 continue;
888
889 switch (fa->fa_type) {
890 case RTN_UNICAST:
891 case RTN_LOCAL:
892 case RTN_BROADCAST:
893 case RTN_ANYCAST:
894 case RTN_MULTICAST:
895 for_nexthops(fi) {
896 if (nh->nh_flags&RTNH_F_DEAD)
897 continue;
898 if (!flp->oif || flp->oif == nh->nh_oif)
899 break;
900 }
901#ifdef CONFIG_IP_ROUTE_MULTIPATH
902 if (nhsel < fi->fib_nhs) {
903 nh_sel = nhsel;
904 goto out_fill_res;
905 }
906#else
907 if (nhsel < 1) {
908 goto out_fill_res;
909 }
910#endif
911 endfor_nexthops(fi);
912 continue;
913
914 default:
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800915 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
916 fa->fa_type);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 return -EINVAL;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -0700918 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 }
920 return err;
921 }
922 return 1;
923
924out_fill_res:
925 res->prefixlen = prefixlen;
926 res->nh_sel = nh_sel;
927 res->type = fa->fa_type;
928 res->scope = fa->fa_scope;
929 res->fi = fa->fa_info;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 atomic_inc(&res->fi->fib_clntref);
931 return 0;
932}
933
934/* Find appropriate source address to this destination */
935
Al Virob83738a2006-09-26 22:14:15 -0700936__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937{
938 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
939}
940
Thomas Grafbe403ea2006-08-17 18:15:17 -0700941int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Al Viro81f7bf62006-09-27 18:40:00 -0700942 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700943 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700945 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947
Thomas Grafbe403ea2006-08-17 18:15:17 -0700948 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
949 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -0800950 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700951
952 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 rtm->rtm_family = AF_INET;
954 rtm->rtm_dst_len = dst_len;
955 rtm->rtm_src_len = 0;
956 rtm->rtm_tos = tos;
957 rtm->rtm_table = tb_id;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700958 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 rtm->rtm_type = type;
960 rtm->rtm_flags = fi->fib_flags;
961 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700963
964 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700965 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700966
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700968 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
969
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700971 goto nla_put_failure;
972
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700974 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700975
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 if (fi->fib_nhs == 1) {
977 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700978 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700979
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700981 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700982#ifdef CONFIG_NET_CLS_ROUTE
983 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700984 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700985#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 }
987#ifdef CONFIG_IP_ROUTE_MULTIPATH
988 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700989 struct rtnexthop *rtnh;
990 struct nlattr *mp;
991
992 mp = nla_nest_start(skb, RTA_MULTIPATH);
993 if (mp == NULL)
994 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995
996 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700997 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
998 if (rtnh == NULL)
999 goto nla_put_failure;
1000
1001 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1002 rtnh->rtnh_hops = nh->nh_weight - 1;
1003 rtnh->rtnh_ifindex = nh->nh_oif;
1004
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -07001006 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001007#ifdef CONFIG_NET_CLS_ROUTE
1008 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001009 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001010#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001011 /* length of rtnetlink header + attributes */
1012 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001014
1015 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 }
1017#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001018 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019
Thomas Grafbe403ea2006-08-17 18:15:17 -07001020nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08001021 nlmsg_cancel(skb, nlh);
1022 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023}
1024
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025/*
1026 Update FIB if:
1027 - local address disappeared -> we must delete all the entries
1028 referring to it.
1029 - device went down -> we must shutdown all nexthops going via it.
1030 */
1031
Al Viro81f7bf62006-09-27 18:40:00 -07001032int fib_sync_down(__be32 local, struct net_device *dev, int force)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033{
1034 int ret = 0;
1035 int scope = RT_SCOPE_NOWHERE;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001036
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 if (force)
1038 scope = -1;
1039
1040 if (local && fib_info_laddrhash) {
1041 unsigned int hash = fib_laddr_hashfn(local);
1042 struct hlist_head *head = &fib_info_laddrhash[hash];
1043 struct hlist_node *node;
1044 struct fib_info *fi;
1045
1046 hlist_for_each_entry(fi, node, head, fib_lhash) {
1047 if (fi->fib_prefsrc == local) {
1048 fi->fib_flags |= RTNH_F_DEAD;
1049 ret++;
1050 }
1051 }
1052 }
1053
1054 if (dev) {
1055 struct fib_info *prev_fi = NULL;
1056 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1057 struct hlist_head *head = &fib_info_devhash[hash];
1058 struct hlist_node *node;
1059 struct fib_nh *nh;
1060
1061 hlist_for_each_entry(nh, node, head, nh_hash) {
1062 struct fib_info *fi = nh->nh_parent;
1063 int dead;
1064
1065 BUG_ON(!fi->fib_nhs);
1066 if (nh->nh_dev != dev || fi == prev_fi)
1067 continue;
1068 prev_fi = fi;
1069 dead = 0;
1070 change_nexthops(fi) {
1071 if (nh->nh_flags&RTNH_F_DEAD)
1072 dead++;
1073 else if (nh->nh_dev == dev &&
1074 nh->nh_scope != scope) {
1075 nh->nh_flags |= RTNH_F_DEAD;
1076#ifdef CONFIG_IP_ROUTE_MULTIPATH
1077 spin_lock_bh(&fib_multipath_lock);
1078 fi->fib_power -= nh->nh_power;
1079 nh->nh_power = 0;
1080 spin_unlock_bh(&fib_multipath_lock);
1081#endif
1082 dead++;
1083 }
1084#ifdef CONFIG_IP_ROUTE_MULTIPATH
1085 if (force > 1 && nh->nh_dev == dev) {
1086 dead = fi->fib_nhs;
1087 break;
1088 }
1089#endif
1090 } endfor_nexthops(fi)
1091 if (dead == fi->fib_nhs) {
1092 fi->fib_flags |= RTNH_F_DEAD;
1093 ret++;
1094 }
1095 }
1096 }
1097
1098 return ret;
1099}
1100
1101#ifdef CONFIG_IP_ROUTE_MULTIPATH
1102
1103/*
1104 Dead device goes up. We wake up dead nexthops.
1105 It takes sense only on multipath routes.
1106 */
1107
1108int fib_sync_up(struct net_device *dev)
1109{
1110 struct fib_info *prev_fi;
1111 unsigned int hash;
1112 struct hlist_head *head;
1113 struct hlist_node *node;
1114 struct fib_nh *nh;
1115 int ret;
1116
1117 if (!(dev->flags&IFF_UP))
1118 return 0;
1119
1120 prev_fi = NULL;
1121 hash = fib_devindex_hashfn(dev->ifindex);
1122 head = &fib_info_devhash[hash];
1123 ret = 0;
1124
1125 hlist_for_each_entry(nh, node, head, nh_hash) {
1126 struct fib_info *fi = nh->nh_parent;
1127 int alive;
1128
1129 BUG_ON(!fi->fib_nhs);
1130 if (nh->nh_dev != dev || fi == prev_fi)
1131 continue;
1132
1133 prev_fi = fi;
1134 alive = 0;
1135 change_nexthops(fi) {
1136 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1137 alive++;
1138 continue;
1139 }
1140 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1141 continue;
Herbert Xue5ed6392005-10-03 14:35:55 -07001142 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 continue;
1144 alive++;
1145 spin_lock_bh(&fib_multipath_lock);
1146 nh->nh_power = 0;
1147 nh->nh_flags &= ~RTNH_F_DEAD;
1148 spin_unlock_bh(&fib_multipath_lock);
1149 } endfor_nexthops(fi)
1150
1151 if (alive > 0) {
1152 fi->fib_flags &= ~RTNH_F_DEAD;
1153 ret++;
1154 }
1155 }
1156
1157 return ret;
1158}
1159
1160/*
1161 The algorithm is suboptimal, but it provides really
1162 fair weighted route distribution.
1163 */
1164
1165void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1166{
1167 struct fib_info *fi = res->fi;
1168 int w;
1169
1170 spin_lock_bh(&fib_multipath_lock);
1171 if (fi->fib_power <= 0) {
1172 int power = 0;
1173 change_nexthops(fi) {
1174 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1175 power += nh->nh_weight;
1176 nh->nh_power = nh->nh_weight;
1177 }
1178 } endfor_nexthops(fi);
1179 fi->fib_power = power;
1180 if (power <= 0) {
1181 spin_unlock_bh(&fib_multipath_lock);
1182 /* Race condition: route has just become dead. */
1183 res->nh_sel = 0;
1184 return;
1185 }
1186 }
1187
1188
1189 /* w should be random number [0..fi->fib_power-1],
1190 it is pretty bad approximation.
1191 */
1192
1193 w = jiffies % fi->fib_power;
1194
1195 change_nexthops(fi) {
1196 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1197 if ((w -= nh->nh_power) <= 0) {
1198 nh->nh_power--;
1199 fi->fib_power--;
1200 res->nh_sel = nhsel;
1201 spin_unlock_bh(&fib_multipath_lock);
1202 return;
1203 }
1204 }
1205 } endfor_nexthops(fi);
1206
1207 /* Race condition: route has just become dead. */
1208 res->nh_sel = 0;
1209 spin_unlock_bh(&fib_multipath_lock);
1210}
1211#endif