blob: a13c84763d4c10e503e418f2e6afef3e353193c3 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020031#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <linux/init.h>
37
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020038#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070045#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070046#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
48#include "fib_lookup.h"
49
/* Taken around every update of the hash tables declared below. */
static DEFINE_SPINLOCK(fib_info_lock);
/* fib_info records, bucketed by fib_info_hashfn(). */
static struct hlist_head *fib_info_hash;
/* fib_info records that have a preferred source, bucketed by fib_laddr_hashfn(). */
static struct hlist_head *fib_info_laddrhash;
/* Number of buckets in each of the two tables above (0 until first allocated). */
static unsigned int fib_hash_size;
/* Count of fib_info records allocated by fib_create_info() and not yet freed. */
static unsigned int fib_info_cnt;

#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
/* Nexthops that hold a device, bucketed by fib_devindex_hashfn(ifindex). */
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
59
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* NOTE(review): not referenced in the part of the file visible here. */
static DEFINE_SPINLOCK(fib_multipath_lock);

/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace, declare the loop
 * counter "nhsel" and the cursor "nh" (const in for_nexthops, writable
 * in change_nexthops) and start a for loop over fi->fib_nh[].  The loop
 * body supplied by the user must be followed by endfor_nexthops(fi),
 * which closes the brace opened here.  "nhsel" and "nh" stay visible
 * between the end of the loop body and endfor_nexthops().
 */
#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath the loop always runs exactly once, on fi->fib_nh[0];
 * fi->fib_nhs is not consulted.
 */
/* Hope, that gcc will optimize it to get rid of dummy loop */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }
83
84
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090085static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070086{
87 int error;
88 u8 scope;
Thomas Grafa0ee18b2007-03-24 20:32:54 -070089} fib_props[RTN_MAX + 1] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090090 {
Linus Torvalds1da177e2005-04-16 15:20:36 -070091 .error = 0,
92 .scope = RT_SCOPE_NOWHERE,
93 }, /* RTN_UNSPEC */
94 {
95 .error = 0,
96 .scope = RT_SCOPE_UNIVERSE,
97 }, /* RTN_UNICAST */
98 {
99 .error = 0,
100 .scope = RT_SCOPE_HOST,
101 }, /* RTN_LOCAL */
102 {
103 .error = 0,
104 .scope = RT_SCOPE_LINK,
105 }, /* RTN_BROADCAST */
106 {
107 .error = 0,
108 .scope = RT_SCOPE_LINK,
109 }, /* RTN_ANYCAST */
110 {
111 .error = 0,
112 .scope = RT_SCOPE_UNIVERSE,
113 }, /* RTN_MULTICAST */
114 {
115 .error = -EINVAL,
116 .scope = RT_SCOPE_UNIVERSE,
117 }, /* RTN_BLACKHOLE */
118 {
119 .error = -EHOSTUNREACH,
120 .scope = RT_SCOPE_UNIVERSE,
121 }, /* RTN_UNREACHABLE */
122 {
123 .error = -EACCES,
124 .scope = RT_SCOPE_UNIVERSE,
125 }, /* RTN_PROHIBIT */
126 {
127 .error = -EAGAIN,
128 .scope = RT_SCOPE_UNIVERSE,
129 }, /* RTN_THROW */
130 {
131 .error = -EINVAL,
132 .scope = RT_SCOPE_NOWHERE,
133 }, /* RTN_NAT */
134 {
135 .error = -EINVAL,
136 .scope = RT_SCOPE_NOWHERE,
137 }, /* RTN_XRESOLVE */
138};
139
140
141/* Release a nexthop info record */
142
143void free_fib_info(struct fib_info *fi)
144{
145 if (fi->fib_dead == 0) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800146 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 return;
148 }
149 change_nexthops(fi) {
150 if (nh->nh_dev)
151 dev_put(nh->nh_dev);
152 nh->nh_dev = NULL;
153 } endfor_nexthops(fi);
154 fib_info_cnt--;
155 kfree(fi);
156}
157
158void fib_release_info(struct fib_info *fi)
159{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700160 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 if (fi && --fi->fib_treeref == 0) {
162 hlist_del(&fi->fib_hash);
163 if (fi->fib_prefsrc)
164 hlist_del(&fi->fib_lhash);
165 change_nexthops(fi) {
166 if (!nh->nh_dev)
167 continue;
168 hlist_del(&nh->nh_hash);
169 } endfor_nexthops(fi)
170 fi->fib_dead = 1;
171 fib_info_put(fi);
172 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700173 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174}
175
176static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177{
178 const struct fib_nh *onh = ofi->fib_nh;
179
180 for_nexthops(fi) {
181 if (nh->nh_oif != onh->nh_oif ||
182 nh->nh_gw != onh->nh_gw ||
183 nh->nh_scope != onh->nh_scope ||
184#ifdef CONFIG_IP_ROUTE_MULTIPATH
185 nh->nh_weight != onh->nh_weight ||
186#endif
187#ifdef CONFIG_NET_CLS_ROUTE
188 nh->nh_tclassid != onh->nh_tclassid ||
189#endif
190 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
191 return -1;
192 onh++;
193 } endfor_nexthops(fi);
194 return 0;
195}
196
David S. Miller88ebc722008-01-12 21:49:01 -0800197static inline unsigned int fib_devindex_hashfn(unsigned int val)
198{
199 unsigned int mask = DEVINDEX_HASHSIZE - 1;
200
201 return (val ^
202 (val >> DEVINDEX_HASHBITS) ^
203 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
204}
205
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
207{
208 unsigned int mask = (fib_hash_size - 1);
209 unsigned int val = fi->fib_nhs;
210
211 val ^= fi->fib_protocol;
Al Viro81f7bf62006-09-27 18:40:00 -0700212 val ^= (__force u32)fi->fib_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 val ^= fi->fib_priority;
David S. Miller88ebc722008-01-12 21:49:01 -0800214 for_nexthops(fi) {
215 val ^= fib_devindex_hashfn(nh->nh_oif);
216 } endfor_nexthops(fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217
218 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
219}
220
221static struct fib_info *fib_find_info(const struct fib_info *nfi)
222{
223 struct hlist_head *head;
224 struct hlist_node *node;
225 struct fib_info *fi;
226 unsigned int hash;
227
228 hash = fib_info_hashfn(nfi);
229 head = &fib_info_hash[hash];
230
231 hlist_for_each_entry(fi, node, head, fib_hash) {
Denis V. Lunev4814bdb2008-01-31 18:50:07 -0800232 if (fi->fib_net != nfi->fib_net)
233 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 if (fi->fib_nhs != nfi->fib_nhs)
235 continue;
236 if (nfi->fib_protocol == fi->fib_protocol &&
237 nfi->fib_prefsrc == fi->fib_prefsrc &&
238 nfi->fib_priority == fi->fib_priority &&
239 memcmp(nfi->fib_metrics, fi->fib_metrics,
240 sizeof(fi->fib_metrics)) == 0 &&
241 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
242 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
243 return fi;
244 }
245
246 return NULL;
247}
248
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249/* Check, that the gateway is already configured.
250 Used only by redirect accept routine.
251 */
252
Al Virod878e72e2006-09-26 22:18:13 -0700253int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254{
255 struct hlist_head *head;
256 struct hlist_node *node;
257 struct fib_nh *nh;
258 unsigned int hash;
259
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700260 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261
262 hash = fib_devindex_hashfn(dev->ifindex);
263 head = &fib_info_devhash[hash];
264 hlist_for_each_entry(nh, node, head, nh_hash) {
265 if (nh->nh_dev == dev &&
266 nh->nh_gw == gw &&
267 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700268 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 return 0;
270 }
271 }
272
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700273 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274
275 return -1;
276}
277
Thomas Graf339bf982006-11-10 14:10:15 -0800278static inline size_t fib_nlmsg_size(struct fib_info *fi)
279{
280 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
281 + nla_total_size(4) /* RTA_TABLE */
282 + nla_total_size(4) /* RTA_DST */
283 + nla_total_size(4) /* RTA_PRIORITY */
284 + nla_total_size(4); /* RTA_PREFSRC */
285
286 /* space for nested metrics */
287 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
288
289 if (fi->fib_nhs) {
290 /* Also handles the special case fib_nhs == 1 */
291
292 /* each nexthop is packed in an attribute */
293 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
294
295 /* may contain flow and gateway attribute */
296 nhsize += 2 * nla_total_size(4);
297
298 /* all nexthops are packed in a nested attribute */
299 payload += nla_total_size(fi->fib_nhs * nhsize);
300 }
301
302 return payload;
303}
304
Al Viro81f7bf62006-09-27 18:40:00 -0700305void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
Milan Kocianb8f55832007-05-23 14:55:06 -0700306 int dst_len, u32 tb_id, struct nl_info *info,
307 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308{
309 struct sk_buff *skb;
Thomas Graf4e902c52006-08-17 18:14:52 -0700310 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700311 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312
Thomas Graf339bf982006-11-10 14:10:15 -0800313 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700314 if (skb == NULL)
315 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316
Thomas Graf4e902c52006-08-17 18:14:52 -0700317 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700318 fa->fa_type, fa->fa_scope, key, dst_len,
Milan Kocianb8f55832007-05-23 14:55:06 -0700319 fa->fa_tos, fa->fa_info, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -0800320 if (err < 0) {
321 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
322 WARN_ON(err == -EMSGSIZE);
323 kfree_skb(skb);
324 goto errout;
325 }
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800326 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
Thomas Graf4e902c52006-08-17 18:14:52 -0700327 info->nlh, GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700328errout:
329 if (err < 0)
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800330 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331}
332
333/* Return the first fib alias matching TOS with
334 * priority less than or equal to PRIO.
335 */
336struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
337{
338 if (fah) {
339 struct fib_alias *fa;
340 list_for_each_entry(fa, fah, fa_list) {
341 if (fa->fa_tos > tos)
342 continue;
343 if (fa->fa_info->fib_priority >= prio ||
344 fa->fa_tos < tos)
345 return fa;
346 }
347 }
348 return NULL;
349}
350
351int fib_detect_death(struct fib_info *fi, int order,
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800352 struct fib_info **last_resort, int *last_idx, int dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353{
354 struct neighbour *n;
355 int state = NUD_NONE;
356
357 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
358 if (n) {
359 state = n->nud_state;
360 neigh_release(n);
361 }
362 if (state==NUD_REACHABLE)
363 return 0;
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800364 if ((state&NUD_VALID) && order != dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 return 0;
366 if ((state&NUD_VALID) ||
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800367 (*last_idx<0 && order > dflt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 *last_resort = fi;
369 *last_idx = order;
370 }
371 return 1;
372}
373
374#ifdef CONFIG_IP_ROUTE_MULTIPATH
375
/*
 * Count the rtnexthop entries in a buffer of @remaining bytes starting
 * at @rtnh.  Returns 0 if bytes are left over after the last
 * well-formed entry, since that means the configuration is invalid.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
388
Thomas Graf4e902c52006-08-17 18:14:52 -0700389static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
390 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700393 int attrlen;
394
395 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700397
398 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
399 nh->nh_oif = rtnh->rtnh_ifindex;
400 nh->nh_weight = rtnh->rtnh_hops + 1;
401
402 attrlen = rtnh_attrlen(rtnh);
403 if (attrlen > 0) {
404 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
405
406 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700407 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700409 nla = nla_find(attrs, attrlen, RTA_FLOW);
410 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411#endif
412 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700413
414 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700416
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417 return 0;
418}
419
420#endif
421
Thomas Graf4e902c52006-08-17 18:14:52 -0700422int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423{
424#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700425 struct rtnexthop *rtnh;
426 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427#endif
428
Thomas Graf4e902c52006-08-17 18:14:52 -0700429 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 return 1;
431
Thomas Graf4e902c52006-08-17 18:14:52 -0700432 if (cfg->fc_oif || cfg->fc_gw) {
433 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
434 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 return 0;
436 return 1;
437 }
438
439#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700440 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700442
443 rtnh = cfg->fc_mp;
444 remaining = cfg->fc_mp_len;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900445
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700447 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448
Thomas Graf4e902c52006-08-17 18:14:52 -0700449 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700451
452 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700454
455 attrlen = rtnh_attrlen(rtnh);
456 if (attrlen < 0) {
457 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
458
459 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700460 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 return 1;
462#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700463 nla = nla_find(attrs, attrlen, RTA_FLOW);
464 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 return 1;
466#endif
467 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700468
469 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470 } endfor_nexthops(fi);
471#endif
472 return 0;
473}
474
475
476/*
477 Picture
478 -------
479
   Semantics of nexthop are very messy for historical reasons.
481 We have to take into account, that:
482 a) gateway can be actually local interface address,
483 so that gatewayed route is direct.
484 b) gateway must be on-link address, possibly
485 described not by an ifaddr, but also by a direct route.
486 c) If both gateway and interface are specified, they should not
487 contradict.
488 d) If we use tunnel routes, gateway could be not on-link.
489
490 Attempt to reconcile all of these (alas, self-contradictory) conditions
491 results in pretty ugly and hairy code with obscure logic.
492
   I chose to generalize it instead, so that the size
494 of code does not increase practically, but it becomes
495 much more general.
496 Every prefix is assigned a "scope" value: "host" is local address,
497 "link" is direct route,
498 [ ... "site" ... "interior" ... ]
499 and "universe" is true gateway route with global meaning.
500
501 Every prefix refers to a set of "nexthop"s (gw, oif),
502 where gw must have narrower scope. This recursion stops
503 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
504 which means that gw is forced to be on link.
505
506 Code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes to
   co-exist in peace.
510
511 Normally it looks as following.
512
513 {universe prefix} -> (gw, oif) [scope link]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900514 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 |-> {link prefix} -> (gw, oif) [scope local]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900516 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517 |-> {local prefix} (terminal node)
518 */
519
/*
 * Validate one nexthop of a route being created and bind it to its
 * output device.
 *
 * On success (return 0) nh->nh_scope is set and, except for the
 * CONFIG_IP_ROUTE_PERVASIVE shortcut, nh->nh_dev points at the device
 * with a reference held.  On the -ENETDOWN exit of the gateway lookup
 * path the device reference has already been taken and nh->nh_dev is
 * left set.
 *
 * Returns 0 or a negative errno (-EINVAL, -ENODEV, -ENETDOWN, or the
 * error returned by fib_lookup()).
 */
static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
			struct fib_nh *nh)
{
	int err;
	struct net *net;

	net = cfg->fc_nlinfo.nl_net;
	if (nh->nh_gw) {
		/* Nexthop goes through a gateway. */
		struct fib_result res;

#ifdef CONFIG_IP_ROUTE_PERVASIVE
		if (nh->nh_flags&RTNH_F_PERVASIVE)
			return 0;
#endif
		if (nh->nh_flags&RTNH_F_ONLINK) {
			/*
			 * Gateway is declared on-link: no route lookup, the
			 * gateway only has to be a unicast address and the
			 * given output device has to exist and be up.
			 */
			struct net_device *dev;

			if (cfg->fc_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
				return -EINVAL;
			if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
				return -ENODEV;
			if (!(dev->flags&IFF_UP))
				return -ENETDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		{
			/*
			 * Look the gateway up with a scope value one above the
			 * route's own, but never below RT_SCOPE_LINK.
			 */
			struct flowi fl = {
				.nl_u = {
					.ip4_u = {
						.daddr = nh->nh_gw,
						.scope = cfg->fc_scope + 1,
					},
				},
				.oif = nh->nh_oif,
			};

			/* It is not necessary, but requires a bit of thinking */
			if (fl.fl4_scope < RT_SCOPE_LINK)
				fl.fl4_scope = RT_SCOPE_LINK;
			if ((err = fib_lookup(net, &fl, &res)) != 0)
				return err;
		}
		/* From here on the lookup result must be released via "out". */
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		/* Inherit scope, output interface and device from the result. */
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
			goto out;
		dev_hold(nh->nh_dev);
		err = -ENETDOWN;
		if (!(nh->nh_dev->flags & IFF_UP))
			goto out;
		err = 0;
out:
		fib_res_put(&res);
		return err;
	} else {
		/* No gateway: the nexthop is the output interface itself. */
		struct in_device *in_dev;

		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
			return -EINVAL;

		in_dev = inetdev_by_index(net, nh->nh_oif);
		if (in_dev == NULL)
			return -ENODEV;
		if (!(in_dev->dev->flags&IFF_UP)) {
			in_dev_put(in_dev);
			return -ENETDOWN;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		/* The device reference is kept; only the in_device one is dropped. */
		in_dev_put(in_dev);
	}
	return 0;
}
602
Al Viro81f7bf62006-09-27 18:40:00 -0700603static inline unsigned int fib_laddr_hashfn(__be32 val)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604{
605 unsigned int mask = (fib_hash_size - 1);
606
Al Viro81f7bf62006-09-27 18:40:00 -0700607 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608}
609
610static struct hlist_head *fib_hash_alloc(int bytes)
611{
612 if (bytes <= PAGE_SIZE)
Joonwoo Park88f83492007-11-26 23:29:32 +0800613 return kzalloc(bytes, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 else
615 return (struct hlist_head *)
Joonwoo Park88f83492007-11-26 23:29:32 +0800616 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617}
618
619static void fib_hash_free(struct hlist_head *hash, int bytes)
620{
621 if (!hash)
622 return;
623
624 if (bytes <= PAGE_SIZE)
625 kfree(hash);
626 else
627 free_pages((unsigned long) hash, get_order(bytes));
628}
629
/*
 * Switch both fib_info hash tables to the new, already allocated and
 * zeroed arrays of @new_size buckets, rehashing every entry.
 *
 * The rehash runs under fib_info_lock; the old arrays are freed after
 * the lock has been dropped.
 */
static void fib_hash_move(struct hlist_head *new_info_hash,
			  struct hlist_head *new_laddrhash,
			  unsigned int new_size)
{
	struct hlist_head *old_info_hash, *old_laddrhash;
	unsigned int old_size = fib_hash_size;
	unsigned int i, bytes;

	spin_lock_bh(&fib_info_lock);
	old_info_hash = fib_info_hash;
	old_laddrhash = fib_info_laddrhash;
	/*
	 * Must be updated before rehashing: fib_info_hashfn() and
	 * fib_laddr_hashfn() derive their mask from fib_hash_size.
	 */
	fib_hash_size = new_size;

	/* Move every record from the old info table to its new bucket. */
	for (i = 0; i < old_size; i++) {
		struct hlist_head *head = &fib_info_hash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
			struct hlist_head *dest;
			unsigned int new_hash;

			hlist_del(&fi->fib_hash);

			new_hash = fib_info_hashfn(fi);
			dest = &new_info_hash[new_hash];
			hlist_add_head(&fi->fib_hash, dest);
		}
	}
	fib_info_hash = new_info_hash;

	/* Same for the preferred-source table. */
	for (i = 0; i < old_size; i++) {
		struct hlist_head *lhead = &fib_info_laddrhash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
			struct hlist_head *ldest;
			unsigned int new_hash;

			hlist_del(&fi->fib_lhash);

			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
			ldest = &new_laddrhash[new_hash];
			hlist_add_head(&fi->fib_lhash, ldest);
		}
	}
	fib_info_laddrhash = new_laddrhash;

	spin_unlock_bh(&fib_info_lock);

	/* Same size computation as used by the allocation in fib_create_info(). */
	bytes = old_size * sizeof(struct hlist_head *);
	fib_hash_free(old_info_hash, bytes);
	fib_hash_free(old_laddrhash, bytes);
}
685
Thomas Graf4e902c52006-08-17 18:14:52 -0700686struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687{
688 int err;
689 struct fib_info *fi = NULL;
690 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 int nhs = 1;
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800692 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693
694 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700695 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 goto err_inval;
697
698#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700699 if (cfg->fc_mp) {
700 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 if (nhs == 0)
702 goto err_inval;
703 }
704#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705
706 err = -ENOBUFS;
707 if (fib_info_cnt >= fib_hash_size) {
708 unsigned int new_size = fib_hash_size << 1;
709 struct hlist_head *new_info_hash;
710 struct hlist_head *new_laddrhash;
711 unsigned int bytes;
712
713 if (!new_size)
714 new_size = 1;
715 bytes = new_size * sizeof(struct hlist_head *);
716 new_info_hash = fib_hash_alloc(bytes);
717 new_laddrhash = fib_hash_alloc(bytes);
718 if (!new_info_hash || !new_laddrhash) {
719 fib_hash_free(new_info_hash, bytes);
720 fib_hash_free(new_laddrhash, bytes);
Joonwoo Park88f83492007-11-26 23:29:32 +0800721 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 fib_hash_move(new_info_hash, new_laddrhash, new_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723
724 if (!fib_hash_size)
725 goto failure;
726 }
727
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700728 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729 if (fi == NULL)
730 goto failure;
731 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800733 fi->fib_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700734 fi->fib_protocol = cfg->fc_protocol;
735 fi->fib_flags = cfg->fc_flags;
736 fi->fib_priority = cfg->fc_priority;
737 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738
739 fi->fib_nhs = nhs;
740 change_nexthops(fi) {
741 nh->nh_parent = fi;
742 } endfor_nexthops(fi)
743
Thomas Graf4e902c52006-08-17 18:14:52 -0700744 if (cfg->fc_mx) {
745 struct nlattr *nla;
746 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747
Thomas Graf4e902c52006-08-17 18:14:52 -0700748 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200749 int type = nla_type(nla);
Thomas Graf4e902c52006-08-17 18:14:52 -0700750
751 if (type) {
752 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700754 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 }
757 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758
Thomas Graf4e902c52006-08-17 18:14:52 -0700759 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700761 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
762 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700764 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700766 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767 goto err_inval;
768#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700769 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 goto err_inval;
771#endif
772#else
773 goto err_inval;
774#endif
775 } else {
776 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700777
778 nh->nh_oif = cfg->fc_oif;
779 nh->nh_gw = cfg->fc_gw;
780 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700782 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784#ifdef CONFIG_IP_ROUTE_MULTIPATH
785 nh->nh_weight = 1;
786#endif
787 }
788
Thomas Graf4e902c52006-08-17 18:14:52 -0700789 if (fib_props[cfg->fc_type].error) {
790 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 goto err_inval;
792 goto link_it;
793 }
794
Thomas Graf4e902c52006-08-17 18:14:52 -0700795 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796 goto err_inval;
797
Thomas Graf4e902c52006-08-17 18:14:52 -0700798 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 struct fib_nh *nh = fi->fib_nh;
800
801 /* Local address is added. */
802 if (nhs != 1 || nh->nh_gw)
803 goto err_inval;
804 nh->nh_scope = RT_SCOPE_NOWHERE;
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800805 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 err = -ENODEV;
807 if (nh->nh_dev == NULL)
808 goto failure;
809 } else {
810 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700811 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812 goto failure;
813 } endfor_nexthops(fi)
814 }
815
816 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700817 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
818 fi->fib_prefsrc != cfg->fc_dst)
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800819 if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820 goto err_inval;
821 }
822
823link_it:
824 if ((ofi = fib_find_info(fi)) != NULL) {
825 fi->fib_dead = 1;
826 free_fib_info(fi);
827 ofi->fib_treeref++;
828 return ofi;
829 }
830
831 fi->fib_treeref++;
832 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700833 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 hlist_add_head(&fi->fib_hash,
835 &fib_info_hash[fib_info_hashfn(fi)]);
836 if (fi->fib_prefsrc) {
837 struct hlist_head *head;
838
839 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
840 hlist_add_head(&fi->fib_lhash, head);
841 }
842 change_nexthops(fi) {
843 struct hlist_head *head;
844 unsigned int hash;
845
846 if (!nh->nh_dev)
847 continue;
848 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
849 head = &fib_info_devhash[hash];
850 hlist_add_head(&nh->nh_hash, head);
851 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700852 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 return fi;
854
855err_inval:
856 err = -EINVAL;
857
858failure:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900859 if (fi) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860 fi->fib_dead = 1;
861 free_fib_info(fi);
862 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700863
864 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865}
866
Robert Olssone5b43762005-08-25 13:01:03 -0700867/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868int fib_semantic_match(struct list_head *head, const struct flowi *flp,
Al Viro1ef1b8c2006-09-26 22:20:56 -0700869 struct fib_result *res, __be32 zone, __be32 mask,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870 int prefixlen)
871{
872 struct fib_alias *fa;
873 int nh_sel = 0;
874
Robert Olssone5b43762005-08-25 13:01:03 -0700875 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 int err;
877
878 if (fa->fa_tos &&
879 fa->fa_tos != flp->fl4_tos)
880 continue;
881
882 if (fa->fa_scope < flp->fl4_scope)
883 continue;
884
885 fa->fa_state |= FA_S_ACCESSED;
886
887 err = fib_props[fa->fa_type].error;
888 if (err == 0) {
889 struct fib_info *fi = fa->fa_info;
890
891 if (fi->fib_flags & RTNH_F_DEAD)
892 continue;
893
894 switch (fa->fa_type) {
895 case RTN_UNICAST:
896 case RTN_LOCAL:
897 case RTN_BROADCAST:
898 case RTN_ANYCAST:
899 case RTN_MULTICAST:
900 for_nexthops(fi) {
901 if (nh->nh_flags&RTNH_F_DEAD)
902 continue;
903 if (!flp->oif || flp->oif == nh->nh_oif)
904 break;
905 }
906#ifdef CONFIG_IP_ROUTE_MULTIPATH
907 if (nhsel < fi->fib_nhs) {
908 nh_sel = nhsel;
909 goto out_fill_res;
910 }
911#else
912 if (nhsel < 1) {
913 goto out_fill_res;
914 }
915#endif
916 endfor_nexthops(fi);
917 continue;
918
919 default:
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800920 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
921 fa->fa_type);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922 return -EINVAL;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -0700923 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924 }
925 return err;
926 }
927 return 1;
928
929out_fill_res:
930 res->prefixlen = prefixlen;
931 res->nh_sel = nh_sel;
932 res->type = fa->fa_type;
933 res->scope = fa->fa_scope;
934 res->fi = fa->fa_info;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935 atomic_inc(&res->fi->fib_clntref);
936 return 0;
937}
938
939/* Find appropriate source address to this destination */
940
Al Virob83738a2006-09-26 22:14:15 -0700941__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942{
943 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
944}
945
Thomas Grafbe403ea2006-08-17 18:15:17 -0700946int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Al Viro81f7bf62006-09-27 18:40:00 -0700947 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700948 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700950 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952
Thomas Grafbe403ea2006-08-17 18:15:17 -0700953 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
954 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -0800955 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700956
957 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 rtm->rtm_family = AF_INET;
959 rtm->rtm_dst_len = dst_len;
960 rtm->rtm_src_len = 0;
961 rtm->rtm_tos = tos;
962 rtm->rtm_table = tb_id;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700963 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 rtm->rtm_type = type;
965 rtm->rtm_flags = fi->fib_flags;
966 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700968
969 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700970 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700971
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700973 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
974
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700976 goto nla_put_failure;
977
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700979 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700980
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 if (fi->fib_nhs == 1) {
982 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700983 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700984
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700986 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700987#ifdef CONFIG_NET_CLS_ROUTE
988 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700989 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700990#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991 }
992#ifdef CONFIG_IP_ROUTE_MULTIPATH
993 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700994 struct rtnexthop *rtnh;
995 struct nlattr *mp;
996
997 mp = nla_nest_start(skb, RTA_MULTIPATH);
998 if (mp == NULL)
999 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000
1001 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -07001002 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1003 if (rtnh == NULL)
1004 goto nla_put_failure;
1005
1006 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1007 rtnh->rtnh_hops = nh->nh_weight - 1;
1008 rtnh->rtnh_ifindex = nh->nh_oif;
1009
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -07001011 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001012#ifdef CONFIG_NET_CLS_ROUTE
1013 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001014 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001015#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001016 /* length of rtnetlink header + attributes */
1017 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001019
1020 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 }
1022#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001023 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024
Thomas Grafbe403ea2006-08-17 18:15:17 -07001025nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08001026 nlmsg_cancel(skb, nlh);
1027 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028}
1029
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030/*
1031 Update FIB if:
1032 - local address disappeared -> we must delete all the entries
1033 referring to it.
1034 - device went down -> we must shutdown all nexthops going via it.
1035 */
Denis V. Lunev4814bdb2008-01-31 18:50:07 -08001036int fib_sync_down_addr(struct net *net, __be32 local)
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001037{
1038 int ret = 0;
1039 unsigned int hash = fib_laddr_hashfn(local);
1040 struct hlist_head *head = &fib_info_laddrhash[hash];
1041 struct hlist_node *node;
1042 struct fib_info *fi;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001044 if (fib_info_laddrhash == NULL || local == 0)
1045 return 0;
1046
1047 hlist_for_each_entry(fi, node, head, fib_lhash) {
Denis V. Lunev4814bdb2008-01-31 18:50:07 -08001048 if (fi->fib_net != net)
1049 continue;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001050 if (fi->fib_prefsrc == local) {
1051 fi->fib_flags |= RTNH_F_DEAD;
1052 ret++;
1053 }
1054 }
1055 return ret;
1056}
1057
1058int fib_sync_down_dev(struct net_device *dev, int force)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059{
1060 int ret = 0;
1061 int scope = RT_SCOPE_NOWHERE;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001062 struct fib_info *prev_fi = NULL;
1063 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1064 struct hlist_head *head = &fib_info_devhash[hash];
1065 struct hlist_node *node;
1066 struct fib_nh *nh;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001067
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 if (force)
1069 scope = -1;
1070
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001071 hlist_for_each_entry(nh, node, head, nh_hash) {
1072 struct fib_info *fi = nh->nh_parent;
1073 int dead;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001075 BUG_ON(!fi->fib_nhs);
1076 if (nh->nh_dev != dev || fi == prev_fi)
1077 continue;
1078 prev_fi = fi;
1079 dead = 0;
1080 change_nexthops(fi) {
1081 if (nh->nh_flags&RTNH_F_DEAD)
1082 dead++;
1083 else if (nh->nh_dev == dev &&
1084 nh->nh_scope != scope) {
1085 nh->nh_flags |= RTNH_F_DEAD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001087 spin_lock_bh(&fib_multipath_lock);
1088 fi->fib_power -= nh->nh_power;
1089 nh->nh_power = 0;
1090 spin_unlock_bh(&fib_multipath_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091#endif
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001092 dead++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 }
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001094#ifdef CONFIG_IP_ROUTE_MULTIPATH
1095 if (force > 1 && nh->nh_dev == dev) {
1096 dead = fi->fib_nhs;
1097 break;
1098 }
1099#endif
1100 } endfor_nexthops(fi)
1101 if (dead == fi->fib_nhs) {
1102 fi->fib_flags |= RTNH_F_DEAD;
1103 ret++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 }
1105 }
1106
1107 return ret;
1108}
1109
1110#ifdef CONFIG_IP_ROUTE_MULTIPATH
1111
1112/*
1113 Dead device goes up. We wake up dead nexthops.
1114 It takes sense only on multipath routes.
1115 */
1116
1117int fib_sync_up(struct net_device *dev)
1118{
1119 struct fib_info *prev_fi;
1120 unsigned int hash;
1121 struct hlist_head *head;
1122 struct hlist_node *node;
1123 struct fib_nh *nh;
1124 int ret;
1125
1126 if (!(dev->flags&IFF_UP))
1127 return 0;
1128
1129 prev_fi = NULL;
1130 hash = fib_devindex_hashfn(dev->ifindex);
1131 head = &fib_info_devhash[hash];
1132 ret = 0;
1133
1134 hlist_for_each_entry(nh, node, head, nh_hash) {
1135 struct fib_info *fi = nh->nh_parent;
1136 int alive;
1137
1138 BUG_ON(!fi->fib_nhs);
1139 if (nh->nh_dev != dev || fi == prev_fi)
1140 continue;
1141
1142 prev_fi = fi;
1143 alive = 0;
1144 change_nexthops(fi) {
1145 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1146 alive++;
1147 continue;
1148 }
1149 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1150 continue;
Herbert Xue5ed6392005-10-03 14:35:55 -07001151 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152 continue;
1153 alive++;
1154 spin_lock_bh(&fib_multipath_lock);
1155 nh->nh_power = 0;
1156 nh->nh_flags &= ~RTNH_F_DEAD;
1157 spin_unlock_bh(&fib_multipath_lock);
1158 } endfor_nexthops(fi)
1159
1160 if (alive > 0) {
1161 fi->fib_flags &= ~RTNH_F_DEAD;
1162 ret++;
1163 }
1164 }
1165
1166 return ret;
1167}
1168
1169/*
1170 The algorithm is suboptimal, but it provides really
1171 fair weighted route distribution.
1172 */
1173
1174void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1175{
1176 struct fib_info *fi = res->fi;
1177 int w;
1178
1179 spin_lock_bh(&fib_multipath_lock);
1180 if (fi->fib_power <= 0) {
1181 int power = 0;
1182 change_nexthops(fi) {
1183 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1184 power += nh->nh_weight;
1185 nh->nh_power = nh->nh_weight;
1186 }
1187 } endfor_nexthops(fi);
1188 fi->fib_power = power;
1189 if (power <= 0) {
1190 spin_unlock_bh(&fib_multipath_lock);
1191 /* Race condition: route has just become dead. */
1192 res->nh_sel = 0;
1193 return;
1194 }
1195 }
1196
1197
1198 /* w should be random number [0..fi->fib_power-1],
1199 it is pretty bad approximation.
1200 */
1201
1202 w = jiffies % fi->fib_power;
1203
1204 change_nexthops(fi) {
1205 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1206 if ((w -= nh->nh_power) <= 0) {
1207 nh->nh_power--;
1208 fi->fib_power--;
1209 res->nh_sel = nhsel;
1210 spin_unlock_bh(&fib_multipath_lock);
1211 return;
1212 }
1213 }
1214 } endfor_nexthops(fi);
1215
1216 /* Race condition: route has just become dead. */
1217 res->nh_sel = 0;
1218 spin_unlock_bh(&fib_multipath_lock);
1219}
1220#endif