blob: 1af0ea0fb6a20227c941e8ad910d3634ff2a1319 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <asm/uaccess.h>
17#include <asm/system.h>
18#include <linux/bitops.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
21#include <linux/jiffies.h>
22#include <linux/mm.h>
23#include <linux/string.h>
24#include <linux/socket.h>
25#include <linux/sockios.h>
26#include <linux/errno.h>
27#include <linux/in.h>
28#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020029#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/proc_fs.h>
33#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <linux/init.h>
35
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020036#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <net/ip.h>
38#include <net/protocol.h>
39#include <net/route.h>
40#include <net/tcp.h>
41#include <net/sock.h>
42#include <net/ip_fib.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070043#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070044#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
46#include "fib_lookup.h"
47
Stephen Hemminger832b4c52006-08-29 16:48:09 -070048static DEFINE_SPINLOCK(fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070049static struct hlist_head *fib_info_hash;
50static struct hlist_head *fib_info_laddrhash;
51static unsigned int fib_hash_size;
52static unsigned int fib_info_cnt;
53
54#define DEVINDEX_HASHBITS 8
55#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
56static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
57
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/*
 * Nexthop iterators.  Each macro opens a scope that declares "nhsel"
 * (the nexthop index) and a cursor ("nh", read-only, or "nexthop_nh",
 * writable) and then starts a for-loop over the nexthops of fi.  The
 * scope must be closed with endfor_nexthops().
 */
#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh;				\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh *nexthop_nh;				\
	for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	     nhsel < (fi)->fib_nhs;					\
	     nexthop_nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope, that gcc will optimize it to get rid of dummy loop */

#define for_nexthops(fi) {						\
	int nhsel = 0; const struct fib_nh *nh = (fi)->fib_nh;		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops() / change_nexthops() */
#define endfor_nexthops(fi) }
81
82
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090083static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070084{
85 int error;
86 u8 scope;
Thomas Grafa0ee18b2007-03-24 20:32:54 -070087} fib_props[RTN_MAX + 1] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090088 {
Linus Torvalds1da177e2005-04-16 15:20:36 -070089 .error = 0,
90 .scope = RT_SCOPE_NOWHERE,
91 }, /* RTN_UNSPEC */
92 {
93 .error = 0,
94 .scope = RT_SCOPE_UNIVERSE,
95 }, /* RTN_UNICAST */
96 {
97 .error = 0,
98 .scope = RT_SCOPE_HOST,
99 }, /* RTN_LOCAL */
100 {
101 .error = 0,
102 .scope = RT_SCOPE_LINK,
103 }, /* RTN_BROADCAST */
104 {
105 .error = 0,
106 .scope = RT_SCOPE_LINK,
107 }, /* RTN_ANYCAST */
108 {
109 .error = 0,
110 .scope = RT_SCOPE_UNIVERSE,
111 }, /* RTN_MULTICAST */
112 {
113 .error = -EINVAL,
114 .scope = RT_SCOPE_UNIVERSE,
115 }, /* RTN_BLACKHOLE */
116 {
117 .error = -EHOSTUNREACH,
118 .scope = RT_SCOPE_UNIVERSE,
119 }, /* RTN_UNREACHABLE */
120 {
121 .error = -EACCES,
122 .scope = RT_SCOPE_UNIVERSE,
123 }, /* RTN_PROHIBIT */
124 {
125 .error = -EAGAIN,
126 .scope = RT_SCOPE_UNIVERSE,
127 }, /* RTN_THROW */
128 {
129 .error = -EINVAL,
130 .scope = RT_SCOPE_NOWHERE,
131 }, /* RTN_NAT */
132 {
133 .error = -EINVAL,
134 .scope = RT_SCOPE_NOWHERE,
135 }, /* RTN_XRESOLVE */
136};
137
138
139/* Release a nexthop info record */
140
141void free_fib_info(struct fib_info *fi)
142{
143 if (fi->fib_dead == 0) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800144 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145 return;
146 }
147 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800148 if (nexthop_nh->nh_dev)
149 dev_put(nexthop_nh->nh_dev);
150 nexthop_nh->nh_dev = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151 } endfor_nexthops(fi);
152 fib_info_cnt--;
Denis V. Lunev57d7a602008-04-16 02:00:50 -0700153 release_net(fi->fib_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154 kfree(fi);
155}
156
157void fib_release_info(struct fib_info *fi)
158{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700159 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160 if (fi && --fi->fib_treeref == 0) {
161 hlist_del(&fi->fib_hash);
162 if (fi->fib_prefsrc)
163 hlist_del(&fi->fib_lhash);
164 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800165 if (!nexthop_nh->nh_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 continue;
David S. Miller71fceff2010-01-15 01:16:40 -0800167 hlist_del(&nexthop_nh->nh_hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168 } endfor_nexthops(fi)
169 fi->fib_dead = 1;
170 fib_info_put(fi);
171 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700172 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173}
174
175static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
176{
177 const struct fib_nh *onh = ofi->fib_nh;
178
179 for_nexthops(fi) {
180 if (nh->nh_oif != onh->nh_oif ||
181 nh->nh_gw != onh->nh_gw ||
182 nh->nh_scope != onh->nh_scope ||
183#ifdef CONFIG_IP_ROUTE_MULTIPATH
184 nh->nh_weight != onh->nh_weight ||
185#endif
186#ifdef CONFIG_NET_CLS_ROUTE
187 nh->nh_tclassid != onh->nh_tclassid ||
188#endif
189 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
190 return -1;
191 onh++;
192 } endfor_nexthops(fi);
193 return 0;
194}
195
David S. Miller88ebc722008-01-12 21:49:01 -0800196static inline unsigned int fib_devindex_hashfn(unsigned int val)
197{
198 unsigned int mask = DEVINDEX_HASHSIZE - 1;
199
200 return (val ^
201 (val >> DEVINDEX_HASHBITS) ^
202 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
203}
204
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
206{
207 unsigned int mask = (fib_hash_size - 1);
208 unsigned int val = fi->fib_nhs;
209
210 val ^= fi->fib_protocol;
Al Viro81f7bf62006-09-27 18:40:00 -0700211 val ^= (__force u32)fi->fib_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212 val ^= fi->fib_priority;
David S. Miller88ebc722008-01-12 21:49:01 -0800213 for_nexthops(fi) {
214 val ^= fib_devindex_hashfn(nh->nh_oif);
215 } endfor_nexthops(fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216
217 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
218}
219
220static struct fib_info *fib_find_info(const struct fib_info *nfi)
221{
222 struct hlist_head *head;
223 struct hlist_node *node;
224 struct fib_info *fi;
225 unsigned int hash;
226
227 hash = fib_info_hashfn(nfi);
228 head = &fib_info_hash[hash];
229
230 hlist_for_each_entry(fi, node, head, fib_hash) {
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800231 if (!net_eq(fi->fib_net, nfi->fib_net))
Denis V. Lunev4814bdb2008-01-31 18:50:07 -0800232 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 if (fi->fib_nhs != nfi->fib_nhs)
234 continue;
235 if (nfi->fib_protocol == fi->fib_protocol &&
236 nfi->fib_prefsrc == fi->fib_prefsrc &&
237 nfi->fib_priority == fi->fib_priority &&
238 memcmp(nfi->fib_metrics, fi->fib_metrics,
239 sizeof(fi->fib_metrics)) == 0 &&
240 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
241 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
242 return fi;
243 }
244
245 return NULL;
246}
247
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248/* Check, that the gateway is already configured.
249 Used only by redirect accept routine.
250 */
251
Al Virod878e72e2006-09-26 22:18:13 -0700252int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253{
254 struct hlist_head *head;
255 struct hlist_node *node;
256 struct fib_nh *nh;
257 unsigned int hash;
258
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700259 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260
261 hash = fib_devindex_hashfn(dev->ifindex);
262 head = &fib_info_devhash[hash];
263 hlist_for_each_entry(nh, node, head, nh_hash) {
264 if (nh->nh_dev == dev &&
265 nh->nh_gw == gw &&
266 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700267 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 return 0;
269 }
270 }
271
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700272 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273
274 return -1;
275}
276
Thomas Graf339bf982006-11-10 14:10:15 -0800277static inline size_t fib_nlmsg_size(struct fib_info *fi)
278{
279 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
280 + nla_total_size(4) /* RTA_TABLE */
281 + nla_total_size(4) /* RTA_DST */
282 + nla_total_size(4) /* RTA_PRIORITY */
283 + nla_total_size(4); /* RTA_PREFSRC */
284
285 /* space for nested metrics */
286 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
287
288 if (fi->fib_nhs) {
289 /* Also handles the special case fib_nhs == 1 */
290
291 /* each nexthop is packed in an attribute */
292 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
293
294 /* may contain flow and gateway attribute */
295 nhsize += 2 * nla_total_size(4);
296
297 /* all nexthops are packed in a nested attribute */
298 payload += nla_total_size(fi->fib_nhs * nhsize);
299 }
300
301 return payload;
302}
303
Al Viro81f7bf62006-09-27 18:40:00 -0700304void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
Milan Kocianb8f55832007-05-23 14:55:06 -0700305 int dst_len, u32 tb_id, struct nl_info *info,
306 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307{
308 struct sk_buff *skb;
Thomas Graf4e902c52006-08-17 18:14:52 -0700309 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700310 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311
Thomas Graf339bf982006-11-10 14:10:15 -0800312 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700313 if (skb == NULL)
314 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315
Thomas Graf4e902c52006-08-17 18:14:52 -0700316 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700317 fa->fa_type, fa->fa_scope, key, dst_len,
Milan Kocianb8f55832007-05-23 14:55:06 -0700318 fa->fa_tos, fa->fa_info, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -0800319 if (err < 0) {
320 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
321 WARN_ON(err == -EMSGSIZE);
322 kfree_skb(skb);
323 goto errout;
324 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -0800325 rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
326 info->nlh, GFP_KERNEL);
327 return;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700328errout:
329 if (err < 0)
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800330 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331}
332
333/* Return the first fib alias matching TOS with
334 * priority less than or equal to PRIO.
335 */
336struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
337{
338 if (fah) {
339 struct fib_alias *fa;
340 list_for_each_entry(fa, fah, fa_list) {
341 if (fa->fa_tos > tos)
342 continue;
343 if (fa->fa_info->fib_priority >= prio ||
344 fa->fa_tos < tos)
345 return fa;
346 }
347 }
348 return NULL;
349}
350
351int fib_detect_death(struct fib_info *fi, int order,
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800352 struct fib_info **last_resort, int *last_idx, int dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353{
354 struct neighbour *n;
355 int state = NUD_NONE;
356
357 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
358 if (n) {
359 state = n->nud_state;
360 neigh_release(n);
361 }
Jianjun Kongd93191002008-11-03 00:23:42 -0800362 if (state == NUD_REACHABLE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 return 0;
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800364 if ((state&NUD_VALID) && order != dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 return 0;
366 if ((state&NUD_VALID) ||
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800367 (*last_idx<0 && order > dflt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 *last_resort = fi;
369 *last_idx = order;
370 }
371 return 1;
372}
373
374#ifdef CONFIG_IP_ROUTE_MULTIPATH
375
/*
 * Count the rtnexthop entries in a buffer of "remaining" bytes starting
 * at rtnh.  Returns 0 if bytes are left over after the last well-formed
 * entry.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
388
Thomas Graf4e902c52006-08-17 18:14:52 -0700389static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
390 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700393 int attrlen;
394
395 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700397
David S. Miller71fceff2010-01-15 01:16:40 -0800398 nexthop_nh->nh_flags =
399 (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
400 nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
401 nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700402
403 attrlen = rtnh_attrlen(rtnh);
404 if (attrlen > 0) {
405 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
406
407 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
David S. Miller71fceff2010-01-15 01:16:40 -0800408 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700410 nla = nla_find(attrs, attrlen, RTA_FLOW);
David S. Miller71fceff2010-01-15 01:16:40 -0800411 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412#endif
413 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700414
415 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700417
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 return 0;
419}
420
421#endif
422
Thomas Graf4e902c52006-08-17 18:14:52 -0700423int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424{
425#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700426 struct rtnexthop *rtnh;
427 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428#endif
429
Thomas Graf4e902c52006-08-17 18:14:52 -0700430 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 return 1;
432
Thomas Graf4e902c52006-08-17 18:14:52 -0700433 if (cfg->fc_oif || cfg->fc_gw) {
434 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
435 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 return 0;
437 return 1;
438 }
439
440#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700441 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700443
444 rtnh = cfg->fc_mp;
445 remaining = cfg->fc_mp_len;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900446
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700448 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449
Thomas Graf4e902c52006-08-17 18:14:52 -0700450 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700452
453 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700455
456 attrlen = rtnh_attrlen(rtnh);
457 if (attrlen < 0) {
458 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
459
460 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700461 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 return 1;
463#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700464 nla = nla_find(attrs, attrlen, RTA_FLOW);
465 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466 return 1;
467#endif
468 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700469
470 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 } endfor_nexthops(fi);
472#endif
473 return 0;
474}
475
476
477/*
478 Picture
479 -------
480
481 Semantics of nexthop is very messy by historical reasons.
482 We have to take into account, that:
483 a) gateway can be actually local interface address,
484 so that gatewayed route is direct.
485 b) gateway must be on-link address, possibly
486 described not by an ifaddr, but also by a direct route.
487 c) If both gateway and interface are specified, they should not
488 contradict.
489 d) If we use tunnel routes, gateway could be not on-link.
490
491 Attempt to reconcile all of these (alas, self-contradictory) conditions
492 results in pretty ugly and hairy code with obscure logic.
493
494 I chose to generalized it instead, so that the size
495 of code does not increase practically, but it becomes
496 much more general.
497 Every prefix is assigned a "scope" value: "host" is local address,
498 "link" is direct route,
499 [ ... "site" ... "interior" ... ]
500 and "universe" is true gateway route with global meaning.
501
502 Every prefix refers to a set of "nexthop"s (gw, oif),
503 where gw must have narrower scope. This recursion stops
504 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
505 which means that gw is forced to be on link.
506
507 Code is still hairy, but now it is apparently logically
508 consistent and very flexible. F.e. as by-product it allows
509 to co-exists in peace independent exterior and interior
510 routing processes.
511
512 Normally it looks as following.
513
514 {universe prefix} -> (gw, oif) [scope link]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900515 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 |-> {link prefix} -> (gw, oif) [scope local]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900517 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518 |-> {local prefix} (terminal node)
519 */
520
Thomas Graf4e902c52006-08-17 18:14:52 -0700521static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
522 struct fib_nh *nh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523{
524 int err;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800525 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526
Denis V. Lunev86167a32008-01-21 17:34:00 -0800527 net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 if (nh->nh_gw) {
529 struct fib_result res;
530
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 if (nh->nh_flags&RTNH_F_ONLINK) {
532 struct net_device *dev;
533
Thomas Graf4e902c52006-08-17 18:14:52 -0700534 if (cfg->fc_scope >= RT_SCOPE_LINK)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 return -EINVAL;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800536 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 return -EINVAL;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800538 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539 return -ENODEV;
540 if (!(dev->flags&IFF_UP))
541 return -ENETDOWN;
542 nh->nh_dev = dev;
543 dev_hold(dev);
544 nh->nh_scope = RT_SCOPE_LINK;
545 return 0;
546 }
547 {
Thomas Graf4e902c52006-08-17 18:14:52 -0700548 struct flowi fl = {
549 .nl_u = {
550 .ip4_u = {
551 .daddr = nh->nh_gw,
552 .scope = cfg->fc_scope + 1,
553 },
554 },
555 .oif = nh->nh_oif,
556 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557
558 /* It is not necessary, but requires a bit of thinking */
559 if (fl.fl4_scope < RT_SCOPE_LINK)
560 fl.fl4_scope = RT_SCOPE_LINK;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800561 if ((err = fib_lookup(net, &fl, &res)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 return err;
563 }
564 err = -EINVAL;
565 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
566 goto out;
567 nh->nh_scope = res.scope;
568 nh->nh_oif = FIB_RES_OIF(res);
569 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
570 goto out;
571 dev_hold(nh->nh_dev);
572 err = -ENETDOWN;
573 if (!(nh->nh_dev->flags & IFF_UP))
574 goto out;
575 err = 0;
576out:
577 fib_res_put(&res);
578 return err;
579 } else {
580 struct in_device *in_dev;
581
582 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
583 return -EINVAL;
584
Denis V. Lunev86167a32008-01-21 17:34:00 -0800585 in_dev = inetdev_by_index(net, nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 if (in_dev == NULL)
587 return -ENODEV;
588 if (!(in_dev->dev->flags&IFF_UP)) {
589 in_dev_put(in_dev);
590 return -ENETDOWN;
591 }
592 nh->nh_dev = in_dev->dev;
593 dev_hold(nh->nh_dev);
594 nh->nh_scope = RT_SCOPE_HOST;
595 in_dev_put(in_dev);
596 }
597 return 0;
598}
599
Al Viro81f7bf62006-09-27 18:40:00 -0700600static inline unsigned int fib_laddr_hashfn(__be32 val)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601{
602 unsigned int mask = (fib_hash_size - 1);
603
Al Viro81f7bf62006-09-27 18:40:00 -0700604 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605}
606
607static struct hlist_head *fib_hash_alloc(int bytes)
608{
609 if (bytes <= PAGE_SIZE)
Joonwoo Park88f83492007-11-26 23:29:32 +0800610 return kzalloc(bytes, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611 else
612 return (struct hlist_head *)
Joonwoo Park88f83492007-11-26 23:29:32 +0800613 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614}
615
616static void fib_hash_free(struct hlist_head *hash, int bytes)
617{
618 if (!hash)
619 return;
620
621 if (bytes <= PAGE_SIZE)
622 kfree(hash);
623 else
624 free_pages((unsigned long) hash, get_order(bytes));
625}
626
627static void fib_hash_move(struct hlist_head *new_info_hash,
628 struct hlist_head *new_laddrhash,
629 unsigned int new_size)
630{
David S. Millerb7656e72005-08-05 04:12:48 -0700631 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700633 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700635 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700636 old_info_hash = fib_info_hash;
637 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 fib_hash_size = new_size;
639
640 for (i = 0; i < old_size; i++) {
641 struct hlist_head *head = &fib_info_hash[i];
642 struct hlist_node *node, *n;
643 struct fib_info *fi;
644
645 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
646 struct hlist_head *dest;
647 unsigned int new_hash;
648
649 hlist_del(&fi->fib_hash);
650
651 new_hash = fib_info_hashfn(fi);
652 dest = &new_info_hash[new_hash];
653 hlist_add_head(&fi->fib_hash, dest);
654 }
655 }
656 fib_info_hash = new_info_hash;
657
658 for (i = 0; i < old_size; i++) {
659 struct hlist_head *lhead = &fib_info_laddrhash[i];
660 struct hlist_node *node, *n;
661 struct fib_info *fi;
662
663 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
664 struct hlist_head *ldest;
665 unsigned int new_hash;
666
667 hlist_del(&fi->fib_lhash);
668
669 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
670 ldest = &new_laddrhash[new_hash];
671 hlist_add_head(&fi->fib_lhash, ldest);
672 }
673 }
674 fib_info_laddrhash = new_laddrhash;
675
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700676 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700677
678 bytes = old_size * sizeof(struct hlist_head *);
679 fib_hash_free(old_info_hash, bytes);
680 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681}
682
Thomas Graf4e902c52006-08-17 18:14:52 -0700683struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684{
685 int err;
686 struct fib_info *fi = NULL;
687 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688 int nhs = 1;
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800689 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690
691 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700692 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693 goto err_inval;
694
695#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700696 if (cfg->fc_mp) {
697 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 if (nhs == 0)
699 goto err_inval;
700 }
701#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702
703 err = -ENOBUFS;
704 if (fib_info_cnt >= fib_hash_size) {
705 unsigned int new_size = fib_hash_size << 1;
706 struct hlist_head *new_info_hash;
707 struct hlist_head *new_laddrhash;
708 unsigned int bytes;
709
710 if (!new_size)
711 new_size = 1;
712 bytes = new_size * sizeof(struct hlist_head *);
713 new_info_hash = fib_hash_alloc(bytes);
714 new_laddrhash = fib_hash_alloc(bytes);
715 if (!new_info_hash || !new_laddrhash) {
716 fib_hash_free(new_info_hash, bytes);
717 fib_hash_free(new_laddrhash, bytes);
Joonwoo Park88f83492007-11-26 23:29:32 +0800718 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 fib_hash_move(new_info_hash, new_laddrhash, new_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
721 if (!fib_hash_size)
722 goto failure;
723 }
724
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700725 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726 if (fi == NULL)
727 goto failure;
728 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729
Denis V. Lunev57d7a602008-04-16 02:00:50 -0700730 fi->fib_net = hold_net(net);
Thomas Graf4e902c52006-08-17 18:14:52 -0700731 fi->fib_protocol = cfg->fc_protocol;
732 fi->fib_flags = cfg->fc_flags;
733 fi->fib_priority = cfg->fc_priority;
734 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735
736 fi->fib_nhs = nhs;
737 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800738 nexthop_nh->nh_parent = fi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 } endfor_nexthops(fi)
740
Thomas Graf4e902c52006-08-17 18:14:52 -0700741 if (cfg->fc_mx) {
742 struct nlattr *nla;
743 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744
Thomas Graf4e902c52006-08-17 18:14:52 -0700745 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200746 int type = nla_type(nla);
Thomas Graf4e902c52006-08-17 18:14:52 -0700747
748 if (type) {
749 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700751 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753 }
754 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755
Thomas Graf4e902c52006-08-17 18:14:52 -0700756 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700758 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
759 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700761 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700763 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764 goto err_inval;
765#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700766 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767 goto err_inval;
768#endif
769#else
770 goto err_inval;
771#endif
772 } else {
773 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700774
775 nh->nh_oif = cfg->fc_oif;
776 nh->nh_gw = cfg->fc_gw;
777 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700779 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781#ifdef CONFIG_IP_ROUTE_MULTIPATH
782 nh->nh_weight = 1;
783#endif
784 }
785
Thomas Graf4e902c52006-08-17 18:14:52 -0700786 if (fib_props[cfg->fc_type].error) {
787 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 goto err_inval;
789 goto link_it;
790 }
791
Thomas Graf4e902c52006-08-17 18:14:52 -0700792 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 goto err_inval;
794
Thomas Graf4e902c52006-08-17 18:14:52 -0700795 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796 struct fib_nh *nh = fi->fib_nh;
797
798 /* Local address is added. */
799 if (nhs != 1 || nh->nh_gw)
800 goto err_inval;
801 nh->nh_scope = RT_SCOPE_NOWHERE;
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800802 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803 err = -ENODEV;
804 if (nh->nh_dev == NULL)
805 goto failure;
806 } else {
807 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800808 if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 goto failure;
810 } endfor_nexthops(fi)
811 }
812
813 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700814 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
815 fi->fib_prefsrc != cfg->fc_dst)
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800816 if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 goto err_inval;
818 }
819
820link_it:
821 if ((ofi = fib_find_info(fi)) != NULL) {
822 fi->fib_dead = 1;
823 free_fib_info(fi);
824 ofi->fib_treeref++;
825 return ofi;
826 }
827
828 fi->fib_treeref++;
829 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700830 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831 hlist_add_head(&fi->fib_hash,
832 &fib_info_hash[fib_info_hashfn(fi)]);
833 if (fi->fib_prefsrc) {
834 struct hlist_head *head;
835
836 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
837 hlist_add_head(&fi->fib_lhash, head);
838 }
839 change_nexthops(fi) {
840 struct hlist_head *head;
841 unsigned int hash;
842
David S. Miller71fceff2010-01-15 01:16:40 -0800843 if (!nexthop_nh->nh_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 continue;
David S. Miller71fceff2010-01-15 01:16:40 -0800845 hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 head = &fib_info_devhash[hash];
David S. Miller71fceff2010-01-15 01:16:40 -0800847 hlist_add_head(&nexthop_nh->nh_hash, head);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700849 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 return fi;
851
852err_inval:
853 err = -EINVAL;
854
855failure:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900856 if (fi) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857 fi->fib_dead = 1;
858 free_fib_info(fi);
859 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700860
861 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862}
863
Robert Olssone5b43762005-08-25 13:01:03 -0700864/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865int fib_semantic_match(struct list_head *head, const struct flowi *flp,
Rami Rosene204a342009-05-18 01:19:12 +0000866 struct fib_result *res, int prefixlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867{
868 struct fib_alias *fa;
869 int nh_sel = 0;
870
Robert Olssone5b43762005-08-25 13:01:03 -0700871 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 int err;
873
874 if (fa->fa_tos &&
875 fa->fa_tos != flp->fl4_tos)
876 continue;
877
878 if (fa->fa_scope < flp->fl4_scope)
879 continue;
880
881 fa->fa_state |= FA_S_ACCESSED;
882
883 err = fib_props[fa->fa_type].error;
884 if (err == 0) {
885 struct fib_info *fi = fa->fa_info;
886
887 if (fi->fib_flags & RTNH_F_DEAD)
888 continue;
889
890 switch (fa->fa_type) {
891 case RTN_UNICAST:
892 case RTN_LOCAL:
893 case RTN_BROADCAST:
894 case RTN_ANYCAST:
895 case RTN_MULTICAST:
896 for_nexthops(fi) {
897 if (nh->nh_flags&RTNH_F_DEAD)
898 continue;
899 if (!flp->oif || flp->oif == nh->nh_oif)
900 break;
901 }
902#ifdef CONFIG_IP_ROUTE_MULTIPATH
903 if (nhsel < fi->fib_nhs) {
904 nh_sel = nhsel;
905 goto out_fill_res;
906 }
907#else
908 if (nhsel < 1) {
909 goto out_fill_res;
910 }
911#endif
912 endfor_nexthops(fi);
913 continue;
914
915 default:
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800916 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
917 fa->fa_type);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 return -EINVAL;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -0700919 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920 }
921 return err;
922 }
923 return 1;
924
925out_fill_res:
926 res->prefixlen = prefixlen;
927 res->nh_sel = nh_sel;
928 res->type = fa->fa_type;
929 res->scope = fa->fa_scope;
930 res->fi = fa->fa_info;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 atomic_inc(&res->fi->fib_clntref);
932 return 0;
933}
934
935/* Find appropriate source address to this destination */
936
Al Virob83738a2006-09-26 22:14:15 -0700937__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938{
939 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
940}
941
Thomas Grafbe403ea2006-08-17 18:15:17 -0700942int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Al Viro81f7bf62006-09-27 18:40:00 -0700943 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700944 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700946 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948
Thomas Grafbe403ea2006-08-17 18:15:17 -0700949 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
950 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -0800951 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700952
953 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 rtm->rtm_family = AF_INET;
955 rtm->rtm_dst_len = dst_len;
956 rtm->rtm_src_len = 0;
957 rtm->rtm_tos = tos;
Krzysztof Piotr Oledzki709772e2008-06-10 15:44:49 -0700958 if (tb_id < 256)
959 rtm->rtm_table = tb_id;
960 else
961 rtm->rtm_table = RT_TABLE_COMPAT;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700962 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 rtm->rtm_type = type;
964 rtm->rtm_flags = fi->fib_flags;
965 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700967
968 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700969 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700970
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700972 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
973
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700975 goto nla_put_failure;
976
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700978 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700979
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980 if (fi->fib_nhs == 1) {
981 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700982 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700983
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700985 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700986#ifdef CONFIG_NET_CLS_ROUTE
987 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700988 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700989#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 }
991#ifdef CONFIG_IP_ROUTE_MULTIPATH
992 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700993 struct rtnexthop *rtnh;
994 struct nlattr *mp;
995
996 mp = nla_nest_start(skb, RTA_MULTIPATH);
997 if (mp == NULL)
998 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999
1000 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -07001001 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1002 if (rtnh == NULL)
1003 goto nla_put_failure;
1004
1005 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1006 rtnh->rtnh_hops = nh->nh_weight - 1;
1007 rtnh->rtnh_ifindex = nh->nh_oif;
1008
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -07001010 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001011#ifdef CONFIG_NET_CLS_ROUTE
1012 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001013 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001014#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001015 /* length of rtnetlink header + attributes */
1016 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001018
1019 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 }
1021#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001022 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023
Thomas Grafbe403ea2006-08-17 18:15:17 -07001024nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08001025 nlmsg_cancel(skb, nlh);
1026 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027}
1028
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029/*
1030 Update FIB if:
1031 - local address disappeared -> we must delete all the entries
1032 referring to it.
1033 - device went down -> we must shutdown all nexthops going via it.
1034 */
Denis V. Lunev4814bdb2008-01-31 18:50:07 -08001035int fib_sync_down_addr(struct net *net, __be32 local)
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001036{
1037 int ret = 0;
1038 unsigned int hash = fib_laddr_hashfn(local);
1039 struct hlist_head *head = &fib_info_laddrhash[hash];
1040 struct hlist_node *node;
1041 struct fib_info *fi;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001043 if (fib_info_laddrhash == NULL || local == 0)
1044 return 0;
1045
1046 hlist_for_each_entry(fi, node, head, fib_lhash) {
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08001047 if (!net_eq(fi->fib_net, net))
Denis V. Lunev4814bdb2008-01-31 18:50:07 -08001048 continue;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001049 if (fi->fib_prefsrc == local) {
1050 fi->fib_flags |= RTNH_F_DEAD;
1051 ret++;
1052 }
1053 }
1054 return ret;
1055}
1056
1057int fib_sync_down_dev(struct net_device *dev, int force)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058{
1059 int ret = 0;
1060 int scope = RT_SCOPE_NOWHERE;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001061 struct fib_info *prev_fi = NULL;
1062 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1063 struct hlist_head *head = &fib_info_devhash[hash];
1064 struct hlist_node *node;
1065 struct fib_nh *nh;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001066
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 if (force)
1068 scope = -1;
1069
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001070 hlist_for_each_entry(nh, node, head, nh_hash) {
1071 struct fib_info *fi = nh->nh_parent;
1072 int dead;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001074 BUG_ON(!fi->fib_nhs);
1075 if (nh->nh_dev != dev || fi == prev_fi)
1076 continue;
1077 prev_fi = fi;
1078 dead = 0;
1079 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001080 if (nexthop_nh->nh_flags&RTNH_F_DEAD)
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001081 dead++;
David S. Miller71fceff2010-01-15 01:16:40 -08001082 else if (nexthop_nh->nh_dev == dev &&
1083 nexthop_nh->nh_scope != scope) {
1084 nexthop_nh->nh_flags |= RTNH_F_DEAD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001086 spin_lock_bh(&fib_multipath_lock);
David S. Miller71fceff2010-01-15 01:16:40 -08001087 fi->fib_power -= nexthop_nh->nh_power;
1088 nexthop_nh->nh_power = 0;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001089 spin_unlock_bh(&fib_multipath_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090#endif
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001091 dead++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 }
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001093#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller71fceff2010-01-15 01:16:40 -08001094 if (force > 1 && nexthop_nh->nh_dev == dev) {
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001095 dead = fi->fib_nhs;
1096 break;
1097 }
1098#endif
1099 } endfor_nexthops(fi)
1100 if (dead == fi->fib_nhs) {
1101 fi->fib_flags |= RTNH_F_DEAD;
1102 ret++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 }
1104 }
1105
1106 return ret;
1107}
1108
1109#ifdef CONFIG_IP_ROUTE_MULTIPATH
1110
1111/*
1112 Dead device goes up. We wake up dead nexthops.
1113 It takes sense only on multipath routes.
1114 */
1115
1116int fib_sync_up(struct net_device *dev)
1117{
1118 struct fib_info *prev_fi;
1119 unsigned int hash;
1120 struct hlist_head *head;
1121 struct hlist_node *node;
1122 struct fib_nh *nh;
1123 int ret;
1124
1125 if (!(dev->flags&IFF_UP))
1126 return 0;
1127
1128 prev_fi = NULL;
1129 hash = fib_devindex_hashfn(dev->ifindex);
1130 head = &fib_info_devhash[hash];
1131 ret = 0;
1132
1133 hlist_for_each_entry(nh, node, head, nh_hash) {
1134 struct fib_info *fi = nh->nh_parent;
1135 int alive;
1136
1137 BUG_ON(!fi->fib_nhs);
1138 if (nh->nh_dev != dev || fi == prev_fi)
1139 continue;
1140
1141 prev_fi = fi;
1142 alive = 0;
1143 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001144 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 alive++;
1146 continue;
1147 }
David S. Miller71fceff2010-01-15 01:16:40 -08001148 if (nexthop_nh->nh_dev == NULL ||
1149 !(nexthop_nh->nh_dev->flags&IFF_UP))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 continue;
David S. Miller71fceff2010-01-15 01:16:40 -08001151 if (nexthop_nh->nh_dev != dev ||
1152 !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153 continue;
1154 alive++;
1155 spin_lock_bh(&fib_multipath_lock);
David S. Miller71fceff2010-01-15 01:16:40 -08001156 nexthop_nh->nh_power = 0;
1157 nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 spin_unlock_bh(&fib_multipath_lock);
1159 } endfor_nexthops(fi)
1160
1161 if (alive > 0) {
1162 fi->fib_flags &= ~RTNH_F_DEAD;
1163 ret++;
1164 }
1165 }
1166
1167 return ret;
1168}
1169
1170/*
1171 The algorithm is suboptimal, but it provides really
1172 fair weighted route distribution.
1173 */
1174
1175void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1176{
1177 struct fib_info *fi = res->fi;
1178 int w;
1179
1180 spin_lock_bh(&fib_multipath_lock);
1181 if (fi->fib_power <= 0) {
1182 int power = 0;
1183 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001184 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
1185 power += nexthop_nh->nh_weight;
1186 nexthop_nh->nh_power = nexthop_nh->nh_weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187 }
1188 } endfor_nexthops(fi);
1189 fi->fib_power = power;
1190 if (power <= 0) {
1191 spin_unlock_bh(&fib_multipath_lock);
1192 /* Race condition: route has just become dead. */
1193 res->nh_sel = 0;
1194 return;
1195 }
1196 }
1197
1198
1199 /* w should be random number [0..fi->fib_power-1],
1200 it is pretty bad approximation.
1201 */
1202
1203 w = jiffies % fi->fib_power;
1204
1205 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001206 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) &&
1207 nexthop_nh->nh_power) {
1208 if ((w -= nexthop_nh->nh_power) <= 0) {
1209 nexthop_nh->nh_power--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210 fi->fib_power--;
1211 res->nh_sel = nhsel;
1212 spin_unlock_bh(&fib_multipath_lock);
1213 return;
1214 }
1215 }
1216 } endfor_nexthops(fi);
1217
1218 /* Race condition: route has just become dead. */
1219 res->nh_sel = 0;
1220 spin_unlock_bh(&fib_multipath_lock);
1221}
1222#endif