blob: 1d4cd3b4fd69abd560b0f87d1c4df58c1db9cd78 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Linus Torvalds1da177e2005-04-16 15:20:36 -070065#include <linux/module.h>
66#include <asm/uaccess.h>
67#include <asm/system.h>
68#include <linux/bitops.h>
69#include <linux/types.h>
70#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070071#include <linux/mm.h>
Eric Dumazet424c4b72005-07-05 14:58:19 -070072#include <linux/bootmem.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070073#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
Eric Dumazet39c90ec2007-09-15 10:55:54 -070082#include <linux/workqueue.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070084#include <linux/inetdevice.h>
85#include <linux/igmp.h>
86#include <linux/pkt_sched.h>
87#include <linux/mroute.h>
88#include <linux/netfilter_ipv4.h>
89#include <linux/random.h>
90#include <linux/jhash.h>
91#include <linux/rcupdate.h>
92#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090093#include <linux/slab.h>
Herbert Xu352e5122007-11-13 21:34:06 -080094#include <net/dst.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020095#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070096#include <net/protocol.h>
97#include <net/ip.h>
98#include <net/route.h>
99#include <net/inetpeer.h>
100#include <net/sock.h>
101#include <net/ip_fib.h>
102#include <net/arp.h>
103#include <net/tcp.h>
104#include <net/icmp.h>
105#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700106#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700107#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108#ifdef CONFIG_SYSCTL
109#include <linux/sysctl.h>
110#endif
David Miller3769cff2011-07-11 22:44:24 +0000111#include <net/atmclip.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112
David S. Miller68a5e3d2011-03-11 20:07:33 -0500113#define RT_FL_TOS(oldflp4) \
114 ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115
116#define IP_MAX_MTU 0xFFF0
117
118#define RT_GC_TIMEOUT (300*HZ)
119
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120static int ip_rt_max_size;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700121static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
122static int ip_rt_gc_interval __read_mostly = 60 * HZ;
123static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
124static int ip_rt_redirect_number __read_mostly = 9;
125static int ip_rt_redirect_load __read_mostly = HZ / 50;
126static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
127static int ip_rt_error_cost __read_mostly = HZ;
128static int ip_rt_error_burst __read_mostly = 5 * HZ;
129static int ip_rt_gc_elasticity __read_mostly = 8;
130static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
131static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
132static int ip_rt_min_advmss __read_mostly = 256;
Neil Horman1080d702008-10-27 12:28:25 -0700133static int rt_chain_length_max __read_mostly = 20;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135/*
136 * Interface to generic destination cache.
137 */
138
139static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800140static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -0800141static unsigned int ipv4_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
144static void ipv4_link_failure(struct sk_buff *skb);
145static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
Daniel Lezcano569d3642008-01-18 03:56:57 -0800146static int rt_garbage_collect(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147
/*
 * Device-down callback for IPv4 dst entries: nothing to tear down here,
 * the generic dst layer handles everything we care about.
 */
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152
David S. Miller62fa8a82011-01-26 20:51:05 -0800153static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
154{
David S. Miller06582542011-01-27 14:58:42 -0800155 struct rtable *rt = (struct rtable *) dst;
156 struct inet_peer *peer;
157 u32 *p = NULL;
David S. Miller62fa8a82011-01-26 20:51:05 -0800158
David S. Miller06582542011-01-27 14:58:42 -0800159 if (!rt->peer)
David S. Millera48eff12011-05-18 18:42:43 -0400160 rt_bind_peer(rt, rt->rt_dst, 1);
David S. Miller06582542011-01-27 14:58:42 -0800161
162 peer = rt->peer;
163 if (peer) {
David S. Miller62fa8a82011-01-26 20:51:05 -0800164 u32 *old_p = __DST_METRICS_PTR(old);
165 unsigned long prev, new;
166
David S. Miller06582542011-01-27 14:58:42 -0800167 p = peer->metrics;
168 if (inet_metrics_new(peer))
169 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
David S. Miller62fa8a82011-01-26 20:51:05 -0800170
171 new = (unsigned long) p;
172 prev = cmpxchg(&dst->_metrics, old, new);
173
174 if (prev != old) {
David S. Miller62fa8a82011-01-26 20:51:05 -0800175 p = __DST_METRICS_PTR(prev);
176 if (prev & DST_METRICS_READ_ONLY)
177 p = NULL;
178 } else {
David S. Miller62fa8a82011-01-26 20:51:05 -0800179 if (rt->fi) {
180 fib_info_put(rt->fi);
181 rt->fi = NULL;
182 }
183 }
184 }
185 return p;
186}
187
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188static struct dst_ops ipv4_dst_ops = {
189 .family = AF_INET,
Harvey Harrison09640e62009-02-01 00:45:17 -0800190 .protocol = cpu_to_be16(ETH_P_IP),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 .gc = rt_garbage_collect,
192 .check = ipv4_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800193 .default_advmss = ipv4_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800194 .default_mtu = ipv4_default_mtu,
David S. Miller62fa8a82011-01-26 20:51:05 -0800195 .cow_metrics = ipv4_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 .destroy = ipv4_dst_destroy,
197 .ifdown = ipv4_dst_ifdown,
198 .negative_advice = ipv4_negative_advice,
199 .link_failure = ipv4_link_failure,
200 .update_pmtu = ip_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700201 .local_out = __ip_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202};
203
204#define ECN_OR_COST(class) TC_PRIO_##class
205
Philippe De Muyter4839c522007-07-09 15:32:57 -0700206const __u8 ip_tos2prio[16] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 TC_PRIO_BESTEFFORT,
Dan Siemon4a2b9c32011-03-15 13:56:07 +0000208 ECN_OR_COST(BESTEFFORT),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209 TC_PRIO_BESTEFFORT,
210 ECN_OR_COST(BESTEFFORT),
211 TC_PRIO_BULK,
212 ECN_OR_COST(BULK),
213 TC_PRIO_BULK,
214 ECN_OR_COST(BULK),
215 TC_PRIO_INTERACTIVE,
216 ECN_OR_COST(INTERACTIVE),
217 TC_PRIO_INTERACTIVE,
218 ECN_OR_COST(INTERACTIVE),
219 TC_PRIO_INTERACTIVE_BULK,
220 ECN_OR_COST(INTERACTIVE_BULK),
221 TC_PRIO_INTERACTIVE_BULK,
222 ECN_OR_COST(INTERACTIVE_BULK)
223};
224
225
226/*
227 * Route cache.
228 */
229
230/* The locking scheme is rather straight forward:
231 *
232 * 1) Read-Copy Update protects the buckets of the central route hash.
233 * 2) Only writers remove entries, and they hold the lock
234 * as they look at rtable reference counts.
235 * 3) Only readers acquire references to rtable entries,
236 * they do so with atomic increments and with the
237 * lock held.
238 */
239
240struct rt_hash_bucket {
Eric Dumazet1c317202010-10-25 21:02:07 +0000241 struct rtable __rcu *chain;
Eric Dumazet22c047c2005-07-05 14:55:24 -0700242};
Neil Horman1080d702008-10-27 12:28:25 -0700243
Ingo Molnar8a25d5d2006-07-03 00:24:54 -0700244#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
245 defined(CONFIG_PROVE_LOCKING)
Eric Dumazet22c047c2005-07-05 14:55:24 -0700246/*
247 * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
248 * The size of this table is a power of two and depends on the number of CPUS.
Ingo Molnar62051202006-07-03 00:24:59 -0700249 * (on lockdep we have a quite big spinlock_t, so keep the size down there)
Eric Dumazet22c047c2005-07-05 14:55:24 -0700250 */
Ingo Molnar62051202006-07-03 00:24:59 -0700251#ifdef CONFIG_LOCKDEP
252# define RT_HASH_LOCK_SZ 256
Eric Dumazet22c047c2005-07-05 14:55:24 -0700253#else
Ingo Molnar62051202006-07-03 00:24:59 -0700254# if NR_CPUS >= 32
255# define RT_HASH_LOCK_SZ 4096
256# elif NR_CPUS >= 16
257# define RT_HASH_LOCK_SZ 2048
258# elif NR_CPUS >= 8
259# define RT_HASH_LOCK_SZ 1024
260# elif NR_CPUS >= 4
261# define RT_HASH_LOCK_SZ 512
262# else
263# define RT_HASH_LOCK_SZ 256
264# endif
Eric Dumazet22c047c2005-07-05 14:55:24 -0700265#endif
266
267static spinlock_t *rt_hash_locks;
268# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
Pavel Emelyanov1ff1cc22007-12-05 21:15:05 -0800269
270static __init void rt_hash_lock_init(void)
271{
272 int i;
273
274 rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ,
275 GFP_KERNEL);
276 if (!rt_hash_locks)
277 panic("IP: failed to allocate rt_hash_locks\n");
278
279 for (i = 0; i < RT_HASH_LOCK_SZ; i++)
280 spin_lock_init(&rt_hash_locks[i]);
281}
Eric Dumazet22c047c2005-07-05 14:55:24 -0700282#else
283# define rt_hash_lock_addr(slot) NULL
Pavel Emelyanov1ff1cc22007-12-05 21:15:05 -0800284
/* UP build without lock debugging: no lock table to set up. */
static inline void rt_hash_lock_init(void)
{
}
Eric Dumazet22c047c2005-07-05 14:55:24 -0700288#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700290static struct rt_hash_bucket *rt_hash_table __read_mostly;
291static unsigned rt_hash_mask __read_mostly;
292static unsigned int rt_hash_log __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293
Eric Dumazet2f970d82006-01-17 02:54:36 -0800294static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Eric Dumazet27f39c73e2010-05-19 22:07:23 +0000295#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296
Denis V. Lunevb00180d2008-07-05 19:04:09 -0700297static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
Eric Dumazet0eae88f2010-04-20 19:06:52 -0700298 int genid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299{
Eric Dumazet0eae88f2010-04-20 19:06:52 -0700300 return jhash_3words((__force u32)daddr, (__force u32)saddr,
Denis V. Lunevb00180d2008-07-05 19:04:09 -0700301 idx, genid)
Eric Dumazet29e75252008-01-31 17:05:09 -0800302 & rt_hash_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303}
304
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700305static inline int rt_genid(struct net *net)
306{
307 return atomic_read(&net->ipv4.rt_genid);
308}
309
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310#ifdef CONFIG_PROC_FS
311struct rt_cache_iter_state {
Denis V. Luneva75e9362008-02-28 20:50:55 -0800312 struct seq_net_private p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 int bucket;
Eric Dumazet29e75252008-01-31 17:05:09 -0800314 int genid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315};
316
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900317static struct rtable *rt_cache_get_first(struct seq_file *seq)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900319 struct rt_cache_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320 struct rtable *r = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321
322 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
Eric Dumazet1c317202010-10-25 21:02:07 +0000323 if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
Eric Dumazeta6272662008-08-28 01:11:25 -0700324 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 rcu_read_lock_bh();
Paul E. McKenneya898def2010-02-22 17:04:49 -0800326 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
Eric Dumazet29e75252008-01-31 17:05:09 -0800327 while (r) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700328 if (dev_net(r->dst.dev) == seq_file_net(seq) &&
Denis V. Luneva75e9362008-02-28 20:50:55 -0800329 r->rt_genid == st->genid)
Eric Dumazet29e75252008-01-31 17:05:09 -0800330 return r;
Changli Gaod8d1f302010-06-10 23:31:35 -0700331 r = rcu_dereference_bh(r->dst.rt_next);
Eric Dumazet29e75252008-01-31 17:05:09 -0800332 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 rcu_read_unlock_bh();
334 }
Eric Dumazet29e75252008-01-31 17:05:09 -0800335 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336}
337
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900338static struct rtable *__rt_cache_get_next(struct seq_file *seq,
Denis V. Lunev642d6312008-02-28 20:50:33 -0800339 struct rtable *r)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900341 struct rt_cache_iter_state *st = seq->private;
Eric Dumazeta6272662008-08-28 01:11:25 -0700342
Eric Dumazet1c317202010-10-25 21:02:07 +0000343 r = rcu_dereference_bh(r->dst.rt_next);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 while (!r) {
345 rcu_read_unlock_bh();
Eric Dumazeta6272662008-08-28 01:11:25 -0700346 do {
347 if (--st->bucket < 0)
348 return NULL;
Eric Dumazet1c317202010-10-25 21:02:07 +0000349 } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350 rcu_read_lock_bh();
Eric Dumazet1c317202010-10-25 21:02:07 +0000351 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352 }
Eric Dumazet1c317202010-10-25 21:02:07 +0000353 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354}
355
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900356static struct rtable *rt_cache_get_next(struct seq_file *seq,
Denis V. Lunev642d6312008-02-28 20:50:33 -0800357 struct rtable *r)
358{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900359 struct rt_cache_iter_state *st = seq->private;
360 while ((r = __rt_cache_get_next(seq, r)) != NULL) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700361 if (dev_net(r->dst.dev) != seq_file_net(seq))
Denis V. Luneva75e9362008-02-28 20:50:55 -0800362 continue;
Denis V. Lunev642d6312008-02-28 20:50:33 -0800363 if (r->rt_genid == st->genid)
364 break;
365 }
366 return r;
367}
368
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900369static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900371 struct rtable *r = rt_cache_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372
373 if (r)
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900374 while (pos && (r = rt_cache_get_next(seq, r)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375 --pos;
376 return pos ? NULL : r;
377}
378
379static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
380{
Eric Dumazet29e75252008-01-31 17:05:09 -0800381 struct rt_cache_iter_state *st = seq->private;
Eric Dumazet29e75252008-01-31 17:05:09 -0800382 if (*pos)
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900383 return rt_cache_get_idx(seq, *pos - 1);
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700384 st->genid = rt_genid(seq_file_net(seq));
Eric Dumazet29e75252008-01-31 17:05:09 -0800385 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386}
387
388static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
389{
Eric Dumazet29e75252008-01-31 17:05:09 -0800390 struct rtable *r;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391
392 if (v == SEQ_START_TOKEN)
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900393 r = rt_cache_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 else
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900395 r = rt_cache_get_next(seq, v);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 ++*pos;
397 return r;
398}
399
400static void rt_cache_seq_stop(struct seq_file *seq, void *v)
401{
402 if (v && v != SEQ_START_TOKEN)
403 rcu_read_unlock_bh();
404}
405
406static int rt_cache_seq_show(struct seq_file *seq, void *v)
407{
408 if (v == SEQ_START_TOKEN)
409 seq_printf(seq, "%-127s\n",
410 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
411 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
412 "HHUptod\tSpecDst");
413 else {
414 struct rtable *r = v;
David S. Miller69cce1d2011-07-17 23:09:49 -0700415 struct neighbour *n;
Pavel Emelyanov5e659e42008-04-24 01:02:16 -0700416 int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417
David S. Miller69cce1d2011-07-17 23:09:49 -0700418 n = dst_get_neighbour(&r->dst);
Eric Dumazet0eae88f2010-04-20 19:06:52 -0700419 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
420 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
Changli Gaod8d1f302010-06-10 23:31:35 -0700421 r->dst.dev ? r->dst.dev->name : "*",
Eric Dumazet0eae88f2010-04-20 19:06:52 -0700422 (__force u32)r->rt_dst,
423 (__force u32)r->rt_gateway,
Changli Gaod8d1f302010-06-10 23:31:35 -0700424 r->rt_flags, atomic_read(&r->dst.__refcnt),
425 r->dst.__use, 0, (__force u32)r->rt_src,
David S. Miller0dbaee32010-12-13 12:52:14 -0800426 dst_metric_advmss(&r->dst) + 40,
Changli Gaod8d1f302010-06-10 23:31:35 -0700427 dst_metric(&r->dst, RTAX_WINDOW),
428 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
429 dst_metric(&r->dst, RTAX_RTTVAR)),
David S. Miller475949d2011-05-03 19:45:15 -0700430 r->rt_key_tos,
David S. Millerf6b72b62011-07-14 07:53:20 -0700431 -1,
David S. Miller69cce1d2011-07-17 23:09:49 -0700432 (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -0700433 r->rt_spec_dst, &len);
434
435 seq_printf(seq, "%*s\n", 127 - len, "");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900436 }
437 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438}
439
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700440static const struct seq_operations rt_cache_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 .start = rt_cache_seq_start,
442 .next = rt_cache_seq_next,
443 .stop = rt_cache_seq_stop,
444 .show = rt_cache_seq_show,
445};
446
447static int rt_cache_seq_open(struct inode *inode, struct file *file)
448{
Denis V. Luneva75e9362008-02-28 20:50:55 -0800449 return seq_open_net(inode, file, &rt_cache_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -0700450 sizeof(struct rt_cache_iter_state));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451}
452
Arjan van de Ven9a321442007-02-12 00:55:35 -0800453static const struct file_operations rt_cache_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 .owner = THIS_MODULE,
455 .open = rt_cache_seq_open,
456 .read = seq_read,
457 .llseek = seq_lseek,
Denis V. Luneva75e9362008-02-28 20:50:55 -0800458 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459};
460
461
462static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
463{
464 int cpu;
465
466 if (*pos == 0)
467 return SEQ_START_TOKEN;
468
Rusty Russell0f23174a2008-12-29 12:23:42 +0000469 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470 if (!cpu_possible(cpu))
471 continue;
472 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800473 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474 }
475 return NULL;
476}
477
478static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
479{
480 int cpu;
481
Rusty Russell0f23174a2008-12-29 12:23:42 +0000482 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 if (!cpu_possible(cpu))
484 continue;
485 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800486 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 }
488 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900489
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490}
491
/* seq_file .stop: nothing to release for the per-CPU stats walk. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
496
497static int rt_cpu_seq_show(struct seq_file *seq, void *v)
498{
499 struct rt_cache_stat *st = v;
500
501 if (v == SEQ_START_TOKEN) {
Olaf Rempel5bec0032005-04-28 12:16:08 -0700502 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 return 0;
504 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900505
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
507 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
Eric Dumazetfc66f952010-10-08 06:37:34 +0000508 dst_entries_get_slow(&ipv4_dst_ops),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509 st->in_hit,
510 st->in_slow_tot,
511 st->in_slow_mc,
512 st->in_no_route,
513 st->in_brd,
514 st->in_martian_dst,
515 st->in_martian_src,
516
517 st->out_hit,
518 st->out_slow_tot,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900519 st->out_slow_mc,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520
521 st->gc_total,
522 st->gc_ignored,
523 st->gc_goal_miss,
524 st->gc_dst_overflow,
525 st->in_hlist_search,
526 st->out_hlist_search
527 );
528 return 0;
529}
530
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700531static const struct seq_operations rt_cpu_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 .start = rt_cpu_seq_start,
533 .next = rt_cpu_seq_next,
534 .stop = rt_cpu_seq_stop,
535 .show = rt_cpu_seq_show,
536};
537
538
539static int rt_cpu_seq_open(struct inode *inode, struct file *file)
540{
541 return seq_open(file, &rt_cpu_seq_ops);
542}
543
Arjan van de Ven9a321442007-02-12 00:55:35 -0800544static const struct file_operations rt_cpu_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545 .owner = THIS_MODULE,
546 .open = rt_cpu_seq_open,
547 .read = seq_read,
548 .llseek = seq_lseek,
549 .release = seq_release,
550};
551
Patrick McHardyc7066f72011-01-14 13:36:42 +0100552#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800553static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800554{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800555 struct ip_rt_acct *dst, *src;
556 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800557
Alexey Dobriyana661c412009-11-25 15:40:35 -0800558 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
559 if (!dst)
560 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800561
Alexey Dobriyana661c412009-11-25 15:40:35 -0800562 for_each_possible_cpu(i) {
563 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
564 for (j = 0; j < 256; j++) {
565 dst[j].o_bytes += src[j].o_bytes;
566 dst[j].o_packets += src[j].o_packets;
567 dst[j].i_bytes += src[j].i_bytes;
568 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800569 }
570 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800571
572 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
573 kfree(dst);
574 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800575}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800576
577static int rt_acct_proc_open(struct inode *inode, struct file *file)
578{
579 return single_open(file, rt_acct_proc_show, NULL);
580}
581
582static const struct file_operations rt_acct_proc_fops = {
583 .owner = THIS_MODULE,
584 .open = rt_acct_proc_open,
585 .read = seq_read,
586 .llseek = seq_lseek,
587 .release = single_release,
588};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800589#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800590
Denis V. Lunev73b38712008-02-28 20:51:18 -0800591static int __net_init ip_rt_do_proc_init(struct net *net)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800592{
593 struct proc_dir_entry *pde;
594
595 pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
596 &rt_cache_seq_fops);
597 if (!pde)
598 goto err1;
599
Wang Chen77020722008-02-28 14:14:25 -0800600 pde = proc_create("rt_cache", S_IRUGO,
601 net->proc_net_stat, &rt_cpu_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800602 if (!pde)
603 goto err2;
604
Patrick McHardyc7066f72011-01-14 13:36:42 +0100605#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800606 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800607 if (!pde)
608 goto err3;
609#endif
610 return 0;
611
Patrick McHardyc7066f72011-01-14 13:36:42 +0100612#ifdef CONFIG_IP_ROUTE_CLASSID
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800613err3:
614 remove_proc_entry("rt_cache", net->proc_net_stat);
615#endif
616err2:
617 remove_proc_entry("rt_cache", net->proc_net);
618err1:
619 return -ENOMEM;
620}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800621
622static void __net_exit ip_rt_do_proc_exit(struct net *net)
623{
624 remove_proc_entry("rt_cache", net->proc_net_stat);
625 remove_proc_entry("rt_cache", net->proc_net);
Patrick McHardyc7066f72011-01-14 13:36:42 +0100626#ifdef CONFIG_IP_ROUTE_CLASSID
Denis V. Lunev73b38712008-02-28 20:51:18 -0800627 remove_proc_entry("rt_acct", net->proc_net);
Alexey Dobriyan0a931ac2010-01-17 03:32:50 +0000628#endif
Denis V. Lunev73b38712008-02-28 20:51:18 -0800629}
630
631static struct pernet_operations ip_rt_proc_ops __net_initdata = {
632 .init = ip_rt_do_proc_init,
633 .exit = ip_rt_do_proc_exit,
634};
635
636static int __init ip_rt_proc_init(void)
637{
638 return register_pernet_subsys(&ip_rt_proc_ops);
639}
640
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800641#else
Denis V. Lunev73b38712008-02-28 20:51:18 -0800642static inline int ip_rt_proc_init(void)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800643{
644 return 0;
645}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900647
Stephen Hemminger5969f712008-04-10 01:52:09 -0700648static inline void rt_free(struct rtable *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649{
Changli Gaod8d1f302010-06-10 23:31:35 -0700650 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651}
652
Stephen Hemminger5969f712008-04-10 01:52:09 -0700653static inline void rt_drop(struct rtable *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655 ip_rt_put(rt);
Changli Gaod8d1f302010-06-10 23:31:35 -0700656 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657}
658
Stephen Hemminger5969f712008-04-10 01:52:09 -0700659static inline int rt_fast_clean(struct rtable *rth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660{
661 /* Kill broadcast/multicast entries very aggresively, if they
662 collide in hash table with more useful entries */
663 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
David S. Millerc7537962010-11-11 17:07:48 -0800664 rt_is_input_route(rth) && rth->dst.rt_next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665}
666
Stephen Hemminger5969f712008-04-10 01:52:09 -0700667static inline int rt_valuable(struct rtable *rth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668{
669 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
David S. Miller2c8cec52011-02-09 20:42:07 -0800670 (rth->peer && rth->peer->pmtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671}
672
673static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
674{
675 unsigned long age;
676 int ret = 0;
677
Changli Gaod8d1f302010-06-10 23:31:35 -0700678 if (atomic_read(&rth->dst.__refcnt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 goto out;
680
Changli Gaod8d1f302010-06-10 23:31:35 -0700681 age = jiffies - rth->dst.lastuse;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 if ((age <= tmo1 && !rt_fast_clean(rth)) ||
683 (age <= tmo2 && rt_valuable(rth)))
684 goto out;
685 ret = 1;
686out: return ret;
687}
688
689/* Bits of score are:
690 * 31: very valuable
691 * 30: not quite useless
692 * 29..0: usage counter
693 */
694static inline u32 rt_score(struct rtable *rt)
695{
Changli Gaod8d1f302010-06-10 23:31:35 -0700696 u32 score = jiffies - rt->dst.lastuse;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697
698 score = ~score & ~(3<<30);
699
700 if (rt_valuable(rt))
701 score |= (1<<31);
702
David S. Millerc7537962010-11-11 17:07:48 -0800703 if (rt_is_output_route(rt) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
705 score |= (1<<30);
706
707 return score;
708}
709
/*
 * Caching stays enabled for a namespace only while the number of
 * emergency hash rebuilds has not exceeded the sysctl limit.
 */
static inline bool rt_caching(const struct net *net)
{
	return net->ipv4.current_rt_cache_rebuild_count <=
		net->ipv4.sysctl_rt_cache_rebuild_count;
}
715
David S. Miller5e2b61f2011-03-04 21:47:09 -0800716static inline bool compare_hash_inputs(const struct rtable *rt1,
717 const struct rtable *rt2)
Neil Horman1080d702008-10-27 12:28:25 -0700718{
David S. Miller5e2b61f2011-03-04 21:47:09 -0800719 return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
720 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
721 (rt1->rt_iif ^ rt2->rt_iif)) == 0);
Neil Horman1080d702008-10-27 12:28:25 -0700722}
723
David S. Miller5e2b61f2011-03-04 21:47:09 -0800724static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725{
David S. Miller5e2b61f2011-03-04 21:47:09 -0800726 return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
727 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
728 (rt1->rt_mark ^ rt2->rt_mark) |
David S. Miller475949d2011-05-03 19:45:15 -0700729 (rt1->rt_key_tos ^ rt2->rt_key_tos) |
David S. Miller5e2b61f2011-03-04 21:47:09 -0800730 (rt1->rt_oif ^ rt2->rt_oif) |
731 (rt1->rt_iif ^ rt2->rt_iif)) == 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732}
733
/* True when both entries belong to the same network namespace. */
static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
{
	return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev));
}
738
/*
 * An entry is stale when its generation id no longer matches the
 * current one for its namespace (bumped by rt_cache_invalidate()).
 */
static inline int rt_is_expired(struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}
743
/*
 * Perform a full scan of hash table and free all entries.
 * Can be called by a softirq or a process.
 * In the latter case, we want to be rescheduled if necessary.
 *
 * When @net is non-NULL only entries belonging to that namespace are
 * freed; a NULL @net flushes everything.
 */
static void rt_do_flush(struct net *net, int process_context)
{
	unsigned int i;
	struct rtable *rth, *next;

	for (i = 0; i <= rt_hash_mask; i++) {
		struct rtable __rcu **pprev;
		struct rtable *list;

		if (process_context && need_resched())
			cond_resched();
		/* Cheap lockless peek: skip empty buckets entirely. */
		rth = rcu_dereference_raw(rt_hash_table[i].chain);
		if (!rth)
			continue;

		spin_lock_bh(rt_hash_lock_addr(i));

		/* Under the bucket lock, unlink matching entries onto a
		 * private list; they are freed after the lock is dropped
		 * to keep the critical section short. */
		list = NULL;
		pprev = &rt_hash_table[i].chain;
		rth = rcu_dereference_protected(*pprev,
			lockdep_is_held(rt_hash_lock_addr(i)));

		while (rth) {
			next = rcu_dereference_protected(rth->dst.rt_next,
				lockdep_is_held(rt_hash_lock_addr(i)));

			if (!net ||
			    net_eq(dev_net(rth->dst.dev), net)) {
				rcu_assign_pointer(*pprev, next);
				rcu_assign_pointer(rth->dst.rt_next, list);
				list = rth;
			} else {
				pprev = &rth->dst.rt_next;
			}
			rth = next;
		}

		spin_unlock_bh(rt_hash_lock_addr(i));

		for (; list; list = next) {
			next = rcu_dereference_protected(list->dst.rt_next, 1);
			rt_free(list);
		}
	}
}
794
/*
 * While freeing expired entries, we compute average chain length
 * and standard deviation, using fixed-point arithmetic.
 * This is to have an estimation of rt_chain_length_max
 * rt_chain_length_max = max(elasticity, AVG + 4*SD)
 * We use 3 bits for the fractional part, and 29 (or 61) for magnitude.
 */
802
803#define FRACT_BITS 3
804#define ONE (1UL << FRACT_BITS)
805
Eric Dumazet98376382010-03-08 03:20:00 +0000806/*
807 * Given a hash chain and an item in this hash chain,
808 * find if a previous entry has the same hash_inputs
809 * (but differs on tos, mark or oif)
810 * Returns 0 if an alias is found.
811 * Returns ONE if rth has no alias before itself.
812 */
813static int has_noalias(const struct rtable *head, const struct rtable *rth)
814{
815 const struct rtable *aux = head;
816
817 while (aux != rth) {
David S. Miller5e2b61f2011-03-04 21:47:09 -0800818 if (compare_hash_inputs(aux, rth))
Eric Dumazet98376382010-03-08 03:20:00 +0000819 return 0;
Eric Dumazet1c317202010-10-25 21:02:07 +0000820 aux = rcu_dereference_protected(aux->dst.rt_next, 1);
Eric Dumazet98376382010-03-08 03:20:00 +0000821 }
822 return ONE;
823}
824
/*
 * Perturbation of rt_genid by a small quantity [1..256].
 * Using 8 bits of shuffling ensures we can call rt_cache_invalidate()
 * many times (2^24) without giving a recent rt_genid.
 * Jenkins hash is strong enough that little changes of rt_genid are OK.
 */
static void rt_cache_invalidate(struct net *net)
{
	unsigned char shuffle;

	get_random_bytes(&shuffle, sizeof(shuffle));
	/* +1 guarantees the generation always moves forward. */
	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
}
838
/*
 * delay < 0  : invalidate cache (fast : entries will be deleted later)
 * delay >= 0 : invalidate & flush cache (can be long)
 *
 * Invalidation just bumps the namespace generation id; the eager flush
 * additionally walks every bucket via rt_do_flush().
 */
void rt_cache_flush(struct net *net, int delay)
{
	rt_cache_invalidate(net);
	if (delay >= 0)
		rt_do_flush(net, !in_softirq());
}
849
/* Flush previously invalidated entries from the cache for @net
 * (all namespaces when @net is NULL). */
void rt_cache_flush_batch(struct net *net)
{
	rt_do_flush(net, !in_softirq());
}
855
/*
 * Called when a hash chain grows beyond rt_chain_length_max: warn
 * (rate limited) and invalidate the whole cache for this namespace
 * by bumping its generation id.
 */
static void rt_emergency_hash_rebuild(struct net *net)
{
	if (net_ratelimit())
		printk(KERN_WARNING "Route hash chain too long!\n");
	rt_cache_invalidate(net);
}
862
/*
   Short description of GC goals.

   We want to build algorithm, which will keep routing cache
   at some equilibrium point, when number of aged off entries
   is kept approximately equal to newly generated ones.

   Current expiration strength is variable "expire".
   We try to adjust it dynamically, so that if networking
   is idle expires is large enough to keep enough of warm entries,
   and when load increases it reduces to limit cache size.
 */

static int rt_garbage_collect(struct dst_ops *ops)
{
	/* GC state persists across invocations: current expiration
	 * strength, last run time, bucket rover, and target size. */
	static unsigned long expire = RT_GC_TIMEOUT;
	static unsigned long last_gc;
	static int rover;
	static int equilibrium;
	struct rtable *rth;
	struct rtable __rcu **rthp;
	unsigned long now = jiffies;
	int goal;
	int entries = dst_entries_get_fast(&ipv4_dst_ops);

	/*
	 * Garbage collection is pretty expensive,
	 * do not make it too frequently.
	 */

	RT_CACHE_STAT_INC(gc_total);

	if (now - last_gc < ip_rt_gc_min_interval &&
	    entries < ip_rt_max_size) {
		RT_CACHE_STAT_INC(gc_ignored);
		goto out;
	}

	/* Use the exact (percpu-summed) count once we commit to a run. */
	entries = dst_entries_get_slow(&ipv4_dst_ops);
	/* Calculate number of entries, which we want to expire now. */
	goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
	if (goal <= 0) {
		if (equilibrium < ipv4_dst_ops.gc_thresh)
			equilibrium = ipv4_dst_ops.gc_thresh;
		goal = entries - equilibrium;
		if (goal > 0) {
			equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
			goal = entries - equilibrium;
		}
	} else {
		/* We are in dangerous area. Try to reduce cache really
		 * aggressively.
		 */
		goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
		equilibrium = entries - goal;
	}

	if (now - last_gc >= ip_rt_gc_min_interval)
		last_gc = now;

	if (goal <= 0) {
		equilibrium += goal;
		goto work_done;
	}

	do {
		int i, k;

		/* Sweep buckets starting after where the last run stopped;
		 * tmo halves for each entry kept, so deeper chain positions
		 * are expired more eagerly. */
		for (i = rt_hash_mask, k = rover; i >= 0; i--) {
			unsigned long tmo = expire;

			k = (k + 1) & rt_hash_mask;
			rthp = &rt_hash_table[k].chain;
			spin_lock_bh(rt_hash_lock_addr(k));
			while ((rth = rcu_dereference_protected(*rthp,
					lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
				if (!rt_is_expired(rth) &&
					!rt_may_expire(rth, tmo, expire)) {
					tmo >>= 1;
					rthp = &rth->dst.rt_next;
					continue;
				}
				*rthp = rth->dst.rt_next;
				rt_free(rth);
				goal--;
			}
			spin_unlock_bh(rt_hash_lock_addr(k));
			if (goal <= 0)
				break;
		}
		rover = k;

		if (goal <= 0)
			goto work_done;

		/* Goal is not achieved. We stop process if:

		   - if expire reduced to zero. Otherwise, expire is halved.
		   - if table is not full.
		   - if we are called from interrupt.
		   - jiffies check is just fallback/debug loop breaker.
		     We will not spin here for long time in any case.
		 */

		RT_CACHE_STAT_INC(gc_goal_miss);

		if (expire == 0)
			break;

		expire >>= 1;

		if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
			goto out;
	} while (!in_softirq() && time_before_eq(jiffies, now));

	if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
		goto out;
	if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
		goto out;
	if (net_ratelimit())
		printk(KERN_WARNING "dst cache overflow\n");
	RT_CACHE_STAT_INC(gc_dst_overflow);
	return 1;

work_done:
	/* Relax the expiration strength again when we are comfortably
	 * below the thresholds. */
	expire += ip_rt_gc_min_interval;
	if (expire > ip_rt_gc_timeout ||
	    dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
	    dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
		expire = ip_rt_gc_timeout;
out:	return 0;
}
995
Eric Dumazet98376382010-03-08 03:20:00 +0000996/*
997 * Returns number of entries in a hash chain that have different hash_inputs
998 */
999static int slow_chain_length(const struct rtable *head)
1000{
1001 int length = 0;
1002 const struct rtable *rth = head;
1003
1004 while (rth) {
1005 length += has_noalias(head, rth);
Eric Dumazet1c317202010-10-25 21:02:07 +00001006 rth = rcu_dereference_protected(rth->dst.rt_next, 1);
Eric Dumazet98376382010-03-08 03:20:00 +00001007 }
1008 return length >> FRACT_BITS;
1009}
1010
/*
 * Resolve and attach the neighbour entry for @rt's next hop.
 * Uses the ARP table by default; ATM CLIP devices use their own
 * neighbour table. Loopback and point-to-point devices have no real
 * next-hop address, so INADDR_ANY is used as the lookup key.
 * Returns 0 on success or a negative errno from the lookup.
 */
static int rt_bind_neighbour(struct rtable *rt)
{
	static const __be32 inaddr_any = 0;
	struct net_device *dev = rt->dst.dev;
	struct neigh_table *tbl = &arp_tbl;
	const __be32 *nexthop;
	struct neighbour *n;

#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
	if (dev->type == ARPHRD_ATM)
		tbl = clip_tbl_hook;
#endif
	nexthop = &rt->rt_gateway;
	if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
		nexthop = &inaddr_any;
	n = ipv4_neigh_lookup(tbl, dev, nexthop);
	if (IS_ERR(n))
		return PTR_ERR(n);
	dst_set_neighbour(&rt->dst, n);

	return 0;
}
1033
/*
 * Insert @rt into the routing cache bucket @hash (or hand it back
 * uncached when caching is disabled).  On success returns the entry
 * to use — either @rt itself or an equivalent entry already present
 * in the chain — and, when @skb is given, attaches it as skb's dst.
 * Returns an ERR_PTR on neighbour binding failure.
 */
static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
				     struct sk_buff *skb, int ifindex)
{
	struct rtable	*rth, *cand;
	struct rtable __rcu **rthp, **candp;
	unsigned long	now;
	u32 		min_score;
	int		chain_length;
	/* Retry GC-and-reinsert only when we may sleep (process ctx). */
	int attempts = !in_softirq();

restart:
	chain_length = 0;
	min_score = ~(u32)0;
	cand = NULL;
	candp = NULL;
	now = jiffies;

	if (!rt_caching(dev_net(rt->dst.dev))) {
		/*
		 * If we're not caching, just tell the caller we
		 * were successful and don't touch the route.  The
		 * caller holds the sole reference to the cache entry, and
		 * it will be released when the caller is done with it.
		 * If we drop it here, the callers have no way to resolve routes
		 * when we're not caching.  Instead, just point *rp at rt, so
		 * the caller gets a single use out of the route.
		 * Note that we do rt_free on this new route entry, so that
		 * once its refcount hits zero, we are still able to reap it
		 * (Thanks Alexey)
		 * Note: To avoid expensive rcu stuff for this uncached dst,
		 * we set DST_NOCACHE so that dst_release() can free dst without
		 * waiting a grace period.
		 */

		rt->dst.flags |= DST_NOCACHE;
		if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
			int err = rt_bind_neighbour(rt);
			if (err) {
				if (net_ratelimit())
					printk(KERN_WARNING
					    "Neighbour table failure & not caching routes.\n");
				ip_rt_put(rt);
				return ERR_PTR(err);
			}
		}

		goto skip_hashing;
	}

	rthp = &rt_hash_table[hash].chain;

	spin_lock_bh(rt_hash_lock_addr(hash));
	while ((rth = rcu_dereference_protected(*rthp,
			lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
		/* Opportunistically reap entries from an old generation. */
		if (rt_is_expired(rth)) {
			*rthp = rth->dst.rt_next;
			rt_free(rth);
			continue;
		}
		if (compare_keys(rth, rt) && compare_netns(rth, rt)) {
			/* Put it first */
			*rthp = rth->dst.rt_next;
			/*
			 * Since lookup is lockfree, the deletion
			 * must be visible to another weakly ordered CPU before
			 * the insertion at the start of the hash chain.
			 */
			rcu_assign_pointer(rth->dst.rt_next,
					   rt_hash_table[hash].chain);
			/*
			 * Since lookup is lockfree, the update writes
			 * must be ordered for consistency on SMP.
			 */
			rcu_assign_pointer(rt_hash_table[hash].chain, rth);

			dst_use(&rth->dst, now);
			spin_unlock_bh(rt_hash_lock_addr(hash));

			/* An equivalent entry already exists: drop ours
			 * and return the cached one. */
			rt_drop(rt);
			if (skb)
				skb_dst_set(skb, &rth->dst);
			return rth;
		}

		/* Remember the lowest-scored unreferenced entry as an
		 * eviction candidate should the chain be too long. */
		if (!atomic_read(&rth->dst.__refcnt)) {
			u32 score = rt_score(rth);

			if (score <= min_score) {
				cand = rth;
				candp = rthp;
				min_score = score;
			}
		}

		chain_length++;

		rthp = &rth->dst.rt_next;
	}

	if (cand) {
		/* ip_rt_gc_elasticity used to be average length of chain
		 * length, when exceeded gc becomes really aggressive.
		 *
		 * The second limit is less certain. At the moment it allows
		 * only 2 entries per bucket. We will see.
		 */
		if (chain_length > ip_rt_gc_elasticity) {
			*candp = cand->dst.rt_next;
			rt_free(cand);
		}
	} else {
		/* No evictable entry and the chain is overlong: trigger an
		 * emergency rebuild (new genid), rehash and retry. */
		if (chain_length > rt_chain_length_max &&
		    slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
			struct net *net = dev_net(rt->dst.dev);
			int num = ++net->ipv4.current_rt_cache_rebuild_count;
			if (!rt_caching(net)) {
				printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
					rt->dst.dev->name, num);
			}
			rt_emergency_hash_rebuild(net);
			spin_unlock_bh(rt_hash_lock_addr(hash));

			hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
					ifindex, rt_genid(net));
			goto restart;
		}
	}

	/* Try to bind route to arp only if it is output
	   route or unicast forwarding path.
	 */
	if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
		int err = rt_bind_neighbour(rt);
		if (err) {
			spin_unlock_bh(rt_hash_lock_addr(hash));

			if (err != -ENOBUFS) {
				rt_drop(rt);
				return ERR_PTR(err);
			}

			/* Neighbour tables are full and nothing
			   can be released. Try to shrink route cache,
			   it is most likely it holds some neighbour records.
			 */
			if (attempts-- > 0) {
				int saved_elasticity = ip_rt_gc_elasticity;
				int saved_int = ip_rt_gc_min_interval;
				ip_rt_gc_elasticity	= 1;
				ip_rt_gc_min_interval	= 0;
				rt_garbage_collect(&ipv4_dst_ops);
				ip_rt_gc_min_interval	= saved_int;
				ip_rt_gc_elasticity	= saved_elasticity;
				goto restart;
			}

			if (net_ratelimit())
				printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
			rt_drop(rt);
			return ERR_PTR(-ENOBUFS);
		}
	}

	rt->dst.rt_next = rt_hash_table[hash].chain;

	/*
	 * Since lookup is lockfree, we must make sure
	 * previous writes to rt are committed to memory
	 * before making rt visible to other CPUS.
	 */
	rcu_assign_pointer(rt_hash_table[hash].chain, rt);

	spin_unlock_bh(rt_hash_lock_addr(hash));

skip_hashing:
	if (skb)
		skb_dst_set(skb, &rt->dst);
	return rt;
}
1213
/* Bumped whenever peer-learned data changes (e.g. a redirect is
 * recorded), so cached routes can detect stale peer state. */
static atomic_t __rt_peer_genid = ATOMIC_INIT(0);

/* Snapshot of the current peer generation counter. */
static u32 rt_peer_genid(void)
{
	return atomic_read(&__rt_peer_genid);
}
1220
/*
 * Attach the long-lived inet_peer entry for @daddr to @rt.
 * @create selects whether a missing peer may be allocated.
 */
void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
{
	struct inet_peer *peer;

	peer = inet_getpeer_v4(daddr, create);

	/* Another CPU may have installed a peer concurrently; keep the
	 * winner's and drop our reference.  Otherwise record the peer
	 * generation this route is in sync with. */
	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
		inet_putpeer(peer);
	else
		rt->rt_peer_genid = rt_peer_genid();
}
1232
/*
 * Peer allocation may fail only in serious out-of-memory conditions.  However
 * we still can generate some output.
 * Random ID selection looks a bit dangerous because we have no chances to
 * select ID being unique in a reasonable period of time.
 * But broken packet identifier may be better than no packet at all.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	/* Global fallback state, serialized by its own lock. */
	static DEFINE_SPINLOCK(ip_fb_id_lock);
	static u32 ip_fallback_id;
	u32 salt;

	spin_lock_bh(&ip_fb_id_lock);
	/* Mix the previous id with the destination so successive ids for
	 * one destination are hard to predict. */
	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;
	spin_unlock_bh(&ip_fb_id_lock);
}
1252
/*
 * Fill in iph->id. Prefer the per-destination counter kept in the
 * route's inet_peer; fall back to the global generator when no peer
 * can be bound (or when called without a dst).
 */
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
	struct rtable *rt = (struct rtable *) dst;

	if (rt) {
		if (rt->peer == NULL)
			rt_bind_peer(rt, rt->rt_dst, 1);

		/* If peer is attached to destination, it is never detached,
		   so that we need not to grab a lock to dereference it.
		 */
		if (rt->peer) {
			iph->id = htons(inet_getid(rt->peer, more));
			return;
		}
	} else
		printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",
		       __builtin_return_address(0));

	ip_select_fb_ident(iph);
}
EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275
/*
 * Remove @rt from hash bucket @hash and drop the caller's reference.
 * While walking the chain under the bucket lock, also reap any entry
 * from an old generation.
 */
static void rt_del(unsigned hash, struct rtable *rt)
{
	struct rtable __rcu **rthp;
	struct rtable *aux;

	rthp = &rt_hash_table[hash].chain;
	spin_lock_bh(rt_hash_lock_addr(hash));
	ip_rt_put(rt);
	while ((aux = rcu_dereference_protected(*rthp,
			lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
		if (aux == rt || rt_is_expired(aux)) {
			*rthp = aux->dst.rt_next;
			rt_free(aux);
			continue;
		}
		rthp = &aux->dst.rt_next;
	}
	spin_unlock_bh(rt_hash_lock_addr(hash));
}
1295
/* called in rcu_read_lock() section */
/*
 * Process an ICMP redirect: validate the advertised gateway and, if
 * acceptable, record it in the destination's inet_peer entry and bump
 * the peer generation so cached routes pick it up.
 */
void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
		    __be32 saddr, struct net_device *dev)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct inet_peer *peer;
	struct net *net;

	if (!in_dev)
		return;

	net = dev_net(dev);
	/* Reject no-op redirects, devices configured to ignore them, and
	 * gateways that cannot be valid unicast next hops. */
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	peer = inet_getpeer_v4(daddr, 1);
	if (peer) {
		peer->redirect_learned.a4 = new_gw;

		inet_putpeer(peer);

		atomic_inc(&__rt_peer_genid);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
		printk(KERN_INFO "Redirect from %pI4 on %s about %pI4 ignored.\n"
			"  Advised path = %pI4 -> %pI4\n",
		       &old_gw, dev->name, &new_gw,
		       &saddr, &daddr);
#endif
	;
}
1343
Eric Dumazetfe6fe792011-06-08 06:07:07 +00001344static bool peer_pmtu_expired(struct inet_peer *peer)
1345{
1346 unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
1347
1348 return orig &&
1349 time_after_eq(jiffies, orig) &&
1350 cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
1351}
1352
1353static bool peer_pmtu_cleaned(struct inet_peer *peer)
1354{
1355 unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
1356
1357 return orig &&
1358 cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
1359}
1360
/*
 * dst_ops->negative_advice callback: the caller reports trouble with
 * this route.  Obsolete or redirect-learned entries are dropped
 * (returning NULL so the caller re-looks-up); otherwise a stale
 * learned PMTU is reverted to its original value.
 */
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete > 0) {
			ip_rt_put(rt);
			ret = NULL;
		} else if (rt->rt_flags & RTCF_REDIRECTED) {
			unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
						rt->rt_oif,
						rt_genid(dev_net(dst->dev)));
			rt_del(hash, rt);
			ret = NULL;
		} else if (rt->peer && peer_pmtu_expired(rt->peer)) {
			dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig);
		}
	}
	return ret;
}
1382
1383/*
1384 * Algorithm:
1385 * 1. The first ip_rt_redirect_number redirects are sent
1386 * with exponential backoff, then we stop sending them at all,
1387 * assuming that the host ignores our redirects.
1388 * 2. If we did not see packets requiring redirects
1389 * during ip_rt_redirect_silence, we assume that the host
1390 * forgot redirected route and start to send redirects again.
1391 *
1392 * This algorithm is much cheaper and more intelligent than dumb load limiting
1393 * in icmp.c.
1394 *
1395 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
1396 * and "frag. need" (breaks PMTU discovery) in icmp.c.
1397 */
1398
1399void ip_rt_send_redirect(struct sk_buff *skb)
1400{
Eric Dumazet511c3f92009-06-02 05:14:27 +00001401 struct rtable *rt = skb_rtable(skb);
Eric Dumazet30038fc2009-08-28 23:52:01 -07001402 struct in_device *in_dev;
David S. Miller92d86822011-02-04 15:55:25 -08001403 struct inet_peer *peer;
Eric Dumazet30038fc2009-08-28 23:52:01 -07001404 int log_martians;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405
Eric Dumazet30038fc2009-08-28 23:52:01 -07001406 rcu_read_lock();
Changli Gaod8d1f302010-06-10 23:31:35 -07001407 in_dev = __in_dev_get_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -07001408 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
1409 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410 return;
Eric Dumazet30038fc2009-08-28 23:52:01 -07001411 }
1412 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
1413 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
David S. Miller92d86822011-02-04 15:55:25 -08001415 if (!rt->peer)
David S. Millera48eff12011-05-18 18:42:43 -04001416 rt_bind_peer(rt, rt->rt_dst, 1);
David S. Miller92d86822011-02-04 15:55:25 -08001417 peer = rt->peer;
1418 if (!peer) {
1419 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1420 return;
1421 }
1422
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423 /* No redirected packets during ip_rt_redirect_silence;
1424 * reset the algorithm.
1425 */
David S. Miller92d86822011-02-04 15:55:25 -08001426 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
1427 peer->rate_tokens = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428
1429 /* Too many ignored redirects; do not send anything
Changli Gaod8d1f302010-06-10 23:31:35 -07001430 * set dst.rate_last to the last seen redirected packet.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431 */
David S. Miller92d86822011-02-04 15:55:25 -08001432 if (peer->rate_tokens >= ip_rt_redirect_number) {
1433 peer->rate_last = jiffies;
Eric Dumazet30038fc2009-08-28 23:52:01 -07001434 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435 }
1436
1437 /* Check for load limit; set rate_last to the latest sent
1438 * redirect.
1439 */
David S. Miller92d86822011-02-04 15:55:25 -08001440 if (peer->rate_tokens == 0 ||
Li Yewang14fb8a72006-12-18 00:26:35 -08001441 time_after(jiffies,
David S. Miller92d86822011-02-04 15:55:25 -08001442 (peer->rate_last +
1443 (ip_rt_redirect_load << peer->rate_tokens)))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
David S. Miller92d86822011-02-04 15:55:25 -08001445 peer->rate_last = jiffies;
1446 ++peer->rate_tokens;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447#ifdef CONFIG_IP_ROUTE_VERBOSE
Eric Dumazet30038fc2009-08-28 23:52:01 -07001448 if (log_martians &&
David S. Miller92d86822011-02-04 15:55:25 -08001449 peer->rate_tokens == ip_rt_redirect_number &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 net_ratelimit())
Harvey Harrison673d57e2008-10-31 00:53:57 -07001451 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
David S. Millerc5be24f2011-05-13 18:01:21 -04001452 &ip_hdr(skb)->saddr, rt->rt_iif,
Harvey Harrison673d57e2008-10-31 00:53:57 -07001453 &rt->rt_dst, &rt->rt_gateway);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454#endif
1455 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456}
1457
1458static int ip_error(struct sk_buff *skb)
1459{
Eric Dumazet511c3f92009-06-02 05:14:27 +00001460 struct rtable *rt = skb_rtable(skb);
David S. Miller92d86822011-02-04 15:55:25 -08001461 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462 unsigned long now;
David S. Miller92d86822011-02-04 15:55:25 -08001463 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464 int code;
1465
Changli Gaod8d1f302010-06-10 23:31:35 -07001466 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +00001467 case EINVAL:
1468 default:
1469 goto out;
1470 case EHOSTUNREACH:
1471 code = ICMP_HOST_UNREACH;
1472 break;
1473 case ENETUNREACH:
1474 code = ICMP_NET_UNREACH;
1475 IP_INC_STATS_BH(dev_net(rt->dst.dev),
1476 IPSTATS_MIB_INNOROUTES);
1477 break;
1478 case EACCES:
1479 code = ICMP_PKT_FILTERED;
1480 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 }
1482
David S. Miller92d86822011-02-04 15:55:25 -08001483 if (!rt->peer)
David S. Millera48eff12011-05-18 18:42:43 -04001484 rt_bind_peer(rt, rt->rt_dst, 1);
David S. Miller92d86822011-02-04 15:55:25 -08001485 peer = rt->peer;
1486
1487 send = true;
1488 if (peer) {
1489 now = jiffies;
1490 peer->rate_tokens += now - peer->rate_last;
1491 if (peer->rate_tokens > ip_rt_error_burst)
1492 peer->rate_tokens = ip_rt_error_burst;
1493 peer->rate_last = now;
1494 if (peer->rate_tokens >= ip_rt_error_cost)
1495 peer->rate_tokens -= ip_rt_error_cost;
1496 else
1497 send = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 }
David S. Miller92d86822011-02-04 15:55:25 -08001499 if (send)
1500 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501
1502out: kfree_skb(skb);
1503 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001504}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505
1506/*
1507 * The last two values are not from the RFC but
1508 * are needed for AMPRnet AX.25 paths.
1509 */
1510
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08001511static const unsigned short mtu_plateau[] =
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
1513
Stephen Hemminger5969f712008-04-10 01:52:09 -07001514static inline unsigned short guess_mtu(unsigned short old_mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001515{
1516 int i;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001517
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518 for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++)
1519 if (old_mtu > mtu_plateau[i])
1520 return mtu_plateau[i];
1521 return 68;
1522}
1523
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001524unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
Timo Teras0010e462008-04-29 03:32:25 -07001525 unsigned short new_mtu,
1526 struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528 unsigned short old_mtu = ntohs(iph->tot_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 unsigned short est_mtu = 0;
David S. Miller2c8cec52011-02-09 20:42:07 -08001530 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531
David S. Miller2c8cec52011-02-09 20:42:07 -08001532 peer = inet_getpeer_v4(iph->daddr, 1);
1533 if (peer) {
1534 unsigned short mtu = new_mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535
David S. Miller2c8cec52011-02-09 20:42:07 -08001536 if (new_mtu < 68 || new_mtu >= old_mtu) {
1537 /* BSD 4.2 derived systems incorrectly adjust
1538 * tot_len by the IP header length, and report
1539 * a zero MTU in the ICMP message.
1540 */
1541 if (mtu == 0 &&
1542 old_mtu >= 68 + (iph->ihl << 2))
1543 old_mtu -= iph->ihl << 2;
1544 mtu = guess_mtu(old_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545 }
David S. Miller2c8cec52011-02-09 20:42:07 -08001546
1547 if (mtu < ip_rt_min_pmtu)
1548 mtu = ip_rt_min_pmtu;
1549 if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
Hiroaki SHIMODA46af3182011-03-09 20:09:58 +00001550 unsigned long pmtu_expires;
1551
1552 pmtu_expires = jiffies + ip_rt_mtu_expires;
1553 if (!pmtu_expires)
1554 pmtu_expires = 1UL;
1555
David S. Miller2c8cec52011-02-09 20:42:07 -08001556 est_mtu = mtu;
1557 peer->pmtu_learned = mtu;
Hiroaki SHIMODA46af3182011-03-09 20:09:58 +00001558 peer->pmtu_expires = pmtu_expires;
David S. Miller2c8cec52011-02-09 20:42:07 -08001559 }
1560
1561 inet_putpeer(peer);
1562
1563 atomic_inc(&__rt_peer_genid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 }
1565 return est_mtu ? : new_mtu;
1566}
1567
David S. Miller2c8cec52011-02-09 20:42:07 -08001568static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
1569{
Eric Dumazetfe6fe792011-06-08 06:07:07 +00001570 unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
David S. Miller2c8cec52011-02-09 20:42:07 -08001571
Eric Dumazetfe6fe792011-06-08 06:07:07 +00001572 if (!expires)
1573 return;
Hiroaki SHIMODA46af3182011-03-09 20:09:58 +00001574 if (time_before(jiffies, expires)) {
David S. Miller2c8cec52011-02-09 20:42:07 -08001575 u32 orig_dst_mtu = dst_mtu(dst);
1576 if (peer->pmtu_learned < orig_dst_mtu) {
1577 if (!peer->pmtu_orig)
1578 peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
1579 dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
1580 }
1581 } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
1582 dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
1583}
1584
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1586{
David S. Miller2c8cec52011-02-09 20:42:07 -08001587 struct rtable *rt = (struct rtable *) dst;
1588 struct inet_peer *peer;
1589
1590 dst_confirm(dst);
1591
1592 if (!rt->peer)
David S. Millera48eff12011-05-18 18:42:43 -04001593 rt_bind_peer(rt, rt->rt_dst, 1);
David S. Miller2c8cec52011-02-09 20:42:07 -08001594 peer = rt->peer;
1595 if (peer) {
Eric Dumazetfe6fe792011-06-08 06:07:07 +00001596 unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
1597
David S. Miller2c8cec52011-02-09 20:42:07 -08001598 if (mtu < ip_rt_min_pmtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 mtu = ip_rt_min_pmtu;
Eric Dumazetfe6fe792011-06-08 06:07:07 +00001600 if (!pmtu_expires || mtu < peer->pmtu_learned) {
Hiroaki SHIMODA46af3182011-03-09 20:09:58 +00001601
1602 pmtu_expires = jiffies + ip_rt_mtu_expires;
1603 if (!pmtu_expires)
1604 pmtu_expires = 1UL;
1605
David S. Miller2c8cec52011-02-09 20:42:07 -08001606 peer->pmtu_learned = mtu;
Hiroaki SHIMODA46af3182011-03-09 20:09:58 +00001607 peer->pmtu_expires = pmtu_expires;
David S. Miller2c8cec52011-02-09 20:42:07 -08001608
1609 atomic_inc(&__rt_peer_genid);
1610 rt->rt_peer_genid = rt_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611 }
Hiroaki SHIMODA46af3182011-03-09 20:09:58 +00001612 check_peer_pmtu(dst, peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613 }
1614}
1615
David S. Millerf39925d2011-02-09 22:00:16 -08001616static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
1617{
1618 struct rtable *rt = (struct rtable *) dst;
1619 __be32 orig_gw = rt->rt_gateway;
David S. Miller69cce1d2011-07-17 23:09:49 -07001620 struct neighbour *n;
David S. Millerf39925d2011-02-09 22:00:16 -08001621
1622 dst_confirm(&rt->dst);
1623
David S. Miller69cce1d2011-07-17 23:09:49 -07001624 neigh_release(dst_get_neighbour(&rt->dst));
1625 dst_set_neighbour(&rt->dst, NULL);
David S. Millerf39925d2011-02-09 22:00:16 -08001626
1627 rt->rt_gateway = peer->redirect_learned.a4;
David S. Miller69cce1d2011-07-17 23:09:49 -07001628 rt_bind_neighbour(rt);
1629 n = dst_get_neighbour(&rt->dst);
1630 if (!n || !(n->nud_state & NUD_VALID)) {
1631 if (n)
1632 neigh_event_send(n, NULL);
David S. Millerf39925d2011-02-09 22:00:16 -08001633 rt->rt_gateway = orig_gw;
1634 return -EAGAIN;
1635 } else {
1636 rt->rt_flags |= RTCF_REDIRECTED;
David S. Miller69cce1d2011-07-17 23:09:49 -07001637 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
David S. Millerf39925d2011-02-09 22:00:16 -08001638 }
1639 return 0;
1640}
1641
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1643{
David S. Miller6431cbc2011-02-07 20:38:06 -08001644 struct rtable *rt = (struct rtable *) dst;
1645
1646 if (rt_is_expired(rt))
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001647 return NULL;
David S. Miller6431cbc2011-02-07 20:38:06 -08001648 if (rt->rt_peer_genid != rt_peer_genid()) {
David S. Miller2c8cec52011-02-09 20:42:07 -08001649 struct inet_peer *peer;
1650
David S. Miller6431cbc2011-02-07 20:38:06 -08001651 if (!rt->peer)
David S. Millera48eff12011-05-18 18:42:43 -04001652 rt_bind_peer(rt, rt->rt_dst, 0);
David S. Miller6431cbc2011-02-07 20:38:06 -08001653
David S. Miller2c8cec52011-02-09 20:42:07 -08001654 peer = rt->peer;
Eric Dumazetfe6fe792011-06-08 06:07:07 +00001655 if (peer) {
David S. Miller2c8cec52011-02-09 20:42:07 -08001656 check_peer_pmtu(dst, peer);
1657
Eric Dumazetfe6fe792011-06-08 06:07:07 +00001658 if (peer->redirect_learned.a4 &&
1659 peer->redirect_learned.a4 != rt->rt_gateway) {
1660 if (check_peer_redir(dst, peer))
1661 return NULL;
1662 }
David S. Millerf39925d2011-02-09 22:00:16 -08001663 }
1664
David S. Miller6431cbc2011-02-07 20:38:06 -08001665 rt->rt_peer_genid = rt_peer_genid();
1666 }
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001667 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668}
1669
1670static void ipv4_dst_destroy(struct dst_entry *dst)
1671{
1672 struct rtable *rt = (struct rtable *) dst;
1673 struct inet_peer *peer = rt->peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674
David S. Miller62fa8a82011-01-26 20:51:05 -08001675 if (rt->fi) {
1676 fib_info_put(rt->fi);
1677 rt->fi = NULL;
1678 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 if (peer) {
1680 rt->peer = NULL;
1681 inet_putpeer(peer);
1682 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683}
1684
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685
1686static void ipv4_link_failure(struct sk_buff *skb)
1687{
1688 struct rtable *rt;
1689
1690 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1691
Eric Dumazet511c3f92009-06-02 05:14:27 +00001692 rt = skb_rtable(skb);
Eric Dumazetfe6fe792011-06-08 06:07:07 +00001693 if (rt && rt->peer && peer_pmtu_cleaned(rt->peer))
1694 dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695}
1696
1697static int ip_rt_bug(struct sk_buff *skb)
1698{
Harvey Harrison673d57e2008-10-31 00:53:57 -07001699 printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n",
1700 &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701 skb->dev ? skb->dev->name : "?");
1702 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001703 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 return 0;
1705}
1706
1707/*
1708 We do not cache source address of outgoing interface,
1709 because it is used only by IP RR, TS and SRR options,
1710 so that it out of fast path.
1711
1712 BTW remember: "addr" is allowed to be not aligned
1713 in IP options!
1714 */
1715
David S. Miller8e363602011-05-13 17:29:41 -04001716void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717{
Al Viroa61ced52006-09-26 21:27:54 -07001718 __be32 src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719
David S. Millerc7537962010-11-11 17:07:48 -08001720 if (rt_is_output_route(rt))
David S. Millerc5be24f2011-05-13 18:01:21 -04001721 src = ip_hdr(skb)->saddr;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001722 else {
David S. Miller8e363602011-05-13 17:29:41 -04001723 struct fib_result res;
1724 struct flowi4 fl4;
1725 struct iphdr *iph;
1726
1727 iph = ip_hdr(skb);
1728
1729 memset(&fl4, 0, sizeof(fl4));
1730 fl4.daddr = iph->daddr;
1731 fl4.saddr = iph->saddr;
1732 fl4.flowi4_tos = iph->tos;
1733 fl4.flowi4_oif = rt->dst.dev->ifindex;
1734 fl4.flowi4_iif = skb->dev->ifindex;
1735 fl4.flowi4_mark = skb->mark;
David S. Miller5e2b61f2011-03-04 21:47:09 -08001736
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001737 rcu_read_lock();
David S. Miller68a5e3d2011-03-11 20:07:33 -05001738 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
David S. Miller436c3b62011-03-24 17:42:21 -07001739 src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001740 else
1741 src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742 RT_SCOPE_UNIVERSE);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001743 rcu_read_unlock();
1744 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745 memcpy(addr, &src, 4);
1746}
1747
Patrick McHardyc7066f72011-01-14 13:36:42 +01001748#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749static void set_class_tag(struct rtable *rt, u32 tag)
1750{
Changli Gaod8d1f302010-06-10 23:31:35 -07001751 if (!(rt->dst.tclassid & 0xFFFF))
1752 rt->dst.tclassid |= tag & 0xFFFF;
1753 if (!(rt->dst.tclassid & 0xFFFF0000))
1754 rt->dst.tclassid |= tag & 0xFFFF0000;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755}
1756#endif
1757
David S. Miller0dbaee32010-12-13 12:52:14 -08001758static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1759{
1760 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1761
1762 if (advmss == 0) {
1763 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1764 ip_rt_min_advmss);
1765 if (advmss > 65535 - 40)
1766 advmss = 65535 - 40;
1767 }
1768 return advmss;
1769}
1770
David S. Millerd33e4552010-12-14 13:01:14 -08001771static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
1772{
1773 unsigned int mtu = dst->dev->mtu;
1774
1775 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
1776 const struct rtable *rt = (const struct rtable *) dst;
1777
1778 if (rt->rt_gateway != rt->rt_dst && mtu > 576)
1779 mtu = 576;
1780 }
1781
1782 if (mtu > IP_MAX_MTU)
1783 mtu = IP_MAX_MTU;
1784
1785 return mtu;
1786}
1787
David S. Miller813b3b52011-04-28 14:48:42 -07001788static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
David S. Miller5e2b61f2011-03-04 21:47:09 -08001789 struct fib_info *fi)
David S. Millera4daad62011-01-27 22:01:53 -08001790{
David S. Miller0131ba42011-02-04 14:37:30 -08001791 struct inet_peer *peer;
1792 int create = 0;
1793
1794 /* If a peer entry exists for this destination, we must hook
1795 * it up in order to get at cached metrics.
1796 */
David S. Miller813b3b52011-04-28 14:48:42 -07001797 if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
David S. Miller0131ba42011-02-04 14:37:30 -08001798 create = 1;
1799
David S. Miller3c0afdc2011-03-04 21:26:07 -08001800 rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
David S. Miller0131ba42011-02-04 14:37:30 -08001801 if (peer) {
David S. Miller3c0afdc2011-03-04 21:26:07 -08001802 rt->rt_peer_genid = rt_peer_genid();
David S. Miller0131ba42011-02-04 14:37:30 -08001803 if (inet_metrics_new(peer))
1804 memcpy(peer->metrics, fi->fib_metrics,
1805 sizeof(u32) * RTAX_MAX);
1806 dst_init_metrics(&rt->dst, peer->metrics, false);
David S. Miller2c8cec52011-02-09 20:42:07 -08001807
Eric Dumazetfe6fe792011-06-08 06:07:07 +00001808 check_peer_pmtu(&rt->dst, peer);
David S. Millerf39925d2011-02-09 22:00:16 -08001809 if (peer->redirect_learned.a4 &&
1810 peer->redirect_learned.a4 != rt->rt_gateway) {
1811 rt->rt_gateway = peer->redirect_learned.a4;
1812 rt->rt_flags |= RTCF_REDIRECTED;
1813 }
David S. Miller0131ba42011-02-04 14:37:30 -08001814 } else {
David S. Millerb8dad612011-01-28 14:07:16 -08001815 if (fi->fib_metrics != (u32 *) dst_default_metrics) {
1816 rt->fi = fi;
1817 atomic_inc(&fi->fib_clntref);
1818 }
David S. Millera4daad62011-01-27 22:01:53 -08001819 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
David S. Millera4daad62011-01-27 22:01:53 -08001820 }
1821}
1822
David S. Miller813b3b52011-04-28 14:48:42 -07001823static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
David S. Miller5e2b61f2011-03-04 21:47:09 -08001824 const struct fib_result *res,
David S. Miller982721f2011-02-16 21:44:24 -08001825 struct fib_info *fi, u16 type, u32 itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826{
David S. Millerdefb3512010-12-08 21:16:57 -08001827 struct dst_entry *dst = &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828
1829 if (fi) {
1830 if (FIB_RES_GW(*res) &&
1831 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1832 rt->rt_gateway = FIB_RES_GW(*res);
David S. Miller813b3b52011-04-28 14:48:42 -07001833 rt_init_metrics(rt, fl4, fi);
Patrick McHardyc7066f72011-01-14 13:36:42 +01001834#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerdefb3512010-12-08 21:16:57 -08001835 dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836#endif
David S. Millerd33e4552010-12-14 13:01:14 -08001837 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838
David S. Millerdefb3512010-12-08 21:16:57 -08001839 if (dst_mtu(dst) > IP_MAX_MTU)
1840 dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
David S. Miller0dbaee32010-12-13 12:52:14 -08001841 if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
David S. Millerdefb3512010-12-08 21:16:57 -08001842 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843
Patrick McHardyc7066f72011-01-14 13:36:42 +01001844#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001845#ifdef CONFIG_IP_MULTIPLE_TABLES
1846 set_class_tag(rt, fib_rules_tclass(res));
1847#endif
1848 set_class_tag(rt, itag);
1849#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850}
1851
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001852static struct rtable *rt_dst_alloc(struct net_device *dev,
1853 bool nopolicy, bool noxfrm)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001854{
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001855 return dst_alloc(&ipv4_dst_ops, dev, 1, -1,
1856 DST_HOST |
1857 (nopolicy ? DST_NOPOLICY : 0) |
1858 (noxfrm ? DST_NOXFRM : 0));
David S. Miller0c4dcd52011-02-17 15:42:37 -08001859}
1860
Eric Dumazet96d36222010-06-02 19:21:31 +00001861/* called in rcu_read_lock() section */
Al Viro9e12bb22006-09-26 21:25:20 -07001862static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863 u8 tos, struct net_device *dev, int our)
1864{
Eric Dumazet96d36222010-06-02 19:21:31 +00001865 unsigned int hash;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866 struct rtable *rth;
Al Viroa61ced52006-09-26 21:27:54 -07001867 __be32 spec_dst;
Eric Dumazet96d36222010-06-02 19:21:31 +00001868 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 u32 itag = 0;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001870 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871
1872 /* Primary sanity checks. */
1873
1874 if (in_dev == NULL)
1875 return -EINVAL;
1876
Jan Engelhardt1e637c72008-01-21 03:18:08 -08001877 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
Joe Perchesf97c1e02007-12-16 13:45:43 -08001878 ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 goto e_inval;
1880
Joe Perchesf97c1e02007-12-16 13:45:43 -08001881 if (ipv4_is_zeronet(saddr)) {
1882 if (!ipv4_is_local_multicast(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883 goto e_inval;
1884 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001885 } else {
Michael Smith5c04c812011-04-07 04:51:50 +00001886 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
1887 &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001888 if (err < 0)
1889 goto e_err;
1890 }
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001891 rth = rt_dst_alloc(init_net.loopback_dev,
1892 IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893 if (!rth)
1894 goto e_nobufs;
1895
Patrick McHardyc7066f72011-01-14 13:36:42 +01001896#ifdef CONFIG_IP_ROUTE_CLASSID
Changli Gaod8d1f302010-06-10 23:31:35 -07001897 rth->dst.tclassid = itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898#endif
David S. Millercf911662011-04-28 14:31:47 -07001899 rth->dst.output = ip_rt_bug;
1900
1901 rth->rt_key_dst = daddr;
1902 rth->rt_key_src = saddr;
Denis V. Luneve84f84f2008-07-05 19:04:32 -07001903 rth->rt_genid = rt_genid(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904 rth->rt_flags = RTCF_MULTICAST;
Eric Dumazet29e75252008-01-31 17:05:09 -08001905 rth->rt_type = RTN_MULTICAST;
David S. Miller475949d2011-05-03 19:45:15 -07001906 rth->rt_key_tos = tos;
David S. Millercf911662011-04-28 14:31:47 -07001907 rth->rt_dst = daddr;
1908 rth->rt_src = saddr;
1909 rth->rt_route_iif = dev->ifindex;
1910 rth->rt_iif = dev->ifindex;
1911 rth->rt_oif = 0;
1912 rth->rt_mark = skb->mark;
1913 rth->rt_gateway = daddr;
1914 rth->rt_spec_dst= spec_dst;
1915 rth->rt_peer_genid = 0;
1916 rth->peer = NULL;
1917 rth->fi = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918 if (our) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001919 rth->dst.input= ip_local_deliver;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920 rth->rt_flags |= RTCF_LOCAL;
1921 }
1922
1923#ifdef CONFIG_IP_MROUTE
Joe Perchesf97c1e02007-12-16 13:45:43 -08001924 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
Changli Gaod8d1f302010-06-10 23:31:35 -07001925 rth->dst.input = ip_mr_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001926#endif
1927 RT_CACHE_STAT_INC(in_slow_mc);
1928
Denis V. Luneve84f84f2008-07-05 19:04:32 -07001929 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
David S. Millerb23dd4f2011-03-02 14:31:35 -08001930 rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
Eric Dumazet9aa3c942011-06-18 11:59:18 -07001931 return IS_ERR(rth) ? PTR_ERR(rth) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001932
1933e_nobufs:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934 return -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935e_inval:
Eric Dumazet96d36222010-06-02 19:21:31 +00001936 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001937e_err:
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001938 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939}
1940
1941
1942static void ip_handle_martian_source(struct net_device *dev,
1943 struct in_device *in_dev,
1944 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001945 __be32 daddr,
1946 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947{
1948 RT_CACHE_STAT_INC(in_martian_src);
1949#ifdef CONFIG_IP_ROUTE_VERBOSE
1950 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1951 /*
1952 * RFC1812 recommendation, if source is martian,
1953 * the only hint is MAC header.
1954 */
Harvey Harrison673d57e2008-10-31 00:53:57 -07001955 printk(KERN_WARNING "martian source %pI4 from %pI4, on dev %s\n",
1956 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001957 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958 int i;
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001959 const unsigned char *p = skb_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 printk(KERN_WARNING "ll header: ");
1961 for (i = 0; i < dev->hard_header_len; i++, p++) {
1962 printk("%02x", *p);
1963 if (i < (dev->hard_header_len - 1))
1964 printk(":");
1965 }
1966 printk("\n");
1967 }
1968 }
1969#endif
1970}
1971
Eric Dumazet47360222010-06-03 04:13:21 +00001972/* called in rcu_read_lock() section */
Stephen Hemminger5969f712008-04-10 01:52:09 -07001973static int __mkroute_input(struct sk_buff *skb,
David S. Miller982721f2011-02-16 21:44:24 -08001974 const struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001975 struct in_device *in_dev,
1976 __be32 daddr, __be32 saddr, u32 tos,
1977 struct rtable **result)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979 struct rtable *rth;
1980 int err;
1981 struct in_device *out_dev;
Eric Dumazet47360222010-06-03 04:13:21 +00001982 unsigned int flags = 0;
Al Virod9c9df82006-09-26 21:28:14 -07001983 __be32 spec_dst;
1984 u32 itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985
1986 /* get a working reference to the output device */
Eric Dumazet47360222010-06-03 04:13:21 +00001987 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988 if (out_dev == NULL) {
1989 if (net_ratelimit())
1990 printk(KERN_CRIT "Bug in ip_route_input" \
1991 "_slow(). Please, report\n");
1992 return -EINVAL;
1993 }
1994
1995
Michael Smith5c04c812011-04-07 04:51:50 +00001996 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
1997 in_dev->dev, &spec_dst, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001999 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000 saddr);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002001
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002 goto cleanup;
2003 }
2004
2005 if (err)
2006 flags |= RTCF_DIRECTSRC;
2007
Thomas Graf51b77ca2008-06-03 16:36:01 -07002008 if (out_dev == in_dev && err &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07002009 (IN_DEV_SHARED_MEDIA(out_dev) ||
2010 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
2011 flags |= RTCF_DOREDIRECT;
2012
2013 if (skb->protocol != htons(ETH_P_IP)) {
2014 /* Not IP (i.e. ARP). Do not create route, if it is
2015 * invalid for proxy arp. DNAT routes are always valid.
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00002016 *
2017 * Proxy arp feature have been extended to allow, ARP
2018 * replies back to the same interface, to support
2019 * Private VLAN switch technologies. See arp.c.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020 */
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00002021 if (out_dev == in_dev &&
2022 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023 err = -EINVAL;
2024 goto cleanup;
2025 }
2026 }
2027
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002028 rth = rt_dst_alloc(out_dev->dev,
2029 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Miller0c4dcd52011-02-17 15:42:37 -08002030 IN_DEV_CONF_GET(out_dev, NOXFRM));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002031 if (!rth) {
2032 err = -ENOBUFS;
2033 goto cleanup;
2034 }
2035
David S. Miller5e2b61f2011-03-04 21:47:09 -08002036 rth->rt_key_dst = daddr;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002037 rth->rt_key_src = saddr;
David S. Millercf911662011-04-28 14:31:47 -07002038 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2039 rth->rt_flags = flags;
2040 rth->rt_type = res->type;
David S. Miller475949d2011-05-03 19:45:15 -07002041 rth->rt_key_tos = tos;
David S. Millercf911662011-04-28 14:31:47 -07002042 rth->rt_dst = daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043 rth->rt_src = saddr;
OGAWA Hirofumi1b86a582011-04-07 14:04:08 -07002044 rth->rt_route_iif = in_dev->dev->ifindex;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002045 rth->rt_iif = in_dev->dev->ifindex;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002046 rth->rt_oif = 0;
David S. Millercf911662011-04-28 14:31:47 -07002047 rth->rt_mark = skb->mark;
2048 rth->rt_gateway = daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049 rth->rt_spec_dst= spec_dst;
David S. Millercf911662011-04-28 14:31:47 -07002050 rth->rt_peer_genid = 0;
2051 rth->peer = NULL;
2052 rth->fi = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053
Changli Gaod8d1f302010-06-10 23:31:35 -07002054 rth->dst.input = ip_forward;
2055 rth->dst.output = ip_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056
David S. Miller5e2b61f2011-03-04 21:47:09 -08002057 rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059 *result = rth;
2060 err = 0;
2061 cleanup:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062 return err;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002063}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064
Stephen Hemminger5969f712008-04-10 01:52:09 -07002065static int ip_mkroute_input(struct sk_buff *skb,
2066 struct fib_result *res,
David S. Miller68a5e3d2011-03-11 20:07:33 -05002067 const struct flowi4 *fl4,
Stephen Hemminger5969f712008-04-10 01:52:09 -07002068 struct in_device *in_dev,
2069 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070{
Chuck Short7abaa272005-06-22 22:10:23 -07002071 struct rtable* rth = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002072 int err;
2073 unsigned hash;
2074
2075#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Millerff3fccb2011-03-10 16:23:24 -08002076 if (res->fi && res->fi->fib_nhs > 1)
David S. Miller1b7fe5932011-03-10 17:01:16 -08002077 fib_select_multipath(res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002078#endif
2079
2080 /* create a routing cache entry */
2081 err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
2082 if (err)
2083 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084
2085 /* put it into the cache */
David S. Miller68a5e3d2011-03-11 20:07:33 -05002086 hash = rt_hash(daddr, saddr, fl4->flowi4_iif,
Changli Gaod8d1f302010-06-10 23:31:35 -07002087 rt_genid(dev_net(rth->dst.dev)));
David S. Miller68a5e3d2011-03-11 20:07:33 -05002088 rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002089 if (IS_ERR(rth))
2090 return PTR_ERR(rth);
2091 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002092}
2093
/*
 * NOTE. We drop all packets that have a local source address, because
 * every properly looped-back packet must already have the correct
 * destination attached by the output routine.
 *
 * This approach solves two big problems:
 * 1. Non-simplex devices are handled properly.
 * 2. IP spoofing attempts are filtered with 100% guarantee.
 *
 * Called with rcu_read_lock().
 */
2104
/*
 * Slow path for input routing: screen the packet's addresses for
 * martians, consult the FIB, and build a fresh routing cache entry for
 * local delivery, broadcast, or forwarding.
 *
 * @skb:   packet being routed (mark, protocol, cache insertion)
 * @daddr: destination address from the IP header
 * @saddr: source address from the IP header
 * @tos:   type-of-service byte
 * @dev:   ingress network device
 *
 * Returns 0 on success or a negative errno.
 * Called with rcu_read_lock() held (see comment above).
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flowi4	fl4;
	unsigned	flags = 0;
	u32		itag = 0;
	struct rtable * rth;
	unsigned	hash;
	__be32		spec_dst;
	int		err = -EINVAL;
	struct net    * net = dev_net(dev);

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	/* A multicast, limited-broadcast or loopback *source* is never
	 * legitimate on input.
	 */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    ipv4_is_loopback(saddr))
		goto martian_source;

	/* 255.255.255.255 destination (or all-zero src+dst, i.e. old
	 * BOOTP-style broadcast) takes the broadcast path directly.
	 */
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
		goto martian_destination;

	/*
	 *	Now we are ready to route packet.
	 */
	/* Fill the flow key for the FIB lookup. */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res);
	if (err != 0) {
		/* No route: hosts (non-forwarders) report unreachable. */
		if (!IN_DEV_FORWARD(in_dev))
			goto e_hostunreach;
		goto no_route;
	}

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		/* Destination is one of ours: validate the source via a
		 * reverse lookup before delivering locally.
		 */
		err = fib_validate_source(skb, saddr, daddr, tos,
					  net->loopback_dev->ifindex,
					  dev, &spec_dst, &itag);
		if (err < 0)
			goto martian_source_keep_err;
		if (err)
			flags |= RTCF_DIRECTSRC;
		spec_dst = daddr;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto e_hostunreach;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	/* Forwarding case: build and cache the input route. */
	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	/* Broadcast is only honoured for plain IPv4 frames. */
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ipv4_is_zeronet(saddr))
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
	else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
					  &itag);
		if (err < 0)
			goto martian_source_keep_err;
		if (err)
			flags |= RTCF_DIRECTSRC;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Build a cache entry for local delivery (also reached from
	 * no_route with res.type == RTN_UNREACHABLE, see below).
	 */
	rth = rt_dst_alloc(net->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
	if (!rth)
		goto e_nobufs;

	rth->dst.input= ip_local_deliver;
	rth->dst.output= ip_rt_bug;	/* local routes must never be output */
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif

	rth->rt_key_dst	= daddr;
	rth->rt_key_src	= saddr;
	rth->rt_genid = rt_genid(net);
	rth->rt_flags 	= flags|RTCF_LOCAL;
	rth->rt_type	= res.type;
	rth->rt_key_tos	= tos;
	rth->rt_dst	= daddr;
	rth->rt_src	= saddr;
#ifdef CONFIG_IP_ROUTE_CLASSID
	/* NOTE(review): duplicate of the tclassid assignment above —
	 * harmless but redundant; candidate for cleanup.
	 */
	rth->dst.tclassid = itag;
#endif
	rth->rt_route_iif = dev->ifindex;
	rth->rt_iif	= dev->ifindex;
	rth->rt_oif	= 0;
	rth->rt_mark    = skb->mark;
	rth->rt_gateway	= daddr;
	rth->rt_spec_dst= spec_dst;
	rth->rt_peer_genid = 0;
	rth->peer = NULL;
	rth->fi = NULL;
	if (res.type == RTN_UNREACHABLE) {
		/* Cache the negative result so ip_error() answers with
		 * the errno recorded from the failed lookup.
		 */
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags 	&= ~RTCF_LOCAL;
	}
	hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
	rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
	err = 0;
	if (IS_ERR(rth))
		err = PTR_ERR(rth);
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	res.type = RTN_UNREACHABLE;
	if (err == -ESRCH)
		err = -ENETUNREACH;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
		printk(KERN_WARNING "martian destination %pI4 from %pI4, dev %s\n",
			&daddr, &saddr, dev->name);
#endif

e_hostunreach:
	err = -EHOSTUNREACH;
	goto out;

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	err = -EINVAL;
martian_source_keep_err:
	/* fib_validate_source() already set err; log and bail out. */
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
2285
/*
 * Main entry point for input route lookup.  Tries the routing cache
 * first under RCU; on a miss, handles multicast destinations inline and
 * falls back to ip_route_input_slow() for everything else.
 *
 * @noref: when true, attach the cached dst to @skb without taking a
 *         reference (dst_use_noref/skb_dst_set_noref).
 *
 * Returns 0 on success or a negative errno.
 */
int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			   u8 tos, struct net_device *dev, bool noref)
{
	struct rtable * rth;
	unsigned	hash;
	int iif = dev->ifindex;
	struct net *net;
	int res;

	net = dev_net(dev);

	rcu_read_lock();

	if (!rt_caching(net))
		goto skip_cache;

	tos &= IPTOS_RT_MASK;
	hash = rt_hash(daddr, saddr, iif, rt_genid(net));

	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
	     rth = rcu_dereference(rth->dst.rt_next)) {
		/* Branch-free key compare: OR the XORs of all key fields
		 * so a single test decides the match.  rt_oif must be 0
		 * for an input route.
		 */
		if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
		     ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
		     (rth->rt_iif ^ iif) |
		     rth->rt_oif |
		     (rth->rt_key_tos ^ tos)) == 0 &&
		    rth->rt_mark == skb->mark &&
		    net_eq(dev_net(rth->dst.dev), net) &&
		    !rt_is_expired(rth)) {
			if (noref) {
				dst_use_noref(&rth->dst, jiffies);
				skb_dst_set_noref(skb, &rth->dst);
			} else {
				dst_use(&rth->dst, jiffies);
				skb_dst_set(skb, &rth->dst);
			}
			RT_CACHE_STAT_INC(in_hit);
			rcu_read_unlock();
			return 0;
		}
		RT_CACHE_STAT_INC(in_hlist_search);
	}

skip_cache:
	/* Multicast recognition logic is moved from route cache to here.
	   The problem was that too many Ethernet cards have broken/missing
	   hardware multicast filters :-( As result the host on multicasting
	   network acquires a lot of useless route cache entries, sort of
	   SDR messages from all the world. Now we try to get rid of them.
	   Really, provided software IP multicast filter is organized
	   reasonably (at least, hashed), it does not result in a slowdown
	   comparing with route cache reject entries.
	   Note, that multicast routers are not affected, because
	   route cache entry is created eventually.
	 */
	if (ipv4_is_multicast(daddr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		if (in_dev) {
			int our = ip_check_mc_rcu(in_dev, daddr, saddr,
						  ip_hdr(skb)->protocol);
			if (our
#ifdef CONFIG_IP_MROUTE
				||
			    (!ipv4_is_local_multicast(daddr) &&
			     IN_DEV_MFORWARD(in_dev))
#endif
			   ) {
				/* NOTE(review): this inner 'res' shadows the
				 * outer one — works, but worth renaming.
				 */
				int res = ip_route_input_mc(skb, daddr, saddr,
							    tos, dev, our);
				rcu_read_unlock();
				return res;
			}
		}
		rcu_read_unlock();
		return -EINVAL;
	}
	res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
	rcu_read_unlock();
	return res;
}
EXPORT_SYMBOL(ip_route_input_common);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368
/*
 * Build (but do not cache) an output route entry for the given FIB
 * result and flow.  Classifies the destination (broadcast / multicast /
 * unicast), allocates the rtable, fills in its keys and handlers, and
 * sets the next hop.  Returns the new rtable or an ERR_PTR.
 *
 * called with rcu_read_lock()
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4,
				       __be32 orig_daddr, __be32 orig_saddr,
				       int orig_oif, struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	u32 tos = RT_FL_TOS(fl4);
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;

	/* A loopback source may only leave through a loopback device. */
	if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
		return ERR_PTR(-EINVAL);

	/* Override the FIB type from the destination address class. */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;	/* broadcast never uses a nexthop gateway */
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		/* Keep RTCF_LOCAL only if we are a member of the group. */
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	}

	rth = rt_dst_alloc(dev_out,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM));
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->dst.output = ip_output;

	/* Cache lookup keys use the *original* (pre-resolution) flow. */
	rth->rt_key_dst	= orig_daddr;
	rth->rt_key_src	= orig_saddr;
	rth->rt_genid = rt_genid(dev_net(dev_out));
	rth->rt_flags	= flags;
	rth->rt_type	= type;
	rth->rt_key_tos	= tos;
	rth->rt_dst	= fl4->daddr;
	rth->rt_src	= fl4->saddr;
	rth->rt_route_iif = 0;	/* marks this as an output route */
	rth->rt_iif	= orig_oif ? : dev_out->ifindex;
	rth->rt_oif	= orig_oif;
	rth->rt_mark    = fl4->flowi4_mark;
	rth->rt_gateway = fl4->daddr;
	rth->rt_spec_dst= fl4->saddr;
	rth->rt_peer_genid = 0;
	rth->peer = NULL;
	rth->fi = NULL;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL) {
		rth->dst.input = ip_local_deliver;
		rth->rt_spec_dst = fl4->daddr;
	}
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		rth->rt_spec_dst = fl4->saddr;
		/* Locally-received copies of bcast/mcast leave via
		 * ip_mc_output unless the device is loopback.
		 */
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4, res, fi, type, 0);

	return rth;
}
2469
/*
 * Major route resolver routine.
 * Takes rcu_read_lock() itself for the duration of the lookup (see the
 * rcu_read_lock()/rcu_read_unlock() pair in the function body).
 */
2474
/*
 * Slow path for output route resolution.  Validates the source address,
 * resolves the output device and source/destination addresses (possibly
 * rewriting @fl4 in place), performs the FIB lookup, and finally builds
 * and caches the route via __mkroute_output()/rt_intern_hash().
 *
 * Returns the rtable or an ERR_PTR.  Note that @fl4 may be modified:
 * saddr/daddr/oif/iif/tos/scope are filled in as they are resolved.
 */
static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
{
	struct net_device *dev_out = NULL;
	u32 tos	= RT_FL_TOS(fl4);
	unsigned int flags = 0;
	struct fib_result res;
	struct rtable *rth;
	__be32 orig_daddr;
	__be32 orig_saddr;
	int orig_oif;

	res.fi		= NULL;
#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r		= NULL;
#endif

	/* Remember the caller's flow key: the cache is keyed on the
	 * original values, while fl4 gets rewritten during resolution.
	 */
	orig_daddr = fl4->daddr;
	orig_saddr = fl4->saddr;
	orig_oif = fl4->flowi4_oif;

	fl4->flowi4_iif = net->loopback_dev->ifindex;
	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);

	rcu_read_lock();
	if (fl4->saddr) {
		rth = ERR_PTR(-EINVAL);
		/* A multicast/broadcast/zeronet source is never valid. */
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr))
			goto out;

		/* I removed check for oif == dev_out->oif here.
		   It was wrong for two reasons:
		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
		      is assigned to multiple interfaces.
		   2. Moreover, we are allowed to send packets with saddr
		      of another iface. --ANK
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (dev_out == NULL)
				goto out;

			/* Special hack: user can direct multicasts
			   and limited broadcast via necessary interface
			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			   This hack is not just for fun, it allows
			   vic,vat and friends to work.
			   They bind socket to loopback, set ttl to zero
			   and expect that it will work.
			   From the viewpoint of routing cache they are broken,
			   because we are not allowed to build multicast path
			   with loopback source addr (look, routing cache
			   cannot know, that ttl is zero, so that packet
			   will not leave this host and route is valid).
			   Luckily, this hack is good workaround.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	if (fl4->flowi4_oif) {
		/* Caller pinned the output interface: resolve it and pick
		 * an appropriate source address if none was given.
		 */
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (dev_out == NULL)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr)) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	if (!fl4->daddr) {
		/* No destination: route to ourselves via loopback. */
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = net->loopback_dev->ifindex;
		res.type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	if (fib_lookup(net, fl4, &res)) {
		res.fi = NULL;
		if (fl4->flowi4_oif) {
			/* Apparently, routing tables are wrong. Assume,
			   that the destination is on link.

			   WHY? DW.
			   Because we are allowed to send to iface
			   even if it has NO routes and NO assigned
			   addresses. When oif is specified, routing
			   tables are looked up with only one purpose:
			   to catch if destination is gatewayed, rather than
			   direct. Moreover, if MSG_DONTROUTE is set,
			   we send packet, ignoring both routing tables
			   and ifaddr state. --ANK


			   We could make it even if oif is unknown,
			   likely IPv6, but we do not.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res.type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(-ENETUNREACH);
		goto out;
	}

	if (res.type == RTN_LOCAL) {
		/* Destination is a local address: deliver via loopback,
		 * preferring the route's configured prefsrc.
		 */
		if (!fl4->saddr) {
			if (res.fi->fib_prefsrc)
				fl4->saddr = res.fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = dev_out->ifindex;
		res.fi = NULL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
		fib_select_multipath(&res);
	else
#endif
	/* Several default routes and no prefix match: pick one. */
	if (!res.prefixlen &&
	    res.table->tb_num_default > 1 &&
	    res.type == RTN_UNICAST && !fl4->flowi4_oif)
		fib_select_default(&res);

	if (!fl4->saddr)
		fl4->saddr = FIB_RES_PREFSRC(net, res);

	dev_out = FIB_RES_DEV(res);
	fl4->flowi4_oif = dev_out->ifindex;


make_route:
	rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
			       dev_out, flags);
	if (!IS_ERR(rth)) {
		unsigned int hash;

		/* Cache under the original flow key, not the rewritten one. */
		hash = rt_hash(orig_daddr, orig_saddr, orig_oif,
			       rt_genid(dev_net(dev_out)));
		rth = rt_intern_hash(hash, rth, NULL, orig_oif);
	}

out:
	rcu_read_unlock();
	return rth;
}
2667
/*
 * Output route lookup: consult the routing cache first (under
 * rcu_read_lock_bh), falling back to ip_route_output_slow() on a miss
 * or when caching is disabled.  On a cache hit, missing saddr/daddr in
 * @flp4 are backfilled from the cached route.  Returns the rtable or an
 * ERR_PTR (propagated from the slow path).
 */
struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
{
	struct rtable *rth;
	unsigned int hash;

	if (!rt_caching(net))
		goto slow_output;

	hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net));

	rcu_read_lock_bh();
	for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
		rth = rcu_dereference_bh(rth->dst.rt_next)) {
		/* Match the full output flow key; TOS compares only the
		 * routing-relevant bits (IPTOS_RT_MASK | RTO_ONLINK).
		 */
		if (rth->rt_key_dst == flp4->daddr &&
		    rth->rt_key_src == flp4->saddr &&
		    rt_is_output_route(rth) &&
		    rth->rt_oif == flp4->flowi4_oif &&
		    rth->rt_mark == flp4->flowi4_mark &&
		    !((rth->rt_key_tos ^ flp4->flowi4_tos) &
			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
		    net_eq(dev_net(rth->dst.dev), net) &&
		    !rt_is_expired(rth)) {
			dst_use(&rth->dst, jiffies);
			RT_CACHE_STAT_INC(out_hit);
			rcu_read_unlock_bh();
			/* Fill in any addresses the caller left as zero. */
			if (!flp4->saddr)
				flp4->saddr = rth->rt_src;
			if (!flp4->daddr)
				flp4->daddr = rth->rt_dst;
			return rth;
		}
		RT_CACHE_STAT_INC(out_hlist_search);
	}
	rcu_read_unlock_bh();

slow_output:
	return ip_route_output_slow(net, flp4);
}
EXPORT_SYMBOL_GPL(__ip_route_output_key);
2707
/* ->check handler for blackhole dsts: always returns NULL (by the
 * dst_ops convention this reports the entry as no longer valid —
 * NOTE(review): confirm against dst.h). */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2712
/* ->default_mtu handler for blackhole dsts: advertise an MTU of 0. */
static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
2717
/* ->update_pmtu handler for blackhole dsts: PMTU updates are ignored. */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
2721
/* ->cow_metrics handler for blackhole dsts: never copy-on-write a
 * metrics block; returning NULL leaves the metrics read-only. */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2727
/*
 * dst_ops for blackhole routes: shares ->destroy and ->default_advmss
 * with the regular IPv4 dst_ops, but ->check, ->default_mtu,
 * ->update_pmtu and ->cow_metrics are the no-op stubs defined above.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			=	AF_INET,
	.protocol		=	cpu_to_be16(ETH_P_IP),
	.destroy		=	ipv4_dst_destroy,
	.check			=	ipv4_blackhole_dst_check,
	.default_mtu		=	ipv4_blackhole_default_mtu,
	.default_advmss		=	ipv4_default_advmss,
	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
};
2738
/*
 * Replace @dst_orig with a "blackhole" clone: a new rtable backed by
 * ipv4_dst_blackhole_ops whose input/output handlers are dst_discard,
 * so packets routed through it are silently dropped, while all the
 * routing keys, metrics and reference-counted sub-objects (peer, fib
 * info) of the original route are preserved.
 *
 * Consumes a reference on @dst_orig (dst_release at the end) in every
 * case.  Returns the new dst, or ERR_PTR(-ENOMEM) if the allocation
 * failed.
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0);
	struct rtable *ort = (struct rtable *) dst_orig;

	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Drop everything sent through this route. */
		new->input = dst_discard;
		new->output = dst_discard;
		dst_copy_metrics(new, &ort->dst);

		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		/* Copy the full routing cache key ... */
		rt->rt_key_dst = ort->rt_key_dst;
		rt->rt_key_src = ort->rt_key_src;
		rt->rt_key_tos = ort->rt_key_tos;
		rt->rt_route_iif = ort->rt_route_iif;
		rt->rt_iif = ort->rt_iif;
		rt->rt_oif = ort->rt_oif;
		rt->rt_mark = ort->rt_mark;

		/* ... and the resolved route data. */
		rt->rt_genid = rt_genid(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_dst = ort->rt_dst;
		rt->rt_src = ort->rt_src;
		rt->rt_gateway = ort->rt_gateway;
		rt->rt_spec_dst = ort->rt_spec_dst;
		rt->peer = ort->peer;
		if (rt->peer)
			atomic_inc(&rt->peer->refcnt);
		rt->fi = ort->fi;
		if (rt->fi)
			atomic_inc(&rt->fi->fib_clntref);

		dst_free(new);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2785
David S. Miller9d6ec932011-03-12 01:12:47 -05002786struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002787 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002788{
David S. Miller9d6ec932011-03-12 01:12:47 -05002789 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002790
David S. Millerb23dd4f2011-03-02 14:31:35 -08002791 if (IS_ERR(rt))
2792 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002793
David S. Miller56157872011-05-02 14:37:45 -07002794 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002795 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2796 flowi4_to_flowi(flp4),
2797 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002798
David S. Millerb23dd4f2011-03-02 14:31:35 -08002799 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002800}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002801EXPORT_SYMBOL_GPL(ip_route_output_flow);
2802
/*
 * Build an RTM_NEWROUTE netlink message describing the route attached
 * to @skb (skb_rtable(skb)) and append it to @skb.
 *
 * Note: the NLA_PUT*() macros branch to the nla_put_failure label when
 * the skb runs out of tail room, which is why that label must exist
 * even though no explicit goto to it appears for those lines.
 *
 * @nowait controls ipmr_get_route() behaviour for unresolved multicast
 * routes (dump context passes 1, request context passes 0).
 *
 * Returns the result of nlmsg_end() on success, 0 when an ipmr queue
 * entry was consumed, or -EMSGSIZE when the message did not fit (the
 * partial message is cancelled).
 */
static int rt_fill_info(struct net *net,
			struct sk_buff *skb, u32 pid, u32 seq, int event,
			int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	long expires = 0;
	const struct inet_peer *peer = rt->peer;
	u32 id = 0, ts = 0, tsage = 0, error;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	/* Fixed-size rtmsg header. */
	r = nlmsg_data(nlh);
	r->rtm_family = AF_INET;
	r->rtm_dst_len = 32;
	r->rtm_src_len = 0;
	r->rtm_tos = rt->rt_key_tos;
	r->rtm_table = RT_TABLE_MAIN;
	NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
	r->rtm_type = rt->rt_type;
	r->rtm_scope = RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;

	NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);

	if (rt->rt_key_src) {
		r->rtm_src_len = 32;
		NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src);
	}
	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid)
		NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
#endif
	/* Preferred source: spec_dst for input routes, otherwise the
	 * resolved source when it differs from the requested one. */
	if (rt_is_input_route(rt))
		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
	else if (rt->rt_src != rt->rt_key_src)
		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);

	if (rt->rt_dst != rt->rt_gateway)
		NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->rt_mark)
		NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);

	/* Cacheinfo fields come from the inet_peer, when one is bound:
	 * IP id counter, TCP timestamp data, remaining PMTU lifetime. */
	error = rt->dst.error;
	if (peer) {
		inet_peer_refcheck(rt->peer);
		id = atomic_read(&peer->ip_id_count) & 0xffff;
		if (peer->tcp_ts_stamp) {
			ts = peer->tcp_ts;
			tsage = get_seconds() - peer->tcp_ts_stamp;
		}
		expires = ACCESS_ONCE(peer->pmtu_expires);
		if (expires)
			expires -= jiffies;
	}

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		__be32 dst = rt->rt_dst;

		/* Forwarded multicast: let ipmr fill in (or queue) the
		 * multicast forwarding information. */
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 rt->rt_src, rt->rt_dst,
						 r, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
					error = err;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif);
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
			       expires, error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2906
/*
 * RTM_GETROUTE handler: resolve the route a packet with the given
 * attributes (RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF/RTA_MARK, rtm_tos)
 * would take, and unicast the answer back to the requester.
 *
 * With RTA_IIF set the input path is exercised via ip_route_input()
 * on a dummy skb (BHs disabled, as the input path expects); otherwise
 * the output path is queried via ip_route_output_key().
 *
 * Returns 0 on success or a negative errno.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	/* Absent attributes default to 0 (wildcard). */
	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	if (iif) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (dev == NULL) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol	= htons(ETH_P_IP);
		skb->dev	= dev;
		skb->mark	= mark;
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		/* ip_route_input() can succeed yet attach an error dst
		 * (e.g. unreachable); surface that as the result. */
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		struct flowi4 fl4 = {
			.daddr = dst,
			.saddr = src,
			.flowi4_tos = rtm->rtm_tos,
			.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
			.flowi4_mark = mark,
		};
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err <= 0)
		goto errout_free;

	/* rtnl_unicast() consumes skb in all cases. */
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}
3001
/*
 * Netlink dump callback: emit one RTM_NEWROUTE message per live,
 * same-netns entry in the routing cache.
 *
 * The dump cursor lives in cb->args[0] (hash bucket) and cb->args[1]
 * (index within the bucket's chain) so a truncated dump can resume
 * where it stopped.  Chains are walked under rcu_read_lock_bh(); the
 * dst is attached to the dump skb without taking a reference
 * (skb_dst_set_noref) and dropped again before the lock is released.
 */
int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rtable *rt;
	int h, s_h;
	int idx, s_idx;
	struct net *net;

	net = sock_net(skb->sk);

	s_h = cb->args[0];
	if (s_h < 0)
		s_h = 0;
	s_idx = idx = cb->args[1];
	for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) {
		if (!rt_hash_table[h].chain)
			continue;
		rcu_read_lock_bh();
		for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
		     rt = rcu_dereference_bh(rt->dst.rt_next), idx++) {
			if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx)
				continue;
			if (rt_is_expired(rt))
				continue;
			skb_dst_set_noref(skb, &rt->dst);
			if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
					 1, NLM_F_MULTI) <= 0) {
				/* Message full: stop and record cursor. */
				skb_dst_drop(skb);
				rcu_read_unlock_bh();
				goto done;
			}
			skb_dst_drop(skb);
		}
		rcu_read_unlock_bh();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	return skb->len;
}
3043
/* Multicast configuration changed on @in_dev: flush that netns's
 * routing cache immediately (delay 0). */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev), 0);
}
3048
3049#ifdef CONFIG_SYSCTL
Denis V. Lunev81c684d2008-07-08 03:05:28 -07003050static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07003051 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003052 size_t *lenp, loff_t *ppos)
3053{
3054 if (write) {
Denis V. Lunev639e1042008-07-05 19:02:06 -07003055 int flush_delay;
Denis V. Lunev81c684d2008-07-08 03:05:28 -07003056 ctl_table ctl;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003057 struct net *net;
Denis V. Lunev639e1042008-07-05 19:02:06 -07003058
Denis V. Lunev81c684d2008-07-08 03:05:28 -07003059 memcpy(&ctl, __ctl, sizeof(ctl));
3060 ctl.data = &flush_delay;
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07003061 proc_dointvec(&ctl, write, buffer, lenp, ppos);
Denis V. Lunev639e1042008-07-05 19:02:06 -07003062
Denis V. Lunev81c684d2008-07-08 03:05:28 -07003063 net = (struct net *)__ctl->extra1;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003064 rt_cache_flush(net, flush_delay);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003065 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09003066 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003067
3068 return -EINVAL;
3069}
3070
/*
 * Global (not per-netns) routing tunables exported under
 * /proc/sys/net/ipv4/route/.  All entries are ints, mode 0644; the
 * proc handler tells whether a value is a plain integer, jiffies, or
 * milliseconds-as-jiffies.
 */
static ctl_table ipv4_route_table[] = {
	{
		.procname = "gc_thresh",
		.data = &ipv4_dst_ops.gc_thresh,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "max_size",
		.data = &ip_rt_max_size,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname = "gc_min_interval",
		.data = &ip_rt_gc_min_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		/* Same variable as above, exposed in milliseconds. */
		.procname = "gc_min_interval_ms",
		.data = &ip_rt_gc_min_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_ms_jiffies,
	},
	{
		.procname = "gc_timeout",
		.data = &ip_rt_gc_timeout,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "gc_interval",
		.data = &ip_rt_gc_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "redirect_load",
		.data = &ip_rt_redirect_load,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "redirect_number",
		.data = &ip_rt_redirect_number,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "redirect_silence",
		.data = &ip_rt_redirect_silence,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "error_cost",
		.data = &ip_rt_error_cost,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "error_burst",
		.data = &ip_rt_error_burst,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "gc_elasticity",
		.data = &ip_rt_gc_elasticity,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "mtu_expires",
		.data = &ip_rt_mtu_expires,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "min_pmtu",
		.data = &ip_rt_min_pmtu,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "min_adv_mss",
		.data = &ip_rt_min_advmss,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003181
/* Placeholder so the "neigh" directory exists even with no entries. */
static struct ctl_table empty[1];

/* Skeleton for net.ipv4.{route,neigh}, registered early from
 * ip_static_sysctl_init() below. */
static struct ctl_table ipv4_skeleton[] =
{
	{ .procname = "route", 
	  .mode = 0555, .child = ipv4_route_table},
	{ .procname = "neigh",
	  .mode = 0555, .child = empty},
	{ }
};

/* Mount point for ipv4_skeleton: net.ipv4. */
static __net_initdata struct ctl_path ipv4_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ },
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003198
/* Per-netns "flush" sysctl.  Note: no .data — the handler parses into
 * a local copy; .extra1 is filled with the owning struct net in
 * sysctl_route_net_init(). */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname = "flush",
		.maxlen = sizeof(int),
		.mode = 0200,
		.proc_handler = ipv4_sysctl_rtcache_flush,
	},
	{ },
};

/* Mount point for the flush entry: net.ipv4.route. */
static __net_initdata struct ctl_path ipv4_route_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ .procname = "route", },
	{ },
};
3215
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003216static __net_init int sysctl_route_net_init(struct net *net)
3217{
3218 struct ctl_table *tbl;
3219
3220 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08003221 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003222 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
3223 if (tbl == NULL)
3224 goto err_dup;
3225 }
3226 tbl[0].extra1 = net;
3227
3228 net->ipv4.route_hdr =
3229 register_net_sysctl_table(net, ipv4_route_path, tbl);
3230 if (net->ipv4.route_hdr == NULL)
3231 goto err_reg;
3232 return 0;
3233
3234err_reg:
3235 if (tbl != ipv4_route_flush_table)
3236 kfree(tbl);
3237err_dup:
3238 return -ENOMEM;
3239}
3240
3241static __net_exit void sysctl_route_net_exit(struct net *net)
3242{
3243 struct ctl_table *tbl;
3244
3245 tbl = net->ipv4.route_hdr->ctl_table_arg;
3246 unregister_net_sysctl_table(net->ipv4.route_hdr);
3247 BUG_ON(tbl == ipv4_route_flush_table);
3248 kfree(tbl);
3249}
3250
/* Hook the per-netns flush-sysctl setup/teardown into netns lifetime. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003255#endif
3256
/* Seed the per-netns route and device-address generation counters with
 * random values at netns creation. */
static __net_init int rt_genid_init(struct net *net)
{
	get_random_bytes(&net->ipv4.rt_genid,
			 sizeof(net->ipv4.rt_genid));
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}
3265
/* Run rt_genid_init() for every new netns. */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
3269
3270
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu classid accounting table (256 slots per cpu), allocated in
 * ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003274
3275static __initdata unsigned long rhash_entries;
3276static int __init set_rhash_entries(char *str)
3277{
3278 if (!str)
3279 return 0;
3280 rhash_entries = simple_strtoul(str, &str, 0);
3281 return 1;
3282}
3283__setup("rhash_entries=", set_rhash_entries);
3284
/*
 * Boot-time initialization of the IPv4 routing layer: dst slab cache,
 * dst entry counters, the routing-cache hash table (sized from
 * rhash_entries or memory), GC thresholds, devinet/FIB init, procfs,
 * xfrm, the RTM_GETROUTE handler, and the pernet subsystems.
 *
 * Allocation failures for the core structures are fatal (panic); only
 * the proc files are allowed to fail with a logged error.
 */
int __init ip_rt_init(void)
{
	int rc = 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Blackhole dsts are allocated from the same slab. */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	/* Hash size: rhash_entries if given, otherwise scaled from
	 * total RAM (order 15 below 512MB worth of pages, else 17). */
	rt_hash_table = (struct rt_hash_bucket *)
		alloc_large_system_hash("IP route cache",
					sizeof(struct rt_hash_bucket),
					rhash_entries,
					(totalram_pages >= 128 * 1024) ?
					15 : 17,
					0,
					&rt_hash_log,
					&rt_hash_mask,
					rhash_entries ? 0 : 512 * 1024);
	memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
	rt_hash_lock_init();

	/* GC tuning derived from the final table size. */
	ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
	ip_rt_max_size = (rt_hash_mask + 1) * 16;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		printk(KERN_ERR "Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init(ip_rt_max_size);
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	return rc;
}
3340
Al Viroa1bc6eb2008-07-30 06:32:52 -04003341#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
/* Early registration of the static net.ipv4.{route,neigh} sysctl
 * skeleton, before the regular ipv4 sysctl setup runs. */
void __init ip_static_sysctl_init(void)
{
	register_sysctl_paths(ipv4_path, ipv4_skeleton);
}
Al Viroa1bc6eb2008-07-30 06:32:52 -04003350#endif