blob: 561a37833d86c3d3d13c002e4e7a18005f97b751 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
68#include <asm/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Herbert Xu352e5122007-11-13 21:34:06 -080092#include <net/dst.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020093#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070094#include <net/protocol.h>
95#include <net/ip.h>
96#include <net/route.h>
97#include <net/inetpeer.h>
98#include <net/sock.h>
99#include <net/ip_fib.h>
100#include <net/arp.h>
101#include <net/tcp.h>
102#include <net/icmp.h>
103#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700104#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700105#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106#ifdef CONFIG_SYSCTL
107#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000108#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700110#include <net/secure_seq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
/* Mask a flow's TOS down to the bits relevant for route lookup, while
 * preserving the legacy RTO_ONLINK flag that may be stashed in the TOS.
 */
#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

/* Upper bound for any IPv4 route MTU we will install. */
#define IP_MAX_MTU 0xFFF0

/* Garbage-collection interval for cached route state. */
#define RT_GC_TIMEOUT (300*HZ)
118
/* Runtime-tunable knobs (exposed via sysctl elsewhere in this file).
 * __read_mostly keeps them out of frequently-written cache lines.
 */
static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly = 9;	/* max redirects sent per burst */
static int ip_rt_redirect_load __read_mostly = HZ / 50;	/* min delay between redirects */
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;		/* token cost per ICMP error */
static int ip_rt_error_burst __read_mostly = 5 * HZ;	/* token bucket depth for errors */
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;	/* learned-PMTU lifetime */
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;	/* floor for learned PMTU */
static int ip_rt_min_advmss __read_mostly = 256;	/* floor for advertised MSS */
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500128
/*
 *	Interface to generic destination cache.
 */

/* Forward declarations for the dst_ops callbacks wired up below. */
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
			   struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143
/* dst_ops->ifdown hook: intentionally a no-op for IPv4 — nothing
 * device-specific is kept in the dst that needs tearing down here.
 */
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
/* dst_ops->cow_metrics hook: must never be reached for IPv4 routes
 * (metrics are managed elsewhere), so loudly warn and fail.
 */
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}
154
/* Defined below; declared early so ipv4_dst_ops can reference it. */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700158
/* The dst_ops vtable for IPv4 unicast routes. */
static struct dst_ops ipv4_dst_ops = {
	.family = AF_INET,
	.protocol = cpu_to_be16(ETH_P_IP),
	.check = ipv4_dst_check,
	.default_advmss = ipv4_default_advmss,
	.mtu = ipv4_mtu,
	.cow_metrics = ipv4_cow_metrics,	/* never expected to run; see WARN_ON */
	.destroy = ipv4_dst_destroy,
	.ifdown = ipv4_dst_ifdown,		/* no-op */
	.negative_advice = ipv4_negative_advice,
	.link_failure = ipv4_link_failure,
	.update_pmtu = ip_rt_update_pmtu,
	.redirect = ip_do_redirect,
	.local_out = __ip_local_out,
	.neigh_lookup = ipv4_neigh_lookup,
};
175
#define ECN_OR_COST(class) TC_PRIO_##class

/* Map the 4 IP TOS bits to a traffic-control priority class.
 * Entries alternate plain / ECN_OR_COST because bit 0 of the index
 * is the low TOS bit. Exported for use by other protocol code.
 */
const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
/* Per-cpu routing-cache statistics; incremented locklessly per cpu. */
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
203{
Eric Dumazet29e75252008-01-31 17:05:09 -0800204 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700205 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800206 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207}
208
209static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
210{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700212 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213}
214
/* seq_file stop: nothing to release. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
218
/* seq_file show: print only the legacy header line (kept for ABI
 * compatibility with tools that still read /proc/net/rt_cache).
 */
static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}
228
/* seq_file iterator for /proc/net/rt_cache. */
static const struct seq_operations rt_cache_seq_ops = {
	.start = rt_cache_seq_start,
	.next = rt_cache_seq_next,
	.stop = rt_cache_seq_stop,
	.show = rt_cache_seq_show,
};
235
/* open() handler for /proc/net/rt_cache. */
static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}
240
/* file_operations for /proc/net/rt_cache. */
static const struct file_operations rt_cache_seq_fops = {
	.owner = THIS_MODULE,
	.open = rt_cache_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
248
249
250static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
251{
252 int cpu;
253
254 if (*pos == 0)
255 return SEQ_START_TOKEN;
256
Rusty Russell0f23174a2008-12-29 12:23:42 +0000257 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 if (!cpu_possible(cpu))
259 continue;
260 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800261 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 }
263 return NULL;
264}
265
266static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
267{
268 int cpu;
269
Rusty Russell0f23174a2008-12-29 12:23:42 +0000270 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 if (!cpu_possible(cpu))
272 continue;
273 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800274 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 }
276 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900277
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278}
279
/* seq_file stop: nothing to release. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}
284
/* seq_file show: emit the header line, or one row of per-cpu routing
 * statistics. The first column is the global dst entry count, repeated
 * on every row; the header and format strings are userspace ABI.
 */
static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
		);
	return 0;
}
318
/* seq_file iterator for /proc/net/stat/rt_cache. */
static const struct seq_operations rt_cpu_seq_ops = {
	.start = rt_cpu_seq_start,
	.next = rt_cpu_seq_next,
	.stop = rt_cpu_seq_stop,
	.show = rt_cpu_seq_show,
};
325
326
/* open() handler for /proc/net/stat/rt_cache. */
static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}
331
/* file_operations for /proc/net/stat/rt_cache. */
static const struct file_operations rt_cpu_seq_fops = {
	.owner = THIS_MODULE,
	.open = rt_cpu_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
339
Patrick McHardyc7066f72011-01-14 13:36:42 +0100340#ifdef CONFIG_IP_ROUTE_CLASSID
/* /proc/net/rt_acct: sum the per-cpu ip_rt_acct counters for all 256
 * realms into a temporary array and dump it as one binary write.
 * Returns 0 on success or -ENOMEM if the scratch buffer can't be
 * allocated.
 */
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	/* Accumulate every possible cpu's counters, realm by realm. */
	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800364
/* open() handler for /proc/net/rt_acct. */
static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}
369
/* file_operations for /proc/net/rt_acct. */
static const struct file_operations rt_acct_proc_fops = {
	.owner = THIS_MODULE,
	.open = rt_acct_proc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800377#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800378
/* Per-netns setup of the route /proc entries: /proc/net/rt_cache,
 * /proc/net/stat/rt_cache, and (with route classid support)
 * /proc/net/rt_acct. On any failure the already-created entries are
 * unwound in reverse order via the err labels; returns 0 or -ENOMEM.
 */
static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800409
/* Per-netns teardown: remove everything ip_rt_do_proc_init created. */
static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}
418
/* Hook the /proc setup/teardown into network-namespace lifetime. */
static struct pernet_operations ip_rt_proc_ops __net_initdata = {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};
423
/* Boot-time registration of the per-netns /proc operations. */
static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}
428
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800429#else
/* Stub when CONFIG_PROC_FS is off: always succeeds. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900435
/* A cached route is stale once its generation id no longer matches the
 * namespace-wide route generation (bumped by rt_cache_flush()).
 */
static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}
440
/* Invalidate all cached routes in @net by bumping the generation id;
 * stale entries are then detected lazily via rt_is_expired().
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump(net);
}
445
/* dst_ops->neigh_lookup for IPv4: resolve the neighbour for a route.
 * Key preference: the route's gateway if set, else the packet's
 * destination address from @skb, else the caller-supplied @daddr.
 * Creates an ARP neighbour entry if none exists yet.
 */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}
466
/*
 * Peer allocation may fail only in serious out-of-memory conditions. However
 * we still can generate some output.
 * Random ID selection looks a bit dangerous because we have no chances to
 * select ID being unique in a reasonable period of time.
 * But broken packet identifier may be better than no packet at all.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	static DEFINE_SPINLOCK(ip_fb_id_lock);
	static u32 ip_fallback_id;
	u32 salt;

	/* Mix the previous id with the destination through the keyed
	 * secure_ip_id() hash; the spinlock serialises the shared state.
	 */
	spin_lock_bh(&ip_fb_id_lock);
	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;
	spin_unlock_bh(&ip_fb_id_lock);
}
486
/* Choose the IP header identification field for an outgoing packet.
 * Preferred source is the per-destination inetpeer counter; if no peer
 * can be allocated, fall back to the hashed global generator above.
 */
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
	struct net *net = dev_net(dst->dev);
	struct inet_peer *peer;

	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
	if (peer) {
		iph->id = htons(inet_getid(peer, more));
		inet_putpeer(peer);
		return;
	}

	ip_select_fb_ident(iph);
}
EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
/* Initialise a flowi4 lookup key. When a socket is supplied, its bound
 * device, mark, TOS and protocol override the caller-provided values;
 * addresses always come from the IP header.
 */
static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0);
}
521
/* Build a flow key from a received skb (header fields, ingress device,
 * skb mark), letting the socket override where applicable.
 */
static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}
533
/* Build a flow key purely from socket state (no skb available).
 * Honours a source-routing first-hop override from the socket's IP
 * options; the options are read under rcu_read_lock.
 */
static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;	/* strict/loose source route: first hop */
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0);
	rcu_read_unlock();
}
551
/* Build a flow key from whichever context is available: the skb when
 * one exists, otherwise the socket alone.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb)
		build_sk_flow_key(fl4, sk);
	else
		build_skb_flow_key(fl4, skb, sk);
}
560
/* Free a cached route after the current RCU grace period. */
static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}

/* Serialises all writers of the fib_nh exception hash below. */
static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700567
/* Pick the least-recently-stamped exception in a full hash bucket for
 * reuse. Caller holds fnhe_lock and guarantees the chain is non-empty
 * (only called when the bucket depth exceeds FNHE_RECLAIM_DEPTH).
 * Any route cached on the victim is detached and RCU-freed.
 */
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;
	struct rtable *orig;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	orig = rcu_dereference(oldest->fnhe_rth);
	if (orig) {
		RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
		rt_free(orig);
	}
	return oldest;
}
586
David S. Millerd3a25c92012-07-17 13:23:08 -0700587static inline u32 fnhe_hashfun(__be32 daddr)
588{
589 u32 hval;
590
591 hval = (__force u32) daddr;
592 hval ^= (hval >> 11) ^ (hval >> 22);
593
594 return hval & (FNHE_HASH_SIZE - 1);
595}
596
/* Copy exception state (learned PMTU/expiry, redirect gateway) from a
 * fib_nh exception into a cached route.
 */
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
	rt->rt_pmtu = fnhe->fnhe_pmtu;
	rt->dst.expires = fnhe->fnhe_expires;

	if (fnhe->fnhe_gw) {
		rt->rt_flags |= RTCF_REDIRECTED;
		rt->rt_gateway = fnhe->fnhe_gw;
		rt->rt_uses_gateway = 1;
	}
}
608
/* Record a per-destination exception (ICMP redirect gateway and/or
 * learned PMTU) on a nexthop. Updates an existing entry for @daddr or
 * creates one, reclaiming the oldest entry when the bucket chain is
 * over FNHE_RECLAIM_DEPTH. All mutation happens under fnhe_lock; the
 * hash table itself is allocated lazily (GFP_ATOMIC, BH-disabled).
 */
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	unsigned int i;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = nh->nh_exceptions;
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		nh->nh_exceptions = hash;
	}

	hash += hval;

	/* Look for an existing exception for this destination, counting
	 * chain depth as we go.
	 */
	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		/* Existing entry: merge in the new gateway/PMTU. A zero
		 * gw or pmtu argument means "leave that field alone".
		 */
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_expires = max(1UL, expires);
		}
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);	/* recycle LRU entry */
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
	return;
}
685
/* Process an ICMP redirect for route @rt. Validates the redirect
 * (code, current gateway match, sane/on-link new gateway per the
 * device's sysctl policy), then — once the new gateway's neighbour is
 * reachable — records it as a per-destination exception on the
 * matching FIB nexthop and, if @kill_route, marks the cached route
 * obsolete so it is re-looked-up. Invalid redirects are optionally
 * logged (rate-limited) when martian logging is enabled.
 */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	/* Only the four defined redirect codes are acted upon. */
	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	/* The redirect must come from the gateway we are using now. */
	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		/* Non-shared media: new gateway must be on the same link
		 * and (with secure redirects) be a known default gateway.
		 */
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
	if (n) {
		if (!(n->nud_state & NUD_VALID)) {
			/* Kick neighbour resolution; act on a later redirect. */
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, 0);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
765
David S. Miller4895c772012-07-17 04:19:00 -0700766static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
767{
768 struct rtable *rt;
769 struct flowi4 fl4;
770
771 rt = (struct rtable *) dst;
772
773 ip_rt_build_flow_key(&fl4, sk, skb);
David S. Millerceb33202012-07-17 11:31:28 -0700774 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700775}
776
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
778{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800779 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 struct dst_entry *ret = dst;
781
782 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000783 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784 ip_rt_put(rt);
785 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700786 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
787 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700788 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789 ret = NULL;
790 }
791 }
792 return ret;
793}
794
795/*
796 * Algorithm:
797 * 1. The first ip_rt_redirect_number redirects are sent
798 * with exponential backoff, then we stop sending them at all,
799 * assuming that the host ignores our redirects.
800 * 2. If we did not see packets requiring redirects
801 * during ip_rt_redirect_silence, we assume that the host
802 * forgot redirected route and start to send redirects again.
803 *
804 * This algorithm is much cheaper and more intelligent than dumb load limiting
805 * in icmp.c.
806 *
807 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
808 * and "frag. need" (breaks PMTU discovery) in icmp.c.
809 */
810
/* Send an ICMP redirect for the packet in @skb towards the route's next
 * hop, rate-limited per source peer using the exponential-backoff scheme
 * described in the algorithm comment above this function.  Called from
 * the forwarding path; @skb is not consumed here.
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;

	/* Snapshot the per-device redirect policy under RCU; the rest of
	 * the function does not need in_dev any more.
	 */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
	if (!peer) {
		/* No peer entry available: send unthrottled. */
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		/* Log exactly once, when the host hits the give-up limit. */
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}
873
874static int ip_error(struct sk_buff *skb)
875{
David S. Miller251da412012-06-26 16:27:09 -0700876 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +0000877 struct rtable *rt = skb_rtable(skb);
David S. Miller92d86822011-02-04 15:55:25 -0800878 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700880 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800881 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882 int code;
883
David S. Miller251da412012-06-26 16:27:09 -0700884 net = dev_net(rt->dst.dev);
885 if (!IN_DEV_FORWARD(in_dev)) {
886 switch (rt->dst.error) {
887 case EHOSTUNREACH:
888 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
889 break;
890
891 case ENETUNREACH:
892 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
893 break;
894 }
895 goto out;
896 }
897
Changli Gaod8d1f302010-06-10 23:31:35 -0700898 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000899 case EINVAL:
900 default:
901 goto out;
902 case EHOSTUNREACH:
903 code = ICMP_HOST_UNREACH;
904 break;
905 case ENETUNREACH:
906 code = ICMP_NET_UNREACH;
David S. Miller251da412012-06-26 16:27:09 -0700907 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000908 break;
909 case EACCES:
910 code = ICMP_PKT_FILTERED;
911 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 }
913
David S. Miller1d861aa2012-07-10 03:58:16 -0700914 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800915
916 send = true;
917 if (peer) {
918 now = jiffies;
919 peer->rate_tokens += now - peer->rate_last;
920 if (peer->rate_tokens > ip_rt_error_burst)
921 peer->rate_tokens = ip_rt_error_burst;
922 peer->rate_last = now;
923 if (peer->rate_tokens >= ip_rt_error_cost)
924 peer->rate_tokens -= ip_rt_error_cost;
925 else
926 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -0700927 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928 }
David S. Miller92d86822011-02-04 15:55:25 -0800929 if (send)
930 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931
932out: kfree_skb(skb);
933 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900934}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935
/* Record a learned path MTU for the destination of @fl4 as a nexthop
 * exception (fnhe) on the FIB nexthop used by @rt.  The update is
 * skipped when the route's MTU metric is administratively locked or the
 * reported MTU is not smaller than the device MTU; values below
 * ip_rt_min_pmtu are clamped up to it.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	/* A PMTU >= the device MTU carries no new information. */
	if (dst->dev->mtu < mtu)
		return;

	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	/* fib_lookup() and the fib_nh it yields are RCU-protected. */
	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		/* gw == 0: this exception updates only the PMTU, with
		 * an expiry of ip_rt_mtu_expires from now.
		 */
		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
959
David S. Miller4895c772012-07-17 04:19:00 -0700960static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
961 struct sk_buff *skb, u32 mtu)
962{
963 struct rtable *rt = (struct rtable *) dst;
964 struct flowi4 fl4;
965
966 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +0000967 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -0700968}
969
David S. Miller36393392012-06-14 22:21:46 -0700970void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
971 int oif, u32 mark, u8 protocol, int flow_flags)
972{
David S. Miller4895c772012-07-17 04:19:00 -0700973 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -0700974 struct flowi4 fl4;
975 struct rtable *rt;
976
David S. Miller4895c772012-07-17 04:19:00 -0700977 __build_flow_key(&fl4, NULL, iph, oif,
978 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -0700979 rt = __ip_route_output_key(net, &fl4);
980 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -0700981 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -0700982 ip_rt_put(rt);
983 }
984}
985EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
986
Steffen Klassert9cb3a502013-01-21 01:59:11 +0000987static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -0700988{
David S. Miller4895c772012-07-17 04:19:00 -0700989 const struct iphdr *iph = (const struct iphdr *) skb->data;
990 struct flowi4 fl4;
991 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -0700992
David S. Miller4895c772012-07-17 04:19:00 -0700993 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
994 rt = __ip_route_output_key(sock_net(sk), &fl4);
995 if (!IS_ERR(rt)) {
996 __ip_rt_update_pmtu(rt, &fl4, mtu);
997 ip_rt_put(rt);
998 }
David S. Miller36393392012-06-14 22:21:46 -0700999}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001000
/* Update the path MTU for a connected socket's cached route.  With the
 * socket bh-locked, the update is applied to the socket's dst in place;
 * if the dst has been invalidated a fresh route is looked up and
 * installed on the socket.  Falls back to __ipv4_sk_update_pmtu() when
 * the socket is owned by user context or has no cached route.
 */
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *dst;
	/* Tracks whether rt holds a reference we obtained here (and must
	 * either install on the socket or release).
	 */
	bool new = false;

	bh_lock_sock(sk);
	rt = (struct rtable *) __sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !rt) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);

	/* Cached dst no longer valid: get a fresh route first. */
	if (!__sk_dst_check(sk, 0)) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	/* The PMTU update may have obsoleted the dst (DST_OBSOLETE_KILL);
	 * if so, drop our new reference and look the route up again.
	 */
	dst = dst_check(&rt->dst, 0);
	if (!dst) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		__sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001048
David S. Millerb42597e2012-07-11 21:25:45 -07001049void ipv4_redirect(struct sk_buff *skb, struct net *net,
1050 int oif, u32 mark, u8 protocol, int flow_flags)
1051{
David S. Miller4895c772012-07-17 04:19:00 -07001052 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001053 struct flowi4 fl4;
1054 struct rtable *rt;
1055
David S. Miller4895c772012-07-17 04:19:00 -07001056 __build_flow_key(&fl4, NULL, iph, oif,
1057 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001058 rt = __ip_route_output_key(net, &fl4);
1059 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001060 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001061 ip_rt_put(rt);
1062 }
1063}
1064EXPORT_SYMBOL_GPL(ipv4_redirect);
1065
1066void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1067{
David S. Miller4895c772012-07-17 04:19:00 -07001068 const struct iphdr *iph = (const struct iphdr *) skb->data;
1069 struct flowi4 fl4;
1070 struct rtable *rt;
David S. Millerb42597e2012-07-11 21:25:45 -07001071
David S. Miller4895c772012-07-17 04:19:00 -07001072 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1073 rt = __ip_route_output_key(sock_net(sk), &fl4);
1074 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001075 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001076 ip_rt_put(rt);
1077 }
David S. Millerb42597e2012-07-11 21:25:45 -07001078}
1079EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1080
David S. Millerefbc3682011-12-01 13:38:59 -05001081static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1082{
1083 struct rtable *rt = (struct rtable *) dst;
1084
David S. Millerceb33202012-07-17 11:31:28 -07001085 /* All IPV4 dsts are created with ->obsolete set to the value
1086 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1087 * into this function always.
1088 *
Timo Teräs387aa652013-05-27 20:46:31 +00001089 * When a PMTU/redirect information update invalidates a route,
1090 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1091 * DST_OBSOLETE_DEAD by dst_free().
David S. Millerceb33202012-07-17 11:31:28 -07001092 */
Timo Teräs387aa652013-05-27 20:46:31 +00001093 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc3682011-12-01 13:38:59 -05001094 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001095 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001096}
1097
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098static void ipv4_link_failure(struct sk_buff *skb)
1099{
1100 struct rtable *rt;
1101
1102 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1103
Eric Dumazet511c3f92009-06-02 05:14:27 +00001104 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001105 if (rt)
1106 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107}
1108
1109static int ip_rt_bug(struct sk_buff *skb)
1110{
Joe Perches91df42b2012-05-15 14:11:54 +00001111 pr_debug("%s: %pI4 -> %pI4, %s\n",
1112 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1113 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001115 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116 return 0;
1117}
1118
1119/*
1120 We do not cache source address of outgoing interface,
1121 because it is used only by IP RR, TS and SRR options,
1122 so that it out of fast path.
1123
1124 BTW remember: "addr" is allowed to be not aligned
1125 in IP options!
1126 */
1127
/* Copy the source address to use for this route into @addr (4 bytes,
 * possibly unaligned -- see the comment above).  For output routes the
 * packet's own source is used; for input routes the FIB preferred
 * source is looked up, falling back to an address on the output device.
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		/* Rebuild the flow key the input path would have used. */
		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			/* No FIB match: pick an address scoped to the
			 * next hop on the output device.
			 */
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	/* memcpy because addr may be unaligned (IP options). */
	memcpy(addr, &src, 4);
}
1160
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Merge @tag into the route's traffic classid: each 16-bit half of
 * dst.tclassid is only taken from @tag if it is not already set.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->dst.tclassid & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
	if (!(rt->dst.tclassid & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif
1170
David S. Miller0dbaee32010-12-13 12:52:14 -08001171static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1172{
1173 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1174
1175 if (advmss == 0) {
1176 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1177 ip_rt_min_advmss);
1178 if (advmss > 65535 - 40)
1179 advmss = 65535 - 40;
1180 }
1181 return advmss;
1182}
1183
/* dst_ops->mtu hook: return the effective MTU for this route.
 * Precedence: unexpired learned PMTU (rt_pmtu), then the RTAX_MTU
 * metric, then the device MTU (clamped to 576 for locked-MTU routes
 * via a gateway).  The result never exceeds IP_MAX_MTU.
 */
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	/* An expired PMTU is ignored; fall back to the metric. */
	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = dst->dev->mtu;

	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
		/* Locked MTU through a gateway: be conservative. */
		if (rt->rt_uses_gateway && mtu > 576)
			mtu = 576;
	}

	if (mtu > IP_MAX_MTU)
		mtu = IP_MAX_MTU;

	return mtu;
}
1207
David S. Millerf2bb4be2012-07-17 12:20:47 -07001208static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001209{
1210 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1211 struct fib_nh_exception *fnhe;
1212 u32 hval;
1213
David S. Millerf2bb4be2012-07-17 12:20:47 -07001214 if (!hash)
1215 return NULL;
1216
David S. Millerd3a25c92012-07-17 13:23:08 -07001217 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001218
1219 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1220 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001221 if (fnhe->fnhe_daddr == daddr)
1222 return fnhe;
1223 }
1224 return NULL;
1225}
David S. Miller4895c772012-07-17 04:19:00 -07001226
/* Bind @rt into the nexthop exception @fnhe as its cached route,
 * copying the exception's state (gw/PMTU/expiry) into the route.
 * Returns true on success, false if @fnhe does not match @daddr
 * (e.g. the exception was recycled).  Serialized by fnhe_lock.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
		/* A cached route from an old generation invalidates the
		 * exception data it was created with.
		 */
		if (orig && rt_is_expired(orig)) {
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		/* Publish the new cached route, then free the old one. */
		rcu_assign_pointer(fnhe->fnhe_rth, rt);
		if (orig)
			rt_free(orig);

		fnhe->fnhe_stamp = jiffies;
		ret = true;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1256
/* Install @rt into the nexthop's route cache slot: the input slot for
 * input routes, otherwise this CPU's per-cpu output slot.  The install
 * is lockless via cmpxchg; returns false if another CPU raced us and
 * the route was not cached.
 */
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	/* Only succeed if the slot still holds what we read above. */
	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig)
			rt_free(orig);
	} else
		ret = false;

	return ret;
}
1278
/* List of uncached (DST_NOCACHE) routes, so rt_flush_dev() can detach
 * them from a disappearing device; protected by rt_uncached_lock.
 */
static DEFINE_SPINLOCK(rt_uncached_lock);
static LIST_HEAD(rt_uncached_list);
1281
/* Track an uncached route on rt_uncached_list (see rt_flush_dev()). */
static void rt_add_uncached_list(struct rtable *rt)
{
	spin_lock_bh(&rt_uncached_lock);
	list_add_tail(&rt->rt_uncached, &rt_uncached_list);
	spin_unlock_bh(&rt_uncached_lock);
}
1288
/* dst_ops->destroy hook: unlink the route from rt_uncached_list if it
 * was ever added there (list_empty is a cheap pre-check; the unlink
 * itself happens under rt_uncached_lock).
 */
static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;

	if (!list_empty(&rt->rt_uncached)) {
		spin_lock_bh(&rt_uncached_lock);
		list_del(&rt->rt_uncached);
		spin_unlock_bh(&rt_uncached_lock);
	}
}
1299
/* Detach all uncached routes from @dev (which is going away) by
 * re-pointing them at the netns loopback device, transferring one
 * device reference per route (hold loopback, put @dev).
 */
void rt_flush_dev(struct net_device *dev)
{
	if (!list_empty(&rt_uncached_list)) {
		struct net *net = dev_net(dev);
		struct rtable *rt;

		spin_lock_bh(&rt_uncached_lock);
		list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&rt_uncached_lock);
	}
}
1317
Eric Dumazet4331deb2012-07-25 05:11:23 +00001318static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba2012-07-17 12:58:50 -07001319{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001320 return rt &&
1321 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1322 !rt_is_expired(rt);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001323}
1324
/* Finish initializing @rt from the FIB lookup result: set the gateway,
 * metrics and classid from the nexthop, and try to cache the route --
 * in the nexthop exception @fnhe if one matched, otherwise in the
 * nexthop's route cache.  Routes that end up uncached are flagged
 * DST_NOCACHE and tracked on rt_uncached_list.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		/* No fib_info (e.g. local/broadcast): never cached. */
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1368
/* Allocate an IPv4 dst for @dev with ->obsolete preset to
 * DST_OBSOLETE_FORCE_CHK (so ipv4_dst_check() always runs) and flags
 * derived from the caller's caching/policy/xfrm choices.
 */
static struct rtable *rt_dst_alloc(struct net_device *dev,
				   bool nopolicy, bool noxfrm, bool will_cache)
{
	return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
			 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
			 (nopolicy ? DST_NOPOLICY : 0) |
			 (noxfrm ? DST_NOXFRM : 0));
}
1377
/* called in rcu_read_lock() section */
/* Input route resolution for multicast destinations: validate the
 * source, allocate a multicast rtable and attach it to @skb.  @our is
 * non-zero when the destination group is joined locally, in which case
 * the packet is also delivered locally (RTCF_LOCAL).  Returns 0 on
 * success or a negative errno.
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	/* Source must be unicast and the frame must really be IPv4. */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(saddr))
			goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		/* 0.0.0.0 sources are only valid for link-local groups
		 * (e.g. IGMP/DHCP-style traffic).
		 */
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* Multicast input routes are never forwarded via dst.output. */
	rth->dst.output = ip_rt_bug;

	rth->rt_genid = rt_genid(dev_net(dev));
	rth->rt_flags = RTCF_MULTICAST;
	rth->rt_type = RTN_MULTICAST;
	rth->rt_is_input= 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (our) {
		rth->dst.input= ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	/* Non-link-local groups with multicast forwarding enabled go
	 * through the multicast routing engine instead.
	 */
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}
1449
1450
1451static void ip_handle_martian_source(struct net_device *dev,
1452 struct in_device *in_dev,
1453 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001454 __be32 daddr,
1455 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456{
1457 RT_CACHE_STAT_INC(in_martian_src);
1458#ifdef CONFIG_IP_ROUTE_VERBOSE
1459 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1460 /*
1461 * RFC1812 recommendation, if source is martian,
1462 * the only hint is MAC header.
1463 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001464 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001465 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001466 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001467 print_hex_dump(KERN_WARNING, "ll header: ",
1468 DUMP_PREFIX_OFFSET, 16, 1,
1469 skb_mac_header(skb),
1470 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 }
1472 }
1473#endif
1474}
1475
/* called in rcu_read_lock() section */
/*
 * __mkroute_input - build (or reuse) the dst entry for a forwarded packet.
 *
 * Validates the source address against the FIB result, decides whether the
 * per-nexthop input route cache may be used, and either attaches a cached
 * rtable to @skb or allocates and initializes a fresh one with
 * dst.input = ip_forward.
 *
 * Returns 0 on success (skb's dst is set) or a negative errno.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned int flags = 0;
	bool do_cache;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		/* Martian source: log it (rate-limited) and bail out. */
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* Only cache when we have fib info and no classid tag to carry. */
	do_cache = res->fi && !itag;
	/* err > 0 here comes from fib_validate_source(); together with the
	 * shared-media/onlink checks it marks a packet that should trigger
	 * an ICMP redirect.  Redirected routes are never cached.
	 */
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
		flags |= RTCF_DOREDIRECT;
		do_cache = false;
	}

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	/* Fast path: reuse the nexthop's cached input route when valid. */
	if (do_cache) {
		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
	rth->rt_flags = flags;
	rth->rt_type = res->type;
	rth->rt_is_input = 1;
	rth->rt_iif 	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	rth->dst.input = ip_forward;
	rth->dst.output = ip_output;

	/* May also store rth into the nexthop cache (when do_cache). */
	rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564
/*
 * ip_mkroute_input - select a nexthop (multipath) and create the input route.
 *
 * @fl4 is currently unused in this body; it is kept in the signature for
 * compatibility with the caller (ip_route_input_slow()).
 */
static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    const struct flowi4 *fl4,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* Pick one of the nexthops when the route has several. */
	if (res->fi && res->fi->fib_nhs > 1)
		fib_select_multipath(res);
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
1579
/*
 *	NOTE. We drop all the packets that has local source
 *	addresses, because every properly looped back packet
 *	must have correct destination already attached by output routine.
 *
 *	Such approach solves two big problems:
 *	1. Not simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with 100% of guarantee.
 *	called with rcu_read_lock()
 */

/*
 * ip_route_input_slow - full input routing lookup for a received packet.
 *
 * Filters out martian sources/destinations, performs the FIB lookup, and
 * dispatches to the proper route constructor: broadcast/local delivery is
 * built inline below (local_input:), unicast forwarding goes through
 * ip_mkroute_input().  Returns 0 on success or a negative errno.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flowi4	fl4;
	unsigned int	flags = 0;
	u32		itag = 0;
	struct rtable	*rth;
	int		err = -EINVAL;
	struct net    *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and call it once if daddr or/and saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res);
	if (err != 0)
		goto no_route;

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  LOOPBACK_IFINDEX,
					  dev, in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto no_route;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	/* Forwarded unicast: delegate route construction. */
	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	/* Broadcasts from 0.0.0.0 (e.g. DHCP discover) skip source checks. */
	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Try the nexthop's cached input route; only (re)cache when there
	 * is fib info and no classid tag.
	 */
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	rth->dst.input= ip_local_deliver;
	/* A locally delivered packet must never be sent back out. */
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif

	rth->rt_genid = rt_genid(net);
	rth->rt_flags 	= flags|RTCF_LOCAL;
	rth->rt_type	= res.type;
	rth->rt_is_input = 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (res.type == RTN_UNREACHABLE) {
		/* no_route path lands here: deliver to ip_error instead. */
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags 	&= ~RTCF_LOCAL;
	}
	if (do_cache)
		rt_cache_route(&FIB_RES_NH(res), rth);
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	if (err == -ESRCH)
		err = -ENETUNREACH;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	err = -EINVAL;
martian_source_keep_err:
	/* keep the errno set by fib_validate_source() when we came via
	 * martian_source_keep_err; otherwise it is -EINVAL.
	 */
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
1767
David S. Millerc6cffba2012-07-26 11:14:38 +00001768int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1769 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770{
Eric Dumazet96d36222010-06-02 19:21:31 +00001771 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772
Eric Dumazet96d36222010-06-02 19:21:31 +00001773 rcu_read_lock();
1774
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 /* Multicast recognition logic is moved from route cache to here.
1776 The problem was that too many Ethernet cards have broken/missing
1777 hardware multicast filters :-( As result the host on multicasting
1778 network acquires a lot of useless route cache entries, sort of
1779 SDR messages from all the world. Now we try to get rid of them.
1780 Really, provided software IP multicast filter is organized
1781 reasonably (at least, hashed), it does not result in a slowdown
1782 comparing with route cache reject entries.
1783 Note, that multicast routers are not affected, because
1784 route cache entry is created eventually.
1785 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001786 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001787 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788
Eric Dumazet96d36222010-06-02 19:21:31 +00001789 if (in_dev) {
David S. Millerdbdd9a52011-03-10 16:34:38 -08001790 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1791 ip_hdr(skb)->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792 if (our
1793#ifdef CONFIG_IP_MROUTE
Joe Perches9d4fb272009-11-23 10:41:23 -08001794 ||
1795 (!ipv4_is_local_multicast(daddr) &&
1796 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797#endif
Joe Perches9d4fb272009-11-23 10:41:23 -08001798 ) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001799 int res = ip_route_input_mc(skb, daddr, saddr,
1800 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00001802 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803 }
1804 }
1805 rcu_read_unlock();
1806 return -EINVAL;
1807 }
David S. Millerc10237e2012-06-27 17:05:06 -07001808 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00001809 rcu_read_unlock();
1810 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811}
David S. Millerc6cffba2012-07-26 11:14:38 +00001812EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813
/* called with rcu_read_lock() */
/*
 * __mkroute_output - build (or reuse) the dst entry for an output route.
 *
 * Classifies the destination (broadcast/multicast/unicast/local), tries to
 * reuse a cached rtable — either the nexthop exception's or the per-cpu
 * per-nexthop output cache — and otherwise allocates and initializes a
 * fresh one with dst.output = ip_output (or ip_mc_output for multicast).
 *
 * Returns the rtable (with a dst reference held when taken from the
 * cache) or an ERR_PTR() on failure.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	/* Loopback source out of a non-loopback device is only allowed
	 * when route_localnet is enabled on the device.
	 */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
			return ERR_PTR(-EINVAL);

	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	}

	fnhe = NULL;
	do_cache &= fi != NULL;
	if (do_cache) {
		struct rtable __rcu **prth;
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Prefer the per-destination exception route when one
		 * exists; otherwise use the per-cpu nexthop cache.
		 */
		fnhe = find_exception(nh, fl4->daddr);
		if (fnhe)
			prth = &fnhe->fnhe_rth;
		else {
			if (unlikely(fl4->flowi4_flags &
				     FLOWI_FLAG_KNOWN_NH &&
				     !(nh->nh_gw &&
				       nh->nh_scope == RT_SCOPE_LINK))) {
				/* Caller insists on its own nexthop; do not
				 * pollute the shared cache with it.
				 */
				do_cache = false;
				goto add;
			}
			prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
		}
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth)) {
			dst_hold(&rth->dst);
			return rth;
		}
	}

add:
	rth = rt_dst_alloc(dev_out,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->dst.output = ip_output;

	rth->rt_genid = rt_genid(dev_net(dev_out));
	rth->rt_flags	= flags;
	rth->rt_type	= type;
	rth->rt_is_input = 0;
	rth->rt_iif	= orig_oif ? : 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL)
		rth->dst.input = ip_local_deliver;
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	/* May also store rth into fnhe or the per-cpu cache (when do_cache). */
	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);

	return rth;
}
1935
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936/*
1937 * Major route resolver routine.
1938 */
1939
David S. Miller89aef892012-07-17 11:00:09 -07001940struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001941{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942 struct net_device *dev_out = NULL;
Julian Anastasovf61759e2011-12-02 11:39:42 +00001943 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07001944 unsigned int flags = 0;
1945 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08001946 struct rtable *rth;
David S. Miller813b3b52011-04-28 14:48:42 -07001947 int orig_oif;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948
David S. Miller85b91b02012-07-13 08:21:29 -07001949 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001951 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952
David S. Miller813b3b52011-04-28 14:48:42 -07001953 orig_oif = fl4->flowi4_oif;
1954
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00001955 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07001956 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
1957 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
1958 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08001959
David S. Miller010c2702011-02-17 15:37:09 -08001960 rcu_read_lock();
David S. Miller813b3b52011-04-28 14:48:42 -07001961 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08001962 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07001963 if (ipv4_is_multicast(fl4->saddr) ||
1964 ipv4_is_lbcast(fl4->saddr) ||
1965 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966 goto out;
1967
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968 /* I removed check for oif == dev_out->oif here.
1969 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08001970 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
1971 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972 2. Moreover, we are allowed to send packets with saddr
1973 of another iface. --ANK
1974 */
1975
David S. Miller813b3b52011-04-28 14:48:42 -07001976 if (fl4->flowi4_oif == 0 &&
1977 (ipv4_is_multicast(fl4->daddr) ||
1978 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07001979 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001980 dev_out = __ip_dev_find(net, fl4->saddr, false);
Julian Anastasova210d012008-10-01 07:28:28 -07001981 if (dev_out == NULL)
1982 goto out;
1983
Linus Torvalds1da177e2005-04-16 15:20:36 -07001984 /* Special hack: user can direct multicasts
1985 and limited broadcast via necessary interface
1986 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
1987 This hack is not just for fun, it allows
1988 vic,vat and friends to work.
1989 They bind socket to loopback, set ttl to zero
1990 and expect that it will work.
1991 From the viewpoint of routing cache they are broken,
1992 because we are not allowed to build multicast path
1993 with loopback source addr (look, routing cache
1994 cannot know, that ttl is zero, so that packet
1995 will not leave this host and route is valid).
1996 Luckily, this hack is good workaround.
1997 */
1998
David S. Miller813b3b52011-04-28 14:48:42 -07001999 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000 goto make_route;
2001 }
Julian Anastasova210d012008-10-01 07:28:28 -07002002
David S. Miller813b3b52011-04-28 14:48:42 -07002003 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07002004 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002005 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07002006 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07002007 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 }
2009
2010
David S. Miller813b3b52011-04-28 14:48:42 -07002011 if (fl4->flowi4_oif) {
2012 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002013 rth = ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 if (dev_out == NULL)
2015 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002016
2017 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002018 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002019 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002020 goto out;
2021 }
David S. Miller813b3b52011-04-28 14:48:42 -07002022 if (ipv4_is_local_multicast(fl4->daddr) ||
2023 ipv4_is_lbcast(fl4->daddr)) {
2024 if (!fl4->saddr)
2025 fl4->saddr = inet_select_addr(dev_out, 0,
2026 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027 goto make_route;
2028 }
David S. Miller813b3b52011-04-28 14:48:42 -07002029 if (fl4->saddr) {
2030 if (ipv4_is_multicast(fl4->daddr))
2031 fl4->saddr = inet_select_addr(dev_out, 0,
2032 fl4->flowi4_scope);
2033 else if (!fl4->daddr)
2034 fl4->saddr = inet_select_addr(dev_out, 0,
2035 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036 }
2037 }
2038
David S. Miller813b3b52011-04-28 14:48:42 -07002039 if (!fl4->daddr) {
2040 fl4->daddr = fl4->saddr;
2041 if (!fl4->daddr)
2042 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002043 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002044 fl4->flowi4_oif = LOOPBACK_IFINDEX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045 res.type = RTN_LOCAL;
2046 flags |= RTCF_LOCAL;
2047 goto make_route;
2048 }
2049
David S. Miller813b3b52011-04-28 14:48:42 -07002050 if (fib_lookup(net, fl4, &res)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07002052 res.table = NULL;
David S. Miller813b3b52011-04-28 14:48:42 -07002053 if (fl4->flowi4_oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054 /* Apparently, routing tables are wrong. Assume,
2055 that the destination is on link.
2056
2057 WHY? DW.
2058 Because we are allowed to send to iface
2059 even if it has NO routes and NO assigned
2060 addresses. When oif is specified, routing
2061 tables are looked up with only one purpose:
2062 to catch if destination is gatewayed, rather than
2063 direct. Moreover, if MSG_DONTROUTE is set,
2064 we send packet, ignoring both routing tables
2065 and ifaddr state. --ANK
2066
2067
2068 We could make it even if oif is unknown,
2069 likely IPv6, but we do not.
2070 */
2071
David S. Miller813b3b52011-04-28 14:48:42 -07002072 if (fl4->saddr == 0)
2073 fl4->saddr = inet_select_addr(dev_out, 0,
2074 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002075 res.type = RTN_UNICAST;
2076 goto make_route;
2077 }
David S. Millerb23dd4f2011-03-02 14:31:35 -08002078 rth = ERR_PTR(-ENETUNREACH);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079 goto out;
2080 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081
2082 if (res.type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002083 if (!fl4->saddr) {
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002084 if (res.fi->fib_prefsrc)
David S. Miller813b3b52011-04-28 14:48:42 -07002085 fl4->saddr = res.fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002086 else
David S. Miller813b3b52011-04-28 14:48:42 -07002087 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002088 }
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002089 dev_out = net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07002090 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091 flags |= RTCF_LOCAL;
2092 goto make_route;
2093 }
2094
2095#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller813b3b52011-04-28 14:48:42 -07002096 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
David S. Miller1b7fe5932011-03-10 17:01:16 -08002097 fib_select_multipath(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098 else
2099#endif
David S. Miller21d8c492011-04-14 14:49:37 -07002100 if (!res.prefixlen &&
2101 res.table->tb_num_default > 1 &&
David S. Miller813b3b52011-04-28 14:48:42 -07002102 res.type == RTN_UNICAST && !fl4->flowi4_oif)
David S. Miller0c838ff2011-01-31 16:16:50 -08002103 fib_select_default(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104
David S. Miller813b3b52011-04-28 14:48:42 -07002105 if (!fl4->saddr)
2106 fl4->saddr = FIB_RES_PREFSRC(net, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107
Linus Torvalds1da177e2005-04-16 15:20:36 -07002108 dev_out = FIB_RES_DEV(res);
David S. Miller813b3b52011-04-28 14:48:42 -07002109 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110
2111
2112make_route:
David Miller1a00fee2012-07-01 02:02:56 +00002113 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002114
David S. Miller010c2702011-02-17 15:37:09 -08002115out:
2116 rcu_read_unlock();
David S. Millerb23dd4f2011-03-02 14:31:35 -08002117 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002119EXPORT_SYMBOL_GPL(__ip_route_output_key);
2120
/*
 * dst_ops->check for blackhole routes: always report the entry as invalid
 * (NULL) so callers holding a cached reference drop it and re-look up.
 */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2125
Steffen Klassertebb762f2011-11-23 02:12:51 +00002126static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002127{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002128 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2129
2130 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002131}
2132
/*
 * dst_ops->update_pmtu for blackhole routes: intentionally a no-op —
 * a blackhole entry never learns or propagates path-MTU information.
 */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}
2137
/*
 * dst_ops->redirect for blackhole routes: intentionally a no-op —
 * ICMP redirects must never reshape a blackhole entry.
 */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}
2142
/*
 * dst_ops->cow_metrics for blackhole routes: refuse to allocate a private
 * metrics block (return NULL) — metrics on a blackhole are never written.
 */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2148
/*
 * Operations table for blackhole dst entries created by
 * ipv4_blackhole_route(): wires in the no-op/NULL callbacks above so the
 * entry silently ignores validation, PMTU updates, redirects and metric
 * copy-on-write, while reusing the regular advmss/neigh helpers.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			=	AF_INET,
	.protocol		=	cpu_to_be16(ETH_P_IP),
	.check			=	ipv4_blackhole_dst_check,
	.mtu			=	ipv4_blackhole_mtu,
	.default_advmss		=	ipv4_default_advmss,
	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
	.redirect		=	ipv4_rt_blackhole_redirect,
	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ipv4_neigh_lookup,
};
2160
/*
 * Build a "blackhole" clone of @dst_orig: a route that carries the same
 * addressing/type/gateway fields but discards every packet (both input
 * and output hooks are dst_discard) and uses ipv4_dst_blackhole_ops so it
 * ignores PMTU/redirect updates.  @dst_orig is always released, whether
 * or not the clone could be allocated.
 *
 * Returns the new dst on success, or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Discard in both directions: this is the "blackhole" part. */
		new->input = dst_discard;
		new->output = dst_discard;

		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		/* Copy the routing identity of the original entry. */
		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;

		rt->rt_genid = rt_genid(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;
		rt->rt_uses_gateway = ort->rt_uses_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);

		/* NOTE(review): dst_free() on the entry we are about to hand
		 * back looks odd — presumably it marks the dst for RCU-safe
		 * teardown once the caller's reference goes; confirm against
		 * dst_alloc()/dst_free() semantics before touching this. */
		dst_free(new);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2197
David S. Miller9d6ec932011-03-12 01:12:47 -05002198struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002199 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002200{
David S. Miller9d6ec932011-03-12 01:12:47 -05002201 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002202
David S. Millerb23dd4f2011-03-02 14:31:35 -08002203 if (IS_ERR(rt))
2204 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205
David S. Miller56157872011-05-02 14:37:45 -07002206 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002207 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2208 flowi4_to_flowi(flp4),
2209 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210
David S. Millerb23dd4f2011-03-02 14:31:35 -08002211 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002213EXPORT_SYMBOL_GPL(ip_route_output_flow);
2214
/*
 * Serialize the route attached to @skb (skb_rtable()) into an rtnetlink
 * message keyed by @dst/@src and the resolved flow @fl4.
 *
 * Returns the value of nlmsg_end() on success, 0 when an ipmr cache
 * lookup was queued for later completion (!nowait path), or -EMSGSIZE
 * when the attributes did not fit in @skb.
 */
static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
			u32 seq, int event, int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family	 = AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= fl4->flowi4_tos;
	r->rtm_table	= RT_TABLE_MAIN;
	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
		goto nla_put_failure;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;

	if (nla_put_be32(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		/* Source was part of the query: echo it back. */
		r->rtm_src_len = 32;
		if (nla_put_be32(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* Report the chosen preferred source only for output routes where
	 * it differs from the queried source. */
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_uses_gateway &&
	    nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	/* Convert the absolute dst expiry into "jiffies from now" (0 if
	 * there is no expiry or it has already passed). */
	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	/* A live (unexpired) learned PMTU overrides the MTU metric. */
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		/* Non-link-local multicast with forwarding enabled: ask the
		 * multicast routing cache instead of reporting rt_iif. */
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
					error = err;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2324
/*
 * RTM_GETROUTE handler: resolve a single route on behalf of a netlink
 * caller.  If RTA_IIF is present the route is resolved as an *input*
 * route by feeding a dummy skb through ip_route_input(); otherwise an
 * output lookup is done on the flow built from the request attributes.
 * The result is serialized via rt_fill_info() and unicast back.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	/* All attributes are optional; absent ones default to zero. */
	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;

	if (iif) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (dev == NULL) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol	= htons(ETH_P_IP);
		skb->dev	= dev;
		skb->mark	= mark;
		/* ip_route_input() expects softirq context semantics. */
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		/* The lookup can "succeed" with an error route attached. */
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	err = rt_fill_info(net, dst, src, &fl4, skb,
			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err <= 0)
		goto errout_free;

	/* rtnl_unicast() consumes skb on all paths. */
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}
2421
/*
 * Route-cache dump callback: nothing to dump, so just report the current
 * skb length.  NOTE(review): returning skb->len with no entries appended
 * presumably terminates the netlink dump immediately — confirm against
 * the netlink_callback contract.
 */
int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
{
	return skb->len;
}
2426
/*
 * Multicast configuration changed on @in_dev: flush the route cache of
 * the device's namespace so stale multicast paths are dropped.
 */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}
2431
#ifdef CONFIG_SYSCTL
/* Defaults backing the gc_* entries of ipv4_route_table below
 * (/proc/sys/net/ipv4/route/). */
static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;
2437
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002438static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002439 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440 size_t *lenp, loff_t *ppos)
2441{
2442 if (write) {
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00002443 rt_cache_flush((struct net *)__ctl->extra1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002445 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002446
2447 return -EINVAL;
2448}
2449
/*
 * Global tunables exposed under /proc/sys/net/ipv4/route (registered in
 * ip_static_sysctl_init()).  All entries are plain ints; the *_jiffies
 * handlers convert between seconds (or ms) and jiffies.
 */
static ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* Same variable as above, exposed in milliseconds. */
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002560
/*
 * Per-namespace "flush" sysctl template; duplicated per net in
 * sysctl_route_net_init() with ->extra1 pointing at the owning net.
 * Write-only (0200): writing triggers ipv4_sysctl_rtcache_flush().
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};
2570
2571static __net_init int sysctl_route_net_init(struct net *net)
2572{
2573 struct ctl_table *tbl;
2574
2575 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08002576 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002577 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
2578 if (tbl == NULL)
2579 goto err_dup;
Eric W. Biederman464dc802012-11-16 03:02:59 +00002580
2581 /* Don't export sysctls to unprivileged users */
2582 if (net->user_ns != &init_user_ns)
2583 tbl[0].procname = NULL;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002584 }
2585 tbl[0].extra1 = net;
2586
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00002587 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002588 if (net->ipv4.route_hdr == NULL)
2589 goto err_reg;
2590 return 0;
2591
2592err_reg:
2593 if (tbl != ipv4_route_flush_table)
2594 kfree(tbl);
2595err_dup:
2596 return -ENOMEM;
2597}
2598
2599static __net_exit void sysctl_route_net_exit(struct net *net)
2600{
2601 struct ctl_table *tbl;
2602
2603 tbl = net->ipv4.route_hdr->ctl_table_arg;
2604 unregister_net_sysctl_table(net->ipv4.route_hdr);
2605 BUG_ON(tbl == ipv4_route_flush_table);
2606 kfree(tbl);
2607}
2608
/* Pernet hooks for the route sysctl table (registered in ip_rt_init()). */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
#endif
2614
/*
 * Per-namespace init of route generation state: start the route genid
 * counter at zero and randomize the device-address genid seed.
 */
static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->rt_genid, 0);
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}
2622
/* Pernet hooks for genid state; init-only, no teardown needed. */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
2626
David S. Millerc3426b42012-06-09 16:27:05 -07002627static int __net_init ipv4_inetpeer_init(struct net *net)
2628{
2629 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2630
2631 if (!bp)
2632 return -ENOMEM;
2633 inet_peer_base_init(bp);
2634 net->ipv4.peers = bp;
2635 return 0;
2636}
2637
/*
 * Tear down the per-namespace inetpeer base: detach it from the net,
 * invalidate the peer tree, then free the base itself.
 */
static void __net_exit ipv4_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv4.peers;

	net->ipv4.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
2646
/* Pernet hooks for the IPv4 inetpeer base (registered in ip_rt_init()). */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init	= ipv4_inetpeer_init,
	.exit	= ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002651
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu route classid accounting buffer; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002655
/*
 * One-time boot initialization of the IPv4 routing layer: allocates the
 * dst slab and entry counters (panicking on failure, since routing is
 * essential), initializes devinet/FIB/proc/xfrm, registers the
 * RTM_GETROUTE handler and the pernet subsystems declared above.
 * Always returns 0.
 */
int __init ip_rt_init(void)
{
	int rc = 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Blackhole routes share the same slab as regular ones. */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	/* Effectively disable gc threshold / size limits by default. */
	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}
2699
#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
/* Registers the global route sysctl table for init_net early in boot. */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif