blob: fa5c037227cb2a503c88b0990932a888ca2e8957 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
68#include <asm/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Eric Dumazet73f156a2014-06-02 05:26:03 -070092#include <linux/jhash.h>
Herbert Xu352e5122007-11-13 21:34:06 -080093#include <net/dst.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +020094#include <net/dst_metadata.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020095#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070096#include <net/protocol.h>
97#include <net/ip.h>
98#include <net/route.h>
99#include <net/inetpeer.h>
100#include <net/sock.h>
101#include <net/ip_fib.h>
102#include <net/arp.h>
103#include <net/tcp.h>
104#include <net/icmp.h>
105#include <net/xfrm.h>
Roopa Prabhu571e7222015-07-21 10:43:47 +0200106#include <net/lwtunnel.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700107#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700108#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#ifdef CONFIG_SYSCTL
110#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000111#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700113#include <net/secure_seq.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +0200114#include <net/ip_tunnels.h>
David Ahern385add92015-09-29 20:07:13 -0700115#include <net/l3mdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
David S. Miller68a5e3d2011-03-11 20:07:33 -0500117#define RT_FL_TOS(oldflp4) \
Julian Anastasovf61759e2011-12-02 11:39:42 +0000118 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120#define RT_GC_TIMEOUT (300*HZ)
121
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122static int ip_rt_max_size;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700123static int ip_rt_redirect_number __read_mostly = 9;
124static int ip_rt_redirect_load __read_mostly = HZ / 50;
125static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
126static int ip_rt_error_cost __read_mostly = HZ;
127static int ip_rt_error_burst __read_mostly = 5 * HZ;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700128static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
129static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
130static int ip_rt_min_advmss __read_mostly = 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500131
Xin Longdeed49d2016-02-18 21:21:19 +0800132static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133/*
134 * Interface to generic destination cache.
135 */
136
137static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800138static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000139static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
141static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700142static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
143 struct sk_buff *skb, u32 mtu);
144static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
145 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700146static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147
David S. Miller62fa8a82011-01-26 20:51:05 -0800148static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
149{
David S. Miller31248732012-07-10 07:08:18 -0700150 WARN_ON(1);
151 return NULL;
David S. Miller62fa8a82011-01-26 20:51:05 -0800152}
153
David S. Millerf894cbf2012-07-02 21:52:24 -0700154static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
155 struct sk_buff *skb,
156 const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700157
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158static struct dst_ops ipv4_dst_ops = {
159 .family = AF_INET,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160 .check = ipv4_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800161 .default_advmss = ipv4_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000162 .mtu = ipv4_mtu,
David S. Miller62fa8a82011-01-26 20:51:05 -0800163 .cow_metrics = ipv4_cow_metrics,
David S. Millercaacf052012-07-31 15:06:50 -0700164 .destroy = ipv4_dst_destroy,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165 .negative_advice = ipv4_negative_advice,
166 .link_failure = ipv4_link_failure,
167 .update_pmtu = ip_rt_update_pmtu,
David S. Millere47a1852012-07-11 20:55:47 -0700168 .redirect = ip_do_redirect,
Eric W. Biedermanb92dacd2015-10-07 16:48:37 -0500169 .local_out = __ip_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700170 .neigh_lookup = ipv4_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171};
172
173#define ECN_OR_COST(class) TC_PRIO_##class
174
Philippe De Muyter4839c522007-07-09 15:32:57 -0700175const __u8 ip_tos2prio[16] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176 TC_PRIO_BESTEFFORT,
Dan Siemon4a2b9c32011-03-15 13:56:07 +0000177 ECN_OR_COST(BESTEFFORT),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 TC_PRIO_BESTEFFORT,
179 ECN_OR_COST(BESTEFFORT),
180 TC_PRIO_BULK,
181 ECN_OR_COST(BULK),
182 TC_PRIO_BULK,
183 ECN_OR_COST(BULK),
184 TC_PRIO_INTERACTIVE,
185 ECN_OR_COST(INTERACTIVE),
186 TC_PRIO_INTERACTIVE,
187 ECN_OR_COST(INTERACTIVE),
188 TC_PRIO_INTERACTIVE_BULK,
189 ECN_OR_COST(INTERACTIVE_BULK),
190 TC_PRIO_INTERACTIVE_BULK,
191 ECN_OR_COST(INTERACTIVE_BULK)
192};
Amir Vadaid4a96862012-04-04 21:33:28 +0000193EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194
Eric Dumazet2f970d82006-01-17 02:54:36 -0800195static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Christoph Lameter3ed66e92014-04-07 15:39:40 -0700196#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
200{
Eric Dumazet29e75252008-01-31 17:05:09 -0800201 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700202 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800203 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204}
205
206static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
207{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700209 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210}
211
/* Nothing to tear down for this iterator. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
215
216static int rt_cache_seq_show(struct seq_file *seq, void *v)
217{
218 if (v == SEQ_START_TOKEN)
219 seq_printf(seq, "%-127s\n",
220 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
221 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
222 "HHUptod\tSpecDst");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900223 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224}
225
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700226static const struct seq_operations rt_cache_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 .start = rt_cache_seq_start,
228 .next = rt_cache_seq_next,
229 .stop = rt_cache_seq_stop,
230 .show = rt_cache_seq_show,
231};
232
233static int rt_cache_seq_open(struct inode *inode, struct file *file)
234{
David S. Miller89aef892012-07-17 11:00:09 -0700235 return seq_open(file, &rt_cache_seq_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236}
237
Arjan van de Ven9a321442007-02-12 00:55:35 -0800238static const struct file_operations rt_cache_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239 .owner = THIS_MODULE,
240 .open = rt_cache_seq_open,
241 .read = seq_read,
242 .llseek = seq_lseek,
David S. Miller89aef892012-07-17 11:00:09 -0700243 .release = seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244};
245
246
247static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
248{
249 int cpu;
250
251 if (*pos == 0)
252 return SEQ_START_TOKEN;
253
Rusty Russell0f23174a2008-12-29 12:23:42 +0000254 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 if (!cpu_possible(cpu))
256 continue;
257 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800258 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 }
260 return NULL;
261}
262
263static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
264{
265 int cpu;
266
Rusty Russell0f23174a2008-12-29 12:23:42 +0000267 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 if (!cpu_possible(cpu))
269 continue;
270 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800271 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 }
273 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900274
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275}
276
/* Nothing to release. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
281
282static int rt_cpu_seq_show(struct seq_file *seq, void *v)
283{
284 struct rt_cache_stat *st = v;
285
286 if (v == SEQ_START_TOKEN) {
Olaf Rempel5bec0032005-04-28 12:16:08 -0700287 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 return 0;
289 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900290
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
292 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
Eric Dumazetfc66f952010-10-08 06:37:34 +0000293 dst_entries_get_slow(&ipv4_dst_ops),
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700294 0, /* st->in_hit */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 st->in_slow_tot,
296 st->in_slow_mc,
297 st->in_no_route,
298 st->in_brd,
299 st->in_martian_dst,
300 st->in_martian_src,
301
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700302 0, /* st->out_hit */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 st->out_slow_tot,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900304 st->out_slow_mc,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700306 0, /* st->gc_total */
307 0, /* st->gc_ignored */
308 0, /* st->gc_goal_miss */
309 0, /* st->gc_dst_overflow */
310 0, /* st->in_hlist_search */
311 0 /* st->out_hlist_search */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312 );
313 return 0;
314}
315
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700316static const struct seq_operations rt_cpu_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 .start = rt_cpu_seq_start,
318 .next = rt_cpu_seq_next,
319 .stop = rt_cpu_seq_stop,
320 .show = rt_cpu_seq_show,
321};
322
323
324static int rt_cpu_seq_open(struct inode *inode, struct file *file)
325{
326 return seq_open(file, &rt_cpu_seq_ops);
327}
328
Arjan van de Ven9a321442007-02-12 00:55:35 -0800329static const struct file_operations rt_cpu_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330 .owner = THIS_MODULE,
331 .open = rt_cpu_seq_open,
332 .read = seq_read,
333 .llseek = seq_lseek,
334 .release = seq_release,
335};
336
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Sum the per-cpu ip_rt_acct counters over all 256 realms and dump
 * the aggregate as raw binary via /proc/net/rt_acct.
 */
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes   += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes   += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}

static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}

static const struct file_operations rt_acct_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_acct_proc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800375
Denis V. Lunev73b38712008-02-28 20:51:18 -0800376static int __net_init ip_rt_do_proc_init(struct net *net)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800377{
378 struct proc_dir_entry *pde;
379
Gao fengd4beaa62013-02-18 01:34:54 +0000380 pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
381 &rt_cache_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800382 if (!pde)
383 goto err1;
384
Wang Chen77020722008-02-28 14:14:25 -0800385 pde = proc_create("rt_cache", S_IRUGO,
386 net->proc_net_stat, &rt_cpu_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800387 if (!pde)
388 goto err2;
389
Patrick McHardyc7066f72011-01-14 13:36:42 +0100390#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800391 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800392 if (!pde)
393 goto err3;
394#endif
395 return 0;
396
Patrick McHardyc7066f72011-01-14 13:36:42 +0100397#ifdef CONFIG_IP_ROUTE_CLASSID
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800398err3:
399 remove_proc_entry("rt_cache", net->proc_net_stat);
400#endif
401err2:
402 remove_proc_entry("rt_cache", net->proc_net);
403err1:
404 return -ENOMEM;
405}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800406
407static void __net_exit ip_rt_do_proc_exit(struct net *net)
408{
409 remove_proc_entry("rt_cache", net->proc_net_stat);
410 remove_proc_entry("rt_cache", net->proc_net);
Patrick McHardyc7066f72011-01-14 13:36:42 +0100411#ifdef CONFIG_IP_ROUTE_CLASSID
Denis V. Lunev73b38712008-02-28 20:51:18 -0800412 remove_proc_entry("rt_acct", net->proc_net);
Alexey Dobriyan0a931ac2010-01-17 03:32:50 +0000413#endif
Denis V. Lunev73b38712008-02-28 20:51:18 -0800414}
415
416static struct pernet_operations ip_rt_proc_ops __net_initdata = {
417 .init = ip_rt_do_proc_init,
418 .exit = ip_rt_do_proc_exit,
419};
420
421static int __init ip_rt_proc_init(void)
422{
423 return register_pernet_subsys(&ip_rt_proc_ops);
424}
425
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800426#else
Denis V. Lunev73b38712008-02-28 20:51:18 -0800427static inline int ip_rt_proc_init(void)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800428{
429 return 0;
430}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900432
Eric Dumazet4331deb2012-07-25 05:11:23 +0000433static inline bool rt_is_expired(const struct rtable *rth)
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700434{
fan.duca4c3fc2013-07-30 08:33:53 +0800435 return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700436}
437
/* Invalidate every cached IPv4 route in @net by bumping the genid. */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}
442
David S. Millerf894cbf2012-07-02 21:52:24 -0700443static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
444 struct sk_buff *skb,
445 const void *daddr)
David Miller3769cff2011-07-11 22:44:24 +0000446{
David S. Millerd3aaeb32011-07-18 00:40:17 -0700447 struct net_device *dev = dst->dev;
448 const __be32 *pkey = daddr;
David S. Miller39232972012-01-26 15:22:32 -0500449 const struct rtable *rt;
David Miller3769cff2011-07-11 22:44:24 +0000450 struct neighbour *n;
451
David S. Miller39232972012-01-26 15:22:32 -0500452 rt = (const struct rtable *) dst;
David S. Millera263b302012-07-02 02:02:15 -0700453 if (rt->rt_gateway)
David S. Miller39232972012-01-26 15:22:32 -0500454 pkey = (const __be32 *) &rt->rt_gateway;
David S. Millerf894cbf2012-07-02 21:52:24 -0700455 else if (skb)
456 pkey = &ip_hdr(skb)->daddr;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700457
David S. Miller80703d22012-02-15 17:48:35 -0500458 n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700459 if (n)
460 return n;
David Miller32092ec2011-07-25 00:01:41 +0000461 return neigh_create(&arp_tbl, pkey, dev);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700462}
463
Eric Dumazet04ca6972014-07-26 08:58:10 +0200464#define IP_IDENTS_SZ 2048u
Eric Dumazet04ca6972014-07-26 08:58:10 +0200465
Eric Dumazet355b5902015-05-01 10:37:49 -0700466static atomic_t *ip_idents __read_mostly;
467static u32 *ip_tstamps __read_mostly;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200468
469/* In order to protect privacy, we add a perturbation to identifiers
470 * if one generator is seldom used. This makes hard for an attacker
471 * to infer how many packets were sent between two points in time.
472 */
473u32 ip_idents_reserve(u32 hash, int segs)
474{
Eric Dumazet355b5902015-05-01 10:37:49 -0700475 u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
476 atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
477 u32 old = ACCESS_ONCE(*p_tstamp);
Eric Dumazet04ca6972014-07-26 08:58:10 +0200478 u32 now = (u32)jiffies;
Eric Dumazetadb03112016-09-20 18:06:17 -0700479 u32 new, delta = 0;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200480
Eric Dumazet355b5902015-05-01 10:37:49 -0700481 if (old != now && cmpxchg(p_tstamp, old, now) == old)
Eric Dumazet04ca6972014-07-26 08:58:10 +0200482 delta = prandom_u32_max(now - old);
483
Eric Dumazetadb03112016-09-20 18:06:17 -0700484 /* Do not use atomic_add_return() as it makes UBSAN unhappy */
485 do {
486 old = (u32)atomic_read(p_id);
487 new = old + delta + segs;
488 } while (atomic_cmpxchg(p_id, old, new) != old);
489
490 return new - segs;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200491}
492EXPORT_SYMBOL(ip_idents_reserve);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700493
Hannes Frederic Sowab6a77192015-03-25 17:07:44 +0100494void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495{
Eric Dumazet73f156a2014-06-02 05:26:03 -0700496 static u32 ip_idents_hashrnd __read_mostly;
497 u32 hash, id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
Eric Dumazet73f156a2014-06-02 05:26:03 -0700499 net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500
Eric Dumazet04ca6972014-07-26 08:58:10 +0200501 hash = jhash_3words((__force u32)iph->daddr,
502 (__force u32)iph->saddr,
Hannes Frederic Sowab6a77192015-03-25 17:07:44 +0100503 iph->protocol ^ net_hash_mix(net),
Eric Dumazet04ca6972014-07-26 08:58:10 +0200504 ip_idents_hashrnd);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700505 id = ip_idents_reserve(hash, segs);
506 iph->id = htons(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000508EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900510static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
511 const struct sock *sk,
David S. Miller4895c772012-07-17 04:19:00 -0700512 const struct iphdr *iph,
513 int oif, u8 tos,
514 u8 prot, u32 mark, int flow_flags)
515{
516 if (sk) {
517 const struct inet_sock *inet = inet_sk(sk);
518
519 oif = sk->sk_bound_dev_if;
520 mark = sk->sk_mark;
521 tos = RT_CONN_FLAGS(sk);
522 prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
523 }
524 flowi4_init_output(fl4, oif, mark, tos,
525 RT_SCOPE_UNIVERSE, prot,
526 flow_flags,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900527 iph->daddr, iph->saddr, 0, 0,
528 sock_net_uid(net, sk));
David S. Miller4895c772012-07-17 04:19:00 -0700529}
530
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200531static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
532 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700533{
Lorenzo Colittid109e612016-11-30 02:56:47 +0900534 const struct net *net = dev_net(skb->dev);
David S. Miller4895c772012-07-17 04:19:00 -0700535 const struct iphdr *iph = ip_hdr(skb);
536 int oif = skb->dev->ifindex;
537 u8 tos = RT_TOS(iph->tos);
538 u8 prot = iph->protocol;
539 u32 mark = skb->mark;
540
Lorenzo Colittid109e612016-11-30 02:56:47 +0900541 __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Miller4895c772012-07-17 04:19:00 -0700542}
543
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200544static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700545{
546 const struct inet_sock *inet = inet_sk(sk);
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200547 const struct ip_options_rcu *inet_opt;
David S. Miller4895c772012-07-17 04:19:00 -0700548 __be32 daddr = inet->inet_daddr;
549
550 rcu_read_lock();
551 inet_opt = rcu_dereference(inet->inet_opt);
552 if (inet_opt && inet_opt->opt.srr)
553 daddr = inet_opt->opt.faddr;
554 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
555 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
556 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
557 inet_sk_flowi_flags(sk),
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900558 daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
David S. Miller4895c772012-07-17 04:19:00 -0700559 rcu_read_unlock();
560}
561
/* Build a flow key from the skb when available, else from the socket. */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (skb)
		build_skb_flow_key(fl4, skb, sk);
	else
		build_sk_flow_key(fl4, sk);
}
570
David S. Millerc5038a82012-07-31 15:02:02 -0700571static inline void rt_free(struct rtable *rt)
572{
573 call_rcu(&rt->dst.rcu_head, dst_rcu_free);
574}
575
576static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700577
Timo Teräs2ffae992013-06-27 10:27:05 +0300578static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
579{
580 struct rtable *rt;
581
582 rt = rcu_dereference(fnhe->fnhe_rth_input);
583 if (rt) {
584 RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
585 rt_free(rt);
586 }
587 rt = rcu_dereference(fnhe->fnhe_rth_output);
588 if (rt) {
589 RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
590 rt_free(rt);
591 }
592}
593
Julian Anastasovaee06da2012-07-18 10:15:35 +0000594static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
David S. Miller4895c772012-07-17 04:19:00 -0700595{
596 struct fib_nh_exception *fnhe, *oldest;
597
598 oldest = rcu_dereference(hash->chain);
599 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
600 fnhe = rcu_dereference(fnhe->fnhe_next)) {
601 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
602 oldest = fnhe;
603 }
Timo Teräs2ffae992013-06-27 10:27:05 +0300604 fnhe_flush_routes(oldest);
David S. Miller4895c772012-07-17 04:19:00 -0700605 return oldest;
606}
607
David S. Millerd3a25c92012-07-17 13:23:08 -0700608static inline u32 fnhe_hashfun(__be32 daddr)
609{
Eric Dumazetd546c622014-09-04 08:21:31 -0700610 static u32 fnhe_hashrnd __read_mostly;
David S. Millerd3a25c92012-07-17 13:23:08 -0700611 u32 hval;
612
Eric Dumazetd546c622014-09-04 08:21:31 -0700613 net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
614 hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
615 return hash_32(hval, FNHE_HASH_SHIFT);
David S. Millerd3a25c92012-07-17 13:23:08 -0700616}
617
Timo Teräs387aa652013-05-27 20:46:31 +0000618static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
619{
620 rt->rt_pmtu = fnhe->fnhe_pmtu;
621 rt->dst.expires = fnhe->fnhe_expires;
622
623 if (fnhe->fnhe_gw) {
624 rt->rt_flags |= RTCF_REDIRECTED;
625 rt->rt_gateway = fnhe->fnhe_gw;
626 rt->rt_uses_gateway = 1;
627 }
628}
629
Julian Anastasovaee06da2012-07-18 10:15:35 +0000630static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
631 u32 pmtu, unsigned long expires)
David S. Miller4895c772012-07-17 04:19:00 -0700632{
Julian Anastasovaee06da2012-07-18 10:15:35 +0000633 struct fnhe_hash_bucket *hash;
David S. Miller4895c772012-07-17 04:19:00 -0700634 struct fib_nh_exception *fnhe;
Timo Teräs387aa652013-05-27 20:46:31 +0000635 struct rtable *rt;
636 unsigned int i;
David S. Miller4895c772012-07-17 04:19:00 -0700637 int depth;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000638 u32 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -0700639
David S. Millerc5038a82012-07-31 15:02:02 -0700640 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000641
Eric Dumazetcaa41522014-09-03 22:21:56 -0700642 hash = rcu_dereference(nh->nh_exceptions);
David S. Miller4895c772012-07-17 04:19:00 -0700643 if (!hash) {
Julian Anastasovaee06da2012-07-18 10:15:35 +0000644 hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
David S. Miller4895c772012-07-17 04:19:00 -0700645 if (!hash)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000646 goto out_unlock;
Eric Dumazetcaa41522014-09-03 22:21:56 -0700647 rcu_assign_pointer(nh->nh_exceptions, hash);
David S. Miller4895c772012-07-17 04:19:00 -0700648 }
649
David S. Miller4895c772012-07-17 04:19:00 -0700650 hash += hval;
651
652 depth = 0;
653 for (fnhe = rcu_dereference(hash->chain); fnhe;
654 fnhe = rcu_dereference(fnhe->fnhe_next)) {
655 if (fnhe->fnhe_daddr == daddr)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000656 break;
David S. Miller4895c772012-07-17 04:19:00 -0700657 depth++;
658 }
659
Julian Anastasovaee06da2012-07-18 10:15:35 +0000660 if (fnhe) {
661 if (gw)
662 fnhe->fnhe_gw = gw;
663 if (pmtu) {
664 fnhe->fnhe_pmtu = pmtu;
Timo Teräs387aa652013-05-27 20:46:31 +0000665 fnhe->fnhe_expires = max(1UL, expires);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000666 }
Timo Teräs387aa652013-05-27 20:46:31 +0000667 /* Update all cached dsts too */
Timo Teräs2ffae992013-06-27 10:27:05 +0300668 rt = rcu_dereference(fnhe->fnhe_rth_input);
669 if (rt)
670 fill_route_from_fnhe(rt, fnhe);
671 rt = rcu_dereference(fnhe->fnhe_rth_output);
Timo Teräs387aa652013-05-27 20:46:31 +0000672 if (rt)
673 fill_route_from_fnhe(rt, fnhe);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000674 } else {
675 if (depth > FNHE_RECLAIM_DEPTH)
676 fnhe = fnhe_oldest(hash);
677 else {
678 fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
679 if (!fnhe)
680 goto out_unlock;
681
682 fnhe->fnhe_next = hash->chain;
683 rcu_assign_pointer(hash->chain, fnhe);
684 }
Timo Teräs5aad1de2013-05-27 20:46:33 +0000685 fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
Julian Anastasovaee06da2012-07-18 10:15:35 +0000686 fnhe->fnhe_daddr = daddr;
687 fnhe->fnhe_gw = gw;
688 fnhe->fnhe_pmtu = pmtu;
689 fnhe->fnhe_expires = expires;
Timo Teräs387aa652013-05-27 20:46:31 +0000690
691 /* Exception created; mark the cached routes for the nexthop
692 * stale, so anyone caching it rechecks if this exception
693 * applies to them.
694 */
Timo Teräs2ffae992013-06-27 10:27:05 +0300695 rt = rcu_dereference(nh->nh_rth_input);
696 if (rt)
697 rt->dst.obsolete = DST_OBSOLETE_KILL;
698
Timo Teräs387aa652013-05-27 20:46:31 +0000699 for_each_possible_cpu(i) {
700 struct rtable __rcu **prt;
701 prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
702 rt = rcu_dereference(*prt);
703 if (rt)
704 rt->dst.obsolete = DST_OBSOLETE_KILL;
705 }
David S. Miller4895c772012-07-17 04:19:00 -0700706 }
David S. Miller4895c772012-07-17 04:19:00 -0700707
David S. Miller4895c772012-07-17 04:19:00 -0700708 fnhe->fnhe_stamp = jiffies;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000709
710out_unlock:
David S. Millerc5038a82012-07-31 15:02:02 -0700711 spin_unlock_bh(&fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700712}
713
/* Process an ICMP redirect for @rt: validate the advertised gateway and,
 * when acceptable, record it as a per-destination nexthop exception so
 * later lookups use the new gateway.  Runs with RCU read lock held
 * (uses __in_dev_get_rcu()).  When @kill_route is true the current
 * cached route is marked DST_OBSOLETE_KILL so callers re-resolve it.
 */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;	/* redirect must come from current gw */
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	/* Only the four RFC 792 redirect codes are meaningful; the low
	 * three bits select the code, anything else is ignored.
	 */
	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	/* A redirect is only valid if sent by the gateway we currently use */
	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	/* Sanity checks on the advertised gateway: must actually change,
	 * redirects must be accepted on this device, and the address must
	 * not be multicast, limited broadcast or zeronet.
	 */
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		/* Non-shared media: the new gateway must be directly on-link */
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		/* secure_redirects: only accept gateways already known as
		 * default gateways in the FIB.
		 */
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		/* Shared media: any unicast address will do */
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
	if (!n)
		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
	if (!IS_ERR(n)) {
		if (!(n->nud_state & NUD_VALID)) {
			/* Gateway not resolved yet: kick off ARP and wait
			 * until it proves reachable before installing it.
			 */
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				/* Record new gateway as a nexthop exception,
				 * expiring like a garbage-collected entry.
				 */
				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						0, jiffies + ip_rt_gc_timeout);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
795
David S. Miller4895c772012-07-17 04:19:00 -0700796static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
797{
798 struct rtable *rt;
799 struct flowi4 fl4;
Michal Kubecekf96ef982013-05-28 08:26:49 +0200800 const struct iphdr *iph = (const struct iphdr *) skb->data;
801 int oif = skb->dev->ifindex;
802 u8 tos = RT_TOS(iph->tos);
803 u8 prot = iph->protocol;
804 u32 mark = skb->mark;
David S. Miller4895c772012-07-17 04:19:00 -0700805
806 rt = (struct rtable *) dst;
807
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900808 __build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Millerceb33202012-07-17 11:31:28 -0700809 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700810}
811
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
813{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800814 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 struct dst_entry *ret = dst;
816
817 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000818 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819 ip_rt_put(rt);
820 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700821 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
822 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700823 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 ret = NULL;
825 }
826 }
827 return ret;
828}
829
830/*
831 * Algorithm:
832 * 1. The first ip_rt_redirect_number redirects are sent
833 * with exponential backoff, then we stop sending them at all,
834 * assuming that the host ignores our redirects.
835 * 2. If we did not see packets requiring redirects
836 * during ip_rt_redirect_silence, we assume that the host
837 * forgot redirected route and start to send redirects again.
838 *
839 * This algorithm is much cheaper and more intelligent than dumb load limiting
840 * in icmp.c.
841 *
842 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
843 * and "frag. need" (breaks PMTU discovery) in icmp.c.
844 */
845
/* Send an ICMP redirect for a packet we are about to forward back out
 * the interface it arrived on.  Rate-limited per source host through the
 * inet_peer cache using the exponential-backoff algorithm documented
 * above.  @skb is not consumed.
 *
 * NOTE(review): peer->rate_tokens is also used as a token bucket by
 * ip_error(); sharing one counter for both purposes looks like it can
 * skew the redirect backoff when errors are sent to the same peer —
 * confirm against current upstream, which added a separate counter.
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;
	int vif;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	/* Snapshot what we need under RCU before dropping the lock */
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
	if (!peer) {
		/* No peer entry available: send unthrottled */
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		/* Log once, exactly when the give-up threshold is reached */
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}
910
/* Input-path handler for routes whose dst.error is set: account the
 * error in SNMP counters and, when forwarding is enabled, answer with
 * the matching ICMP destination-unreachable message, rate-limited per
 * source via the inet_peer cache.  Always consumes @skb; returns 0.
 */
static int ip_error(struct sk_buff *skb)
{
	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
	struct rtable *rt = skb_rtable(skb);
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	/* IP on this device is disabled. */
	if (!in_dev)
		goto out;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		/* Host (non-forwarding) case: only bump counters, never
		 * send ICMP errors for packets we would not forward.
		 */
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	/* Map the route error to an ICMP code; unknown errors are dropped */
	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
			       l3mdev_master_ifindex(skb->dev), 1);

	send = true;
	if (peer) {
		/* Token bucket: tokens accrue with elapsed jiffies, capped
		 * at ip_rt_error_burst; each ICMP costs ip_rt_error_cost.
		 */
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977
/* Record a learned path MTU for the flow in @fl4 as a FIB nexthop
 * exception.  No-op when the route's MTU metric is locked, when @mtu
 * would not shrink the MTU currently in effect, or when the same value
 * was recorded less than half an expiry interval ago.  @mtu is clamped
 * below at ip_rt_min_pmtu.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	/* PMTU discovery can only ever lower the MTU */
	if (ipv4_mtu(dst) < mtu)
		return;

	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	/* Same value learned recently: keep the existing exception */
	if (rt->rt_pmtu == mtu &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
1005
David S. Miller4895c772012-07-17 04:19:00 -07001006static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1007 struct sk_buff *skb, u32 mtu)
1008{
1009 struct rtable *rt = (struct rtable *) dst;
1010 struct flowi4 fl4;
1011
1012 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +00001013 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -07001014}
1015
David S. Miller36393392012-06-14 22:21:46 -07001016void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1017 int oif, u32 mark, u8 protocol, int flow_flags)
1018{
David S. Miller4895c772012-07-17 04:19:00 -07001019 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -07001020 struct flowi4 fl4;
1021 struct rtable *rt;
1022
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001023 if (!mark)
1024 mark = IP4_REPLY_MARK(net, skb->mark);
1025
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001026 __build_flow_key(net, &fl4, NULL, iph, oif,
David S. Miller4895c772012-07-17 04:19:00 -07001027 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -07001028 rt = __ip_route_output_key(net, &fl4);
1029 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -07001030 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -07001031 ip_rt_put(rt);
1032 }
1033}
1034EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1035
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001036static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -07001037{
David S. Miller4895c772012-07-17 04:19:00 -07001038 const struct iphdr *iph = (const struct iphdr *) skb->data;
1039 struct flowi4 fl4;
1040 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001041
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001042 __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001043
1044 if (!fl4.flowi4_mark)
1045 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1046
David S. Miller4895c772012-07-17 04:19:00 -07001047 rt = __ip_route_output_key(sock_net(sk), &fl4);
1048 if (!IS_ERR(rt)) {
1049 __ip_rt_update_pmtu(rt, &fl4, mtu);
1050 ip_rt_put(rt);
1051 }
David S. Miller36393392012-06-14 22:21:46 -07001052}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001053
/* Update the path MTU for a connected socket and refresh its cached
 * route if that route became invalid.  Takes the socket lock (bh); when
 * the socket is owned by user context we fall back to the lockless
 * __ipv4_sk_update_pmtu() path instead of touching sk->sk_dst_cache.
 */
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *odst = NULL;	/* socket's old dst, released at out */
	bool new = false;		/* true when rt holds a fresh reference */
	struct net *net = sock_net(sk);

	bh_lock_sock(sk);

	if (!ip_sk_accept_pmtu(sk))
		goto out;

	odst = sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !odst) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);

	rt = (struct rtable *)odst;
	if (odst->obsolete && !odst->ops->check(odst, 0)) {
		/* Cached route no longer valid: look up a fresh one */
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	/* Apply the MTU to the underlying (possibly xfrm-wrapped) route */
	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	if (!dst_check(&rt->dst, 0)) {
		/* The update invalidated the route; re-resolve once more */
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
	dst_release(odst);
}
David S. Miller36393392012-06-14 22:21:46 -07001106EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001107
David S. Millerb42597e2012-07-11 21:25:45 -07001108void ipv4_redirect(struct sk_buff *skb, struct net *net,
1109 int oif, u32 mark, u8 protocol, int flow_flags)
1110{
David S. Miller4895c772012-07-17 04:19:00 -07001111 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001112 struct flowi4 fl4;
1113 struct rtable *rt;
1114
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001115 __build_flow_key(net, &fl4, NULL, iph, oif,
David S. Miller4895c772012-07-17 04:19:00 -07001116 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001117 rt = __ip_route_output_key(net, &fl4);
1118 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001119 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001120 ip_rt_put(rt);
1121 }
1122}
1123EXPORT_SYMBOL_GPL(ipv4_redirect);
1124
1125void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1126{
David S. Miller4895c772012-07-17 04:19:00 -07001127 const struct iphdr *iph = (const struct iphdr *) skb->data;
1128 struct flowi4 fl4;
1129 struct rtable *rt;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001130 struct net *net = sock_net(sk);
David S. Millerb42597e2012-07-11 21:25:45 -07001131
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001132 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1133 rt = __ip_route_output_key(net, &fl4);
David S. Miller4895c772012-07-17 04:19:00 -07001134 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001135 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001136 ip_rt_put(rt);
1137 }
David S. Millerb42597e2012-07-11 21:25:45 -07001138}
1139EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1140
David S. Millerefbc3682011-12-01 13:38:59 -05001141static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1142{
1143 struct rtable *rt = (struct rtable *) dst;
1144
David S. Millerceb33202012-07-17 11:31:28 -07001145 /* All IPV4 dsts are created with ->obsolete set to the value
1146 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1147 * into this function always.
1148 *
Timo Teräs387aa652013-05-27 20:46:31 +00001149 * When a PMTU/redirect information update invalidates a route,
1150 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1151 * DST_OBSOLETE_DEAD by dst_free().
David S. Millerceb33202012-07-17 11:31:28 -07001152 */
Timo Teräs387aa652013-05-27 20:46:31 +00001153 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc3682011-12-01 13:38:59 -05001154 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001155 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156}
1157
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158static void ipv4_link_failure(struct sk_buff *skb)
1159{
1160 struct rtable *rt;
1161
1162 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1163
Eric Dumazet511c3f92009-06-02 05:14:27 +00001164 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001165 if (rt)
1166 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167}
1168
Eric W. Biedermanede20592015-10-07 16:48:47 -05001169static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170{
Joe Perches91df42b2012-05-15 14:11:54 +00001171 pr_debug("%s: %pI4 -> %pI4, %s\n",
1172 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1173 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001175 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176 return 0;
1177}
1178
/*
   We do not cache the source address of the outgoing interface,
   because it is used only by the IP RR, TS and SRR options,
   so it is out of the fast path.

   BTW remember: "addr" is allowed to be not aligned
   in IP options!
 */
1187
/* Copy the source address this host would use for @rt into @addr.
 * For output routes the packet's own source suffices; for input routes
 * a reverse FIB lookup (or inet_select_addr() fallback) determines it.
 * memcpy() is used because @addr may be unaligned (IP option area).
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		/* Build a flow key mirroring the forwarding decision so the
		 * FIB can report the preferred source for this path.
		 */
		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			/* No FIB match: pick any universe-scope address on
			 * the output device toward the nexthop.
			 */
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}
1220
Patrick McHardyc7066f72011-01-14 13:36:42 +01001221#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001222static void set_class_tag(struct rtable *rt, u32 tag)
1223{
Changli Gaod8d1f302010-06-10 23:31:35 -07001224 if (!(rt->dst.tclassid & 0xFFFF))
1225 rt->dst.tclassid |= tag & 0xFFFF;
1226 if (!(rt->dst.tclassid & 0xFFFF0000))
1227 rt->dst.tclassid |= tag & 0xFFFF0000;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228}
1229#endif
1230
David S. Miller0dbaee32010-12-13 12:52:14 -08001231static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1232{
1233 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1234
1235 if (advmss == 0) {
1236 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1237 ip_rt_min_advmss);
1238 if (advmss > 65535 - 40)
1239 advmss = 65535 - 40;
1240 }
1241 return advmss;
1242}
1243
/* dst_ops->mtu handler: effective MTU of @dst.  Preference order is a
 * still-valid learned path MTU, then the RTAX_MTU metric, then the
 * device MTU (clamped to 576 for locked-MTU gateway routes), capped at
 * IP_MAX_MTU and reduced by any lightweight-tunnel header room.
 */
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	/* Learned PMTU is ignored once it expires */
	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = dst->dev->mtu;

	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
		/* Locked MTU through a gateway: fall back to the
		 * historical minimum-reassembly value of 576.
		 */
		if (rt->rt_uses_gateway && mtu > 576)
			mtu = 576;
	}

	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
1266
David S. Millerf2bb4be2012-07-17 12:20:47 -07001267static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001268{
Eric Dumazetcaa41522014-09-03 22:21:56 -07001269 struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
David S. Miller4895c772012-07-17 04:19:00 -07001270 struct fib_nh_exception *fnhe;
1271 u32 hval;
1272
David S. Millerf2bb4be2012-07-17 12:20:47 -07001273 if (!hash)
1274 return NULL;
1275
David S. Millerd3a25c92012-07-17 13:23:08 -07001276 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001277
1278 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1279 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001280 if (fnhe->fnhe_daddr == daddr)
1281 return fnhe;
1282 }
1283 return NULL;
1284}
David S. Miller4895c772012-07-17 04:19:00 -07001285
/* Attach @rt to the nexthop exception @fnhe (input or output slot as
 * appropriate) and copy the exception's gw/pmtu data into it.  A stale
 * generation id first resets the exception.  Returns true when @rt was
 * cached in the fnhe, false otherwise (mismatched daddr or DST_NOCACHE).
 * Serialized against update_or_create_fnhe() by fnhe_lock.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable __rcu **porig;
		struct rtable *orig;
		int genid = fnhe_genid(dev_net(rt->dst.dev));

		/* Pick the cached-route slot matching rt's direction */
		if (rt_is_input_route(rt))
			porig = &fnhe->fnhe_rth_input;
		else
			porig = &fnhe->fnhe_rth_output;
		orig = rcu_dereference(*porig);

		/* Generation changed: wipe the stale exception data and
		 * any routes cached on it before reusing it.
		 */
		if (fnhe->fnhe_genid != genid) {
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
			fnhe_flush_routes(fnhe);
			orig = NULL;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		if (!(rt->dst.flags & DST_NOCACHE)) {
			/* Publish rt in the slot and free the old entry */
			rcu_assign_pointer(*porig, rt);
			if (orig)
				rt_free(orig);
			ret = true;
		}

		fnhe->fnhe_stamp = jiffies;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1329
/* Try to install @rt as the cached route on @nh: the single input slot
 * for input routes, this CPU's per-cpu output slot otherwise.  Uses
 * cmpxchg so a concurrent update loses rather than leaks; returns true
 * when @rt was installed (the displaced route, if any, is freed).
 */
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	/* Only swap if nobody raced in between the read and the store */
	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig)
			rt_free(orig);
	} else
		ret = false;

	return ret;
}
1351
/* Per-CPU list linking rtables via rt_uncached so rt_flush_dev() can
 * find and repoint them when their net_device is going away.
 */
struct uncached_list {
	spinlock_t		lock;	/* protects head */
	struct list_head	head;	/* chain of rtable.rt_uncached */
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
David S. Millercaacf052012-07-31 15:06:50 -07001358
/* Link @rt onto this CPU's uncached-route list so rt_flush_dev() can
 * find it later.  The owning list is remembered in rt->rt_uncached_list
 * so ipv4_dst_destroy() can unlink from the correct CPU's list even if
 * the route is freed on another CPU.
 */
static void rt_add_uncached_list(struct rtable *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);

	rt->rt_uncached_list = ul;

	/* _bh variant: these lists are also touched from packet
	 * (softirq) context
	 */
	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
1369
/* dst_ops destroy hook for IPv4 routes: if this rtable was placed on an
 * uncached list, unlink it under the lock of the CPU list recorded at
 * rt_add_uncached_list() time.
 */
static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;

	/* empty list_head means the route was never added to any
	 * uncached list, so there is nothing to unlink
	 */
	if (!list_empty(&rt->rt_uncached)) {
		struct uncached_list *ul = rt->rt_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
1382
/* Device teardown helper: walk every CPU's uncached-route list and
 * re-point routes still referencing @dev at the netns loopback device.
 * The new device reference is taken (dev_hold) before the old one is
 * dropped (dev_put), so @dev's refcount can reach zero and the device
 * can be unregistered.
 */
void rt_flush_dev(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&ul->lock);
	}
}
1403
Eric Dumazet4331deb2012-07-25 05:11:23 +00001404static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba2012-07-17 12:58:50 -07001405{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001406 return rt &&
1407 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1408 !rt_is_expired(rt);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001409}
1410
/* Finalize a freshly allocated route @rt for destination @daddr from
 * the fib lookup result @res.
 *
 * When a fib_info (@fi) is present: record the gateway for on-link
 * gateways, attach metrics, classid and lwtunnel state from the
 * nexthop, then try to cache the route — into the nexthop exception
 * @fnhe when one exists, otherwise into the nexthop cache (unless the
 * route was allocated with DST_NOCACHE).  If caching fails, the route
 * is force-marked DST_NOCACHE and tracked on the per-cpu uncached list.
 * Routes without a fib_info are always tracked as uncached.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* only gateways in link scope are recorded on the route */
		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1455
/* Allocate and minimally initialize an IPv4 rtable.
 *
 * @flags:      RTCF_* flags for the new route
 * @type:       RTN_* route type
 * @nopolicy:   set DST_NOPOLICY on the dst
 * @noxfrm:     set DST_NOXFRM on the dst
 * @will_cache: when false, DST_HOST|DST_NOCACHE are set (the route
 *              will not be stored in a nexthop cache slot)
 *
 * The output handler defaults to ip_output; RTCF_LOCAL routes get
 * ip_local_deliver as input handler.  Returns NULL on allocation
 * failure.
 */
struct rtable *rt_dst_alloc(struct net_device *dev,
			    unsigned int flags, u16 type,
			    bool nopolicy, bool noxfrm, bool will_cache)
{
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
		       (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
		       (nopolicy ? DST_NOPOLICY : 0) |
		       (noxfrm ? DST_NOXFRM : 0));

	if (rt) {
		rt->rt_genid = rt_genid_ipv4(dev_net(dev));
		rt->rt_flags = flags;
		rt->rt_type = type;
		rt->rt_is_input = 0;
		rt->rt_iif = 0;
		rt->rt_pmtu = 0;
		rt->rt_gateway = 0;
		rt->rt_uses_gateway = 0;
		rt->rt_table_id = 0;
		INIT_LIST_HEAD(&rt->rt_uncached);

		rt->dst.output = ip_output;
		if (flags & RTCF_LOCAL)
			rt->dst.input = ip_local_deliver;
	}

	return rt;
}
EXPORT_SYMBOL(rt_dst_alloc);
David S. Miller0c4dcd52011-02-17 15:42:37 -08001487
Eric Dumazet96d36222010-06-02 19:21:31 +00001488/* called in rcu_read_lock() section */
/* Build an input route for a multicast destination.
 *
 * @our non-zero means the local host is a member of the group, so the
 * route also gets RTCF_LOCAL and the packet will be delivered locally.
 * Source sanity checks reject multicast/broadcast sources and non-IP
 * protocols; a zero source is only allowed for link-local multicast.
 * Returns 0 on success with skb's dst set, or a negative errno.
 *
 * Caller must hold rcu_read_lock() (see comment at original call site).
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	unsigned int flags = RTCF_MULTICAST;
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (!in_dev)
		return -EINVAL;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
		goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		/* reverse-path validate the source address */
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	if (our)
		flags |= RTCF_LOCAL;

	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* multicast routes must never be output through; trap it */
	rth->dst.output = ip_rt_bug;
	rth->rt_is_input= 1;

#ifdef CONFIG_IP_MROUTE
	/* non-link-local groups on a multicast router go through the
	 * multicast forwarding engine
	 */
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}
1549
1550
1551static void ip_handle_martian_source(struct net_device *dev,
1552 struct in_device *in_dev,
1553 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001554 __be32 daddr,
1555 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001556{
1557 RT_CACHE_STAT_INC(in_martian_src);
1558#ifdef CONFIG_IP_ROUTE_VERBOSE
1559 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1560 /*
1561 * RFC1812 recommendation, if source is martian,
1562 * the only hint is MAC header.
1563 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001564 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001565 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001566 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001567 print_hex_dump(KERN_WARNING, "ll header: ",
1568 DUMP_PREFIX_OFFSET, 16, 1,
1569 skb_mac_header(skb),
1570 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571 }
1572 }
1573#endif
1574}
1575
/* Remove the fib_nh_exception for @daddr from nexthop @nh's exception
 * hash, if present.  The chain walk and unlink happen under fnhe_lock;
 * fnhe_flush_routes() drops the cached routes bound to the exception
 * before the entry itself is freed via kfree_rcu (readers may still be
 * traversing it under RCU).
 */
static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference_protected(nh->nh_exceptions,
					 lockdep_is_held(&fnhe_lock));
	hash += hval;

	/* walk the chain keeping a pointer to the link to patch */
	fnhe_p = &hash->chain;
	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
	while (fnhe) {
		if (fnhe->fnhe_daddr == daddr) {
			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
			fnhe_flush_routes(fnhe);
			kfree_rcu(fnhe, rcu);
			break;
		}
		fnhe_p = &fnhe->fnhe_next;
		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
						 lockdep_is_held(&fnhe_lock));
	}

	spin_unlock_bh(&fnhe_lock);
}
1605
Thomas Grafefd85702016-11-30 17:10:09 +01001606static void set_lwt_redirect(struct rtable *rth)
1607{
1608 if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
1609 rth->dst.lwtstate->orig_output = rth->dst.output;
1610 rth->dst.output = lwtunnel_output;
1611 }
1612
1613 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
1614 rth->dst.lwtstate->orig_input = rth->dst.input;
1615 rth->dst.input = lwtunnel_input;
1616 }
1617}
1618
Eric Dumazet47360222010-06-03 04:13:21 +00001619/* called in rcu_read_lock() section */
/* Build (or reuse from cache) the forwarding route for an input packet.
 *
 * Validates the source against the chosen output interface, decides
 * whether an ICMP redirect should be suggested, then either reuses a
 * cached route (from the nexthop exception for @daddr, or the nexthop
 * input cache) or allocates a fresh one, sets it up for forwarding and
 * attaches it to the skb.  Returns 0 on success or a negative errno.
 *
 * Runs under rcu_read_lock() (see comment above the function in the
 * original file).
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct fib_nh_exception *fnhe;
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	bool do_cache;
	u32 itag = 0;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (!out_dev) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* only cache when there is a fib_info and no source classid tag */
	do_cache = res->fi && !itag;
	/* err > 0 here is fib_validate_source's "source is on this
	 * interface" indication (NOTE(review): presumed from its return
	 * convention — confirm against fib_validate_source); combined
	 * with same in/out device and shared-media/on-link gateway it
	 * makes the packet a candidate for an ICMP redirect
	 */
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    skb->protocol == htons(ETH_P_IP) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		IPCB(skb)->flags |= IPSKB_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
	if (do_cache) {
		if (fnhe) {
			rth = rcu_dereference(fnhe->fnhe_rth_input);
			/* an expired exception is deleted and ignored;
			 * otherwise jump straight to validating its
			 * cached route
			 */
			if (rth && rth->dst.expires &&
			    time_after(jiffies, rth->dst.expires)) {
				ip_del_fnhe(&FIB_RES_NH(*res), daddr);
				fnhe = NULL;
			} else {
				goto rt_cache;
			}
		}

		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);

rt_cache:
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_is_input = 1;
	if (res->table)
		rth->rt_table_id = res->table->tb_id;
	RT_CACHE_STAT_INC(in_slow_tot);

	rth->dst.input = ip_forward;

	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
	set_lwt_redirect(rth);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* To make ICMP packets follow the right flow, the multipath hash is
 * calculated from the inner IP addresses in reverse order.
 */
static int ip_multipath_icmp_hash(struct sk_buff *skb)
{
	const struct iphdr *outer_iph = ip_hdr(skb);
	struct icmphdr _icmph;
	const struct icmphdr *icmph;
	struct iphdr _inner_iph;
	const struct iphdr *inner_iph;

	/* fragments past the first carry no ICMP header to inspect */
	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
		goto standard_hash;

	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
				   &_icmph);
	if (!icmph)
		goto standard_hash;

	/* only ICMP errors embed the original header worth hashing on */
	switch (icmph->type) {
	case ICMP_DEST_UNREACH:
	case ICMP_REDIRECT:
	case ICMP_TIME_EXCEEDED:
	case ICMP_PARAMETERPROB:
		break;
	default:
		goto standard_hash;
	}

	inner_iph = skb_header_pointer(skb,
				       outer_iph->ihl * 4 + sizeof(_icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto standard_hash;

	/* reversed inner addresses match the direction of the
	 * original flow the error refers to
	 */
	return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);

standard_hash:
	return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
}

#endif /* CONFIG_IP_ROUTE_MULTIPATH */
1757
/* Select the nexthop (multipath hash when several are configured) and
 * build the forwarding route via __mkroute_input().  ICMP packets are
 * hashed on the embedded inner header so errors follow the flow that
 * triggered them.
 */
static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    const struct flowi4 *fl4,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1) {
		int h;

		if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
			h = ip_multipath_icmp_hash(skb);
		else
			h = fib_multipath_hash(saddr, daddr);
		fib_select_multipath(res, h);
	}
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
1779
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780/*
1781 * NOTE. We drop all the packets that has local source
1782 * addresses, because every properly looped back packet
1783 * must have correct destination already attached by output routine.
1784 *
1785 * Such approach solves two big problems:
1786 * 1. Not simplex devices are handled properly.
1787 * 2. IP spoofing attempts are filtered with 100% of guarantee.
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001788 * called with rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789 */
1790
/* Slow-path input route resolution: classify the packet (martian,
 * broadcast, local delivery, forward), run the FIB lookup and attach
 * the resulting route to the skb.  Returns 0 on success or a negative
 * errno.  Runs under rcu_read_lock() (see comment above in the file).
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct ip_tunnel_info *tun_info;
	struct flowi4 fl4;
	unsigned int flags = 0;
	u32 itag = 0;
	struct rtable *rth;
	int err = -EINVAL;
	struct net *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	/* carry the tunnel id (RX metadata only) into the flow key so
	 * lookups in tunnel-collect-metadata setups can match on it
	 */
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
	else
		fl4.flowi4_tun_key.tun_id = 0;
	skb_dst_drop(skb);

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	res.table = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and call it once if daddr or/and saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.flowi4_flags = 0;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res, 0);
	if (err != 0) {
		/* hosts (forwarding disabled) report unreachable rather
		 * than the raw lookup error
		 */
		if (!IN_DEV_FORWARD(in_dev))
			err = -EHOSTUNREACH;
		goto no_route;
	}

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  0, dev, in_dev, &itag);
		if (err < 0)
			goto martian_source;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev)) {
		err = -EHOSTUNREACH;
		goto no_route;
	}
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* try the nexthop input cache before allocating a new route;
	 * only untagged (itag == 0) lookups are cacheable
	 */
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	/* local routes must never be output through; trap it */
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->rt_is_input = 1;
	if (res.table)
		rth->rt_table_id = res.table->tb_id;

	RT_CACHE_STAT_INC(in_slow_tot);
	if (res.type == RTN_UNREACHABLE) {
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags 	&= ~RTCF_LOCAL;
	}

	if (do_cache) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
			WARN_ON(rth->dst.input == lwtunnel_input);
			rth->dst.lwtstate->orig_input = rth->dst.input;
			rth->dst.input = lwtunnel_input;
		}

		/* caching failed: fall back to the uncached list */
		if (unlikely(!rt_cache_route(nh, rth))) {
			rth->dst.flags |= DST_NOCACHE;
			rt_add_uncached_list(rth);
		}
	}
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	res.fi = NULL;
	res.table = NULL;
	/* deliver locally as an unreachable route so ip_error can
	 * generate the proper ICMP error
	 */
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
1985
David S. Millerc6cffba2012-07-26 11:14:38 +00001986int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1987 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988{
Eric Dumazet96d36222010-06-02 19:21:31 +00001989 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990
Eric Dumazet96d36222010-06-02 19:21:31 +00001991 rcu_read_lock();
1992
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993 /* Multicast recognition logic is moved from route cache to here.
1994 The problem was that too many Ethernet cards have broken/missing
1995 hardware multicast filters :-( As result the host on multicasting
1996 network acquires a lot of useless route cache entries, sort of
1997 SDR messages from all the world. Now we try to get rid of them.
1998 Really, provided software IP multicast filter is organized
1999 reasonably (at least, hashed), it does not result in a slowdown
2000 comparing with route cache reject entries.
2001 Note, that multicast routers are not affected, because
2002 route cache entry is created eventually.
2003 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08002004 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00002005 struct in_device *in_dev = __in_dev_get_rcu(dev);
David Aherne58e4152016-10-31 15:54:00 -07002006 int our = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002007
David Aherne58e4152016-10-31 15:54:00 -07002008 if (in_dev)
2009 our = ip_check_mc_rcu(in_dev, daddr, saddr,
2010 ip_hdr(skb)->protocol);
2011
2012 /* check l3 master if no match yet */
2013 if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
2014 struct in_device *l3_in_dev;
2015
2016 l3_in_dev = __in_dev_get_rcu(skb->dev);
2017 if (l3_in_dev)
2018 our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
2019 ip_hdr(skb)->protocol);
2020 }
2021
2022 res = -EINVAL;
2023 if (our
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024#ifdef CONFIG_IP_MROUTE
David Aherne58e4152016-10-31 15:54:00 -07002025 ||
2026 (!ipv4_is_local_multicast(daddr) &&
2027 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002028#endif
David Aherne58e4152016-10-31 15:54:00 -07002029 ) {
2030 res = ip_route_input_mc(skb, daddr, saddr,
2031 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002032 }
2033 rcu_read_unlock();
David Aherne58e4152016-10-31 15:54:00 -07002034 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035 }
David S. Millerc10237e2012-06-27 17:05:06 -07002036 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00002037 rcu_read_unlock();
2038 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039}
David S. Millerc6cffba2012-07-26 11:14:38 +00002040EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002042/* called with rcu_read_lock() */
David S. Miller982721f2011-02-16 21:44:24 -08002043static struct rtable *__mkroute_output(const struct fib_result *res,
David Miller1a00fee2012-07-01 02:02:56 +00002044 const struct flowi4 *fl4, int orig_oif,
Julian Anastasovf61759e2011-12-02 11:39:42 +00002045 struct net_device *dev_out,
David S. Miller5ada5522011-02-17 15:29:00 -08002046 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002047{
David S. Miller982721f2011-02-16 21:44:24 -08002048 struct fib_info *fi = res->fi;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002049 struct fib_nh_exception *fnhe;
David S. Miller5ada5522011-02-17 15:29:00 -08002050 struct in_device *in_dev;
David S. Miller982721f2011-02-16 21:44:24 -08002051 u16 type = res->type;
David S. Miller5ada5522011-02-17 15:29:00 -08002052 struct rtable *rth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002053 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054
Thomas Grafd0daebc32012-06-12 00:44:01 +00002055 in_dev = __in_dev_get_rcu(dev_out);
2056 if (!in_dev)
David S. Miller5ada5522011-02-17 15:29:00 -08002057 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058
Thomas Grafd0daebc32012-06-12 00:44:01 +00002059 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
David Ahern5f02ce242016-09-10 12:09:54 -07002060 if (ipv4_is_loopback(fl4->saddr) &&
2061 !(dev_out->flags & IFF_LOOPBACK) &&
2062 !netif_is_l3_master(dev_out))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002063 return ERR_PTR(-EINVAL);
2064
David S. Miller68a5e3d2011-03-11 20:07:33 -05002065 if (ipv4_is_lbcast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002066 type = RTN_BROADCAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002067 else if (ipv4_is_multicast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002068 type = RTN_MULTICAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002069 else if (ipv4_is_zeronet(fl4->daddr))
David S. Miller5ada5522011-02-17 15:29:00 -08002070 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071
2072 if (dev_out->flags & IFF_LOOPBACK)
2073 flags |= RTCF_LOCAL;
2074
Julian Anastasov63617422012-11-22 23:04:14 +02002075 do_cache = true;
David S. Miller982721f2011-02-16 21:44:24 -08002076 if (type == RTN_BROADCAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077 flags |= RTCF_BROADCAST | RTCF_LOCAL;
David S. Miller982721f2011-02-16 21:44:24 -08002078 fi = NULL;
2079 } else if (type == RTN_MULTICAST) {
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002080 flags |= RTCF_MULTICAST | RTCF_LOCAL;
David S. Miller813b3b52011-04-28 14:48:42 -07002081 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2082 fl4->flowi4_proto))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083 flags &= ~RTCF_LOCAL;
Julian Anastasov63617422012-11-22 23:04:14 +02002084 else
2085 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002086 /* If multicast route do not exist use
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002087 * default one, but do not gateway in this case.
2088 * Yes, it is hack.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002089 */
David S. Miller982721f2011-02-16 21:44:24 -08002090 if (fi && res->prefixlen < 4)
2091 fi = NULL;
Chris Friesend6d5e992016-04-08 15:21:30 -06002092 } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
2093 (orig_oif != dev_out->ifindex)) {
2094 /* For local routes that require a particular output interface
2095 * we do not want to cache the result. Caching the result
2096 * causes incorrect behaviour when there are multiple source
2097 * addresses on the interface, the end result being that if the
2098 * intended recipient is waiting on that interface for the
2099 * packet he won't receive it because it will be delivered on
2100 * the loopback interface and the IP_PKTINFO ipi_ifindex will
2101 * be set to the loopback interface as well.
2102 */
2103 fi = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 }
2105
David S. Millerf2bb4be2012-07-17 12:20:47 -07002106 fnhe = NULL;
Julian Anastasov63617422012-11-22 23:04:14 +02002107 do_cache &= fi != NULL;
2108 if (do_cache) {
David S. Millerc5038a82012-07-31 15:02:02 -07002109 struct rtable __rcu **prth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002110 struct fib_nh *nh = &FIB_RES_NH(*res);
Eric Dumazetd26b3a72012-07-31 05:45:30 +00002111
Julian Anastasovc92b9652012-10-08 11:41:19 +00002112 fnhe = find_exception(nh, fl4->daddr);
Xin Longdeed49d2016-02-18 21:21:19 +08002113 if (fnhe) {
Timo Teräs2ffae992013-06-27 10:27:05 +03002114 prth = &fnhe->fnhe_rth_output;
Xin Longdeed49d2016-02-18 21:21:19 +08002115 rth = rcu_dereference(*prth);
2116 if (rth && rth->dst.expires &&
2117 time_after(jiffies, rth->dst.expires)) {
2118 ip_del_fnhe(nh, fl4->daddr);
2119 fnhe = NULL;
2120 } else {
2121 goto rt_cache;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002122 }
Julian Anastasovc92b9652012-10-08 11:41:19 +00002123 }
Xin Longdeed49d2016-02-18 21:21:19 +08002124
2125 if (unlikely(fl4->flowi4_flags &
2126 FLOWI_FLAG_KNOWN_NH &&
2127 !(nh->nh_gw &&
2128 nh->nh_scope == RT_SCOPE_LINK))) {
2129 do_cache = false;
2130 goto add;
2131 }
2132 prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
David S. Millerc5038a82012-07-31 15:02:02 -07002133 rth = rcu_dereference(*prth);
Xin Longdeed49d2016-02-18 21:21:19 +08002134
2135rt_cache:
David S. Millerc5038a82012-07-31 15:02:02 -07002136 if (rt_cache_valid(rth)) {
2137 dst_hold(&rth->dst);
2138 return rth;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002139 }
2140 }
Julian Anastasovc92b9652012-10-08 11:41:19 +00002141
2142add:
David Ahernd08c4f32015-09-02 13:58:34 -07002143 rth = rt_dst_alloc(dev_out, flags, type,
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002144 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerf2bb4be2012-07-17 12:20:47 -07002145 IN_DEV_CONF_GET(in_dev, NOXFRM),
Julian Anastasovc92b9652012-10-08 11:41:19 +00002146 do_cache);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002147 if (!rth)
David S. Miller5ada5522011-02-17 15:29:00 -08002148 return ERR_PTR(-ENOBUFS);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002149
David S. Miller13378ca2012-07-23 13:57:45 -07002150 rth->rt_iif = orig_oif ? : 0;
David Ahernb7503e02015-09-02 13:58:35 -07002151 if (res->table)
2152 rth->rt_table_id = res->table->tb_id;
2153
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 RT_CACHE_STAT_INC(out_slow_tot);
2155
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002157 if (flags & RTCF_LOCAL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158 !(dev_out->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002159 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002160 RT_CACHE_STAT_INC(out_slow_mc);
2161 }
2162#ifdef CONFIG_IP_MROUTE
David S. Miller982721f2011-02-16 21:44:24 -08002163 if (type == RTN_MULTICAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164 if (IN_DEV_MFORWARD(in_dev) &&
David S. Miller813b3b52011-04-28 14:48:42 -07002165 !ipv4_is_local_multicast(fl4->daddr)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002166 rth->dst.input = ip_mr_input;
2167 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002168 }
2169 }
2170#endif
2171 }
2172
David S. Millerf2bb4be2012-07-17 12:20:47 -07002173 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
Thomas Grafefd85702016-11-30 17:10:09 +01002174 set_lwt_redirect(rth);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002175
David S. Miller5ada5522011-02-17 15:29:00 -08002176 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002177}
2178
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179/*
2180 * Major route resolver routine.
2181 */
2182
Peter Nørlund79a13152015-09-30 10:12:22 +02002183struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2184 int mp_hash)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002185{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186 struct net_device *dev_out = NULL;
Julian Anastasovf61759e2011-12-02 11:39:42 +00002187 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07002188 unsigned int flags = 0;
2189 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08002190 struct rtable *rth;
David S. Miller813b3b52011-04-28 14:48:42 -07002191 int orig_oif;
Nikola ForrĂ³0315e382015-09-17 16:01:32 +02002192 int err = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193
David S. Miller85b91b02012-07-13 08:21:29 -07002194 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07002196 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197
David S. Miller813b3b52011-04-28 14:48:42 -07002198 orig_oif = fl4->flowi4_oif;
2199
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002200 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07002201 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2202 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2203 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08002204
David S. Miller010c2702011-02-17 15:37:09 -08002205 rcu_read_lock();
David S. Miller813b3b52011-04-28 14:48:42 -07002206 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002207 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07002208 if (ipv4_is_multicast(fl4->saddr) ||
2209 ipv4_is_lbcast(fl4->saddr) ||
2210 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211 goto out;
2212
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213 /* I removed check for oif == dev_out->oif here.
2214 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08002215 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
2216 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217 2. Moreover, we are allowed to send packets with saddr
2218 of another iface. --ANK
2219 */
2220
David S. Miller813b3b52011-04-28 14:48:42 -07002221 if (fl4->flowi4_oif == 0 &&
2222 (ipv4_is_multicast(fl4->daddr) ||
2223 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07002224 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002225 dev_out = __ip_dev_find(net, fl4->saddr, false);
Ian Morris51456b22015-04-03 09:17:26 +01002226 if (!dev_out)
Julian Anastasova210d012008-10-01 07:28:28 -07002227 goto out;
2228
Linus Torvalds1da177e2005-04-16 15:20:36 -07002229 /* Special hack: user can direct multicasts
2230 and limited broadcast via necessary interface
2231 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2232 This hack is not just for fun, it allows
2233 vic,vat and friends to work.
2234 They bind socket to loopback, set ttl to zero
2235 and expect that it will work.
2236 From the viewpoint of routing cache they are broken,
2237 because we are not allowed to build multicast path
2238 with loopback source addr (look, routing cache
2239 cannot know, that ttl is zero, so that packet
2240 will not leave this host and route is valid).
2241 Luckily, this hack is good workaround.
2242 */
2243
David S. Miller813b3b52011-04-28 14:48:42 -07002244 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 goto make_route;
2246 }
Julian Anastasova210d012008-10-01 07:28:28 -07002247
David S. Miller813b3b52011-04-28 14:48:42 -07002248 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07002249 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002250 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07002251 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07002252 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002253 }
2254
2255
David S. Miller813b3b52011-04-28 14:48:42 -07002256 if (fl4->flowi4_oif) {
2257 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002258 rth = ERR_PTR(-ENODEV);
Ian Morris51456b22015-04-03 09:17:26 +01002259 if (!dev_out)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002261
2262 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002263 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002264 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002265 goto out;
2266 }
David S. Miller813b3b52011-04-28 14:48:42 -07002267 if (ipv4_is_local_multicast(fl4->daddr) ||
Andrew Lunn6a211652015-05-01 16:39:54 +02002268 ipv4_is_lbcast(fl4->daddr) ||
2269 fl4->flowi4_proto == IPPROTO_IGMP) {
David S. Miller813b3b52011-04-28 14:48:42 -07002270 if (!fl4->saddr)
2271 fl4->saddr = inet_select_addr(dev_out, 0,
2272 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002273 goto make_route;
2274 }
Jiri Benc0a7e2262013-10-04 17:04:48 +02002275 if (!fl4->saddr) {
David S. Miller813b3b52011-04-28 14:48:42 -07002276 if (ipv4_is_multicast(fl4->daddr))
2277 fl4->saddr = inet_select_addr(dev_out, 0,
2278 fl4->flowi4_scope);
2279 else if (!fl4->daddr)
2280 fl4->saddr = inet_select_addr(dev_out, 0,
2281 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282 }
2283 }
2284
David S. Miller813b3b52011-04-28 14:48:42 -07002285 if (!fl4->daddr) {
2286 fl4->daddr = fl4->saddr;
2287 if (!fl4->daddr)
2288 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002289 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002290 fl4->flowi4_oif = LOOPBACK_IFINDEX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 res.type = RTN_LOCAL;
2292 flags |= RTCF_LOCAL;
2293 goto make_route;
2294 }
2295
Nikola ForrĂ³0315e382015-09-17 16:01:32 +02002296 err = fib_lookup(net, fl4, &res, 0);
2297 if (err) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07002299 res.table = NULL;
David Ahern6104e112016-10-12 13:20:11 -07002300 if (fl4->flowi4_oif &&
David Aherne58e4152016-10-31 15:54:00 -07002301 (ipv4_is_multicast(fl4->daddr) ||
2302 !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303 /* Apparently, routing tables are wrong. Assume,
2304 that the destination is on link.
2305
2306 WHY? DW.
2307 Because we are allowed to send to iface
2308 even if it has NO routes and NO assigned
2309 addresses. When oif is specified, routing
2310 tables are looked up with only one purpose:
2311 to catch if destination is gatewayed, rather than
2312 direct. Moreover, if MSG_DONTROUTE is set,
2313 we send packet, ignoring both routing tables
2314 and ifaddr state. --ANK
2315
2316
2317 We could make it even if oif is unknown,
2318 likely IPv6, but we do not.
2319 */
2320
David S. Miller813b3b52011-04-28 14:48:42 -07002321 if (fl4->saddr == 0)
2322 fl4->saddr = inet_select_addr(dev_out, 0,
2323 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324 res.type = RTN_UNICAST;
2325 goto make_route;
2326 }
Nikola ForrĂ³0315e382015-09-17 16:01:32 +02002327 rth = ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 goto out;
2329 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002330
2331 if (res.type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002332 if (!fl4->saddr) {
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002333 if (res.fi->fib_prefsrc)
David S. Miller813b3b52011-04-28 14:48:42 -07002334 fl4->saddr = res.fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002335 else
David S. Miller813b3b52011-04-28 14:48:42 -07002336 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002337 }
David Ahern5f02ce242016-09-10 12:09:54 -07002338
2339 /* L3 master device is the loopback for that domain */
2340 dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07002341 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342 flags |= RTCF_LOCAL;
2343 goto make_route;
2344 }
2345
David Ahern3ce58d82015-10-05 08:51:25 -07002346 fib_select_path(net, &res, fl4, mp_hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348 dev_out = FIB_RES_DEV(res);
David S. Miller813b3b52011-04-28 14:48:42 -07002349 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002350
2351
2352make_route:
David Miller1a00fee2012-07-01 02:02:56 +00002353 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002354
David S. Miller010c2702011-02-17 15:37:09 -08002355out:
2356 rcu_read_unlock();
David S. Millerb23dd4f2011-03-02 14:31:35 -08002357 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358}
Peter Nørlund79a13152015-09-30 10:12:22 +02002359EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002360
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002361static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2362{
2363 return NULL;
2364}
2365
Steffen Klassertebb762f2011-11-23 02:12:51 +00002366static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002367{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002368 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2369
2370 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002371}
2372
David S. Miller6700c272012-07-17 03:29:28 -07002373static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2374 struct sk_buff *skb, u32 mtu)
David S. Miller14e50e52007-05-24 18:17:54 -07002375{
2376}
2377
David S. Miller6700c272012-07-17 03:29:28 -07002378static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
2379 struct sk_buff *skb)
David S. Millerb587ee32012-07-12 00:39:24 -07002380{
2381}
2382
Held Bernhard0972ddb2011-04-24 22:07:32 +00002383static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2384 unsigned long old)
2385{
2386 return NULL;
2387}
2388
David S. Miller14e50e52007-05-24 18:17:54 -07002389static struct dst_ops ipv4_dst_blackhole_ops = {
2390 .family = AF_INET,
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002391 .check = ipv4_blackhole_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +00002392 .mtu = ipv4_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -08002393 .default_advmss = ipv4_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -07002394 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
David S. Millerb587ee32012-07-12 00:39:24 -07002395 .redirect = ipv4_rt_blackhole_redirect,
Held Bernhard0972ddb2011-04-24 22:07:32 +00002396 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -07002397 .neigh_lookup = ipv4_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -07002398};
2399
David S. Miller2774c132011-03-01 14:59:04 -08002400struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07002401{
David S. Miller2774c132011-03-01 14:59:04 -08002402 struct rtable *ort = (struct rtable *) dst_orig;
David S. Millerf5b0a872012-07-19 12:31:33 -07002403 struct rtable *rt;
David S. Miller14e50e52007-05-24 18:17:54 -07002404
David S. Millerf5b0a872012-07-19 12:31:33 -07002405 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07002406 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002407 struct dst_entry *new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07002408
David S. Miller14e50e52007-05-24 18:17:54 -07002409 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08002410 new->input = dst_discard;
Eric W. Biedermanede20592015-10-07 16:48:47 -05002411 new->output = dst_discard_out;
David S. Miller14e50e52007-05-24 18:17:54 -07002412
Changli Gaod8d1f302010-06-10 23:31:35 -07002413 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -07002414 if (new->dev)
2415 dev_hold(new->dev);
2416
David S. Miller9917e1e82012-07-17 14:44:26 -07002417 rt->rt_is_input = ort->rt_is_input;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002418 rt->rt_iif = ort->rt_iif;
David S. Miller59436342012-07-10 06:58:42 -07002419 rt->rt_pmtu = ort->rt_pmtu;
David S. Miller14e50e52007-05-24 18:17:54 -07002420
fan.duca4c3fc2013-07-30 08:33:53 +08002421 rt->rt_genid = rt_genid_ipv4(net);
David S. Miller14e50e52007-05-24 18:17:54 -07002422 rt->rt_flags = ort->rt_flags;
2423 rt->rt_type = ort->rt_type;
David S. Miller14e50e52007-05-24 18:17:54 -07002424 rt->rt_gateway = ort->rt_gateway;
Julian Anastasov155e8332012-10-08 11:41:18 +00002425 rt->rt_uses_gateway = ort->rt_uses_gateway;
David S. Miller14e50e52007-05-24 18:17:54 -07002426
David S. Millercaacf052012-07-31 15:06:50 -07002427 INIT_LIST_HEAD(&rt->rt_uncached);
David S. Miller14e50e52007-05-24 18:17:54 -07002428 dst_free(new);
2429 }
2430
David S. Miller2774c132011-03-01 14:59:04 -08002431 dst_release(dst_orig);
2432
2433 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07002434}
2435
David S. Miller9d6ec932011-03-12 01:12:47 -05002436struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
Eric Dumazet6f9c9612015-09-25 07:39:10 -07002437 const struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438{
David S. Miller9d6ec932011-03-12 01:12:47 -05002439 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440
David S. Millerb23dd4f2011-03-02 14:31:35 -08002441 if (IS_ERR(rt))
2442 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002443
David S. Miller56157872011-05-02 14:37:45 -07002444 if (flp4->flowi4_proto)
Steffen Klassertf92ee612014-09-16 10:08:40 +02002445 rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
2446 flowi4_to_flowi(flp4),
2447 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448
David S. Millerb23dd4f2011-03-02 14:31:35 -08002449 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002451EXPORT_SYMBOL_GPL(ip_route_output_flow);
2452
David Ahernc36ba662015-09-02 13:58:36 -07002453static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
Eric W. Biederman15e47302012-09-07 20:12:54 +00002454 struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
David S. Millerf1ce3062012-07-12 10:10:17 -07002455 u32 seq, int event, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456{
Eric Dumazet511c3f92009-06-02 05:14:27 +00002457 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002458 struct rtmsg *r;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002459 struct nlmsghdr *nlh;
Steffen Klassert2bc8ca42011-10-11 01:12:02 +00002460 unsigned long expires = 0;
David S. Millerf1850712012-07-10 07:26:01 -07002461 u32 error;
Julian Anastasov521f5492012-07-20 12:02:08 +03002462 u32 metrics[RTAX_MAX];
Thomas Grafbe403ea2006-08-17 18:15:17 -07002463
Eric W. Biederman15e47302012-09-07 20:12:54 +00002464 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
Ian Morris51456b22015-04-03 09:17:26 +01002465 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08002466 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002467
2468 r = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002469 r->rtm_family = AF_INET;
2470 r->rtm_dst_len = 32;
2471 r->rtm_src_len = 0;
David Millerd6c0a4f2012-07-01 02:02:59 +00002472 r->rtm_tos = fl4->flowi4_tos;
David Ahernc36ba662015-09-02 13:58:36 -07002473 r->rtm_table = table_id;
2474 if (nla_put_u32(skb, RTA_TABLE, table_id))
David S. Millerf3756b72012-04-01 20:39:02 -04002475 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476 r->rtm_type = rt->rt_type;
2477 r->rtm_scope = RT_SCOPE_UNIVERSE;
2478 r->rtm_protocol = RTPROT_UNSPEC;
2479 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2480 if (rt->rt_flags & RTCF_NOTIFY)
2481 r->rtm_flags |= RTM_F_NOTIFY;
Hannes Frederic Sowadf4d9252015-01-23 12:01:26 +01002482 if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2483 r->rtm_flags |= RTCF_DOREDIRECT;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002484
Jiri Benc930345e2015-03-29 16:59:25 +02002485 if (nla_put_in_addr(skb, RTA_DST, dst))
David S. Millerf3756b72012-04-01 20:39:02 -04002486 goto nla_put_failure;
David Miller1a00fee2012-07-01 02:02:56 +00002487 if (src) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488 r->rtm_src_len = 32;
Jiri Benc930345e2015-03-29 16:59:25 +02002489 if (nla_put_in_addr(skb, RTA_SRC, src))
David S. Millerf3756b72012-04-01 20:39:02 -04002490 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491 }
David S. Millerf3756b72012-04-01 20:39:02 -04002492 if (rt->dst.dev &&
2493 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2494 goto nla_put_failure;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002495#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf3756b72012-04-01 20:39:02 -04002496 if (rt->dst.tclassid &&
2497 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2498 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002499#endif
David S. Miller41347dc2012-06-28 04:05:27 -07002500 if (!rt_is_input_route(rt) &&
David Millerd6c0a4f2012-07-01 02:02:59 +00002501 fl4->saddr != src) {
Jiri Benc930345e2015-03-29 16:59:25 +02002502 if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
David S. Millerf3756b72012-04-01 20:39:02 -04002503 goto nla_put_failure;
2504 }
Julian Anastasov155e8332012-10-08 11:41:18 +00002505 if (rt->rt_uses_gateway &&
Jiri Benc930345e2015-03-29 16:59:25 +02002506 nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
David S. Millerf3756b72012-04-01 20:39:02 -04002507 goto nla_put_failure;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002508
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002509 expires = rt->dst.expires;
2510 if (expires) {
2511 unsigned long now = jiffies;
2512
2513 if (time_before(now, expires))
2514 expires -= now;
2515 else
2516 expires = 0;
2517 }
2518
Julian Anastasov521f5492012-07-20 12:02:08 +03002519 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002520 if (rt->rt_pmtu && expires)
Julian Anastasov521f5492012-07-20 12:02:08 +03002521 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2522 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -07002523 goto nla_put_failure;
2524
David Millerb4869882012-07-01 02:03:01 +00002525 if (fl4->flowi4_mark &&
stephen hemminger68aaed52012-10-10 08:27:25 +00002526 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
David S. Millerf3756b72012-04-01 20:39:02 -04002527 goto nla_put_failure;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002528
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002529 if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2530 nla_put_u32(skb, RTA_UID,
2531 from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
2532 goto nla_put_failure;
2533
Changli Gaod8d1f302010-06-10 23:31:35 -07002534 error = rt->dst.error;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002535
David S. Millerc7537962010-11-11 17:07:48 -08002536 if (rt_is_input_route(rt)) {
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002537#ifdef CONFIG_IP_MROUTE
2538 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2539 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2540 int err = ipmr_get_route(net, skb,
2541 fl4->saddr, fl4->daddr,
Nikolay Aleksandrov2cf75072016-09-25 23:08:31 +02002542 r, nowait, portid);
2543
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002544 if (err <= 0) {
2545 if (!nowait) {
2546 if (err == 0)
2547 return 0;
2548 goto nla_put_failure;
2549 } else {
2550 if (err == -EMSGSIZE)
2551 goto nla_put_failure;
2552 error = err;
2553 }
2554 }
2555 } else
2556#endif
Julian Anastasov91146152014-04-13 18:08:02 +03002557 if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002558 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559 }
2560
David S. Millerf1850712012-07-10 07:26:01 -07002561 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002562 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563
Johannes Berg053c0952015-01-16 22:09:00 +01002564 nlmsg_end(skb, nlh);
2565 return 0;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002566
2567nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002568 nlmsg_cancel(skb, nlh);
2569 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570}
2571
Thomas Graf661d2962013-03-21 07:45:29 +00002572static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002573{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002574 struct net *net = sock_net(in_skb->sk);
Thomas Grafd889ce32006-08-17 18:15:44 -07002575 struct rtmsg *rtm;
2576 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002577 struct rtable *rt = NULL;
David Millerd6c0a4f2012-07-01 02:02:59 +00002578 struct flowi4 fl4;
Al Viro9e12bb22006-09-26 21:25:20 -07002579 __be32 dst = 0;
2580 __be32 src = 0;
2581 u32 iif;
Thomas Grafd889ce32006-08-17 18:15:44 -07002582 int err;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002583 int mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584 struct sk_buff *skb;
David Ahernc36ba662015-09-02 13:58:36 -07002585 u32 table_id = RT_TABLE_MAIN;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002586 kuid_t uid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002587
Thomas Grafd889ce32006-08-17 18:15:44 -07002588 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2589 if (err < 0)
2590 goto errout;
2591
2592 rtm = nlmsg_data(nlh);
2593
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Ian Morris51456b22015-04-03 09:17:26 +01002595 if (!skb) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002596 err = -ENOBUFS;
2597 goto errout;
2598 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002599
2600 /* Reserve room for dummy headers, this skb can pass
2601 through good chunk of routing engine.
2602 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002603 skb_reset_mac_header(skb);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002604 skb_reset_network_header(skb);
Stephen Hemmingerd2c962b2006-04-17 17:27:11 -07002605
2606 /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07002607 ip_hdr(skb)->protocol = IPPROTO_ICMP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2609
Jiri Benc67b61f62015-03-29 16:59:26 +02002610 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
2611 dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
Thomas Grafd889ce32006-08-17 18:15:44 -07002612 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002613 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002614 if (tb[RTA_UID])
2615 uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
2616 else
2617 uid = (iif ? INVALID_UID : current_uid());
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618
David Millerd6c0a4f2012-07-01 02:02:59 +00002619 memset(&fl4, 0, sizeof(fl4));
2620 fl4.daddr = dst;
2621 fl4.saddr = src;
2622 fl4.flowi4_tos = rtm->rtm_tos;
2623 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2624 fl4.flowi4_mark = mark;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002625 fl4.flowi4_uid = uid;
David Millerd6c0a4f2012-07-01 02:02:59 +00002626
Linus Torvalds1da177e2005-04-16 15:20:36 -07002627 if (iif) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002628 struct net_device *dev;
2629
Denis V. Lunev19375042008-02-28 20:52:04 -08002630 dev = __dev_get_by_index(net, iif);
Ian Morris51456b22015-04-03 09:17:26 +01002631 if (!dev) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002632 err = -ENODEV;
2633 goto errout_free;
2634 }
2635
Linus Torvalds1da177e2005-04-16 15:20:36 -07002636 skb->protocol = htons(ETH_P_IP);
2637 skb->dev = dev;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002638 skb->mark = mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002639 local_bh_disable();
2640 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2641 local_bh_enable();
Thomas Grafd889ce32006-08-17 18:15:44 -07002642
Eric Dumazet511c3f92009-06-02 05:14:27 +00002643 rt = skb_rtable(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07002644 if (err == 0 && rt->dst.error)
2645 err = -rt->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002646 } else {
David S. Miller9d6ec932011-03-12 01:12:47 -05002647 rt = ip_route_output_key(net, &fl4);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002648
2649 err = 0;
2650 if (IS_ERR(rt))
2651 err = PTR_ERR(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002652 }
Thomas Grafd889ce32006-08-17 18:15:44 -07002653
Linus Torvalds1da177e2005-04-16 15:20:36 -07002654 if (err)
Thomas Grafd889ce32006-08-17 18:15:44 -07002655 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002656
Changli Gaod8d1f302010-06-10 23:31:35 -07002657 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002658 if (rtm->rtm_flags & RTM_F_NOTIFY)
2659 rt->rt_flags |= RTCF_NOTIFY;
2660
David Ahernc36ba662015-09-02 13:58:36 -07002661 if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
2662 table_id = rt->rt_table_id;
2663
2664 err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
Eric W. Biederman15e47302012-09-07 20:12:54 +00002665 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
Denis V. Lunev19375042008-02-28 20:52:04 -08002666 RTM_NEWROUTE, 0, 0);
David S. Miller7b46a642015-01-18 23:36:08 -05002667 if (err < 0)
Thomas Grafd889ce32006-08-17 18:15:44 -07002668 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002669
Eric W. Biederman15e47302012-09-07 20:12:54 +00002670 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
Thomas Grafd889ce32006-08-17 18:15:44 -07002671errout:
Thomas Graf2942e902006-08-15 00:30:25 -07002672 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002673
Thomas Grafd889ce32006-08-17 18:15:44 -07002674errout_free:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002675 kfree_skb(skb);
Thomas Grafd889ce32006-08-17 18:15:44 -07002676 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002677}
2678
Linus Torvalds1da177e2005-04-16 15:20:36 -07002679void ip_rt_multicast_event(struct in_device *in_dev)
2680{
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00002681 rt_cache_flush(dev_net(in_dev->dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002682}
2683
2684#ifdef CONFIG_SYSCTL
/* Defaults for the route garbage-collection knobs exported through the
 * ipv4_route_table sysctl entries below (values in jiffies where noted).
 */
static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;
2688
Joe Perchesfe2c6332013-06-11 23:04:25 -07002689static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002690 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002691 size_t *lenp, loff_t *ppos)
2692{
Timo Teräs5aad1de2013-05-27 20:46:33 +00002693 struct net *net = (struct net *)__ctl->extra1;
2694
Linus Torvalds1da177e2005-04-16 15:20:36 -07002695 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00002696 rt_cache_flush(net);
2697 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002698 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002699 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002700
2701 return -EINVAL;
2702}
2703
/* Global (not per-netns) tunables exposed under net.ipv4.route.  Each entry
 * maps a proc file onto one of the ip_rt_* variables; *_jiffies handlers
 * convert between seconds (or ms) and jiffies on read/write.
 */
static struct ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* Same backing variable as gc_min_interval, millisecond units */
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002814
/* Per-netns write-only "flush" trigger, dispatched to
 * ipv4_sysctl_rtcache_flush(); .data is unused, the netns is passed via
 * extra1 (set in sysctl_route_net_init()).
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};
2824
2825static __net_init int sysctl_route_net_init(struct net *net)
2826{
2827 struct ctl_table *tbl;
2828
2829 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08002830 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002831 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
Ian Morris51456b22015-04-03 09:17:26 +01002832 if (!tbl)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002833 goto err_dup;
Eric W. Biederman464dc802012-11-16 03:02:59 +00002834
2835 /* Don't export sysctls to unprivileged users */
2836 if (net->user_ns != &init_user_ns)
2837 tbl[0].procname = NULL;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002838 }
2839 tbl[0].extra1 = net;
2840
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00002841 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Ian Morris51456b22015-04-03 09:17:26 +01002842 if (!net->ipv4.route_hdr)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002843 goto err_reg;
2844 return 0;
2845
2846err_reg:
2847 if (tbl != ipv4_route_flush_table)
2848 kfree(tbl);
2849err_dup:
2850 return -ENOMEM;
2851}
2852
2853static __net_exit void sysctl_route_net_exit(struct net *net)
2854{
2855 struct ctl_table *tbl;
2856
2857 tbl = net->ipv4.route_hdr->ctl_table_arg;
2858 unregister_net_sysctl_table(net->ipv4.route_hdr);
2859 BUG_ON(tbl == ipv4_route_flush_table);
2860 kfree(tbl);
2861}
2862
/* Hooks the flush sysctl into netns creation/teardown. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002867#endif
2868
Neil Horman3ee94372010-05-08 01:57:52 -07002869static __net_init int rt_genid_init(struct net *net)
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002870{
fan.duca4c3fc2013-07-30 08:33:53 +08002871 atomic_set(&net->ipv4.rt_genid, 0);
Timo Teräs5aad1de2013-05-27 20:46:33 +00002872 atomic_set(&net->fnhe_genid, 0);
David S. Miller436c3b62011-03-24 17:42:21 -07002873 get_random_bytes(&net->ipv4.dev_addr_genid,
2874 sizeof(net->ipv4.dev_addr_genid));
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002875 return 0;
2876}
2877
/* Generation-id state needs only init; there is nothing to free on exit. */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
2881
David S. Millerc3426b42012-06-09 16:27:05 -07002882static int __net_init ipv4_inetpeer_init(struct net *net)
2883{
2884 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2885
2886 if (!bp)
2887 return -ENOMEM;
2888 inet_peer_base_init(bp);
2889 net->ipv4.peers = bp;
2890 return 0;
2891}
2892
2893static void __net_exit ipv4_inetpeer_exit(struct net *net)
2894{
2895 struct inet_peer_base *bp = net->ipv4.peers;
2896
2897 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07002898 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07002899 kfree(bp);
2900}
2901
/* Per-netns lifecycle for the IPv4 inetpeer storage. */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init = ipv4_inetpeer_init,
	.exit = ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002906
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu route classid accounting buffer; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002910
Linus Torvalds1da177e2005-04-16 15:20:36 -07002911int __init ip_rt_init(void)
2912{
Eric Dumazet424c4b72005-07-05 14:58:19 -07002913 int rc = 0;
Eric Dumazet5055c372015-01-14 15:17:06 -08002914 int cpu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002915
Eric Dumazet73f156a2014-06-02 05:26:03 -07002916 ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
2917 if (!ip_idents)
2918 panic("IP: failed to allocate ip_idents\n");
2919
2920 prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
2921
Eric Dumazet355b5902015-05-01 10:37:49 -07002922 ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
2923 if (!ip_tstamps)
2924 panic("IP: failed to allocate ip_tstamps\n");
2925
Eric Dumazet5055c372015-01-14 15:17:06 -08002926 for_each_possible_cpu(cpu) {
2927 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
2928
2929 INIT_LIST_HEAD(&ul->head);
2930 spin_lock_init(&ul->lock);
2931 }
Patrick McHardyc7066f72011-01-14 13:36:42 +01002932#ifdef CONFIG_IP_ROUTE_CLASSID
Ingo Molnar0dcec8c2009-02-25 14:07:33 +01002933 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002934 if (!ip_rt_acct)
2935 panic("IP: failed to allocate ip_rt_acct\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002936#endif
2937
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002938 ipv4_dst_ops.kmem_cachep =
2939 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09002940 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002941
David S. Miller14e50e52007-05-24 18:17:54 -07002942 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
2943
Eric Dumazetfc66f952010-10-08 06:37:34 +00002944 if (dst_entries_init(&ipv4_dst_ops) < 0)
2945 panic("IP: failed to allocate ipv4_dst_ops counter\n");
2946
2947 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
2948 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
2949
David S. Miller89aef892012-07-17 11:00:09 -07002950 ipv4_dst_ops.gc_thresh = ~0;
2951 ip_rt_max_size = INT_MAX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002952
Linus Torvalds1da177e2005-04-16 15:20:36 -07002953 devinet_init();
2954 ip_fib_init();
2955
Denis V. Lunev73b38712008-02-28 20:51:18 -08002956 if (ip_rt_proc_init())
Joe Perches058bd4d2012-03-11 18:36:11 +00002957 pr_err("Unable to create route proc files\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002958#ifdef CONFIG_XFRM
2959 xfrm_init();
Steffen Klassert703fb942012-11-13 08:52:24 +01002960 xfrm4_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002961#endif
Greg Rosec7ac8672011-06-10 01:27:09 +00002962 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
Thomas Graf63f34442007-03-22 11:55:17 -07002963
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002964#ifdef CONFIG_SYSCTL
2965 register_pernet_subsys(&sysctl_route_ops);
2966#endif
Neil Horman3ee94372010-05-08 01:57:52 -07002967 register_pernet_subsys(&rt_genid_ops);
David S. Millerc3426b42012-06-09 16:27:05 -07002968 register_pernet_subsys(&ipv4_inetpeer_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002969 return rc;
2970}
2971
#ifdef CONFIG_SYSCTL
/*
 * Register the static net.ipv4.route sysctl table for init_net.
 *
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif