blob: 741df67a81ec85b90c780dd94ee430440c0dcb9c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
68#include <asm/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Herbert Xu352e5122007-11-13 21:34:06 -080092#include <net/dst.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020093#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070094#include <net/protocol.h>
95#include <net/ip.h>
96#include <net/route.h>
97#include <net/inetpeer.h>
98#include <net/sock.h>
99#include <net/ip_fib.h>
100#include <net/arp.h>
101#include <net/tcp.h>
102#include <net/icmp.h>
103#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700104#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700105#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106#ifdef CONFIG_SYSCTL
107#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000108#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700110#include <net/secure_seq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
David S. Miller68a5e3d2011-03-11 20:07:33 -0500112#define RT_FL_TOS(oldflp4) \
Julian Anastasovf61759e2011-12-02 11:39:42 +0000113 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114
115#define IP_MAX_MTU 0xFFF0
116
117#define RT_GC_TIMEOUT (300*HZ)
118
/*
 * Routing tunables, exposed via sysctl elsewhere in this file.
 * All are read-mostly after boot, hence the __read_mostly placement.
 */
static int ip_rt_max_size;
static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly	= 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
/* Redirect rate limiting: send at most ip_rt_redirect_number redirects
 * with exponential backoff (load), resetting after a silence period. */
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
/* Token-bucket limiting of ICMP error generation (cost per error, burst). */
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
static int ip_rt_gc_elasticity __read_mostly	= 8;
/* Learned PMTU entries expire after this long. */
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
/* 512 + 20 + 20: minimal payload plus IP and TCP header sizes. */
static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500132
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133/*
134 * Interface to generic destination cache.
135 */
136
137static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800138static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000139static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
141static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700142static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
143 struct sk_buff *skb, u32 mtu);
144static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
145 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700146static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147
/*
 * dst_ops->ifdown hook.  Intentionally a no-op: IPv4 routes need no
 * per-dst work when their device goes down (cleanup happens elsewhere),
 * but dst_ops requires the callback to exist.
 */
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152
/*
 * dst_ops->cow_metrics hook.  IPv4 dsts are never expected to need
 * copy-on-write metrics here, so reaching this is a bug: warn loudly
 * and return NULL rather than fabricating a metrics block.
 */
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}
158
David S. Millerf894cbf2012-07-02 21:52:24 -0700159static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
160 struct sk_buff *skb,
161 const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700162
/* Destination-cache operations for IPv4 routes (see struct dst_ops). */
static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.protocol =		cpu_to_be16(ETH_P_IP),
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,	/* always WARNs; see above */
	.destroy =		ipv4_dst_destroy,
	.ifdown =		ipv4_dst_ifdown,	/* no-op */
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
};
179
180#define ECN_OR_COST(class) TC_PRIO_##class
181
/*
 * Map from the IPv4 TOS field to a traffic-control priority band.
 * Entries alternate plain / ECN_OR_COST pairs; presumably indexed by
 * the upper TOS bits — confirm against callers of ip_tos2prio.
 */
const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201
Eric Dumazet2f970d82006-01-17 02:54:36 -0800202static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Eric Dumazet27f39c73e2010-05-19 22:07:23 +0000203#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
207{
Eric Dumazet29e75252008-01-31 17:05:09 -0800208 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700209 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800210 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211}
212
213static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
214{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700216 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217}
218
/* seq_file stop op: nothing to release — start took no locks. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
222
223static int rt_cache_seq_show(struct seq_file *seq, void *v)
224{
225 if (v == SEQ_START_TOKEN)
226 seq_printf(seq, "%-127s\n",
227 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
228 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
229 "HHUptod\tSpecDst");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900230 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231}
232
/* seq_file iterator for /proc/net/rt_cache. */
static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};
239
/* open() handler: plain seq_open, no per-open private state. */
static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}
244
/* file_operations for /proc/net/rt_cache. */
static const struct file_operations rt_cache_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
252
253
254static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
255{
256 int cpu;
257
258 if (*pos == 0)
259 return SEQ_START_TOKEN;
260
Rusty Russell0f23174a2008-12-29 12:23:42 +0000261 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 if (!cpu_possible(cpu))
263 continue;
264 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800265 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 }
267 return NULL;
268}
269
270static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
271{
272 int cpu;
273
Rusty Russell0f23174a2008-12-29 12:23:42 +0000274 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 if (!cpu_possible(cpu))
276 continue;
277 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800278 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279 }
280 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900281
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282}
283
/* seq_file stop op: nothing to undo. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}
288
/*
 * seq_file show op: one header line, then one line of hex counters per
 * CPU.  The first column is the global dst entry count, not per-CPU.
 * NOTE(review): field spacing in the header string reproduced as seen;
 * verify against upstream before reflowing.
 */
static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops), /* global, not per-CPU */
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
		);
	return 0;
}
322
/* seq_file iterator for /proc/net/stat/rt_cache. */
static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};
329
330
/* open() handler for the per-CPU stats file. */
static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}
335
/* file_operations for /proc/net/stat/rt_cache. */
static const struct file_operations rt_cpu_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
343
Patrick McHardyc7066f72011-01-14 13:36:42 +0100344#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800345static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800346{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800347 struct ip_rt_acct *dst, *src;
348 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800349
Alexey Dobriyana661c412009-11-25 15:40:35 -0800350 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
351 if (!dst)
352 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800353
Alexey Dobriyana661c412009-11-25 15:40:35 -0800354 for_each_possible_cpu(i) {
355 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
356 for (j = 0; j < 256; j++) {
357 dst[j].o_bytes += src[j].o_bytes;
358 dst[j].o_packets += src[j].o_packets;
359 dst[j].i_bytes += src[j].i_bytes;
360 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800361 }
362 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800363
364 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
365 kfree(dst);
366 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800367}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800368
/* open() handler: single-shot seq_file, no iterator needed. */
static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}
373
/* file_operations for /proc/net/rt_acct. */
static const struct file_operations rt_acct_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_acct_proc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800381#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800382
/*
 * Per-netns /proc setup: /proc/net/rt_cache, /proc/net/stat/rt_cache
 * and (with CONFIG_IP_ROUTE_CLASSID) /proc/net/rt_acct.
 * On failure, already-created entries are torn down via the goto
 * ladder and -ENOMEM is returned.
 */
static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
			&rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

	/* Unwind in reverse creation order. */
#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800413
/* Per-netns /proc teardown: mirror of ip_rt_do_proc_init. */
static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}
422
/* pernet hooks wiring the /proc entries to netns lifetime. */
static struct pernet_operations ip_rt_proc_ops __net_initdata =	{
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};
427
/* Register the pernet /proc hooks at boot. */
static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}
432
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800433#else
/* !CONFIG_PROC_FS stub: nothing to register, always succeeds. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900439
/*
 * A cached route is stale when its generation id no longer matches the
 * current id of its device's netns (bumped by rt_cache_flush()).
 */
static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}
444
/*
 * Invalidate all cached routes in @net at once by bumping the netns
 * route generation counter; stale entries are detected lazily via
 * rt_is_expired().
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump(net);
}
449
/*
 * dst_ops->neigh_lookup for IPv4.
 *
 * Key selection precedence: the route's gateway if set, else the
 * destination address from @skb's IP header, else @daddr as passed in.
 * Returns an existing ARP neighbour or creates one (neigh_create may
 * return an ERR_PTR — callers are expected to handle that).
 */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}
470
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471/*
472 * Peer allocation may fail only in serious out-of-memory conditions. However
473 * we still can generate some output.
474 * Random ID selection looks a bit dangerous because we have no chances to
475 * select ID being unique in a reasonable period of time.
476 * But broken packet identifier may be better than no packet at all.
477 */
/*
 * Fallback IP ID generation when no inet_peer is available: derive an
 * id from the previous fallback id and the destination address via
 * secure_ip_id().  The static state is serialized by ip_fb_id_lock.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	static DEFINE_SPINLOCK(ip_fb_id_lock);
	static u32 ip_fallback_id;
	u32 salt;

	spin_lock_bh(&ip_fb_id_lock);
	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;	/* feed back for the next caller */
	spin_unlock_bh(&ip_fb_id_lock);
}
490
491void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
492{
David S. Miller1d861aa2012-07-10 03:58:16 -0700493 struct net *net = dev_net(dst->dev);
494 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495
David S. Miller1d861aa2012-07-10 03:58:16 -0700496 peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
497 if (peer) {
498 iph->id = htons(inet_getid(peer, more));
499 inet_putpeer(peer);
500 return;
501 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
503 ip_select_fb_ident(iph);
504}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000505EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506
/*
 * Initialize @fl4 from an IP header plus per-packet metadata.  When a
 * socket is supplied, its bound device, mark, TOS and protocol override
 * the packet-derived values (hdrincl raw sockets report IPPROTO_RAW).
 */
static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0);
}
525
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200526static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
527 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700528{
529 const struct iphdr *iph = ip_hdr(skb);
530 int oif = skb->dev->ifindex;
531 u8 tos = RT_TOS(iph->tos);
532 u8 prot = iph->protocol;
533 u32 mark = skb->mark;
534
535 __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
536}
537
/*
 * Build a flow key purely from socket state (no packet available).
 * With a source-routing IP option the first-hop address (faddr)
 * replaces the connected destination; inet_opt is read under RCU.
 */
static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0);
	rcu_read_unlock();
}
555
/* Build a flow key from the skb when present, else from the socket. */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb)
		build_sk_flow_key(fl4, sk);
	else
		build_skb_flow_key(fl4, skb, sk);
}
564
/* Free a route after an RCU grace period so readers can finish. */
static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}
569
570static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700571
/*
 * Pick the least-recently-stamped exception in a full bucket for reuse.
 * Any cached route attached to the victim is detached and RCU-freed.
 * Caller holds fnhe_lock (so the chain cannot be empty or mutate here);
 * the rcu_dereference calls are for chain traversal consistency.
 */
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;
	struct rtable *orig;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	orig = rcu_dereference(oldest->fnhe_rth);
	if (orig) {
		RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
		rt_free(orig);
	}
	return oldest;
}
590
David S. Millerd3a25c92012-07-17 13:23:08 -0700591static inline u32 fnhe_hashfun(__be32 daddr)
592{
593 u32 hval;
594
595 hval = (__force u32) daddr;
596 hval ^= (hval >> 11) ^ (hval >> 22);
597
598 return hval & (FNHE_HASH_SIZE - 1);
599}
600
/*
 * Record (or refresh) a per-destination next-hop exception on @nh:
 * a learned redirect gateway (@gw) and/or a learned PMTU (@pmtu with
 * its @expires deadline).  The hash table is allocated lazily.  All
 * writers serialize on fnhe_lock; rcu_assign_pointer publishes new
 * entries to lockless readers.  Allocation failures are silently
 * ignored — exceptions are an optimization, not required state.
 */
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = nh->nh_exceptions;
	if (!hash) {
		/* First exception on this nexthop: allocate the table. */
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		nh->nh_exceptions = hash;
	}

	hash += hval;

	/* Look for an existing entry for daddr, counting chain depth. */
	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		/* Update only the fields the caller actually supplied. */
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_expires = expires;
		}
	} else {
		/* New entry: recycle the oldest when the chain is deep. */
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;
	}

	fnhe->fnhe_stamp = jiffies;	/* LRU timestamp for fnhe_oldest() */

out_unlock:
	spin_unlock_bh(&fnhe_lock);
	return;
}
659
/*
 * Process an ICMP redirect for route @rt.
 *
 * The new gateway is validated before being trusted: the redirect must
 * come from the route's current gateway, redirects must be enabled on
 * the receiving device, and the advertised gateway must be a sane
 * unicast/on-link address.  An accepted redirect is stored as a
 * next-hop exception; with @kill_route the dst is marked
 * DST_OBSOLETE_KILL so callers re-resolve.  Rejected redirects are
 * optionally logged (rate-limited) under CONFIG_IP_ROUTE_VERBOSE.
 */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	/* Only the four defined redirect codes are honoured. */
	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	/* Redirect must originate from the gateway we currently use. */
	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
	if (n) {
		if (!(n->nud_state & NUD_VALID)) {
			/* Gateway not yet resolved: kick resolution only. */
			neigh_event_send(n, NULL);
		} else {
			/* Record the new gateway as a nexthop exception. */
			if (fib_lookup(net, fl4, &res) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, 0);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
739
David S. Miller4895c772012-07-17 04:19:00 -0700740static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
741{
742 struct rtable *rt;
743 struct flowi4 fl4;
744
745 rt = (struct rtable *) dst;
746
747 ip_rt_build_flow_key(&fl4, sk, skb);
David S. Millerceb33202012-07-17 11:31:28 -0700748 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700749}
750
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
752{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800753 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 struct dst_entry *ret = dst;
755
756 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000757 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 ip_rt_put(rt);
759 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700760 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
761 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700762 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 ret = NULL;
764 }
765 }
766 return ret;
767}
768
769/*
770 * Algorithm:
771 * 1. The first ip_rt_redirect_number redirects are sent
772 * with exponential backoff, then we stop sending them at all,
773 * assuming that the host ignores our redirects.
774 * 2. If we did not see packets requiring redirects
775 * during ip_rt_redirect_silence, we assume that the host
776 * forgot redirected route and start to send redirects again.
777 *
778 * This algorithm is much cheaper and more intelligent than dumb load limiting
779 * in icmp.c.
780 *
781 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
782 * and "frag. need" (breaks PMTU discovery) in icmp.c.
783 */
784
/* Decide whether to answer a forwarded packet with an ICMP redirect,
 * and send it if the per-source-host rate limit allows.  The limiting
 * state (rate_tokens/rate_last) lives in the inet_peer entry for the
 * packet's source address; the algorithm is described in the comment
 * block above this function.
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;

	/* Hold rcu_read_lock() only long enough to sample the in_device
	 * sysctl bits; the peer below carries its own reference.
	 */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	/* NOTE(review): last argument 1 presumably means "create the peer
	 * entry if missing" — without a peer we cannot rate-limit, so the
	 * redirect is simply sent unconditionally.
	 */
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
	if (!peer) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.  The required gap between redirects doubles with
	 * each one sent (ip_rt_redirect_load << rate_tokens).
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		/* Log once, exactly when the give-up threshold is reached. */
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &rt->rt_gateway);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}
844
845static int ip_error(struct sk_buff *skb)
846{
David S. Miller251da412012-06-26 16:27:09 -0700847 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +0000848 struct rtable *rt = skb_rtable(skb);
David S. Miller92d86822011-02-04 15:55:25 -0800849 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700851 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800852 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 int code;
854
David S. Miller251da412012-06-26 16:27:09 -0700855 net = dev_net(rt->dst.dev);
856 if (!IN_DEV_FORWARD(in_dev)) {
857 switch (rt->dst.error) {
858 case EHOSTUNREACH:
859 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
860 break;
861
862 case ENETUNREACH:
863 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
864 break;
865 }
866 goto out;
867 }
868
Changli Gaod8d1f302010-06-10 23:31:35 -0700869 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000870 case EINVAL:
871 default:
872 goto out;
873 case EHOSTUNREACH:
874 code = ICMP_HOST_UNREACH;
875 break;
876 case ENETUNREACH:
877 code = ICMP_NET_UNREACH;
David S. Miller251da412012-06-26 16:27:09 -0700878 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000879 break;
880 case EACCES:
881 code = ICMP_PKT_FILTERED;
882 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 }
884
David S. Miller1d861aa2012-07-10 03:58:16 -0700885 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800886
887 send = true;
888 if (peer) {
889 now = jiffies;
890 peer->rate_tokens += now - peer->rate_last;
891 if (peer->rate_tokens > ip_rt_error_burst)
892 peer->rate_tokens = ip_rt_error_burst;
893 peer->rate_last = now;
894 if (peer->rate_tokens >= ip_rt_error_cost)
895 peer->rate_tokens -= ip_rt_error_cost;
896 else
897 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -0700898 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 }
David S. Miller92d86822011-02-04 15:55:25 -0800900 if (send)
901 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902
903out: kfree_skb(skb);
904 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900905}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906
/* Record a learned path MTU on @rt (flow identified by @fl4).
 *
 * The value is ignored when it is not actually smaller than the device
 * MTU, and clamped upward to ip_rt_min_pmtu.  Besides updating the
 * rtable, the MTU is stored in the nexthop exception table so that
 * later lookups towards the same destination inherit it.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	/* An MTU at or above the link MTU carries no new information. */
	if (dst->dev->mtu < mtu)
		return;

	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	if (!rt->rt_pmtu) {
		/* This dst carries no PMTU state of its own: mark it
		 * dead so users re-look-up and pick up the exception
		 * entry created below (see ipv4_dst_check()).
		 */
		dst->obsolete = DST_OBSOLETE_KILL;
	} else {
		rt->rt_pmtu = mtu;
		/* max(1UL, ...) keeps expires non-zero even on wrap. */
		dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
	}

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
934
David S. Miller4895c772012-07-17 04:19:00 -0700935static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
936 struct sk_buff *skb, u32 mtu)
937{
938 struct rtable *rt = (struct rtable *) dst;
939 struct flowi4 fl4;
940
941 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +0000942 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -0700943}
944
David S. Miller36393392012-06-14 22:21:46 -0700945void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
946 int oif, u32 mark, u8 protocol, int flow_flags)
947{
David S. Miller4895c772012-07-17 04:19:00 -0700948 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -0700949 struct flowi4 fl4;
950 struct rtable *rt;
951
David S. Miller4895c772012-07-17 04:19:00 -0700952 __build_flow_key(&fl4, NULL, iph, oif,
953 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -0700954 rt = __ip_route_output_key(net, &fl4);
955 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -0700956 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -0700957 ip_rt_put(rt);
958 }
959}
960EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
961
962void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
963{
David S. Miller4895c772012-07-17 04:19:00 -0700964 const struct iphdr *iph = (const struct iphdr *) skb->data;
965 struct flowi4 fl4;
966 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -0700967
David S. Miller4895c772012-07-17 04:19:00 -0700968 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
969 rt = __ip_route_output_key(sock_net(sk), &fl4);
970 if (!IS_ERR(rt)) {
971 __ip_rt_update_pmtu(rt, &fl4, mtu);
972 ip_rt_put(rt);
973 }
David S. Miller36393392012-06-14 22:21:46 -0700974}
975EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -0800976
David S. Millerb42597e2012-07-11 21:25:45 -0700977void ipv4_redirect(struct sk_buff *skb, struct net *net,
978 int oif, u32 mark, u8 protocol, int flow_flags)
979{
David S. Miller4895c772012-07-17 04:19:00 -0700980 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -0700981 struct flowi4 fl4;
982 struct rtable *rt;
983
David S. Miller4895c772012-07-17 04:19:00 -0700984 __build_flow_key(&fl4, NULL, iph, oif,
985 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -0700986 rt = __ip_route_output_key(net, &fl4);
987 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -0700988 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -0700989 ip_rt_put(rt);
990 }
991}
992EXPORT_SYMBOL_GPL(ipv4_redirect);
993
994void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
995{
David S. Miller4895c772012-07-17 04:19:00 -0700996 const struct iphdr *iph = (const struct iphdr *) skb->data;
997 struct flowi4 fl4;
998 struct rtable *rt;
David S. Millerb42597e2012-07-11 21:25:45 -0700999
David S. Miller4895c772012-07-17 04:19:00 -07001000 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1001 rt = __ip_route_output_key(sock_net(sk), &fl4);
1002 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001003 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001004 ip_rt_put(rt);
1005 }
David S. Millerb42597e2012-07-11 21:25:45 -07001006}
1007EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1008
David S. Millerefbc3682011-12-01 13:38:59 -05001009static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1010{
1011 struct rtable *rt = (struct rtable *) dst;
1012
David S. Millerceb33202012-07-17 11:31:28 -07001013 /* All IPV4 dsts are created with ->obsolete set to the value
1014 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1015 * into this function always.
1016 *
1017 * When a PMTU/redirect information update invalidates a
1018 * route, this is indicated by setting obsolete to
1019 * DST_OBSOLETE_KILL.
1020 */
1021 if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
David S. Millerefbc3682011-12-01 13:38:59 -05001022 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001023 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024}
1025
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026static void ipv4_link_failure(struct sk_buff *skb)
1027{
1028 struct rtable *rt;
1029
1030 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1031
Eric Dumazet511c3f92009-06-02 05:14:27 +00001032 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001033 if (rt)
1034 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001035}
1036
1037static int ip_rt_bug(struct sk_buff *skb)
1038{
Joe Perches91df42b2012-05-15 14:11:54 +00001039 pr_debug("%s: %pI4 -> %pI4, %s\n",
1040 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1041 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001043 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044 return 0;
1045}
1046
/*
   We do not cache the source address of the outgoing interface,
   because it is used only by the IP RR, TS and SRR options,
   so it is out of the fast path.

   BTW remember: "addr" is allowed to be unaligned
   in IP options!
 */
1055
/* Fetch the source address to advertise in IP RR/TS/SRR options for
 * the packet in @skb routed by @rt, and copy it into @addr (which may
 * be unaligned inside the options area — hence memcpy).
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		/* Locally generated: the header already has our source. */
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		/* Forwarded packet: ask the FIB which source address we
		 * would use towards this packet's destination.
		 */
		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			/* No matching route: fall back to any address on
			 * the output device with universe scope.
			 */
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}
1088
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Merge @tag into the route's tclassid, one 16-bit half at a time,
 * without overwriting a half that is already set.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	u32 *tclassid = &rt->dst.tclassid;

	if (!(*tclassid & 0xFFFF))
		*tclassid |= tag & 0xFFFF;
	if (!(*tclassid & 0xFFFF0000))
		*tclassid |= tag & 0xFFFF0000;
}
#endif
1098
David S. Miller0dbaee32010-12-13 12:52:14 -08001099static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1100{
1101 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1102
1103 if (advmss == 0) {
1104 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1105 ip_rt_min_advmss);
1106 if (advmss > 65535 - 40)
1107 advmss = 65535 - 40;
1108 }
1109 return advmss;
1110}
1111
Steffen Klassertebb762f2011-11-23 02:12:51 +00001112static unsigned int ipv4_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001113{
Steffen Klassert261663b2011-11-23 02:14:50 +00001114 const struct rtable *rt = (const struct rtable *) dst;
David S. Miller59436342012-07-10 06:58:42 -07001115 unsigned int mtu = rt->rt_pmtu;
1116
Alexander Duyck98d75c32012-08-27 06:30:01 +00001117 if (!mtu || time_after_eq(jiffies, rt->dst.expires))
David S. Miller59436342012-07-10 06:58:42 -07001118 mtu = dst_metric_raw(dst, RTAX_MTU);
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001119
Steffen Klassert261663b2011-11-23 02:14:50 +00001120 if (mtu && rt_is_output_route(rt))
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001121 return mtu;
1122
1123 mtu = dst->dev->mtu;
David S. Millerd33e4552010-12-14 13:01:14 -08001124
1125 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
David S. Millerf8126f12012-07-13 05:03:45 -07001126 if (rt->rt_gateway && mtu > 576)
David S. Millerd33e4552010-12-14 13:01:14 -08001127 mtu = 576;
1128 }
1129
1130 if (mtu > IP_MAX_MTU)
1131 mtu = IP_MAX_MTU;
1132
1133 return mtu;
1134}
1135
David S. Millerf2bb4be2012-07-17 12:20:47 -07001136static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001137{
1138 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1139 struct fib_nh_exception *fnhe;
1140 u32 hval;
1141
David S. Millerf2bb4be2012-07-17 12:20:47 -07001142 if (!hash)
1143 return NULL;
1144
David S. Millerd3a25c92012-07-17 13:23:08 -07001145 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001146
1147 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1148 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001149 if (fnhe->fnhe_daddr == daddr)
1150 return fnhe;
1151 }
1152 return NULL;
1153}
David S. Miller4895c772012-07-17 04:19:00 -07001154
/* Copy the state cached in nexthop exception @fnhe (PMTU, redirect
 * gateway) into @rt and publish @rt as the exception's cached route,
 * replacing and freeing the previous one.  Serialized by fnhe_lock.
 *
 * Returns true when @rt was stored in the exception; false otherwise,
 * in which case @rt is marked DST_NOCACHE so the caller puts it on the
 * uncached list.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	/* Re-check under the lock that the exception still matches. */
	if (daddr == fnhe->fnhe_daddr) {
		struct rtable *orig;

		if (fnhe->fnhe_pmtu) {
			unsigned long expires = fnhe->fnhe_expires;
			unsigned long diff = expires - jiffies;

			/* Only inherit a PMTU that has not expired yet. */
			if (time_before(jiffies, expires)) {
				rt->rt_pmtu = fnhe->fnhe_pmtu;
				dst_set_expires(&rt->dst, diff);
			}
		}
		if (fnhe->fnhe_gw) {
			rt->rt_flags |= RTCF_REDIRECTED;
			rt->rt_gateway = fnhe->fnhe_gw;
		}

		/* Publish @rt for RCU readers, then free the old route. */
		orig = rcu_dereference(fnhe->fnhe_rth);
		rcu_assign_pointer(fnhe->fnhe_rth, rt);
		if (orig)
			rt_free(orig);

		fnhe->fnhe_stamp = jiffies;
		ret = true;
	} else {
		/* Routes we intend to cache in nexthop exception have
		 * the DST_NOCACHE bit clear. However, if we are
		 * unsuccessful at storing this route into the cache
		 * we really need to set it.
		 */
		rt->dst.flags |= DST_NOCACHE;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1198
/* Try to cache @rt in FIB nexthop @nh: input routes go to nh_rth_input,
 * output routes to this CPU's nh_pcpu_rth_output slot.  The slot is
 * swapped in with cmpxchg(); on success the previously cached route is
 * freed.  Returns false (and marks the route DST_NOCACHE) when the
 * route could not be cached.
 */
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		/* No per-cpu output cache allocated for this nexthop. */
		if (!nh->nh_pcpu_rth_output)
			goto nocache;
		p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	/* Lockless install: only succeeds if the slot still holds orig. */
	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig)
			rt_free(orig);
	} else {
		/* Routes we intend to cache in the FIB nexthop have
		 * the DST_NOCACHE bit clear. However, if we are
		 * unsuccessful at storing this route into the cache
		 * we really need to set it.
		 */
nocache:
		rt->dst.flags |= DST_NOCACHE;
		ret = false;
	}

	return ret;
}
1230
/* Global list of routes that could not be cached anywhere, so that
 * rt_flush_dev() can still find them when their device goes away.
 */
static DEFINE_SPINLOCK(rt_uncached_lock);
static LIST_HEAD(rt_uncached_list);

/* Track @rt on the uncached list; called when caching the route in a
 * nexthop (exception) failed.
 */
static void rt_add_uncached_list(struct rtable *rt)
{
	spin_lock_bh(&rt_uncached_lock);
	list_add_tail(&rt->rt_uncached, &rt_uncached_list);
	spin_unlock_bh(&rt_uncached_lock);
}
1240
1241static void ipv4_dst_destroy(struct dst_entry *dst)
1242{
1243 struct rtable *rt = (struct rtable *) dst;
1244
Eric Dumazet78df76a2012-08-24 05:40:47 +00001245 if (!list_empty(&rt->rt_uncached)) {
David S. Millercaacf052012-07-31 15:06:50 -07001246 spin_lock_bh(&rt_uncached_lock);
1247 list_del(&rt->rt_uncached);
1248 spin_unlock_bh(&rt_uncached_lock);
1249 }
1250}
1251
/* Detach every uncached route still pointing at @dev by re-pointing it
 * at the netns loopback device, moving the device reference over so
 * @dev can be unregistered.
 */
void rt_flush_dev(struct net_device *dev)
{
	/* Unlocked emptiness test is just a fast-path skip; the actual
	 * walk happens under rt_uncached_lock.
	 */
	if (!list_empty(&rt_uncached_list)) {
		struct net *net = dev_net(dev);
		struct rtable *rt;

		spin_lock_bh(&rt_uncached_lock);
		list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			/* Swap the dst's device reference: hold loopback,
			 * drop @dev.
			 */
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&rt_uncached_lock);
	}
}
1269
Eric Dumazet4331deb2012-07-25 05:11:23 +00001270static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba2012-07-17 12:58:50 -07001271{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001272 return rt &&
1273 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1274 !rt_is_expired(rt);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001275}
1276
/* Finish initializing @rt from a FIB lookup result: gateway, metrics,
 * classid, and storage in either the nexthop exception (@fnhe), the
 * nexthop route cache, or — failing both — the global uncached list.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Only adopt the nexthop gateway for directly-connected
		 * (link scope) nexthops.
		 */
		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
			rt->rt_gateway = nh->nh_gw;
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
	}
	/* Not stored anywhere: track it so rt_flush_dev() can find it. */
	if (unlikely(!cached))
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1308
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001309static struct rtable *rt_dst_alloc(struct net_device *dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001310 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001311{
David S. Millerf5b0a872012-07-19 12:31:33 -07001312 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
David S. Millerc6cffba2012-07-26 11:14:38 +00001313 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001314 (nopolicy ? DST_NOPOLICY : 0) |
1315 (noxfrm ? DST_NOXFRM : 0));
David S. Miller0c4dcd52011-02-17 15:42:37 -08001316}
1317
/* called in rcu_read_lock() section */
/* Input route resolution for multicast destinations: sanity-check the
 * source address, then build a fresh, non-cached RTCF_MULTICAST route.
 * @our != 0 means this host belongs to the group, so the packet is
 * delivered locally (RTCF_LOCAL, ip_local_deliver).
 *
 * Returns 0 on success and attaches the route to @skb; otherwise
 * -EINVAL, -ENOBUFS, or the fib_validate_source() error.
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	/* Multicast/broadcast sources are never valid, nor is non-IP. */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	/* Loopback sources are only allowed with route_localnet. */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(saddr))
			goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		/* 0.0.0.0 sources only for link-local multicast (IGMP etc). */
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	/* Never cached: will_cache == false. */
	rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* Multicast input routes must never be used for output. */
	rth->dst.output = ip_rt_bug;

	rth->rt_genid = rt_genid(dev_net(dev));
	rth->rt_flags = RTCF_MULTICAST;
	rth->rt_type = RTN_MULTICAST;
	rth->rt_is_input= 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (our) {
		rth->dst.input= ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	/* Non-link-local groups go through the multicast router. */
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}
1388
1389
1390static void ip_handle_martian_source(struct net_device *dev,
1391 struct in_device *in_dev,
1392 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001393 __be32 daddr,
1394 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395{
1396 RT_CACHE_STAT_INC(in_martian_src);
1397#ifdef CONFIG_IP_ROUTE_VERBOSE
1398 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1399 /*
1400 * RFC1812 recommendation, if source is martian,
1401 * the only hint is MAC header.
1402 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001403 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001404 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001405 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001406 print_hex_dump(KERN_WARNING, "ll header: ",
1407 DUMP_PREFIX_OFFSET, 16, 1,
1408 skb_mac_header(skb),
1409 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410 }
1411 }
1412#endif
1413}
1414
/* called in rcu_read_lock() section */
/*
 * Build the forwarding route for a packet transiting this host and
 * attach the resulting dst to @skb.  When the nexthop already holds a
 * valid cached input route (and no classid tag was produced by source
 * validation), that cached dst is reused without taking a reference.
 *
 * Returns 0 on success.  Negative errno on failure:
 *   -EINVAL   missing output in_device, or a non-IP packet rejected by
 *             the proxy-ARP/private-VLAN check below;
 *   -ENOBUFS  rtable allocation failed;
 *   the fib_validate_source() error when the source is martian (the
 *   packet is logged via ip_handle_martian_source() in that case).
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned int flags = 0;
	bool do_cache;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}


	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* Packet would leave on the interface it arrived on: flag it for
	 * an ICMP redirect when the media is shared or the gateway is
	 * on-link for the sender.
	 */
	if (out_dev == in_dev && err &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		flags |= RTCF_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	/* Try the per-nexthop cached input route; only usable (and only
	 * refreshed) when source validation produced no classid tag.
	 */
	do_cache = false;
	if (res->fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(out_dev->dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
	rth->rt_flags = flags;
	rth->rt_type = res->type;
	rth->rt_is_input = 1;
	rth->rt_iif 	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	rth->dst.input = ip_forward;
	rth->dst.output = ip_output;

	rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504
/*
 * Input-route construction wrapper: for multipath routes, select one
 * nexthop first, then delegate to __mkroute_input() to build (or reuse)
 * the dst and attach it to @skb.  Returns __mkroute_input()'s result.
 *
 * NOTE(review): @fl4 is not referenced in this body; it is only part of
 * the signature.
 */
static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    const struct flowi4 *fl4,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1)
		fib_select_multipath(res);
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
1519
/*
 *	NOTE. We drop all packets that have a local source
 *	address, because every properly looped-back packet
 *	must already have the correct destination attached by the output routine.
 *
 *	Such an approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with a 100% guarantee.
 *	Called with rcu_read_lock() held.
 */
1530
/*
 * Slow-path input route resolution: reject martian addresses, perform
 * the FIB lookup for (@daddr, @saddr, @tos) on @dev, and build/attach
 * the resulting dst to @skb (broadcast, local-delivery, or forwarded
 * via ip_mkroute_input()).  Runs under rcu_read_lock() (see the NOTE
 * comment above).
 *
 * Returns 0 on success; otherwise a negative errno, e.g. -EINVAL for
 * martian/invalid packets, -ENOBUFS on allocation failure, or
 * -ENETUNREACH when the lookup finds no route.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flowi4	fl4;
	unsigned int	flags = 0;
	u32		itag = 0;
	struct rtable	*rth;
	int		err = -EINVAL;
	struct net    *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and call it once if daddr or/and saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res);
	if (err != 0)
		goto no_route;

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  LOOPBACK_IFINDEX,
					  dev, in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto no_route;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Reuse the nexthop's cached input route when possible; caching
	 * is only allowed when source validation set no classid tag.
	 */
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	rth->dst.input= ip_local_deliver;
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif

	rth->rt_genid = rt_genid(net);
	rth->rt_flags 	= flags|RTCF_LOCAL;
	rth->rt_type	= res.type;
	rth->rt_is_input = 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	/* no_route jumps here with res.type == RTN_UNREACHABLE: deliver
	 * the packet to ip_error so the proper ICMP error is generated.
	 */
	if (res.type == RTN_UNREACHABLE) {
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags 	&= ~RTCF_LOCAL;
	}
	if (do_cache)
		rt_cache_route(&FIB_RES_NH(res), rth);
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	if (err == -ESRCH)
		err = -ENETUNREACH;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif
	/* martian_destination deliberately falls through to e_inval */

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	err = -EINVAL;
martian_source_keep_err:
	/* Entered directly to preserve fib_validate_source()'s errno */
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
1706
David S. Millerc6cffba2012-07-26 11:14:38 +00001707int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1708 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709{
Eric Dumazet96d36222010-06-02 19:21:31 +00001710 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711
Eric Dumazet96d36222010-06-02 19:21:31 +00001712 rcu_read_lock();
1713
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 /* Multicast recognition logic is moved from route cache to here.
1715 The problem was that too many Ethernet cards have broken/missing
1716 hardware multicast filters :-( As result the host on multicasting
1717 network acquires a lot of useless route cache entries, sort of
1718 SDR messages from all the world. Now we try to get rid of them.
1719 Really, provided software IP multicast filter is organized
1720 reasonably (at least, hashed), it does not result in a slowdown
1721 comparing with route cache reject entries.
1722 Note, that multicast routers are not affected, because
1723 route cache entry is created eventually.
1724 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001725 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001726 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727
Eric Dumazet96d36222010-06-02 19:21:31 +00001728 if (in_dev) {
David S. Millerdbdd9a52011-03-10 16:34:38 -08001729 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1730 ip_hdr(skb)->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731 if (our
1732#ifdef CONFIG_IP_MROUTE
Joe Perches9d4fb272009-11-23 10:41:23 -08001733 ||
1734 (!ipv4_is_local_multicast(daddr) &&
1735 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736#endif
Joe Perches9d4fb272009-11-23 10:41:23 -08001737 ) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001738 int res = ip_route_input_mc(skb, daddr, saddr,
1739 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00001741 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742 }
1743 }
1744 rcu_read_unlock();
1745 return -EINVAL;
1746 }
David S. Millerc10237e2012-06-27 17:05:06 -07001747 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00001748 rcu_read_unlock();
1749 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750}
David S. Millerc6cffba2012-07-26 11:14:38 +00001751EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752
/* called with rcu_read_lock() */
/*
 * Build the rtable for an output route described by @res/@fl4 on
 * @dev_out, or return a still-valid cached route (from the matching
 * nexthop exception, else from the nexthop's per-cpu output cache —
 * with a dst reference taken via dst_hold()).
 *
 * @orig_oif is the caller-requested output ifindex before
 * __ip_route_output_key() rewrote fl4->flowi4_oif; it seeds rt_iif.
 *
 * Returns the rtable, or ERR_PTR():
 *   -EINVAL   no in_device on @dev_out, loopback saddr on a
 *             non-loopback device (unless route_localnet), or zeronet
 *             destination;
 *   -ENOBUFS  rtable allocation failed.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
			return ERR_PTR(-EINVAL);

	/* Classify the destination; this overrides the FIB result type
	 * for broadcast/multicast addresses.
	 */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	}

	/* Cache lookup: prefer the nexthop exception's cached route for
	 * this daddr, else the per-cpu cached output route.
	 */
	fnhe = NULL;
	if (fi) {
		struct rtable __rcu **prth;

		fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
		if (fnhe)
			prth = &fnhe->fnhe_rth;
		else
			prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth)) {
			dst_hold(&rth->dst);
			return rth;
		}
	}
	rth = rt_dst_alloc(dev_out,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   fi);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->dst.output = ip_output;

	rth->rt_genid = rt_genid(dev_net(dev_out));
	rth->rt_flags	= flags;
	rth->rt_type	= type;
	rth->rt_is_input = 0;
	rth->rt_iif	= orig_oif ? : 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL)
		rth->dst.input = ip_local_deliver;
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);

	return rth;
}
1857
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858/*
1859 * Major route resolver routine.
1860 */
1861
/*
 * Resolve an output route for the flow described by @fl4.  The flowi4
 * is modified in place: a missing source address and/or output
 * interface is filled in as the resolution proceeds.  Takes
 * rcu_read_lock() internally for the whole lookup.
 *
 * Returns the rtable built by __mkroute_output(), or ERR_PTR():
 *   -EINVAL      martian source address;
 *   -ENODEV      requested oif does not exist;
 *   -ENETUNREACH oif is down/unconfigured, or no route and no oif
 *                was given.
 */
struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
{
	struct net_device *dev_out = NULL;
	__u8 tos = RT_FL_TOS(fl4);
	unsigned int flags = 0;
	struct fib_result res;
	struct rtable *rth;
	int orig_oif;

	res.tclassid	= 0;
	res.fi		= NULL;
	res.table	= NULL;

	/* Remember the caller's oif before we overwrite it below; it is
	 * passed through to __mkroute_output() for rt_iif.
	 */
	orig_oif = fl4->flowi4_oif;

	fl4->flowi4_iif = LOOPBACK_IFINDEX;
	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);

	rcu_read_lock();
	if (fl4->saddr) {
		rth = ERR_PTR(-EINVAL);
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr))
			goto out;

		/* I removed check for oif == dev_out->oif here.
		   It was wrong for two reasons:
		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
		      is assigned to multiple interfaces.
		   2. Moreover, we are allowed to send packets with saddr
		      of another iface. --ANK
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (dev_out == NULL)
				goto out;

			/* Special hack: user can direct multicasts
			   and limited broadcast via necessary interface
			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			   This hack is not just for fun, it allows
			   vic,vat and friends to work.
			   They bind socket to loopback, set ttl to zero
			   and expect that it will work.
			   From the viewpoint of routing cache they are broken,
			   because we are not allowed to build multicast path
			   with loopback source addr (look, routing cache
			   cannot know, that ttl is zero, so that packet
			   will not leave this host and route is valid).
			   Luckily, this hack is good workaround.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (dev_out == NULL)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr)) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	/* No destination at all: route to loopback */
	if (!fl4->daddr) {
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = LOOPBACK_IFINDEX;
		res.type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	if (fib_lookup(net, fl4, &res)) {
		res.fi = NULL;
		res.table = NULL;
		if (fl4->flowi4_oif) {
			/* Apparently, routing tables are wrong. Assume,
			   that the destination is on link.

			   WHY? DW.
			   Because we are allowed to send to iface
			   even if it has NO routes and NO assigned
			   addresses. When oif is specified, routing
			   tables are looked up with only one purpose:
			   to catch if destination is gatewayed, rather than
			   direct. Moreover, if MSG_DONTROUTE is set,
			   we send packet, ignoring both routing tables
			   and ifaddr state. --ANK


			   We could make it even if oif is unknown,
			   likely IPv6, but we do not.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res.type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(-ENETUNREACH);
		goto out;
	}

	if (res.type == RTN_LOCAL) {
		if (!fl4->saddr) {
			if (res.fi->fib_prefsrc)
				fl4->saddr = res.fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = dev_out->ifindex;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
		fib_select_multipath(&res);
	else
#endif
	if (!res.prefixlen &&
	    res.table->tb_num_default > 1 &&
	    res.type == RTN_UNICAST && !fl4->flowi4_oif)
		fib_select_default(&res);

	if (!fl4->saddr)
		fl4->saddr = FIB_RES_PREFSRC(net, res);

	dev_out = FIB_RES_DEV(res);
	fl4->flowi4_oif = dev_out->ifindex;


make_route:
	rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);

out:
	rcu_read_unlock();
	return rth;
}
EXPORT_SYMBOL_GPL(__ip_route_output_key);
2042
/*
 * .check callback for the blackhole dst_ops: unconditionally returns NULL,
 * i.e. the cached dst is never reported as still valid.  (NOTE(review):
 * NULL from ->check conventionally forces callers to relookup — confirm
 * against dst_check() in the dst core.)
 */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2047
Steffen Klassertebb762f2011-11-23 02:12:51 +00002048static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002049{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002050 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2051
2052 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002053}
2054
/*
 * .update_pmtu callback for the blackhole dst_ops: intentionally a no-op,
 * so PMTU feedback is discarded on blackhole routes.
 */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}
2059
/*
 * .redirect callback for the blackhole dst_ops: intentionally a no-op,
 * so ICMP redirect input is ignored on blackhole routes.
 */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}
2064
/*
 * .cow_metrics callback for the blackhole dst_ops: never provides a
 * writable metrics block (returns NULL), so metrics on a blackhole dst
 * cannot be copied-on-write.
 */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2070
/*
 * dst_ops for "blackhole" routes created by ipv4_blackhole_route().
 * All mutating callbacks (update_pmtu, redirect, cow_metrics) are stubs,
 * and ->check always invalidates; advmss and neigh lookup reuse the
 * regular IPv4 implementations.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			= AF_INET,
	.protocol		= cpu_to_be16(ETH_P_IP),
	.check			= ipv4_blackhole_dst_check,
	.mtu			= ipv4_blackhole_mtu,
	.default_advmss		= ipv4_default_advmss,
	.update_pmtu		= ipv4_rt_blackhole_update_pmtu,
	.redirect		= ipv4_rt_blackhole_redirect,
	.cow_metrics		= ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		= ipv4_neigh_lookup,
};
2082
/*
 * Clone @dst_orig into a new rtable backed by ipv4_dst_blackhole_ops: a
 * dst that discards all traffic (input/output -> dst_discard) while still
 * carrying the original route's identity fields (iif, pmtu, flags, type,
 * gateway, genid).
 *
 * Consumes a reference on @dst_orig in all cases (dst_release at the end).
 * Returns the new dst, or ERR_PTR(-ENOMEM) if allocation failed.
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Blackhole: both directions drop the packet. */
		new->input = dst_discard;
		new->output = dst_discard;

		/* Keep a reference on the original route's device. */
		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		/* Copy route identity from the original rtable. */
		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;

		rt->rt_genid = rt_genid(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);

		/* NOTE(review): dst_free() on a just-built dst presumably
		 * marks it dead/uncached rather than destroying it — confirm
		 * against the dst core before touching this. */
		dst_free(new);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2118
David S. Miller9d6ec932011-03-12 01:12:47 -05002119struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002120 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121{
David S. Miller9d6ec932011-03-12 01:12:47 -05002122 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123
David S. Millerb23dd4f2011-03-02 14:31:35 -08002124 if (IS_ERR(rt))
2125 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126
David S. Miller56157872011-05-02 14:37:45 -07002127 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002128 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2129 flowi4_to_flowi(flp4),
2130 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131
David S. Millerb23dd4f2011-03-02 14:31:35 -08002132 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002134EXPORT_SYMBOL_GPL(ip_route_output_flow);
2135
/*
 * Fill one RTM_NEWROUTE netlink message describing the route attached to
 * @skb (via skb_rtable) into the reply skb.
 *
 * @dst/@src are the addresses to report; @fl4 supplies tos, mark and the
 * preferred source address.  @nowait is unused in this implementation.
 *
 * Returns the value of nlmsg_end() on success, -EMSGSIZE if the message
 * did not fit.
 */
static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
			u32 seq, int event, int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	/* Fixed rtmsg header. */
	r = nlmsg_data(nlh);
	r->rtm_family	 = AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= fl4->flowi4_tos;
	r->rtm_table	= RT_TABLE_MAIN;
	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
		goto nla_put_failure;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;

	/* Optional attributes; any put failure unwinds via nla_put_failure. */
	if (nla_put_be32(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_be32(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* Report the preferred source only for output routes where it
	 * differs from the requested source address. */
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_gateway &&
	    nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	/* Copy the metrics, overriding MTU with the learned PMTU if set. */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	error = rt->dst.error;
	/* Convert the absolute expiry time to a remaining interval. */
	expires = rt->dst.expires;
	if (expires) {
		if (time_before(jiffies, expires))
			expires -= jiffies;
		else
			expires = 0;
	}

	if (rt_is_input_route(rt)) {
		if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
			goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2223
/*
 * RTM_GETROUTE handler: resolve a single route for the parameters in the
 * request and unicast an RTM_NEWROUTE reply back to the requester.
 *
 * If RTA_IIF is given, the lookup is performed as if a packet had arrived
 * on that interface (ip_route_input on a faked skb); otherwise a normal
 * output-route lookup is done via ip_route_output_key.
 *
 * Returns 0 on success or a negative errno.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	/* Optional request attributes; all default to 0 when absent. */
	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;

	if (iif) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (dev == NULL) {
			err = -ENODEV;
			goto errout_free;
		}

		/* Simulate packet reception on @dev. */
		skb->protocol	= htons(ETH_P_IP);
		skb->dev	= dev;
		skb->mark	= mark;
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		/* Lookup may succeed yet attach an error route. */
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	err = rt_fill_info(net, dst, src, &fl4, skb,
			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err <= 0)
		goto errout_free;

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}
2320
/*
 * Route-cache dump callback: nothing to dump, just report the current
 * skb length so the netlink dump terminates cleanly.
 */
int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
{
	return skb->len;
}
2325
/*
 * Multicast configuration changed on @in_dev: flush cached routes for the
 * device's network namespace.
 */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}
2330
2331#ifdef CONFIG_SYSCTL
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002332static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002333 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002334 size_t *lenp, loff_t *ppos)
2335{
2336 if (write) {
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00002337 rt_cache_flush((struct net *)__ctl->extra1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002339 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002340
2341 return -EINVAL;
2342}
2343
/*
 * Global (non per-net) net.ipv4.route.* sysctl knobs.  Each entry binds a
 * proc name to one of the module's tunables; jiffies-valued tunables use
 * the *_jiffies handlers so userspace reads/writes seconds (or ms).
 */
static ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* Same variable as above, exposed in milliseconds. */
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }	/* sentinel */
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002454
/*
 * Per-net sysctl template: the single write-only "flush" trigger.  Entry 0
 * gets its ->extra1 pointed at the owning struct net in
 * sysctl_route_net_init().
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },	/* sentinel */
};
2464
/*
 * Register net/ipv4/route sysctls for @net.  The init_net uses the static
 * template directly; other namespaces get a kmemdup'd copy so each can
 * carry its own ->extra1 back-pointer.  Returns 0 or -ENOMEM.
 */
static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;
	}
	/* Let the flush handler find the owning namespace. */
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (net->ipv4.route_hdr == NULL)
		goto err_reg;
	return 0;

err_reg:
	/* Only free the table if it was our private copy. */
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}
2488
2489static __net_exit void sysctl_route_net_exit(struct net *net)
2490{
2491 struct ctl_table *tbl;
2492
2493 tbl = net->ipv4.route_hdr->ctl_table_arg;
2494 unregister_net_sysctl_table(net->ipv4.route_hdr);
2495 BUG_ON(tbl == ipv4_route_flush_table);
2496 kfree(tbl);
2497}
2498
/* Per-net lifecycle hooks for the route sysctls. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002503#endif
2504
/*
 * Per-net init: start the route generation counter at 0 and randomize the
 * seed used for device-address generation IDs.  Always succeeds.
 */
static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->rt_genid, 0);
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}
2512
/* Per-net hook for generation-id setup; no exit needed. */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
2516
/*
 * Per-net init: allocate and initialize this namespace's inet_peer base.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int __net_init ipv4_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv4.peers = bp;
	return 0;
}
2527
/*
 * Per-net teardown: detach the namespace's inet_peer base, invalidate the
 * peer tree it holds, then free it.
 */
static void __net_exit ipv4_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv4.peers;

	net->ipv4.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
2536
/* Per-net lifecycle hooks for the inet_peer base. */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init	=	ipv4_inetpeer_init,
	.exit	=	ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002541
Patrick McHardyc7066f72011-01-14 13:36:42 +01002542#ifdef CONFIG_IP_ROUTE_CLASSID
Tejun Heo7d720c32010-02-16 15:20:26 +00002543struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002544#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002545
/*
 * Boot-time initialization of the IPv4 routing layer: slab caches, dst
 * entry counters, devinet/FIB setup, procfs files, optional xfrm hookup,
 * the RTM_GETROUTE handler and the per-net subsystems.  Allocation
 * failures here are fatal (panic) since routing cannot work without them.
 */
int __init ip_rt_init(void)
{
	int rc = 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Blackhole dsts share the regular rtable slab. */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	/* Effectively disable gc thresholds by default. */
	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	/* proc files are informational; failure is logged, not fatal. */
	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init(ip_rt_max_size);
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}
2589
Al Viroa1bc6eb2008-07-30 06:32:52 -04002590#ifdef CONFIG_SYSCTL
Al Viroeeb61f72008-07-27 08:59:33 +01002591/*
2592 * We really need to sanitize the damn ipv4 init order, then all
2593 * this nonsense will go away.
2594 */
/* Register the global (init_net) net/ipv4/route sysctl table at boot. */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
Al Viroa1bc6eb2008-07-30 06:32:52 -04002599#endif