blob: b102eeb16e34ad03b8647b41d78ac388f4267255 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115
/* TOS bits of a flow key that matter here, plus the RTO_ONLINK flag. */
#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define IP_MAX_MTU	0xFFF0

/* Default for ip_rt_gc_timeout: 300 * HZ jiffies. */
#define RT_GC_TIMEOUT	(300*HZ)
122
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123static int ip_rt_max_size;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700124static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500125static int ip_rt_gc_interval __read_mostly = 60 * HZ;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700126static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
127static int ip_rt_redirect_number __read_mostly = 9;
128static int ip_rt_redirect_load __read_mostly = HZ / 50;
129static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
130static int ip_rt_error_cost __read_mostly = HZ;
131static int ip_rt_error_burst __read_mostly = 5 * HZ;
132static int ip_rt_gc_elasticity __read_mostly = 8;
133static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
134static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
135static int ip_rt_min_advmss __read_mostly = 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500136
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137/*
138 * Interface to generic destination cache.
139 */
140
141static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800142static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000143static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
145static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700146static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
147 struct sk_buff *skb, u32 mtu);
148static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
149 struct sk_buff *skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150
/* ->ifdown callback of ipv4_dst_ops: intentionally does nothing. */
static void ipv4_dst_ifdown(struct dst_entry *dst,
			    struct net_device *dev, int how)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155
David S. Miller62fa8a82011-01-26 20:51:05 -0800156static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
157{
David S. Miller31248732012-07-10 07:08:18 -0700158 WARN_ON(1);
159 return NULL;
David S. Miller62fa8a82011-01-26 20:51:05 -0800160}
161
/* Defined further down; needed early for the ipv4_dst_ops initializer. */
static struct neighbour *
ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb,
		  const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700165
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166static struct dst_ops ipv4_dst_ops = {
167 .family = AF_INET,
Harvey Harrison09640e62009-02-01 00:45:17 -0800168 .protocol = cpu_to_be16(ETH_P_IP),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 .check = ipv4_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800170 .default_advmss = ipv4_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000171 .mtu = ipv4_mtu,
David S. Miller62fa8a82011-01-26 20:51:05 -0800172 .cow_metrics = ipv4_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 .ifdown = ipv4_dst_ifdown,
174 .negative_advice = ipv4_negative_advice,
175 .link_failure = ipv4_link_failure,
176 .update_pmtu = ip_rt_update_pmtu,
David S. Millere47a1852012-07-11 20:55:47 -0700177 .redirect = ip_do_redirect,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700178 .local_out = __ip_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700179 .neigh_lookup = ipv4_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180};
181
182#define ECN_OR_COST(class) TC_PRIO_##class
183
Philippe De Muyter4839c522007-07-09 15:32:57 -0700184const __u8 ip_tos2prio[16] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185 TC_PRIO_BESTEFFORT,
Dan Siemon4a2b9c32011-03-15 13:56:07 +0000186 ECN_OR_COST(BESTEFFORT),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 TC_PRIO_BESTEFFORT,
188 ECN_OR_COST(BESTEFFORT),
189 TC_PRIO_BULK,
190 ECN_OR_COST(BULK),
191 TC_PRIO_BULK,
192 ECN_OR_COST(BULK),
193 TC_PRIO_INTERACTIVE,
194 ECN_OR_COST(INTERACTIVE),
195 TC_PRIO_INTERACTIVE,
196 ECN_OR_COST(INTERACTIVE),
197 TC_PRIO_INTERACTIVE_BULK,
198 ECN_OR_COST(INTERACTIVE_BULK),
199 TC_PRIO_INTERACTIVE_BULK,
200 ECN_OR_COST(INTERACTIVE_BULK)
201};
Amir Vadaid4a96862012-04-04 21:33:28 +0000202EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203
Eric Dumazet2f970d82006-01-17 02:54:36 -0800204static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Eric Dumazet27f39c73e2010-05-19 22:07:23 +0000205#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700207static inline int rt_genid(struct net *net)
208{
209 return atomic_read(&net->ipv4.rt_genid);
210}
211
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
214{
Eric Dumazet29e75252008-01-31 17:05:09 -0800215 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700216 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800217 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218}
219
220static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
221{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700223 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224}
225
/* seq_file ->stop: nothing was acquired in ->start, so nothing to undo. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
229
230static int rt_cache_seq_show(struct seq_file *seq, void *v)
231{
232 if (v == SEQ_START_TOKEN)
233 seq_printf(seq, "%-127s\n",
234 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
235 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
236 "HHUptod\tSpecDst");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900237 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238}
239
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700240static const struct seq_operations rt_cache_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 .start = rt_cache_seq_start,
242 .next = rt_cache_seq_next,
243 .stop = rt_cache_seq_stop,
244 .show = rt_cache_seq_show,
245};
246
247static int rt_cache_seq_open(struct inode *inode, struct file *file)
248{
David S. Miller89aef892012-07-17 11:00:09 -0700249 return seq_open(file, &rt_cache_seq_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250}
251
Arjan van de Ven9a321442007-02-12 00:55:35 -0800252static const struct file_operations rt_cache_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 .owner = THIS_MODULE,
254 .open = rt_cache_seq_open,
255 .read = seq_read,
256 .llseek = seq_lseek,
David S. Miller89aef892012-07-17 11:00:09 -0700257 .release = seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258};
259
260
261static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
262{
263 int cpu;
264
265 if (*pos == 0)
266 return SEQ_START_TOKEN;
267
Rusty Russell0f23174a2008-12-29 12:23:42 +0000268 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 if (!cpu_possible(cpu))
270 continue;
271 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800272 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 }
274 return NULL;
275}
276
277static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
278{
279 int cpu;
280
Rusty Russell0f23174a2008-12-29 12:23:42 +0000281 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 if (!cpu_possible(cpu))
283 continue;
284 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800285 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 }
287 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900288
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289}
290
/* seq_file ->stop: no state to release. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
295
296static int rt_cpu_seq_show(struct seq_file *seq, void *v)
297{
298 struct rt_cache_stat *st = v;
299
300 if (v == SEQ_START_TOKEN) {
Olaf Rempel5bec0032005-04-28 12:16:08 -0700301 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 return 0;
303 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900304
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
306 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
Eric Dumazetfc66f952010-10-08 06:37:34 +0000307 dst_entries_get_slow(&ipv4_dst_ops),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308 st->in_hit,
309 st->in_slow_tot,
310 st->in_slow_mc,
311 st->in_no_route,
312 st->in_brd,
313 st->in_martian_dst,
314 st->in_martian_src,
315
316 st->out_hit,
317 st->out_slow_tot,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900318 st->out_slow_mc,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319
320 st->gc_total,
321 st->gc_ignored,
322 st->gc_goal_miss,
323 st->gc_dst_overflow,
324 st->in_hlist_search,
325 st->out_hlist_search
326 );
327 return 0;
328}
329
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700330static const struct seq_operations rt_cpu_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 .start = rt_cpu_seq_start,
332 .next = rt_cpu_seq_next,
333 .stop = rt_cpu_seq_stop,
334 .show = rt_cpu_seq_show,
335};
336
337
338static int rt_cpu_seq_open(struct inode *inode, struct file *file)
339{
340 return seq_open(file, &rt_cpu_seq_ops);
341}
342
Arjan van de Ven9a321442007-02-12 00:55:35 -0800343static const struct file_operations rt_cpu_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 .owner = THIS_MODULE,
345 .open = rt_cpu_seq_open,
346 .read = seq_read,
347 .llseek = seq_lseek,
348 .release = seq_release,
349};
350
Patrick McHardyc7066f72011-01-14 13:36:42 +0100351#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800352static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800353{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800354 struct ip_rt_acct *dst, *src;
355 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800356
Alexey Dobriyana661c412009-11-25 15:40:35 -0800357 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
358 if (!dst)
359 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800360
Alexey Dobriyana661c412009-11-25 15:40:35 -0800361 for_each_possible_cpu(i) {
362 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
363 for (j = 0; j < 256; j++) {
364 dst[j].o_bytes += src[j].o_bytes;
365 dst[j].o_packets += src[j].o_packets;
366 dst[j].i_bytes += src[j].i_bytes;
367 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800368 }
369 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800370
371 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
372 kfree(dst);
373 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800374}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800375
376static int rt_acct_proc_open(struct inode *inode, struct file *file)
377{
378 return single_open(file, rt_acct_proc_show, NULL);
379}
380
381static const struct file_operations rt_acct_proc_fops = {
382 .owner = THIS_MODULE,
383 .open = rt_acct_proc_open,
384 .read = seq_read,
385 .llseek = seq_lseek,
386 .release = single_release,
387};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800388#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800389
Denis V. Lunev73b38712008-02-28 20:51:18 -0800390static int __net_init ip_rt_do_proc_init(struct net *net)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800391{
392 struct proc_dir_entry *pde;
393
394 pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
395 &rt_cache_seq_fops);
396 if (!pde)
397 goto err1;
398
Wang Chen77020722008-02-28 14:14:25 -0800399 pde = proc_create("rt_cache", S_IRUGO,
400 net->proc_net_stat, &rt_cpu_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800401 if (!pde)
402 goto err2;
403
Patrick McHardyc7066f72011-01-14 13:36:42 +0100404#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800405 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800406 if (!pde)
407 goto err3;
408#endif
409 return 0;
410
Patrick McHardyc7066f72011-01-14 13:36:42 +0100411#ifdef CONFIG_IP_ROUTE_CLASSID
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800412err3:
413 remove_proc_entry("rt_cache", net->proc_net_stat);
414#endif
415err2:
416 remove_proc_entry("rt_cache", net->proc_net);
417err1:
418 return -ENOMEM;
419}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800420
421static void __net_exit ip_rt_do_proc_exit(struct net *net)
422{
423 remove_proc_entry("rt_cache", net->proc_net_stat);
424 remove_proc_entry("rt_cache", net->proc_net);
Patrick McHardyc7066f72011-01-14 13:36:42 +0100425#ifdef CONFIG_IP_ROUTE_CLASSID
Denis V. Lunev73b38712008-02-28 20:51:18 -0800426 remove_proc_entry("rt_acct", net->proc_net);
Alexey Dobriyan0a931ac2010-01-17 03:32:50 +0000427#endif
Denis V. Lunev73b38712008-02-28 20:51:18 -0800428}
429
430static struct pernet_operations ip_rt_proc_ops __net_initdata = {
431 .init = ip_rt_do_proc_init,
432 .exit = ip_rt_do_proc_exit,
433};
434
435static int __init ip_rt_proc_init(void)
436{
437 return register_pernet_subsys(&ip_rt_proc_ops);
438}
439
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800440#else
/* Stub used when CONFIG_PROC_FS is off: nothing to register. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900446
Eric Dumazet4331deb2012-07-25 05:11:23 +0000447static inline bool rt_is_expired(const struct rtable *rth)
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700448{
Changli Gaod8d1f302010-06-10 23:31:35 -0700449 return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700450}
451
Eric Dumazetbeb659b2007-11-19 22:43:37 -0800452/*
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300453 * Perturbation of rt_genid by a small quantity [1..256]
Eric Dumazet29e75252008-01-31 17:05:09 -0800454 * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
455 * many times (2^24) without giving recent rt_genid.
456 * Jenkins hash is strong enough that litle changes of rt_genid are OK.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457 */
Denis V. Lunev86c657f2008-07-05 19:03:31 -0700458static void rt_cache_invalidate(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459{
Eric Dumazet29e75252008-01-31 17:05:09 -0800460 unsigned char shuffle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461
Eric Dumazet29e75252008-01-31 17:05:09 -0800462 get_random_bytes(&shuffle, sizeof(shuffle));
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700463 atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464}
465
/*
 * Invalidate all cached routes of @net by moving its generation id on.
 *
 * @delay is part of the interface but is not looked at here: whatever
 * its value, the only action taken is the invalidation.
 */
void rt_cache_flush(struct net *net, int delay)
{
	rt_cache_invalidate(net);
}
474
David S. Millerf894cbf2012-07-02 21:52:24 -0700475static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
476 struct sk_buff *skb,
477 const void *daddr)
David Miller3769cff2011-07-11 22:44:24 +0000478{
David S. Millerd3aaeb32011-07-18 00:40:17 -0700479 struct net_device *dev = dst->dev;
480 const __be32 *pkey = daddr;
David S. Miller39232972012-01-26 15:22:32 -0500481 const struct rtable *rt;
David Miller3769cff2011-07-11 22:44:24 +0000482 struct neighbour *n;
483
David S. Miller39232972012-01-26 15:22:32 -0500484 rt = (const struct rtable *) dst;
David S. Millera263b302012-07-02 02:02:15 -0700485 if (rt->rt_gateway)
David S. Miller39232972012-01-26 15:22:32 -0500486 pkey = (const __be32 *) &rt->rt_gateway;
David S. Millerf894cbf2012-07-02 21:52:24 -0700487 else if (skb)
488 pkey = &ip_hdr(skb)->daddr;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700489
David S. Miller80703d22012-02-15 17:48:35 -0500490 n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700491 if (n)
492 return n;
David Miller32092ec2011-07-25 00:01:41 +0000493 return neigh_create(&arp_tbl, pkey, dev);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700494}
495
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496/*
497 * Peer allocation may fail only in serious out-of-memory conditions. However
498 * we still can generate some output.
499 * Random ID selection looks a bit dangerous because we have no chances to
500 * select ID being unique in a reasonable period of time.
501 * But broken packet identifier may be better than no packet at all.
502 */
503static void ip_select_fb_ident(struct iphdr *iph)
504{
505 static DEFINE_SPINLOCK(ip_fb_id_lock);
506 static u32 ip_fallback_id;
507 u32 salt;
508
509 spin_lock_bh(&ip_fb_id_lock);
Al Viroe4485152006-09-26 22:15:01 -0700510 salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511 iph->id = htons(salt & 0xFFFF);
512 ip_fallback_id = salt;
513 spin_unlock_bh(&ip_fb_id_lock);
514}
515
516void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
517{
David S. Miller1d861aa2012-07-10 03:58:16 -0700518 struct net *net = dev_net(dst->dev);
519 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520
David S. Miller1d861aa2012-07-10 03:58:16 -0700521 peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
522 if (peer) {
523 iph->id = htons(inet_getid(peer, more));
524 inet_putpeer(peer);
525 return;
526 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527
528 ip_select_fb_ident(iph);
529}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000530EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200532static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
David S. Miller4895c772012-07-17 04:19:00 -0700533 const struct iphdr *iph,
534 int oif, u8 tos,
535 u8 prot, u32 mark, int flow_flags)
536{
537 if (sk) {
538 const struct inet_sock *inet = inet_sk(sk);
539
540 oif = sk->sk_bound_dev_if;
541 mark = sk->sk_mark;
542 tos = RT_CONN_FLAGS(sk);
543 prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
544 }
545 flowi4_init_output(fl4, oif, mark, tos,
546 RT_SCOPE_UNIVERSE, prot,
547 flow_flags,
548 iph->daddr, iph->saddr, 0, 0);
549}
550
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200551static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
552 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700553{
554 const struct iphdr *iph = ip_hdr(skb);
555 int oif = skb->dev->ifindex;
556 u8 tos = RT_TOS(iph->tos);
557 u8 prot = iph->protocol;
558 u32 mark = skb->mark;
559
560 __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
561}
562
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200563static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700564{
565 const struct inet_sock *inet = inet_sk(sk);
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200566 const struct ip_options_rcu *inet_opt;
David S. Miller4895c772012-07-17 04:19:00 -0700567 __be32 daddr = inet->inet_daddr;
568
569 rcu_read_lock();
570 inet_opt = rcu_dereference(inet->inet_opt);
571 if (inet_opt && inet_opt->opt.srr)
572 daddr = inet_opt->opt.faddr;
573 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
574 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
575 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
576 inet_sk_flowi_flags(sk),
577 daddr, inet->inet_saddr, 0, 0);
578 rcu_read_unlock();
579}
580
/*
 * Build a flow key from the packet when there is one, otherwise from the
 * socket alone.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb) {
		build_sk_flow_key(fl4, sk);
		return;
	}

	build_skb_flow_key(fl4, skb, sk);
}
589
David S. Millerc5038a82012-07-31 15:02:02 -0700590static inline void rt_free(struct rtable *rt)
591{
592 call_rcu(&rt->dst.rcu_head, dst_rcu_free);
593}
594
/* Taken (with BHs disabled) around updates of the nexthop exception tables. */
static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700596
Julian Anastasovaee06da2012-07-18 10:15:35 +0000597static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
David S. Miller4895c772012-07-17 04:19:00 -0700598{
599 struct fib_nh_exception *fnhe, *oldest;
David S. Millerc5038a82012-07-31 15:02:02 -0700600 struct rtable *orig;
David S. Miller4895c772012-07-17 04:19:00 -0700601
602 oldest = rcu_dereference(hash->chain);
603 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
604 fnhe = rcu_dereference(fnhe->fnhe_next)) {
605 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
606 oldest = fnhe;
607 }
David S. Millerc5038a82012-07-31 15:02:02 -0700608 orig = rcu_dereference(oldest->fnhe_rth);
609 if (orig) {
610 RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
611 rt_free(orig);
612 }
David S. Miller4895c772012-07-17 04:19:00 -0700613 return oldest;
614}
615
David S. Millerd3a25c92012-07-17 13:23:08 -0700616static inline u32 fnhe_hashfun(__be32 daddr)
617{
618 u32 hval;
619
620 hval = (__force u32) daddr;
621 hval ^= (hval >> 11) ^ (hval >> 22);
622
623 return hval & (FNHE_HASH_SIZE - 1);
624}
625
Julian Anastasovaee06da2012-07-18 10:15:35 +0000626static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
627 u32 pmtu, unsigned long expires)
David S. Miller4895c772012-07-17 04:19:00 -0700628{
Julian Anastasovaee06da2012-07-18 10:15:35 +0000629 struct fnhe_hash_bucket *hash;
David S. Miller4895c772012-07-17 04:19:00 -0700630 struct fib_nh_exception *fnhe;
631 int depth;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000632 u32 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -0700633
David S. Millerc5038a82012-07-31 15:02:02 -0700634 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000635
636 hash = nh->nh_exceptions;
David S. Miller4895c772012-07-17 04:19:00 -0700637 if (!hash) {
Julian Anastasovaee06da2012-07-18 10:15:35 +0000638 hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
David S. Miller4895c772012-07-17 04:19:00 -0700639 if (!hash)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000640 goto out_unlock;
641 nh->nh_exceptions = hash;
David S. Miller4895c772012-07-17 04:19:00 -0700642 }
643
David S. Miller4895c772012-07-17 04:19:00 -0700644 hash += hval;
645
646 depth = 0;
647 for (fnhe = rcu_dereference(hash->chain); fnhe;
648 fnhe = rcu_dereference(fnhe->fnhe_next)) {
649 if (fnhe->fnhe_daddr == daddr)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000650 break;
David S. Miller4895c772012-07-17 04:19:00 -0700651 depth++;
652 }
653
Julian Anastasovaee06da2012-07-18 10:15:35 +0000654 if (fnhe) {
655 if (gw)
656 fnhe->fnhe_gw = gw;
657 if (pmtu) {
658 fnhe->fnhe_pmtu = pmtu;
659 fnhe->fnhe_expires = expires;
660 }
661 } else {
662 if (depth > FNHE_RECLAIM_DEPTH)
663 fnhe = fnhe_oldest(hash);
664 else {
665 fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
666 if (!fnhe)
667 goto out_unlock;
668
669 fnhe->fnhe_next = hash->chain;
670 rcu_assign_pointer(hash->chain, fnhe);
671 }
672 fnhe->fnhe_daddr = daddr;
673 fnhe->fnhe_gw = gw;
674 fnhe->fnhe_pmtu = pmtu;
675 fnhe->fnhe_expires = expires;
David S. Miller4895c772012-07-17 04:19:00 -0700676 }
David S. Miller4895c772012-07-17 04:19:00 -0700677
David S. Miller4895c772012-07-17 04:19:00 -0700678 fnhe->fnhe_stamp = jiffies;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000679
680out_unlock:
David S. Millerc5038a82012-07-31 15:02:02 -0700681 spin_unlock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000682 return;
David S. Miller4895c772012-07-17 04:19:00 -0700683}
684
David S. Millerceb33202012-07-17 11:31:28 -0700685static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
686 bool kill_route)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687{
David S. Millere47a1852012-07-11 20:55:47 -0700688 __be32 new_gw = icmp_hdr(skb)->un.gateway;
David S. Miller94206122012-07-11 20:38:08 -0700689 __be32 old_gw = ip_hdr(skb)->saddr;
David S. Millere47a1852012-07-11 20:55:47 -0700690 struct net_device *dev = skb->dev;
David S. Millere47a1852012-07-11 20:55:47 -0700691 struct in_device *in_dev;
David S. Miller4895c772012-07-17 04:19:00 -0700692 struct fib_result res;
David S. Millere47a1852012-07-11 20:55:47 -0700693 struct neighbour *n;
Denis V. Lunev317805b2008-02-28 20:50:06 -0800694 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695
David S. Miller94206122012-07-11 20:38:08 -0700696 switch (icmp_hdr(skb)->code & 7) {
697 case ICMP_REDIR_NET:
698 case ICMP_REDIR_NETTOS:
699 case ICMP_REDIR_HOST:
700 case ICMP_REDIR_HOSTTOS:
701 break;
702
703 default:
704 return;
705 }
706
David S. Millere47a1852012-07-11 20:55:47 -0700707 if (rt->rt_gateway != old_gw)
708 return;
709
710 in_dev = __in_dev_get_rcu(dev);
711 if (!in_dev)
712 return;
713
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900714 net = dev_net(dev);
Joe Perches9d4fb272009-11-23 10:41:23 -0800715 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
716 ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
717 ipv4_is_zeronet(new_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 goto reject_redirect;
719
720 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
721 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
722 goto reject_redirect;
723 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
724 goto reject_redirect;
725 } else {
Denis V. Lunev317805b2008-02-28 20:50:06 -0800726 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727 goto reject_redirect;
728 }
729
David S. Miller4895c772012-07-17 04:19:00 -0700730 n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
David S. Millere47a1852012-07-11 20:55:47 -0700731 if (n) {
732 if (!(n->nud_state & NUD_VALID)) {
733 neigh_event_send(n, NULL);
734 } else {
David S. Miller4895c772012-07-17 04:19:00 -0700735 if (fib_lookup(net, fl4, &res) == 0) {
736 struct fib_nh *nh = &FIB_RES_NH(res);
David S. Miller4895c772012-07-17 04:19:00 -0700737
Julian Anastasovaee06da2012-07-18 10:15:35 +0000738 update_or_create_fnhe(nh, fl4->daddr, new_gw,
739 0, 0);
David S. Miller4895c772012-07-17 04:19:00 -0700740 }
David S. Millerceb33202012-07-17 11:31:28 -0700741 if (kill_route)
742 rt->dst.obsolete = DST_OBSOLETE_KILL;
David S. Millere47a1852012-07-11 20:55:47 -0700743 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
744 }
745 neigh_release(n);
746 }
747 return;
748
749reject_redirect:
750#ifdef CONFIG_IP_ROUTE_VERBOSE
David S. Miller99ee0382012-07-12 07:40:05 -0700751 if (IN_DEV_LOG_MARTIANS(in_dev)) {
752 const struct iphdr *iph = (const struct iphdr *) skb->data;
753 __be32 daddr = iph->daddr;
754 __be32 saddr = iph->saddr;
755
David S. Millere47a1852012-07-11 20:55:47 -0700756 net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
757 " Advised path = %pI4 -> %pI4\n",
758 &old_gw, dev->name, &new_gw,
759 &saddr, &daddr);
David S. Miller99ee0382012-07-12 07:40:05 -0700760 }
David S. Millere47a1852012-07-11 20:55:47 -0700761#endif
762 ;
763}
764
David S. Miller4895c772012-07-17 04:19:00 -0700765static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
766{
767 struct rtable *rt;
768 struct flowi4 fl4;
769
770 rt = (struct rtable *) dst;
771
772 ip_rt_build_flow_key(&fl4, sk, skb);
David S. Millerceb33202012-07-17 11:31:28 -0700773 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700774}
775
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
777{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800778 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779 struct dst_entry *ret = dst;
780
781 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000782 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 ip_rt_put(rt);
784 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700785 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
786 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700787 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 ret = NULL;
789 }
790 }
791 return ret;
792}
793
794/*
795 * Algorithm:
796 * 1. The first ip_rt_redirect_number redirects are sent
797 * with exponential backoff, then we stop sending them at all,
798 * assuming that the host ignores our redirects.
799 * 2. If we did not see packets requiring redirects
800 * during ip_rt_redirect_silence, we assume that the host
801 * forgot redirected route and start to send redirects again.
802 *
803 * This algorithm is much cheaper and more intelligent than dumb load limiting
804 * in icmp.c.
805 *
806 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
807 * and "frag. need" (breaks PMTU discovery) in icmp.c.
808 */
809
810void ip_rt_send_redirect(struct sk_buff *skb)
811{
Eric Dumazet511c3f92009-06-02 05:14:27 +0000812 struct rtable *rt = skb_rtable(skb);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700813 struct in_device *in_dev;
David S. Miller92d86822011-02-04 15:55:25 -0800814 struct inet_peer *peer;
David S. Miller1d861aa2012-07-10 03:58:16 -0700815 struct net *net;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700816 int log_martians;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817
Eric Dumazet30038fc2009-08-28 23:52:01 -0700818 rcu_read_lock();
Changli Gaod8d1f302010-06-10 23:31:35 -0700819 in_dev = __in_dev_get_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700820 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
821 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 return;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700823 }
824 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
825 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826
David S. Miller1d861aa2012-07-10 03:58:16 -0700827 net = dev_net(rt->dst.dev);
828 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800829 if (!peer) {
830 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
831 return;
832 }
833
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 /* No redirected packets during ip_rt_redirect_silence;
835 * reset the algorithm.
836 */
David S. Miller92d86822011-02-04 15:55:25 -0800837 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
838 peer->rate_tokens = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839
840 /* Too many ignored redirects; do not send anything
Changli Gaod8d1f302010-06-10 23:31:35 -0700841 * set dst.rate_last to the last seen redirected packet.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 */
David S. Miller92d86822011-02-04 15:55:25 -0800843 if (peer->rate_tokens >= ip_rt_redirect_number) {
844 peer->rate_last = jiffies;
David S. Miller1d861aa2012-07-10 03:58:16 -0700845 goto out_put_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 }
847
848 /* Check for load limit; set rate_last to the latest sent
849 * redirect.
850 */
David S. Miller92d86822011-02-04 15:55:25 -0800851 if (peer->rate_tokens == 0 ||
Li Yewang14fb8a72006-12-18 00:26:35 -0800852 time_after(jiffies,
David S. Miller92d86822011-02-04 15:55:25 -0800853 (peer->rate_last +
854 (ip_rt_redirect_load << peer->rate_tokens)))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
David S. Miller92d86822011-02-04 15:55:25 -0800856 peer->rate_last = jiffies;
857 ++peer->rate_tokens;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858#ifdef CONFIG_IP_ROUTE_VERBOSE
Eric Dumazet30038fc2009-08-28 23:52:01 -0700859 if (log_martians &&
Joe Perchese87cc472012-05-13 21:56:26 +0000860 peer->rate_tokens == ip_rt_redirect_number)
861 net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
David S. Miller92101b32012-07-23 16:29:00 -0700862 &ip_hdr(skb)->saddr, inet_iif(skb),
David S. Millerf1ce3062012-07-12 10:10:17 -0700863 &ip_hdr(skb)->daddr, &rt->rt_gateway);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864#endif
865 }
David S. Miller1d861aa2012-07-10 03:58:16 -0700866out_put_peer:
867 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868}
869
870static int ip_error(struct sk_buff *skb)
871{
David S. Miller251da412012-06-26 16:27:09 -0700872 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +0000873 struct rtable *rt = skb_rtable(skb);
David S. Miller92d86822011-02-04 15:55:25 -0800874 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700876 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800877 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878 int code;
879
David S. Miller251da412012-06-26 16:27:09 -0700880 net = dev_net(rt->dst.dev);
881 if (!IN_DEV_FORWARD(in_dev)) {
882 switch (rt->dst.error) {
883 case EHOSTUNREACH:
884 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
885 break;
886
887 case ENETUNREACH:
888 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
889 break;
890 }
891 goto out;
892 }
893
Changli Gaod8d1f302010-06-10 23:31:35 -0700894 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000895 case EINVAL:
896 default:
897 goto out;
898 case EHOSTUNREACH:
899 code = ICMP_HOST_UNREACH;
900 break;
901 case ENETUNREACH:
902 code = ICMP_NET_UNREACH;
David S. Miller251da412012-06-26 16:27:09 -0700903 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000904 break;
905 case EACCES:
906 code = ICMP_PKT_FILTERED;
907 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 }
909
David S. Miller1d861aa2012-07-10 03:58:16 -0700910 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800911
912 send = true;
913 if (peer) {
914 now = jiffies;
915 peer->rate_tokens += now - peer->rate_last;
916 if (peer->rate_tokens > ip_rt_error_burst)
917 peer->rate_tokens = ip_rt_error_burst;
918 peer->rate_last = now;
919 if (peer->rate_tokens >= ip_rt_error_cost)
920 peer->rate_tokens -= ip_rt_error_cost;
921 else
922 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -0700923 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924 }
David S. Miller92d86822011-02-04 15:55:25 -0800925 if (send)
926 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927
928out: kfree_skb(skb);
929 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900930}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931
David S. Millerceb33202012-07-17 11:31:28 -0700932static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933{
David S. Miller4895c772012-07-17 04:19:00 -0700934 struct fib_result res;
David S. Miller2c8cec52011-02-09 20:42:07 -0800935
David S. Miller59436342012-07-10 06:58:42 -0700936 if (mtu < ip_rt_min_pmtu)
937 mtu = ip_rt_min_pmtu;
Eric Dumazetfe6fe792011-06-08 06:07:07 +0000938
David S. Miller4895c772012-07-17 04:19:00 -0700939 if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
940 struct fib_nh *nh = &FIB_RES_NH(res);
David S. Miller4895c772012-07-17 04:19:00 -0700941
Julian Anastasovaee06da2012-07-18 10:15:35 +0000942 update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
943 jiffies + ip_rt_mtu_expires);
David S. Miller4895c772012-07-17 04:19:00 -0700944 }
David S. Millerceb33202012-07-17 11:31:28 -0700945 return mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946}
947
David S. Miller4895c772012-07-17 04:19:00 -0700948static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
949 struct sk_buff *skb, u32 mtu)
950{
951 struct rtable *rt = (struct rtable *) dst;
952 struct flowi4 fl4;
953
954 ip_rt_build_flow_key(&fl4, sk, skb);
David S. Millerceb33202012-07-17 11:31:28 -0700955 mtu = __ip_rt_update_pmtu(rt, &fl4, mtu);
956
957 if (!rt->rt_pmtu) {
958 dst->obsolete = DST_OBSOLETE_KILL;
959 } else {
960 rt->rt_pmtu = mtu;
961 dst_set_expires(&rt->dst, ip_rt_mtu_expires);
962 }
David S. Miller4895c772012-07-17 04:19:00 -0700963}
964
David S. Miller36393392012-06-14 22:21:46 -0700965void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
966 int oif, u32 mark, u8 protocol, int flow_flags)
967{
David S. Miller4895c772012-07-17 04:19:00 -0700968 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -0700969 struct flowi4 fl4;
970 struct rtable *rt;
971
David S. Miller4895c772012-07-17 04:19:00 -0700972 __build_flow_key(&fl4, NULL, iph, oif,
973 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -0700974 rt = __ip_route_output_key(net, &fl4);
975 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -0700976 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -0700977 ip_rt_put(rt);
978 }
979}
980EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
981
982void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
983{
David S. Miller4895c772012-07-17 04:19:00 -0700984 const struct iphdr *iph = (const struct iphdr *) skb->data;
985 struct flowi4 fl4;
986 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -0700987
David S. Miller4895c772012-07-17 04:19:00 -0700988 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
989 rt = __ip_route_output_key(sock_net(sk), &fl4);
990 if (!IS_ERR(rt)) {
991 __ip_rt_update_pmtu(rt, &fl4, mtu);
992 ip_rt_put(rt);
993 }
David S. Miller36393392012-06-14 22:21:46 -0700994}
995EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -0800996
David S. Millerb42597e2012-07-11 21:25:45 -0700997void ipv4_redirect(struct sk_buff *skb, struct net *net,
998 int oif, u32 mark, u8 protocol, int flow_flags)
999{
David S. Miller4895c772012-07-17 04:19:00 -07001000 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001001 struct flowi4 fl4;
1002 struct rtable *rt;
1003
David S. Miller4895c772012-07-17 04:19:00 -07001004 __build_flow_key(&fl4, NULL, iph, oif,
1005 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001006 rt = __ip_route_output_key(net, &fl4);
1007 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001008 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001009 ip_rt_put(rt);
1010 }
1011}
1012EXPORT_SYMBOL_GPL(ipv4_redirect);
1013
1014void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1015{
David S. Miller4895c772012-07-17 04:19:00 -07001016 const struct iphdr *iph = (const struct iphdr *) skb->data;
1017 struct flowi4 fl4;
1018 struct rtable *rt;
David S. Millerb42597e2012-07-11 21:25:45 -07001019
David S. Miller4895c772012-07-17 04:19:00 -07001020 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1021 rt = __ip_route_output_key(sock_net(sk), &fl4);
1022 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001023 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001024 ip_rt_put(rt);
1025 }
David S. Millerb42597e2012-07-11 21:25:45 -07001026}
1027EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1028
David S. Millerefbc3682011-12-01 13:38:59 -05001029static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1030{
1031 struct rtable *rt = (struct rtable *) dst;
1032
David S. Millerceb33202012-07-17 11:31:28 -07001033 /* All IPV4 dsts are created with ->obsolete set to the value
1034 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1035 * into this function always.
1036 *
1037 * When a PMTU/redirect information update invalidates a
1038 * route, this is indicated by setting obsolete to
1039 * DST_OBSOLETE_KILL.
1040 */
1041 if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
David S. Millerefbc3682011-12-01 13:38:59 -05001042 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001043 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044}
1045
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046static void ipv4_link_failure(struct sk_buff *skb)
1047{
1048 struct rtable *rt;
1049
1050 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1051
Eric Dumazet511c3f92009-06-02 05:14:27 +00001052 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001053 if (rt)
1054 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055}
1056
1057static int ip_rt_bug(struct sk_buff *skb)
1058{
Joe Perches91df42b2012-05-15 14:11:54 +00001059 pr_debug("%s: %pI4 -> %pI4, %s\n",
1060 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1061 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001063 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064 return 0;
1065}
1066
/*
 * We do not cache the source address of the outgoing interface,
 * because it is used only by the IP RR, TS and SRR options,
 * so it is out of the fast path.
 *
 * BTW remember: "addr" is allowed to be unaligned
 * in IP options!
 */
1075
David S. Miller8e363602011-05-13 17:29:41 -04001076void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077{
Al Viroa61ced52006-09-26 21:27:54 -07001078 __be32 src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079
David S. Millerc7537962010-11-11 17:07:48 -08001080 if (rt_is_output_route(rt))
David S. Millerc5be24f2011-05-13 18:01:21 -04001081 src = ip_hdr(skb)->saddr;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001082 else {
David S. Miller8e363602011-05-13 17:29:41 -04001083 struct fib_result res;
1084 struct flowi4 fl4;
1085 struct iphdr *iph;
1086
1087 iph = ip_hdr(skb);
1088
1089 memset(&fl4, 0, sizeof(fl4));
1090 fl4.daddr = iph->daddr;
1091 fl4.saddr = iph->saddr;
Julian Anastasovb0fe4a32011-07-23 02:00:41 +00001092 fl4.flowi4_tos = RT_TOS(iph->tos);
David S. Miller8e363602011-05-13 17:29:41 -04001093 fl4.flowi4_oif = rt->dst.dev->ifindex;
1094 fl4.flowi4_iif = skb->dev->ifindex;
1095 fl4.flowi4_mark = skb->mark;
David S. Miller5e2b61f2011-03-04 21:47:09 -08001096
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001097 rcu_read_lock();
David S. Miller68a5e3d2011-03-11 20:07:33 -05001098 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
David S. Miller436c3b62011-03-24 17:42:21 -07001099 src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001100 else
David S. Millerf8126f12012-07-13 05:03:45 -07001101 src = inet_select_addr(rt->dst.dev,
1102 rt_nexthop(rt, iph->daddr),
1103 RT_SCOPE_UNIVERSE);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001104 rcu_read_unlock();
1105 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 memcpy(addr, &src, 4);
1107}
1108
#ifdef CONFIG_IP_ROUTE_CLASSID
/*
 * Merge @tag into the route's classid without overwriting: each 16-bit
 * half of tclassid is filled from @tag only if that half is still zero.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	const u32 low_half = 0xFFFF;
	const u32 high_half = 0xFFFF0000;

	if ((rt->dst.tclassid & low_half) == 0)
		rt->dst.tclassid |= tag & low_half;

	if ((rt->dst.tclassid & high_half) == 0)
		rt->dst.tclassid |= tag & high_half;
}
#endif
1118
David S. Miller0dbaee32010-12-13 12:52:14 -08001119static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1120{
1121 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1122
1123 if (advmss == 0) {
1124 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1125 ip_rt_min_advmss);
1126 if (advmss > 65535 - 40)
1127 advmss = 65535 - 40;
1128 }
1129 return advmss;
1130}
1131
Steffen Klassertebb762f2011-11-23 02:12:51 +00001132static unsigned int ipv4_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001133{
Steffen Klassert261663b2011-11-23 02:14:50 +00001134 const struct rtable *rt = (const struct rtable *) dst;
David S. Miller59436342012-07-10 06:58:42 -07001135 unsigned int mtu = rt->rt_pmtu;
1136
1137 if (mtu && time_after_eq(jiffies, rt->dst.expires))
1138 mtu = 0;
1139
1140 if (!mtu)
1141 mtu = dst_metric_raw(dst, RTAX_MTU);
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001142
Steffen Klassert261663b2011-11-23 02:14:50 +00001143 if (mtu && rt_is_output_route(rt))
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001144 return mtu;
1145
1146 mtu = dst->dev->mtu;
David S. Millerd33e4552010-12-14 13:01:14 -08001147
1148 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
David S. Millerf8126f12012-07-13 05:03:45 -07001149 if (rt->rt_gateway && mtu > 576)
David S. Millerd33e4552010-12-14 13:01:14 -08001150 mtu = 576;
1151 }
1152
1153 if (mtu > IP_MAX_MTU)
1154 mtu = IP_MAX_MTU;
1155
1156 return mtu;
1157}
1158
David S. Millerf2bb4be2012-07-17 12:20:47 -07001159static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001160{
1161 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1162 struct fib_nh_exception *fnhe;
1163 u32 hval;
1164
David S. Millerf2bb4be2012-07-17 12:20:47 -07001165 if (!hash)
1166 return NULL;
1167
David S. Millerd3a25c92012-07-17 13:23:08 -07001168 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001169
1170 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1171 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001172 if (fnhe->fnhe_daddr == daddr)
1173 return fnhe;
1174 }
1175 return NULL;
1176}
David S. Miller4895c772012-07-17 04:19:00 -07001177
David S. Millerf2bb4be2012-07-17 12:20:47 -07001178static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1179 __be32 daddr)
1180{
David S. Millerc5038a82012-07-31 15:02:02 -07001181 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +00001182
David S. Millerc5038a82012-07-31 15:02:02 -07001183 if (daddr == fnhe->fnhe_daddr) {
1184 struct rtable *orig;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001185
David S. Millerc5038a82012-07-31 15:02:02 -07001186 if (fnhe->fnhe_pmtu) {
1187 unsigned long expires = fnhe->fnhe_expires;
1188 unsigned long diff = expires - jiffies;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001189
David S. Millerc5038a82012-07-31 15:02:02 -07001190 if (time_before(jiffies, expires)) {
1191 rt->rt_pmtu = fnhe->fnhe_pmtu;
1192 dst_set_expires(&rt->dst, diff);
1193 }
David S. Miller4895c772012-07-17 04:19:00 -07001194 }
David S. Millerc5038a82012-07-31 15:02:02 -07001195 if (fnhe->fnhe_gw) {
1196 rt->rt_flags |= RTCF_REDIRECTED;
1197 rt->rt_gateway = fnhe->fnhe_gw;
1198 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001199
David S. Millerc5038a82012-07-31 15:02:02 -07001200 orig = rcu_dereference(fnhe->fnhe_rth);
1201 rcu_assign_pointer(fnhe->fnhe_rth, rt);
1202 if (orig)
1203 rt_free(orig);
1204
1205 fnhe->fnhe_stamp = jiffies;
1206 } else {
1207 /* Routes we intend to cache in nexthop exception have
1208 * the DST_NOCACHE bit clear. However, if we are
1209 * unsuccessful at storing this route into the cache
1210 * we really need to set it.
1211 */
1212 rt->dst.flags |= DST_NOCACHE;
1213 }
1214 spin_unlock_bh(&fnhe_lock);
Eric Dumazet54764bb2012-07-31 01:08:23 +00001215}
1216
David S. Millerf2bb4be2012-07-17 12:20:47 -07001217static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1218{
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001219 struct rtable *orig, *prev, **p;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001220
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001221 if (rt_is_input_route(rt)) {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001222 p = (struct rtable **)&nh->nh_rth_input;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001223 } else {
1224 if (!nh->nh_pcpu_rth_output)
1225 goto nocache;
1226 p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
1227 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001228 orig = *p;
1229
1230 prev = cmpxchg(p, orig, rt);
1231 if (prev == orig) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001232 if (orig)
Eric Dumazet54764bb2012-07-31 01:08:23 +00001233 rt_free(orig);
David S. Millerc6cffba2012-07-26 11:14:38 +00001234 } else {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001235 /* Routes we intend to cache in the FIB nexthop have
1236 * the DST_NOCACHE bit clear. However, if we are
1237 * unsuccessful at storing this route into the cache
1238 * we really need to set it.
1239 */
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001240nocache:
Eric Dumazet54764bb2012-07-31 01:08:23 +00001241 rt->dst.flags |= DST_NOCACHE;
David S. Miller4895c772012-07-17 04:19:00 -07001242 }
1243}
1244
Eric Dumazet4331deb2012-07-25 05:11:23 +00001245static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba2012-07-17 12:58:50 -07001246{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001247 return rt &&
1248 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1249 !rt_is_expired(rt);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001250}
1251
David S. Millerf2bb4be2012-07-17 12:20:47 -07001252static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
David S. Miller5e2b61f2011-03-04 21:47:09 -08001253 const struct fib_result *res,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001254 struct fib_nh_exception *fnhe,
David S. Miller982721f2011-02-16 21:44:24 -08001255 struct fib_info *fi, u16 type, u32 itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257 if (fi) {
David S. Miller4895c772012-07-17 04:19:00 -07001258 struct fib_nh *nh = &FIB_RES_NH(*res);
1259
1260 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
1261 rt->rt_gateway = nh->nh_gw;
David S. Miller28605832012-07-17 14:55:59 -07001262 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
Patrick McHardyc7066f72011-01-14 13:36:42 +01001263#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf2bb4be2012-07-17 12:20:47 -07001264 rt->dst.tclassid = nh->nh_tclassid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265#endif
David S. Millerc5038a82012-07-31 15:02:02 -07001266 if (unlikely(fnhe))
1267 rt_bind_exception(rt, fnhe, daddr);
1268 else if (!(rt->dst.flags & DST_NOCACHE))
David S. Millerf2bb4be2012-07-17 12:20:47 -07001269 rt_cache_route(nh, rt);
David S. Millerd33e4552010-12-14 13:01:14 -08001270 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271
Patrick McHardyc7066f72011-01-14 13:36:42 +01001272#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273#ifdef CONFIG_IP_MULTIPLE_TABLES
David S. Miller85b91b02012-07-13 08:21:29 -07001274 set_class_tag(rt, res->tclassid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275#endif
1276 set_class_tag(rt, itag);
1277#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278}
1279
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001280static struct rtable *rt_dst_alloc(struct net_device *dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001281 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001282{
David S. Millerf5b0a872012-07-19 12:31:33 -07001283 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
David S. Millerc6cffba2012-07-26 11:14:38 +00001284 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001285 (nopolicy ? DST_NOPOLICY : 0) |
1286 (noxfrm ? DST_NOXFRM : 0));
David S. Miller0c4dcd52011-02-17 15:42:37 -08001287}
1288
Eric Dumazet96d36222010-06-02 19:21:31 +00001289/* called in rcu_read_lock() section */
Al Viro9e12bb22006-09-26 21:25:20 -07001290static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291 u8 tos, struct net_device *dev, int our)
1292{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293 struct rtable *rth;
Eric Dumazet96d36222010-06-02 19:21:31 +00001294 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 u32 itag = 0;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001296 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297
1298 /* Primary sanity checks. */
1299
1300 if (in_dev == NULL)
1301 return -EINVAL;
1302
Jan Engelhardt1e637c72008-01-21 03:18:08 -08001303 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
Thomas Grafd0daebc32012-06-12 00:44:01 +00001304 skb->protocol != htons(ETH_P_IP))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305 goto e_inval;
1306
Thomas Grafd0daebc32012-06-12 00:44:01 +00001307 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1308 if (ipv4_is_loopback(saddr))
1309 goto e_inval;
1310
Joe Perchesf97c1e02007-12-16 13:45:43 -08001311 if (ipv4_is_zeronet(saddr)) {
1312 if (!ipv4_is_local_multicast(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 goto e_inval;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001314 } else {
David S. Miller9e56e382012-06-28 18:54:02 -07001315 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1316 in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001317 if (err < 0)
1318 goto e_err;
1319 }
Benjamin LaHaise4e7b2f12012-03-27 15:55:32 +00001320 rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001321 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322 if (!rth)
1323 goto e_nobufs;
1324
Patrick McHardyc7066f72011-01-14 13:36:42 +01001325#ifdef CONFIG_IP_ROUTE_CLASSID
Changli Gaod8d1f302010-06-10 23:31:35 -07001326 rth->dst.tclassid = itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327#endif
David S. Millercf911662011-04-28 14:31:47 -07001328 rth->dst.output = ip_rt_bug;
1329
Denis V. Luneve84f84f2008-07-05 19:04:32 -07001330 rth->rt_genid = rt_genid(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 rth->rt_flags = RTCF_MULTICAST;
Eric Dumazet29e75252008-01-31 17:05:09 -08001332 rth->rt_type = RTN_MULTICAST;
David S. Miller9917e1e82012-07-17 14:44:26 -07001333 rth->rt_is_input= 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001334 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001335 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001336 rth->rt_gateway = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337 if (our) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001338 rth->dst.input= ip_local_deliver;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339 rth->rt_flags |= RTCF_LOCAL;
1340 }
1341
1342#ifdef CONFIG_IP_MROUTE
Joe Perchesf97c1e02007-12-16 13:45:43 -08001343 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
Changli Gaod8d1f302010-06-10 23:31:35 -07001344 rth->dst.input = ip_mr_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345#endif
1346 RT_CACHE_STAT_INC(in_slow_mc);
1347
David S. Miller89aef892012-07-17 11:00:09 -07001348 skb_dst_set(skb, &rth->dst);
1349 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350
1351e_nobufs:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352 return -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001353e_inval:
Eric Dumazet96d36222010-06-02 19:21:31 +00001354 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001355e_err:
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001356 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357}
1358
1359
1360static void ip_handle_martian_source(struct net_device *dev,
1361 struct in_device *in_dev,
1362 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001363 __be32 daddr,
1364 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365{
1366 RT_CACHE_STAT_INC(in_martian_src);
1367#ifdef CONFIG_IP_ROUTE_VERBOSE
1368 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1369 /*
1370 * RFC1812 recommendation, if source is martian,
1371 * the only hint is MAC header.
1372 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001373 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001374 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001375 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001376 print_hex_dump(KERN_WARNING, "ll header: ",
1377 DUMP_PREFIX_OFFSET, 16, 1,
1378 skb_mac_header(skb),
1379 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 }
1381 }
1382#endif
1383}
1384
Eric Dumazet47360222010-06-03 04:13:21 +00001385/* called in rcu_read_lock() section */
Stephen Hemminger5969f712008-04-10 01:52:09 -07001386static int __mkroute_input(struct sk_buff *skb,
David S. Miller982721f2011-02-16 21:44:24 -08001387 const struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001388 struct in_device *in_dev,
David S. Millerc6cffba2012-07-26 11:14:38 +00001389 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391 struct rtable *rth;
1392 int err;
1393 struct in_device *out_dev;
Eric Dumazet47360222010-06-03 04:13:21 +00001394 unsigned int flags = 0;
David S. Millerd2d68ba2012-07-17 12:58:50 -07001395 bool do_cache;
Al Virod9c9df82006-09-26 21:28:14 -07001396 u32 itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397
1398 /* get a working reference to the output device */
Eric Dumazet47360222010-06-03 04:13:21 +00001399 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400 if (out_dev == NULL) {
Joe Perchese87cc472012-05-13 21:56:26 +00001401 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 return -EINVAL;
1403 }
1404
1405
Michael Smith5c04c812011-04-07 04:51:50 +00001406 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
David S. Miller9e56e382012-06-28 18:54:02 -07001407 in_dev->dev, in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001409 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410 saddr);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001411
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412 goto cleanup;
1413 }
1414
Thomas Graf51b77ca2008-06-03 16:36:01 -07001415 if (out_dev == in_dev && err &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001416 (IN_DEV_SHARED_MEDIA(out_dev) ||
1417 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1418 flags |= RTCF_DOREDIRECT;
1419
1420 if (skb->protocol != htons(ETH_P_IP)) {
1421 /* Not IP (i.e. ARP). Do not create route, if it is
1422 * invalid for proxy arp. DNAT routes are always valid.
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001423 *
1424 * Proxy arp feature have been extended to allow, ARP
1425 * replies back to the same interface, to support
1426 * Private VLAN switch technologies. See arp.c.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427 */
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001428 if (out_dev == in_dev &&
1429 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 err = -EINVAL;
1431 goto cleanup;
1432 }
1433 }
1434
David S. Millerd2d68ba2012-07-17 12:58:50 -07001435 do_cache = false;
1436 if (res->fi) {
David S. Millerfe3edf42012-07-23 13:22:20 -07001437 if (!itag) {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001438 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001439 if (rt_cache_valid(rth)) {
David S. Millerc6cffba2012-07-26 11:14:38 +00001440 skb_dst_set_noref(skb, &rth->dst);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001441 goto out;
1442 }
1443 do_cache = true;
1444 }
1445 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001446
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001447 rth = rt_dst_alloc(out_dev->dev,
1448 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerd2d68ba2012-07-17 12:58:50 -07001449 IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 if (!rth) {
1451 err = -ENOBUFS;
1452 goto cleanup;
1453 }
1454
David S. Millercf911662011-04-28 14:31:47 -07001455 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
1456 rth->rt_flags = flags;
1457 rth->rt_type = res->type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001458 rth->rt_is_input = 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001459 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001460 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001461 rth->rt_gateway = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462
Changli Gaod8d1f302010-06-10 23:31:35 -07001463 rth->dst.input = ip_forward;
1464 rth->dst.output = ip_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001465
David S. Millerd2d68ba2012-07-17 12:58:50 -07001466 rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
David S. Millerc6cffba2012-07-26 11:14:38 +00001467 skb_dst_set(skb, &rth->dst);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001468out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469 err = 0;
1470 cleanup:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 return err;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001472}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473
Stephen Hemminger5969f712008-04-10 01:52:09 -07001474static int ip_mkroute_input(struct sk_buff *skb,
1475 struct fib_result *res,
David S. Miller68a5e3d2011-03-11 20:07:33 -05001476 const struct flowi4 *fl4,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001477 struct in_device *in_dev,
1478 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Millerff3fccb2011-03-10 16:23:24 -08001481 if (res->fi && res->fi->fib_nhs > 1)
David S. Miller1b7fe5932011-03-10 17:01:16 -08001482 fib_select_multipath(res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483#endif
1484
1485 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00001486 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487}
1488
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489/*
1490 * NOTE. We drop all the packets that has local source
1491 * addresses, because every properly looped back packet
1492 * must have correct destination already attached by output routine.
1493 *
1494 * Such approach solves two big problems:
1495 * 1. Not simplex devices are handled properly.
1496 * 2. IP spoofing attempts are filtered with 100% of guarantee.
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001497 * called with rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 */
1499
Al Viro9e12bb22006-09-26 21:25:20 -07001500static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
David S. Millerc10237e2012-06-27 17:05:06 -07001501 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502{
1503 struct fib_result res;
Eric Dumazet96d36222010-06-02 19:21:31 +00001504 struct in_device *in_dev = __in_dev_get_rcu(dev);
David S. Miller68a5e3d2011-03-11 20:07:33 -05001505 struct flowi4 fl4;
Eric Dumazet95c96172012-04-15 05:58:06 +00001506 unsigned int flags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001507 u32 itag = 0;
Eric Dumazet95c96172012-04-15 05:58:06 +00001508 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 int err = -EINVAL;
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001510 struct net *net = dev_net(dev);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001511 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512
1513 /* IP on this device is disabled. */
1514
1515 if (!in_dev)
1516 goto out;
1517
1518 /* Check for the most weird martians, which can be not detected
1519 by fib_lookup.
1520 */
1521
Thomas Grafd0daebc32012-06-12 00:44:01 +00001522 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001523 goto martian_source;
1524
David S. Millerd2d68ba2012-07-17 12:58:50 -07001525 res.fi = NULL;
Andy Walls27a954b2010-10-17 15:11:22 +00001526 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527 goto brd_input;
1528
1529 /* Accept zero addresses only to limited broadcast;
1530 * I even do not know to fix it or not. Waiting for complains :-)
1531 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001532 if (ipv4_is_zeronet(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001533 goto martian_source;
1534
Thomas Grafd0daebc32012-06-12 00:44:01 +00001535 if (ipv4_is_zeronet(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001536 goto martian_destination;
1537
Thomas Grafd0daebc32012-06-12 00:44:01 +00001538 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) {
1539 if (ipv4_is_loopback(daddr))
1540 goto martian_destination;
1541
1542 if (ipv4_is_loopback(saddr))
1543 goto martian_source;
1544 }
1545
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 /*
1547 * Now we are ready to route packet.
1548 */
David S. Miller68a5e3d2011-03-11 20:07:33 -05001549 fl4.flowi4_oif = 0;
1550 fl4.flowi4_iif = dev->ifindex;
1551 fl4.flowi4_mark = skb->mark;
1552 fl4.flowi4_tos = tos;
1553 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
1554 fl4.daddr = daddr;
1555 fl4.saddr = saddr;
1556 err = fib_lookup(net, &fl4, &res);
David S. Miller251da412012-06-26 16:27:09 -07001557 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 goto no_route;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559
1560 RT_CACHE_STAT_INC(in_slow_tot);
1561
1562 if (res.type == RTN_BROADCAST)
1563 goto brd_input;
1564
1565 if (res.type == RTN_LOCAL) {
Michael Smith5c04c812011-04-07 04:51:50 +00001566 err = fib_validate_source(skb, saddr, daddr, tos,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001567 net->loopback_dev->ifindex,
David S. Miller9e56e382012-06-28 18:54:02 -07001568 dev, in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001569 if (err < 0)
1570 goto martian_source_keep_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571 goto local_input;
1572 }
1573
1574 if (!IN_DEV_FORWARD(in_dev))
David S. Miller251da412012-06-26 16:27:09 -07001575 goto no_route;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576 if (res.type != RTN_UNICAST)
1577 goto martian_destination;
1578
David S. Miller68a5e3d2011-03-11 20:07:33 -05001579 err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580out: return err;
1581
1582brd_input:
1583 if (skb->protocol != htons(ETH_P_IP))
1584 goto e_inval;
1585
David S. Miller41347dc2012-06-28 04:05:27 -07001586 if (!ipv4_is_zeronet(saddr)) {
David S. Miller9e56e382012-06-28 18:54:02 -07001587 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1588 in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589 if (err < 0)
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001590 goto martian_source_keep_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591 }
1592 flags |= RTCF_BROADCAST;
1593 res.type = RTN_BROADCAST;
1594 RT_CACHE_STAT_INC(in_brd);
1595
1596local_input:
David S. Millerd2d68ba2012-07-17 12:58:50 -07001597 do_cache = false;
1598 if (res.fi) {
David S. Millerfe3edf42012-07-23 13:22:20 -07001599 if (!itag) {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001600 rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001601 if (rt_cache_valid(rth)) {
David S. Millerc6cffba2012-07-26 11:14:38 +00001602 skb_dst_set_noref(skb, &rth->dst);
1603 err = 0;
1604 goto out;
David S. Millerd2d68ba2012-07-17 12:58:50 -07001605 }
1606 do_cache = true;
1607 }
1608 }
1609
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001610 rth = rt_dst_alloc(net->loopback_dev,
David S. Millerd2d68ba2012-07-17 12:58:50 -07001611 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 if (!rth)
1613 goto e_nobufs;
1614
David S. Millercf911662011-04-28 14:31:47 -07001615 rth->dst.input= ip_local_deliver;
Changli Gaod8d1f302010-06-10 23:31:35 -07001616 rth->dst.output= ip_rt_bug;
David S. Millercf911662011-04-28 14:31:47 -07001617#ifdef CONFIG_IP_ROUTE_CLASSID
1618 rth->dst.tclassid = itag;
1619#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620
David S. Millercf911662011-04-28 14:31:47 -07001621 rth->rt_genid = rt_genid(net);
1622 rth->rt_flags = flags|RTCF_LOCAL;
1623 rth->rt_type = res.type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001624 rth->rt_is_input = 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001625 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001626 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001627 rth->rt_gateway = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 if (res.type == RTN_UNREACHABLE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001629 rth->dst.input= ip_error;
1630 rth->dst.error= -err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631 rth->rt_flags &= ~RTCF_LOCAL;
1632 }
David S. Millerd2d68ba2012-07-17 12:58:50 -07001633 if (do_cache)
1634 rt_cache_route(&FIB_RES_NH(res), rth);
David S. Miller89aef892012-07-17 11:00:09 -07001635 skb_dst_set(skb, &rth->dst);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001636 err = 0;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001637 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638
1639no_route:
1640 RT_CACHE_STAT_INC(in_no_route);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641 res.type = RTN_UNREACHABLE;
Mitsuru Chinen7f538782007-12-07 01:07:24 -08001642 if (err == -ESRCH)
1643 err = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 goto local_input;
1645
1646 /*
1647 * Do not cache martian addresses: they should be logged (RFC1812)
1648 */
1649martian_destination:
1650 RT_CACHE_STAT_INC(in_martian_dst);
1651#ifdef CONFIG_IP_ROUTE_VERBOSE
Joe Perchese87cc472012-05-13 21:56:26 +00001652 if (IN_DEV_LOG_MARTIANS(in_dev))
1653 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
1654 &daddr, &saddr, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655#endif
Dietmar Eggemann2c2910a2005-06-28 13:06:23 -07001656
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657e_inval:
1658 err = -EINVAL;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001659 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660
1661e_nobufs:
1662 err = -ENOBUFS;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001663 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664
1665martian_source:
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001666 err = -EINVAL;
1667martian_source_keep_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001669 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670}
1671
David S. Millerc6cffba2012-07-26 11:14:38 +00001672int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1673 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674{
Eric Dumazet96d36222010-06-02 19:21:31 +00001675 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676
Eric Dumazet96d36222010-06-02 19:21:31 +00001677 rcu_read_lock();
1678
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 /* Multicast recognition logic is moved from route cache to here.
1680 The problem was that too many Ethernet cards have broken/missing
1681 hardware multicast filters :-( As result the host on multicasting
1682 network acquires a lot of useless route cache entries, sort of
1683 SDR messages from all the world. Now we try to get rid of them.
1684 Really, provided software IP multicast filter is organized
1685 reasonably (at least, hashed), it does not result in a slowdown
1686 comparing with route cache reject entries.
1687 Note, that multicast routers are not affected, because
1688 route cache entry is created eventually.
1689 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001690 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001691 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692
Eric Dumazet96d36222010-06-02 19:21:31 +00001693 if (in_dev) {
David S. Millerdbdd9a52011-03-10 16:34:38 -08001694 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1695 ip_hdr(skb)->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 if (our
1697#ifdef CONFIG_IP_MROUTE
Joe Perches9d4fb272009-11-23 10:41:23 -08001698 ||
1699 (!ipv4_is_local_multicast(daddr) &&
1700 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701#endif
Joe Perches9d4fb272009-11-23 10:41:23 -08001702 ) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001703 int res = ip_route_input_mc(skb, daddr, saddr,
1704 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00001706 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707 }
1708 }
1709 rcu_read_unlock();
1710 return -EINVAL;
1711 }
David S. Millerc10237e2012-06-27 17:05:06 -07001712 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00001713 rcu_read_unlock();
1714 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715}
David S. Millerc6cffba2012-07-26 11:14:38 +00001716EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001718/* called with rcu_read_lock() */
David S. Miller982721f2011-02-16 21:44:24 -08001719static struct rtable *__mkroute_output(const struct fib_result *res,
David Miller1a00fee2012-07-01 02:02:56 +00001720 const struct flowi4 *fl4, int orig_oif,
Julian Anastasovf61759e2011-12-02 11:39:42 +00001721 struct net_device *dev_out,
David S. Miller5ada5522011-02-17 15:29:00 -08001722 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723{
David S. Miller982721f2011-02-16 21:44:24 -08001724 struct fib_info *fi = res->fi;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001725 struct fib_nh_exception *fnhe;
David S. Miller5ada5522011-02-17 15:29:00 -08001726 struct in_device *in_dev;
David S. Miller982721f2011-02-16 21:44:24 -08001727 u16 type = res->type;
David S. Miller5ada5522011-02-17 15:29:00 -08001728 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729
Thomas Grafd0daebc32012-06-12 00:44:01 +00001730 in_dev = __in_dev_get_rcu(dev_out);
1731 if (!in_dev)
David S. Miller5ada5522011-02-17 15:29:00 -08001732 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733
Thomas Grafd0daebc32012-06-12 00:44:01 +00001734 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1735 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
1736 return ERR_PTR(-EINVAL);
1737
David S. Miller68a5e3d2011-03-11 20:07:33 -05001738 if (ipv4_is_lbcast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08001739 type = RTN_BROADCAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001740 else if (ipv4_is_multicast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08001741 type = RTN_MULTICAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001742 else if (ipv4_is_zeronet(fl4->daddr))
David S. Miller5ada5522011-02-17 15:29:00 -08001743 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744
1745 if (dev_out->flags & IFF_LOOPBACK)
1746 flags |= RTCF_LOCAL;
1747
David S. Miller982721f2011-02-16 21:44:24 -08001748 if (type == RTN_BROADCAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749 flags |= RTCF_BROADCAST | RTCF_LOCAL;
David S. Miller982721f2011-02-16 21:44:24 -08001750 fi = NULL;
1751 } else if (type == RTN_MULTICAST) {
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00001752 flags |= RTCF_MULTICAST | RTCF_LOCAL;
David S. Miller813b3b52011-04-28 14:48:42 -07001753 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
1754 fl4->flowi4_proto))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755 flags &= ~RTCF_LOCAL;
1756 /* If multicast route do not exist use
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00001757 * default one, but do not gateway in this case.
1758 * Yes, it is hack.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759 */
David S. Miller982721f2011-02-16 21:44:24 -08001760 if (fi && res->prefixlen < 4)
1761 fi = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 }
1763
David S. Millerf2bb4be2012-07-17 12:20:47 -07001764 fnhe = NULL;
1765 if (fi) {
David S. Millerc5038a82012-07-31 15:02:02 -07001766 struct rtable __rcu **prth;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001767
David S. Millerc5038a82012-07-31 15:02:02 -07001768 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
1769 if (fnhe)
1770 prth = &fnhe->fnhe_rth;
1771 else
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001772 prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
David S. Millerc5038a82012-07-31 15:02:02 -07001773 rth = rcu_dereference(*prth);
1774 if (rt_cache_valid(rth)) {
1775 dst_hold(&rth->dst);
1776 return rth;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001777 }
1778 }
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001779 rth = rt_dst_alloc(dev_out,
1780 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerf2bb4be2012-07-17 12:20:47 -07001781 IN_DEV_CONF_GET(in_dev, NOXFRM),
David S. Millerc5038a82012-07-31 15:02:02 -07001782 fi);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00001783 if (!rth)
David S. Miller5ada5522011-02-17 15:29:00 -08001784 return ERR_PTR(-ENOBUFS);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00001785
David S. Millercf911662011-04-28 14:31:47 -07001786 rth->dst.output = ip_output;
1787
David S. Millercf911662011-04-28 14:31:47 -07001788 rth->rt_genid = rt_genid(dev_net(dev_out));
1789 rth->rt_flags = flags;
1790 rth->rt_type = type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001791 rth->rt_is_input = 0;
David S. Miller13378ca2012-07-23 13:57:45 -07001792 rth->rt_iif = orig_oif ? : 0;
David S. Miller59436342012-07-10 06:58:42 -07001793 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001794 rth->rt_gateway = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795
1796 RT_CACHE_STAT_INC(out_slow_tot);
1797
David S. Miller41347dc2012-06-28 04:05:27 -07001798 if (flags & RTCF_LOCAL)
Changli Gaod8d1f302010-06-10 23:31:35 -07001799 rth->dst.input = ip_local_deliver;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001801 if (flags & RTCF_LOCAL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802 !(dev_out->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001803 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804 RT_CACHE_STAT_INC(out_slow_mc);
1805 }
1806#ifdef CONFIG_IP_MROUTE
David S. Miller982721f2011-02-16 21:44:24 -08001807 if (type == RTN_MULTICAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808 if (IN_DEV_MFORWARD(in_dev) &&
David S. Miller813b3b52011-04-28 14:48:42 -07001809 !ipv4_is_local_multicast(fl4->daddr)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001810 rth->dst.input = ip_mr_input;
1811 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001812 }
1813 }
1814#endif
1815 }
1816
David S. Millerf2bb4be2012-07-17 12:20:47 -07001817 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818
David S. Miller5ada5522011-02-17 15:29:00 -08001819 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820}
1821
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822/*
1823 * Major route resolver routine.
1824 */
1825
David S. Miller89aef892012-07-17 11:00:09 -07001826struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828 struct net_device *dev_out = NULL;
Julian Anastasovf61759e2011-12-02 11:39:42 +00001829 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07001830 unsigned int flags = 0;
1831 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08001832 struct rtable *rth;
David S. Miller813b3b52011-04-28 14:48:42 -07001833 int orig_oif;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834
David S. Miller85b91b02012-07-13 08:21:29 -07001835 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001837 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838
David S. Miller813b3b52011-04-28 14:48:42 -07001839 orig_oif = fl4->flowi4_oif;
1840
1841 fl4->flowi4_iif = net->loopback_dev->ifindex;
1842 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
1843 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
1844 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08001845
David S. Miller010c2702011-02-17 15:37:09 -08001846 rcu_read_lock();
David S. Miller813b3b52011-04-28 14:48:42 -07001847 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08001848 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07001849 if (ipv4_is_multicast(fl4->saddr) ||
1850 ipv4_is_lbcast(fl4->saddr) ||
1851 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852 goto out;
1853
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854 /* I removed check for oif == dev_out->oif here.
1855 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08001856 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
1857 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858 2. Moreover, we are allowed to send packets with saddr
1859 of another iface. --ANK
1860 */
1861
David S. Miller813b3b52011-04-28 14:48:42 -07001862 if (fl4->flowi4_oif == 0 &&
1863 (ipv4_is_multicast(fl4->daddr) ||
1864 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07001865 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001866 dev_out = __ip_dev_find(net, fl4->saddr, false);
Julian Anastasova210d012008-10-01 07:28:28 -07001867 if (dev_out == NULL)
1868 goto out;
1869
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870 /* Special hack: user can direct multicasts
1871 and limited broadcast via necessary interface
1872 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
1873 This hack is not just for fun, it allows
1874 vic,vat and friends to work.
1875 They bind socket to loopback, set ttl to zero
1876 and expect that it will work.
1877 From the viewpoint of routing cache they are broken,
1878 because we are not allowed to build multicast path
1879 with loopback source addr (look, routing cache
1880 cannot know, that ttl is zero, so that packet
1881 will not leave this host and route is valid).
1882 Luckily, this hack is good workaround.
1883 */
1884
David S. Miller813b3b52011-04-28 14:48:42 -07001885 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001886 goto make_route;
1887 }
Julian Anastasova210d012008-10-01 07:28:28 -07001888
David S. Miller813b3b52011-04-28 14:48:42 -07001889 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07001890 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001891 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07001892 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07001893 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 }
1895
1896
David S. Miller813b3b52011-04-28 14:48:42 -07001897 if (fl4->flowi4_oif) {
1898 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001899 rth = ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900 if (dev_out == NULL)
1901 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07001902
1903 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00001904 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08001905 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00001906 goto out;
1907 }
David S. Miller813b3b52011-04-28 14:48:42 -07001908 if (ipv4_is_local_multicast(fl4->daddr) ||
1909 ipv4_is_lbcast(fl4->daddr)) {
1910 if (!fl4->saddr)
1911 fl4->saddr = inet_select_addr(dev_out, 0,
1912 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 goto make_route;
1914 }
David S. Miller813b3b52011-04-28 14:48:42 -07001915 if (fl4->saddr) {
1916 if (ipv4_is_multicast(fl4->daddr))
1917 fl4->saddr = inet_select_addr(dev_out, 0,
1918 fl4->flowi4_scope);
1919 else if (!fl4->daddr)
1920 fl4->saddr = inet_select_addr(dev_out, 0,
1921 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922 }
1923 }
1924
David S. Miller813b3b52011-04-28 14:48:42 -07001925 if (!fl4->daddr) {
1926 fl4->daddr = fl4->saddr;
1927 if (!fl4->daddr)
1928 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08001929 dev_out = net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07001930 fl4->flowi4_oif = net->loopback_dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931 res.type = RTN_LOCAL;
1932 flags |= RTCF_LOCAL;
1933 goto make_route;
1934 }
1935
David S. Miller813b3b52011-04-28 14:48:42 -07001936 if (fib_lookup(net, fl4, &res)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001938 res.table = NULL;
David S. Miller813b3b52011-04-28 14:48:42 -07001939 if (fl4->flowi4_oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940 /* Apparently, routing tables are wrong. Assume,
1941 that the destination is on link.
1942
1943 WHY? DW.
1944 Because we are allowed to send to iface
1945 even if it has NO routes and NO assigned
1946 addresses. When oif is specified, routing
1947 tables are looked up with only one purpose:
1948 to catch if destination is gatewayed, rather than
1949 direct. Moreover, if MSG_DONTROUTE is set,
1950 we send packet, ignoring both routing tables
1951 and ifaddr state. --ANK
1952
1953
1954 We could make it even if oif is unknown,
1955 likely IPv6, but we do not.
1956 */
1957
David S. Miller813b3b52011-04-28 14:48:42 -07001958 if (fl4->saddr == 0)
1959 fl4->saddr = inet_select_addr(dev_out, 0,
1960 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 res.type = RTN_UNICAST;
1962 goto make_route;
1963 }
David S. Millerb23dd4f2011-03-02 14:31:35 -08001964 rth = ERR_PTR(-ENETUNREACH);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965 goto out;
1966 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967
1968 if (res.type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07001969 if (!fl4->saddr) {
Joel Sing9fc3bbb2011-01-03 20:24:20 +00001970 if (res.fi->fib_prefsrc)
David S. Miller813b3b52011-04-28 14:48:42 -07001971 fl4->saddr = res.fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00001972 else
David S. Miller813b3b52011-04-28 14:48:42 -07001973 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00001974 }
Denis V. Lunevb40afd02008-01-22 22:06:19 -08001975 dev_out = net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07001976 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977 res.fi = NULL;
1978 flags |= RTCF_LOCAL;
1979 goto make_route;
1980 }
1981
1982#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller813b3b52011-04-28 14:48:42 -07001983 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
David S. Miller1b7fe5932011-03-10 17:01:16 -08001984 fib_select_multipath(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985 else
1986#endif
David S. Miller21d8c492011-04-14 14:49:37 -07001987 if (!res.prefixlen &&
1988 res.table->tb_num_default > 1 &&
David S. Miller813b3b52011-04-28 14:48:42 -07001989 res.type == RTN_UNICAST && !fl4->flowi4_oif)
David S. Miller0c838ff2011-01-31 16:16:50 -08001990 fib_select_default(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991
David S. Miller813b3b52011-04-28 14:48:42 -07001992 if (!fl4->saddr)
1993 fl4->saddr = FIB_RES_PREFSRC(net, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995 dev_out = FIB_RES_DEV(res);
David S. Miller813b3b52011-04-28 14:48:42 -07001996 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997
1998
1999make_route:
David Miller1a00fee2012-07-01 02:02:56 +00002000 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002001
David S. Miller010c2702011-02-17 15:37:09 -08002002out:
2003 rcu_read_unlock();
David S. Millerb23dd4f2011-03-02 14:31:35 -08002004 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002006EXPORT_SYMBOL_GPL(__ip_route_output_key);
2007
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002008static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2009{
2010 return NULL;
2011}
2012
Steffen Klassertebb762f2011-11-23 02:12:51 +00002013static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002014{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002015 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2016
2017 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002018}
2019
David S. Miller6700c272012-07-17 03:29:28 -07002020static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2021 struct sk_buff *skb, u32 mtu)
David S. Miller14e50e52007-05-24 18:17:54 -07002022{
2023}
2024
/*
 * ->redirect hook of ipv4_dst_blackhole_ops: intentionally a no-op,
 * none of the arguments is used.
 */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
	/* nothing to do */
}
2029
Held Bernhard0972ddb2011-04-24 22:07:32 +00002030static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2031 unsigned long old)
2032{
2033 return NULL;
2034}
2035
David S. Miller14e50e52007-05-24 18:17:54 -07002036static struct dst_ops ipv4_dst_blackhole_ops = {
2037 .family = AF_INET,
Harvey Harrison09640e62009-02-01 00:45:17 -08002038 .protocol = cpu_to_be16(ETH_P_IP),
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002039 .check = ipv4_blackhole_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +00002040 .mtu = ipv4_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -08002041 .default_advmss = ipv4_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -07002042 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
David S. Millerb587ee32012-07-12 00:39:24 -07002043 .redirect = ipv4_rt_blackhole_redirect,
Held Bernhard0972ddb2011-04-24 22:07:32 +00002044 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -07002045 .neigh_lookup = ipv4_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -07002046};
2047
David S. Miller2774c132011-03-01 14:59:04 -08002048struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07002049{
David S. Miller2774c132011-03-01 14:59:04 -08002050 struct rtable *ort = (struct rtable *) dst_orig;
David S. Millerf5b0a872012-07-19 12:31:33 -07002051 struct rtable *rt;
David S. Miller14e50e52007-05-24 18:17:54 -07002052
David S. Millerf5b0a872012-07-19 12:31:33 -07002053 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07002054 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002055 struct dst_entry *new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07002056
David S. Miller14e50e52007-05-24 18:17:54 -07002057 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08002058 new->input = dst_discard;
2059 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -07002060
Changli Gaod8d1f302010-06-10 23:31:35 -07002061 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -07002062 if (new->dev)
2063 dev_hold(new->dev);
2064
David S. Miller9917e1e82012-07-17 14:44:26 -07002065 rt->rt_is_input = ort->rt_is_input;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002066 rt->rt_iif = ort->rt_iif;
David S. Miller59436342012-07-10 06:58:42 -07002067 rt->rt_pmtu = ort->rt_pmtu;
David S. Miller14e50e52007-05-24 18:17:54 -07002068
Denis V. Luneve84f84f2008-07-05 19:04:32 -07002069 rt->rt_genid = rt_genid(net);
David S. Miller14e50e52007-05-24 18:17:54 -07002070 rt->rt_flags = ort->rt_flags;
2071 rt->rt_type = ort->rt_type;
David S. Miller14e50e52007-05-24 18:17:54 -07002072 rt->rt_gateway = ort->rt_gateway;
David S. Miller14e50e52007-05-24 18:17:54 -07002073
2074 dst_free(new);
2075 }
2076
David S. Miller2774c132011-03-01 14:59:04 -08002077 dst_release(dst_orig);
2078
2079 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07002080}
2081
David S. Miller9d6ec932011-03-12 01:12:47 -05002082struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002083 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084{
David S. Miller9d6ec932011-03-12 01:12:47 -05002085 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002086
David S. Millerb23dd4f2011-03-02 14:31:35 -08002087 if (IS_ERR(rt))
2088 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002089
David S. Miller56157872011-05-02 14:37:45 -07002090 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002091 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2092 flowi4_to_flowi(flp4),
2093 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094
David S. Millerb23dd4f2011-03-02 14:31:35 -08002095 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002097EXPORT_SYMBOL_GPL(ip_route_output_flow);
2098
David S. Millerf1ce3062012-07-12 10:10:17 -07002099static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2100 struct flowi4 *fl4, struct sk_buff *skb, u32 pid,
2101 u32 seq, int event, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002102{
Eric Dumazet511c3f92009-06-02 05:14:27 +00002103 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 struct rtmsg *r;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002105 struct nlmsghdr *nlh;
Steffen Klassert2bc8ca42011-10-11 01:12:02 +00002106 unsigned long expires = 0;
David S. Millerf1850712012-07-10 07:26:01 -07002107 u32 error;
Julian Anastasov521f5492012-07-20 12:02:08 +03002108 u32 metrics[RTAX_MAX];
Thomas Grafbe403ea2006-08-17 18:15:17 -07002109
2110 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
2111 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002112 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002113
2114 r = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115 r->rtm_family = AF_INET;
2116 r->rtm_dst_len = 32;
2117 r->rtm_src_len = 0;
David Millerd6c0a4f2012-07-01 02:02:59 +00002118 r->rtm_tos = fl4->flowi4_tos;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119 r->rtm_table = RT_TABLE_MAIN;
David S. Millerf3756b72012-04-01 20:39:02 -04002120 if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
2121 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122 r->rtm_type = rt->rt_type;
2123 r->rtm_scope = RT_SCOPE_UNIVERSE;
2124 r->rtm_protocol = RTPROT_UNSPEC;
2125 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2126 if (rt->rt_flags & RTCF_NOTIFY)
2127 r->rtm_flags |= RTM_F_NOTIFY;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002128
David S. Millerf1ce3062012-07-12 10:10:17 -07002129 if (nla_put_be32(skb, RTA_DST, dst))
David S. Millerf3756b72012-04-01 20:39:02 -04002130 goto nla_put_failure;
David Miller1a00fee2012-07-01 02:02:56 +00002131 if (src) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132 r->rtm_src_len = 32;
David Miller1a00fee2012-07-01 02:02:56 +00002133 if (nla_put_be32(skb, RTA_SRC, src))
David S. Millerf3756b72012-04-01 20:39:02 -04002134 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135 }
David S. Millerf3756b72012-04-01 20:39:02 -04002136 if (rt->dst.dev &&
2137 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2138 goto nla_put_failure;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002139#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf3756b72012-04-01 20:39:02 -04002140 if (rt->dst.tclassid &&
2141 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2142 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143#endif
David S. Miller41347dc2012-06-28 04:05:27 -07002144 if (!rt_is_input_route(rt) &&
David Millerd6c0a4f2012-07-01 02:02:59 +00002145 fl4->saddr != src) {
2146 if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
David S. Millerf3756b72012-04-01 20:39:02 -04002147 goto nla_put_failure;
2148 }
David S. Millerf8126f12012-07-13 05:03:45 -07002149 if (rt->rt_gateway &&
David S. Millerf3756b72012-04-01 20:39:02 -04002150 nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
2151 goto nla_put_failure;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002152
Julian Anastasov521f5492012-07-20 12:02:08 +03002153 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2154 if (rt->rt_pmtu)
2155 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2156 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -07002157 goto nla_put_failure;
2158
David Millerb4869882012-07-01 02:03:01 +00002159 if (fl4->flowi4_mark &&
2160 nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark))
David S. Millerf3756b72012-04-01 20:39:02 -04002161 goto nla_put_failure;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002162
Changli Gaod8d1f302010-06-10 23:31:35 -07002163 error = rt->dst.error;
David S. Miller59436342012-07-10 06:58:42 -07002164 expires = rt->dst.expires;
2165 if (expires) {
2166 if (time_before(jiffies, expires))
2167 expires -= jiffies;
2168 else
2169 expires = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002170 }
Thomas Grafbe403ea2006-08-17 18:15:17 -07002171
David S. Millerc7537962010-11-11 17:07:48 -08002172 if (rt_is_input_route(rt)) {
David S. Millerf1ce3062012-07-12 10:10:17 -07002173 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
2174 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002175 }
2176
David S. Millerf1850712012-07-10 07:26:01 -07002177 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002178 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179
Thomas Grafbe403ea2006-08-17 18:15:17 -07002180 return nlmsg_end(skb, nlh);
2181
2182nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002183 nlmsg_cancel(skb, nlh);
2184 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002185}
2186
Daniel Baluta5e73ea12012-04-15 01:34:41 +00002187static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002189 struct net *net = sock_net(in_skb->sk);
Thomas Grafd889ce32006-08-17 18:15:44 -07002190 struct rtmsg *rtm;
2191 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192 struct rtable *rt = NULL;
David Millerd6c0a4f2012-07-01 02:02:59 +00002193 struct flowi4 fl4;
Al Viro9e12bb22006-09-26 21:25:20 -07002194 __be32 dst = 0;
2195 __be32 src = 0;
2196 u32 iif;
Thomas Grafd889ce32006-08-17 18:15:44 -07002197 int err;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002198 int mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199 struct sk_buff *skb;
2200
Thomas Grafd889ce32006-08-17 18:15:44 -07002201 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2202 if (err < 0)
2203 goto errout;
2204
2205 rtm = nlmsg_data(nlh);
2206
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafd889ce32006-08-17 18:15:44 -07002208 if (skb == NULL) {
2209 err = -ENOBUFS;
2210 goto errout;
2211 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212
2213 /* Reserve room for dummy headers, this skb can pass
2214 through good chunk of routing engine.
2215 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002216 skb_reset_mac_header(skb);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002217 skb_reset_network_header(skb);
Stephen Hemmingerd2c962b2006-04-17 17:27:11 -07002218
2219 /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07002220 ip_hdr(skb)->protocol = IPPROTO_ICMP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2222
Al Viro17fb2c62006-09-26 22:15:25 -07002223 src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
2224 dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
Thomas Grafd889ce32006-08-17 18:15:44 -07002225 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002226 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227
David Millerd6c0a4f2012-07-01 02:02:59 +00002228 memset(&fl4, 0, sizeof(fl4));
2229 fl4.daddr = dst;
2230 fl4.saddr = src;
2231 fl4.flowi4_tos = rtm->rtm_tos;
2232 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2233 fl4.flowi4_mark = mark;
2234
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235 if (iif) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002236 struct net_device *dev;
2237
Denis V. Lunev19375042008-02-28 20:52:04 -08002238 dev = __dev_get_by_index(net, iif);
Thomas Grafd889ce32006-08-17 18:15:44 -07002239 if (dev == NULL) {
2240 err = -ENODEV;
2241 goto errout_free;
2242 }
2243
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244 skb->protocol = htons(ETH_P_IP);
2245 skb->dev = dev;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002246 skb->mark = mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247 local_bh_disable();
2248 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2249 local_bh_enable();
Thomas Grafd889ce32006-08-17 18:15:44 -07002250
Eric Dumazet511c3f92009-06-02 05:14:27 +00002251 rt = skb_rtable(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07002252 if (err == 0 && rt->dst.error)
2253 err = -rt->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254 } else {
David S. Miller9d6ec932011-03-12 01:12:47 -05002255 rt = ip_route_output_key(net, &fl4);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002256
2257 err = 0;
2258 if (IS_ERR(rt))
2259 err = PTR_ERR(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 }
Thomas Grafd889ce32006-08-17 18:15:44 -07002261
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262 if (err)
Thomas Grafd889ce32006-08-17 18:15:44 -07002263 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264
Changli Gaod8d1f302010-06-10 23:31:35 -07002265 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266 if (rtm->rtm_flags & RTM_F_NOTIFY)
2267 rt->rt_flags |= RTCF_NOTIFY;
2268
David S. Millerf1ce3062012-07-12 10:10:17 -07002269 err = rt_fill_info(net, dst, src, &fl4, skb,
David Miller1a00fee2012-07-01 02:02:56 +00002270 NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
Denis V. Lunev19375042008-02-28 20:52:04 -08002271 RTM_NEWROUTE, 0, 0);
Thomas Grafd889ce32006-08-17 18:15:44 -07002272 if (err <= 0)
2273 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274
Denis V. Lunev19375042008-02-28 20:52:04 -08002275 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafd889ce32006-08-17 18:15:44 -07002276errout:
Thomas Graf2942e902006-08-15 00:30:25 -07002277 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278
Thomas Grafd889ce32006-08-17 18:15:44 -07002279errout_free:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280 kfree_skb(skb);
Thomas Grafd889ce32006-08-17 18:15:44 -07002281 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282}
2283
2284int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2285{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002286 return skb->len;
2287}
2288
2289void ip_rt_multicast_event(struct in_device *in_dev)
2290{
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07002291 rt_cache_flush(dev_net(in_dev->dev), 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292}
2293
2294#ifdef CONFIG_SYSCTL
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002295static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002296 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297 size_t *lenp, loff_t *ppos)
2298{
2299 if (write) {
Denis V. Lunev639e1042008-07-05 19:02:06 -07002300 int flush_delay;
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002301 ctl_table ctl;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002302 struct net *net;
Denis V. Lunev639e1042008-07-05 19:02:06 -07002303
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002304 memcpy(&ctl, __ctl, sizeof(ctl));
2305 ctl.data = &flush_delay;
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002306 proc_dointvec(&ctl, write, buffer, lenp, ppos);
Denis V. Lunev639e1042008-07-05 19:02:06 -07002307
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002308 net = (struct net *)__ctl->extra1;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002309 rt_cache_flush(net, flush_delay);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002311 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312
2313 return -EINVAL;
2314}
2315
Al Viroeeb61f72008-07-27 08:59:33 +01002316static ctl_table ipv4_route_table[] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002317 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318 .procname = "gc_thresh",
2319 .data = &ipv4_dst_ops.gc_thresh,
2320 .maxlen = sizeof(int),
2321 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002322 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323 },
2324 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325 .procname = "max_size",
2326 .data = &ip_rt_max_size,
2327 .maxlen = sizeof(int),
2328 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002329 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002330 },
2331 {
2332 /* Deprecated. Use gc_min_interval_ms */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002333
Linus Torvalds1da177e2005-04-16 15:20:36 -07002334 .procname = "gc_min_interval",
2335 .data = &ip_rt_gc_min_interval,
2336 .maxlen = sizeof(int),
2337 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002338 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002339 },
2340 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341 .procname = "gc_min_interval_ms",
2342 .data = &ip_rt_gc_min_interval,
2343 .maxlen = sizeof(int),
2344 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002345 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346 },
2347 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348 .procname = "gc_timeout",
2349 .data = &ip_rt_gc_timeout,
2350 .maxlen = sizeof(int),
2351 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002352 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002353 },
2354 {
Eric Dumazet9f28a2f2011-12-21 15:47:16 -05002355 .procname = "gc_interval",
2356 .data = &ip_rt_gc_interval,
2357 .maxlen = sizeof(int),
2358 .mode = 0644,
2359 .proc_handler = proc_dointvec_jiffies,
2360 },
2361 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362 .procname = "redirect_load",
2363 .data = &ip_rt_redirect_load,
2364 .maxlen = sizeof(int),
2365 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002366 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367 },
2368 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369 .procname = "redirect_number",
2370 .data = &ip_rt_redirect_number,
2371 .maxlen = sizeof(int),
2372 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002373 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374 },
2375 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376 .procname = "redirect_silence",
2377 .data = &ip_rt_redirect_silence,
2378 .maxlen = sizeof(int),
2379 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002380 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 },
2382 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002383 .procname = "error_cost",
2384 .data = &ip_rt_error_cost,
2385 .maxlen = sizeof(int),
2386 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002387 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002388 },
2389 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002390 .procname = "error_burst",
2391 .data = &ip_rt_error_burst,
2392 .maxlen = sizeof(int),
2393 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002394 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002395 },
2396 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002397 .procname = "gc_elasticity",
2398 .data = &ip_rt_gc_elasticity,
2399 .maxlen = sizeof(int),
2400 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002401 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402 },
2403 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404 .procname = "mtu_expires",
2405 .data = &ip_rt_mtu_expires,
2406 .maxlen = sizeof(int),
2407 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002408 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409 },
2410 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002411 .procname = "min_pmtu",
2412 .data = &ip_rt_min_pmtu,
2413 .maxlen = sizeof(int),
2414 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002415 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416 },
2417 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002418 .procname = "min_adv_mss",
2419 .data = &ip_rt_min_advmss,
2420 .maxlen = sizeof(int),
2421 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002422 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002424 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002425};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002426
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002427static struct ctl_table ipv4_route_flush_table[] = {
2428 {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002429 .procname = "flush",
2430 .maxlen = sizeof(int),
2431 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002432 .proc_handler = ipv4_sysctl_rtcache_flush,
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002433 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002434 { },
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002435};
2436
2437static __net_init int sysctl_route_net_init(struct net *net)
2438{
2439 struct ctl_table *tbl;
2440
2441 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08002442 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002443 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
2444 if (tbl == NULL)
2445 goto err_dup;
2446 }
2447 tbl[0].extra1 = net;
2448
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00002449 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002450 if (net->ipv4.route_hdr == NULL)
2451 goto err_reg;
2452 return 0;
2453
2454err_reg:
2455 if (tbl != ipv4_route_flush_table)
2456 kfree(tbl);
2457err_dup:
2458 return -ENOMEM;
2459}
2460
2461static __net_exit void sysctl_route_net_exit(struct net *net)
2462{
2463 struct ctl_table *tbl;
2464
2465 tbl = net->ipv4.route_hdr->ctl_table_arg;
2466 unregister_net_sysctl_table(net->ipv4.route_hdr);
2467 BUG_ON(tbl == ipv4_route_flush_table);
2468 kfree(tbl);
2469}
2470
2471static __net_initdata struct pernet_operations sysctl_route_ops = {
2472 .init = sysctl_route_net_init,
2473 .exit = sysctl_route_net_exit,
2474};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002475#endif
2476
Neil Horman3ee94372010-05-08 01:57:52 -07002477static __net_init int rt_genid_init(struct net *net)
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002478{
Neil Horman3ee94372010-05-08 01:57:52 -07002479 get_random_bytes(&net->ipv4.rt_genid,
2480 sizeof(net->ipv4.rt_genid));
David S. Miller436c3b62011-03-24 17:42:21 -07002481 get_random_bytes(&net->ipv4.dev_addr_genid,
2482 sizeof(net->ipv4.dev_addr_genid));
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002483 return 0;
2484}
2485
Neil Horman3ee94372010-05-08 01:57:52 -07002486static __net_initdata struct pernet_operations rt_genid_ops = {
2487 .init = rt_genid_init,
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002488};
2489
David S. Millerc3426b42012-06-09 16:27:05 -07002490static int __net_init ipv4_inetpeer_init(struct net *net)
2491{
2492 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2493
2494 if (!bp)
2495 return -ENOMEM;
2496 inet_peer_base_init(bp);
2497 net->ipv4.peers = bp;
2498 return 0;
2499}
2500
2501static void __net_exit ipv4_inetpeer_exit(struct net *net)
2502{
2503 struct inet_peer_base *bp = net->ipv4.peers;
2504
2505 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07002506 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07002507 kfree(bp);
2508}
2509
2510static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
2511 .init = ipv4_inetpeer_init,
2512 .exit = ipv4_inetpeer_exit,
2513};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002514
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-CPU accounting array; ip_rt_init() allocates 256 entries per CPU. */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002518
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519int __init ip_rt_init(void)
2520{
Eric Dumazet424c4b72005-07-05 14:58:19 -07002521 int rc = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522
Patrick McHardyc7066f72011-01-14 13:36:42 +01002523#ifdef CONFIG_IP_ROUTE_CLASSID
Ingo Molnar0dcec8c2009-02-25 14:07:33 +01002524 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002525 if (!ip_rt_acct)
2526 panic("IP: failed to allocate ip_rt_acct\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002527#endif
2528
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002529 ipv4_dst_ops.kmem_cachep =
2530 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09002531 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532
David S. Miller14e50e52007-05-24 18:17:54 -07002533 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
2534
Eric Dumazetfc66f952010-10-08 06:37:34 +00002535 if (dst_entries_init(&ipv4_dst_ops) < 0)
2536 panic("IP: failed to allocate ipv4_dst_ops counter\n");
2537
2538 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
2539 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
2540
David S. Miller89aef892012-07-17 11:00:09 -07002541 ipv4_dst_ops.gc_thresh = ~0;
2542 ip_rt_max_size = INT_MAX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544 devinet_init();
2545 ip_fib_init();
2546
Denis V. Lunev73b38712008-02-28 20:51:18 -08002547 if (ip_rt_proc_init())
Joe Perches058bd4d2012-03-11 18:36:11 +00002548 pr_err("Unable to create route proc files\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549#ifdef CONFIG_XFRM
2550 xfrm_init();
Neil Hormana33bc5c2009-07-30 18:52:15 -07002551 xfrm4_init(ip_rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552#endif
Greg Rosec7ac8672011-06-10 01:27:09 +00002553 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
Thomas Graf63f34442007-03-22 11:55:17 -07002554
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002555#ifdef CONFIG_SYSCTL
2556 register_pernet_subsys(&sysctl_route_ops);
2557#endif
Neil Horman3ee94372010-05-08 01:57:52 -07002558 register_pernet_subsys(&rt_genid_ops);
David S. Millerc3426b42012-06-09 16:27:05 -07002559 register_pernet_subsys(&ipv4_inetpeer_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560 return rc;
2561}
2562
Al Viroa1bc6eb2008-07-30 06:32:52 -04002563#ifdef CONFIG_SYSCTL
Al Viroeeb61f72008-07-27 08:59:33 +01002564/*
2565 * We really need to sanitize the damn ipv4 init order, then all
2566 * this nonsense will go away.
2567 */
2568void __init ip_static_sysctl_init(void)
2569{
Eric W. Biederman4e5ca782012-04-19 13:32:39 +00002570 register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
Al Viroeeb61f72008-07-27 08:59:33 +01002571}
Al Viroa1bc6eb2008-07-30 06:32:52 -04002572#endif