/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Support for INET connection oriented protocols.
 *
 * Authors:     See the TCP sources
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/jhash.h>

#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>

#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
EXPORT_SYMBOL(inet_csk_timer_bug_msg);
#endif

#if IS_ENABLED(CONFIG_IPV6)
/* match_wildcard == true:  IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
 *                          only, and any IPv4 addresses if not IPv6 only
 * match_wildcard == false: addresses must be exactly the same, i.e.
 *                          IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
 *                          and 0.0.0.0 equals to 0.0.0.0 only
 */
static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
                                bool match_wildcard)
{
        const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
        int sk2_ipv6only = inet_v6_ipv6only(sk2);
        int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
        int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;

        /* if both are mapped, treat as IPv4 */
        if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
                if (!sk2_ipv6only) {
                        if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
                                return 1;
                        if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
                                return match_wildcard;
                }
                return 0;
        }

        if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
                return 1;

        if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
            !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
                return 1;

        if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
            !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
                return 1;

        if (sk2_rcv_saddr6 &&
            ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
                return 1;

        return 0;
}
#endif

/* match_wildcard == true:  0.0.0.0 equals to any IPv4 addresses
 * match_wildcard == false: addresses must be exactly the same, i.e.
 *                          0.0.0.0 only equals to 0.0.0.0
 */
static int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
                                bool match_wildcard)
{
        if (!ipv6_only_sock(sk2)) {
                if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
                        return 1;
                if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
                        return match_wildcard;
        }
        return 0;
}

int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
                         bool match_wildcard)
{
#if IS_ENABLED(CONFIG_IPV6)
        if (sk->sk_family == AF_INET6)
                return ipv6_rcv_saddr_equal(sk, sk2, match_wildcard);
#endif
        return ipv4_rcv_saddr_equal(sk, sk2, match_wildcard);
}
EXPORT_SYMBOL(inet_rcv_saddr_equal);

void inet_get_local_port_range(struct net *net, int *low, int *high)
{
        unsigned int seq;

        do {
                seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);

                *low = net->ipv4.ip_local_ports.range[0];
                *high = net->ipv4.ip_local_ports.range[1];
        } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
}
EXPORT_SYMBOL(inet_get_local_port_range);

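/* Walk every socket bound to the same port bucket and decide whether
 * binding @sk would conflict.  Roughly: sockets bound to different
 * devices never conflict; otherwise there is a conflict unless both
 * sockets set SO_REUSEADDR (and the other one is not listening), or
 * both set SO_REUSEPORT with the same effective uid.  With @relax ==
 * false even two SO_REUSEADDR sockets on the same address are treated
 * as conflicting.  Local addresses are compared with
 * inet_rcv_saddr_equal(), treating wildcards as matching.
 */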
static int inet_csk_bind_conflict(const struct sock *sk,
                                  const struct inet_bind_bucket *tb,
                                  bool relax, bool reuseport_ok)
{
        struct sock *sk2;
        bool reuse = sk->sk_reuse;
        bool reuseport = !!sk->sk_reuseport && reuseport_ok;
        kuid_t uid = sock_i_uid((struct sock *)sk);

        /*
         * Unlike other sk lookup places we do not check
         * for sk_net here, since _all_ the socks listed
         * in tb->owners list belong to the same net - the
         * one this bucket belongs to.
         */

        sk_for_each_bound(sk2, &tb->owners) {
                if (sk != sk2 &&
                    (!sk->sk_bound_dev_if ||
                     !sk2->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
                        if ((!reuse || !sk2->sk_reuse ||
                            sk2->sk_state == TCP_LISTEN) &&
                            (!reuseport || !sk2->sk_reuseport ||
                            rcu_access_pointer(sk->sk_reuseport_cb) ||
                            (sk2->sk_state != TCP_TIME_WAIT &&
                             !uid_eq(uid, sock_i_uid(sk2))))) {
                                if (inet_rcv_saddr_equal(sk, sk2, true))
                                        break;
                        }
                        if (!relax && reuse && sk2->sk_reuse &&
                            sk2->sk_state != TCP_LISTEN) {
                                if (inet_rcv_saddr_equal(sk, sk2, true))
                                        break;
                        }
                }
        }
        return sk2 != NULL;
}

/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 * We try to allocate an odd port (and leave even ports for connect())
 */
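/* When snum is zero the search below splits the local port range in two:
 * a socket created with SO_REUSEADDR first scans only the lower half and
 * falls back to the upper half, and odd offsets are scanned before even
 * ones so that even ports stay available for __inet_hash_connect().
 * For example, with the default range [32768, 61000) the first pass
 * covers [32768, 46884).
 */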
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
        bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
        struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
        int ret = 1, attempts = 5, port = snum;
        struct inet_bind_hashbucket *head;
        struct net *net = sock_net(sk);
        int i, low, high, attempt_half;
        struct inet_bind_bucket *tb;
        kuid_t uid = sock_i_uid(sk);
        u32 remaining, offset;
        bool reuseport_ok = !!snum;

        if (port) {
                head = &hinfo->bhash[inet_bhashfn(net, port,
                                                  hinfo->bhash_size)];
                spin_lock_bh(&head->lock);
                inet_bind_bucket_for_each(tb, &head->chain)
                        if (net_eq(ib_net(tb), net) && tb->port == port)
                                goto tb_found;

                goto tb_not_found;
        }
again:
        attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
        inet_get_local_port_range(net, &low, &high);
        high++; /* [32768, 60999] -> [32768, 61000[ */
        if (high - low < 4)
                attempt_half = 0;
        if (attempt_half) {
                int half = low + (((high - low) >> 2) << 1);

                if (attempt_half == 1)
                        high = half;
                else
                        low = half;
        }
        remaining = high - low;
        if (likely(remaining > 1))
                remaining &= ~1U;

        offset = prandom_u32() % remaining;
        /* __inet_hash_connect() favors ports having @low parity
         * We do the opposite to not pollute connect() users.
         */
        offset |= 1U;

other_parity_scan:
        port = low + offset;
        for (i = 0; i < remaining; i += 2, port += 2) {
                if (unlikely(port >= high))
                        port -= remaining;
                if (inet_is_local_reserved_port(net, port))
                        continue;
                head = &hinfo->bhash[inet_bhashfn(net, port,
                                                  hinfo->bhash_size)];
                spin_lock_bh(&head->lock);
                inet_bind_bucket_for_each(tb, &head->chain)
                        if (net_eq(ib_net(tb), net) && tb->port == port) {
                                if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok))
                                        goto tb_found;
                                goto next_port;
                        }
                goto tb_not_found;
next_port:
                spin_unlock_bh(&head->lock);
                cond_resched();
        }

        offset--;
        if (!(offset & 1))
                goto other_parity_scan;

        if (attempt_half == 1) {
                /* OK we now try the upper half of the range */
                attempt_half = 2;
                goto other_half_scan;
        }
        return ret;

tb_not_found:
        tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
                                     net, head, port);
        if (!tb)
                goto fail_unlock;
tb_found:
        if (!hlist_empty(&tb->owners)) {
                if (sk->sk_reuse == SK_FORCE_REUSE)
                        goto success;

                if ((tb->fastreuse > 0 && reuse) ||
                    (tb->fastreuseport > 0 &&
                     !rcu_access_pointer(sk->sk_reuseport_cb) &&
                     sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
                        goto success;
                if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok)) {
                        if ((reuse ||
                             (tb->fastreuseport > 0 &&
                              sk->sk_reuseport &&
                              !rcu_access_pointer(sk->sk_reuseport_cb) &&
                              uid_eq(tb->fastuid, uid))) && !snum &&
                            --attempts >= 0) {
                                spin_unlock_bh(&head->lock);
                                goto again;
                        }
                        goto fail_unlock;
                }
                if (!reuse)
                        tb->fastreuse = 0;
                if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
                        tb->fastreuseport = 0;
        } else {
                tb->fastreuse = reuse;
                if (sk->sk_reuseport) {
                        tb->fastreuseport = 1;
                        tb->fastuid = uid;
                } else {
                        tb->fastreuseport = 0;
                }
        }
success:
        if (!inet_csk(sk)->icsk_bind_hash)
                inet_bind_hash(sk, tb, port);
        WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
        ret = 0;

fail_unlock:
        spin_unlock_bh(&head->lock);
        return ret;
}
EXPORT_SYMBOL_GPL(inet_csk_get_port);

/*
 * Wait for an incoming connection, avoid race conditions. This must be called
 * with the socket locked.
 */
static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        DEFINE_WAIT(wait);
        int err;

        /*
         * True wake-one mechanism for incoming connections: only
         * one process gets woken up, not the 'whole herd'.
         * Since we do not 'race & poll' for established sockets
         * anymore, the common case will execute the loop only once.
         *
         * Subtle issue: "add_wait_queue_exclusive()" will be added
         * after any current non-exclusive waiters, and we know that
         * it will always _stay_ after any new non-exclusive waiters
         * because all non-exclusive waiters are added at the
         * beginning of the wait-queue. As such, it's ok to "drop"
         * our exclusiveness temporarily when we get woken up without
         * having to remove and re-insert us on the wait queue.
         */
        for (;;) {
                prepare_to_wait_exclusive(sk_sleep(sk), &wait,
                                          TASK_INTERRUPTIBLE);
                release_sock(sk);
                if (reqsk_queue_empty(&icsk->icsk_accept_queue))
                        timeo = schedule_timeout(timeo);
                sched_annotate_sleep();
                lock_sock(sk);
                err = 0;
                if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
                        break;
                err = -EINVAL;
                if (sk->sk_state != TCP_LISTEN)
                        break;
                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        break;
                err = -EAGAIN;
                if (!timeo)
                        break;
        }
        finish_wait(sk_sleep(sk), &wait);
        return err;
}

/*
 * This will accept the next outstanding connection.
 */
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
        struct request_sock *req;
        struct sock *newsk;
        int error;

        lock_sock(sk);

        /* We need to make sure that this socket is listening,
         * and that it has something pending.
         */
        error = -EINVAL;
        if (sk->sk_state != TCP_LISTEN)
                goto out_err;

        /* Find already established connection */
        if (reqsk_queue_empty(queue)) {
                long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

                /* If this is a non blocking socket don't sleep */
                error = -EAGAIN;
                if (!timeo)
                        goto out_err;

                error = inet_csk_wait_for_connect(sk, timeo);
                if (error)
                        goto out_err;
        }
        req = reqsk_queue_remove(queue, sk);
        newsk = req->sk;

        if (sk->sk_protocol == IPPROTO_TCP &&
            tcp_rsk(req)->tfo_listener) {
                spin_lock_bh(&queue->fastopenq.lock);
                if (tcp_rsk(req)->tfo_listener) {
                        /* We are still waiting for the final ACK from 3WHS
                         * so can't free req now. Instead, we set req->sk to
                         * NULL to signify that the child socket is taken
                         * so reqsk_fastopen_remove() will free the req
                         * when 3WHS finishes (or is aborted).
                         */
                        req->sk = NULL;
                        req = NULL;
                }
                spin_unlock_bh(&queue->fastopenq.lock);
        }
out:
        release_sock(sk);
        if (req)
                reqsk_put(req);
        return newsk;
out_err:
        newsk = NULL;
        req = NULL;
        *err = error;
        goto out;
}
EXPORT_SYMBOL(inet_csk_accept);

/*
 * Using different timers for retransmit, delayed acks and probes.
 * We may wish to use just one timer maintaining a list of expire jiffies
 * to optimize.
 */
void inet_csk_init_xmit_timers(struct sock *sk,
                               void (*retransmit_handler)(unsigned long),
                               void (*delack_handler)(unsigned long),
                               void (*keepalive_handler)(unsigned long))
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
                    (unsigned long)sk);
        setup_timer(&icsk->icsk_delack_timer, delack_handler,
                    (unsigned long)sk);
        setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
        icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
EXPORT_SYMBOL(inet_csk_init_xmit_timers);

void inet_csk_clear_xmit_timers(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;

        sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
        sk_stop_timer(sk, &icsk->icsk_delack_timer);
        sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);

void inet_csk_delete_keepalive_timer(struct sock *sk)
{
        sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);

void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
        sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
}
EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);

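/* Build the IPv4 route used to answer a pending connection request
 * (typically for the SYN-ACK): the flow is keyed on the addresses and
 * ports recorded in the request_sock, honouring any strict source route
 * option, and NULL is returned (with OUTNOROUTES accounted) when no
 * usable route exists.
 */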
struct dst_entry *inet_csk_route_req(const struct sock *sk,
                                     struct flowi4 *fl4,
                                     const struct request_sock *req)
{
        const struct inet_request_sock *ireq = inet_rsk(req);
        struct net *net = read_pnet(&ireq->ireq_net);
        struct ip_options_rcu *opt = ireq->opt;
        struct rtable *rt;

        flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                           sk->sk_protocol, inet_sk_flowi_flags(sk),
                           (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
                           ireq->ir_loc_addr, ireq->ir_rmt_port,
                           htons(ireq->ir_num), sk->sk_uid);
        security_req_classify_flow(req, flowi4_to_flowi(fl4));
        rt = ip_route_output_flow(net, fl4, sk);
        if (IS_ERR(rt))
                goto no_route;
        if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
                goto route_err;
        return &rt->dst;

route_err:
        ip_rt_put(rt);
no_route:
        __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
        return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_req);

struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
                                            struct sock *newsk,
                                            const struct request_sock *req)
{
        const struct inet_request_sock *ireq = inet_rsk(req);
        struct net *net = read_pnet(&ireq->ireq_net);
        struct inet_sock *newinet = inet_sk(newsk);
        struct ip_options_rcu *opt;
        struct flowi4 *fl4;
        struct rtable *rt;

        fl4 = &newinet->cork.fl.u.ip4;

        rcu_read_lock();
        opt = rcu_dereference(newinet->inet_opt);
        flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                           sk->sk_protocol, inet_sk_flowi_flags(sk),
                           (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
                           ireq->ir_loc_addr, ireq->ir_rmt_port,
                           htons(ireq->ir_num), sk->sk_uid);
        security_req_classify_flow(req, flowi4_to_flowi(fl4));
        rt = ip_route_output_flow(net, fl4, sk);
        if (IS_ERR(rt))
                goto no_route;
        if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
                goto route_err;
        rcu_read_unlock();
        return &rt->dst;

route_err:
        ip_rt_put(rt);
no_route:
        rcu_read_unlock();
        __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
        return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);

#if IS_ENABLED(CONFIG_IPV6)
#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
#else
#define AF_INET_FAMILY(fam) true
#endif

/* Decide when to expire the request and when to resend SYN-ACK */
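/* @expire tells the caller to give up on the request completely, while
 * @resend asks for another SYN-ACK retransmission.  With a non-zero
 * rskq_defer_accept (TCP_DEFER_ACCEPT) an already ACKed request is kept
 * alive without retransmissions until the deferring period runs out.
 */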
static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
                                  const int max_retries,
                                  const u8 rskq_defer_accept,
                                  int *expire, int *resend)
{
        if (!rskq_defer_accept) {
                *expire = req->num_timeout >= thresh;
                *resend = 1;
                return;
        }
        *expire = req->num_timeout >= thresh &&
                  (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
        /*
         * Do not resend while waiting for data after ACK,
         * start to resend on end of deferring period to give
         * last chance for data or ACK to create established socket.
         */
        *resend = !inet_rsk(req)->acked ||
                  req->num_timeout >= rskq_defer_accept - 1;
}

int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
        int err = req->rsk_ops->rtx_syn_ack(parent, req);

        if (!err)
                req->num_retrans++;
        return err;
}
EXPORT_SYMBOL(inet_rtx_syn_ack);

/* return true if req was found in the ehash table */
static bool reqsk_queue_unlink(struct request_sock_queue *queue,
                               struct request_sock *req)
{
        struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
        bool found = false;

        if (sk_hashed(req_to_sk(req))) {
                spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);

                spin_lock(lock);
                found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
                spin_unlock(lock);
        }
        if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
                reqsk_put(req);
        return found;
}

void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
{
        if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
                reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
                reqsk_put(req);
        }
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);

void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
{
        inet_csk_reqsk_queue_drop(sk, req);
        reqsk_put(req);
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);

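/* Per-request timer.  On each expiry the SYN-ACK is retransmitted with
 * exponential backoff (TCP_TIMEOUT_INIT << num_timeout, capped at
 * TCP_RTO_MAX); the request is dropped once the listener has left the
 * LISTEN state or its retry budget is exhausted.  Under accept-queue
 * pressure the threshold is lowered so that old embryonic connections
 * are pruned before they clog the queue.
 */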
static void reqsk_timer_handler(unsigned long data)
{
        struct request_sock *req = (struct request_sock *)data;
        struct sock *sk_listener = req->rsk_listener;
        struct net *net = sock_net(sk_listener);
        struct inet_connection_sock *icsk = inet_csk(sk_listener);
        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
        int qlen, expire = 0, resend = 0;
        int max_retries, thresh;
        u8 defer_accept;

        if (sk_state_load(sk_listener) != TCP_LISTEN)
                goto drop;

        max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
        thresh = max_retries;
        /* Normally all the openreqs are young and become mature
         * (i.e. converted to established socket) for first timeout.
         * If synack was not acknowledged for 1 second, it means
         * one of the following things: synack was lost, ack was lost,
         * rtt is high or nobody planned to ack (i.e. synflood).
         * When server is a bit loaded, queue is populated with old
         * open requests, reducing effective size of queue.
         * When server is well loaded, queue size reduces to zero
         * after several minutes of work. It is not synflood,
         * it is normal operation. The solution is pruning
         * too old entries overriding normal timeout, when
         * situation becomes dangerous.
         *
         * Essentially, we reserve half of room for young
         * embrions; and abort old ones without pity, if old
         * ones are about to clog our table.
         */
        qlen = reqsk_queue_len(queue);
        if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
                int young = reqsk_queue_len_young(queue) << 1;

                while (thresh > 2) {
                        if (qlen < young)
                                break;
                        thresh--;
                        young <<= 1;
                }
        }
        defer_accept = READ_ONCE(queue->rskq_defer_accept);
        if (defer_accept)
                max_retries = defer_accept;
        syn_ack_recalc(req, thresh, max_retries, defer_accept,
                       &expire, &resend);
        req->rsk_ops->syn_ack_timeout(req);
        if (!expire &&
            (!resend ||
             !inet_rtx_syn_ack(sk_listener, req) ||
             inet_rsk(req)->acked)) {
                unsigned long timeo;

                if (req->num_timeout++ == 0)
                        atomic_dec(&queue->young);
                timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
                mod_timer(&req->rsk_timer, jiffies + timeo);
                return;
        }
drop:
        inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
}

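/* Arm the per-request timer and insert the request socket into the
 * established hash so incoming packets can find it; the write barrier
 * below ensures all request fields are visible before lookups can
 * observe a non-zero refcount.
 */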
static void reqsk_queue_hash_req(struct request_sock *req,
                                 unsigned long timeout)
{
        req->num_retrans = 0;
        req->num_timeout = 0;
        req->sk = NULL;

        setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler,
                           (unsigned long)req);
        mod_timer(&req->rsk_timer, jiffies + timeout);

        inet_ehash_insert(req_to_sk(req), NULL);
        /* before letting lookups find us, make sure all req fields
         * are committed to memory and refcnt initialized.
         */
        smp_wmb();
        atomic_set(&req->rsk_refcnt, 2 + 1);
}

void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
                                   unsigned long timeout)
{
        reqsk_queue_hash_req(req, timeout);
        inet_csk_reqsk_queue_added(sk);
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);

/**
 * inet_csk_clone_lock - clone an inet socket, and lock its clone
 * @sk: the socket to clone
 * @req: request_sock
 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *inet_csk_clone_lock(const struct sock *sk,
                                 const struct request_sock *req,
                                 const gfp_t priority)
{
        struct sock *newsk = sk_clone_lock(sk, priority);

        if (newsk) {
                struct inet_connection_sock *newicsk = inet_csk(newsk);

                newsk->sk_state = TCP_SYN_RECV;
                newicsk->icsk_bind_hash = NULL;

                inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
                inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
                inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
                newsk->sk_write_space = sk_stream_write_space;

                /* listeners have SOCK_RCU_FREE, not the children */
                sock_reset_flag(newsk, SOCK_RCU_FREE);

                newsk->sk_mark = inet_rsk(req)->ir_mark;
                atomic64_set(&newsk->sk_cookie,
                             atomic64_read(&inet_rsk(req)->ir_cookie));

                newicsk->icsk_retransmits = 0;
                newicsk->icsk_backoff = 0;
                newicsk->icsk_probes_out = 0;

                /* Deinitialize accept_queue to trap illegal accesses. */
                memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));

                security_inet_csk_clone(newsk, req);
        }
        return newsk;
}
EXPORT_SYMBOL_GPL(inet_csk_clone_lock);

/*
 * At this point, there should be no process reference to this
 * socket, and thus no user references at all. Therefore we
 * can assume the socket waitqueue is inactive and nobody will
 * try to jump onto it.
 */
void inet_csk_destroy_sock(struct sock *sk)
{
        WARN_ON(sk->sk_state != TCP_CLOSE);
        WARN_ON(!sock_flag(sk, SOCK_DEAD));

        /* It cannot be in hash table! */
        WARN_ON(!sk_unhashed(sk));

        /* If it has a non-zero inet_sk(sk)->inet_num, it must be bound */
        WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);

        sk->sk_prot->destroy(sk);

        sk_stream_kill_queues(sk);

        xfrm_sk_free_policy(sk);

        sk_refcnt_debug_release(sk);

        local_bh_disable();
        percpu_counter_dec(sk->sk_prot->orphan_count);
        local_bh_enable();
        sock_put(sk);
}
EXPORT_SYMBOL(inet_csk_destroy_sock);

/* This function allows forcing the closure of a socket after the call to
 * tcp/dccp_create_openreq_child().
 */
void inet_csk_prepare_forced_close(struct sock *sk)
        __releases(&sk->sk_lock.slock)
{
        /* sk_clone_lock locked the socket and set refcnt to 2 */
        bh_unlock_sock(sk);
        sock_put(sk);

        /* The below has to be done to allow calling inet_csk_destroy_sock */
        sock_set_flag(sk, SOCK_DEAD);
        percpu_counter_inc(sk->sk_prot->orphan_count);
        inet_sk(sk)->inet_num = 0;
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);

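/* Switch @sk to TCP_LISTEN: allocate the accept queue, publish the new
 * state, then let get_port() revalidate the local port and hash the
 * socket.  If either step fails the socket is moved back to TCP_CLOSE
 * and -EADDRINUSE (or the hashing error) is returned.
 */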
int inet_csk_listen_start(struct sock *sk, int backlog)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = -EADDRINUSE;

        reqsk_queue_alloc(&icsk->icsk_accept_queue);

        sk->sk_max_ack_backlog = backlog;
        sk->sk_ack_backlog = 0;
        inet_csk_delack_init(sk);

        /* There is a race window here: we announce ourselves listening,
         * but this transition is still not validated by get_port().
         * It is OK, because this socket enters the hash table only
         * after validation is complete.
         */
        sk_state_store(sk, TCP_LISTEN);
        if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
                inet->inet_sport = htons(inet->inet_num);

                sk_dst_reset(sk);
                err = sk->sk_prot->hash(sk);

                if (likely(!err))
                        return 0;
        }

        sk->sk_state = TCP_CLOSE;
        return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);

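/* Give up on a child socket that will never be accepted: disconnect and
 * orphan it, detach it from a TCP Fast Open request if needed, then
 * release both the child and the request.
 */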
static void inet_child_forget(struct sock *sk, struct request_sock *req,
                              struct sock *child)
{
        sk->sk_prot->disconnect(child, O_NONBLOCK);

        sock_orphan(child);

        percpu_counter_inc(sk->sk_prot->orphan_count);

        if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
                BUG_ON(tcp_sk(child)->fastopen_rsk != req);
                BUG_ON(sk != req->rsk_listener);

                /* Paranoid, to prevent race condition if
                 * an inbound pkt destined for child is
                 * blocked by sock lock in tcp_v4_rcv().
                 * Also to satisfy an assertion in
                 * tcp_v4_destroy_sock().
                 */
                tcp_sk(child)->fastopen_rsk = NULL;
        }
        inet_csk_destroy_sock(child);
        reqsk_put(req);
}

struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
                                      struct request_sock *req,
                                      struct sock *child)
{
        struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;

        spin_lock(&queue->rskq_lock);
        if (unlikely(sk->sk_state != TCP_LISTEN)) {
                inet_child_forget(sk, req, child);
                child = NULL;
        } else {
                req->sk = child;
                req->dl_next = NULL;
                if (queue->rskq_accept_head == NULL)
                        queue->rskq_accept_head = req;
                else
                        queue->rskq_accept_tail->dl_next = req;
                queue->rskq_accept_tail = req;
                sk_acceptq_added(sk);
        }
        spin_unlock(&queue->rskq_lock);
        return child;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);

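/* Final step of the 3WHS receive path: if this caller owns the request
 * (@own_req), the request is unlinked from the ehash and timer and the
 * new child is moved to the listener's accept queue.  Otherwise (or when
 * the listener is no longer accepting) the freshly created child is
 * released and NULL is returned.
 */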
struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
                                         struct request_sock *req, bool own_req)
{
        if (own_req) {
                inet_csk_reqsk_queue_drop(sk, req);
                reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
                if (inet_csk_reqsk_queue_add(sk, req, child))
                        return child;
        }
        /* Too bad, another child took ownership of the request, undo. */
        bh_unlock_sock(child);
        sock_put(child);
        return NULL;
}
EXPORT_SYMBOL(inet_csk_complete_hashdance);

/*
 * This routine closes sockets which have been at least partially
 * opened, but not yet accepted.
 */
void inet_csk_listen_stop(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
        struct request_sock *next, *req;

        /* Following specs, it would be better either to send FIN
         * (and enter FIN-WAIT-1, it is normal close)
         * or to send active reset (abort).
         * Certainly, it is pretty dangerous while synflood, but it is
         * bad justification for our negligence 8)
         * To be honest, we are not able to make either
         * of the variants now. --ANK
         */
        while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
                struct sock *child = req->sk;

                local_bh_disable();
                bh_lock_sock(child);
                WARN_ON(sock_owned_by_user(child));
                sock_hold(child);

                inet_child_forget(sk, req, child);
                bh_unlock_sock(child);
                local_bh_enable();
                sock_put(child);

                cond_resched();
        }
        if (queue->fastopenq.rskq_rst_head) {
                /* Free all the reqs queued in rskq_rst_head. */
                spin_lock_bh(&queue->fastopenq.lock);
                req = queue->fastopenq.rskq_rst_head;
                queue->fastopenq.rskq_rst_head = NULL;
                spin_unlock_bh(&queue->fastopenq.lock);
                while (req != NULL) {
                        next = req->dl_next;
                        reqsk_put(req);
                        req = next;
                }
        }
        WARN_ON_ONCE(sk->sk_ack_backlog);
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);

void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
        struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
        const struct inet_sock *inet = inet_sk(sk);

        sin->sin_family = AF_INET;
        sin->sin_addr.s_addr = inet->inet_daddr;
        sin->sin_port = inet->inet_dport;
}
EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);

#ifdef CONFIG_COMPAT
int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
                               char __user *optval, int __user *optlen)
{
        const struct inet_connection_sock *icsk = inet_csk(sk);

        if (icsk->icsk_af_ops->compat_getsockopt)
                return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname,
                                                            optval, optlen);
        return icsk->icsk_af_ops->getsockopt(sk, level, optname,
                                             optval, optlen);
}
EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);

int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
                               char __user *optval, unsigned int optlen)
{
        const struct inet_connection_sock *icsk = inet_csk(sk);

        if (icsk->icsk_af_ops->compat_setsockopt)
                return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname,
                                                            optval, optlen);
        return icsk->icsk_af_ops->setsockopt(sk, level, optname,
                                             optval, optlen);
}
EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
#endif

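/* Recompute and cache a route for an established socket from its flow
 * information; used by inet_csk_update_pmtu() below when the cached dst
 * has been invalidated or dropped after the MTU update.
 */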
static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
{
        const struct inet_sock *inet = inet_sk(sk);
        const struct ip_options_rcu *inet_opt;
        __be32 daddr = inet->inet_daddr;
        struct flowi4 *fl4;
        struct rtable *rt;

        rcu_read_lock();
        inet_opt = rcu_dereference(inet->inet_opt);
        if (inet_opt && inet_opt->opt.srr)
                daddr = inet_opt->opt.faddr;
        fl4 = &fl->u.ip4;
        rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
                                   inet->inet_saddr, inet->inet_dport,
                                   inet->inet_sport, sk->sk_protocol,
                                   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
        if (IS_ERR(rt))
                rt = NULL;
        if (rt)
                sk_setup_caps(sk, &rt->dst);
        rcu_read_unlock();

        return &rt->dst;
}

struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
{
        struct dst_entry *dst = __sk_dst_check(sk, 0);
        struct inet_sock *inet = inet_sk(sk);

        if (!dst) {
                dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
                if (!dst)
                        goto out;
        }
        dst->ops->update_pmtu(dst, sk, NULL, mtu);

        dst = __sk_dst_check(sk, 0);
        if (!dst)
                dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
out:
        return dst;
}
EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);