blob: fa4f39e8ee0c46a2af56db9805300a1e64a4d7da [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
Alan Cox113aa832008-10-13 19:01:08 -07004 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
 *		Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					:	by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
 *					has been reached. This won't break
37 * old apps and it will avoid huge amount
38 * of socks hashed (this for unix_gc()
39 * performances reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
80 * with BSD names.
81 */
82
wangweidong5cc208b2013-12-06 18:03:36 +080083#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84
Linus Torvalds1da177e2005-04-16 15:20:36 -070085#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070086#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070087#include <linux/signal.h>
88#include <linux/sched.h>
89#include <linux/errno.h>
90#include <linux/string.h>
91#include <linux/stat.h>
92#include <linux/dcache.h>
93#include <linux/namei.h>
94#include <linux/socket.h>
95#include <linux/un.h>
96#include <linux/fcntl.h>
97#include <linux/termios.h>
98#include <linux/sockios.h>
99#include <linux/net.h>
100#include <linux/in.h>
101#include <linux/fs.h>
102#include <linux/slab.h>
103#include <asm/uaccess.h>
104#include <linux/skbuff.h>
105#include <linux/netdevice.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +0200106#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107#include <net/sock.h>
Arnaldo Carvalho de Meloc752f072005-08-09 20:08:28 -0700108#include <net/tcp_states.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#include <net/af_unix.h>
110#include <linux/proc_fs.h>
111#include <linux/seq_file.h>
112#include <net/scm.h>
113#include <linux/init.h>
114#include <linux/poll.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#include <linux/rtnetlink.h>
116#include <linux/mount.h>
117#include <net/checksum.h>
118#include <linux/security.h>
Colin Cross2b15af62013-05-06 23:50:21 +0000119#include <linux/freezer.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120
Jens Axboee32d0082019-02-08 09:01:44 -0700121#include "scm.h"
122
Eric Dumazet7123aaa2012-06-08 05:03:21 +0000123struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
Pavel Emelyanovfa7ff562011-12-15 02:44:03 +0000124EXPORT_SYMBOL_GPL(unix_socket_table);
125DEFINE_SPINLOCK(unix_table_lock);
126EXPORT_SYMBOL_GPL(unix_table_lock);
Eric Dumazet518de9b2010-10-26 14:22:44 -0700127static atomic_long_t unix_nr_socks;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129
Eric Dumazet7123aaa2012-06-08 05:03:21 +0000130static struct hlist_head *unix_sockets_unbound(void *addr)
131{
132 unsigned long hash = (unsigned long)addr;
133
134 hash ^= hash >> 16;
135 hash ^= hash >> 8;
136 hash %= UNIX_HASH_SIZE;
137 return &unix_socket_table[UNIX_HASH_SIZE + hash];
138}
139
140#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141
Catherine Zhang877ce7c2006-06-29 12:27:47 -0700142#ifdef CONFIG_SECURITY_NETWORK
Catherine Zhangdc49c1f2006-08-02 14:12:06 -0700143static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
Catherine Zhang877ce7c2006-06-29 12:27:47 -0700144{
Stephen Smalley37a9a8d2015-06-10 08:44:59 -0400145 UNIXCB(skb).secid = scm->secid;
Catherine Zhang877ce7c2006-06-29 12:27:47 -0700146}
147
148static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
149{
Stephen Smalley37a9a8d2015-06-10 08:44:59 -0400150 scm->secid = UNIXCB(skb).secid;
151}
152
153static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
154{
155 return (scm->secid == UNIXCB(skb).secid);
Catherine Zhang877ce7c2006-06-29 12:27:47 -0700156}
157#else
Catherine Zhangdc49c1f2006-08-02 14:12:06 -0700158static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
Catherine Zhang877ce7c2006-06-29 12:27:47 -0700159{ }
160
161static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
162{ }
Stephen Smalley37a9a8d2015-06-10 08:44:59 -0400163
164static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
165{
166 return true;
167}
Catherine Zhang877ce7c2006-06-29 12:27:47 -0700168#endif /* CONFIG_SECURITY_NETWORK */
169
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170/*
171 * SMP locking strategy:
David S. Millerfbe9cc42005-12-13 23:26:29 -0800172 * hash table is protected with spinlock unix_table_lock
Stephen Hemminger663717f2010-02-18 14:12:06 -0800173 * each socket state is protected by separate spin lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174 */
175
Eric Dumazet95c96172012-04-15 05:58:06 +0000176static inline unsigned int unix_hash_fold(__wsum n)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177{
Anton Blanchard0a134042014-03-05 14:29:58 +1100178 unsigned int hash = (__force unsigned int)csum_fold(n);
Eric Dumazet95c96172012-04-15 05:58:06 +0000179
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 hash ^= hash>>8;
181 return hash&(UNIX_HASH_SIZE-1);
182}
183
184#define unix_peer(sk) (unix_sk(sk)->peer)
185
186static inline int unix_our_peer(struct sock *sk, struct sock *osk)
187{
188 return unix_peer(osk) == sk;
189}
190
191static inline int unix_may_send(struct sock *sk, struct sock *osk)
192{
Eric Dumazet6eba6a32008-11-16 22:58:44 -0800193 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194}
195
Qian Caia12e9432020-02-04 13:40:29 -0500196static inline int unix_recvq_full(const struct sock *sk)
Rainer Weikusat3c734192008-06-17 22:28:05 -0700197{
198 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
199}
200
Qian Caia12e9432020-02-04 13:40:29 -0500201static inline int unix_recvq_full_lockless(const struct sock *sk)
202{
203 return skb_queue_len_lockless(&sk->sk_receive_queue) >
204 READ_ONCE(sk->sk_max_ack_backlog);
205}
206
Pavel Emelyanovfa7ff562011-12-15 02:44:03 +0000207struct sock *unix_peer_get(struct sock *s)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208{
209 struct sock *peer;
210
David S. Miller1c92b4e2007-05-31 13:24:26 -0700211 unix_state_lock(s);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212 peer = unix_peer(s);
213 if (peer)
214 sock_hold(peer);
David S. Miller1c92b4e2007-05-31 13:24:26 -0700215 unix_state_unlock(s);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 return peer;
217}
Pavel Emelyanovfa7ff562011-12-15 02:44:03 +0000218EXPORT_SYMBOL_GPL(unix_peer_get);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219
220static inline void unix_release_addr(struct unix_address *addr)
221{
222 if (atomic_dec_and_test(&addr->refcnt))
223 kfree(addr);
224}
225
226/*
227 * Check unix socket name:
228 * - should be not zero length.
229 * - if started by not zero, should be NULL terminated (FS object)
230 * - if started by zero, it is abstract name.
231 */
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +0900232
Eric Dumazet95c96172012-04-15 05:58:06 +0000233static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234{
Kyeongdon Kim03a94d72018-10-16 14:57:26 +0900235 *hashp = 0;
236
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237 if (len <= sizeof(short) || len > sizeof(*sunaddr))
238 return -EINVAL;
239 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
240 return -EINVAL;
241 if (sunaddr->sun_path[0]) {
242 /*
243 * This may look like an off by one error but it is a bit more
244 * subtle. 108 is the longest valid AF_UNIX path for a binding.
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300245 * sun_path[108] doesn't as such exist. However in kernel space
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 * we are guaranteed that it is a valid memory location in our
247 * kernel address buffer.
248 */
Jianjun Konge27dfce2008-11-01 21:38:31 -0700249 ((char *)sunaddr)[len] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 len = strlen(sunaddr->sun_path)+1+sizeof(short);
251 return len;
252 }
253
Joe Perches07f07572008-11-19 15:44:53 -0800254 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 return len;
256}
257
258static void __unix_remove_socket(struct sock *sk)
259{
260 sk_del_node_init(sk);
261}
262
263static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
264{
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700265 WARN_ON(!sk_unhashed(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 sk_add_node(sk, list);
267}
268
269static inline void unix_remove_socket(struct sock *sk)
270{
David S. Millerfbe9cc42005-12-13 23:26:29 -0800271 spin_lock(&unix_table_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 __unix_remove_socket(sk);
David S. Millerfbe9cc42005-12-13 23:26:29 -0800273 spin_unlock(&unix_table_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274}
275
276static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
277{
David S. Millerfbe9cc42005-12-13 23:26:29 -0800278 spin_lock(&unix_table_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279 __unix_insert_socket(list, sk);
David S. Millerfbe9cc42005-12-13 23:26:29 -0800280 spin_unlock(&unix_table_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281}
282
Denis V. Lunev097e66c2007-11-19 22:29:30 -0800283static struct sock *__unix_find_socket_byname(struct net *net,
284 struct sockaddr_un *sunname,
Eric Dumazet95c96172012-04-15 05:58:06 +0000285 int len, int type, unsigned int hash)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286{
287 struct sock *s;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288
Sasha Levinb67bfe02013-02-27 17:06:00 -0800289 sk_for_each(s, &unix_socket_table[hash ^ type]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290 struct unix_sock *u = unix_sk(s);
291
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +0900292 if (!net_eq(sock_net(s), net))
Denis V. Lunev097e66c2007-11-19 22:29:30 -0800293 continue;
294
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 if (u->addr->len == len &&
296 !memcmp(u->addr->name, sunname, len))
297 goto found;
298 }
299 s = NULL;
300found:
301 return s;
302}
303
Denis V. Lunev097e66c2007-11-19 22:29:30 -0800304static inline struct sock *unix_find_socket_byname(struct net *net,
305 struct sockaddr_un *sunname,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 int len, int type,
Eric Dumazet95c96172012-04-15 05:58:06 +0000307 unsigned int hash)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308{
309 struct sock *s;
310
David S. Millerfbe9cc42005-12-13 23:26:29 -0800311 spin_lock(&unix_table_lock);
Denis V. Lunev097e66c2007-11-19 22:29:30 -0800312 s = __unix_find_socket_byname(net, sunname, len, type, hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 if (s)
314 sock_hold(s);
David S. Millerfbe9cc42005-12-13 23:26:29 -0800315 spin_unlock(&unix_table_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 return s;
317}
318
Eric W. Biederman6616f782010-06-13 03:35:48 +0000319static struct sock *unix_find_socket_byinode(struct inode *i)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320{
321 struct sock *s;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322
David S. Millerfbe9cc42005-12-13 23:26:29 -0800323 spin_lock(&unix_table_lock);
Sasha Levinb67bfe02013-02-27 17:06:00 -0800324 sk_for_each(s,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
Al Viro40ffe672012-03-14 21:54:32 -0400326 struct dentry *dentry = unix_sk(s)->path.dentry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327
Miklos Szeredieb0a4a42016-05-20 22:13:45 +0200328 if (dentry && d_real_inode(dentry) == i) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 sock_hold(s);
330 goto found;
331 }
332 }
333 s = NULL;
334found:
David S. Millerfbe9cc42005-12-13 23:26:29 -0800335 spin_unlock(&unix_table_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336 return s;
337}
338
Rainer Weikusat7d267272015-11-20 22:07:23 +0000339/* Support code for asymmetrically connected dgram sockets
340 *
341 * If a datagram socket is connected to a socket not itself connected
342 * to the first socket (eg, /dev/log), clients may only enqueue more
343 * messages if the present receive queue of the server socket is not
344 * "too large". This means there's a second writeability condition
345 * poll and sendmsg need to test. The dgram recv code will do a wake
346 * up on the peer_wait wait queue of a socket upon reception of a
347 * datagram which needs to be propagated to sleeping would-be writers
348 * since these might not have sent anything so far. This can't be
349 * accomplished via poll_wait because the lifetime of the server
350 * socket might be less than that of its clients if these break their
351 * association with it or if the server socket is closed while clients
352 * are still connected to it and there's no way to inform "a polling
353 * implementation" that it should let go of a certain wait queue
354 *
355 * In order to propagate a wake up, a wait_queue_t of the client
356 * socket is enqueued on the peer_wait queue of the server socket
357 * whose wake function does a wake_up on the ordinary client socket
358 * wait queue. This connection is established whenever a write (or
359 * poll for write) hit the flow control condition and broken when the
360 * association to the server socket is dissolved or after a wake up
361 * was relayed.
362 */
363
364static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
365 void *key)
366{
367 struct unix_sock *u;
368 wait_queue_head_t *u_sleep;
369
370 u = container_of(q, struct unix_sock, peer_wake);
371
372 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
373 q);
374 u->peer_wake.private = NULL;
375
376 /* relaying can only happen while the wq still exists */
377 u_sleep = sk_sleep(&u->sk);
378 if (u_sleep)
379 wake_up_interruptible_poll(u_sleep, key);
380
381 return 0;
382}
383
384static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
385{
386 struct unix_sock *u, *u_other;
387 int rc;
388
389 u = unix_sk(sk);
390 u_other = unix_sk(other);
391 rc = 0;
392 spin_lock(&u_other->peer_wait.lock);
393
394 if (!u->peer_wake.private) {
395 u->peer_wake.private = other;
396 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
397
398 rc = 1;
399 }
400
401 spin_unlock(&u_other->peer_wait.lock);
402 return rc;
403}
404
405static void unix_dgram_peer_wake_disconnect(struct sock *sk,
406 struct sock *other)
407{
408 struct unix_sock *u, *u_other;
409
410 u = unix_sk(sk);
411 u_other = unix_sk(other);
412 spin_lock(&u_other->peer_wait.lock);
413
414 if (u->peer_wake.private == other) {
415 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
416 u->peer_wake.private = NULL;
417 }
418
419 spin_unlock(&u_other->peer_wait.lock);
420}
421
422static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
423 struct sock *other)
424{
425 unix_dgram_peer_wake_disconnect(sk, other);
426 wake_up_interruptible_poll(sk_sleep(sk),
427 POLLOUT |
428 POLLWRNORM |
429 POLLWRBAND);
430}
431
432/* preconditions:
433 * - unix_peer(sk) == other
434 * - association is stable
435 */
436static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
437{
438 int connected;
439
440 connected = unix_dgram_peer_wake_connect(sk, other);
441
442 if (unix_recvq_full(other))
443 return 1;
444
445 if (connected)
446 unix_dgram_peer_wake_disconnect(sk, other);
447
448 return 0;
449}
450
Eric Dumazet1586a582015-10-23 10:59:16 -0700451static int unix_writable(const struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452{
Eric Dumazet1586a582015-10-23 10:59:16 -0700453 return sk->sk_state != TCP_LISTEN &&
454 (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455}
456
457static void unix_write_space(struct sock *sk)
458{
Eric Dumazet43815482010-04-29 11:01:49 +0000459 struct socket_wq *wq;
460
461 rcu_read_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 if (unix_writable(sk)) {
Eric Dumazet43815482010-04-29 11:01:49 +0000463 wq = rcu_dereference(sk->sk_wq);
Herbert Xu1ce0bf52015-11-26 13:55:39 +0800464 if (skwq_has_sleeper(wq))
Eric Dumazet67426b72010-10-29 20:44:44 +0000465 wake_up_interruptible_sync_poll(&wq->wait,
466 POLLOUT | POLLWRNORM | POLLWRBAND);
Pavel Emelyanov8d8ad9d2007-11-26 20:10:50 +0800467 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 }
Eric Dumazet43815482010-04-29 11:01:49 +0000469 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470}
471
472/* When dgram socket disconnects (or changes its peer), we clear its receive
473 * queue of packets arrived from previous peer. First, it allows to do
474 * flow control based only on wmem_alloc; second, sk connected to peer
475 * may receive messages only from that peer. */
476static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
477{
David S. Millerb03efcf2005-07-08 14:57:23 -0700478 if (!skb_queue_empty(&sk->sk_receive_queue)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 skb_queue_purge(&sk->sk_receive_queue);
480 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
481
482 /* If one link of bidirectional dgram pipe is disconnected,
483 * we signal error. Messages are lost. Do not make this,
484 * when peer was not connected to us.
485 */
486 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
487 other->sk_err = ECONNRESET;
488 other->sk_error_report(other);
489 }
490 }
491}
492
493static void unix_sock_destructor(struct sock *sk)
494{
495 struct unix_sock *u = unix_sk(sk);
496
497 skb_queue_purge(&sk->sk_receive_queue);
498
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700499 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
500 WARN_ON(!sk_unhashed(sk));
501 WARN_ON(sk->sk_socket);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 if (!sock_flag(sk, SOCK_DEAD)) {
wangweidong5cc208b2013-12-06 18:03:36 +0800503 pr_info("Attempt to release alive unix socket: %p\n", sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 return;
505 }
506
507 if (u->addr)
508 unix_release_addr(u->addr);
509
Eric Dumazet518de9b2010-10-26 14:22:44 -0700510 atomic_long_dec(&unix_nr_socks);
David S. Miller6f756a82008-11-23 17:34:03 -0800511 local_bh_disable();
Eric Dumazeta8076d82008-11-17 02:38:49 -0800512 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
David S. Miller6f756a82008-11-23 17:34:03 -0800513 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514#ifdef UNIX_REFCNT_DEBUG
wangweidong5cc208b2013-12-06 18:03:36 +0800515 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
Eric Dumazet518de9b2010-10-26 14:22:44 -0700516 atomic_long_read(&unix_nr_socks));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517#endif
518}
519
Paul Mooreded34e02013-03-25 03:18:33 +0000520static void unix_release_sock(struct sock *sk, int embrion)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521{
522 struct unix_sock *u = unix_sk(sk);
Al Viro40ffe672012-03-14 21:54:32 -0400523 struct path path;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524 struct sock *skpair;
525 struct sk_buff *skb;
526 int state;
527
528 unix_remove_socket(sk);
529
530 /* Clear state */
David S. Miller1c92b4e2007-05-31 13:24:26 -0700531 unix_state_lock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 sock_orphan(sk);
533 sk->sk_shutdown = SHUTDOWN_MASK;
Al Viro40ffe672012-03-14 21:54:32 -0400534 path = u->path;
535 u->path.dentry = NULL;
536 u->path.mnt = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 state = sk->sk_state;
538 sk->sk_state = TCP_CLOSE;
Eric Dumazet0c36db72021-06-16 07:47:15 -0700539
540 skpair = unix_peer(sk);
541 unix_peer(sk) = NULL;
542
David S. Miller1c92b4e2007-05-31 13:24:26 -0700543 unix_state_unlock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544
545 wake_up_interruptible_all(&u->peer_wait);
546
Jianjun Konge27dfce2008-11-01 21:38:31 -0700547 if (skpair != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
David S. Miller1c92b4e2007-05-31 13:24:26 -0700549 unix_state_lock(skpair);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550 /* No more writes */
551 skpair->sk_shutdown = SHUTDOWN_MASK;
552 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
553 skpair->sk_err = ECONNRESET;
David S. Miller1c92b4e2007-05-31 13:24:26 -0700554 unix_state_unlock(skpair);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 skpair->sk_state_change(skpair);
Pavel Emelyanov8d8ad9d2007-11-26 20:10:50 +0800556 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557 }
Rainer Weikusat7d267272015-11-20 22:07:23 +0000558
559 unix_dgram_peer_wake_disconnect(sk, skpair);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560 sock_put(skpair); /* It may now die */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 }
562
563 /* Try to flush out this socket. Throw out buffers at least */
564
565 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
Jianjun Konge27dfce2008-11-01 21:38:31 -0700566 if (state == TCP_LISTEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 unix_release_sock(skb->sk, 1);
568 /* passed fds are erased in the kfree_skb hook */
Hannes Frederic Sowa73ed5d22015-11-10 16:23:15 +0100569 UNIXCB(skb).consumed = skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 kfree_skb(skb);
571 }
572
Al Viro40ffe672012-03-14 21:54:32 -0400573 if (path.dentry)
574 path_put(&path);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575
576 sock_put(sk);
577
578 /* ---- Socket is dead now and most probably destroyed ---- */
579
580 /*
Alan Coxe04dae82012-09-17 00:52:41 +0000581 * Fixme: BSD difference: In BSD all sockets connected to us get
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 * ECONNRESET and we die on the spot. In Linux we behave
583 * like files and pipes do and wait for the last
584 * dereference.
585 *
586 * Can't we simply set sock->err?
587 *
588 * What the above comment does talk about? --ANK(980817)
589 */
590
Pavel Emelyanov9305cfa2007-11-10 22:06:01 -0800591 if (unix_tot_inflight)
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +0900592 unix_gc(); /* Garbage collect fds */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593}
594
Eric W. Biederman109f6e32010-06-13 03:30:14 +0000595static void init_peercred(struct sock *sk)
596{
597 put_pid(sk->sk_peer_pid);
598 if (sk->sk_peer_cred)
599 put_cred(sk->sk_peer_cred);
600 sk->sk_peer_pid = get_pid(task_tgid(current));
601 sk->sk_peer_cred = get_current_cred();
602}
603
604static void copy_peercred(struct sock *sk, struct sock *peersk)
605{
606 put_pid(sk->sk_peer_pid);
607 if (sk->sk_peer_cred)
608 put_cred(sk->sk_peer_cred);
609 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
610 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
611}
612
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613static int unix_listen(struct socket *sock, int backlog)
614{
615 int err;
616 struct sock *sk = sock->sk;
617 struct unix_sock *u = unix_sk(sk);
Eric W. Biederman109f6e32010-06-13 03:30:14 +0000618 struct pid *old_pid = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619
620 err = -EOPNOTSUPP;
Eric Dumazet6eba6a32008-11-16 22:58:44 -0800621 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
622 goto out; /* Only stream/seqpacket sockets accept */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 err = -EINVAL;
624 if (!u->addr)
Eric Dumazet6eba6a32008-11-16 22:58:44 -0800625 goto out; /* No listens on an unbound socket */
David S. Miller1c92b4e2007-05-31 13:24:26 -0700626 unix_state_lock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
628 goto out_unlock;
629 if (backlog > sk->sk_max_ack_backlog)
630 wake_up_interruptible_all(&u->peer_wait);
631 sk->sk_max_ack_backlog = backlog;
632 sk->sk_state = TCP_LISTEN;
633 /* set credentials so connect can copy them */
Eric W. Biederman109f6e32010-06-13 03:30:14 +0000634 init_peercred(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 err = 0;
636
637out_unlock:
David S. Miller1c92b4e2007-05-31 13:24:26 -0700638 unix_state_unlock(sk);
Eric W. Biederman109f6e32010-06-13 03:30:14 +0000639 put_pid(old_pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640out:
641 return err;
642}
643
644static int unix_release(struct socket *);
645static int unix_bind(struct socket *, struct sockaddr *, int);
646static int unix_stream_connect(struct socket *, struct sockaddr *,
647 int addr_len, int flags);
648static int unix_socketpair(struct socket *, struct socket *);
649static int unix_accept(struct socket *, struct socket *, int);
650static int unix_getname(struct socket *, struct sockaddr *, int *, int);
651static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
Rainer Weikusatec0d2152008-06-27 19:34:18 -0700652static unsigned int unix_dgram_poll(struct file *, struct socket *,
653 poll_table *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654static int unix_ioctl(struct socket *, unsigned int, unsigned long);
655static int unix_shutdown(struct socket *, int);
Ying Xue1b784142015-03-02 15:37:48 +0800656static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
657static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +0200658static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
659 size_t size, int flags);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +0200660static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
661 struct pipe_inode_info *, size_t size,
662 unsigned int flags);
Ying Xue1b784142015-03-02 15:37:48 +0800663static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
664static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665static int unix_dgram_connect(struct socket *, struct sockaddr *,
666 int, int);
Ying Xue1b784142015-03-02 15:37:48 +0800667static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
668static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
669 int);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670
Sasha Levin12663bf2013-12-07 17:26:27 -0500671static int unix_set_peek_off(struct sock *sk, int val)
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +0000672{
673 struct unix_sock *u = unix_sk(sk);
674
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -0700675 if (mutex_lock_interruptible(&u->iolock))
Sasha Levin12663bf2013-12-07 17:26:27 -0500676 return -EINTR;
677
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +0000678 sk->sk_peek_off = val;
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -0700679 mutex_unlock(&u->iolock);
Sasha Levin12663bf2013-12-07 17:26:27 -0500680
681 return 0;
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +0000682}
683
684
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800685static const struct proto_ops unix_stream_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686 .family = PF_UNIX,
687 .owner = THIS_MODULE,
688 .release = unix_release,
689 .bind = unix_bind,
690 .connect = unix_stream_connect,
691 .socketpair = unix_socketpair,
692 .accept = unix_accept,
693 .getname = unix_getname,
694 .poll = unix_poll,
695 .ioctl = unix_ioctl,
696 .listen = unix_listen,
697 .shutdown = unix_shutdown,
698 .setsockopt = sock_no_setsockopt,
699 .getsockopt = sock_no_getsockopt,
700 .sendmsg = unix_stream_sendmsg,
701 .recvmsg = unix_stream_recvmsg,
702 .mmap = sock_no_mmap,
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +0200703 .sendpage = unix_stream_sendpage,
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +0200704 .splice_read = unix_stream_splice_read,
Pavel Emelyanovfc0d7532012-02-21 07:32:06 +0000705 .set_peek_off = unix_set_peek_off,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706};
707
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800708static const struct proto_ops unix_dgram_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 .family = PF_UNIX,
710 .owner = THIS_MODULE,
711 .release = unix_release,
712 .bind = unix_bind,
713 .connect = unix_dgram_connect,
714 .socketpair = unix_socketpair,
715 .accept = sock_no_accept,
716 .getname = unix_getname,
Rainer Weikusatec0d2152008-06-27 19:34:18 -0700717 .poll = unix_dgram_poll,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 .ioctl = unix_ioctl,
719 .listen = sock_no_listen,
720 .shutdown = unix_shutdown,
721 .setsockopt = sock_no_setsockopt,
722 .getsockopt = sock_no_getsockopt,
723 .sendmsg = unix_dgram_sendmsg,
724 .recvmsg = unix_dgram_recvmsg,
725 .mmap = sock_no_mmap,
726 .sendpage = sock_no_sendpage,
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +0000727 .set_peek_off = unix_set_peek_off,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728};
729
/*
 * proto_ops for SOCK_SEQPACKET AF_UNIX sockets: connection-oriented like
 * stream (shares unix_stream_connect/unix_accept) but with record
 * boundaries, so sendmsg/recvmsg use the seqpacket variants and poll the
 * dgram variant.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family = PF_UNIX,
	.owner = THIS_MODULE,
	.release = unix_release,
	.bind = unix_bind,
	.connect = unix_stream_connect,
	.socketpair = unix_socketpair,
	.accept = unix_accept,
	.getname = unix_getname,
	.poll = unix_dgram_poll,
	.ioctl = unix_ioctl,
	.listen = unix_listen,
	.shutdown = unix_shutdown,
	.setsockopt = sock_no_setsockopt,
	.getsockopt = sock_no_getsockopt,
	.sendmsg = unix_seqpacket_sendmsg,
	.recvmsg = unix_seqpacket_recvmsg,
	.mmap = sock_no_mmap,
	.sendpage = sock_no_sendpage,
	.set_peek_off = unix_set_peek_off,
};
751
/* Protocol descriptor shared by all AF_UNIX socket types; obj_size makes
 * sk_alloc() allocate a full struct unix_sock per socket.
 */
static struct proto unix_proto = {
	.name = "UNIX",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct unix_sock),
};
757
/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
765
/*
 * Allocate and initialize one AF_UNIX sock and insert it into the
 * "unbound" hash list.  Returns the new sock, or NULL on failure
 * (allocation failure or global socket limit reached).
 */
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	/* Optimistically account the socket first; the failure paths below
	 * undo this via the sk == NULL branch at "out".
	 */
	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	/* Separate lockdep class: AF_UNIX receive-queue locking is
	 * bh-unsafe by design (see comment above the key declaration).
	 */
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_allocation = GFP_KERNEL_ACCOUNT;
	sk->sk_write_space = unix_write_space;
	sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct = unix_sock_destructor;
	u = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);	/* for SCM_RIGHTS gc */
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		/* Roll back the optimistic accounting above. */
		atomic_long_dec(&unix_nr_socks);
	else {
		/* sock_prot_inuse_add() uses per-cpu counters that must not
		 * race with softirq context.
		 */
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
808
/*
 * AF_UNIX socket(2) backend: pick the proto_ops matching sock->type and
 * allocate the underlying sock.  Returns 0, -EPROTONOSUPPORT,
 * -ESOCKTNOSUPPORT or -ENOMEM.
 */
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
	/*
	 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
	 * nothing uses it.
	 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through - SOCK_RAW is treated as SOCK_DGRAM */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
839
840static int unix_release(struct socket *sock)
841{
842 struct sock *sk = sock->sk;
843
844 if (!sk)
845 return 0;
846
Paul Mooreded34e02013-03-25 03:18:33 +0000847 unix_release_sock(sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 sock->sk = NULL;
849
Paul Mooreded34e02013-03-25 03:18:33 +0000850 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851}
852
/*
 * Assign an autogenerated abstract-namespace address ("\0%05x") to an
 * as-yet-unbound socket.  Called for implicit binds (e.g. sendmsg on an
 * unbound PASSCRED socket) and for zero-length bind(2).
 * Returns 0, -EINTR, -ENOMEM or -ENOSPC (namespace exhausted).
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;	/* next candidate name; under unix_table_lock */
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	/* bindlock serializes against an explicit unix_bind() racing us. */
	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;	/* already bound - nothing to do */

	err = -ENOMEM;
	/* room for sun_family + "\0%05x" name */
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	/* Abstract name: leading NUL byte, then five hex digits. */
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take long time if many names
		 * are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	/* Re-hash the socket under its new address, still holding
	 * unix_table_lock.  smp_store_release() pairs with the
	 * smp_load_acquire() of u->addr in lock-free readers
	 * (e.g. unix_getname()).
	 */
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
913
/*
 * Look up the peer socket for a given address: filesystem path (non-NUL
 * first byte) via inode lookup, otherwise abstract namespace via the
 * name hash.  On success returns the sock with a reference held; on
 * failure returns NULL and stores a negative errno in *error.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		/* d_real_inode(): see through overlayfs to the real inode. */
		inode = d_real_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		/* Takes a reference on the found sock. */
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		/* Only update atime when the lookup will actually succeed. */
		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		/* Abstract namespace: type mismatch simply fails the lookup. */
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
968
/*
 * Create the filesystem socket node for a path-based bind().
 * On success fills *res with a held (mnt, dentry) pair that the caller
 * owns; returns 0 or a negative errno (notably -EEXIST if the path
 * already exists, which unix_bind() maps to -EADDRINUSE).
 */
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			/* Hand references to the caller via *res. */
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	/* Drops the locks/references taken by kern_path_create(). */
	done_path_create(&path, dentry);
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997
/*
 * bind(2) for AF_UNIX.  Three cases:
 *  - bare "sun_family" only: fall back to autobind;
 *  - abstract name (leading NUL): hash-table insertion with a
 *    duplicate-name check;
 *  - filesystem path: create the socket inode first (outside any lock),
 *    then hash the socket by that inode.
 * Returns 0 or a negative errno (-EINVAL if already bound, -EADDRINUSE,
 * -EINTR, -ENOMEM, ...).
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash = 0;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { NULL, NULL };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	/* Address with no name at all: pick one automatically. */
	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;	/* unix_mkname() returns the canonical length */

	/* Path-based bind: create the inode before taking bindlock, so
	 * filesystem work happens without holding any socket locks.
	 */
	if (sun_path[0]) {
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;	/* already bound */

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/* Path-bound sockets live in the overflow bucket keyed by
		 * inode number; addr->hash marks "not name-hashed".
		 */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	/* Publish the address under unix_table_lock; smp_store_release()
	 * pairs with smp_load_acquire() in lock-free readers of u->addr.
	 */
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	/* On failure drop the inode created by unix_mknod() above. */
	if (err)
		path_put(&path);
out:
	return err;
}
1088
/*
 * Take the state locks of two socks in a globally consistent order
 * (lower address first) so concurrent double-locks cannot deadlock.
 * sk2 may be NULL or equal to sk1, in which case only sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	/* Order by address to avoid ABBA deadlock between two callers. */
	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}
	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1103
/*
 * Release the locks taken by unix_state_double_lock().  Unlock order
 * does not matter; just avoid a double unlock when sk2 is NULL or the
 * same sock as sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);
	if (sk2 && likely(sk1 != sk2))
		unix_state_unlock(sk2);
}
1113
/*
 * connect(2) for SOCK_DGRAM AF_UNIX sockets: set (or, with AF_UNSPEC,
 * clear) the default peer.  Both socks are locked together via
 * unix_state_double_lock() while the peer pointer is switched.
 * Returns 0 or a negative errno.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* PASSCRED requires an address to pass; autobind if unbound. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		/* Takes a reference on 'other' on success. */
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		/* Flush queued datagrams only when actually changing peer. */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1193
/*
 * Sleep until the listener 'other' has room in its receive queue (or
 * dies / shuts down), for at most 'timeo' jiffies.
 *
 * NOTE: called with other's state lock held, and UNLOCKS it before
 * sleeping - the caller must not assume the lock is still held on
 * return.  Returns the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Decide whether to sleep while still holding the state lock, so
	 * the condition cannot change under us before we are queued.
	 */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1214
/*
 * connect(2) for SOCK_STREAM/SOCK_SEQPACKET AF_UNIX sockets.
 *
 * Creates a fresh "server side" sock (newsk), finds the listener, and -
 * once both states are validated under their locks - links sk<->newsk
 * and queues an skb carrying newsk on the listener's receive queue for
 * accept(2) to pick up.  Any state change observed after dropping locks
 * restarts the whole sequence.  Returns 0 or a negative errno.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* PASSCRED needs a source address; autobind if still unbound. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock; takes a reference on success. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	/* Listener backlog full: wait (releases other's lock), then retry. */
	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* State changed while we were unlocked: start over. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock. Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire(). IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	atomic_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* queue the skb carrying newsk and notify the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1403
1404static int unix_socketpair(struct socket *socka, struct socket *sockb)
1405{
Jianjun Konge27dfce2008-11-01 21:38:31 -07001406 struct sock *ska = socka->sk, *skb = sockb->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407
1408 /* Join our sockets back to back */
1409 sock_hold(ska);
1410 sock_hold(skb);
Jianjun Konge27dfce2008-11-01 21:38:31 -07001411 unix_peer(ska) = skb;
1412 unix_peer(skb) = ska;
Eric W. Biederman109f6e32010-06-13 03:30:14 +00001413 init_peercred(ska);
1414 init_peercred(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415
1416 if (ska->sk_type != SOCK_DGRAM) {
1417 ska->sk_state = TCP_ESTABLISHED;
1418 skb->sk_state = TCP_ESTABLISHED;
1419 socka->state = SS_CONNECTED;
1420 sockb->state = SS_CONNECTED;
1421 }
1422 return 0;
1423}
1424
Daniel Borkmann90c6bd32013-10-17 22:51:31 +02001425static void unix_sock_inherit_flags(const struct socket *old,
1426 struct socket *new)
1427{
1428 if (test_bit(SOCK_PASSCRED, &old->flags))
1429 set_bit(SOCK_PASSCRED, &new->flags);
1430 if (test_bit(SOCK_PASSSEC, &old->flags))
1431 set_bit(SOCK_PASSSEC, &new->flags);
1432}
1433
/*
 * accept(2) for SOCK_STREAM/SOCK_SEQPACKET AF_UNIX sockets: dequeue the
 * skb queued by unix_stream_connect() (which carries the pre-created
 * server-side sock) and graft that sock onto 'newsock'.
 * Returns 0 or a negative errno.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	/* skb->sk is the sock unix_stream_connect() attached. */
	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* Backlog shrank - wake a blocked connector, if any. */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1476
1477
1478static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1479{
1480 struct sock *sk = sock->sk;
Al Viro713b91c2019-02-15 20:09:35 +00001481 struct unix_address *addr;
Cyrill Gorcunov13cfa972009-11-08 05:51:19 +00001482 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483 int err = 0;
1484
1485 if (peer) {
1486 sk = unix_peer_get(sk);
1487
1488 err = -ENOTCONN;
1489 if (!sk)
1490 goto out;
1491 err = 0;
1492 } else {
1493 sock_hold(sk);
1494 }
1495
Al Viro713b91c2019-02-15 20:09:35 +00001496 addr = smp_load_acquire(&unix_sk(sk)->addr);
1497 if (!addr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 sunaddr->sun_family = AF_UNIX;
1499 sunaddr->sun_path[0] = 0;
1500 *uaddr_len = sizeof(short);
1501 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502 *uaddr_len = addr->len;
1503 memcpy(sunaddr, addr->name, *uaddr_len);
1504 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505 sock_put(sk);
1506out:
1507 return err;
1508}
1509
/* Duplicate the file references carried by @skb into @scm for an
 * MSG_PEEK receive.  The gc lock/unlock below is a barrier against the
 * unix garbage collector; see the detailed comment in the body.
 */
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have reference only from being in flight
	 * (total_refs == inflight_refs). This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored. While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operations that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd. The following lock/unlock
	 * pair is to ensure serialization with garbage collection. It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate. If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 * which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}
1556
David S. Millerf78a5fd2011-09-16 19:34:00 -04001557static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
Eric W. Biederman7361c362010-06-13 03:34:33 +00001558{
1559 int err = 0;
Eric Dumazet16e57262011-09-19 05:52:27 +00001560
David S. Millerf78a5fd2011-09-16 19:34:00 -04001561 UNIXCB(skb).pid = get_pid(scm->pid);
Eric W. Biederman6b0ee8c02013-04-03 17:28:16 +00001562 UNIXCB(skb).uid = scm->creds.uid;
1563 UNIXCB(skb).gid = scm->creds.gid;
Eric W. Biederman7361c362010-06-13 03:34:33 +00001564 UNIXCB(skb).fp = NULL;
Stephen Smalley37a9a8d2015-06-10 08:44:59 -04001565 unix_get_secdata(scm, skb);
Eric W. Biederman7361c362010-06-13 03:34:33 +00001566 if (scm->fp && send_fds)
1567 err = unix_attach_fds(scm, skb);
1568
1569 skb->destructor = unix_destruct_scm;
1570 return err;
1571}
1572
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001573static bool unix_passcred_enabled(const struct socket *sock,
1574 const struct sock *other)
1575{
1576 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1577 !other->sk_socket ||
1578 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1579}
1580
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581/*
Eric Dumazet16e57262011-09-19 05:52:27 +00001582 * Some apps rely on write() giving SCM_CREDENTIALS
1583 * We include credentials if source or destination socket
1584 * asserted SOCK_PASSCRED.
1585 */
1586static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1587 const struct sock *other)
1588{
Eric W. Biederman6b0ee8c02013-04-03 17:28:16 +00001589 if (UNIXCB(skb).pid)
Eric Dumazet16e57262011-09-19 05:52:27 +00001590 return;
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001591 if (unix_passcred_enabled(sock, other)) {
Eric Dumazet16e57262011-09-19 05:52:27 +00001592 UNIXCB(skb).pid = get_pid(task_tgid(current));
David S. Miller6e0895c2013-04-22 20:32:51 -04001593 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
Eric Dumazet16e57262011-09-19 05:52:27 +00001594 }
1595}
1596
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001597static int maybe_init_creds(struct scm_cookie *scm,
1598 struct socket *socket,
1599 const struct sock *other)
1600{
1601 int err;
1602 struct msghdr msg = { .msg_controllen = 0 };
1603
1604 err = scm_send(socket, &msg, scm, false);
1605 if (err)
1606 return err;
1607
1608 if (unix_passcred_enabled(socket, other)) {
1609 scm->pid = get_pid(task_tgid(current));
1610 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1611 }
1612 return err;
1613}
1614
1615static bool unix_skb_scm_eq(struct sk_buff *skb,
1616 struct scm_cookie *scm)
1617{
1618 const struct unix_skb_parms *u = &UNIXCB(skb);
1619
1620 return u->pid == scm->pid &&
1621 uid_eq(u->uid, scm->creds.uid) &&
1622 gid_eq(u->gid, scm->creds.gid) &&
1623 unix_secdata_eq(scm, skb);
1624}
1625
/*
 * Send AF_UNIX data, datagram flavour (SOCK_DGRAM and, via
 * unix_seqpacket_sendmsg(), SOCK_SEQPACKET).  The destination is either
 * msg->msg_name or the connected peer.  Delivery is atomic: the whole
 * datagram is queued on the receiver or an error is returned.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int max_level;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	/* parse control messages (SCM_RIGHTS fds, credentials) into scm */
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		/* explicit destination: validate and hash the address */
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);	/* takes a reference */
		if (!other)
			goto out;
	}

	/* SOCK_PASSCRED requires a name to report; autobind if unbound */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		/* spill what does not fit in the skb head into page frags */
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	/* attach creds/fds; >= 0 return is the fd recursion depth */
	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;
	max_level = err + 1;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	/* (re)resolve the destination; we come back here after the peer
	 * was found dead or after sleeping for receive-queue space
	 */
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 * Check with 1003.1g - what should
		 * datagram error
		 *
		 * Peer died under us: drop our reference, and if it was
		 * our connected peer, disconnect and report ECONNREFUSED;
		 * otherwise retry the address lookup.
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {
		/* receiver queue is full and it is not connected back to
		 * us: either block in unix_wait_for_peer(), or register on
		 * the peer's wake list and return -EAGAIN
		 */
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			/* lock both sockets in address order to avoid
			 * deadlock with a concurrent sender going the
			 * other way
			 */
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			/* recheck everything now that both locks are held */
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	if (max_level > unix_sk(other)->recursion_level)
		unix_sk(other)->recursion_level = max_level;
	unix_state_unlock(other);
	other->sk_data_ready(other);	/* wake the receiver */
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
1835
Eric Dumazete370a722013-08-08 14:37:32 -07001836/* We use paged skbs for stream sockets, and limit occupancy to 32768
1837 * bytes, and a minimun of a full page.
1838 */
1839#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09001840
/*
 * Send AF_UNIX data on a connected SOCK_STREAM socket.  The payload is
 * split into skbs of at most half the send buffer (so two messages fit
 * in the pipe); any passed fds ride only on the first skb.  Returns the
 * number of bytes queued, or a negative error if nothing was sent.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int max_level;
	int data_len;

	wait_for_unix_gc();
	/* parse control messages (SCM_RIGHTS fds, credentials) into scm */
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* stream sockets take no destination address */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		/* bytes that do not fit in the skb head go to page frags */
		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		max_level = err + 1;
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* peer may have died or shut down while we copied */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		if (max_level > unix_sk(other)->recursion_level)
			unix_sk(other)->recursion_level = max_level;
		unix_state_unlock(other);
		other->sk_data_ready(other);	/* wake the receiver */
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* SIGPIPE only if nothing was transferred at all */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	/* partial success wins over the error */
	return sent ? : err;
}
1943
/*
 * sendpage() for connected stream sockets: append @page as a page frag
 * either to the last skb already queued on the peer (when it carries
 * the same scm state) or to a freshly allocated zero-length skb.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (false) {
alloc_skb:
		/* Jumped to with both locks held: drop them before a
		 * possibly sleeping allocation, then fall through to
		 * retake them.  "if (false)" keeps this re-entry slot out
		 * of the normal straight-line path.
		 */
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	/* set up scm once, even across alloc_skb round trips */
	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		/* queue tail unchanged since we allocated newskb: use it */
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			/* need a fresh skb; remember tail to detect races */
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		/* frag slots exhausted: start over with a new skb */
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	atomic_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
2059
Ying Xue1b784142015-03-02 15:37:48 +08002060static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2061 size_t len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062{
2063 int err;
2064 struct sock *sk = sock->sk;
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09002065
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066 err = sock_error(sk);
2067 if (err)
2068 return err;
2069
2070 if (sk->sk_state != TCP_ESTABLISHED)
2071 return -ENOTCONN;
2072
2073 if (msg->msg_namelen)
2074 msg->msg_namelen = 0;
2075
Ying Xue1b784142015-03-02 15:37:48 +08002076 return unix_dgram_sendmsg(sock, msg, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077}
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09002078
Ying Xue1b784142015-03-02 15:37:48 +08002079static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2080 size_t size, int flags)
Eric W. Biedermana05d2ad2011-04-24 01:54:57 +00002081{
2082 struct sock *sk = sock->sk;
2083
2084 if (sk->sk_state != TCP_ESTABLISHED)
2085 return -ENOTCONN;
2086
Ying Xue1b784142015-03-02 15:37:48 +08002087 return unix_dgram_recvmsg(sock, msg, size, flags);
Eric W. Biedermana05d2ad2011-04-24 01:54:57 +00002088}
2089
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2091{
Al Viro713b91c2019-02-15 20:09:35 +00002092 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093
Al Viro713b91c2019-02-15 20:09:35 +00002094 if (addr) {
2095 msg->msg_namelen = addr->len;
2096 memcpy(msg->msg_name, addr->name, addr->len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097 }
2098}
2099
/*
 * Receive one datagram (SOCK_DGRAM / SOCK_SEQPACKET).  Returns the
 * number of bytes copied (or, with MSG_TRUNC, the remaining datagram
 * length) or a negative error.  u->iolock serializes receivers and
 * protects the peek offset.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int err;
	int peeked, skip;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* try to dequeue under iolock; if the queue is empty, drop the
	 * lock and wait for more packets, then retry
	 */
	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
					      &last);
		if (skb)
			break;

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	/* we freed queue space: wake writers blocked on our peer_wait */
	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						POLLOUT | POLLWRNORM |
						POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	/* clamp to the datagram size; flag truncation otherwise */
	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		/* consuming read: hand over the passed fds and rewind the
		 * peek offset by the consumed datagram
		 */
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			unix_peek_fds(&scm, skb);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
2202
/*
 * Sleep until more data has arrived. But check for races..
 *
 * Waits, under sk's state lock, until the receive queue tail differs
 * from (@last, @last_len), or an error/shutdown/signal/timeout ends the
 * wait.  Returns the remaining timeout.  With @freezable the sleep
 * participates in system freeze.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		/* new data is visible as either a new tail skb or the same
		 * tail skb having grown (stream appends to the last skb)
		 */
		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);	/* drop the lock while sleeping */
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
2245
Eric Dumazete370a722013-08-08 14:37:32 -07002246static unsigned int unix_skb_len(const struct sk_buff *skb)
2247{
2248 return skb->len - UNIXCB(skb).consumed;
2249}
2250
/* State shared by unix_stream_recvmsg() and unix_stream_splice_read():
 * both funnel into unix_stream_read_generic(), which hands each chunk of
 * skb data back through recv_actor to its destination (msghdr or pipe).
 */
struct unix_stream_read_state {
	/* copies/splices one chunk; returns bytes moved or negative error */
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;		/* socket being read */
	struct msghdr *msg;		/* destination for the recvmsg() path */
	struct pipe_inode_info *pipe;	/* destination for the splice() path */
	size_t size;			/* bytes requested by the caller */
	int flags;			/* MSG_* flags */
	unsigned int splice_flags;	/* SPLICE_F_* flags (splice path only) */
};
2261
/* Common receive path for connected (stream/seqpacket) AF_UNIX sockets.
 * Walks sk_receive_queue, handing each chunk to state->recv_actor (copy to
 * user for recvmsg, or splice to a pipe).  Readers are serialized on
 * u->iolock; the queue itself is accessed under unix_state_lock().
 * @freezable selects a freezer-friendly sleep while waiting for data.
 * Returns the number of bytes transferred, or a negative errno if nothing
 * was copied.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	/* MSG_PEEK resumes reading at the stored per-socket peek offset. */
	if (flags & MSG_PEEK)
		skip = sk_peek_offset(sk, flags);
	else
		skip = 0;

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			unix_sk(sk)->recursion_level = 0;
			if (copied >= target)
				goto unlock;

			/*
			 * POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			/* Drop the reader lock while sleeping so writers
			 * (and other readers) can make progress.
			 */
			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* Skip whole skbs already covered by the peek offset. */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		/* Hold a reference across recv_actor: it may sleep and a
		 * concurrent reader may consume and unlink this skb.
		 */
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(&scm, skb);

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* fds were detached into scm above; stop here so
			 * they are delivered with the data read so far.
			 */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				unix_peek_fds(&scm, skb);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2462
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002463static int unix_stream_read_actor(struct sk_buff *skb,
2464 int skip, int chunk,
2465 struct unix_stream_read_state *state)
2466{
2467 int ret;
2468
2469 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2470 state->msg, chunk);
2471 return ret ?: chunk;
2472}
2473
2474static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2475 size_t size, int flags)
2476{
2477 struct unix_stream_read_state state = {
2478 .recv_actor = unix_stream_read_actor,
2479 .socket = sock,
2480 .msg = msg,
2481 .size = size,
2482 .flags = flags
2483 };
2484
WANG Cong06a77b02016-11-17 15:55:26 -08002485 return unix_stream_read_generic(&state, true);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002486}
2487
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002488static int unix_stream_splice_actor(struct sk_buff *skb,
2489 int skip, int chunk,
2490 struct unix_stream_read_state *state)
2491{
2492 return skb_splice_bits(skb, state->socket->sk,
2493 UNIXCB(skb).consumed + skip,
Al Viro25869262016-09-17 21:02:10 -04002494 state->pipe, chunk, state->splice_flags);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002495}
2496
2497static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2498 struct pipe_inode_info *pipe,
2499 size_t size, unsigned int flags)
2500{
2501 struct unix_stream_read_state state = {
2502 .recv_actor = unix_stream_splice_actor,
2503 .socket = sock,
2504 .pipe = pipe,
2505 .size = size,
2506 .splice_flags = flags,
2507 };
2508
2509 if (unlikely(*ppos))
2510 return -ESPIPE;
2511
2512 if (sock->file->f_flags & O_NONBLOCK ||
2513 flags & SPLICE_F_NONBLOCK)
2514 state.flags = MSG_DONTWAIT;
2515
WANG Cong06a77b02016-11-17 15:55:26 -08002516 return unix_stream_read_generic(&state, false);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002517}
2518
/* shutdown() for AF_UNIX sockets.  Records the shutdown mode on this
 * socket, then mirrors it onto the connected peer of a stream/seqpacket
 * socket (our RCV side shut down means the peer's SEND side is shut, and
 * vice versa), waking the peer as appropriate.  The peer reference is
 * taken under unix_state_lock(sk) and dropped at the end.
 */
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		/* Our receive shutdown is the peer's send shutdown
		 * and vice versa.
		 */
		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
2564
/* Number of readable bytes queued on @sk (SIOCINQ semantics).
 * For stream/seqpacket sockets this sums the unconsumed bytes of every
 * queued skb; for datagram sockets it is the size of the next message.
 * Returns -EINVAL on a listening socket.  Walks the receive queue under
 * its spinlock.
 */
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);
2588
/* Number of bytes not yet consumed by the peer (SIOCOUTQ semantics),
 * taken from this socket's write-memory accounting.
 */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2594
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2596{
2597 struct sock *sk = sock->sk;
Jianjun Konge27dfce2008-11-01 21:38:31 -07002598 long amount = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002599 int err;
2600
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002601 switch (cmd) {
2602 case SIOCOUTQ:
Pavel Emelyanov885ee742011-12-30 00:54:11 +00002603 amount = unix_outq_len(sk);
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002604 err = put_user(amount, (int __user *)arg);
2605 break;
2606 case SIOCINQ:
Pavel Emelyanov885ee742011-12-30 00:54:11 +00002607 amount = unix_inq_len(sk);
2608 if (amount < 0)
2609 err = amount;
2610 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611 err = put_user(amount, (int __user *)arg);
Pavel Emelyanov885ee742011-12-30 00:54:11 +00002612 break;
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002613 default:
2614 err = -ENOIOCTLCMD;
2615 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616 }
2617 return err;
2618}
2619
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002620static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621{
2622 struct sock *sk = sock->sk;
2623 unsigned int mask;
2624
Eric Dumazetaa395142010-04-20 13:03:51 +00002625 sock_poll_wait(file, sk_sleep(sk), wait);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002626 mask = 0;
2627
2628 /* exceptional events? */
2629 if (sk->sk_err)
2630 mask |= POLLERR;
2631 if (sk->sk_shutdown == SHUTDOWN_MASK)
2632 mask |= POLLHUP;
Davide Libenzif348d702006-03-25 03:07:39 -08002633 if (sk->sk_shutdown & RCV_SHUTDOWN)
Eric Dumazetdb409802010-09-06 11:13:50 +00002634 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002635
2636 /* readable? */
Eric Dumazetdb409802010-09-06 11:13:50 +00002637 if (!skb_queue_empty(&sk->sk_receive_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002638 mask |= POLLIN | POLLRDNORM;
2639
2640 /* Connection-based need to check for termination and startup */
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002641 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2642 sk->sk_state == TCP_CLOSE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002643 mask |= POLLHUP;
2644
2645 /*
2646 * we set writable also when the other side has shut down the
2647 * connection. This prevents stuck sockets.
2648 */
2649 if (unix_writable(sk))
2650 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2651
2652 return mask;
2653}
2654
/* poll() for datagram sockets.  Differs from unix_poll() in that it also
 * reports error-queue events, handles the SEQPACKET connection states, and
 * only counts the socket writable if the connected peer's receive queue is
 * not full (registering for a peer wakeup otherwise).
 */
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		/* A full peer receive queue makes us effectively
		 * unwritable; unix_dgram_peer_wake_me() also arranges
		 * for a wakeup when the peer drains.
		 */
		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002711
2712#ifdef CONFIG_PROC_FS
Pavel Emelyanova53eb3f2007-11-23 20:30:01 +08002713
/* The seq_file position (*pos) encodes the hash-table bucket in its high
 * bits and a 1-based offset within that bucket in the low BUCKET_SPACE
 * bits (offset 0 is reserved for SEQ_START_TOKEN handling).
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
Pavel Emelyanova53eb3f2007-11-23 20:30:01 +08002719
/* Return the socket at the bucket/offset encoded in *pos, counting only
 * sockets belonging to this seq_file's network namespace, or NULL if the
 * bucket has fewer matching entries.  Caller holds unix_table_lock.
 */
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}
2736
/* Advance to the next same-namespace socket after @sk, moving to later
 * hash buckets as needed.  @sk == NULL (or SEQ_START_TOKEN) starts from
 * the bucket encoded in *pos.  Note the goto from the while-loop into the
 * do-while: exhausting the current chain falls through to the next-bucket
 * step.  Returns NULL when all buckets are exhausted.  Caller holds
 * unix_table_lock.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
2763
/* seq_file .start: take unix_table_lock (held until unix_seq_stop) and
 * return SEQ_START_TOKEN for position 0, NULL past the last bucket, or
 * the socket at *pos otherwise.
 */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}
2777
/* seq_file .next: bump the position and return the following socket. */
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}
2783
/* seq_file .stop: release the table lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2789
/* seq_file .show: emit one /proc/net/unix line (or the header for
 * SEQ_START_TOKEN).  The socket state is sampled under unix_state_lock().
 * Unprintable NUL bytes in abstract-namespace addresses are shown as '@'.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num RefCount Protocol Flags Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* Address length excludes the sun_family field;
			 * for pathname sockets also drop the trailing NUL.
			 */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2834
/* Iterator callbacks backing /proc/net/unix. */
static const struct seq_operations unix_seq_ops = {
	.start = unix_seq_start,
	.next  = unix_seq_next,
	.stop  = unix_seq_stop,
	.show  = unix_seq_show,
};
2841
/* open() for /proc/net/unix: per-netns seq_file setup. */
static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct seq_net_private));
}
2847
/* File operations for /proc/net/unix. */
static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2855
2856#endif
2857
/* PF_UNIX address-family registration: socket(2) creation hook. */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2863
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002864
/* Per-network-namespace setup: default datagram backlog sysctl, the sysctl
 * table itself, and the /proc/net/unix entry.  On /proc failure the sysctl
 * registration is rolled back.
 */
static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}
2883
/* Per-network-namespace teardown: mirror of unix_net_init(). */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}
2889
/* Hooks run on network-namespace creation/destruction. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2894
/* Module init: register the unix_sock protocol (with its own slab cache),
 * the PF_UNIX family, and the per-netns operations.  The BUILD_BUG_ON
 * guarantees unix_skb_parms fits in the skb control buffer.
 */
static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}
2912
/* Module exit: undo af_unix_init() registrations. */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2919
/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);