/*
 * NET4: Implementation of BSD Unix domain sockets.
 *
 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *    Linus Torvalds      : Assorted bug cures.
 *    Niibe Yutaka        : async I/O support.
 *    Carsten Paeth       : PF_UNIX check, address fixes.
 *    Alan Cox            : Limit size of allocated blocks.
 *    Alan Cox            : Fixed the stupid socketpair bug.
 *    Alan Cox            : BSD compatibility fine tuning.
 *    Alan Cox            : Fixed a bug in connect when interrupted.
 *    Alan Cox            : Sorted out a proper draft version of
 *                          file descriptor passing hacked up from
 *                          Mike Shaver's work.
 *    Marty Leisner       : Fixes to fd passing.
 *    Nick Nevin          : recvmsg bugfix.
 *    Alan Cox            : Started proper garbage collector.
 *    Heiko EiBfeldt      : Missing verify_area check.
 *    Alan Cox            : Started POSIXisms.
 *    Andreas Schwab      : Replace inode by dentry for proper
 *                          reference counting.
 *    Kirk Petersen       : Made this a module.
 *    Christoph Rohland   : Elegant non-blocking accept/connect algorithm.
 *                          Lots of bug fixes.
 *    Alexey Kuznetsov    : Repaired (I hope) bugs introduced
 *                          by the above two patches.
 *    Andrea Arcangeli    : If possible we block in connect(2)
 *                          if the max backlog of the listen socket
 *                          has been reached. This won't break
 *                          old apps and it will avoid the huge amount
 *                          of socks hashed (this is for unix_gc()
 *                          performance reasons).
 *                          Security fix that limits the max
 *                          number of socks to 2*max_files and
 *                          the number of skbs queueable in the
 *                          dgram receiver.
 *    Artur Skawina       : Hash function optimizations.
 *    Alexey Kuznetsov    : Full scale SMP. Lots of bugs are introduced 8)
 *    Malcolm Beattie     : Set peercred for socketpair.
 *    Michal Ostrowski    : Module initialization cleanup.
 *    Arnaldo C. Melo     : Remove MOD_{INC,DEC}_USE_COUNT,
 *                          the core infrastructure is doing that
 *                          for all net proto families now (2.5.69+).
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *    [TO FIX]
 *    ECONNREFUSED is not returned from one end of a connected() socket to the
 *        other the moment one end closes.
 *    fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *        and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *    [NOT TO FIX]
 *    accept() returns a path name even if the connecting socket has closed
 *        in the meantime (BSD loses the path and gives up).
 *    accept() returns 0 length path for an unbound connector. BSD returns 16
 *        and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *    socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *    BSD af_unix apparently has connect forgetting to block properly.
 *        (need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *    Bug fixes and improvements.
 *        - client shutdown killed server socket.
 *        - removed all useless cli/sti pairs.
 *
 *    Semantic changes/extensions.
 *        - generic control message passing.
 *        - SCM_CREDENTIALS control message.
 *        - "Abstract" (not FS based) socket bindings.
 *          Abstract names are sequences of bytes (not zero terminated)
 *          starting with 0, so that this name space does not intersect
 *          with BSD names.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>

#include "scm.h"

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


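/* Not-yet-bound sockets have no name to hash, so they are spread over the
 * second half of unix_socket_table by hashing the socket's address instead
 * of piling them all onto a single chain.
 */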
static struct hlist_head *unix_sockets_unbound(void *addr)
{
        unsigned long hash = (unsigned long)addr;

        hash ^= hash >> 16;
        hash ^= hash >> 8;
        hash %= UNIX_HASH_SIZE;
        return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

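/* A filesystem-bound address stores UNIX_HASH_SIZE in addr->hash (see
 * unix_bind()), so a smaller value means the socket carries an abstract
 * (or autobound) name.
 */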
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking strategy:
 *    the hash table is protected with the spinlock unix_table_lock
 *    each socket state is protected by a separate spin lock.
 */

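/* Fold a checksum of the address bytes down to a hash bucket index in
 * [0, UNIX_HASH_SIZE).
 */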
static inline unsigned int unix_hash_fold(__wsum n)
{
        unsigned int hash = (__force unsigned int)csum_fold(n);

        hash ^= hash >> 8;
        return hash & (UNIX_HASH_SIZE - 1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
        return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

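/* A peer's receive queue counts as full once it holds more than
 * sk_max_ack_backlog skbs; writers to such a peer have to wait (see
 * unix_wait_for_peer() and unix_dgram_peer_wake_me()).
 */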
static inline int unix_recvq_full(const struct sock *sk)
{
        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
        return skb_queue_len_lockless(&sk->sk_receive_queue) >
                READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
        struct sock *peer;

        unix_state_lock(s);
        peer = unix_peer(s);
        if (peer)
                sock_hold(peer);
        unix_state_unlock(s);
        return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
        if (atomic_dec_and_test(&addr->refcnt))
                kfree(addr);
}

/*
 * Check a unix socket name:
 *    - it should not be zero length.
 *    - if it does not start with a zero byte, it should be NUL terminated
 *      (FS object).
 *    - if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
        *hashp = 0;

        if (len <= sizeof(short) || len > sizeof(*sunaddr))
                return -EINVAL;
        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
                return -EINVAL;
        if (sunaddr->sun_path[0]) {
                /*
                 * This may look like an off by one error but it is a bit more
                 * subtle. 108 is the longest valid AF_UNIX path for a binding.
                 * sun_path[108] doesn't as such exist.  However in kernel space
                 * we are guaranteed that it is a valid memory location in our
                 * kernel address buffer.
                 */
                ((char *)sunaddr)[len] = 0;
                len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
                return len;
        }

        *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
        return len;
}

static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        WARN_ON(!sk_unhashed(sk));
        sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
                                              struct sockaddr_un *sunname,
                                              int len, int type, unsigned int hash)
{
        struct sock *s;

        sk_for_each(s, &unix_socket_table[hash ^ type]) {
                struct unix_sock *u = unix_sk(s);

                if (!net_eq(sock_net(s), net))
                        continue;

                if (u->addr->len == len &&
                    !memcmp(u->addr->name, sunname, len))
                        goto found;
        }
        s = NULL;
found:
        return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
                                                   struct sockaddr_un *sunname,
                                                   int len, int type,
                                                   unsigned int hash)
{
        struct sock *s;

        spin_lock(&unix_table_lock);
        s = __unix_find_socket_byname(net, sunname, len, type, hash);
        if (s)
                sock_hold(s);
        spin_unlock(&unix_table_lock);
        return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
        struct sock *s;

        spin_lock(&unix_table_lock);
        sk_for_each(s,
                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
                struct dentry *dentry = unix_sk(s)->path.dentry;

                if (dentry && d_real_inode(dentry) == i) {
                        sock_hold(s);
                        goto found;
                }
        }
        s = NULL;
found:
        spin_unlock(&unix_table_lock);
        return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */

static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
                                      void *key)
{
        struct unix_sock *u;
        wait_queue_head_t *u_sleep;

        u = container_of(q, struct unix_sock, peer_wake);

        __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, q);
        u->peer_wake.private = NULL;

        /* relaying can only happen while the wq still exists */
        u_sleep = sk_sleep(&u->sk);
        if (u_sleep)
                wake_up_interruptible_poll(u_sleep, key);

        return 0;
}

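/* Register sk's peer_wake entry on other's peer_wait queue so that a later
 * read on "other" relays a wakeup to would-be writers sleeping on sk.
 * Returns 1 if the entry was newly added, 0 if it was already registered.
 */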
static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
        struct unix_sock *u, *u_other;
        int rc;

        u = unix_sk(sk);
        u_other = unix_sk(other);
        rc = 0;
        spin_lock(&u_other->peer_wait.lock);

        if (!u->peer_wake.private) {
                u->peer_wake.private = other;
                __add_wait_queue(&u_other->peer_wait, &u->peer_wake);

                rc = 1;
        }

        spin_unlock(&u_other->peer_wait.lock);
        return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
                                            struct sock *other)
{
        struct unix_sock *u, *u_other;

        u = unix_sk(sk);
        u_other = unix_sk(other);
        spin_lock(&u_other->peer_wait.lock);

        if (u->peer_wake.private == other) {
                __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
                u->peer_wake.private = NULL;
        }

        spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
                                                   struct sock *other)
{
        unix_dgram_peer_wake_disconnect(sk, other);
        wake_up_interruptible_poll(sk_sleep(sk),
                                   POLLOUT |
                                   POLLWRNORM |
                                   POLLWRBAND);
}

/* preconditions:
 *    - unix_peer(sk) == other
 *    - association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
        int connected;

        connected = unix_dgram_peer_wake_connect(sk, other);

        if (unix_recvq_full(other))
                return 1;

        if (connected)
                unix_dgram_peer_wake_disconnect(sk, other);

        return 0;
}

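/* A socket is writable while it is not listening and no more than a quarter
 * of its send buffer is consumed by in-flight skbs.
 */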
static int unix_writable(const struct sock *sk)
{
        return sk->sk_state != TCP_LISTEN &&
               (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
        struct socket_wq *wq;

        rcu_read_lock();
        if (unix_writable(sk)) {
                wq = rcu_dereference(sk->sk_wq);
                if (skwq_has_sleeper(wq))
                        wake_up_interruptible_sync_poll(&wq->wait,
                                POLLOUT | POLLWRNORM | POLLWRBAND);
                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
        }
        rcu_read_unlock();
}


/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
        if (!skb_queue_empty(&sk->sk_receive_queue)) {
                skb_queue_purge(&sk->sk_receive_queue);
                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

                /* If one link of a bidirectional dgram pipe is disconnected,
                 * we signal the error. Messages are lost. Do not do this
                 * when the peer was not connected to us.
                 */
                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
                        other->sk_err = ECONNRESET;
                        other->sk_error_report(other);
                }
        }
}

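/* Final destructor, run once the last reference to sk is dropped; purges any
 * remaining queued skbs and releases the socket's address.
 */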
static void unix_sock_destructor(struct sock *sk)
{
        struct unix_sock *u = unix_sk(sk);

        skb_queue_purge(&sk->sk_receive_queue);

        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
        WARN_ON(!sk_unhashed(sk));
        WARN_ON(sk->sk_socket);
        if (!sock_flag(sk, SOCK_DEAD)) {
                pr_info("Attempt to release alive unix socket: %p\n", sk);
                return;
        }

        if (u->addr)
                unix_release_addr(u->addr);

        atomic_long_dec(&unix_nr_socks);
        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
        local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
        pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
                 atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        unix_remove_socket(sk);

        /* Clear state */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        path = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;

        skpair = unix_peer(sk);
        unix_peer(sk) = NULL;

        unix_state_unlock(sk);

        wake_up_interruptible_all(&u->peer_wait);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         * What does the above comment talk about? --ANK(980817)
         */

        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
        put_pid(sk->sk_peer_pid);
        if (sk->sk_peer_cred)
                put_cred(sk->sk_peer_cred);
        sk->sk_peer_pid = get_pid(task_tgid(current));
        sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
        put_pid(sk->sk_peer_pid);
        if (sk->sk_peer_cred)
                put_cred(sk->sk_peer_cred);
        sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
        int err;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        struct pid *old_pid = NULL;

        err = -EOPNOTSUPP;
        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
                goto out;       /* Only stream/seqpacket sockets accept */
        err = -EINVAL;
        if (!u->addr)
                goto out;       /* No listens on an unbound socket */
        unix_state_lock(sk);
        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (backlog > sk->sk_max_ack_backlog)
                wake_up_interruptible_all(&u->peer_wait);
        sk->sk_max_ack_backlog = backlog;
        sk->sk_state = TCP_LISTEN;
        /* set credentials so connect can copy them */
        init_peercred(sk);
        err = 0;

out_unlock:
        unix_state_unlock(sk);
        put_pid(old_pid);
out:
        return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                  int);

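/* SO_PEEK_OFF support: the peek offset is updated under u->iolock so it
 * cannot change underneath a concurrent recvmsg().
 */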
static int unix_set_peek_off(struct sock *sk, int val)
{
        struct unix_sock *u = unix_sk(sk);

        if (mutex_lock_interruptible(&u->iolock))
                return -EINTR;

        sk->sk_peek_off = val;
        mutex_unlock(&u->iolock);

        return 0;
}


static const struct proto_ops unix_stream_ops = {
        .family = PF_UNIX,
        .owner = THIS_MODULE,
        .release = unix_release,
        .bind = unix_bind,
        .connect = unix_stream_connect,
        .socketpair = unix_socketpair,
        .accept = unix_accept,
        .getname = unix_getname,
        .poll = unix_poll,
        .ioctl = unix_ioctl,
        .listen = unix_listen,
        .shutdown = unix_shutdown,
        .setsockopt = sock_no_setsockopt,
        .getsockopt = sock_no_getsockopt,
        .sendmsg = unix_stream_sendmsg,
        .recvmsg = unix_stream_recvmsg,
        .mmap = sock_no_mmap,
        .sendpage = unix_stream_sendpage,
        .splice_read = unix_stream_splice_read,
        .set_peek_off = unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
        .family = PF_UNIX,
        .owner = THIS_MODULE,
        .release = unix_release,
        .bind = unix_bind,
        .connect = unix_dgram_connect,
        .socketpair = unix_socketpair,
        .accept = sock_no_accept,
        .getname = unix_getname,
        .poll = unix_dgram_poll,
        .ioctl = unix_ioctl,
        .listen = sock_no_listen,
        .shutdown = unix_shutdown,
        .setsockopt = sock_no_setsockopt,
        .getsockopt = sock_no_getsockopt,
        .sendmsg = unix_dgram_sendmsg,
        .recvmsg = unix_dgram_recvmsg,
        .mmap = sock_no_mmap,
        .sendpage = sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
        .family = PF_UNIX,
        .owner = THIS_MODULE,
        .release = unix_release,
        .bind = unix_bind,
        .connect = unix_stream_connect,
        .socketpair = unix_socketpair,
        .accept = unix_accept,
        .getname = unix_getname,
        .poll = unix_dgram_poll,
        .ioctl = unix_ioctl,
        .listen = unix_listen,
        .shutdown = unix_shutdown,
        .setsockopt = sock_no_setsockopt,
        .getsockopt = sock_no_getsockopt,
        .sendmsg = unix_seqpacket_sendmsg,
        .recvmsg = unix_seqpacket_recvmsg,
        .mmap = sock_no_mmap,
        .sendpage = sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};

static struct proto unix_proto = {
        .name = "UNIX",
        .owner = THIS_MODULE,
        .obj_size = sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
        struct sock *sk = NULL;
        struct unix_sock *u;

        atomic_long_inc(&unix_nr_socks);
        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
                goto out;

        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
        if (!sk)
                goto out;

        sock_init_data(sock, sk);
        lockdep_set_class(&sk->sk_receive_queue.lock,
                          &af_unix_sk_receive_queue_lock_key);

        sk->sk_allocation = GFP_KERNEL_ACCOUNT;
        sk->sk_write_space = unix_write_space;
        sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
        sk->sk_destruct = unix_sock_destructor;
        u = unix_sk(sk);
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        spin_lock_init(&u->lock);
        atomic_long_set(&u->inflight, 0);
        INIT_LIST_HEAD(&u->link);
        mutex_init(&u->iolock); /* single task reading lock */
        mutex_init(&u->bindlock); /* single task binding lock */
        init_waitqueue_head(&u->peer_wait);
        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
        unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
        if (sk == NULL)
                atomic_long_dec(&unix_nr_socks);
        else {
                local_bh_disable();
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
                local_bh_enable();
        }
        return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
                       int kern)
{
        if (protocol && protocol != PF_UNIX)
                return -EPROTONOSUPPORT;

        sock->state = SS_UNCONNECTED;

        switch (sock->type) {
        case SOCK_STREAM:
                sock->ops = &unix_stream_ops;
                break;
                /*
                 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
                 * nothing uses it.
                 */
        case SOCK_RAW:
                sock->type = SOCK_DGRAM;
        case SOCK_DGRAM:
                sock->ops = &unix_dgram_ops;
                break;
        case SOCK_SEQPACKET:
                sock->ops = &unix_seqpacket_ops;
                break;
        default:
                return -ESOCKTNOSUPPORT;
        }

        return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
        struct sock *sk = sock->sk;

        if (!sk)
                return 0;

        unix_release_sock(sk, 0);
        sock->sk = NULL;

        return 0;
}

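/* Bind the socket to an autogenerated abstract name of the form "\0XXXXX"
 * (five hex digits), retrying until an unused name is found.
 */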
static int unix_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        static u32 ordernum = 1;
        struct unix_address *addr;
        int err;
        unsigned int retries = 0;

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                return err;

        err = 0;
        if (u->addr)
                goto out;

        err = -ENOMEM;
        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
        if (!addr)
                goto out;

        addr->name->sun_family = AF_UNIX;
        atomic_set(&addr->refcnt, 1);

retry:
        addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + 1 + sizeof(short);
        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

        spin_lock(&unix_table_lock);
        ordernum = (ordernum + 1) & 0xFFFFF;

        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
                                      addr->hash)) {
                spin_unlock(&unix_table_lock);
                /*
                 * __unix_find_socket_byname() may take a long time if many
                 * names are already in use.
                 */
                cond_resched();
                /* Give up if all names seem to be in use. */
                if (retries++ == 0xFFFFF) {
                        err = -ENOSPC;
                        kfree(addr);
                        goto out;
                }
                goto retry;
        }
        addr->hash ^= sk->sk_type;

        __unix_remove_socket(sk);
        smp_store_release(&u->addr, addr);
        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
        spin_unlock(&unix_table_lock);
        err = 0;

out:    mutex_unlock(&u->bindlock);
        return err;
}


static struct sock *unix_find_other(struct net *net,
                                    struct sockaddr_un *sunname, int len,
                                    int type, unsigned int hash, int *error)
{
        struct sock *u;
        struct path path;
        int err = 0;

        if (sunname->sun_path[0]) {
                struct inode *inode;
                err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
                if (err)
                        goto fail;
                inode = d_real_inode(path.dentry);
                err = inode_permission(inode, MAY_WRITE);
                if (err)
                        goto put_fail;

                err = -ECONNREFUSED;
                if (!S_ISSOCK(inode->i_mode))
                        goto put_fail;
                u = unix_find_socket_byinode(inode);
                if (!u)
                        goto put_fail;

                if (u->sk_type == type)
                        touch_atime(&path);

                path_put(&path);

                err = -EPROTOTYPE;
                if (u->sk_type != type) {
                        sock_put(u);
                        goto fail;
                }
        } else {
                err = -ECONNREFUSED;
                u = unix_find_socket_byname(net, sunname, len, type, hash);
                if (u) {
                        struct dentry *dentry;
                        dentry = unix_sk(u)->path.dentry;
                        if (dentry)
                                touch_atime(&unix_sk(u)->path);
                } else
                        goto fail;
        }
        return u;

put_fail:
        path_put(&path);
fail:
        *error = err;
        return NULL;
}

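/* Create the filesystem object backing a pathname bind: a socket inode
 * (S_IFSOCK) at sun_path, returning the mounted path in *res.
 */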
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
        struct dentry *dentry;
        struct path path;
        int err = 0;
        /*
         * Get the parent directory, calculate the hash for last
         * component.
         */
        dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
        err = PTR_ERR(dentry);
        if (IS_ERR(dentry))
                return err;

        /*
         * All right, let's create it.
         */
        err = security_path_mknod(&path, dentry, mode, 0);
        if (!err) {
                err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
                if (!err) {
                        res->mnt = mntget(path.mnt);
                        res->dentry = dget(dentry);
                }
        }
        done_path_create(&path, dentry);
        return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
        int err;
        unsigned int hash = 0;
        struct unix_address *addr;
        struct hlist_head *list;
        struct path path = { NULL, NULL };

        err = -EINVAL;
        if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
            sunaddr->sun_family != AF_UNIX)
                goto out;

        if (addr_len == sizeof(short)) {
                err = unix_autobind(sock);
                goto out;
        }

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        if (sun_path[0]) {
                umode_t mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current_umask());
                err = unix_mknod(sun_path, mode, &path);
                if (err) {
                        if (err == -EEXIST)
                                err = -EADDRINUSE;
                        goto out;
                }
        }

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                goto out_put;

        err = -EINVAL;
        if (u->addr)
                goto out_up;

        err = -ENOMEM;
        addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
        if (!addr)
                goto out_up;

        memcpy(addr->name, sunaddr, addr_len);
        addr->len = addr_len;
        addr->hash = hash ^ sk->sk_type;
        atomic_set(&addr->refcnt, 1);

        if (sun_path[0]) {
                addr->hash = UNIX_HASH_SIZE;
                hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
                spin_lock(&unix_table_lock);
                u->path = path;
                list = &unix_socket_table[hash];
        } else {
                spin_lock(&unix_table_lock);
                err = -EADDRINUSE;
                if (__unix_find_socket_byname(net, sunaddr, addr_len,
                                              sk->sk_type, hash)) {
                        unix_release_addr(addr);
                        goto out_unlock;
                }

                list = &unix_socket_table[addr->hash];
        }

        err = 0;
        __unix_remove_socket(sk);
        smp_store_release(&u->addr, addr);
        __unix_insert_socket(list, sk);

out_unlock:
        spin_unlock(&unix_table_lock);
out_up:
        mutex_unlock(&u->bindlock);
out_put:
        if (err)
                path_put(&path);
out:
        return err;
}

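/* Lock two unix sockets in a fixed order (by ascending address) so that
 * concurrent connect()s on a pair of sockets cannot deadlock.
 */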
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }
        if (sk1 < sk2) {
                unix_state_lock(sk1);
                unix_state_lock_nested(sk2);
        } else {
                unix_state_lock(sk2);
                unix_state_lock_nested(sk1);
        }
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_unlock(sk1);
                return;
        }
        unix_state_unlock(sk1);
        unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                              int alen, int flags)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
        struct sock *other;
        unsigned int hash;
        int err;

        err = -EINVAL;
        if (alen < offsetofend(struct sockaddr, sa_family))
                goto out;

        if (addr->sa_family != AF_UNSPEC) {
                err = unix_mkname(sunaddr, alen, &hash);
                if (err < 0)
                        goto out;
                alen = err;

                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
                        goto out;

restart:
                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
                if (!other)
                        goto out;

                unix_state_double_lock(sk, other);

                /* Apparently VFS overslept socket death. Retry. */
                if (sock_flag(other, SOCK_DEAD)) {
                        unix_state_double_unlock(sk, other);
                        sock_put(other);
                        goto restart;
                }

                err = -EPERM;
                if (!unix_may_send(sk, other))
                        goto out_unlock;

                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;

        } else {
                /*
                 * 1003.1g breaking connected state with AF_UNSPEC
                 */
                other = NULL;
                unix_state_double_lock(sk, other);
        }

        /*
         * If it was connected, reconnect.
         */
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);
                unix_peer(sk) = other;
                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

                unix_state_double_unlock(sk, other);

                if (other != old_peer)
                        unix_dgram_disconnected(sk, old_peer);
                sock_put(old_peer);
        } else {
                unix_peer(sk) = other;
                unix_state_double_unlock(sk, other);
        }
        return 0;

out_unlock:
        unix_state_double_unlock(sk, other);
        sock_put(other);
out:
        return err;
}

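/* Called with the peer's state lock held; drops it and sleeps on the peer's
 * peer_wait queue until a wakeup is relayed or the timeout expires.
 */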
static long unix_wait_for_peer(struct sock *other, long timeo)
{
        struct unix_sock *u = unix_sk(other);
        int sched;
        DEFINE_WAIT(wait);

        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

        sched = !sock_flag(other, SOCK_DEAD) &&
                !(other->sk_shutdown & RCV_SHUTDOWN) &&
                unix_recvq_full(other);

        unix_state_unlock(other);

        if (sched)
                timeo = schedule_timeout(timeo);

        finish_wait(&u->peer_wait, &wait);
        return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /* First of all allocate resources.
           If we will make it after state is locked,
           we will have to recheck all again in any case.
         */

        err = -ENOMEM;

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0);
        if (newsk == NULL)
                goto out;

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /* Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /* Latch our state.

           This is a tricky place. We need to grab our state lock and cannot
           drop the lock on the peer. It is dangerous because deadlock is
           possible. The connect-to-self case and simultaneous
           attempts to connect are eliminated by checking socket
           state. other is TCP_LISTEN; if sk is TCP_LISTEN we
           check this before attempting to grab the lock.

           Well, and we have to recheck the state after the socket is locked.
         */
1305 st = sk->sk_state;
1306
1307 switch (st) {
1308 case TCP_CLOSE:
1309 /* This is ok... continue with connect */
1310 break;
1311 case TCP_ESTABLISHED:
1312 /* Socket is already connected */
1313 err = -EISCONN;
1314 goto out_unlock;
1315 default:
1316 err = -EINVAL;
1317 goto out_unlock;
1318 }
1319
David S. Miller1c92b4e2007-05-31 13:24:26 -07001320 unix_state_lock_nested(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321
1322 if (sk->sk_state != st) {
David S. Miller1c92b4e2007-05-31 13:24:26 -07001323 unix_state_unlock(sk);
1324 unix_state_unlock(other);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325 sock_put(other);
1326 goto restart;
1327 }
1328
David S. Miller3610cda2011-01-05 15:38:53 -08001329 err = security_unix_stream_connect(sk, other, newsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330 if (err) {
David S. Miller1c92b4e2007-05-31 13:24:26 -07001331 unix_state_unlock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 goto out_unlock;
1333 }
1334
1335	/* The way is open! Quickly set all the necessary fields... */
1336
1337 sock_hold(sk);
1338 unix_peer(newsk) = sk;
1339 newsk->sk_state = TCP_ESTABLISHED;
1340 newsk->sk_type = sk->sk_type;
Eric W. Biederman109f6e32010-06-13 03:30:14 +00001341 init_peercred(newsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342 newu = unix_sk(newsk);
Eric Dumazeteaefd112011-02-18 03:26:36 +00001343 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344 otheru = unix_sk(other);
1345
Al Viro713b91c2019-02-15 20:09:35 +00001346 /* copy address information from listening to new sock
1347 *
1348 * The contents of *(otheru->addr) and otheru->path
1349 * are seen fully set up here, since we have found
1350 * otheru in hash under unix_table_lock. Insertion
1351 * into the hash chain we'd found it in had been done
1352 * in an earlier critical area protected by unix_table_lock,
1353 * the same one where we'd set *(otheru->addr) contents,
1354 * as well as otheru->path and otheru->addr itself.
1355 *
1356 * Using smp_store_release() here to set newu->addr
1357 * is enough to make those stores, as well as stores
1358 * to newu->path visible to anyone who gets newu->addr
1359	 * by smp_load_acquire(). IOW, the same guarantees
1360 * as for unix_sock instances bound in unix_bind() or
1361 * in unix_autobind().
1362 */
Al Viro40ffe672012-03-14 21:54:32 -04001363 if (otheru->path.dentry) {
1364 path_get(&otheru->path);
1365 newu->path = otheru->path;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 }
Al Viro713b91c2019-02-15 20:09:35 +00001367 atomic_inc(&otheru->addr->refcnt);
1368 smp_store_release(&newu->addr, otheru->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369
1370 /* Set credentials */
Eric W. Biederman109f6e32010-06-13 03:30:14 +00001371 copy_peercred(sk, other);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 sock->state = SS_CONNECTED;
1374 sk->sk_state = TCP_ESTABLISHED;
Benjamin LaHaise830a1e52005-12-13 23:22:32 -08001375 sock_hold(newsk);
1376
Peter Zijlstra4e857c52014-03-17 18:06:10 +01001377 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
Benjamin LaHaise830a1e52005-12-13 23:22:32 -08001378 unix_peer(sk) = newsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379
David S. Miller1c92b4e2007-05-31 13:24:26 -07001380 unix_state_unlock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381
1382	/* take ten and send info to listening sock */
1383 spin_lock(&other->sk_receive_queue.lock);
1384 __skb_queue_tail(&other->sk_receive_queue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 spin_unlock(&other->sk_receive_queue.lock);
David S. Miller1c92b4e2007-05-31 13:24:26 -07001386 unix_state_unlock(other);
David S. Miller676d2362014-04-11 16:15:36 -04001387 other->sk_data_ready(other);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 sock_put(other);
1389 return 0;
1390
1391out_unlock:
1392 if (other)
David S. Miller1c92b4e2007-05-31 13:24:26 -07001393 unix_state_unlock(other);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394
1395out:
Wei Yongjun40d44442009-02-25 00:32:45 +00001396 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397 if (newsk)
1398 unix_release_sock(newsk, 0);
1399 if (other)
1400 sock_put(other);
1401 return err;
1402}
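
/*
 * A minimal user-space sketch of how the path above is reached (the socket
 * path below is purely illustrative): connect(2) on an AF_UNIX SOCK_STREAM
 * socket ends up in unix_stream_connect(), which allocates the server-side
 * sock, queues it (wrapped in an skb) on the listener's receive queue and
 * wakes the listener for unix_accept().
 *
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	strncpy(sun.sun_path, "/tmp/example.sock", sizeof(sun.sun_path) - 1);
 *	if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == -1)
 *		perror("connect");
 *
 * ECONNREFUSED is returned when the peer is not listening, EAGAIN when a
 * non-blocking connect finds the listener's backlog full.
 */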
1403
1404static int unix_socketpair(struct socket *socka, struct socket *sockb)
1405{
Jianjun Konge27dfce2008-11-01 21:38:31 -07001406 struct sock *ska = socka->sk, *skb = sockb->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407
1408 /* Join our sockets back to back */
1409 sock_hold(ska);
1410 sock_hold(skb);
Jianjun Konge27dfce2008-11-01 21:38:31 -07001411 unix_peer(ska) = skb;
1412 unix_peer(skb) = ska;
Eric W. Biederman109f6e32010-06-13 03:30:14 +00001413 init_peercred(ska);
1414 init_peercred(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415
1416 if (ska->sk_type != SOCK_DGRAM) {
1417 ska->sk_state = TCP_ESTABLISHED;
1418 skb->sk_state = TCP_ESTABLISHED;
1419 socka->state = SS_CONNECTED;
1420 sockb->state = SS_CONNECTED;
1421 }
1422 return 0;
1423}
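
/*
 * A minimal user-space sketch of the call above (error handling elided):
 * socketpair(2) joins two fresh AF_UNIX sockets back to back, so no
 * bind(2)/listen(2)/connect(2) is needed; for stream and seqpacket types
 * both ends come back already in the established state.
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	write(sv[0], "ping", 4);
 *
 * Whatever is written on sv[0] is then readable on sv[1], and vice versa.
 */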
1424
Daniel Borkmann90c6bd32013-10-17 22:51:31 +02001425static void unix_sock_inherit_flags(const struct socket *old,
1426 struct socket *new)
1427{
1428 if (test_bit(SOCK_PASSCRED, &old->flags))
1429 set_bit(SOCK_PASSCRED, &new->flags);
1430 if (test_bit(SOCK_PASSSEC, &old->flags))
1431 set_bit(SOCK_PASSSEC, &new->flags);
1432}
1433
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1435{
1436 struct sock *sk = sock->sk;
1437 struct sock *tsk;
1438 struct sk_buff *skb;
1439 int err;
1440
1441 err = -EOPNOTSUPP;
Eric Dumazet6eba6a32008-11-16 22:58:44 -08001442 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443 goto out;
1444
1445 err = -EINVAL;
1446 if (sk->sk_state != TCP_LISTEN)
1447 goto out;
1448
1449 /* If socket state is TCP_LISTEN it cannot change (for now...),
1450	 * so no locks are necessary.
1451 */
1452
1453 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1454 if (!skb) {
1455 /* This means receive shutdown. */
1456 if (err == 0)
1457 err = -EINVAL;
1458 goto out;
1459 }
1460
1461 tsk = skb->sk;
1462 skb_free_datagram(sk, skb);
1463 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1464
1465 /* attach accepted sock to socket */
David S. Miller1c92b4e2007-05-31 13:24:26 -07001466 unix_state_lock(tsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001467 newsock->state = SS_CONNECTED;
Daniel Borkmann90c6bd32013-10-17 22:51:31 +02001468 unix_sock_inherit_flags(sock, newsock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469 sock_graft(tsk, newsock);
David S. Miller1c92b4e2007-05-31 13:24:26 -07001470 unix_state_unlock(tsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 return 0;
1472
1473out:
1474 return err;
1475}
1476
1477
1478static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1479{
1480 struct sock *sk = sock->sk;
Al Viro713b91c2019-02-15 20:09:35 +00001481 struct unix_address *addr;
Cyrill Gorcunov13cfa972009-11-08 05:51:19 +00001482 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483 int err = 0;
1484
1485 if (peer) {
1486 sk = unix_peer_get(sk);
1487
1488 err = -ENOTCONN;
1489 if (!sk)
1490 goto out;
1491 err = 0;
1492 } else {
1493 sock_hold(sk);
1494 }
1495
Al Viro713b91c2019-02-15 20:09:35 +00001496 addr = smp_load_acquire(&unix_sk(sk)->addr);
1497 if (!addr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 sunaddr->sun_family = AF_UNIX;
1499 sunaddr->sun_path[0] = 0;
1500 *uaddr_len = sizeof(short);
1501 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502 *uaddr_len = addr->len;
1503 memcpy(sunaddr, addr->name, *uaddr_len);
1504 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505 sock_put(sk);
1506out:
1507 return err;
1508}
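
/*
 * A minimal user-space sketch of the behaviour above: getsockname(2) on an
 * unbound AF_UNIX socket reports only the address family (the returned
 * length is sizeof(short)), while a bound or autobound socket reports the
 * full address; getpeername(2) fails with ENOTCONN when there is no peer.
 *
 *	struct sockaddr_un sun;
 *	socklen_t len = sizeof(sun);
 *	getsockname(fd, (struct sockaddr *)&sun, &len);
 */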
1509
David S. Millerf78a5fd2011-09-16 19:34:00 -04001510static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
Eric W. Biederman7361c362010-06-13 03:34:33 +00001511{
1512 int err = 0;
Eric Dumazet16e57262011-09-19 05:52:27 +00001513
David S. Millerf78a5fd2011-09-16 19:34:00 -04001514 UNIXCB(skb).pid = get_pid(scm->pid);
Eric W. Biederman6b0ee8c02013-04-03 17:28:16 +00001515 UNIXCB(skb).uid = scm->creds.uid;
1516 UNIXCB(skb).gid = scm->creds.gid;
Eric W. Biederman7361c362010-06-13 03:34:33 +00001517 UNIXCB(skb).fp = NULL;
Stephen Smalley37a9a8d2015-06-10 08:44:59 -04001518 unix_get_secdata(scm, skb);
Eric W. Biederman7361c362010-06-13 03:34:33 +00001519 if (scm->fp && send_fds)
1520 err = unix_attach_fds(scm, skb);
1521
1522 skb->destructor = unix_destruct_scm;
1523 return err;
1524}
1525
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001526static bool unix_passcred_enabled(const struct socket *sock,
1527 const struct sock *other)
1528{
1529 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1530 !other->sk_socket ||
1531 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1532}
1533
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534/*
Eric Dumazet16e57262011-09-19 05:52:27 +00001535 * Some apps rely on write() giving SCM_CREDENTIALS
1536 * We include credentials if source or destination socket
1537 * asserted SOCK_PASSCRED.
1538 */
1539static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1540 const struct sock *other)
1541{
Eric W. Biederman6b0ee8c02013-04-03 17:28:16 +00001542 if (UNIXCB(skb).pid)
Eric Dumazet16e57262011-09-19 05:52:27 +00001543 return;
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001544 if (unix_passcred_enabled(sock, other)) {
Eric Dumazet16e57262011-09-19 05:52:27 +00001545 UNIXCB(skb).pid = get_pid(task_tgid(current));
David S. Miller6e0895c2013-04-22 20:32:51 -04001546 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
Eric Dumazet16e57262011-09-19 05:52:27 +00001547 }
1548}
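
/*
 * A minimal user-space sketch of what the helpers above enable (buffer
 * sizes are illustrative): once the receiver sets SO_PASSCRED, the
 * sender's pid/uid/gid arrive as an SCM_CREDENTIALS control message.
 *
 *	int one = 1;
 *	struct ucred cred;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
 *
 *	char data[128], ctrl[CMSG_SPACE(sizeof(struct ucred))];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *			      .msg_control = ctrl, .msg_controllen = sizeof(ctrl) };
 *	recvmsg(fd, &msg, 0);
 *	struct cmsghdr *c = CMSG_FIRSTHDR(&msg);
 *	if (c && c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_CREDENTIALS)
 *		memcpy(&cred, CMSG_DATA(c), sizeof(cred));
 */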
1549
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001550static int maybe_init_creds(struct scm_cookie *scm,
1551 struct socket *socket,
1552 const struct sock *other)
1553{
1554 int err;
1555 struct msghdr msg = { .msg_controllen = 0 };
1556
1557 err = scm_send(socket, &msg, scm, false);
1558 if (err)
1559 return err;
1560
1561 if (unix_passcred_enabled(socket, other)) {
1562 scm->pid = get_pid(task_tgid(current));
1563 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1564 }
1565 return err;
1566}
1567
1568static bool unix_skb_scm_eq(struct sk_buff *skb,
1569 struct scm_cookie *scm)
1570{
1571 const struct unix_skb_parms *u = &UNIXCB(skb);
1572
1573 return u->pid == scm->pid &&
1574 uid_eq(u->uid, scm->creds.uid) &&
1575 gid_eq(u->gid, scm->creds.gid) &&
1576 unix_secdata_eq(scm, skb);
1577}
1578
Eric Dumazet16e57262011-09-19 05:52:27 +00001579/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 * Send AF_UNIX data.
1581 */
1582
Ying Xue1b784142015-03-02 15:37:48 +08001583static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1584 size_t len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001587 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 struct unix_sock *u = unix_sk(sk);
Steffen Hurrle342dfc32014-01-17 22:53:15 +01001589 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590 struct sock *other = NULL;
1591	int namelen = 0; /* fake initializer to silence GCC */
1592 int err;
Eric Dumazet95c96172012-04-15 05:58:06 +00001593 unsigned int hash;
David S. Millerf78a5fd2011-09-16 19:34:00 -04001594 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595 long timeo;
Christoph Hellwig7cc05662015-01-28 18:04:53 +01001596 struct scm_cookie scm;
Eric Dumazet25888e32010-11-25 04:11:39 +00001597 int max_level;
Eric Dumazeteb6a2482012-04-03 05:28:28 +00001598 int data_len = 0;
Rainer Weikusat7d267272015-11-20 22:07:23 +00001599 int sk_locked;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600
dann frazier5f23b732008-11-26 15:32:27 -08001601 wait_for_unix_gc();
Christoph Hellwig7cc05662015-01-28 18:04:53 +01001602 err = scm_send(sock, msg, &scm, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 if (err < 0)
1604 return err;
1605
1606 err = -EOPNOTSUPP;
1607 if (msg->msg_flags&MSG_OOB)
1608 goto out;
1609
1610 if (msg->msg_namelen) {
1611 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1612 if (err < 0)
1613 goto out;
1614 namelen = err;
1615 } else {
1616 sunaddr = NULL;
1617 err = -ENOTCONN;
1618 other = unix_peer_get(sk);
1619 if (!other)
1620 goto out;
1621 }
1622
Joe Perchesf64f9e72009-11-29 16:55:45 -08001623 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1624 && (err = unix_autobind(sock)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625 goto out;
1626
1627 err = -EMSGSIZE;
1628 if (len > sk->sk_sndbuf - 32)
1629 goto out;
1630
Kirill Tkhai31ff6aa2014-05-15 19:56:28 +04001631 if (len > SKB_MAX_ALLOC) {
Eric Dumazeteb6a2482012-04-03 05:28:28 +00001632 data_len = min_t(size_t,
1633 len - SKB_MAX_ALLOC,
1634 MAX_SKB_FRAGS * PAGE_SIZE);
Kirill Tkhai31ff6aa2014-05-15 19:56:28 +04001635 data_len = PAGE_ALIGN(data_len);
1636
1637 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1638 }
Eric Dumazeteb6a2482012-04-03 05:28:28 +00001639
1640 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
Eric Dumazet28d64272013-08-08 14:38:47 -07001641 msg->msg_flags & MSG_DONTWAIT, &err,
1642 PAGE_ALLOC_COSTLY_ORDER);
Jianjun Konge27dfce2008-11-01 21:38:31 -07001643 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 goto out;
1645
Christoph Hellwig7cc05662015-01-28 18:04:53 +01001646 err = unix_scm_to_skb(&scm, skb, true);
Eric Dumazet25888e32010-11-25 04:11:39 +00001647 if (err < 0)
Eric W. Biederman7361c362010-06-13 03:34:33 +00001648 goto out_free;
Eric Dumazet25888e32010-11-25 04:11:39 +00001649 max_level = err + 1;
Catherine Zhang877ce7c2006-06-29 12:27:47 -07001650
Eric Dumazeteb6a2482012-04-03 05:28:28 +00001651 skb_put(skb, len - data_len);
1652 skb->data_len = data_len;
1653 skb->len = len;
Al Viroc0371da2014-11-24 10:42:55 -05001654 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 if (err)
1656 goto out_free;
1657
1658 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1659
1660restart:
1661 if (!other) {
1662 err = -ECONNRESET;
1663 if (sunaddr == NULL)
1664 goto out_free;
1665
Denis V. Lunev097e66c2007-11-19 22:29:30 -08001666 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 hash, &err);
Jianjun Konge27dfce2008-11-01 21:38:31 -07001668 if (other == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 goto out_free;
1670 }
1671
Alban Crequyd6ae3ba2011-01-18 06:39:15 +00001672 if (sk_filter(other, skb) < 0) {
1673 /* Toss the packet but do not return any error to the sender */
1674 err = len;
1675 goto out_free;
1676 }
1677
Rainer Weikusat7d267272015-11-20 22:07:23 +00001678 sk_locked = 0;
David S. Miller1c92b4e2007-05-31 13:24:26 -07001679 unix_state_lock(other);
Rainer Weikusat7d267272015-11-20 22:07:23 +00001680restart_locked:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 err = -EPERM;
1682 if (!unix_may_send(sk, other))
1683 goto out_unlock;
1684
Rainer Weikusat7d267272015-11-20 22:07:23 +00001685 if (unlikely(sock_flag(other, SOCK_DEAD))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 /*
1687	 * Check with 1003.1g - what should
1688	 * the datagram error be here?
1689 */
David S. Miller1c92b4e2007-05-31 13:24:26 -07001690 unix_state_unlock(other);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 sock_put(other);
1692
Rainer Weikusat7d267272015-11-20 22:07:23 +00001693 if (!sk_locked)
1694 unix_state_lock(sk);
1695
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 if (unix_peer(sk) == other) {
Jianjun Konge27dfce2008-11-01 21:38:31 -07001698 unix_peer(sk) = NULL;
Rainer Weikusat7d267272015-11-20 22:07:23 +00001699 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1700
David S. Miller1c92b4e2007-05-31 13:24:26 -07001701 unix_state_unlock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702
1703 unix_dgram_disconnected(sk, other);
1704 sock_put(other);
1705 err = -ECONNREFUSED;
1706 } else {
David S. Miller1c92b4e2007-05-31 13:24:26 -07001707 unix_state_unlock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 }
1709
1710 other = NULL;
1711 if (err)
1712 goto out_free;
1713 goto restart;
1714 }
1715
1716 err = -EPIPE;
1717 if (other->sk_shutdown & RCV_SHUTDOWN)
1718 goto out_unlock;
1719
1720 if (sk->sk_type != SOCK_SEQPACKET) {
1721 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1722 if (err)
1723 goto out_unlock;
1724 }
1725
Rainer Weikusata5527dd2016-02-11 19:37:27 +00001726 /* other == sk && unix_peer(other) != sk if
1727 * - unix_peer(sk) == NULL, destination address bound to sk
1728 * - unix_peer(sk) == sk by time of get but disconnected before lock
1729 */
1730 if (other != sk &&
Qian Caia12e9432020-02-04 13:40:29 -05001731 unlikely(unix_peer(other) != sk &&
1732 unix_recvq_full_lockless(other))) {
Rainer Weikusat7d267272015-11-20 22:07:23 +00001733 if (timeo) {
1734 timeo = unix_wait_for_peer(other, timeo);
1735
1736 err = sock_intr_errno(timeo);
1737 if (signal_pending(current))
1738 goto out_free;
1739
1740 goto restart;
1741 }
1742
1743 if (!sk_locked) {
1744 unix_state_unlock(other);
1745 unix_state_double_lock(sk, other);
1746 }
1747
1748 if (unix_peer(sk) != other ||
1749 unix_dgram_peer_wake_me(sk, other)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750 err = -EAGAIN;
Rainer Weikusat7d267272015-11-20 22:07:23 +00001751 sk_locked = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752 goto out_unlock;
1753 }
1754
Rainer Weikusat7d267272015-11-20 22:07:23 +00001755 if (!sk_locked) {
1756 sk_locked = 1;
1757 goto restart_locked;
1758 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759 }
1760
Rainer Weikusat7d267272015-11-20 22:07:23 +00001761 if (unlikely(sk_locked))
1762 unix_state_unlock(sk);
1763
Alban Crequy3f661162010-10-04 08:48:28 +00001764 if (sock_flag(other, SOCK_RCVTSTAMP))
1765 __net_timestamp(skb);
Eric Dumazet16e57262011-09-19 05:52:27 +00001766 maybe_add_creds(skb, sock, other);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001767 skb_queue_tail(&other->sk_receive_queue, skb);
Eric Dumazet25888e32010-11-25 04:11:39 +00001768 if (max_level > unix_sk(other)->recursion_level)
1769 unix_sk(other)->recursion_level = max_level;
David S. Miller1c92b4e2007-05-31 13:24:26 -07001770 unix_state_unlock(other);
David S. Miller676d2362014-04-11 16:15:36 -04001771 other->sk_data_ready(other);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772 sock_put(other);
Christoph Hellwig7cc05662015-01-28 18:04:53 +01001773 scm_destroy(&scm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 return len;
1775
1776out_unlock:
Rainer Weikusat7d267272015-11-20 22:07:23 +00001777 if (sk_locked)
1778 unix_state_unlock(sk);
David S. Miller1c92b4e2007-05-31 13:24:26 -07001779 unix_state_unlock(other);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780out_free:
1781 kfree_skb(skb);
1782out:
1783 if (other)
1784 sock_put(other);
Christoph Hellwig7cc05662015-01-28 18:04:53 +01001785 scm_destroy(&scm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786 return err;
1787}
1788
Eric Dumazete370a722013-08-08 14:37:32 -07001789/* We use paged skbs for stream sockets, and limit occupancy to 32768
1790	 * bytes, and a minimum of a full page.
1791 */
1792#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
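/*
 * For example, with 4 KiB pages get_order(32768) == 3, so the limit is
 * 4096 << 3 == 32768 bytes; with 64 KiB pages get_order(32768) == 0 and
 * the limit degenerates to a single page.
 */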
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09001793
Ying Xue1b784142015-03-02 15:37:48 +08001794static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1795 size_t len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797 struct sock *sk = sock->sk;
1798 struct sock *other = NULL;
Eric Dumazet6eba6a32008-11-16 22:58:44 -08001799 int err, size;
David S. Millerf78a5fd2011-09-16 19:34:00 -04001800 struct sk_buff *skb;
Jianjun Konge27dfce2008-11-01 21:38:31 -07001801 int sent = 0;
Christoph Hellwig7cc05662015-01-28 18:04:53 +01001802 struct scm_cookie scm;
Miklos Szeredi8ba69ba2009-09-11 11:31:45 -07001803 bool fds_sent = false;
Eric Dumazet25888e32010-11-25 04:11:39 +00001804 int max_level;
Eric Dumazete370a722013-08-08 14:37:32 -07001805 int data_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806
dann frazier5f23b732008-11-26 15:32:27 -08001807 wait_for_unix_gc();
Christoph Hellwig7cc05662015-01-28 18:04:53 +01001808 err = scm_send(sock, msg, &scm, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809 if (err < 0)
1810 return err;
1811
1812 err = -EOPNOTSUPP;
1813 if (msg->msg_flags&MSG_OOB)
1814 goto out_err;
1815
1816 if (msg->msg_namelen) {
1817 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1818 goto out_err;
1819 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820 err = -ENOTCONN;
Benjamin LaHaise830a1e52005-12-13 23:22:32 -08001821 other = unix_peer(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822 if (!other)
1823 goto out_err;
1824 }
1825
1826 if (sk->sk_shutdown & SEND_SHUTDOWN)
1827 goto pipe_err;
1828
Eric Dumazet6eba6a32008-11-16 22:58:44 -08001829 while (sent < len) {
Eric Dumazete370a722013-08-08 14:37:32 -07001830 size = len - sent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831
1832 /* Keep two messages in the pipe so it schedules better */
Eric Dumazete370a722013-08-08 14:37:32 -07001833 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834
Eric Dumazete370a722013-08-08 14:37:32 -07001835 /* allow fallback to order-0 allocations */
1836 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09001837
Eric Dumazete370a722013-08-08 14:37:32 -07001838 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09001839
Kirill Tkhai31ff6aa2014-05-15 19:56:28 +04001840 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1841
Eric Dumazete370a722013-08-08 14:37:32 -07001842 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
Eric Dumazet28d64272013-08-08 14:38:47 -07001843 msg->msg_flags & MSG_DONTWAIT, &err,
1844 get_order(UNIX_SKB_FRAGS_SZ));
Eric Dumazete370a722013-08-08 14:37:32 -07001845 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846 goto out_err;
1847
David S. Millerf78a5fd2011-09-16 19:34:00 -04001848 /* Only send the fds in the first buffer */
Christoph Hellwig7cc05662015-01-28 18:04:53 +01001849 err = unix_scm_to_skb(&scm, skb, !fds_sent);
Eric Dumazet25888e32010-11-25 04:11:39 +00001850 if (err < 0) {
Eric W. Biederman7361c362010-06-13 03:34:33 +00001851 kfree_skb(skb);
David S. Millerf78a5fd2011-09-16 19:34:00 -04001852 goto out_err;
Miklos Szeredi62093442008-11-09 15:23:57 +01001853 }
Eric Dumazet25888e32010-11-25 04:11:39 +00001854 max_level = err + 1;
Eric W. Biederman7361c362010-06-13 03:34:33 +00001855 fds_sent = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001856
Eric Dumazete370a722013-08-08 14:37:32 -07001857 skb_put(skb, size - data_len);
1858 skb->data_len = data_len;
1859 skb->len = size;
Al Viroc0371da2014-11-24 10:42:55 -05001860 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
Eric Dumazet6eba6a32008-11-16 22:58:44 -08001861 if (err) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862 kfree_skb(skb);
David S. Millerf78a5fd2011-09-16 19:34:00 -04001863 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864 }
1865
David S. Miller1c92b4e2007-05-31 13:24:26 -07001866 unix_state_lock(other);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867
1868 if (sock_flag(other, SOCK_DEAD) ||
1869 (other->sk_shutdown & RCV_SHUTDOWN))
1870 goto pipe_err_free;
1871
Eric Dumazet16e57262011-09-19 05:52:27 +00001872 maybe_add_creds(skb, sock, other);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001873 skb_queue_tail(&other->sk_receive_queue, skb);
Eric Dumazet25888e32010-11-25 04:11:39 +00001874 if (max_level > unix_sk(other)->recursion_level)
1875 unix_sk(other)->recursion_level = max_level;
David S. Miller1c92b4e2007-05-31 13:24:26 -07001876 unix_state_unlock(other);
David S. Miller676d2362014-04-11 16:15:36 -04001877 other->sk_data_ready(other);
Jianjun Konge27dfce2008-11-01 21:38:31 -07001878 sent += size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001880
Christoph Hellwig7cc05662015-01-28 18:04:53 +01001881 scm_destroy(&scm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882
1883 return sent;
1884
1885pipe_err_free:
David S. Miller1c92b4e2007-05-31 13:24:26 -07001886 unix_state_unlock(other);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887 kfree_skb(skb);
1888pipe_err:
Eric Dumazet6eba6a32008-11-16 22:58:44 -08001889 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1890 send_sig(SIGPIPE, current, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891 err = -EPIPE;
1892out_err:
Christoph Hellwig7cc05662015-01-28 18:04:53 +01001893 scm_destroy(&scm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 return sent ? : err;
1895}
1896
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001897static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1898 int offset, size_t size, int flags)
1899{
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001900 int err;
1901 bool send_sigpipe = false;
1902 bool init_scm = true;
1903 struct scm_cookie scm;
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001904 struct sock *other, *sk = socket->sk;
1905 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1906
1907 if (flags & MSG_OOB)
1908 return -EOPNOTSUPP;
1909
1910 other = unix_peer(sk);
1911 if (!other || sk->sk_state != TCP_ESTABLISHED)
1912 return -ENOTCONN;
1913
1914 if (false) {
1915alloc_skb:
1916 unix_state_unlock(other);
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07001917 mutex_unlock(&unix_sk(other)->iolock);
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001918 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1919 &err, 0);
1920 if (!newskb)
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001921 goto err;
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001922 }
1923
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07001924 /* we must acquire iolock as we modify already present
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001925 * skbs in the sk_receive_queue and mess with skb->len
1926 */
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07001927 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001928 if (err) {
1929 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001930 goto err;
1931 }
1932
1933 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1934 err = -EPIPE;
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001935 send_sigpipe = true;
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001936 goto err_unlock;
1937 }
1938
1939 unix_state_lock(other);
1940
1941 if (sock_flag(other, SOCK_DEAD) ||
1942 other->sk_shutdown & RCV_SHUTDOWN) {
1943 err = -EPIPE;
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001944 send_sigpipe = true;
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001945 goto err_state_unlock;
1946 }
1947
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001948 if (init_scm) {
1949 err = maybe_init_creds(&scm, socket, other);
1950 if (err)
1951 goto err_state_unlock;
1952 init_scm = false;
1953 }
1954
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001955 skb = skb_peek_tail(&other->sk_receive_queue);
1956 if (tail && tail == skb) {
1957 skb = newskb;
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001958 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1959 if (newskb) {
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001960 skb = newskb;
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001961 } else {
1962 tail = skb;
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001963 goto alloc_skb;
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001964 }
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001965 } else if (newskb) {
1966 /* this is fast path, we don't necessarily need to
1967 * call to kfree_skb even though with newskb == NULL
1968 * this - does no harm
1969 */
1970 consume_skb(newskb);
Hannes Frederic Sowa8844f972015-11-16 16:25:56 +01001971 newskb = NULL;
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001972 }
1973
1974 if (skb_append_pagefrags(skb, page, offset, size)) {
1975 tail = skb;
1976 goto alloc_skb;
1977 }
1978
1979 skb->len += size;
1980 skb->data_len += size;
1981 skb->truesize += size;
1982 atomic_add(size, &sk->sk_wmem_alloc);
1983
Hannes Frederic Sowaa3a116e2015-11-17 15:10:59 +01001984 if (newskb) {
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001985 err = unix_scm_to_skb(&scm, skb, false);
1986 if (err)
1987 goto err_state_unlock;
Hannes Frederic Sowaa3a116e2015-11-17 15:10:59 +01001988 spin_lock(&other->sk_receive_queue.lock);
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001989 __skb_queue_tail(&other->sk_receive_queue, newskb);
Hannes Frederic Sowaa3a116e2015-11-17 15:10:59 +01001990 spin_unlock(&other->sk_receive_queue.lock);
1991 }
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001992
1993 unix_state_unlock(other);
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07001994 mutex_unlock(&unix_sk(other)->iolock);
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001995
1996 other->sk_data_ready(other);
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001997 scm_destroy(&scm);
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02001998 return size;
1999
2000err_state_unlock:
2001 unix_state_unlock(other);
2002err_unlock:
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07002003 mutex_unlock(&unix_sk(other)->iolock);
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02002004err:
2005 kfree_skb(newskb);
2006 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2007 send_sig(SIGPIPE, current, 0);
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01002008 if (!init_scm)
2009 scm_destroy(&scm);
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +02002010 return err;
2011}
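
/*
 * A minimal user-space sketch of how the sendpage path above is typically
 * reached (file name and count are illustrative): sendfile(2), or splice(2)
 * from a pipe, into a connected AF_UNIX stream socket pushes whole pages,
 * which are appended to the last skb on the peer's queue when the attached
 * credentials match.
 *
 *	int in = open("/var/tmp/payload", O_RDONLY);
 *	off_t off = 0;
 *	sendfile(sock_fd, in, &off, 65536);
 */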
2012
Ying Xue1b784142015-03-02 15:37:48 +08002013static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2014 size_t len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015{
2016 int err;
2017 struct sock *sk = sock->sk;
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09002018
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019 err = sock_error(sk);
2020 if (err)
2021 return err;
2022
2023 if (sk->sk_state != TCP_ESTABLISHED)
2024 return -ENOTCONN;
2025
2026 if (msg->msg_namelen)
2027 msg->msg_namelen = 0;
2028
Ying Xue1b784142015-03-02 15:37:48 +08002029 return unix_dgram_sendmsg(sock, msg, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030}
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09002031
Ying Xue1b784142015-03-02 15:37:48 +08002032static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2033 size_t size, int flags)
Eric W. Biedermana05d2ad2011-04-24 01:54:57 +00002034{
2035 struct sock *sk = sock->sk;
2036
2037 if (sk->sk_state != TCP_ESTABLISHED)
2038 return -ENOTCONN;
2039
Ying Xue1b784142015-03-02 15:37:48 +08002040 return unix_dgram_recvmsg(sock, msg, size, flags);
Eric W. Biedermana05d2ad2011-04-24 01:54:57 +00002041}
2042
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2044{
Al Viro713b91c2019-02-15 20:09:35 +00002045 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046
Al Viro713b91c2019-02-15 20:09:35 +00002047 if (addr) {
2048 msg->msg_namelen = addr->len;
2049 memcpy(msg->msg_name, addr->name, addr->len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002050 }
2051}
2052
Ying Xue1b784142015-03-02 15:37:48 +08002053static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2054 size_t size, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055{
Christoph Hellwig7cc05662015-01-28 18:04:53 +01002056 struct scm_cookie scm;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057 struct sock *sk = sock->sk;
2058 struct unix_sock *u = unix_sk(sk);
Rainer Weikusat64874282015-12-06 21:11:38 +00002059 struct sk_buff *skb, *last;
2060 long timeo;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061 int err;
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +00002062 int peeked, skip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063
2064 err = -EOPNOTSUPP;
2065 if (flags&MSG_OOB)
2066 goto out;
2067
Rainer Weikusat64874282015-12-06 21:11:38 +00002068 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069
Rainer Weikusat64874282015-12-06 21:11:38 +00002070 do {
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07002071 mutex_lock(&u->iolock);
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +00002072
Rainer Weikusat64874282015-12-06 21:11:38 +00002073 skip = sk_peek_offset(sk, flags);
2074 skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
2075 &last);
2076 if (skb)
2077 break;
2078
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07002079 mutex_unlock(&u->iolock);
Rainer Weikusat64874282015-12-06 21:11:38 +00002080
2081 if (err != -EAGAIN)
2082 break;
2083 } while (timeo &&
2084 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2085
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07002086 if (!skb) { /* implies iolock unlocked */
Florian Zumbiehl0a112252007-11-29 23:19:23 +11002087 unix_state_lock(sk);
2088 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2089 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2090 (sk->sk_shutdown & RCV_SHUTDOWN))
2091 err = 0;
2092 unix_state_unlock(sk);
Rainer Weikusat64874282015-12-06 21:11:38 +00002093 goto out;
Florian Zumbiehl0a112252007-11-29 23:19:23 +11002094 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095
Rainer Weikusat77b75f42015-11-26 19:23:15 +00002096 if (wq_has_sleeper(&u->peer_wait))
2097 wake_up_interruptible_sync_poll(&u->peer_wait,
2098 POLLOUT | POLLWRNORM |
2099 POLLWRBAND);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100
2101 if (msg->msg_name)
2102 unix_copy_addr(msg, skb->sk);
2103
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +00002104 if (size > skb->len - skip)
2105 size = skb->len - skip;
2106 else if (size < skb->len - skip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107 msg->msg_flags |= MSG_TRUNC;
2108
David S. Miller51f3d022014-11-05 16:46:40 -05002109 err = skb_copy_datagram_msg(skb, skip, msg, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110 if (err)
2111 goto out_free;
2112
Alban Crequy3f661162010-10-04 08:48:28 +00002113 if (sock_flag(sk, SOCK_RCVTSTAMP))
2114 __sock_recv_timestamp(msg, sk, skb);
2115
Christoph Hellwig7cc05662015-01-28 18:04:53 +01002116 memset(&scm, 0, sizeof(scm));
2117
2118 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2119 unix_set_secdata(&scm, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002121 if (!(flags & MSG_PEEK)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122 if (UNIXCB(skb).fp)
Christoph Hellwig7cc05662015-01-28 18:04:53 +01002123 unix_detach_fds(&scm, skb);
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +00002124
2125 sk_peek_offset_bwd(sk, skb->len);
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002126 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127 /* It is questionable: on PEEK we could:
2128 - do not return fds - good, but too simple 8)
2129 - return fds, and do not return them on read (old strategy,
2130 apparently wrong)
2131 - clone fds (I chose it for now, it is the most universal
2132 solution)
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09002133
2134 POSIX 1003.1g does not actually define this clearly
2135 at all. POSIX 1003.1g doesn't define a lot of things
2136 clearly however!
2137
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138 */
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +00002139
2140 sk_peek_offset_fwd(sk, size);
2141
Linus Torvalds1da177e2005-04-16 15:20:36 -07002142 if (UNIXCB(skb).fp)
Christoph Hellwig7cc05662015-01-28 18:04:53 +01002143 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144 }
Eric Dumazet9f6f9af2012-02-21 23:24:55 +00002145 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146
Christoph Hellwig7cc05662015-01-28 18:04:53 +01002147 scm_recv(sock, msg, &scm, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148
2149out_free:
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002150 skb_free_datagram(sk, skb);
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07002151 mutex_unlock(&u->iolock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152out:
2153 return err;
2154}
2155
2156/*
Benjamin Poirier79f632c2013-04-29 11:42:14 +00002157 * Sleep until more data has arrived. But check for races..
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158 */
Benjamin Poirier79f632c2013-04-29 11:42:14 +00002159static long unix_stream_data_wait(struct sock *sk, long timeo,
WANG Cong06a77b02016-11-17 15:55:26 -08002160 struct sk_buff *last, unsigned int last_len,
2161 bool freezable)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162{
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002163 struct sk_buff *tail;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164 DEFINE_WAIT(wait);
2165
David S. Miller1c92b4e2007-05-31 13:24:26 -07002166 unix_state_lock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167
2168 for (;;) {
Eric Dumazetaa395142010-04-20 13:03:51 +00002169 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002170
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002171 tail = skb_peek_tail(&sk->sk_receive_queue);
2172 if (tail != last ||
2173 (tail && tail->len != last_len) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07002174 sk->sk_err ||
2175 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2176 signal_pending(current) ||
2177 !timeo)
2178 break;
2179
Eric Dumazet9cd3e072015-11-29 20:03:10 -08002180 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
David S. Miller1c92b4e2007-05-31 13:24:26 -07002181 unix_state_unlock(sk);
WANG Cong06a77b02016-11-17 15:55:26 -08002182 if (freezable)
2183 timeo = freezable_schedule_timeout(timeo);
2184 else
2185 timeo = schedule_timeout(timeo);
David S. Miller1c92b4e2007-05-31 13:24:26 -07002186 unix_state_lock(sk);
Mark Salyzynb48732e2015-05-26 08:22:19 -07002187
2188 if (sock_flag(sk, SOCK_DEAD))
2189 break;
2190
Eric Dumazet9cd3e072015-11-29 20:03:10 -08002191 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192 }
2193
Eric Dumazetaa395142010-04-20 13:03:51 +00002194 finish_wait(sk_sleep(sk), &wait);
David S. Miller1c92b4e2007-05-31 13:24:26 -07002195 unix_state_unlock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196 return timeo;
2197}
2198
Eric Dumazete370a722013-08-08 14:37:32 -07002199static unsigned int unix_skb_len(const struct sk_buff *skb)
2200{
2201 return skb->len - UNIXCB(skb).consumed;
2202}
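/*
 * UNIXCB(skb).consumed counts the bytes of this skb that a stream reader
 * has already copied out, so a partially read skb can stay at the head of
 * the receive queue; unix_skb_len() is what remains unread.
 */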
2203
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002204struct unix_stream_read_state {
2205 int (*recv_actor)(struct sk_buff *, int, int,
2206 struct unix_stream_read_state *);
2207 struct socket *socket;
2208 struct msghdr *msg;
2209 struct pipe_inode_info *pipe;
2210 size_t size;
2211 int flags;
2212 unsigned int splice_flags;
2213};
2214
WANG Cong06a77b02016-11-17 15:55:26 -08002215static int unix_stream_read_generic(struct unix_stream_read_state *state,
2216 bool freezable)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217{
Christoph Hellwig7cc05662015-01-28 18:04:53 +01002218 struct scm_cookie scm;
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002219 struct socket *sock = state->socket;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220 struct sock *sk = sock->sk;
2221 struct unix_sock *u = unix_sk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222 int copied = 0;
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002223 int flags = state->flags;
Eric Dumazetde144392014-03-25 18:42:27 -07002224 int noblock = flags & MSG_DONTWAIT;
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002225 bool check_creds = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226 int target;
2227 int err = 0;
2228 long timeo;
Pavel Emelyanovfc0d7532012-02-21 07:32:06 +00002229 int skip;
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002230 size_t size = state->size;
2231 unsigned int last_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232
Rainer Weikusat1b92ee32016-02-08 18:47:19 +00002233 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2234 err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235 goto out;
Rainer Weikusat1b92ee32016-02-08 18:47:19 +00002236 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237
Rainer Weikusat1b92ee32016-02-08 18:47:19 +00002238 if (unlikely(flags & MSG_OOB)) {
2239 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002240 goto out;
Rainer Weikusat1b92ee32016-02-08 18:47:19 +00002241 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002242
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002243 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
Eric Dumazetde144392014-03-25 18:42:27 -07002244 timeo = sock_rcvtimeo(sk, noblock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002246 memset(&scm, 0, sizeof(scm));
2247
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248 /* Lock the socket to prevent queue disordering
2249	 * while we sleep in memcpy_tomsg
2250 */
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07002251 mutex_lock(&u->iolock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252
Andrey Vagine9193d62015-10-02 00:05:36 +03002253 if (flags & MSG_PEEK)
2254 skip = sk_peek_offset(sk, flags);
2255 else
2256 skip = 0;
2257
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002258 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 int chunk;
Hannes Frederic Sowa73ed5d22015-11-10 16:23:15 +01002260 bool drop_skb;
Benjamin Poirier79f632c2013-04-29 11:42:14 +00002261 struct sk_buff *skb, *last;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262
Rainer Weikusat18eceb82016-02-18 12:39:46 +00002263redo:
Miklos Szeredi3c0d2f32007-06-05 13:10:29 -07002264 unix_state_lock(sk);
Mark Salyzynb48732e2015-05-26 08:22:19 -07002265 if (sock_flag(sk, SOCK_DEAD)) {
2266 err = -ECONNRESET;
2267 goto unlock;
2268 }
Benjamin Poirier79f632c2013-04-29 11:42:14 +00002269 last = skb = skb_peek(&sk->sk_receive_queue);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002270 last_len = last ? last->len : 0;
Pavel Emelyanovfc0d7532012-02-21 07:32:06 +00002271again:
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002272 if (skb == NULL) {
Eric Dumazet25888e32010-11-25 04:11:39 +00002273 unix_sk(sk)->recursion_level = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 if (copied >= target)
Miklos Szeredi3c0d2f32007-06-05 13:10:29 -07002275 goto unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276
2277 /*
2278 * POSIX 1003.1g mandates this order.
2279 */
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09002280
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002281 err = sock_error(sk);
2282 if (err)
Miklos Szeredi3c0d2f32007-06-05 13:10:29 -07002283 goto unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002284 if (sk->sk_shutdown & RCV_SHUTDOWN)
Miklos Szeredi3c0d2f32007-06-05 13:10:29 -07002285 goto unlock;
2286
2287 unix_state_unlock(sk);
Rainer Weikusat1b92ee32016-02-08 18:47:19 +00002288 if (!timeo) {
2289 err = -EAGAIN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290 break;
Rainer Weikusat1b92ee32016-02-08 18:47:19 +00002291 }
2292
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07002293 mutex_unlock(&u->iolock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002295 timeo = unix_stream_data_wait(sk, timeo, last,
WANG Cong06a77b02016-11-17 15:55:26 -08002296 last_len, freezable);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297
Rainer Weikusat3822b5c2015-12-16 20:09:25 +00002298 if (signal_pending(current)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299 err = sock_intr_errno(timeo);
Eric Dumazetfa0dc042016-01-24 13:53:50 -08002300 scm_destroy(&scm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301 goto out;
2302 }
Rainer Weikusatb3ca9b02011-02-28 04:50:55 +00002303
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07002304 mutex_lock(&u->iolock);
Rainer Weikusat18eceb82016-02-18 12:39:46 +00002305 goto redo;
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002306unlock:
Miklos Szeredi3c0d2f32007-06-05 13:10:29 -07002307 unix_state_unlock(sk);
2308 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002309 }
Pavel Emelyanovfc0d7532012-02-21 07:32:06 +00002310
Eric Dumazete370a722013-08-08 14:37:32 -07002311 while (skip >= unix_skb_len(skb)) {
2312 skip -= unix_skb_len(skb);
Benjamin Poirier79f632c2013-04-29 11:42:14 +00002313 last = skb;
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002314 last_len = skb->len;
Pavel Emelyanovfc0d7532012-02-21 07:32:06 +00002315 skb = skb_peek_next(skb, &sk->sk_receive_queue);
Benjamin Poirier79f632c2013-04-29 11:42:14 +00002316 if (!skb)
2317 goto again;
Pavel Emelyanovfc0d7532012-02-21 07:32:06 +00002318 }
2319
Miklos Szeredi3c0d2f32007-06-05 13:10:29 -07002320 unix_state_unlock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002321
2322 if (check_creds) {
2323 /* Never glue messages from different writers */
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01002324 if (!unix_skb_scm_eq(skb, &scm))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325 break;
Eric W. Biederman0e82e7f6d2013-04-03 16:14:47 +00002326 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002327 /* Copy credentials */
Christoph Hellwig7cc05662015-01-28 18:04:53 +01002328 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
Stephen Smalley37a9a8d2015-06-10 08:44:59 -04002329 unix_set_secdata(&scm, skb);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002330 check_creds = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331 }
2332
2333 /* Copy address just once */
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002334 if (state->msg && state->msg->msg_name) {
2335 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2336 state->msg->msg_name);
2337 unix_copy_addr(state->msg, skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338 sunaddr = NULL;
2339 }
2340
Eric Dumazete370a722013-08-08 14:37:32 -07002341 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
Hannes Frederic Sowa73ed5d22015-11-10 16:23:15 +01002342 skb_get(skb);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002343 chunk = state->recv_actor(skb, skip, chunk, state);
Hannes Frederic Sowa73ed5d22015-11-10 16:23:15 +01002344 drop_skb = !unix_skb_len(skb);
2345 /* skb is only safe to use if !drop_skb */
2346 consume_skb(skb);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002347 if (chunk < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348 if (copied == 0)
2349 copied = -EFAULT;
2350 break;
2351 }
2352 copied += chunk;
2353 size -= chunk;
2354
Hannes Frederic Sowa73ed5d22015-11-10 16:23:15 +01002355 if (drop_skb) {
2356 /* the skb was touched by a concurrent reader;
2357 * we should not expect anything from this skb
2358 * anymore and assume it invalid - we can be
2359 * sure it was dropped from the socket queue
2360 *
2361 * let's report a short read
2362 */
2363 err = 0;
2364 break;
2365 }
2366
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367 /* Mark read part of skb as used */
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002368 if (!(flags & MSG_PEEK)) {
Eric Dumazete370a722013-08-08 14:37:32 -07002369 UNIXCB(skb).consumed += chunk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370
Pavel Emelyanovfc0d7532012-02-21 07:32:06 +00002371 sk_peek_offset_bwd(sk, chunk);
2372
Linus Torvalds1da177e2005-04-16 15:20:36 -07002373 if (UNIXCB(skb).fp)
Christoph Hellwig7cc05662015-01-28 18:04:53 +01002374 unix_detach_fds(&scm, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002375
Eric Dumazete370a722013-08-08 14:37:32 -07002376 if (unix_skb_len(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378
Eric Dumazet6f01fd62012-01-28 16:11:03 +00002379 skb_unlink(skb, &sk->sk_receive_queue);
Neil Horman70d4bf62010-07-20 06:45:56 +00002380 consume_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381
Christoph Hellwig7cc05662015-01-28 18:04:53 +01002382 if (scm.fp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002383 break;
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002384 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385 /* It is questionable, see note in unix_dgram_recvmsg.
2386 */
2387 if (UNIXCB(skb).fp)
Christoph Hellwig7cc05662015-01-28 18:04:53 +01002388 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389
Andrey Vagine9193d62015-10-02 00:05:36 +03002390 sk_peek_offset_fwd(sk, chunk);
Pavel Emelyanovfc0d7532012-02-21 07:32:06 +00002391
Aaron Conole9f389e32015-09-26 18:50:43 -04002392 if (UNIXCB(skb).fp)
2393 break;
2394
Andrey Vagine9193d62015-10-02 00:05:36 +03002395 skip = 0;
Aaron Conole9f389e32015-09-26 18:50:43 -04002396 last = skb;
2397 last_len = skb->len;
2398 unix_state_lock(sk);
2399 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2400 if (skb)
2401 goto again;
2402 unix_state_unlock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403 break;
2404 }
2405 } while (size);
2406
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -07002407 mutex_unlock(&u->iolock);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002408 if (state->msg)
2409 scm_recv(sock, state->msg, &scm, flags);
2410 else
2411 scm_destroy(&scm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002412out:
2413 return copied ? : err;
2414}
2415
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002416static int unix_stream_read_actor(struct sk_buff *skb,
2417 int skip, int chunk,
2418 struct unix_stream_read_state *state)
2419{
2420 int ret;
2421
2422 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2423 state->msg, chunk);
2424 return ret ?: chunk;
2425}
2426
2427static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2428 size_t size, int flags)
2429{
2430 struct unix_stream_read_state state = {
2431 .recv_actor = unix_stream_read_actor,
2432 .socket = sock,
2433 .msg = msg,
2434 .size = size,
2435 .flags = flags
2436 };
2437
WANG Cong06a77b02016-11-17 15:55:26 -08002438 return unix_stream_read_generic(&state, true);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002439}
2440
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002441static int unix_stream_splice_actor(struct sk_buff *skb,
2442 int skip, int chunk,
2443 struct unix_stream_read_state *state)
2444{
2445 return skb_splice_bits(skb, state->socket->sk,
2446 UNIXCB(skb).consumed + skip,
Al Viro25869262016-09-17 21:02:10 -04002447 state->pipe, chunk, state->splice_flags);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002448}
2449
2450static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2451 struct pipe_inode_info *pipe,
2452 size_t size, unsigned int flags)
2453{
2454 struct unix_stream_read_state state = {
2455 .recv_actor = unix_stream_splice_actor,
2456 .socket = sock,
2457 .pipe = pipe,
2458 .size = size,
2459 .splice_flags = flags,
2460 };
2461
2462 if (unlikely(*ppos))
2463 return -ESPIPE;
2464
2465 if (sock->file->f_flags & O_NONBLOCK ||
2466 flags & SPLICE_F_NONBLOCK)
2467 state.flags = MSG_DONTWAIT;
2468
WANG Cong06a77b02016-11-17 15:55:26 -08002469 return unix_stream_read_generic(&state, false);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002470}
2471
Linus Torvalds1da177e2005-04-16 15:20:36 -07002472static int unix_shutdown(struct socket *sock, int mode)
2473{
2474 struct sock *sk = sock->sk;
2475 struct sock *other;
2476
Xi Wangfc61b922012-08-26 16:47:13 +00002477 if (mode < SHUT_RD || mode > SHUT_RDWR)
2478 return -EINVAL;
2479 /* This maps:
2480 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2481 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2482 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2483 */
2484 ++mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485
Alban Crequy7180a032011-01-19 04:56:36 +00002486 unix_state_lock(sk);
2487 sk->sk_shutdown |= mode;
2488 other = unix_peer(sk);
2489 if (other)
2490 sock_hold(other);
2491 unix_state_unlock(sk);
2492 sk->sk_state_change(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493
Alban Crequy7180a032011-01-19 04:56:36 +00002494 if (other &&
2495 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496
Alban Crequy7180a032011-01-19 04:56:36 +00002497 int peer_mode = 0;
2498
2499 if (mode&RCV_SHUTDOWN)
2500 peer_mode |= SEND_SHUTDOWN;
2501 if (mode&SEND_SHUTDOWN)
2502 peer_mode |= RCV_SHUTDOWN;
2503 unix_state_lock(other);
2504 other->sk_shutdown |= peer_mode;
2505 unix_state_unlock(other);
2506 other->sk_state_change(other);
2507 if (peer_mode == SHUTDOWN_MASK)
2508 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2509 else if (peer_mode & RCV_SHUTDOWN)
2510 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511 }
Alban Crequy7180a032011-01-19 04:56:36 +00002512 if (other)
2513 sock_put(other);
2514
Linus Torvalds1da177e2005-04-16 15:20:36 -07002515 return 0;
2516}
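
/*
 * A minimal user-space sketch of the propagation above for stream and
 * seqpacket sockets: shutting down the write side of one end also marks
 * the peer's receive side shut down, so once the peer drains its queue
 * read(2) returns 0 and poll(2) reports POLLRDHUP.
 *
 *	shutdown(fd, SHUT_WR);
 */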
2517
Pavel Emelyanov885ee742011-12-30 00:54:11 +00002518long unix_inq_len(struct sock *sk)
2519{
2520 struct sk_buff *skb;
2521 long amount = 0;
2522
2523 if (sk->sk_state == TCP_LISTEN)
2524 return -EINVAL;
2525
2526 spin_lock(&sk->sk_receive_queue.lock);
2527 if (sk->sk_type == SOCK_STREAM ||
2528 sk->sk_type == SOCK_SEQPACKET) {
2529 skb_queue_walk(&sk->sk_receive_queue, skb)
Eric Dumazete370a722013-08-08 14:37:32 -07002530 amount += unix_skb_len(skb);
Pavel Emelyanov885ee742011-12-30 00:54:11 +00002531 } else {
2532 skb = skb_peek(&sk->sk_receive_queue);
2533 if (skb)
2534 amount = skb->len;
2535 }
2536 spin_unlock(&sk->sk_receive_queue.lock);
2537
2538 return amount;
2539}
2540EXPORT_SYMBOL_GPL(unix_inq_len);
2541
2542long unix_outq_len(struct sock *sk)
2543{
2544 return sk_wmem_alloc_get(sk);
2545}
2546EXPORT_SYMBOL_GPL(unix_outq_len);
2547
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2549{
2550 struct sock *sk = sock->sk;
Jianjun Konge27dfce2008-11-01 21:38:31 -07002551 long amount = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552 int err;
2553
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002554 switch (cmd) {
2555 case SIOCOUTQ:
Pavel Emelyanov885ee742011-12-30 00:54:11 +00002556 amount = unix_outq_len(sk);
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002557 err = put_user(amount, (int __user *)arg);
2558 break;
2559 case SIOCINQ:
Pavel Emelyanov885ee742011-12-30 00:54:11 +00002560 amount = unix_inq_len(sk);
2561 if (amount < 0)
2562 err = amount;
2563 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564 err = put_user(amount, (int __user *)arg);
Pavel Emelyanov885ee742011-12-30 00:54:11 +00002565 break;
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002566 default:
2567 err = -ENOIOCTLCMD;
2568 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569 }
2570 return err;
2571}
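
/*
 * A minimal user-space sketch of the two ioctls handled above: SIOCINQ
 * (a.k.a. FIONREAD) reports the bytes queued for reading as computed by
 * unix_inq_len(), SIOCOUTQ the bytes not yet consumed by the peer as
 * computed by unix_outq_len().
 *
 *	int queued;
 *	ioctl(fd, SIOCINQ, &queued);
 *	ioctl(fd, SIOCOUTQ, &queued);
 */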
2572
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002573static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002574{
2575 struct sock *sk = sock->sk;
2576 unsigned int mask;
2577
Eric Dumazetaa395142010-04-20 13:03:51 +00002578 sock_poll_wait(file, sk_sleep(sk), wait);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579 mask = 0;
2580
2581 /* exceptional events? */
2582 if (sk->sk_err)
2583 mask |= POLLERR;
2584 if (sk->sk_shutdown == SHUTDOWN_MASK)
2585 mask |= POLLHUP;
Davide Libenzif348d702006-03-25 03:07:39 -08002586 if (sk->sk_shutdown & RCV_SHUTDOWN)
Eric Dumazetdb409802010-09-06 11:13:50 +00002587 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588
2589 /* readable? */
Eric Dumazetdb409802010-09-06 11:13:50 +00002590 if (!skb_queue_empty(&sk->sk_receive_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002591 mask |= POLLIN | POLLRDNORM;
2592
2593 /* Connection-based need to check for termination and startup */
Eric Dumazet6eba6a32008-11-16 22:58:44 -08002594 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2595 sk->sk_state == TCP_CLOSE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002596 mask |= POLLHUP;
2597
2598 /*
2599 * we set writable also when the other side has shut down the
2600 * connection. This prevents stuck sockets.
2601 */
2602 if (unix_writable(sk))
2603 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2604
2605 return mask;
2606}
2607
Rainer Weikusatec0d2152008-06-27 19:34:18 -07002608static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2609 poll_table *wait)
Rainer Weikusat3c734192008-06-17 22:28:05 -07002610{
Rainer Weikusatec0d2152008-06-27 19:34:18 -07002611 struct sock *sk = sock->sk, *other;
2612 unsigned int mask, writable;
Rainer Weikusat3c734192008-06-17 22:28:05 -07002613
Eric Dumazetaa395142010-04-20 13:03:51 +00002614 sock_poll_wait(file, sk_sleep(sk), wait);
Rainer Weikusat3c734192008-06-17 22:28:05 -07002615 mask = 0;
2616
2617 /* exceptional events? */
2618 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
Keller, Jacob E7d4c04f2013-03-28 11:19:25 +00002619 mask |= POLLERR |
Jacob Keller8facd5f2013-04-02 13:55:40 -07002620 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
Keller, Jacob E7d4c04f2013-03-28 11:19:25 +00002621
Rainer Weikusat3c734192008-06-17 22:28:05 -07002622 if (sk->sk_shutdown & RCV_SHUTDOWN)
Eric Dumazet5456f092010-10-31 05:36:23 +00002623 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
Rainer Weikusat3c734192008-06-17 22:28:05 -07002624 if (sk->sk_shutdown == SHUTDOWN_MASK)
2625 mask |= POLLHUP;
2626
2627 /* readable? */
Eric Dumazet5456f092010-10-31 05:36:23 +00002628 if (!skb_queue_empty(&sk->sk_receive_queue))
Rainer Weikusat3c734192008-06-17 22:28:05 -07002629 mask |= POLLIN | POLLRDNORM;
2630
 2631	/* Connection-based sockets need to check for termination and startup */
2632 if (sk->sk_type == SOCK_SEQPACKET) {
2633 if (sk->sk_state == TCP_CLOSE)
2634 mask |= POLLHUP;
2635 /* connection hasn't started yet? */
2636 if (sk->sk_state == TCP_SYN_SENT)
2637 return mask;
2638 }
2639
Eric Dumazet973a34a2010-10-31 05:38:25 +00002640 /* No write status requested, avoid expensive OUT tests. */
Hans Verkuil626cf232012-03-23 15:02:27 -07002641 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
Eric Dumazet973a34a2010-10-31 05:38:25 +00002642 return mask;
2643
Rainer Weikusatec0d2152008-06-27 19:34:18 -07002644 writable = unix_writable(sk);
Rainer Weikusat7d267272015-11-20 22:07:23 +00002645 if (writable) {
2646 unix_state_lock(sk);
2647
2648 other = unix_peer(sk);
2649 if (other && unix_peer(other) != sk &&
2650 unix_recvq_full(other) &&
2651 unix_dgram_peer_wake_me(sk, other))
2652 writable = 0;
2653
2654 unix_state_unlock(sk);
Rainer Weikusatec0d2152008-06-27 19:34:18 -07002655 }
2656
2657 if (writable)
Rainer Weikusat3c734192008-06-17 22:28:05 -07002658 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2659 else
Eric Dumazet9cd3e072015-11-29 20:03:10 -08002660 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
Rainer Weikusat3c734192008-06-17 22:28:05 -07002661
Rainer Weikusat3c734192008-06-17 22:28:05 -07002662 return mask;
2663}
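/*
 * Illustrative userspace sketch (not part of this file): because of the
 * peer receive-queue check above, a connected SOCK_DGRAM sender can use
 * poll(2) to wait until the receiver drains its queue instead of spinning
 * on EAGAIN.  "fd", "buf" and "len" are assumed: a connected, non-blocking
 * AF_UNIX datagram socket and a datagram to send.
 *
 *	#include <errno.h>
 *	#include <poll.h>
 *	#include <sys/socket.h>
 *
 *	while (send(fd, buf, len, 0) < 0 && errno == EAGAIN) {
 *		struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *		poll(&pfd, 1, -1);
 *	}
 */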
Linus Torvalds1da177e2005-04-16 15:20:36 -07002664
2665#ifdef CONFIG_PROC_FS
Pavel Emelyanova53eb3f2007-11-23 20:30:01 +08002666
Eric Dumazet7123aaa2012-06-08 05:03:21 +00002667#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2668
2669#define get_bucket(x) ((x) >> BUCKET_SPACE)
2670#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2671#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
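/*
 * A worked example of the position encoding (assuming BITS_PER_LONG == 64
 * and UNIX_HASH_BITS == 8, so BUCKET_SPACE == 64 - 9 - 1 == 54): the
 * seq_file position packs a hash-bucket index into the high bits and an
 * offset within that bucket into the low bits,
 *
 *	set_bucket_offset(3, 7)   == (3UL << 54) | 7
 *	get_bucket(3UL << 54 | 7) == 3
 *	get_offset(3UL << 54 | 7) == 7
 *
 * Offsets start at 1; an offset of 0 only occurs for the initial
 * SEQ_START_TOKEN position.
 */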
Pavel Emelyanova53eb3f2007-11-23 20:30:01 +08002672
Eric Dumazet7123aaa2012-06-08 05:03:21 +00002673static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002674{
Eric Dumazet7123aaa2012-06-08 05:03:21 +00002675 unsigned long offset = get_offset(*pos);
2676 unsigned long bucket = get_bucket(*pos);
2677 struct sock *sk;
2678 unsigned long count = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002679
Eric Dumazet7123aaa2012-06-08 05:03:21 +00002680 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2681 if (sock_net(sk) != seq_file_net(seq))
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002682 continue;
Eric Dumazet7123aaa2012-06-08 05:03:21 +00002683 if (++count == offset)
2684 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002685 }
Eric Dumazet7123aaa2012-06-08 05:03:21 +00002686
2687 return sk;
2688}
2689
2690static struct sock *unix_next_socket(struct seq_file *seq,
2691 struct sock *sk,
2692 loff_t *pos)
2693{
2694 unsigned long bucket;
2695
2696 while (sk > (struct sock *)SEQ_START_TOKEN) {
2697 sk = sk_next(sk);
2698 if (!sk)
2699 goto next_bucket;
2700 if (sock_net(sk) == seq_file_net(seq))
2701 return sk;
2702 }
2703
2704 do {
2705 sk = unix_from_bucket(seq, pos);
2706 if (sk)
2707 return sk;
2708
2709next_bucket:
2710 bucket = get_bucket(*pos) + 1;
2711 *pos = set_bucket_offset(bucket, 1);
2712 } while (bucket < ARRAY_SIZE(unix_socket_table));
2713
Linus Torvalds1da177e2005-04-16 15:20:36 -07002714 return NULL;
2715}
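/*
 * unix_from_bucket() and unix_next_socket() above iterate the global
 * unix_socket_table: *pos selects the current hash bucket and the offset
 * of the current socket within it, and any socket whose network namespace
 * does not match the seq_file (sock_net(sk) != seq_file_net(seq)) is
 * skipped, so each namespace's /proc/net/unix only lists its own sockets.
 */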
2716
Linus Torvalds1da177e2005-04-16 15:20:36 -07002717static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002718 __acquires(unix_table_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002719{
David S. Millerfbe9cc42005-12-13 23:26:29 -08002720 spin_lock(&unix_table_lock);
Eric Dumazet7123aaa2012-06-08 05:03:21 +00002721
2722 if (!*pos)
2723 return SEQ_START_TOKEN;
2724
2725 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2726 return NULL;
2727
2728 return unix_next_socket(seq, NULL, pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002729}
2730
2731static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2732{
2733 ++*pos;
Eric Dumazet7123aaa2012-06-08 05:03:21 +00002734 return unix_next_socket(seq, v, pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002735}
2736
2737static void unix_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002738 __releases(unix_table_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002739{
David S. Millerfbe9cc42005-12-13 23:26:29 -08002740 spin_unlock(&unix_table_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002741}
2742
2743static int unix_seq_show(struct seq_file *seq, void *v)
2744{
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09002745
Joe Perchesb9f31242008-04-12 19:04:38 -07002746 if (v == SEQ_START_TOKEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002747 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2748 "Inode Path\n");
2749 else {
2750 struct sock *s = v;
2751 struct unix_sock *u = unix_sk(s);
David S. Miller1c92b4e2007-05-31 13:24:26 -07002752 unix_state_lock(s);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002753
Dan Rosenberg71338aa2011-05-23 12:17:35 +00002754 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002755 s,
2756 atomic_read(&s->sk_refcnt),
2757 0,
2758 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2759 s->sk_type,
2760 s->sk_socket ?
2761 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2762 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2763 sock_i_ino(s));
2764
Al Viro713b91c2019-02-15 20:09:35 +00002765		if (u->addr) {	/* under unix_table_lock here */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002766 int i, len;
2767 seq_putc(seq, ' ');
2768
2769 i = 0;
2770 len = u->addr->len - sizeof(short);
2771 if (!UNIX_ABSTRACT(s))
2772 len--;
2773 else {
2774 seq_putc(seq, '@');
2775 i++;
2776 }
2777 for ( ; i < len; i++)
Isaac Boukrise7947ea2016-11-01 02:41:35 +02002778 seq_putc(seq, u->addr->name->sun_path[i] ?:
2779 '@');
Linus Torvalds1da177e2005-04-16 15:20:36 -07002780 }
David S. Miller1c92b4e2007-05-31 13:24:26 -07002781 unix_state_unlock(s);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002782 seq_putc(seq, '\n');
2783 }
2784
2785 return 0;
2786}
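/*
 * Illustrative sketch (values are made up, not from a real system): a line
 * produced by unix_seq_show() above looks roughly like
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff8800b8c32400: 00000002 00000000 00010000 0001 01 16034 /run/example.sock
 *
 * Flags carries __SO_ACCEPTCON for listening sockets, Type is the socket
 * type (SOCK_STREAM == 1), St is the SS_* pseudo-state and Path is the
 * bound address; abstract addresses are printed with a leading '@', and
 * any embedded NUL bytes are rendered as '@' as well.
 */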
2787
Philippe De Muyter56b3d972007-07-10 23:07:31 -07002788static const struct seq_operations unix_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002789 .start = unix_seq_start,
2790 .next = unix_seq_next,
2791 .stop = unix_seq_stop,
2792 .show = unix_seq_show,
2793};
2794
Linus Torvalds1da177e2005-04-16 15:20:36 -07002795static int unix_seq_open(struct inode *inode, struct file *file)
2796{
Denis V. Luneve372c412007-11-19 22:31:54 -08002797 return seq_open_net(inode, file, &unix_seq_ops,
Eric Dumazet8b51b062012-06-08 22:10:20 +00002798 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002799}
2800
Arjan van de Venda7071d2007-02-12 00:55:36 -08002801static const struct file_operations unix_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002802 .owner = THIS_MODULE,
2803 .open = unix_seq_open,
2804 .read = seq_read,
2805 .llseek = seq_lseek,
Denis V. Luneve372c412007-11-19 22:31:54 -08002806 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002807};
2808
2809#endif
2810
Stephen Hemmingerec1b4cf2009-10-05 05:58:39 +00002811static const struct net_proto_family unix_family_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002812 .family = PF_UNIX,
2813 .create = unix_create,
2814 .owner = THIS_MODULE,
2815};
2816
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002817
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002818static int __net_init unix_net_init(struct net *net)
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002819{
2820 int error = -ENOMEM;
2821
Denis V. Luneva0a53c82007-12-11 04:19:17 -08002822 net->unx.sysctl_max_dgram_qlen = 10;
Pavel Emelyanov1597fbc2007-12-01 23:51:01 +11002823 if (unix_sysctl_register(net))
2824 goto out;
Pavel Emelyanovd392e492007-12-01 23:44:15 +11002825
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002826#ifdef CONFIG_PROC_FS
Gao fengd4beaa62013-02-18 01:34:54 +00002827 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
Pavel Emelyanov1597fbc2007-12-01 23:51:01 +11002828 unix_sysctl_unregister(net);
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002829 goto out;
Pavel Emelyanov1597fbc2007-12-01 23:51:01 +11002830 }
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002831#endif
2832 error = 0;
2833out:
Jianjun Kong48dcc33e2008-11-01 21:37:27 -07002834 return error;
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002835}
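/*
 * Note (not part of this file): the per-namespace default initialised
 * above, unx.sysctl_max_dgram_qlen = 10, is the limit registered by
 * unix_sysctl_register(); on most systems it is visible to userspace as
 * /proc/sys/net/unix/max_dgram_qlen (sysctl net.unix.max_dgram_qlen) and
 * bounds how many datagrams may queue on a receiving AF_UNIX socket
 * before senders block or see EAGAIN.
 */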
2836
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002837static void __net_exit unix_net_exit(struct net *net)
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002838{
Pavel Emelyanov1597fbc2007-12-01 23:51:01 +11002839 unix_sysctl_unregister(net);
Gao fengece31ff2013-02-18 01:34:56 +00002840 remove_proc_entry("unix", net->proc_net);
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002841}
2842
2843static struct pernet_operations unix_net_ops = {
2844 .init = unix_net_init,
2845 .exit = unix_net_exit,
2846};
2847
Linus Torvalds1da177e2005-04-16 15:20:36 -07002848static int __init af_unix_init(void)
2849{
2850 int rc = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002851
YOSHIFUJI Hideaki / 吉藤英明b4fff5f2013-01-09 07:20:07 +00002852 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002853
2854 rc = proto_register(&unix_proto, 1);
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09002855 if (rc != 0) {
wangweidong5cc208b2013-12-06 18:03:36 +08002856 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002857 goto out;
2858 }
2859
2860 sock_register(&unix_family_ops);
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002861 register_pernet_subsys(&unix_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002862out:
2863 return rc;
2864}
2865
2866static void __exit af_unix_exit(void)
2867{
2868 sock_unregister(PF_UNIX);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002869 proto_unregister(&unix_proto);
Denis V. Lunev097e66c2007-11-19 22:29:30 -08002870 unregister_pernet_subsys(&unix_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002871}
2872
David Woodhouse3d366962008-04-24 00:59:25 -07002873/* Earlier than device_initcall() so that other drivers invoking
2874 request_module() don't end up in a loop when modprobe tries
2875 to use a UNIX socket. But later than subsys_initcall() because
2876 we depend on stuff initialised there */
2877fs_initcall(af_unix_init);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002878module_exit(af_unix_exit);
2879
2880MODULE_LICENSE("GPL");
2881MODULE_ALIAS_NETPROTO(PF_UNIX);