blob: 8ab48cd8955971ec7db7e60464a946380a7be49a [file] [log] [blame]
/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on top of these would
 *	make sense. Not tonight however 8-).
 *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
 *	NetROM layer all have identical poll code and mostly
 *	identical recvmsg() code. So we share it here. The poll was
 *	shared before but buried in udp.c so I moved it.
 *
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
 *						     udp.c code)
 *
 *	Fixes:
 *		Alan Cox	:	NULL return from skb_peek_copy()
 *					understood
 *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
 *					skb_peek_copy stuff.
 *		Alan Cox	:	Added support for SOCK_SEQPACKET.
 *					IPX can no longer use the SO_TYPE hack
 *					but AX.25 now works right, and SPX is
 *					feasible.
 *		Alan Cox	:	Fixed write poll of non IP protocol
 *					crash.
 *		Florian  La Roche:	Changed for my new skbuff handling.
 *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
 *		Linus Torvalds	:	BSD semantic fixes.
 *		Alan Cox	:	Datagram iovec handling
 *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
 *		Alan Cox	:	POSIXisms
 *		Pete Wyckoff    :       Unconnected accept() fix.
 *
 */
35
36#include <linux/module.h>
37#include <linux/types.h>
38#include <linux/kernel.h>
39#include <asm/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/mm.h>
41#include <linux/interrupt.h>
42#include <linux/errno.h>
43#include <linux/sched.h>
44#include <linux/inet.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <linux/netdevice.h>
46#include <linux/rtnetlink.h>
47#include <linux/poll.h>
48#include <linux/highmem.h>
Herbert Xu3305b802005-12-13 23:16:37 -080049#include <linux/spinlock.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090050#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051
52#include <net/protocol.h>
53#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070054
Arnaldo Carvalho de Meloc752f072005-08-09 20:08:28 -070055#include <net/checksum.h>
56#include <net/sock.h>
57#include <net/tcp_states.h>
Neil Hormane9b3cc12009-08-13 05:19:44 +000058#include <trace/events/skb.h>
Eliezer Tamir076bb0c2013-07-10 17:13:17 +030059#include <net/busy_poll.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070060
61/*
62 * Is a socket 'connection oriented' ?
63 */
64static inline int connection_based(struct sock *sk)
65{
66 return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
67}
68
Eric Dumazet95c96172012-04-15 05:58:06 +000069static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
Eric Dumazetbf368e42009-04-28 02:24:21 -070070 void *key)
71{
72 unsigned long bits = (unsigned long)key;
73
74 /*
75 * Avoid a wakeup if event not interesting for us
76 */
77 if (bits && !(bits & (POLLIN | POLLERR)))
78 return 0;
79 return autoremove_wake_function(wait, mode, sync, key);
80}
Linus Torvalds1da177e2005-04-16 15:20:36 -070081/*
Benjamin Poirier39cc8612013-04-29 11:42:13 +000082 * Wait for the last received packet to be different from skb
Linus Torvalds1da177e2005-04-16 15:20:36 -070083 */
Benjamin Poirier39cc8612013-04-29 11:42:13 +000084static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
85 const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -070086{
87 int error;
Eric Dumazetbf368e42009-04-28 02:24:21 -070088 DEFINE_WAIT_FUNC(wait, receiver_wake_function);
Linus Torvalds1da177e2005-04-16 15:20:36 -070089
Eric Dumazetaa395142010-04-20 13:03:51 +000090 prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
Linus Torvalds1da177e2005-04-16 15:20:36 -070091
92 /* Socket errors? */
93 error = sock_error(sk);
94 if (error)
95 goto out_err;
96
Benjamin Poirier39cc8612013-04-29 11:42:13 +000097 if (sk->sk_receive_queue.prev != skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -070098 goto out;
99
100 /* Socket shut down? */
101 if (sk->sk_shutdown & RCV_SHUTDOWN)
102 goto out_noerr;
103
104 /* Sequenced packets can come disconnected.
105 * If so we report the problem
106 */
107 error = -ENOTCONN;
108 if (connection_based(sk) &&
109 !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
110 goto out_err;
111
112 /* handle signals */
113 if (signal_pending(current))
114 goto interrupted;
115
116 error = 0;
117 *timeo_p = schedule_timeout(*timeo_p);
118out:
Eric Dumazetaa395142010-04-20 13:03:51 +0000119 finish_wait(sk_sleep(sk), &wait);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120 return error;
121interrupted:
122 error = sock_intr_errno(*timeo_p);
123out_err:
124 *err = error;
125 goto out;
126out_noerr:
127 *err = 0;
128 error = 1;
129 goto out;
130}
131
132/**
Herbert Xua59322b2007-12-05 01:53:40 -0800133 * __skb_recv_datagram - Receive a datagram skbuff
Pavel Pisa4dc3b162005-05-01 08:59:25 -0700134 * @sk: socket
135 * @flags: MSG_ flags
Benjamin Poirier39cc8612013-04-29 11:42:13 +0000136 * @peeked: returns non-zero if this packet has been seen before
Pavel Emelyanov3f518bf2012-02-21 07:30:58 +0000137 * @off: an offset in bytes to peek skb from. Returns an offset
138 * within an skb where data actually starts
Pavel Pisa4dc3b162005-05-01 08:59:25 -0700139 * @err: error code returned
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140 *
141 * Get a datagram skbuff, understands the peeking, nonblocking wakeups
142 * and possible races. This replaces identical code in packet, raw and
143 * udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
144 * the long standing peek and read race for datagram sockets. If you
145 * alter this routine remember it must be re-entrant.
146 *
147 * This function will lock the socket if a skb is returned, so the caller
148 * needs to unlock the socket in that case (usually by calling
149 * skb_free_datagram)
150 *
151 * * It does not lock socket since today. This function is
152 * * free of race conditions. This measure should/can improve
153 * * significantly datagram socket latencies at high loads,
154 * * when data copying to user space takes lots of time.
155 * * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
156 * * 8) Great win.)
157 * * --ANK (980729)
158 *
159 * The order of the tests when we find no data waiting are specified
160 * quite explicitly by POSIX 1003.1g, don't change them without having
161 * the standard around please.
162 */
Eric Dumazet95c96172012-04-15 05:58:06 +0000163struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
Pavel Emelyanov3f518bf2012-02-21 07:30:58 +0000164 int *peeked, int *off, int *err)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165{
Benjamin Poirier39cc8612013-04-29 11:42:13 +0000166 struct sk_buff *skb, *last;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 long timeo;
168 /*
169 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
170 */
171 int error = sock_error(sk);
172
173 if (error)
174 goto no_packet;
175
Herbert Xua59322b2007-12-05 01:53:40 -0800176 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177
178 do {
179 /* Again only user level code calls this function, so nothing
180 * interrupt level will suddenly eat the receive_queue.
181 *
182 * Look at current nfs client by the way...
David Shwatrz8917a3c2010-12-02 09:01:55 +0000183 * However, this function was correct in any case. 8)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700184 */
Herbert Xua59322b2007-12-05 01:53:40 -0800185 unsigned long cpu_flags;
Pavel Emelyanov4934b032012-02-21 07:30:33 +0000186 struct sk_buff_head *queue = &sk->sk_receive_queue;
Benjamin Poirier39cc8612013-04-29 11:42:13 +0000187 int _off = *off;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188
Benjamin Poirier39cc8612013-04-29 11:42:13 +0000189 last = (struct sk_buff *)queue;
Pavel Emelyanov4934b032012-02-21 07:30:33 +0000190 spin_lock_irqsave(&queue->lock, cpu_flags);
Pavel Emelyanov3f518bf2012-02-21 07:30:58 +0000191 skb_queue_walk(queue, skb) {
Benjamin Poirier39cc8612013-04-29 11:42:13 +0000192 last = skb;
Herbert Xua59322b2007-12-05 01:53:40 -0800193 *peeked = skb->peeked;
194 if (flags & MSG_PEEK) {
Benjamin Poirier39cc8612013-04-29 11:42:13 +0000195 if (_off >= skb->len && (skb->len || _off ||
Benjamin Poirieradd05ad2013-04-29 11:42:12 +0000196 skb->peeked)) {
Benjamin Poirier39cc8612013-04-29 11:42:13 +0000197 _off -= skb->len;
Pavel Emelyanov3f518bf2012-02-21 07:30:58 +0000198 continue;
199 }
Herbert Xua59322b2007-12-05 01:53:40 -0800200 skb->peeked = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 atomic_inc(&skb->users);
Herbert Xua59322b2007-12-05 01:53:40 -0800202 } else
Pavel Emelyanov4934b032012-02-21 07:30:33 +0000203 __skb_unlink(skb, queue);
Pavel Emelyanov3f518bf2012-02-21 07:30:58 +0000204
205 spin_unlock_irqrestore(&queue->lock, cpu_flags);
Benjamin Poirier39cc8612013-04-29 11:42:13 +0000206 *off = _off;
Pavel Emelyanov3f518bf2012-02-21 07:30:58 +0000207 return skb;
Herbert Xua59322b2007-12-05 01:53:40 -0800208 }
Pavel Emelyanov4934b032012-02-21 07:30:33 +0000209 spin_unlock_irqrestore(&queue->lock, cpu_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210
Eliezer Tamircbf55002013-07-08 16:20:34 +0300211 if (sk_can_busy_loop(sk) &&
212 sk_busy_loop(sk, flags & MSG_DONTWAIT))
Eliezer Tamira5b50472013-06-10 11:40:00 +0300213 continue;
214
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 /* User doesn't want to wait */
216 error = -EAGAIN;
217 if (!timeo)
218 goto no_packet;
219
Benjamin Poirier39cc8612013-04-29 11:42:13 +0000220 } while (!wait_for_more_packets(sk, err, &timeo, last));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221
222 return NULL;
223
224no_packet:
225 *err = error;
226 return NULL;
227}
Herbert Xua59322b2007-12-05 01:53:40 -0800228EXPORT_SYMBOL(__skb_recv_datagram);
229
Eric Dumazet95c96172012-04-15 05:58:06 +0000230struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
Herbert Xua59322b2007-12-05 01:53:40 -0800231 int noblock, int *err)
232{
Pavel Emelyanov3f518bf2012-02-21 07:30:58 +0000233 int peeked, off = 0;
Herbert Xua59322b2007-12-05 01:53:40 -0800234
235 return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
Pavel Emelyanov3f518bf2012-02-21 07:30:58 +0000236 &peeked, &off, err);
Herbert Xua59322b2007-12-05 01:53:40 -0800237}
Eric Dumazet9e34a5b2010-07-09 21:22:04 +0000238EXPORT_SYMBOL(skb_recv_datagram);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239
/*
 *	Release a datagram skb via consume_skb() and then let the socket
 *	give back part of its memory accounting with
 *	sk_mem_reclaim_partial().
 */
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);

	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);
246
247void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
248{
Eric Dumazet8a74ad62010-05-26 19:20:18 +0000249 bool slow;
250
Eric Dumazet93bb64e2010-05-03 23:18:14 -0700251 if (likely(atomic_read(&skb->users) == 1))
252 smp_rmb();
253 else if (likely(!atomic_dec_and_test(&skb->users)))
254 return;
255
Eric Dumazet8a74ad62010-05-26 19:20:18 +0000256 slow = lock_sock_fast(sk);
Eric Dumazet4b0b72f2010-04-28 14:35:48 -0700257 skb_orphan(skb);
258 sk_mem_reclaim_partial(sk);
Eric Dumazet8a74ad62010-05-26 19:20:18 +0000259 unlock_sock_fast(sk, slow);
Eric Dumazet4b0b72f2010-04-28 14:35:48 -0700260
Eric Dumazet93bb64e2010-05-03 23:18:14 -0700261 /* skb is now orphaned, can be freed outside of locked section */
262 __kfree_skb(skb);
Eric Dumazet9d410c72009-10-30 05:03:53 +0000263}
264EXPORT_SYMBOL(skb_free_datagram_locked);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265
266/**
Herbert Xu3305b802005-12-13 23:16:37 -0800267 * skb_kill_datagram - Free a datagram skbuff forcibly
268 * @sk: socket
269 * @skb: datagram skbuff
270 * @flags: MSG_ flags
271 *
272 * This function frees a datagram skbuff that was received by
273 * skb_recv_datagram. The flags argument must match the one
274 * used for skb_recv_datagram.
275 *
276 * If the MSG_PEEK flag is set, and the packet is still on the
277 * receive queue of the socket, it will be taken off the queue
278 * before it is freed.
279 *
280 * This function currently only disables BH when acquiring the
281 * sk_receive_queue lock. Therefore it must not be used in a
282 * context where that lock is acquired in an IRQ context.
Herbert Xu27ab2562007-12-05 01:51:58 -0800283 *
284 * It returns 0 if the packet was removed by us.
Herbert Xu3305b802005-12-13 23:16:37 -0800285 */
286
Herbert Xu27ab2562007-12-05 01:51:58 -0800287int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
Herbert Xu3305b802005-12-13 23:16:37 -0800288{
Herbert Xu27ab2562007-12-05 01:51:58 -0800289 int err = 0;
290
Herbert Xu3305b802005-12-13 23:16:37 -0800291 if (flags & MSG_PEEK) {
Herbert Xu27ab2562007-12-05 01:51:58 -0800292 err = -ENOENT;
Herbert Xu3305b802005-12-13 23:16:37 -0800293 spin_lock_bh(&sk->sk_receive_queue.lock);
294 if (skb == skb_peek(&sk->sk_receive_queue)) {
295 __skb_unlink(skb, &sk->sk_receive_queue);
296 atomic_dec(&skb->users);
Herbert Xu27ab2562007-12-05 01:51:58 -0800297 err = 0;
Herbert Xu3305b802005-12-13 23:16:37 -0800298 }
299 spin_unlock_bh(&sk->sk_receive_queue.lock);
300 }
301
John Dykstra61de71c2009-05-08 14:57:01 -0700302 kfree_skb(skb);
Eric Dumazet8edf19c2009-10-15 00:12:40 +0000303 atomic_inc(&sk->sk_drops);
John Dykstra61de71c2009-05-08 14:57:01 -0700304 sk_mem_reclaim_partial(sk);
305
Herbert Xu27ab2562007-12-05 01:51:58 -0800306 return err;
Herbert Xu3305b802005-12-13 23:16:37 -0800307}
Herbert Xu3305b802005-12-13 23:16:37 -0800308EXPORT_SYMBOL(skb_kill_datagram);
309
310/**
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 * skb_copy_datagram_iovec - Copy a datagram to an iovec.
Pavel Pisa4dc3b162005-05-01 08:59:25 -0700312 * @skb: buffer to copy
313 * @offset: offset in the buffer to start copying from
Martin Waitz67be2dd2005-05-01 08:59:26 -0700314 * @to: io vector to copy to
Pavel Pisa4dc3b162005-05-01 08:59:25 -0700315 * @len: amount of data to copy from buffer to iovec
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 *
317 * Note: the iovec is modified during the copy.
318 */
319int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
320 struct iovec *to, int len)
321{
David S. Miller1a028e52007-04-27 15:21:23 -0700322 int start = skb_headlen(skb);
323 int i, copy = start - offset;
David S. Miller5b1a0022009-06-09 00:18:15 -0700324 struct sk_buff *frag_iter;
Herbert Xuc75d7212005-11-02 18:55:00 +1100325
Neil Hormane9b3cc12009-08-13 05:19:44 +0000326 trace_skb_copy_datagram_iovec(skb, len);
327
David S. Millerb4d9eda2006-02-13 16:06:10 -0800328 /* Copy header. */
329 if (copy > 0) {
330 if (copy > len)
331 copy = len;
332 if (memcpy_toiovec(to, skb->data + offset, copy))
333 goto fault;
334 if ((len -= copy) == 0)
335 return 0;
336 offset += copy;
337 }
Herbert Xuc75d7212005-11-02 18:55:00 +1100338
David S. Millerb4d9eda2006-02-13 16:06:10 -0800339 /* Copy paged appendix. Hmm... why does this look so complicated? */
340 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
David S. Miller1a028e52007-04-27 15:21:23 -0700341 int end;
Eric Dumazet9e903e02011-10-18 21:00:24 +0000342 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700344 WARN_ON(start > offset + len);
David S. Miller1a028e52007-04-27 15:21:23 -0700345
Eric Dumazet9e903e02011-10-18 21:00:24 +0000346 end = start + skb_frag_size(frag);
David S. Millerb4d9eda2006-02-13 16:06:10 -0800347 if ((copy = end - offset) > 0) {
348 int err;
349 u8 *vaddr;
Ian Campbellea2ab692011-08-22 23:44:58 +0000350 struct page *page = skb_frag_page(frag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351
352 if (copy > len)
353 copy = len;
David S. Millerb4d9eda2006-02-13 16:06:10 -0800354 vaddr = kmap(page);
David S. Miller1a028e52007-04-27 15:21:23 -0700355 err = memcpy_toiovec(to, vaddr + frag->page_offset +
356 offset - start, copy);
David S. Millerb4d9eda2006-02-13 16:06:10 -0800357 kunmap(page);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 if (err)
359 goto fault;
360 if (!(len -= copy))
361 return 0;
362 offset += copy;
363 }
David S. Miller1a028e52007-04-27 15:21:23 -0700364 start = end;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 }
David S. Millerb4d9eda2006-02-13 16:06:10 -0800366
David S. Miller5b1a0022009-06-09 00:18:15 -0700367 skb_walk_frags(skb, frag_iter) {
368 int end;
David S. Millerb4d9eda2006-02-13 16:06:10 -0800369
David S. Miller5b1a0022009-06-09 00:18:15 -0700370 WARN_ON(start > offset + len);
David S. Millerb4d9eda2006-02-13 16:06:10 -0800371
David S. Miller5b1a0022009-06-09 00:18:15 -0700372 end = start + frag_iter->len;
373 if ((copy = end - offset) > 0) {
374 if (copy > len)
375 copy = len;
376 if (skb_copy_datagram_iovec(frag_iter,
377 offset - start,
378 to, copy))
379 goto fault;
380 if ((len -= copy) == 0)
381 return 0;
382 offset += copy;
David S. Millerb4d9eda2006-02-13 16:06:10 -0800383 }
David S. Miller5b1a0022009-06-09 00:18:15 -0700384 start = end;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 }
David S. Millerb4d9eda2006-02-13 16:06:10 -0800386 if (!len)
387 return 0;
388
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389fault:
390 return -EFAULT;
391}
Eric Dumazet9e34a5b2010-07-09 21:22:04 +0000392EXPORT_SYMBOL(skb_copy_datagram_iovec);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393
Rusty Russelldb543c12008-08-15 15:13:53 -0700394/**
Michael S. Tsirkin0a1ec072009-04-20 01:25:46 +0000395 * skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
396 * @skb: buffer to copy
397 * @offset: offset in the buffer to start copying from
398 * @to: io vector to copy to
399 * @to_offset: offset in the io vector to start copying to
400 * @len: amount of data to copy from buffer to iovec
401 *
402 * Returns 0 or -EFAULT.
403 * Note: the iovec is not modified during the copy.
404 */
405int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
406 const struct iovec *to, int to_offset,
407 int len)
408{
409 int start = skb_headlen(skb);
410 int i, copy = start - offset;
David S. Miller5b1a0022009-06-09 00:18:15 -0700411 struct sk_buff *frag_iter;
Michael S. Tsirkin0a1ec072009-04-20 01:25:46 +0000412
413 /* Copy header. */
414 if (copy > 0) {
415 if (copy > len)
416 copy = len;
417 if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
418 goto fault;
419 if ((len -= copy) == 0)
420 return 0;
421 offset += copy;
422 to_offset += copy;
423 }
424
425 /* Copy paged appendix. Hmm... why does this look so complicated? */
426 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
427 int end;
Eric Dumazet9e903e02011-10-18 21:00:24 +0000428 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
Michael S. Tsirkin0a1ec072009-04-20 01:25:46 +0000429
430 WARN_ON(start > offset + len);
431
Eric Dumazet9e903e02011-10-18 21:00:24 +0000432 end = start + skb_frag_size(frag);
Michael S. Tsirkin0a1ec072009-04-20 01:25:46 +0000433 if ((copy = end - offset) > 0) {
434 int err;
435 u8 *vaddr;
Ian Campbellea2ab692011-08-22 23:44:58 +0000436 struct page *page = skb_frag_page(frag);
Michael S. Tsirkin0a1ec072009-04-20 01:25:46 +0000437
438 if (copy > len)
439 copy = len;
440 vaddr = kmap(page);
441 err = memcpy_toiovecend(to, vaddr + frag->page_offset +
442 offset - start, to_offset, copy);
443 kunmap(page);
444 if (err)
445 goto fault;
446 if (!(len -= copy))
447 return 0;
448 offset += copy;
449 to_offset += copy;
450 }
451 start = end;
452 }
453
David S. Miller5b1a0022009-06-09 00:18:15 -0700454 skb_walk_frags(skb, frag_iter) {
455 int end;
Michael S. Tsirkin0a1ec072009-04-20 01:25:46 +0000456
David S. Miller5b1a0022009-06-09 00:18:15 -0700457 WARN_ON(start > offset + len);
Michael S. Tsirkin0a1ec072009-04-20 01:25:46 +0000458
David S. Miller5b1a0022009-06-09 00:18:15 -0700459 end = start + frag_iter->len;
460 if ((copy = end - offset) > 0) {
461 if (copy > len)
462 copy = len;
463 if (skb_copy_datagram_const_iovec(frag_iter,
464 offset - start,
465 to, to_offset,
466 copy))
467 goto fault;
468 if ((len -= copy) == 0)
469 return 0;
470 offset += copy;
471 to_offset += copy;
Michael S. Tsirkin0a1ec072009-04-20 01:25:46 +0000472 }
David S. Miller5b1a0022009-06-09 00:18:15 -0700473 start = end;
Michael S. Tsirkin0a1ec072009-04-20 01:25:46 +0000474 }
475 if (!len)
476 return 0;
477
478fault:
479 return -EFAULT;
480}
481EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
482
483/**
Rusty Russelldb543c12008-08-15 15:13:53 -0700484 * skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
485 * @skb: buffer to copy
486 * @offset: offset in the buffer to start copying to
487 * @from: io vector to copy to
Michael S. Tsirkin6f26c9a2009-04-20 01:26:11 +0000488 * @from_offset: offset in the io vector to start copying from
Rusty Russelldb543c12008-08-15 15:13:53 -0700489 * @len: amount of data to copy to buffer from iovec
490 *
491 * Returns 0 or -EFAULT.
Michael S. Tsirkin6f26c9a2009-04-20 01:26:11 +0000492 * Note: the iovec is not modified during the copy.
Rusty Russelldb543c12008-08-15 15:13:53 -0700493 */
494int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
Michael S. Tsirkin6f26c9a2009-04-20 01:26:11 +0000495 const struct iovec *from, int from_offset,
496 int len)
Rusty Russelldb543c12008-08-15 15:13:53 -0700497{
498 int start = skb_headlen(skb);
499 int i, copy = start - offset;
David S. Miller5b1a0022009-06-09 00:18:15 -0700500 struct sk_buff *frag_iter;
Rusty Russelldb543c12008-08-15 15:13:53 -0700501
502 /* Copy header. */
503 if (copy > 0) {
504 if (copy > len)
505 copy = len;
Sridhar Samudralad2d27bf2009-06-05 09:35:40 +0000506 if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
507 copy))
Rusty Russelldb543c12008-08-15 15:13:53 -0700508 goto fault;
509 if ((len -= copy) == 0)
510 return 0;
511 offset += copy;
Michael S. Tsirkin6f26c9a2009-04-20 01:26:11 +0000512 from_offset += copy;
Rusty Russelldb543c12008-08-15 15:13:53 -0700513 }
514
515 /* Copy paged appendix. Hmm... why does this look so complicated? */
516 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
517 int end;
Eric Dumazet9e903e02011-10-18 21:00:24 +0000518 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
Rusty Russelldb543c12008-08-15 15:13:53 -0700519
520 WARN_ON(start > offset + len);
521
Eric Dumazet9e903e02011-10-18 21:00:24 +0000522 end = start + skb_frag_size(frag);
Rusty Russelldb543c12008-08-15 15:13:53 -0700523 if ((copy = end - offset) > 0) {
524 int err;
525 u8 *vaddr;
Ian Campbellea2ab692011-08-22 23:44:58 +0000526 struct page *page = skb_frag_page(frag);
Rusty Russelldb543c12008-08-15 15:13:53 -0700527
528 if (copy > len)
529 copy = len;
530 vaddr = kmap(page);
Michael S. Tsirkin6f26c9a2009-04-20 01:26:11 +0000531 err = memcpy_fromiovecend(vaddr + frag->page_offset +
532 offset - start,
533 from, from_offset, copy);
Rusty Russelldb543c12008-08-15 15:13:53 -0700534 kunmap(page);
535 if (err)
536 goto fault;
537
538 if (!(len -= copy))
539 return 0;
540 offset += copy;
Michael S. Tsirkin6f26c9a2009-04-20 01:26:11 +0000541 from_offset += copy;
Rusty Russelldb543c12008-08-15 15:13:53 -0700542 }
543 start = end;
544 }
545
David S. Miller5b1a0022009-06-09 00:18:15 -0700546 skb_walk_frags(skb, frag_iter) {
547 int end;
Rusty Russelldb543c12008-08-15 15:13:53 -0700548
David S. Miller5b1a0022009-06-09 00:18:15 -0700549 WARN_ON(start > offset + len);
Rusty Russelldb543c12008-08-15 15:13:53 -0700550
David S. Miller5b1a0022009-06-09 00:18:15 -0700551 end = start + frag_iter->len;
552 if ((copy = end - offset) > 0) {
553 if (copy > len)
554 copy = len;
555 if (skb_copy_datagram_from_iovec(frag_iter,
556 offset - start,
557 from,
558 from_offset,
559 copy))
560 goto fault;
561 if ((len -= copy) == 0)
562 return 0;
563 offset += copy;
564 from_offset += copy;
Rusty Russelldb543c12008-08-15 15:13:53 -0700565 }
David S. Miller5b1a0022009-06-09 00:18:15 -0700566 start = end;
Rusty Russelldb543c12008-08-15 15:13:53 -0700567 }
568 if (!len)
569 return 0;
570
571fault:
572 return -EFAULT;
573}
574EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
575
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
577 u8 __user *to, int len,
Al Viro50842052006-11-14 21:36:34 -0800578 __wsum *csump)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579{
David S. Miller1a028e52007-04-27 15:21:23 -0700580 int start = skb_headlen(skb);
David S. Miller1a028e52007-04-27 15:21:23 -0700581 int i, copy = start - offset;
David S. Miller5b1a0022009-06-09 00:18:15 -0700582 struct sk_buff *frag_iter;
583 int pos = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584
585 /* Copy header. */
586 if (copy > 0) {
587 int err = 0;
588 if (copy > len)
589 copy = len;
590 *csump = csum_and_copy_to_user(skb->data + offset, to, copy,
591 *csump, &err);
592 if (err)
593 goto fault;
594 if ((len -= copy) == 0)
595 return 0;
596 offset += copy;
597 to += copy;
598 pos = copy;
599 }
600
601 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
David S. Miller1a028e52007-04-27 15:21:23 -0700602 int end;
Eric Dumazet9e903e02011-10-18 21:00:24 +0000603 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700605 WARN_ON(start > offset + len);
David S. Miller1a028e52007-04-27 15:21:23 -0700606
Eric Dumazet9e903e02011-10-18 21:00:24 +0000607 end = start + skb_frag_size(frag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 if ((copy = end - offset) > 0) {
Al Viro50842052006-11-14 21:36:34 -0800609 __wsum csum2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610 int err = 0;
611 u8 *vaddr;
Ian Campbellea2ab692011-08-22 23:44:58 +0000612 struct page *page = skb_frag_page(frag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613
614 if (copy > len)
615 copy = len;
616 vaddr = kmap(page);
617 csum2 = csum_and_copy_to_user(vaddr +
David S. Miller1a028e52007-04-27 15:21:23 -0700618 frag->page_offset +
619 offset - start,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 to, copy, 0, &err);
621 kunmap(page);
622 if (err)
623 goto fault;
624 *csump = csum_block_add(*csump, csum2, pos);
625 if (!(len -= copy))
626 return 0;
627 offset += copy;
628 to += copy;
629 pos += copy;
630 }
David S. Miller1a028e52007-04-27 15:21:23 -0700631 start = end;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 }
633
David S. Miller5b1a0022009-06-09 00:18:15 -0700634 skb_walk_frags(skb, frag_iter) {
635 int end;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636
David S. Miller5b1a0022009-06-09 00:18:15 -0700637 WARN_ON(start > offset + len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
David S. Miller5b1a0022009-06-09 00:18:15 -0700639 end = start + frag_iter->len;
640 if ((copy = end - offset) > 0) {
641 __wsum csum2 = 0;
642 if (copy > len)
643 copy = len;
644 if (skb_copy_and_csum_datagram(frag_iter,
645 offset - start,
646 to, copy,
647 &csum2))
648 goto fault;
649 *csump = csum_block_add(*csump, csum2, pos);
650 if ((len -= copy) == 0)
651 return 0;
652 offset += copy;
653 to += copy;
654 pos += copy;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655 }
David S. Miller5b1a0022009-06-09 00:18:15 -0700656 start = end;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 }
658 if (!len)
659 return 0;
660
661fault:
662 return -EFAULT;
663}
664
Herbert Xu759e5d02007-03-25 20:10:56 -0700665__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
Herbert Xufb286bb2005-11-10 13:01:24 -0800666{
Al Virod3bc23e2006-11-14 21:24:49 -0800667 __sum16 sum;
Herbert Xufb286bb2005-11-10 13:01:24 -0800668
Herbert Xu759e5d02007-03-25 20:10:56 -0700669 sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
Herbert Xufb286bb2005-11-10 13:01:24 -0800670 if (likely(!sum)) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700671 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
Herbert Xufb286bb2005-11-10 13:01:24 -0800672 netdev_rx_csum_fault(skb->dev);
673 skb->ip_summed = CHECKSUM_UNNECESSARY;
674 }
675 return sum;
676}
Herbert Xu759e5d02007-03-25 20:10:56 -0700677EXPORT_SYMBOL(__skb_checksum_complete_head);
678
679__sum16 __skb_checksum_complete(struct sk_buff *skb)
680{
681 return __skb_checksum_complete_head(skb, skb->len);
682}
Herbert Xufb286bb2005-11-10 13:01:24 -0800683EXPORT_SYMBOL(__skb_checksum_complete);
684
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685/**
686 * skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
Pavel Pisa4dc3b162005-05-01 08:59:25 -0700687 * @skb: skbuff
688 * @hlen: hardware length
Martin Waitz67be2dd2005-05-01 08:59:26 -0700689 * @iov: io vector
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900690 *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 * Caller _must_ check that skb will fit to this iovec.
692 *
693 * Returns: 0 - success.
694 * -EINVAL - checksum failure.
695 * -EFAULT - fault during copy. Beware, in this case iovec
696 * can be modified!
697 */
Herbert Xufb286bb2005-11-10 13:01:24 -0800698int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699 int hlen, struct iovec *iov)
700{
Al Virod3bc23e2006-11-14 21:24:49 -0800701 __wsum csum;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 int chunk = skb->len - hlen;
703
Herbert Xuef8aef52007-09-06 14:06:35 +0100704 if (!chunk)
705 return 0;
706
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 /* Skip filled elements.
708 * Pretty silly, look at memcpy_toiovec, though 8)
709 */
710 while (!iov->iov_len)
711 iov++;
712
713 if (iov->iov_len < chunk) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800714 if (__skb_checksum_complete(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 goto csum_error;
716 if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
717 goto fault;
718 } else {
719 csum = csum_partial(skb->data, hlen, skb->csum);
720 if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
721 chunk, &csum))
722 goto fault;
Al Virod3bc23e2006-11-14 21:24:49 -0800723 if (csum_fold(csum))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724 goto csum_error;
Patrick McHardy84fa7932006-08-29 16:44:56 -0700725 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
Herbert Xufb286bb2005-11-10 13:01:24 -0800726 netdev_rx_csum_fault(skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727 iov->iov_len -= chunk;
728 iov->iov_base += chunk;
729 }
730 return 0;
731csum_error:
732 return -EINVAL;
733fault:
734 return -EFAULT;
735}
Eric Dumazet9e34a5b2010-07-09 21:22:04 +0000736EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737
738/**
739 * datagram_poll - generic datagram poll
Pavel Pisa4dc3b162005-05-01 08:59:25 -0700740 * @file: file struct
741 * @sock: socket
742 * @wait: poll table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743 *
744 * Datagram poll: Again totally generic. This also handles
745 * sequenced packet sockets providing the socket receive queue
746 * is only ever holding data ready to receive.
747 *
748 * Note: when you _don't_ use this routine for this protocol,
749 * and you use a different write policy from sock_writeable()
750 * then please supply your own write_space callback.
751 */
752unsigned int datagram_poll(struct file *file, struct socket *sock,
753 poll_table *wait)
754{
755 struct sock *sk = sock->sk;
756 unsigned int mask;
757
Eric Dumazetaa395142010-04-20 13:03:51 +0000758 sock_poll_wait(file, sk_sleep(sk), wait);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 mask = 0;
760
761 /* exceptional events? */
762 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
Keller, Jacob E7d4c04f2013-03-28 11:19:25 +0000763 mask |= POLLERR |
Jacob Keller8facd5f2013-04-02 13:55:40 -0700764 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
Keller, Jacob E7d4c04f2013-03-28 11:19:25 +0000765
Davide Libenzif348d702006-03-25 03:07:39 -0800766 if (sk->sk_shutdown & RCV_SHUTDOWN)
Eric Dumazetdb409802010-09-06 11:13:50 +0000767 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 if (sk->sk_shutdown == SHUTDOWN_MASK)
769 mask |= POLLHUP;
770
771 /* readable? */
Eric Dumazetdb409802010-09-06 11:13:50 +0000772 if (!skb_queue_empty(&sk->sk_receive_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773 mask |= POLLIN | POLLRDNORM;
774
775 /* Connection-based need to check for termination and startup */
776 if (connection_based(sk)) {
777 if (sk->sk_state == TCP_CLOSE)
778 mask |= POLLHUP;
779 /* connection hasn't started yet? */
780 if (sk->sk_state == TCP_SYN_SENT)
781 return mask;
782 }
783
784 /* writable? */
785 if (sock_writeable(sk))
786 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
787 else
788 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
789
790 return mask;
791}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792EXPORT_SYMBOL(datagram_poll);