/*
 *	Routines having to do with the 'struct sk_buff' memory handlers.
 *
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
 *			Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 *	Fixes:
 *		Alan Cox	:	Fixed the worst of the load
 *					balancer bugs.
 *		Dave Platt	:	Interrupt stacking fix.
 *	Richard Kooijman	:	Timestamp fixes.
 *		Alan Cox	:	Changed buffer format.
 *		Alan Cox	:	destructor hook for AF_UNIX etc.
 *		Linus Torvalds	:	Better skb_clone.
 *		Alan Cox	:	Added skb_copy.
 *		Alan Cox	:	Added all the changed routines Linus
 *					only put in the headers
 *		Ray VanTassle	:	Fixed --skb->lock in free
 *		Alan Cox	:	skb_copy copy arp field
 *		Andi Kleen	:	slabified it.
 *		Robert Olsson	:	Removed skb_head_pool
 *
 *	NOTE:
 *		The __skb_ routines should be called with interrupts
 *	disabled, or you better be *real* sure that the operation is atomic
 *	with respect to whatever list is being frobbed (e.g. via lock_sock()
 *	or via disabling bottom half handlers, etc).
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 *	The functions in this file will not compile correctly with gcc 2.4.x
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
#endif
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/splice.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/scatterlist.h>
#include <linux/errqueue.h>
#include <linux/prefetch.h>
#include <linux/if_vlan.h>

#include <net/protocol.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>

#include <asm/uaccess.h>
#include <trace/events/skb.h>
#include <linux/highmem.h>
#include <linux/capability.h>
#include <linux/user_namespace.h>

struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;

/**
 *	skb_panic - private function for out-of-line support
 *	@skb:	buffer
 *	@sz:	size
 *	@addr:	address
 *	@msg:	skb_over_panic or skb_under_panic
 *
 *	Out-of-line support for skb_put() and skb_push().
 *	Called via the wrapper skb_over_panic() or skb_under_panic().
 *	Keep out of line to prevent kernel bloat.
 *	__builtin_return_address is not used because it is not always reliable.
 */
static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
		      const char msg[])
{
	pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
		 msg, addr, skb->len, sz, skb->head, skb->data,
		 (unsigned long)skb->tail, (unsigned long)skb->end,
		 skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	skb_panic(skb, sz, addr, __func__);
}

static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	skb_panic(skb, sz, addr, __func__);
}

/*
 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
 * the caller if emergency pfmemalloc reserves are being used. If it is and
 * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
 * may be used. Otherwise, the packet data may be discarded until enough
 * memory is free
 */
#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
	__kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)

static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
			       unsigned long ip, bool *pfmemalloc)
{
	void *obj;
	bool ret_pfmemalloc = false;

	/*
	 * Try a regular allocation, when that fails and we're not entitled
	 * to the reserves, fail.
	 */
	obj = kmalloc_node_track_caller(size,
					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
					node);
	if (obj || !(gfp_pfmemalloc_allowed(flags)))
		goto out;

	/* Try again but now we are using pfmemalloc reserves */
	ret_pfmemalloc = true;
	obj = kmalloc_node_track_caller(size, flags, node);

out:
	if (pfmemalloc)
		*pfmemalloc = ret_pfmemalloc;

	return obj;
}

/* Allocate a new skbuff. We do this ourselves so we can fill in a few
 * 'private' fields and also do memory statistics to find all the
 * [BEEP] leaks.
 *
 */

struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
{
	struct sk_buff *skb;

	/* Get the HEAD */
	skb = kmem_cache_alloc_node(skbuff_head_cache,
				    gfp_mask & ~__GFP_DMA, node);
	if (!skb)
		goto out;

	/*
	 * Only clear those fields we need to clear, not those that we will
	 * actually initialise below. Hence, don't put any more fields after
	 * the tail pointer in struct sk_buff!
	 */
	memset(skb, 0, offsetof(struct sk_buff, tail));
	skb->head = NULL;
	skb->truesize = sizeof(struct sk_buff);
	atomic_set(&skb->users, 1);

	skb->mac_header = (typeof(skb->mac_header))~0U;
out:
	return skb;
}

/**
 *	__alloc_skb	-	allocate a network buffer
 *	@size: size to allocate
 *	@gfp_mask: allocation mask
 *	@flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
 *		instead of head cache and allocate a cloned (child) skb.
 *		If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
 *		allocations in case the data is required for writeback
 *	@node: numa node to allocate memory on
 *
 *	Allocate a new &sk_buff. The returned buffer has no headroom and a
 *	tail room of at least size bytes. The object has a reference count
 *	of one. The return is the buffer. On a failure the return is %NULL.
 *
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
			    int flags, int node)
{
	struct kmem_cache *cache;
	struct skb_shared_info *shinfo;
	struct sk_buff *skb;
	u8 *data;
	bool pfmemalloc;

	cache = (flags & SKB_ALLOC_FCLONE)
		? skbuff_fclone_cache : skbuff_head_cache;

	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
		gfp_mask |= __GFP_MEMALLOC;

	/* Get the HEAD */
	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
	if (!skb)
		goto out;
	prefetchw(skb);

	/* We do our best to align skb_shared_info on a separate cache
	 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
	 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
	 * Both skb->head and skb_shared_info are cache line aligned.
	 */
	size = SKB_DATA_ALIGN(size);
	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
	if (!data)
		goto nodata;
	/* kmalloc(size) might give us more room than requested.
	 * Put skb_shared_info exactly at the end of allocated zone,
	 * to allow max possible filling before reallocation.
	 */
	size = SKB_WITH_OVERHEAD(ksize(data));
	prefetchw(data + size);

	/*
	 * Only clear those fields we need to clear, not those that we will
	 * actually initialise below. Hence, don't put any more fields after
	 * the tail pointer in struct sk_buff!
	 */
	memset(skb, 0, offsetof(struct sk_buff, tail));
	/* Account for allocated memory : skb + skb->head */
	skb->truesize = SKB_TRUESIZE(size);
	skb->pfmemalloc = pfmemalloc;
	atomic_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb_reset_tail_pointer(skb);
	skb->end = skb->tail + size;
	skb->mac_header = (typeof(skb->mac_header))~0U;
	skb->transport_header = (typeof(skb->transport_header))~0U;

	/* make sure we initialize shinfo sequentially */
	shinfo = skb_shinfo(skb);
	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
	atomic_set(&shinfo->dataref, 1);
	kmemcheck_annotate_variable(shinfo->destructor_arg);

	if (flags & SKB_ALLOC_FCLONE) {
		struct sk_buff_fclones *fclones;

		fclones = container_of(skb, struct sk_buff_fclones, skb1);

		kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
		skb->fclone = SKB_FCLONE_ORIG;
		atomic_set(&fclones->fclone_ref, 1);

		fclones->skb2.fclone = SKB_FCLONE_CLONE;
		fclones->skb2.pfmemalloc = pfmemalloc;
	}
out:
	return skb;
nodata:
	kmem_cache_free(cache, skb);
	skb = NULL;
	goto out;
}
EXPORT_SYMBOL(__alloc_skb);
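
/* Editor's illustrative sketch, not part of the original file: callers
 * normally go through the alloc_skb()/alloc_skb_fclone() wrappers around
 * __alloc_skb(), reserve headroom up front and then fill the linear area.
 * "payload" and "payload_len" below are hypothetical.
 *
 *	struct sk_buff *skb = alloc_skb(MAX_HEADER + payload_len, GFP_ATOMIC);
 *
 *	if (!skb)
 *		return -ENOMEM;
 *	skb_reserve(skb, MAX_HEADER);		// headroom for protocol headers
 *	memcpy(skb_put(skb, payload_len), payload, payload_len);
 */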

/**
 * __build_skb - build a network buffer
 * @data: data buffer provided by caller
 * @frag_size: size of data, or 0 if head was kmalloced
 *
 * Allocate a new &sk_buff. Caller provides space holding head and
 * skb_shared_info. @data must have been allocated by kmalloc() only if
 * @frag_size is 0, otherwise data should come from the page allocator
 * or vmalloc()
 * The return is the new skb buffer.
 * On a failure the return is %NULL, and @data is not freed.
 * Notes :
 *  Before IO, driver allocates only data buffer where NIC put incoming frame
 *  Driver should add room at head (NET_SKB_PAD) and
 *  MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
 *  After IO, driver calls build_skb(), to allocate sk_buff and populate it
 *  before giving packet to stack.
 *  RX rings only contains data buffers, not full skbs.
 */
struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
	struct skb_shared_info *shinfo;
	struct sk_buff *skb;
	unsigned int size = frag_size ? : ksize(data);

	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
	if (!skb)
		return NULL;

	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	memset(skb, 0, offsetof(struct sk_buff, tail));
	skb->truesize = SKB_TRUESIZE(size);
	atomic_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb_reset_tail_pointer(skb);
	skb->end = skb->tail + size;
	skb->mac_header = (typeof(skb->mac_header))~0U;
	skb->transport_header = (typeof(skb->transport_header))~0U;

	/* make sure we initialize shinfo sequentially */
	shinfo = skb_shinfo(skb);
	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
	atomic_set(&shinfo->dataref, 1);
	kmemcheck_annotate_variable(shinfo->destructor_arg);

	return skb;
}

/* build_skb() is wrapper over __build_skb(), that specifically
 * takes care of skb->head and skb->pfmemalloc
 * This means that if @frag_size is not zero, then @data must be backed
 * by a page fragment, not kmalloc() or vmalloc()
 */
struct sk_buff *build_skb(void *data, unsigned int frag_size)
{
	struct sk_buff *skb = __build_skb(data, frag_size);

	if (skb && frag_size) {
		skb->head_frag = 1;
		if (virt_to_head_page(data)->pfmemalloc)
			skb->pfmemalloc = 1;
	}
	return skb;
}
EXPORT_SYMBOL(build_skb);
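
/* Editor's illustrative sketch, not part of the original file: a driver RX
 * path built around page fragments typically pairs netdev_alloc_frag() with
 * build_skb(). "RX_BUF_LEN" is a hypothetical per-driver buffer size.
 *
 *	unsigned int truesize = SKB_DATA_ALIGN(NET_SKB_PAD + RX_BUF_LEN) +
 *				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 *	void *data = netdev_alloc_frag(truesize);
 *	struct sk_buff *skb;
 *
 *	if (!data)
 *		return NULL;
 *	skb = build_skb(data, truesize);	// head comes from a page frag
 *	if (!skb) {
 *		put_page(virt_to_head_page(data));
 *		return NULL;
 *	}
 *	skb_reserve(skb, NET_SKB_PAD);		// headroom the stack expects
 */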

struct netdev_alloc_cache {
	struct page_frag	frag;
	/* we maintain a pagecount bias, so that we dont dirty cache line
	 * containing page->_count every time we allocate a fragment.
	 */
	unsigned int		pagecnt_bias;
	bool			pfmemalloc;
};
static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);

static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
				       gfp_t gfp_mask)
{
	const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
	struct page *page = NULL;
	gfp_t gfp = gfp_mask;

	if (order) {
		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
			    __GFP_NOMEMALLOC;
		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
		nc->frag.size = PAGE_SIZE << (page ? order : 0);
	}

	if (unlikely(!page))
		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);

	nc->frag.page = page;

	return page;
}

static void *__alloc_page_frag(struct netdev_alloc_cache *nc,
			       unsigned int fragsz, gfp_t gfp_mask)
{
	struct page *page = nc->frag.page;
	unsigned int size;
	int offset;

	if (unlikely(!page)) {
refill:
		page = __page_frag_refill(nc, gfp_mask);
		if (!page)
			return NULL;

		/* if size can vary use frag.size else just use PAGE_SIZE */
		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;

		/* Even if we own the page, we do not use atomic_set().
		 * This would break get_page_unless_zero() users.
		 */
		atomic_add(size - 1, &page->_count);

		/* reset page count bias and offset to start of new frag */
		nc->pfmemalloc = page->pfmemalloc;
		nc->pagecnt_bias = size;
		nc->frag.offset = size;
	}

	offset = nc->frag.offset - fragsz;
	if (unlikely(offset < 0)) {
		if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
			goto refill;

		/* if size can vary use frag.size else just use PAGE_SIZE */
		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;

		/* OK, page count is 0, we can safely set it */
		atomic_set(&page->_count, size);

		/* reset page count bias and offset to start of new frag */
		nc->pagecnt_bias = size;
		offset = size - fragsz;
	}

	nc->pagecnt_bias--;
	nc->frag.offset = offset;

	return page_address(page) + offset;
}

static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
	struct netdev_alloc_cache *nc;
	unsigned long flags;
	void *data;

	local_irq_save(flags);
	nc = this_cpu_ptr(&netdev_alloc_cache);
	data = __alloc_page_frag(nc, fragsz, gfp_mask);
	local_irq_restore(flags);
	return data;
}

/**
 * netdev_alloc_frag - allocate a page fragment
 * @fragsz: fragment size
 *
 * Allocates a frag from a page for receive buffer.
 * Uses GFP_ATOMIC allocations.
 */
void *netdev_alloc_frag(unsigned int fragsz)
{
	return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
}
EXPORT_SYMBOL(netdev_alloc_frag);

static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
	struct netdev_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);

	return __alloc_page_frag(nc, fragsz, gfp_mask);
}

void *napi_alloc_frag(unsigned int fragsz)
{
	return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
}
EXPORT_SYMBOL(napi_alloc_frag);

/**
 *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
 *	@dev: network device to receive on
 *	@len: length to allocate
 *	@gfp_mask: get_free_pages mask, passed to alloc_skb
 *
 *	Allocate a new &sk_buff and assign it a usage count of one. The
 *	buffer has NET_SKB_PAD headroom built in. Users should allocate
 *	the headroom they think they need without accounting for the
 *	built in space. The built in space is used for optimisations.
 *
 *	%NULL is returned if there is no free memory.
 */
struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
				   gfp_t gfp_mask)
{
	struct netdev_alloc_cache *nc;
	unsigned long flags;
	struct sk_buff *skb;
	bool pfmemalloc;
	void *data;

	len += NET_SKB_PAD;

	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
	    (gfp_mask & (__GFP_WAIT | GFP_DMA)))
		return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);

	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	len = SKB_DATA_ALIGN(len);

	if (sk_memalloc_socks())
		gfp_mask |= __GFP_MEMALLOC;

	local_irq_save(flags);

	nc = this_cpu_ptr(&netdev_alloc_cache);
	data = __alloc_page_frag(nc, len, gfp_mask);
	pfmemalloc = nc->pfmemalloc;

	local_irq_restore(flags);

	if (unlikely(!data))
		return NULL;

	skb = __build_skb(data, len);
	if (unlikely(!skb)) {
		put_page(virt_to_head_page(data));
		return NULL;
	}

	/* use OR instead of assignment to avoid clearing of bits in mask */
	if (pfmemalloc)
		skb->pfmemalloc = 1;
	skb->head_frag = 1;

	skb_reserve(skb, NET_SKB_PAD);
	skb->dev = dev;

	return skb;
}
EXPORT_SYMBOL(__netdev_alloc_skb);
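
/* Editor's illustrative sketch, not part of the original file: a non-NAPI
 * driver RX path usually goes through netdev_alloc_skb(), which already
 * reserves NET_SKB_PAD of headroom. "pkt_len" and "pkt_data" are
 * hypothetical.
 *
 *	struct sk_buff *skb = netdev_alloc_skb(dev, pkt_len);
 *
 *	if (!skb)
 *		return;				// drop, out of memory
 *	memcpy(skb_put(skb, pkt_len), pkt_data, pkt_len);
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);
 */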

/**
 *	__napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
 *	@napi: napi instance this buffer was allocated for
 *	@len: length to allocate
 *	@gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
 *
 *	Allocate a new sk_buff for use in NAPI receive.  This buffer will
 *	attempt to allocate the head from a special reserved region used
 *	only for NAPI Rx allocation.  By doing this we can save several
 *	CPU cycles by avoiding having to disable and re-enable IRQs.
 *
 *	%NULL is returned if there is no free memory.
 */
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
				 gfp_t gfp_mask)
{
	struct netdev_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
	struct sk_buff *skb;
	void *data;

	len += NET_SKB_PAD + NET_IP_ALIGN;

	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
	    (gfp_mask & (__GFP_WAIT | GFP_DMA)))
		return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);

	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	len = SKB_DATA_ALIGN(len);

	if (sk_memalloc_socks())
		gfp_mask |= __GFP_MEMALLOC;

	data = __alloc_page_frag(nc, len, gfp_mask);
	if (unlikely(!data))
		return NULL;

	skb = __build_skb(data, len);
	if (unlikely(!skb)) {
		put_page(virt_to_head_page(data));
		return NULL;
	}

	/* use OR instead of assignment to avoid clearing of bits in mask */
	if (nc->pfmemalloc)
		skb->pfmemalloc = 1;
	skb->head_frag = 1;

	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
	skb->dev = napi->dev;

	return skb;
}
EXPORT_SYMBOL(__napi_alloc_skb);
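
/* Editor's illustrative sketch, not part of the original file: inside a NAPI
 * poll handler IRQs do not need to be disabled around the per-CPU cache, so
 * napi_alloc_skb() is the cheaper choice there. "frame_len" and
 * "copy_rx_frame()" are hypothetical.
 *
 *	struct sk_buff *skb = napi_alloc_skb(napi, frame_len);
 *
 *	if (!skb)
 *		return;					// drop this frame
 *	copy_rx_frame(skb_put(skb, frame_len));		// driver-specific copy
 *	skb->protocol = eth_type_trans(skb, napi->dev);
 *	napi_gro_receive(napi, skb);
 */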

void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
		     int size, unsigned int truesize)
{
	skb_fill_page_desc(skb, i, page, off, size);
	skb->len += size;
	skb->data_len += size;
	skb->truesize += truesize;
}
EXPORT_SYMBOL(skb_add_rx_frag);
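
/* Editor's illustrative sketch, not part of the original file: after copying
 * only the headers into the linear area, a driver can attach the remaining
 * payload as a page fragment. "rx_page", "pay_off" and "pay_len" are
 * hypothetical.
 *
 *	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_page,
 *			pay_off, pay_len, PAGE_SIZE);
 *	// skb->len, skb->data_len and skb->truesize are updated for us
 */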

void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
			  unsigned int truesize)
{
	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

	skb_frag_size_add(frag, size);
	skb->len += size;
	skb->data_len += size;
	skb->truesize += truesize;
}
EXPORT_SYMBOL(skb_coalesce_rx_frag);

static void skb_drop_list(struct sk_buff **listp)
{
	kfree_skb_list(*listp);
	*listp = NULL;
}

static inline void skb_drop_fraglist(struct sk_buff *skb)
{
	skb_drop_list(&skb_shinfo(skb)->frag_list);
}

static void skb_clone_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list;

	skb_walk_frags(skb, list)
		skb_get(list);
}

static void skb_free_head(struct sk_buff *skb)
{
	if (skb->head_frag)
		put_page(virt_to_head_page(skb->head));
	else
		kfree(skb->head);
}

static void skb_release_data(struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int i;

	if (skb->cloned &&
	    atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
			      &shinfo->dataref))
		return;

	for (i = 0; i < shinfo->nr_frags; i++)
		__skb_frag_unref(&shinfo->frags[i]);

	/*
	 * If skb buf is from userspace, we need to notify the caller
	 * the lower device DMA has done;
	 */
	if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
		struct ubuf_info *uarg;

		uarg = shinfo->destructor_arg;
		if (uarg->callback)
			uarg->callback(uarg, true);
	}

	if (shinfo->frag_list)
		kfree_skb_list(shinfo->frag_list);

	skb_free_head(skb);
}

/*
 *	Free an skbuff by memory without cleaning the state.
 */
static void kfree_skbmem(struct sk_buff *skb)
{
	struct sk_buff_fclones *fclones;

	switch (skb->fclone) {
	case SKB_FCLONE_UNAVAILABLE:
		kmem_cache_free(skbuff_head_cache, skb);
		return;

	case SKB_FCLONE_ORIG:
		fclones = container_of(skb, struct sk_buff_fclones, skb1);

		/* We usually free the clone (TX completion) before original skb
		 * This test would have no chance to be true for the clone,
		 * while here, branch prediction will be good.
		 */
		if (atomic_read(&fclones->fclone_ref) == 1)
			goto fastpath;
		break;

	default: /* SKB_FCLONE_CLONE */
		fclones = container_of(skb, struct sk_buff_fclones, skb2);
		break;
	}
	if (!atomic_dec_and_test(&fclones->fclone_ref))
		return;
fastpath:
	kmem_cache_free(skbuff_fclone_cache, fclones);
}

static void skb_release_head_state(struct sk_buff *skb)
{
	skb_dst_drop(skb);
#ifdef CONFIG_XFRM
	secpath_put(skb->sp);
#endif
	if (skb->destructor) {
		WARN_ON(in_irq());
		skb->destructor(skb);
	}
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	nf_conntrack_put(skb->nfct);
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	nf_bridge_put(skb->nf_bridge);
#endif
}

/* Free everything but the sk_buff shell. */
static void skb_release_all(struct sk_buff *skb)
{
	skb_release_head_state(skb);
	if (likely(skb->head))
		skb_release_data(skb);
}

/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb
 */

void __kfree_skb(struct sk_buff *skb)
{
	skb_release_all(skb);
	kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);

/**
 *	kfree_skb - free an sk_buff
 *	@skb: buffer to free
 *
 *	Drop a reference to the buffer and free it if the usage count has
 *	hit zero.
 */
void kfree_skb(struct sk_buff *skb)
{
	if (unlikely(!skb))
		return;
	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;
	trace_kfree_skb(skb, __builtin_return_address(0));
	__kfree_skb(skb);
}
EXPORT_SYMBOL(kfree_skb);

void kfree_skb_list(struct sk_buff *segs)
{
	while (segs) {
		struct sk_buff *next = segs->next;

		kfree_skb(segs);
		segs = next;
	}
}
EXPORT_SYMBOL(kfree_skb_list);

/**
 *	skb_tx_error - report an sk_buff xmit error
 *	@skb: buffer that triggered an error
 *
 *	Report xmit error if a device callback is tracking this skb.
 *	skb must be freed afterwards.
 */
void skb_tx_error(struct sk_buff *skb)
{
	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
		struct ubuf_info *uarg;

		uarg = skb_shinfo(skb)->destructor_arg;
		if (uarg->callback)
			uarg->callback(uarg, false);
		skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
	}
}
EXPORT_SYMBOL(skb_tx_error);

/**
 *	consume_skb - free an skbuff
 *	@skb: buffer to free
 *
 *	Drop a ref to the buffer and free it if the usage count has hit zero
 *	Functions identically to kfree_skb, but kfree_skb assumes that the frame
 *	is being dropped after a failure and notes that
 */
void consume_skb(struct sk_buff *skb)
{
	if (unlikely(!skb))
		return;
	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;
	trace_consume_skb(skb);
	__kfree_skb(skb);
}
EXPORT_SYMBOL(consume_skb);
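
/* Editor's illustrative sketch, not part of the original file: the two free
 * helpers differ only in the tracepoint they fire, which is what drop
 * monitoring tools key off. A TX completion path would typically do
 * something like the following; "tx_failed" is hypothetical.
 *
 *	if (unlikely(tx_failed))
 *		kfree_skb(skb);		// counted as a drop
 *	else
 *		consume_skb(skb);	// normal, successful consumption
 */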

/* Make sure a field is enclosed inside headers_start/headers_end section */
#define CHECK_SKB_FIELD(field) \
	BUILD_BUG_ON(offsetof(struct sk_buff, field) <		\
		     offsetof(struct sk_buff, headers_start));	\
	BUILD_BUG_ON(offsetof(struct sk_buff, field) >		\
		     offsetof(struct sk_buff, headers_end));	\

static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	new->tstamp		= old->tstamp;
	/* We do not copy old->sk */
	new->dev		= old->dev;
	memcpy(new->cb, old->cb, sizeof(old->cb));
	skb_dst_copy(new, old);
#ifdef CONFIG_XFRM
	new->sp			= secpath_get(old->sp);
#endif
	__nf_copy(new, old, false);

	/* Note : this field could be in headers_start/headers_end section
	 * It is not yet because we do not want to have a 16 bit hole
	 */
	new->queue_mapping = old->queue_mapping;

	memcpy(&new->headers_start, &old->headers_start,
	       offsetof(struct sk_buff, headers_end) -
	       offsetof(struct sk_buff, headers_start));
	CHECK_SKB_FIELD(protocol);
	CHECK_SKB_FIELD(csum);
	CHECK_SKB_FIELD(hash);
	CHECK_SKB_FIELD(priority);
	CHECK_SKB_FIELD(skb_iif);
	CHECK_SKB_FIELD(vlan_proto);
	CHECK_SKB_FIELD(vlan_tci);
	CHECK_SKB_FIELD(transport_header);
	CHECK_SKB_FIELD(network_header);
	CHECK_SKB_FIELD(mac_header);
	CHECK_SKB_FIELD(inner_protocol);
	CHECK_SKB_FIELD(inner_transport_header);
	CHECK_SKB_FIELD(inner_network_header);
	CHECK_SKB_FIELD(inner_mac_header);
	CHECK_SKB_FIELD(mark);
#ifdef CONFIG_NETWORK_SECMARK
	CHECK_SKB_FIELD(secmark);
#endif
#ifdef CONFIG_NET_RX_BUSY_POLL
	CHECK_SKB_FIELD(napi_id);
#endif
#ifdef CONFIG_XPS
	CHECK_SKB_FIELD(sender_cpu);
#endif
#ifdef CONFIG_NET_SCHED
	CHECK_SKB_FIELD(tc_index);
#ifdef CONFIG_NET_CLS_ACT
	CHECK_SKB_FIELD(tc_verd);
#endif
#endif

}

/*
 * You should not add any new code to this function.  Add it to
 * __copy_skb_header above instead.
 */
static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
{
#define C(x) n->x = skb->x

	n->next = n->prev = NULL;
	n->sk = NULL;
	__copy_skb_header(n, skb);

	C(len);
	C(data_len);
	C(mac_len);
	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
	n->cloned = 1;
	n->nohdr = 0;
	n->destructor = NULL;
	C(tail);
	C(end);
	C(head);
	C(head_frag);
	C(data);
	C(truesize);
	atomic_set(&n->users, 1);

	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;

	return n;
#undef C
}

/**
 *	skb_morph - morph one skb into another
 *	@dst: the skb to receive the contents
 *	@src: the skb to supply the contents
 *
 *	This is identical to skb_clone except that the target skb is
 *	supplied by the user.
 *
 *	The target skb is returned upon exit.
 */
struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
{
	skb_release_all(dst);
	return __skb_clone(dst, src);
}
EXPORT_SYMBOL_GPL(skb_morph);
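
/* Editor's illustrative sketch, not part of the original file: skb_morph()
 * lets a caller reuse an sk_buff shell it already owns as the clone target.
 *
 *	// "old" is a fully owned skb whose contents are no longer needed,
 *	// "src" is the packet we want a clone of.
 *	struct sk_buff *clone = skb_morph(old, src);
 *	// "clone" (== old) now shares src's data, as after skb_clone()
 */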

/**
 *	skb_copy_ubufs	-	copy userspace skb frags buffers to kernel
 *	@skb: the skb to modify
 *	@gfp_mask: allocation priority
 *
 *	This must be called on SKBTX_DEV_ZEROCOPY skb.
 *	It will copy all frags into kernel and drop the reference
 *	to userspace pages.
 *
 *	If this function is called from an interrupt gfp_mask() must be
 *	%GFP_ATOMIC.
 *
 *	Returns 0 on success or a negative error code on failure
 *	to allocate kernel memory to copy to.
 */
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
{
	int i;
	int num_frags = skb_shinfo(skb)->nr_frags;
	struct page *page, *head = NULL;
	struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;

	for (i = 0; i < num_frags; i++) {
		u8 *vaddr;
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		page = alloc_page(gfp_mask);
		if (!page) {
			while (head) {
				struct page *next = (struct page *)page_private(head);
				put_page(head);
				head = next;
			}
			return -ENOMEM;
		}
		vaddr = kmap_atomic(skb_frag_page(f));
		memcpy(page_address(page),
		       vaddr + f->page_offset, skb_frag_size(f));
		kunmap_atomic(vaddr);
		set_page_private(page, (unsigned long)head);
		head = page;
	}

	/* skb frags release userspace buffers */
	for (i = 0; i < num_frags; i++)
		skb_frag_unref(skb, i);

	uarg->callback(uarg, false);

	/* skb frags point to kernel buffers */
	for (i = num_frags - 1; i >= 0; i--) {
		__skb_fill_page_desc(skb, i, head, 0,
				     skb_shinfo(skb)->frags[i].size);
		head = (struct page *)page_private(head);
	}

	skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
	return 0;
}
EXPORT_SYMBOL_GPL(skb_copy_ubufs);

/**
 *	skb_clone	-	duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
 *	copies share the same packet data but not structure. The new
 *	buffer has a reference count of 1. If the allocation fails the
 *	function returns %NULL otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt gfp_mask() must be
 *	%GFP_ATOMIC.
 */
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
	struct sk_buff_fclones *fclones = container_of(skb,
						       struct sk_buff_fclones,
						       skb1);
	struct sk_buff *n;

	if (skb_orphan_frags(skb, gfp_mask))
		return NULL;

	if (skb->fclone == SKB_FCLONE_ORIG &&
	    atomic_read(&fclones->fclone_ref) == 1) {
		n = &fclones->skb2;
		atomic_set(&fclones->fclone_ref, 2);
	} else {
		if (skb_pfmemalloc(skb))
			gfp_mask |= __GFP_MEMALLOC;

		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;

		kmemcheck_annotate_bitfield(n, flags1);
		n->fclone = SKB_FCLONE_UNAVAILABLE;
	}

	return __skb_clone(n, skb);
}
EXPORT_SYMBOL(skb_clone);
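
/* Editor's illustrative sketch, not part of the original file: cloning is the
 * cheap way to hand the same payload to a second consumer, e.g. delivering a
 * copy to a sniffer while the original continues down the stack.
 * "deliver_to_tap()" is hypothetical.
 *
 *	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
 *
 *	if (clone)
 *		deliver_to_tap(clone);	// shares skb's data, separate header
 *	// skb itself is still owned by the normal path
 */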

static void skb_headers_offset_update(struct sk_buff *skb, int off)
{
	/* Only adjust this if it actually is csum_start rather than csum */
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		skb->csum_start += off;
	/* {transport,network,mac}_header and tail are relative to skb->head */
	skb->transport_header += off;
	skb->network_header   += off;
	if (skb_mac_header_was_set(skb))
		skb->mac_header += off;
	skb->inner_transport_header += off;
	skb->inner_network_header += off;
	skb->inner_mac_header += off;
}

static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	__copy_skb_header(new, old);

	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
}

static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
{
	if (skb_pfmemalloc(skb))
		return SKB_ALLOC_RX;
	return 0;
}

/**
 *	skb_copy	-	create private copy of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data. This is used when the
 *	caller wishes to modify the data and needs a private copy of the
 *	data to alter. Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	As by-product this function converts non-linear &sk_buff to linear
 *	one, so that &sk_buff becomes completely private and caller is allowed
 *	to modify all the data of returned buffer. This means that this
 *	function is not recommended for use in circumstances when only
 *	header is going to be modified. Use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
	int headerlen = skb_headroom(skb);
	unsigned int size = skb_end_offset(skb) + skb->data_len;
	struct sk_buff *n = __alloc_skb(size, gfp_mask,
					skb_alloc_rx_flag(skb), NUMA_NO_NODE);

	if (!n)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, headerlen);
	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
		BUG();

	copy_skb_header(n, skb);
	return n;
}
EXPORT_SYMBOL(skb_copy);
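
/* Editor's illustrative sketch, not part of the original file: a full copy is
 * only worth it when the payload itself must be rewritten and the buffer may
 * be shared. "mangle_payload()" is hypothetical.
 *
 *	struct sk_buff *priv = skb_copy(skb, GFP_ATOMIC);
 *
 *	if (!priv)
 *		return -ENOMEM;
 *	mangle_payload(priv->data, priv->len);	// safe: data is private & linear
 *	kfree_skb(skb);				// drop our ref to the original
 */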

/**
 *	__pskb_copy_fclone	-  create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@headroom: headroom of new skb
 *	@gfp_mask: allocation priority
 *	@fclone: if true allocate the copy of the skb from the fclone
 *	cache instead of the head cache; it is recommended to set this
 *	to true for the cases where the copy will likely be cloned
 *
 *	Make a copy of both an &sk_buff and part of its data, located
 *	in header. Fragmented data remain shared. This is used when
 *	the caller wishes to modify only header of &sk_buff and needs
 *	private copy of the header to alter. Returns %NULL on failure
 *	or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
 */

struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
				   gfp_t gfp_mask, bool fclone)
{
	unsigned int size = skb_headlen(skb) + headroom;
	int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
	struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);

	if (!n)
		goto out;

	/* Set the data pointer */
	skb_reserve(n, headroom);
	/* Set the tail pointer and length */
	skb_put(n, skb_headlen(skb));
	/* Copy the bytes */
	skb_copy_from_linear_data(skb, n->data, n->len);

	n->truesize += skb->data_len;
	n->data_len  = skb->data_len;
	n->len	     = skb->len;

	if (skb_shinfo(skb)->nr_frags) {
		int i;

		if (skb_orphan_frags(skb, gfp_mask)) {
			kfree_skb(n);
			n = NULL;
			goto out;
		}
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
			skb_frag_ref(skb, i);
		}
		skb_shinfo(n)->nr_frags = i;
	}

	if (skb_has_frag_list(skb)) {
		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(n);
	}

	copy_skb_header(n, skb);
out:
	return n;
}
EXPORT_SYMBOL(__pskb_copy_fclone);
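
/* Editor's illustrative sketch, not part of the original file: when only
 * headers need to change, the pskb_copy() wrapper copies just the linear
 * header area and keeps the paged data shared.
 *
 *	struct sk_buff *hdr_copy = pskb_copy(skb, GFP_ATOMIC);
 *
 *	if (!hdr_copy)
 *		return -ENOMEM;
 *	ip_hdr(hdr_copy)->ttl--;		// header is private, frags shared
 *	ip_send_check(ip_hdr(hdr_copy));
 */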
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165
1166/**
1167 * pskb_expand_head - reallocate header of &sk_buff
1168 * @skb: buffer to reallocate
1169 * @nhead: room to add at head
1170 * @ntail: room to add at tail
1171 * @gfp_mask: allocation priority
1172 *
Mathias Krausebc323832013-11-07 14:18:26 +01001173 * Expands (or creates identical copy, if @nhead and @ntail are zero)
1174 * header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 * reference count of 1. Returns zero in the case of success or error,
1176 * if expansion failed. In the last case, &sk_buff is not changed.
1177 *
1178 * All the pointers pointing into skb header may change and must be
1179 * reloaded after call to this function.
1180 */
1181
Victor Fusco86a76ca2005-07-08 14:57:47 -07001182int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
Al Virodd0fc662005-10-07 07:46:04 +01001183 gfp_t gfp_mask)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184{
1185 int i;
1186 u8 *data;
Alexander Duyckec47ea82012-05-04 14:26:56 +00001187 int size = nhead + skb_end_offset(skb) + ntail;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188 long off;
1189
Herbert Xu4edd87a2008-10-01 07:09:38 -07001190 BUG_ON(nhead < 0);
1191
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 if (skb_shared(skb))
1193 BUG();
1194
1195 size = SKB_DATA_ALIGN(size);
1196
Mel Gormanc93bdd02012-07-31 16:44:19 -07001197 if (skb_pfmemalloc(skb))
1198 gfp_mask |= __GFP_MEMALLOC;
1199 data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
1200 gfp_mask, NUMA_NO_NODE, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 if (!data)
1202 goto nodata;
Eric Dumazet87151b82012-04-10 20:08:39 +00001203 size = SKB_WITH_OVERHEAD(ksize(data));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204
1205 /* Copy only real data... and, alas, header. This should be
Eric Dumazet6602ceb2010-09-01 05:25:10 +00001206 * optimized for the cases when header is void.
1207 */
1208 memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
1209
1210 memcpy((struct skb_shared_info *)(data + size),
1211 skb_shinfo(skb),
Eric Dumazetfed66382010-07-22 19:09:08 +00001212 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213
Alexander Duyck3e245912012-05-04 14:26:51 +00001214 /*
1215 * if shinfo is shared we must drop the old head gracefully, but if it
1216 * is not we can just drop the old head and let the existing refcount
1217 * be since all we did is relocate the values
1218 */
1219 if (skb_cloned(skb)) {
Shirley Maa6686f22011-07-06 12:22:12 +00001220 /* copy this zero copy skb frags */
Michael S. Tsirkin70008aa2012-07-20 09:23:10 +00001221 if (skb_orphan_frags(skb, gfp_mask))
1222 goto nofrags;
Eric Dumazet1fd63042010-09-02 23:09:32 +00001223 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
Ian Campbellea2ab692011-08-22 23:44:58 +00001224 skb_frag_ref(skb, i);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225
Eric Dumazet1fd63042010-09-02 23:09:32 +00001226 if (skb_has_frag_list(skb))
1227 skb_clone_fraglist(skb);
1228
1229 skb_release_data(skb);
Alexander Duyck3e245912012-05-04 14:26:51 +00001230 } else {
1231 skb_free_head(skb);
Eric Dumazet1fd63042010-09-02 23:09:32 +00001232 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233 off = (data + nhead) - skb->head;
1234
1235 skb->head = data;
Eric Dumazetd3836f22012-04-27 00:33:38 +00001236 skb->head_frag = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 skb->data += off;
Arnaldo Carvalho de Melo4305b542007-04-19 20:43:29 -07001238#ifdef NET_SKBUFF_DATA_USES_OFFSET
1239 skb->end = size;
Patrick McHardy56eb8882007-04-09 11:45:04 -07001240 off = nhead;
Arnaldo Carvalho de Melo4305b542007-04-19 20:43:29 -07001241#else
1242 skb->end = skb->head + size;
Patrick McHardy56eb8882007-04-09 11:45:04 -07001243#endif
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001244 skb->tail += off;
Peter Pan(潘卫平)b41abb42013-06-06 21:27:21 +08001245 skb_headers_offset_update(skb, nhead);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246 skb->cloned = 0;
Patrick McHardy334a8132007-06-25 04:35:20 -07001247 skb->hdr_len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248 skb->nohdr = 0;
1249 atomic_set(&skb_shinfo(skb)->dataref, 1);
1250 return 0;
1251
Shirley Maa6686f22011-07-06 12:22:12 +00001252nofrags:
1253 kfree(data);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254nodata:
1255 return -ENOMEM;
1256}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08001257EXPORT_SYMBOL(pskb_expand_head);
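/* Editorial example, not part of the original file: a minimal sketch of how a
 * caller might use pskb_expand_head() to guarantee @needed bytes of writable
 * headroom before pushing a new header.  The helper name and the GFP_ATOMIC
 * context are illustrative assumptions.  Note that the skb must be unshared,
 * and that every pointer into the old header is stale after expansion.
 */
static int example_make_headroom(struct sk_buff *skb, unsigned int needed)
{
	if (skb_headroom(skb) >= needed && !skb_cloned(skb))
		return 0;

	/* Round the extra headroom up so the new head stays aligned. */
	return pskb_expand_head(skb, SKB_DATA_ALIGN(needed), 0, GFP_ATOMIC);
}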
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258
1259/* Make private copy of skb with writable head and some headroom */
1260
1261struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
1262{
1263 struct sk_buff *skb2;
1264 int delta = headroom - skb_headroom(skb);
1265
1266 if (delta <= 0)
1267 skb2 = pskb_copy(skb, GFP_ATOMIC);
1268 else {
1269 skb2 = skb_clone(skb, GFP_ATOMIC);
1270 if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
1271 GFP_ATOMIC)) {
1272 kfree_skb(skb2);
1273 skb2 = NULL;
1274 }
1275 }
1276 return skb2;
1277}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08001278EXPORT_SYMBOL(skb_realloc_headroom);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279
1280/**
1281 * skb_copy_expand - copy and expand sk_buff
1282 * @skb: buffer to copy
1283 * @newheadroom: new free bytes at head
1284 * @newtailroom: new free bytes at tail
1285 * @gfp_mask: allocation priority
1286 *
1287 * Make a copy of both an &sk_buff and its data and while doing so
1288 * allocate additional space.
1289 *
1290 * This is used when the caller wishes to modify the data and needs a
1291 * private copy of the data to alter as well as more space for new fields.
1292 * Returns %NULL on failure or the pointer to the buffer
1293 * on success. The returned buffer has a reference count of 1.
1294 *
1295 * You must pass %GFP_ATOMIC as the allocation priority if this function
1296 * is called from an interrupt.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 */
1298struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
Victor Fusco86a76ca2005-07-08 14:57:47 -07001299 int newheadroom, int newtailroom,
Al Virodd0fc662005-10-07 07:46:04 +01001300 gfp_t gfp_mask)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301{
1302 /*
1303 * Allocate the copy buffer
1304 */
Mel Gormanc93bdd02012-07-31 16:44:19 -07001305 struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
1306 gfp_mask, skb_alloc_rx_flag(skb),
1307 NUMA_NO_NODE);
Patrick McHardyefd1e8d2007-04-10 18:30:09 -07001308 int oldheadroom = skb_headroom(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 int head_copy_len, head_copy_off;
1310
1311 if (!n)
1312 return NULL;
1313
1314 skb_reserve(n, newheadroom);
1315
1316 /* Set the tail pointer and length */
1317 skb_put(n, skb->len);
1318
Patrick McHardyefd1e8d2007-04-10 18:30:09 -07001319 head_copy_len = oldheadroom;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 head_copy_off = 0;
1321 if (newheadroom <= head_copy_len)
1322 head_copy_len = newheadroom;
1323 else
1324 head_copy_off = newheadroom - head_copy_len;
1325
1326 /* Copy the linear header and data. */
1327 if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
1328 skb->len + head_copy_len))
1329 BUG();
1330
1331 copy_skb_header(n, skb);
1332
Eric Dumazet030737b2013-10-19 11:42:54 -07001333 skb_headers_offset_update(n, newheadroom - oldheadroom);
Patrick McHardyefd1e8d2007-04-10 18:30:09 -07001334
Linus Torvalds1da177e2005-04-16 15:20:36 -07001335 return n;
1336}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08001337EXPORT_SYMBOL(skb_copy_expand);
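/* Editorial sketch (not in the original source): using skb_copy_expand() to
 * obtain a private, writable copy with room for an extra encapsulation
 * header.  EXAMPLE_ENCAP_LEN and the surrounding logic are assumptions made
 * purely for illustration.
 */
#define EXAMPLE_ENCAP_LEN 16

static struct sk_buff *example_copy_for_encap(const struct sk_buff *skb)
{
	struct sk_buff *copy;

	copy = skb_copy_expand(skb, skb_headroom(skb) + EXAMPLE_ENCAP_LEN,
			       skb_tailroom(skb), GFP_ATOMIC);
	if (!copy)
		return NULL;

	/* The copy is private, so pushing the new header is now safe. */
	skb_push(copy, EXAMPLE_ENCAP_LEN);
	return copy;
}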
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338
1339/**
1340 * skb_pad - zero pad the tail of an skb
1341 * @skb: buffer to pad
1342 * @pad: space to pad
1343 *
1344 * Ensure that a buffer is followed by a padding area that is zero
1345 * filled. Used by network drivers which may DMA or transfer data
1346 * beyond the buffer end onto the wire.
1347 *
Herbert Xu5b057c62006-06-23 02:06:41 -07001348 * May return error in out of memory cases. The skb is freed on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001350
Herbert Xu5b057c62006-06-23 02:06:41 -07001351int skb_pad(struct sk_buff *skb, int pad)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352{
Herbert Xu5b057c62006-06-23 02:06:41 -07001353 int err;
1354 int ntail;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001355
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 /* If the skbuff is non linear tailroom is always zero.. */
Herbert Xu5b057c62006-06-23 02:06:41 -07001357 if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358 memset(skb->data+skb->len, 0, pad);
Herbert Xu5b057c62006-06-23 02:06:41 -07001359 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 }
Herbert Xu5b057c62006-06-23 02:06:41 -07001361
Arnaldo Carvalho de Melo4305b542007-04-19 20:43:29 -07001362 ntail = skb->data_len + pad - (skb->end - skb->tail);
Herbert Xu5b057c62006-06-23 02:06:41 -07001363 if (likely(skb_cloned(skb) || ntail > 0)) {
1364 err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
1365 if (unlikely(err))
1366 goto free_skb;
1367 }
1368
1369 /* FIXME: The use of this function with non-linear skb's really needs
1370 * to be audited.
1371 */
1372 err = skb_linearize(skb);
1373 if (unlikely(err))
1374 goto free_skb;
1375
1376 memset(skb->data + skb->len, 0, pad);
1377 return 0;
1378
1379free_skb:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 kfree_skb(skb);
Herbert Xu5b057c62006-06-23 02:06:41 -07001381 return err;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001382}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08001383EXPORT_SYMBOL(skb_pad);
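/* Editorial sketch, not part of the original file: padding a short Ethernet
 * frame before handing it to hardware.  ETH_ZLEN comes from
 * <linux/if_ether.h>; the helper name is an assumption.  Remember that
 * skb_pad() frees the skb on failure, so the caller must not touch it
 * afterwards.
 */
static int example_pad_runt_frame(struct sk_buff *skb)
{
	if (skb->len >= ETH_ZLEN)
		return 0;

	if (skb_pad(skb, ETH_ZLEN - skb->len))
		return -ENOMEM;	/* skb has already been freed */

	/* skb->len is unchanged; the zeroed padding sits in the tailroom. */
	return 0;
}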
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001384
Ilpo Järvinen0dde3e12008-03-27 17:43:41 -07001385/**
Mathias Krause0c7ddf32013-11-07 14:18:24 +01001386 * pskb_put - add data to the tail of a potentially fragmented buffer
1387 * @skb: start of the buffer to use
1388 * @tail: tail fragment of the buffer to use
1389 * @len: amount of data to add
1390 *
1391 * This function extends the used data area of the potentially
1392 * fragmented buffer. @tail must be the last fragment of @skb -- or
1393 * @skb itself. If this would exceed the total buffer size the kernel
1394 * will panic. A pointer to the first byte of the extra data is
1395 * returned.
1396 */
1397
1398unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
1399{
1400 if (tail != skb) {
1401 skb->data_len += len;
1402 skb->len += len;
1403 }
1404 return skb_put(tail, len);
1405}
1406EXPORT_SYMBOL_GPL(pskb_put);
1407
1408/**
Ilpo Järvinen0dde3e12008-03-27 17:43:41 -07001409 * skb_put - add data to a buffer
1410 * @skb: buffer to use
1411 * @len: amount of data to add
1412 *
1413 * This function extends the used data area of the buffer. If this would
1414 * exceed the total buffer size the kernel will panic. A pointer to the
1415 * first byte of the extra data is returned.
1416 */
1417unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
1418{
1419 unsigned char *tmp = skb_tail_pointer(skb);
1420 SKB_LINEAR_ASSERT(skb);
1421 skb->tail += len;
1422 skb->len += len;
1423 if (unlikely(skb->tail > skb->end))
1424 skb_over_panic(skb, len, __builtin_return_address(0));
1425 return tmp;
1426}
1427EXPORT_SYMBOL(skb_put);
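/* Editorial example (not in the original file): the usual pattern for
 * building a packet in a freshly allocated skb - reserve headroom first, then
 * skb_put() the payload.  The sizes and the payload source are illustrative
 * assumptions.
 */
static struct sk_buff *example_build_packet(const void *payload,
					    unsigned int len)
{
	struct sk_buff *skb;

	skb = alloc_skb(NET_IP_ALIGN + ETH_HLEN + len, GFP_ATOMIC);
	if (!skb)
		return NULL;

	skb_reserve(skb, NET_IP_ALIGN + ETH_HLEN);	/* headroom for headers */
	memcpy(skb_put(skb, len), payload, len);	/* extend tail and copy */
	return skb;
}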
1428
Ilpo Järvinen6be8ac22008-03-27 17:47:24 -07001429/**
Ilpo Järvinenc2aa2702008-03-27 17:52:40 -07001430 * skb_push - add data to the start of a buffer
1431 * @skb: buffer to use
1432 * @len: amount of data to add
1433 *
1434 * This function extends the used data area of the buffer at the buffer
1435 * start. If this would exceed the total buffer headroom the kernel will
1436 * panic. A pointer to the first byte of the extra data is returned.
1437 */
1438unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
1439{
1440 skb->data -= len;
1441 skb->len += len;
1442 if (unlikely(skb->data<skb->head))
1443 skb_under_panic(skb, len, __builtin_return_address(0));
1444 return skb->data;
1445}
1446EXPORT_SYMBOL(skb_push);
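/* Editorial sketch (not in the original source): prepending an Ethernet
 * header with skb_push(), assuming the caller has already reserved enough
 * headroom.  The addresses and protocol value are placeholders.
 */
static void example_push_eth_header(struct sk_buff *skb,
				    const u8 *dst, const u8 *src)
{
	struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);

	memcpy(eth->h_dest, dst, ETH_ALEN);
	memcpy(eth->h_source, src, ETH_ALEN);
	eth->h_proto = htons(ETH_P_IP);
}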
1447
1448/**
Ilpo Järvinen6be8ac22008-03-27 17:47:24 -07001449 * skb_pull - remove data from the start of a buffer
1450 * @skb: buffer to use
1451 * @len: amount of data to remove
1452 *
1453 * This function removes data from the start of a buffer, returning
1454 * the memory to the headroom. A pointer to the next data in the buffer
1455 * is returned. Once the data has been pulled future pushes will overwrite
1456 * the old data.
1457 */
1458unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
1459{
David S. Miller47d29642010-05-02 02:21:44 -07001460 return skb_pull_inline(skb, len);
Ilpo Järvinen6be8ac22008-03-27 17:47:24 -07001461}
1462EXPORT_SYMBOL(skb_pull);
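/* Editorial example (not part of the original file): stripping an outer
 * header on receive with skb_pull(), after making sure the bytes are actually
 * present in the linear area.  EXAMPLE_HDR_LEN is an illustrative assumption.
 */
#define EXAMPLE_HDR_LEN 8

static int example_strip_outer_header(struct sk_buff *skb)
{
	if (!pskb_may_pull(skb, EXAMPLE_HDR_LEN))
		return -EINVAL;

	skb_pull(skb, EXAMPLE_HDR_LEN);	/* data now points past the header */
	return 0;
}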
1463
Ilpo Järvinen419ae742008-03-27 17:54:01 -07001464/**
1465 * skb_trim - remove end from a buffer
1466 * @skb: buffer to alter
1467 * @len: new length
1468 *
1469 * Cut the length of a buffer down by removing data from the tail. If
1470 * the buffer is already under the length specified it is not modified.
1471 * The skb must be linear.
1472 */
1473void skb_trim(struct sk_buff *skb, unsigned int len)
1474{
1475 if (skb->len > len)
1476 __skb_trim(skb, len);
1477}
1478EXPORT_SYMBOL(skb_trim);
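/* Editorial example (not part of the original source): a receive path that
 * drops a trailing 4-byte checksum appended by hardware.  The skb is assumed
 * to be linear, which is what skb_trim() requires; fragmented buffers would
 * need pskb_trim() instead.
 */
static void example_drop_hw_fcs(struct sk_buff *skb)
{
	if (skb->len > 4)
		skb_trim(skb, skb->len - 4);
}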
1479
Herbert Xu3cc0e872006-06-09 16:13:38 -07001480/* Trims skb to length len. It can change skb pointers.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 */
1482
Herbert Xu3cc0e872006-06-09 16:13:38 -07001483int ___pskb_trim(struct sk_buff *skb, unsigned int len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484{
Herbert Xu27b437c2006-07-13 19:26:39 -07001485 struct sk_buff **fragp;
1486 struct sk_buff *frag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487 int offset = skb_headlen(skb);
1488 int nfrags = skb_shinfo(skb)->nr_frags;
1489 int i;
Herbert Xu27b437c2006-07-13 19:26:39 -07001490 int err;
1491
1492 if (skb_cloned(skb) &&
1493 unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
1494 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495
Herbert Xuf4d26fb2006-07-30 20:20:28 -07001496 i = 0;
1497 if (offset >= len)
1498 goto drop_pages;
1499
1500 for (; i < nfrags; i++) {
Eric Dumazet9e903e02011-10-18 21:00:24 +00001501 int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);
Herbert Xu27b437c2006-07-13 19:26:39 -07001502
1503 if (end < len) {
1504 offset = end;
1505 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 }
Herbert Xu27b437c2006-07-13 19:26:39 -07001507
Eric Dumazet9e903e02011-10-18 21:00:24 +00001508 skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);
Herbert Xu27b437c2006-07-13 19:26:39 -07001509
Herbert Xuf4d26fb2006-07-30 20:20:28 -07001510drop_pages:
Herbert Xu27b437c2006-07-13 19:26:39 -07001511 skb_shinfo(skb)->nr_frags = i;
1512
1513 for (; i < nfrags; i++)
Ian Campbellea2ab692011-08-22 23:44:58 +00001514 skb_frag_unref(skb, i);
Herbert Xu27b437c2006-07-13 19:26:39 -07001515
David S. Miller21dc3302010-08-23 00:13:46 -07001516 if (skb_has_frag_list(skb))
Herbert Xu27b437c2006-07-13 19:26:39 -07001517 skb_drop_fraglist(skb);
Herbert Xuf4d26fb2006-07-30 20:20:28 -07001518 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 }
1520
Herbert Xu27b437c2006-07-13 19:26:39 -07001521 for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
1522 fragp = &frag->next) {
1523 int end = offset + frag->len;
1524
1525 if (skb_shared(frag)) {
1526 struct sk_buff *nfrag;
1527
1528 nfrag = skb_clone(frag, GFP_ATOMIC);
1529 if (unlikely(!nfrag))
1530 return -ENOMEM;
1531
1532 nfrag->next = frag->next;
Eric Dumazet85bb2a62012-04-19 02:24:53 +00001533 consume_skb(frag);
Herbert Xu27b437c2006-07-13 19:26:39 -07001534 frag = nfrag;
1535 *fragp = frag;
1536 }
1537
1538 if (end < len) {
1539 offset = end;
1540 continue;
1541 }
1542
1543 if (end > len &&
1544 unlikely((err = pskb_trim(frag, len - offset))))
1545 return err;
1546
1547 if (frag->next)
1548 skb_drop_list(&frag->next);
1549 break;
1550 }
1551
Herbert Xuf4d26fb2006-07-30 20:20:28 -07001552done:
Herbert Xu27b437c2006-07-13 19:26:39 -07001553 if (len > skb_headlen(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001554 skb->data_len -= skb->len - len;
1555 skb->len = len;
1556 } else {
Herbert Xu27b437c2006-07-13 19:26:39 -07001557 skb->len = len;
1558 skb->data_len = 0;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001559 skb_set_tail_pointer(skb, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560 }
1561
1562 return 0;
1563}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08001564EXPORT_SYMBOL(___pskb_trim);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565
1566/**
1567 * __pskb_pull_tail - advance tail of skb header
1568 * @skb: buffer to reallocate
1569 * @delta: number of bytes to advance tail
1570 *
 *	This function makes sense only on a fragmented &sk_buff: it expands
 *	the header, moving its tail forward and copying the necessary data
 *	from the fragmented part.
1574 *
1575 * &sk_buff MUST have reference count of 1.
1576 *
1577 * Returns %NULL (and &sk_buff does not change) if pull failed
1578 * or value of new tail of skb in the case of success.
1579 *
1580 * All the pointers pointing into skb header may change and must be
1581 * reloaded after call to this function.
1582 */
1583
1584/* Moves tail of skb head forward, copying data from fragmented part,
1585 * when it is necessary.
1586 * 1. It may fail due to malloc failure.
1587 * 2. It may change skb pointers.
1588 *
1589 * It is pretty complicated. Luckily, it is called only in exceptional cases.
1590 */
1591unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1592{
	/* If the skb does not have enough free space at the tail, get a new
	 * one plus 128 bytes for future expansions. If we have enough room
	 * at the tail, reallocate without expansion only if the skb is cloned.
	 */
Arnaldo Carvalho de Melo4305b542007-04-19 20:43:29 -07001597 int i, k, eat = (skb->tail + delta) - skb->end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598
1599 if (eat > 0 || skb_cloned(skb)) {
1600 if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
1601 GFP_ATOMIC))
1602 return NULL;
1603 }
1604
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001605 if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 BUG();
1607
	/* Optimization: no fragments, no reason to preestimate the
	 * size of pulled pages. Superb.
	 */
David S. Miller21dc3302010-08-23 00:13:46 -07001611 if (!skb_has_frag_list(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 goto pull_pages;
1613
1614 /* Estimate size of pulled pages. */
1615 eat = delta;
1616 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
Eric Dumazet9e903e02011-10-18 21:00:24 +00001617 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
1618
1619 if (size >= eat)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 goto pull_pages;
Eric Dumazet9e903e02011-10-18 21:00:24 +00001621 eat -= size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622 }
1623
	/* If we need to update the frag list, we are in trouble.
	 * Certainly, it would be possible to add an offset to the skb data,
	 * but given that pulling is expected to be a very rare operation,
	 * it is worth fighting against further bloating of the skb head and
	 * crucifying ourselves here instead. Pure masochism, indeed. 8)8)
	 */
1631 if (eat) {
1632 struct sk_buff *list = skb_shinfo(skb)->frag_list;
1633 struct sk_buff *clone = NULL;
1634 struct sk_buff *insp = NULL;
1635
1636 do {
Kris Katterjohn09a62662006-01-08 22:24:28 -08001637 BUG_ON(!list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638
1639 if (list->len <= eat) {
1640 /* Eaten as whole. */
1641 eat -= list->len;
1642 list = list->next;
1643 insp = list;
1644 } else {
1645 /* Eaten partially. */
1646
1647 if (skb_shared(list)) {
1648 /* Sucks! We need to fork list. :-( */
1649 clone = skb_clone(list, GFP_ATOMIC);
1650 if (!clone)
1651 return NULL;
1652 insp = list->next;
1653 list = clone;
1654 } else {
1655 /* This may be pulled without
1656 * problems. */
1657 insp = list;
1658 }
1659 if (!pskb_pull(list, eat)) {
Wei Yongjunf3fbbe02009-02-25 00:37:32 +00001660 kfree_skb(clone);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661 return NULL;
1662 }
1663 break;
1664 }
1665 } while (eat);
1666
1667 /* Free pulled out fragments. */
1668 while ((list = skb_shinfo(skb)->frag_list) != insp) {
1669 skb_shinfo(skb)->frag_list = list->next;
1670 kfree_skb(list);
1671 }
1672 /* And insert new clone at head. */
1673 if (clone) {
1674 clone->next = list;
1675 skb_shinfo(skb)->frag_list = clone;
1676 }
1677 }
1678 /* Success! Now we may commit changes to skb data. */
1679
1680pull_pages:
1681 eat = delta;
1682 k = 0;
1683 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
Eric Dumazet9e903e02011-10-18 21:00:24 +00001684 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
1685
1686 if (size <= eat) {
Ian Campbellea2ab692011-08-22 23:44:58 +00001687 skb_frag_unref(skb, i);
Eric Dumazet9e903e02011-10-18 21:00:24 +00001688 eat -= size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689 } else {
1690 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
1691 if (eat) {
1692 skb_shinfo(skb)->frags[k].page_offset += eat;
Eric Dumazet9e903e02011-10-18 21:00:24 +00001693 skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694 eat = 0;
1695 }
1696 k++;
1697 }
1698 }
1699 skb_shinfo(skb)->nr_frags = k;
1700
1701 skb->tail += delta;
1702 skb->data_len -= delta;
1703
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001704 return skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08001706EXPORT_SYMBOL(__pskb_pull_tail);
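/* Editorial sketch, not in the original file: this is roughly the check that
 * callers perform (usually via the pskb_may_pull() inline in skbuff.h) before
 * reading @len bytes from the linear area; it is shown open-coded here only
 * to illustrate when __pskb_pull_tail() gets involved.
 */
static bool example_header_in_linear(struct sk_buff *skb, unsigned int len)
{
	if (likely(len <= skb_headlen(skb)))
		return true;		/* already in the linear area */
	if (unlikely(len > skb->len))
		return false;		/* the skb is not even that long */
	/* Pull the missing bytes out of the fragments into the head. */
	return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL;
}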
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707
Eric Dumazet22019b12011-07-29 18:37:31 +00001708/**
1709 * skb_copy_bits - copy bits from skb to kernel buffer
1710 * @skb: source skb
1711 * @offset: offset in source
1712 * @to: destination buffer
1713 * @len: number of bytes to copy
1714 *
1715 * Copy the specified number of bytes from the source skb to the
1716 * destination buffer.
1717 *
1718 * CAUTION ! :
1719 * If its prototype is ever changed,
1720 * check arch/{*}/net/{*}.S files,
1721 * since it is called from BPF assembly code.
1722 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1724{
David S. Miller1a028e52007-04-27 15:21:23 -07001725 int start = skb_headlen(skb);
David S. Millerfbb398a2009-06-09 00:18:59 -07001726 struct sk_buff *frag_iter;
1727 int i, copy;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728
1729 if (offset > (int)skb->len - len)
1730 goto fault;
1731
1732 /* Copy header. */
David S. Miller1a028e52007-04-27 15:21:23 -07001733 if ((copy = start - offset) > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 if (copy > len)
1735 copy = len;
Arnaldo Carvalho de Melod626f622007-03-27 18:55:52 -03001736 skb_copy_from_linear_data_offset(skb, offset, to, copy);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737 if ((len -= copy) == 0)
1738 return 0;
1739 offset += copy;
1740 to += copy;
1741 }
1742
1743 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
David S. Miller1a028e52007-04-27 15:21:23 -07001744 int end;
Eric Dumazet51c56b02012-04-05 11:35:15 +02001745 skb_frag_t *f = &skb_shinfo(skb)->frags[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746
Ilpo Järvinen547b7922008-07-25 21:43:18 -07001747 WARN_ON(start > offset + len);
David S. Miller1a028e52007-04-27 15:21:23 -07001748
Eric Dumazet51c56b02012-04-05 11:35:15 +02001749 end = start + skb_frag_size(f);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750 if ((copy = end - offset) > 0) {
1751 u8 *vaddr;
1752
1753 if (copy > len)
1754 copy = len;
1755
Eric Dumazet51c56b02012-04-05 11:35:15 +02001756 vaddr = kmap_atomic(skb_frag_page(f));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757 memcpy(to,
Eric Dumazet51c56b02012-04-05 11:35:15 +02001758 vaddr + f->page_offset + offset - start,
1759 copy);
1760 kunmap_atomic(vaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761
1762 if ((len -= copy) == 0)
1763 return 0;
1764 offset += copy;
1765 to += copy;
1766 }
David S. Miller1a028e52007-04-27 15:21:23 -07001767 start = end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 }
1769
David S. Millerfbb398a2009-06-09 00:18:59 -07001770 skb_walk_frags(skb, frag_iter) {
1771 int end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772
David S. Millerfbb398a2009-06-09 00:18:59 -07001773 WARN_ON(start > offset + len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774
David S. Millerfbb398a2009-06-09 00:18:59 -07001775 end = start + frag_iter->len;
1776 if ((copy = end - offset) > 0) {
1777 if (copy > len)
1778 copy = len;
1779 if (skb_copy_bits(frag_iter, offset - start, to, copy))
1780 goto fault;
1781 if ((len -= copy) == 0)
1782 return 0;
1783 offset += copy;
1784 to += copy;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785 }
David S. Millerfbb398a2009-06-09 00:18:59 -07001786 start = end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 }
Shirley Maa6686f22011-07-06 12:22:12 +00001788
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789 if (!len)
1790 return 0;
1791
1792fault:
1793 return -EFAULT;
1794}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08001795EXPORT_SYMBOL(skb_copy_bits);
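/* Editorial sketch (not in the original file): peeking at a header that may
 * be spread across fragments by copying it into a caller-supplied buffer with
 * skb_copy_bits().  The choice of a UDP header is an illustrative assumption.
 */
static int example_peek_header(const struct sk_buff *skb, int offset,
			       struct udphdr *uh)
{
	/* Works regardless of how the skb data is fragmented. */
	return skb_copy_bits(skb, offset, uh, sizeof(*uh));
}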
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796
Jens Axboe9c55e012007-11-06 23:30:13 -08001797/*
 * Callback from splice_to_pipe(): releases the pages held at the end of the
 * spd in case we errored out while filling the pipe.
1800 */
1801static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
1802{
Jarek Poplawski8b9d3722009-01-19 17:03:56 -08001803 put_page(spd->pages[i]);
1804}
Jens Axboe9c55e012007-11-06 23:30:13 -08001805
David S. Millera108d5f2012-04-23 23:06:11 -04001806static struct page *linear_to_page(struct page *page, unsigned int *len,
1807 unsigned int *offset,
Eric Dumazet18aafc62013-01-11 14:46:37 +00001808 struct sock *sk)
Jarek Poplawski8b9d3722009-01-19 17:03:56 -08001809{
Eric Dumazet5640f762012-09-23 23:04:42 +00001810 struct page_frag *pfrag = sk_page_frag(sk);
Jarek Poplawski8b9d3722009-01-19 17:03:56 -08001811
Eric Dumazet5640f762012-09-23 23:04:42 +00001812 if (!sk_page_frag_refill(sk, pfrag))
1813 return NULL;
Jarek Poplawski4fb66992009-02-01 00:41:42 -08001814
Eric Dumazet5640f762012-09-23 23:04:42 +00001815 *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);
Jarek Poplawski4fb66992009-02-01 00:41:42 -08001816
Eric Dumazet5640f762012-09-23 23:04:42 +00001817 memcpy(page_address(pfrag->page) + pfrag->offset,
1818 page_address(page) + *offset, *len);
1819 *offset = pfrag->offset;
1820 pfrag->offset += *len;
Jarek Poplawski4fb66992009-02-01 00:41:42 -08001821
Eric Dumazet5640f762012-09-23 23:04:42 +00001822 return pfrag->page;
Jens Axboe9c55e012007-11-06 23:30:13 -08001823}
1824
Eric Dumazet41c73a02012-04-22 12:26:16 +00001825static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
1826 struct page *page,
1827 unsigned int offset)
1828{
1829 return spd->nr_pages &&
1830 spd->pages[spd->nr_pages - 1] == page &&
1831 (spd->partial[spd->nr_pages - 1].offset +
1832 spd->partial[spd->nr_pages - 1].len == offset);
1833}
1834
Jens Axboe9c55e012007-11-06 23:30:13 -08001835/*
1836 * Fill page/offset/length into spd, if it can hold more pages.
1837 */
David S. Millera108d5f2012-04-23 23:06:11 -04001838static bool spd_fill_page(struct splice_pipe_desc *spd,
1839 struct pipe_inode_info *pipe, struct page *page,
1840 unsigned int *len, unsigned int offset,
Eric Dumazet18aafc62013-01-11 14:46:37 +00001841 bool linear,
David S. Millera108d5f2012-04-23 23:06:11 -04001842 struct sock *sk)
Jens Axboe9c55e012007-11-06 23:30:13 -08001843{
Eric Dumazet41c73a02012-04-22 12:26:16 +00001844 if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
David S. Millera108d5f2012-04-23 23:06:11 -04001845 return true;
Jens Axboe9c55e012007-11-06 23:30:13 -08001846
Jarek Poplawski8b9d3722009-01-19 17:03:56 -08001847 if (linear) {
Eric Dumazet18aafc62013-01-11 14:46:37 +00001848 page = linear_to_page(page, len, &offset, sk);
Jarek Poplawski8b9d3722009-01-19 17:03:56 -08001849 if (!page)
David S. Millera108d5f2012-04-23 23:06:11 -04001850 return true;
Eric Dumazet41c73a02012-04-22 12:26:16 +00001851 }
1852 if (spd_can_coalesce(spd, page, offset)) {
1853 spd->partial[spd->nr_pages - 1].len += *len;
David S. Millera108d5f2012-04-23 23:06:11 -04001854 return false;
Eric Dumazet41c73a02012-04-22 12:26:16 +00001855 }
1856 get_page(page);
Jens Axboe9c55e012007-11-06 23:30:13 -08001857 spd->pages[spd->nr_pages] = page;
Jarek Poplawski4fb66992009-02-01 00:41:42 -08001858 spd->partial[spd->nr_pages].len = *len;
Jens Axboe9c55e012007-11-06 23:30:13 -08001859 spd->partial[spd->nr_pages].offset = offset;
Jens Axboe9c55e012007-11-06 23:30:13 -08001860 spd->nr_pages++;
Jarek Poplawski8b9d3722009-01-19 17:03:56 -08001861
David S. Millera108d5f2012-04-23 23:06:11 -04001862 return false;
Jens Axboe9c55e012007-11-06 23:30:13 -08001863}
1864
David S. Millera108d5f2012-04-23 23:06:11 -04001865static bool __splice_segment(struct page *page, unsigned int poff,
1866 unsigned int plen, unsigned int *off,
Eric Dumazet18aafc62013-01-11 14:46:37 +00001867 unsigned int *len,
Eric Dumazetd7ccf7c2012-04-23 23:35:04 -04001868 struct splice_pipe_desc *spd, bool linear,
David S. Millera108d5f2012-04-23 23:06:11 -04001869 struct sock *sk,
1870 struct pipe_inode_info *pipe)
Octavian Purdila2870c432008-07-15 00:49:11 -07001871{
1872 if (!*len)
David S. Millera108d5f2012-04-23 23:06:11 -04001873 return true;
Octavian Purdila2870c432008-07-15 00:49:11 -07001874
1875 /* skip this segment if already processed */
1876 if (*off >= plen) {
1877 *off -= plen;
David S. Millera108d5f2012-04-23 23:06:11 -04001878 return false;
Octavian Purdiladb43a282008-06-27 17:27:21 -07001879 }
Jens Axboe9c55e012007-11-06 23:30:13 -08001880
Octavian Purdila2870c432008-07-15 00:49:11 -07001881 /* ignore any bits we already processed */
Eric Dumazet9ca1b222013-01-05 21:31:18 +00001882 poff += *off;
1883 plen -= *off;
1884 *off = 0;
Octavian Purdila2870c432008-07-15 00:49:11 -07001885
Eric Dumazet18aafc62013-01-11 14:46:37 +00001886 do {
1887 unsigned int flen = min(*len, plen);
Octavian Purdila2870c432008-07-15 00:49:11 -07001888
Eric Dumazet18aafc62013-01-11 14:46:37 +00001889 if (spd_fill_page(spd, pipe, page, &flen, poff,
1890 linear, sk))
1891 return true;
1892 poff += flen;
1893 plen -= flen;
1894 *len -= flen;
1895 } while (*len && plen);
Octavian Purdila2870c432008-07-15 00:49:11 -07001896
David S. Millera108d5f2012-04-23 23:06:11 -04001897 return false;
Octavian Purdila2870c432008-07-15 00:49:11 -07001898}
1899
1900/*
David S. Millera108d5f2012-04-23 23:06:11 -04001901 * Map linear and fragment data from the skb to spd. It reports true if the
Octavian Purdila2870c432008-07-15 00:49:11 -07001902 * pipe is full or if we already spliced the requested length.
1903 */
David S. Millera108d5f2012-04-23 23:06:11 -04001904static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1905 unsigned int *offset, unsigned int *len,
1906 struct splice_pipe_desc *spd, struct sock *sk)
Octavian Purdila2870c432008-07-15 00:49:11 -07001907{
1908 int seg;
1909
Eric Dumazet1d0c0b32012-04-27 02:10:03 +00001910 /* map the linear part :
Alexander Duyck2996d312012-05-02 18:18:42 +00001911 * If skb->head_frag is set, this 'linear' part is backed by a
1912 * fragment, and if the head is not shared with any clones then
1913 * we can avoid a copy since we own the head portion of this page.
Jens Axboe9c55e012007-11-06 23:30:13 -08001914 */
Octavian Purdila2870c432008-07-15 00:49:11 -07001915 if (__splice_segment(virt_to_page(skb->data),
1916 (unsigned long) skb->data & (PAGE_SIZE - 1),
1917 skb_headlen(skb),
Eric Dumazet18aafc62013-01-11 14:46:37 +00001918 offset, len, spd,
Alexander Duyck3a7c1ee42012-05-03 01:09:42 +00001919 skb_head_is_locked(skb),
Eric Dumazet1d0c0b32012-04-27 02:10:03 +00001920 sk, pipe))
David S. Millera108d5f2012-04-23 23:06:11 -04001921 return true;
Jens Axboe9c55e012007-11-06 23:30:13 -08001922
1923 /*
1924 * then map the fragments
1925 */
Jens Axboe9c55e012007-11-06 23:30:13 -08001926 for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
1927 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1928
Ian Campbellea2ab692011-08-22 23:44:58 +00001929 if (__splice_segment(skb_frag_page(f),
Eric Dumazet9e903e02011-10-18 21:00:24 +00001930 f->page_offset, skb_frag_size(f),
Eric Dumazet18aafc62013-01-11 14:46:37 +00001931 offset, len, spd, false, sk, pipe))
David S. Millera108d5f2012-04-23 23:06:11 -04001932 return true;
Jens Axboe9c55e012007-11-06 23:30:13 -08001933 }
1934
David S. Millera108d5f2012-04-23 23:06:11 -04001935 return false;
Jens Axboe9c55e012007-11-06 23:30:13 -08001936}
1937
1938/*
1939 * Map data from the skb to a pipe. Should handle both the linear part,
1940 * the fragments, and the frag list. It does NOT handle frag lists within
1941 * the frag list, if such a thing exists. We'd probably need to recurse to
1942 * handle that cleanly.
1943 */
Jarek Poplawski8b9d3722009-01-19 17:03:56 -08001944int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
Jens Axboe9c55e012007-11-06 23:30:13 -08001945 struct pipe_inode_info *pipe, unsigned int tlen,
1946 unsigned int flags)
1947{
Eric Dumazet41c73a02012-04-22 12:26:16 +00001948 struct partial_page partial[MAX_SKB_FRAGS];
1949 struct page *pages[MAX_SKB_FRAGS];
Jens Axboe9c55e012007-11-06 23:30:13 -08001950 struct splice_pipe_desc spd = {
1951 .pages = pages,
1952 .partial = partial,
Eric Dumazet047fe362012-06-12 15:24:40 +02001953 .nr_pages_max = MAX_SKB_FRAGS,
Jens Axboe9c55e012007-11-06 23:30:13 -08001954 .flags = flags,
Miklos Szeredi28a625c2014-01-22 19:36:57 +01001955 .ops = &nosteal_pipe_buf_ops,
Jens Axboe9c55e012007-11-06 23:30:13 -08001956 .spd_release = sock_spd_release,
1957 };
David S. Millerfbb398a2009-06-09 00:18:59 -07001958 struct sk_buff *frag_iter;
Jarek Poplawski7a67e562009-04-30 05:41:19 -07001959 struct sock *sk = skb->sk;
Jens Axboe35f3d142010-05-20 10:43:18 +02001960 int ret = 0;
1961
Jens Axboe9c55e012007-11-06 23:30:13 -08001962 /*
1963 * __skb_splice_bits() only fails if the output has no room left,
1964 * so no point in going over the frag_list for the error case.
1965 */
Jens Axboe35f3d142010-05-20 10:43:18 +02001966 if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
Jens Axboe9c55e012007-11-06 23:30:13 -08001967 goto done;
1968 else if (!tlen)
1969 goto done;
1970
1971 /*
1972 * now see if we have a frag_list to map
1973 */
David S. Millerfbb398a2009-06-09 00:18:59 -07001974 skb_walk_frags(skb, frag_iter) {
1975 if (!tlen)
1976 break;
Jens Axboe35f3d142010-05-20 10:43:18 +02001977 if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
David S. Millerfbb398a2009-06-09 00:18:59 -07001978 break;
Jens Axboe9c55e012007-11-06 23:30:13 -08001979 }
1980
1981done:
Jens Axboe9c55e012007-11-06 23:30:13 -08001982 if (spd.nr_pages) {
Jens Axboe9c55e012007-11-06 23:30:13 -08001983 /*
1984 * Drop the socket lock, otherwise we have reverse
1985 * locking dependencies between sk_lock and i_mutex
1986 * here as compared to sendfile(). We enter here
1987 * with the socket lock held, and splice_to_pipe() will
1988 * grab the pipe inode lock. For sendfile() emulation,
1989 * we call into ->sendpage() with the i_mutex lock held
1990 * and networking will grab the socket lock.
1991 */
Octavian Purdila293ad602008-06-04 15:45:58 -07001992 release_sock(sk);
Jens Axboe9c55e012007-11-06 23:30:13 -08001993 ret = splice_to_pipe(pipe, &spd);
Octavian Purdila293ad602008-06-04 15:45:58 -07001994 lock_sock(sk);
Jens Axboe9c55e012007-11-06 23:30:13 -08001995 }
1996
Jens Axboe35f3d142010-05-20 10:43:18 +02001997 return ret;
Jens Axboe9c55e012007-11-06 23:30:13 -08001998}
1999
Herbert Xu357b40a2005-04-19 22:30:14 -07002000/**
2001 * skb_store_bits - store bits from kernel buffer to skb
2002 * @skb: destination buffer
2003 * @offset: offset in destination
2004 * @from: source buffer
2005 * @len: number of bytes to copy
2006 *
2007 * Copy the specified number of bytes from the source buffer to the
2008 * destination skb. This function handles all the messy bits of
2009 * traversing fragment lists and such.
2010 */
2011
Stephen Hemminger0c6fcc82007-04-20 16:40:01 -07002012int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
Herbert Xu357b40a2005-04-19 22:30:14 -07002013{
David S. Miller1a028e52007-04-27 15:21:23 -07002014 int start = skb_headlen(skb);
David S. Millerfbb398a2009-06-09 00:18:59 -07002015 struct sk_buff *frag_iter;
2016 int i, copy;
Herbert Xu357b40a2005-04-19 22:30:14 -07002017
2018 if (offset > (int)skb->len - len)
2019 goto fault;
2020
David S. Miller1a028e52007-04-27 15:21:23 -07002021 if ((copy = start - offset) > 0) {
Herbert Xu357b40a2005-04-19 22:30:14 -07002022 if (copy > len)
2023 copy = len;
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -03002024 skb_copy_to_linear_data_offset(skb, offset, from, copy);
Herbert Xu357b40a2005-04-19 22:30:14 -07002025 if ((len -= copy) == 0)
2026 return 0;
2027 offset += copy;
2028 from += copy;
2029 }
2030
2031 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2032 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
David S. Miller1a028e52007-04-27 15:21:23 -07002033 int end;
Herbert Xu357b40a2005-04-19 22:30:14 -07002034
Ilpo Järvinen547b7922008-07-25 21:43:18 -07002035 WARN_ON(start > offset + len);
David S. Miller1a028e52007-04-27 15:21:23 -07002036
Eric Dumazet9e903e02011-10-18 21:00:24 +00002037 end = start + skb_frag_size(frag);
Herbert Xu357b40a2005-04-19 22:30:14 -07002038 if ((copy = end - offset) > 0) {
2039 u8 *vaddr;
2040
2041 if (copy > len)
2042 copy = len;
2043
Eric Dumazet51c56b02012-04-05 11:35:15 +02002044 vaddr = kmap_atomic(skb_frag_page(frag));
David S. Miller1a028e52007-04-27 15:21:23 -07002045 memcpy(vaddr + frag->page_offset + offset - start,
2046 from, copy);
Eric Dumazet51c56b02012-04-05 11:35:15 +02002047 kunmap_atomic(vaddr);
Herbert Xu357b40a2005-04-19 22:30:14 -07002048
2049 if ((len -= copy) == 0)
2050 return 0;
2051 offset += copy;
2052 from += copy;
2053 }
David S. Miller1a028e52007-04-27 15:21:23 -07002054 start = end;
Herbert Xu357b40a2005-04-19 22:30:14 -07002055 }
2056
David S. Millerfbb398a2009-06-09 00:18:59 -07002057 skb_walk_frags(skb, frag_iter) {
2058 int end;
Herbert Xu357b40a2005-04-19 22:30:14 -07002059
David S. Millerfbb398a2009-06-09 00:18:59 -07002060 WARN_ON(start > offset + len);
Herbert Xu357b40a2005-04-19 22:30:14 -07002061
David S. Millerfbb398a2009-06-09 00:18:59 -07002062 end = start + frag_iter->len;
2063 if ((copy = end - offset) > 0) {
2064 if (copy > len)
2065 copy = len;
2066 if (skb_store_bits(frag_iter, offset - start,
2067 from, copy))
2068 goto fault;
2069 if ((len -= copy) == 0)
2070 return 0;
2071 offset += copy;
2072 from += copy;
Herbert Xu357b40a2005-04-19 22:30:14 -07002073 }
David S. Millerfbb398a2009-06-09 00:18:59 -07002074 start = end;
Herbert Xu357b40a2005-04-19 22:30:14 -07002075 }
2076 if (!len)
2077 return 0;
2078
2079fault:
2080 return -EFAULT;
2081}
Herbert Xu357b40a2005-04-19 22:30:14 -07002082EXPORT_SYMBOL(skb_store_bits);
2083
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084/* Checksum skb data. */
Daniel Borkmann2817a332013-10-30 11:50:51 +01002085__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
2086 __wsum csum, const struct skb_checksum_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087{
David S. Miller1a028e52007-04-27 15:21:23 -07002088 int start = skb_headlen(skb);
2089 int i, copy = start - offset;
David S. Millerfbb398a2009-06-09 00:18:59 -07002090 struct sk_buff *frag_iter;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091 int pos = 0;
2092
2093 /* Checksum header. */
2094 if (copy > 0) {
2095 if (copy > len)
2096 copy = len;
Daniel Borkmann2817a332013-10-30 11:50:51 +01002097 csum = ops->update(skb->data + offset, copy, csum);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098 if ((len -= copy) == 0)
2099 return csum;
2100 offset += copy;
2101 pos = copy;
2102 }
2103
2104 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
David S. Miller1a028e52007-04-27 15:21:23 -07002105 int end;
Eric Dumazet51c56b02012-04-05 11:35:15 +02002106 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107
Ilpo Järvinen547b7922008-07-25 21:43:18 -07002108 WARN_ON(start > offset + len);
David S. Miller1a028e52007-04-27 15:21:23 -07002109
Eric Dumazet51c56b02012-04-05 11:35:15 +02002110 end = start + skb_frag_size(frag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111 if ((copy = end - offset) > 0) {
Al Viro44bb9362006-11-14 21:36:14 -08002112 __wsum csum2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002113 u8 *vaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002114
2115 if (copy > len)
2116 copy = len;
Eric Dumazet51c56b02012-04-05 11:35:15 +02002117 vaddr = kmap_atomic(skb_frag_page(frag));
Daniel Borkmann2817a332013-10-30 11:50:51 +01002118 csum2 = ops->update(vaddr + frag->page_offset +
2119 offset - start, copy, 0);
Eric Dumazet51c56b02012-04-05 11:35:15 +02002120 kunmap_atomic(vaddr);
Daniel Borkmann2817a332013-10-30 11:50:51 +01002121 csum = ops->combine(csum, csum2, pos, copy);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122 if (!(len -= copy))
2123 return csum;
2124 offset += copy;
2125 pos += copy;
2126 }
David S. Miller1a028e52007-04-27 15:21:23 -07002127 start = end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128 }
2129
David S. Millerfbb398a2009-06-09 00:18:59 -07002130 skb_walk_frags(skb, frag_iter) {
2131 int end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132
David S. Millerfbb398a2009-06-09 00:18:59 -07002133 WARN_ON(start > offset + len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134
David S. Millerfbb398a2009-06-09 00:18:59 -07002135 end = start + frag_iter->len;
2136 if ((copy = end - offset) > 0) {
2137 __wsum csum2;
2138 if (copy > len)
2139 copy = len;
Daniel Borkmann2817a332013-10-30 11:50:51 +01002140 csum2 = __skb_checksum(frag_iter, offset - start,
2141 copy, 0, ops);
2142 csum = ops->combine(csum, csum2, pos, copy);
David S. Millerfbb398a2009-06-09 00:18:59 -07002143 if ((len -= copy) == 0)
2144 return csum;
2145 offset += copy;
2146 pos += copy;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147 }
David S. Millerfbb398a2009-06-09 00:18:59 -07002148 start = end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149 }
Kris Katterjohn09a62662006-01-08 22:24:28 -08002150 BUG_ON(len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151
2152 return csum;
2153}
Daniel Borkmann2817a332013-10-30 11:50:51 +01002154EXPORT_SYMBOL(__skb_checksum);
2155
2156__wsum skb_checksum(const struct sk_buff *skb, int offset,
2157 int len, __wsum csum)
2158{
2159 const struct skb_checksum_ops ops = {
Daniel Borkmanncea80ea2013-11-04 17:10:25 +01002160 .update = csum_partial_ext,
Daniel Borkmann2817a332013-10-30 11:50:51 +01002161 .combine = csum_block_add_ext,
2162 };
2163
2164 return __skb_checksum(skb, offset, len, csum, &ops);
2165}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002166EXPORT_SYMBOL(skb_checksum);
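/* Editorial example, not part of the original source: folding a checksum over
 * the transport payload of an skb.  @thoff is assumed to be the offset of the
 * transport header; the helper name is illustrative.
 */
static __sum16 example_fold_payload_csum(const struct sk_buff *skb, int thoff)
{
	__wsum csum = skb_checksum(skb, thoff, skb->len - thoff, 0);

	return csum_fold(csum);
}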
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167
2168/* Both of above in one bottle. */
2169
Al Viro81d77662006-11-14 21:37:33 -08002170__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
2171 u8 *to, int len, __wsum csum)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002172{
David S. Miller1a028e52007-04-27 15:21:23 -07002173 int start = skb_headlen(skb);
2174 int i, copy = start - offset;
David S. Millerfbb398a2009-06-09 00:18:59 -07002175 struct sk_buff *frag_iter;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002176 int pos = 0;
2177
2178 /* Copy header. */
2179 if (copy > 0) {
2180 if (copy > len)
2181 copy = len;
2182 csum = csum_partial_copy_nocheck(skb->data + offset, to,
2183 copy, csum);
2184 if ((len -= copy) == 0)
2185 return csum;
2186 offset += copy;
2187 to += copy;
2188 pos = copy;
2189 }
2190
2191 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
David S. Miller1a028e52007-04-27 15:21:23 -07002192 int end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193
Ilpo Järvinen547b7922008-07-25 21:43:18 -07002194 WARN_ON(start > offset + len);
David S. Miller1a028e52007-04-27 15:21:23 -07002195
Eric Dumazet9e903e02011-10-18 21:00:24 +00002196 end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 if ((copy = end - offset) > 0) {
Al Viro50842052006-11-14 21:36:34 -08002198 __wsum csum2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199 u8 *vaddr;
2200 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2201
2202 if (copy > len)
2203 copy = len;
Eric Dumazet51c56b02012-04-05 11:35:15 +02002204 vaddr = kmap_atomic(skb_frag_page(frag));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205 csum2 = csum_partial_copy_nocheck(vaddr +
David S. Miller1a028e52007-04-27 15:21:23 -07002206 frag->page_offset +
2207 offset - start, to,
2208 copy, 0);
Eric Dumazet51c56b02012-04-05 11:35:15 +02002209 kunmap_atomic(vaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210 csum = csum_block_add(csum, csum2, pos);
2211 if (!(len -= copy))
2212 return csum;
2213 offset += copy;
2214 to += copy;
2215 pos += copy;
2216 }
David S. Miller1a028e52007-04-27 15:21:23 -07002217 start = end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218 }
2219
David S. Millerfbb398a2009-06-09 00:18:59 -07002220 skb_walk_frags(skb, frag_iter) {
2221 __wsum csum2;
2222 int end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223
David S. Millerfbb398a2009-06-09 00:18:59 -07002224 WARN_ON(start > offset + len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225
David S. Millerfbb398a2009-06-09 00:18:59 -07002226 end = start + frag_iter->len;
2227 if ((copy = end - offset) > 0) {
2228 if (copy > len)
2229 copy = len;
2230 csum2 = skb_copy_and_csum_bits(frag_iter,
2231 offset - start,
2232 to, copy, 0);
2233 csum = csum_block_add(csum, csum2, pos);
2234 if ((len -= copy) == 0)
2235 return csum;
2236 offset += copy;
2237 to += copy;
2238 pos += copy;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002239 }
David S. Millerfbb398a2009-06-09 00:18:59 -07002240 start = end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002241 }
Kris Katterjohn09a62662006-01-08 22:24:28 -08002242 BUG_ON(len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243 return csum;
2244}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002245EXPORT_SYMBOL(skb_copy_and_csum_bits);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246
Thomas Grafaf2806f2013-12-13 15:22:17 +01002247 /**
2248 * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
2249 * @from: source buffer
2250 *
2251 * Calculates the amount of linear headroom needed in the 'to' skb passed
2252 * into skb_zerocopy().
2253 */
2254unsigned int
2255skb_zerocopy_headlen(const struct sk_buff *from)
2256{
2257 unsigned int hlen = 0;
2258
2259 if (!from->head_frag ||
2260 skb_headlen(from) < L1_CACHE_BYTES ||
2261 skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
2262 hlen = skb_headlen(from);
2263
2264 if (skb_has_frag_list(from))
2265 hlen = from->len;
2266
2267 return hlen;
2268}
2269EXPORT_SYMBOL_GPL(skb_zerocopy_headlen);
2270
2271/**
2272 * skb_zerocopy - Zero copy skb to skb
2273 * @to: destination buffer
Masanari Iida7fceb4d2014-01-29 01:05:28 +09002274 * @from: source buffer
Thomas Grafaf2806f2013-12-13 15:22:17 +01002275 * @len: number of bytes to copy from source buffer
2276 * @hlen: size of linear headroom in destination buffer
2277 *
2278 * Copies up to `len` bytes from `from` to `to` by creating references
2279 * to the frags in the source buffer.
2280 *
2281 * The `hlen` as calculated by skb_zerocopy_headlen() specifies the
2282 * headroom in the `to` buffer.
Zoltan Kiss36d5fe62014-03-26 22:37:45 +00002283 *
2284 * Return value:
2285 * 0: everything is OK
2286 * -ENOMEM: couldn't orphan frags of @from due to lack of memory
2287 * -EFAULT: skb_copy_bits() found some problem with skb geometry
Thomas Grafaf2806f2013-12-13 15:22:17 +01002288 */
Zoltan Kiss36d5fe62014-03-26 22:37:45 +00002289int
2290skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
Thomas Grafaf2806f2013-12-13 15:22:17 +01002291{
2292 int i, j = 0;
2293 int plen = 0; /* length of skb->head fragment */
Zoltan Kiss36d5fe62014-03-26 22:37:45 +00002294 int ret;
Thomas Grafaf2806f2013-12-13 15:22:17 +01002295 struct page *page;
2296 unsigned int offset;
2297
2298 BUG_ON(!from->head_frag && !hlen);
2299
	/* don't bother with small payloads */
Zoltan Kiss36d5fe62014-03-26 22:37:45 +00002301 if (len <= skb_tailroom(to))
2302 return skb_copy_bits(from, 0, skb_put(to, len), len);
Thomas Grafaf2806f2013-12-13 15:22:17 +01002303
2304 if (hlen) {
Zoltan Kiss36d5fe62014-03-26 22:37:45 +00002305 ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
2306 if (unlikely(ret))
2307 return ret;
Thomas Grafaf2806f2013-12-13 15:22:17 +01002308 len -= hlen;
2309 } else {
2310 plen = min_t(int, skb_headlen(from), len);
2311 if (plen) {
2312 page = virt_to_head_page(from->head);
2313 offset = from->data - (unsigned char *)page_address(page);
2314 __skb_fill_page_desc(to, 0, page, offset, plen);
2315 get_page(page);
2316 j = 1;
2317 len -= plen;
2318 }
2319 }
2320
2321 to->truesize += len + plen;
2322 to->len += len + plen;
2323 to->data_len += len + plen;
2324
Zoltan Kiss36d5fe62014-03-26 22:37:45 +00002325 if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
2326 skb_tx_error(from);
2327 return -ENOMEM;
2328 }
2329
Thomas Grafaf2806f2013-12-13 15:22:17 +01002330 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
2331 if (!len)
2332 break;
2333 skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
2334 skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
2335 len -= skb_shinfo(to)->frags[j].size;
2336 skb_frag_ref(to, j);
2337 j++;
2338 }
2339 skb_shinfo(to)->nr_frags = j;
Zoltan Kiss36d5fe62014-03-26 22:37:45 +00002340
2341 return 0;
Thomas Grafaf2806f2013-12-13 15:22:17 +01002342}
2343EXPORT_SYMBOL_GPL(skb_zerocopy);
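/* Editorial sketch (not in the original file): pairing skb_zerocopy_headlen()
 * with skb_zerocopy() to build a cheap partial copy of @from, roughly in the
 * spirit of the netlink queueing users.  The allocation sizing and error
 * handling are illustrative assumptions, not the canonical usage.
 */
static struct sk_buff *example_zerocopy_clone(struct sk_buff *from,
					      unsigned int len)
{
	unsigned int hlen = skb_zerocopy_headlen(from);
	struct sk_buff *to;

	if (len > from->len)
		len = from->len;

	to = alloc_skb(hlen, GFP_ATOMIC);
	if (!to)
		return NULL;

	/* Small payloads are copied; larger ones reference @from's frags. */
	if (skb_zerocopy(to, from, len, hlen)) {
		kfree_skb(to);
		return NULL;
	}
	return to;
}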
2344
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
2346{
Al Virod3bc23e2006-11-14 21:24:49 -08002347 __wsum csum;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348 long csstart;
2349
Patrick McHardy84fa7932006-08-29 16:44:56 -07002350 if (skb->ip_summed == CHECKSUM_PARTIAL)
Michał Mirosław55508d62010-12-14 15:24:08 +00002351 csstart = skb_checksum_start_offset(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352 else
2353 csstart = skb_headlen(skb);
2354
Kris Katterjohn09a62662006-01-08 22:24:28 -08002355 BUG_ON(csstart > skb_headlen(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002356
Arnaldo Carvalho de Melod626f622007-03-27 18:55:52 -03002357 skb_copy_from_linear_data(skb, to, csstart);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358
2359 csum = 0;
2360 if (csstart != skb->len)
2361 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
2362 skb->len - csstart, 0);
2363
Patrick McHardy84fa7932006-08-29 16:44:56 -07002364 if (skb->ip_summed == CHECKSUM_PARTIAL) {
Al Viroff1dcad2006-11-20 18:07:29 -08002365 long csstuff = csstart + skb->csum_offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366
Al Virod3bc23e2006-11-14 21:24:49 -08002367 *((__sum16 *)(to + csstuff)) = csum_fold(csum);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368 }
2369}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002370EXPORT_SYMBOL(skb_copy_and_csum_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371
2372/**
2373 * skb_dequeue - remove from the head of the queue
2374 * @list: list to dequeue from
2375 *
2376 * Remove the head of the list. The list lock is taken so the function
2377 * may be used safely with other locking list functions. The head item is
2378 * returned or %NULL if the list is empty.
2379 */
2380
2381struct sk_buff *skb_dequeue(struct sk_buff_head *list)
2382{
2383 unsigned long flags;
2384 struct sk_buff *result;
2385
2386 spin_lock_irqsave(&list->lock, flags);
2387 result = __skb_dequeue(list);
2388 spin_unlock_irqrestore(&list->lock, flags);
2389 return result;
2390}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002391EXPORT_SYMBOL(skb_dequeue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002392
2393/**
2394 * skb_dequeue_tail - remove from the tail of the queue
2395 * @list: list to dequeue from
2396 *
2397 * Remove the tail of the list. The list lock is taken so the function
2398 * may be used safely with other locking list functions. The tail item is
2399 * returned or %NULL if the list is empty.
2400 */
2401struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
2402{
2403 unsigned long flags;
2404 struct sk_buff *result;
2405
2406 spin_lock_irqsave(&list->lock, flags);
2407 result = __skb_dequeue_tail(list);
2408 spin_unlock_irqrestore(&list->lock, flags);
2409 return result;
2410}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002411EXPORT_SYMBOL(skb_dequeue_tail);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002412
2413/**
2414 * skb_queue_purge - empty a list
2415 * @list: list to empty
2416 *
2417 * Delete all buffers on an &sk_buff list. Each buffer is removed from
2418 * the list and one reference dropped. This function takes the list
2419 * lock and is atomic with respect to other list locking functions.
2420 */
2421void skb_queue_purge(struct sk_buff_head *list)
2422{
2423 struct sk_buff *skb;
2424 while ((skb = skb_dequeue(list)) != NULL)
2425 kfree_skb(skb);
2426}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002427EXPORT_SYMBOL(skb_queue_purge);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428
2429/**
2430 * skb_queue_head - queue a buffer at the list head
2431 * @list: list to use
2432 * @newsk: buffer to queue
2433 *
 *	Queue a buffer at the start of the list. This function takes the
 *	list lock and can be used safely with other locking &sk_buff
 *	functions.
2437 *
2438 * A buffer cannot be placed on two lists at the same time.
2439 */
2440void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
2441{
2442 unsigned long flags;
2443
2444 spin_lock_irqsave(&list->lock, flags);
2445 __skb_queue_head(list, newsk);
2446 spin_unlock_irqrestore(&list->lock, flags);
2447}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002448EXPORT_SYMBOL(skb_queue_head);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449
2450/**
2451 * skb_queue_tail - queue a buffer at the list tail
2452 * @list: list to use
2453 * @newsk: buffer to queue
2454 *
 *	Queue a buffer at the tail of the list. This function takes the
 *	list lock and can be used safely with other locking &sk_buff
 *	functions.
2458 *
2459 * A buffer cannot be placed on two lists at the same time.
2460 */
2461void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
2462{
2463 unsigned long flags;
2464
2465 spin_lock_irqsave(&list->lock, flags);
2466 __skb_queue_tail(list, newsk);
2467 spin_unlock_irqrestore(&list->lock, flags);
2468}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002469EXPORT_SYMBOL(skb_queue_tail);
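/* Editorial example (not part of the original source): the usual
 * producer/consumer pattern for an &sk_buff_head.  The queue is assumed to
 * have been initialised elsewhere with skb_queue_head_init(); the locked
 * helpers used here make the enqueue and dequeue safe against each other.
 */
static void example_enqueue_rx(struct sk_buff_head *queue, struct sk_buff *skb)
{
	skb_queue_tail(queue, skb);			/* producer side */
}

static void example_drain_rx(struct sk_buff_head *queue)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(queue)) != NULL)	/* consumer side */
		kfree_skb(skb);
}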
David S. Miller8728b832005-08-09 19:25:21 -07002470
Linus Torvalds1da177e2005-04-16 15:20:36 -07002471/**
2472 * skb_unlink - remove a buffer from a list
2473 * @skb: buffer to remove
David S. Miller8728b832005-08-09 19:25:21 -07002474 * @list: list to use
Linus Torvalds1da177e2005-04-16 15:20:36 -07002475 *
David S. Miller8728b832005-08-09 19:25:21 -07002476 * Remove a packet from a list. The list locks are taken and this
2477 * function is atomic with respect to other list locked calls
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478 *
David S. Miller8728b832005-08-09 19:25:21 -07002479 * You must know what list the SKB is on.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480 */
David S. Miller8728b832005-08-09 19:25:21 -07002481void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482{
David S. Miller8728b832005-08-09 19:25:21 -07002483 unsigned long flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484
David S. Miller8728b832005-08-09 19:25:21 -07002485 spin_lock_irqsave(&list->lock, flags);
2486 __skb_unlink(skb, list);
2487 spin_unlock_irqrestore(&list->lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002489EXPORT_SYMBOL(skb_unlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491/**
2492 * skb_append - append a buffer
2493 * @old: buffer to insert after
2494 * @newsk: buffer to insert
David S. Miller8728b832005-08-09 19:25:21 -07002495 * @list: list to use
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496 *
2497 * Place a packet after a given packet in a list. The list locks are taken
2498 * and this function is atomic with respect to other list locked calls.
2499 * A buffer cannot be placed on two lists at the same time.
2500 */
David S. Miller8728b832005-08-09 19:25:21 -07002501void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002502{
2503 unsigned long flags;
2504
David S. Miller8728b832005-08-09 19:25:21 -07002505 spin_lock_irqsave(&list->lock, flags);
Gerrit Renker7de6c032008-04-14 00:05:09 -07002506 __skb_queue_after(list, old, newsk);
David S. Miller8728b832005-08-09 19:25:21 -07002507 spin_unlock_irqrestore(&list->lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002508}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002509EXPORT_SYMBOL(skb_append);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002510
2511/**
2512 * skb_insert - insert a buffer
2513 * @old: buffer to insert before
2514 * @newsk: buffer to insert
David S. Miller8728b832005-08-09 19:25:21 -07002515 * @list: list to use
Linus Torvalds1da177e2005-04-16 15:20:36 -07002516 *
David S. Miller8728b832005-08-09 19:25:21 -07002517 * Place a packet before a given packet in a list. The list locks are
2518 * taken and this function is atomic with respect to other list locked
2519 * calls.
2520 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07002521 * A buffer cannot be placed on two lists at the same time.
2522 */
David S. Miller8728b832005-08-09 19:25:21 -07002523void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002524{
2525 unsigned long flags;
2526
David S. Miller8728b832005-08-09 19:25:21 -07002527 spin_lock_irqsave(&list->lock, flags);
2528 __skb_insert(newsk, old->prev, old, list);
2529 spin_unlock_irqrestore(&list->lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002531EXPORT_SYMBOL(skb_insert);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533static inline void skb_split_inside_header(struct sk_buff *skb,
2534 struct sk_buff* skb1,
2535 const u32 len, const int pos)
2536{
2537 int i;
2538
Arnaldo Carvalho de Melod626f622007-03-27 18:55:52 -03002539 skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
2540 pos - len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002541 /* And move data appendix as is. */
2542 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
2543 skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
2544
2545 skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
2546 skb_shinfo(skb)->nr_frags = 0;
2547 skb1->data_len = skb->data_len;
2548 skb1->len += skb1->data_len;
2549 skb->data_len = 0;
2550 skb->len = len;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07002551 skb_set_tail_pointer(skb, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552}
2553
2554static inline void skb_split_no_header(struct sk_buff *skb,
2555 struct sk_buff* skb1,
2556 const u32 len, int pos)
2557{
2558 int i, k = 0;
2559 const int nfrags = skb_shinfo(skb)->nr_frags;
2560
2561 skb_shinfo(skb)->nr_frags = 0;
2562 skb1->len = skb1->data_len = skb->len - len;
2563 skb->len = len;
2564 skb->data_len = len - pos;
2565
2566 for (i = 0; i < nfrags; i++) {
Eric Dumazet9e903e02011-10-18 21:00:24 +00002567 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568
2569 if (pos + size > len) {
2570 skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
2571
2572 if (pos < len) {
				/* Split frag.
				 * We have two variants in this case:
				 * 1. Move the whole frag to the second
				 *    part, if possible. E.g. this approach
				 *    is mandatory for TUX, where splitting
				 *    is expensive.
				 * 2. Split accurately, which is what we
				 *    do here.
				 */
Ian Campbellea2ab692011-08-22 23:44:58 +00002581 skb_frag_ref(skb, i);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002582 skb_shinfo(skb1)->frags[0].page_offset += len - pos;
Eric Dumazet9e903e02011-10-18 21:00:24 +00002583 skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
2584 skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 skb_shinfo(skb)->nr_frags++;
2586 }
2587 k++;
2588 } else
2589 skb_shinfo(skb)->nr_frags++;
2590 pos += size;
2591 }
2592 skb_shinfo(skb1)->nr_frags = k;
2593}
2594
2595/**
2596 * skb_split - Split fragmented skb to two parts at length len.
2597 * @skb: the buffer to split
2598 * @skb1: the buffer to receive the second part
2599 * @len: new length for skb
2600 */
2601void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
2602{
2603 int pos = skb_headlen(skb);
2604
Amerigo Wang68534c62013-02-19 22:51:30 +00002605 skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002606 if (len < pos) /* Split line is inside header. */
2607 skb_split_inside_header(skb, skb1, len, pos);
2608 else /* Second chunk has no header, nothing to copy. */
2609 skb_split_no_header(skb, skb1, len, pos);
2610}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002611EXPORT_SYMBOL(skb_split);
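
/*
 * A minimal usage sketch (not from this file): split a packet so that the
 * first hdr_len bytes stay in 'skb' and everything beyond them moves to a
 * freshly allocated buffer.  The caller is assumed to guarantee
 * hdr_len < skb->len and to own 'skb' exclusively.
 */
static struct sk_buff *example_split_at(struct sk_buff *skb, u32 hdr_len)
{
	/* Enough tailroom for any linear bytes that may move over. */
	struct sk_buff *skb1 = alloc_skb(skb_headlen(skb), GFP_ATOMIC);

	if (!skb1)
		return NULL;

	skb_split(skb, skb1, hdr_len);	/* skb keeps the first hdr_len bytes */
	return skb1;
}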
Linus Torvalds1da177e2005-04-16 15:20:36 -07002612
Ilpo Järvinen9f782db2008-11-25 13:57:01 -08002613/* Shifting from/to a cloned skb is a no-go.
2614 *
2615 * Caller cannot keep skb_shinfo related pointers past calling here!
2616 */
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002617static int skb_prepare_for_shift(struct sk_buff *skb)
2618{
Ilpo Järvinen0ace2852008-11-24 21:30:21 -08002619 return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002620}
2621
2622/**
2623 * skb_shift - Shifts paged data partially from skb to another
2624 * @tgt: buffer into which tail data gets added
2625 * @skb: buffer from which the paged data comes from
2626 * @shiftlen: shift up to this many bytes
2627 *
2628 * Attempts to shift up to shiftlen worth of bytes, which may be less than
Feng King20e994a2011-11-21 01:47:11 +00002629 * the length of the skb, from skb to tgt. Returns the number of bytes shifted.
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002630 * It's up to caller to free skb if everything was shifted.
2631 *
2632 * If @tgt runs out of frags, the whole operation is aborted.
2633 *
2634 * The skb may contain nothing but paged data, while tgt is allowed
2635 * to have non-paged (linear) data as well.
2636 *
2637 * TODO: full sized shift could be optimized but that would need
2638 * specialized skb free'er to handle frags without up-to-date nr_frags.
2639 */
2640int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
2641{
2642 int from, to, merge, todo;
2643 struct skb_frag_struct *fragfrom, *fragto;
2644
2645 BUG_ON(shiftlen > skb->len);
2646 BUG_ON(skb_headlen(skb)); /* Would corrupt stream */
2647
2648 todo = shiftlen;
2649 from = 0;
2650 to = skb_shinfo(tgt)->nr_frags;
2651 fragfrom = &skb_shinfo(skb)->frags[from];
2652
2653 /* Actual merge is delayed until the point when we know we can
2654 * commit all, so that we don't have to undo partial changes
2655 */
2656 if (!to ||
Ian Campbellea2ab692011-08-22 23:44:58 +00002657 !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
2658 fragfrom->page_offset)) {
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002659 merge = -1;
2660 } else {
2661 merge = to - 1;
2662
Eric Dumazet9e903e02011-10-18 21:00:24 +00002663 todo -= skb_frag_size(fragfrom);
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002664 if (todo < 0) {
2665 if (skb_prepare_for_shift(skb) ||
2666 skb_prepare_for_shift(tgt))
2667 return 0;
2668
Ilpo Järvinen9f782db2008-11-25 13:57:01 -08002669 /* All previous frag pointers might be stale! */
2670 fragfrom = &skb_shinfo(skb)->frags[from];
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002671 fragto = &skb_shinfo(tgt)->frags[merge];
2672
Eric Dumazet9e903e02011-10-18 21:00:24 +00002673 skb_frag_size_add(fragto, shiftlen);
2674 skb_frag_size_sub(fragfrom, shiftlen);
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002675 fragfrom->page_offset += shiftlen;
2676
2677 goto onlymerged;
2678 }
2679
2680 from++;
2681 }
2682
2683 /* Skip full, not-fitting skb to avoid expensive operations */
2684 if ((shiftlen == skb->len) &&
2685 (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
2686 return 0;
2687
2688 if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt))
2689 return 0;
2690
2691 while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
2692 if (to == MAX_SKB_FRAGS)
2693 return 0;
2694
2695 fragfrom = &skb_shinfo(skb)->frags[from];
2696 fragto = &skb_shinfo(tgt)->frags[to];
2697
Eric Dumazet9e903e02011-10-18 21:00:24 +00002698 if (todo >= skb_frag_size(fragfrom)) {
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002699 *fragto = *fragfrom;
Eric Dumazet9e903e02011-10-18 21:00:24 +00002700 todo -= skb_frag_size(fragfrom);
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002701 from++;
2702 to++;
2703
2704 } else {
Ian Campbellea2ab692011-08-22 23:44:58 +00002705 __skb_frag_ref(fragfrom);
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002706 fragto->page = fragfrom->page;
2707 fragto->page_offset = fragfrom->page_offset;
Eric Dumazet9e903e02011-10-18 21:00:24 +00002708 skb_frag_size_set(fragto, todo);
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002709
2710 fragfrom->page_offset += todo;
Eric Dumazet9e903e02011-10-18 21:00:24 +00002711 skb_frag_size_sub(fragfrom, todo);
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002712 todo = 0;
2713
2714 to++;
2715 break;
2716 }
2717 }
2718
2719 /* Ready to "commit" this state change to tgt */
2720 skb_shinfo(tgt)->nr_frags = to;
2721
2722 if (merge >= 0) {
2723 fragfrom = &skb_shinfo(skb)->frags[0];
2724 fragto = &skb_shinfo(tgt)->frags[merge];
2725
Eric Dumazet9e903e02011-10-18 21:00:24 +00002726 skb_frag_size_add(fragto, skb_frag_size(fragfrom));
Ian Campbellea2ab692011-08-22 23:44:58 +00002727 __skb_frag_unref(fragfrom);
Ilpo Järvinen832d11c2008-11-24 21:20:15 -08002728 }
2729
2730 /* Reposition in the original skb */
2731 to = 0;
2732 while (from < skb_shinfo(skb)->nr_frags)
2733 skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
2734 skb_shinfo(skb)->nr_frags = to;
2735
2736 BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);
2737
2738onlymerged:
2739 /* Most likely the tgt won't ever need its checksum anymore, skb on
2740 * the other hand might need it if it needs to be resent
2741 */
2742 tgt->ip_summed = CHECKSUM_PARTIAL;
2743 skb->ip_summed = CHECKSUM_PARTIAL;
2744
2745 /* Yak, is it really working this way? Some helper please? */
2746 skb->len -= shiftlen;
2747 skb->data_len -= shiftlen;
2748 skb->truesize -= shiftlen;
2749 tgt->len += shiftlen;
2750 tgt->data_len += shiftlen;
2751 tgt->truesize += shiftlen;
2752
2753 return shiftlen;
2754}
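
/*
 * A minimal usage sketch (not from this file): try to coalesce up to 'want'
 * bytes of paged data from 'skb' into the preceding buffer 'prev', much as
 * TCP does when collapsing SACKed segments.  Unlinking and freeing a fully
 * drained 'skb' is left to the caller, as the comment above notes.
 */
static int example_coalesce(struct sk_buff *prev, struct sk_buff *skb,
			    int want)
{
	if (skb_headlen(skb))	/* skb_shift() handles paged data only */
		return 0;

	return skb_shift(prev, skb, min_t(int, want, skb->len));
}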
2755
Thomas Graf677e90e2005-06-23 20:59:51 -07002756/**
2757 * skb_prepare_seq_read - Prepare a sequential read of skb data
2758 * @skb: the buffer to read
2759 * @from: lower offset of data to be read
2760 * @to: upper offset of data to be read
2761 * @st: state variable
2762 *
2763 * Initializes the specified state variable. Must be called before
2764 * invoking skb_seq_read() for the first time.
2765 */
2766void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
2767 unsigned int to, struct skb_seq_state *st)
2768{
2769 st->lower_offset = from;
2770 st->upper_offset = to;
2771 st->root_skb = st->cur_skb = skb;
2772 st->frag_idx = st->stepped_offset = 0;
2773 st->frag_data = NULL;
2774}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002775EXPORT_SYMBOL(skb_prepare_seq_read);
Thomas Graf677e90e2005-06-23 20:59:51 -07002776
2777/**
2778 * skb_seq_read - Sequentially read skb data
2779 * @consumed: number of bytes consumed by the caller so far
2780 * @data: destination pointer for data to be returned
2781 * @st: state variable
2782 *
Mathias Krausebc323832013-11-07 14:18:26 +01002783 * Reads a block of skb data at @consumed relative to the
Thomas Graf677e90e2005-06-23 20:59:51 -07002784 * lower offset specified to skb_prepare_seq_read(). Assigns
Mathias Krausebc323832013-11-07 14:18:26 +01002785 * the head of the data block to @data and returns the length
Thomas Graf677e90e2005-06-23 20:59:51 -07002786 * of the block or 0 if the end of the skb data or the upper
2787 * offset has been reached.
2788 *
2789 * The caller is not required to consume all of the data
Mathias Krausebc323832013-11-07 14:18:26 +01002790 * returned, i.e. @consumed is typically set to the number
Thomas Graf677e90e2005-06-23 20:59:51 -07002791 * of bytes already consumed and the next call to
2792 * skb_seq_read() will return the remaining part of the block.
2793 *
Lucas De Marchi25985ed2011-03-30 22:57:33 -03002794 * Note 1: The size of each block of data returned can be arbitrary;
Masanari Iidae793c0f2014-09-04 23:44:36 +09002795 * this limitation is the cost of zerocopy sequential
Thomas Graf677e90e2005-06-23 20:59:51 -07002796 * reads of potentially non-linear data.
2797 *
Randy Dunlapbc2cda12008-02-13 15:03:25 -08002798 * Note 2: Fragment lists within fragments are not implemented
Thomas Graf677e90e2005-06-23 20:59:51 -07002799 * at the moment, state->root_skb could be replaced with
2800 * a stack for this purpose.
2801 */
2802unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
2803 struct skb_seq_state *st)
2804{
2805 unsigned int block_limit, abs_offset = consumed + st->lower_offset;
2806 skb_frag_t *frag;
2807
Wedson Almeida Filhoaeb193e2013-06-23 23:33:48 -07002808 if (unlikely(abs_offset >= st->upper_offset)) {
2809 if (st->frag_data) {
2810 kunmap_atomic(st->frag_data);
2811 st->frag_data = NULL;
2812 }
Thomas Graf677e90e2005-06-23 20:59:51 -07002813 return 0;
Wedson Almeida Filhoaeb193e2013-06-23 23:33:48 -07002814 }
Thomas Graf677e90e2005-06-23 20:59:51 -07002815
2816next_skb:
Herbert Xu95e3b242009-01-29 16:07:52 -08002817 block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
Thomas Graf677e90e2005-06-23 20:59:51 -07002818
Thomas Chenault995b3372009-05-18 21:43:27 -07002819 if (abs_offset < block_limit && !st->frag_data) {
Herbert Xu95e3b242009-01-29 16:07:52 -08002820 *data = st->cur_skb->data + (abs_offset - st->stepped_offset);
Thomas Graf677e90e2005-06-23 20:59:51 -07002821 return block_limit - abs_offset;
2822 }
2823
2824 if (st->frag_idx == 0 && !st->frag_data)
2825 st->stepped_offset += skb_headlen(st->cur_skb);
2826
2827 while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
2828 frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
Eric Dumazet9e903e02011-10-18 21:00:24 +00002829 block_limit = skb_frag_size(frag) + st->stepped_offset;
Thomas Graf677e90e2005-06-23 20:59:51 -07002830
2831 if (abs_offset < block_limit) {
2832 if (!st->frag_data)
Eric Dumazet51c56b02012-04-05 11:35:15 +02002833 st->frag_data = kmap_atomic(skb_frag_page(frag));
Thomas Graf677e90e2005-06-23 20:59:51 -07002834
2835 *data = (u8 *) st->frag_data + frag->page_offset +
2836 (abs_offset - st->stepped_offset);
2837
2838 return block_limit - abs_offset;
2839 }
2840
2841 if (st->frag_data) {
Eric Dumazet51c56b02012-04-05 11:35:15 +02002842 kunmap_atomic(st->frag_data);
Thomas Graf677e90e2005-06-23 20:59:51 -07002843 st->frag_data = NULL;
2844 }
2845
2846 st->frag_idx++;
Eric Dumazet9e903e02011-10-18 21:00:24 +00002847 st->stepped_offset += skb_frag_size(frag);
Thomas Graf677e90e2005-06-23 20:59:51 -07002848 }
2849
Olaf Kirch5b5a60d2007-06-23 23:11:52 -07002850 if (st->frag_data) {
Eric Dumazet51c56b02012-04-05 11:35:15 +02002851 kunmap_atomic(st->frag_data);
Olaf Kirch5b5a60d2007-06-23 23:11:52 -07002852 st->frag_data = NULL;
2853 }
2854
David S. Miller21dc3302010-08-23 00:13:46 -07002855 if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
Shyam Iyer71b33462009-01-29 16:12:42 -08002856 st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
Thomas Graf677e90e2005-06-23 20:59:51 -07002857 st->frag_idx = 0;
2858 goto next_skb;
Shyam Iyer71b33462009-01-29 16:12:42 -08002859 } else if (st->cur_skb->next) {
2860 st->cur_skb = st->cur_skb->next;
Herbert Xu95e3b242009-01-29 16:07:52 -08002861 st->frag_idx = 0;
Thomas Graf677e90e2005-06-23 20:59:51 -07002862 goto next_skb;
2863 }
2864
2865 return 0;
2866}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002867EXPORT_SYMBOL(skb_seq_read);
Thomas Graf677e90e2005-06-23 20:59:51 -07002868
2869/**
2870 * skb_abort_seq_read - Abort a sequential read of skb data
2871 * @st: state variable
2872 *
2873 * Must be called if skb_seq_read() was not called until it
2874 * returned 0.
2875 */
2876void skb_abort_seq_read(struct skb_seq_state *st)
2877{
2878 if (st->frag_data)
Eric Dumazet51c56b02012-04-05 11:35:15 +02002879 kunmap_atomic(st->frag_data);
Thomas Graf677e90e2005-06-23 20:59:51 -07002880}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002881EXPORT_SYMBOL(skb_abort_seq_read);
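
/*
 * A minimal usage sketch (not from this file): walk the bytes of a possibly
 * non-linear skb between 'from' and 'to' with the sequential read API.
 * consume_block() is an assumed caller-supplied callback.
 */
static void example_seq_walk(struct sk_buff *skb, unsigned int from,
			     unsigned int to,
			     void (*consume_block)(const u8 *data,
						   unsigned int len))
{
	struct skb_seq_state st;
	const u8 *data;
	unsigned int consumed = 0;
	unsigned int len;

	skb_prepare_seq_read(skb, from, to, &st);
	while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
		consume_block(data, len);
		consumed += len;
	}
	/* skb_seq_read() returned 0 here, so no skb_abort_seq_read() is
	 * needed; it is only required when the walk stops early. */
}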
Thomas Graf677e90e2005-06-23 20:59:51 -07002882
Thomas Graf3fc7e8a2005-06-23 21:00:17 -07002883#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
2884
2885static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
2886 struct ts_config *conf,
2887 struct ts_state *state)
2888{
2889 return skb_seq_read(offset, text, TS_SKB_CB(state));
2890}
2891
2892static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
2893{
2894 skb_abort_seq_read(TS_SKB_CB(state));
2895}
2896
2897/**
2898 * skb_find_text - Find a text pattern in skb data
2899 * @skb: the buffer to look in
2900 * @from: search offset
2901 * @to: search limit
2902 * @config: textsearch configuration
Thomas Graf3fc7e8a2005-06-23 21:00:17 -07002903 *
2904 * Finds a pattern in the skb data according to the specified
2905 * textsearch configuration. Use textsearch_next() to retrieve
2906 * subsequent occurrences of the pattern. Returns the offset
2907 * to the first occurrence or UINT_MAX if no match was found.
2908 */
2909unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
Bojan Prtvar059a2442015-02-22 11:46:35 +01002910 unsigned int to, struct ts_config *config)
Thomas Graf3fc7e8a2005-06-23 21:00:17 -07002911{
Bojan Prtvar059a2442015-02-22 11:46:35 +01002912 struct ts_state state;
Phil Oesterf72b9482006-06-26 00:00:57 -07002913 unsigned int ret;
2914
Thomas Graf3fc7e8a2005-06-23 21:00:17 -07002915 config->get_next_block = skb_ts_get_next_block;
2916 config->finish = skb_ts_finish;
2917
Bojan Prtvar059a2442015-02-22 11:46:35 +01002918 skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state));
Thomas Graf3fc7e8a2005-06-23 21:00:17 -07002919
Bojan Prtvar059a2442015-02-22 11:46:35 +01002920 ret = textsearch_find(config, &state);
Phil Oesterf72b9482006-06-26 00:00:57 -07002921 return (ret <= to - from ? ret : UINT_MAX);
Thomas Graf3fc7e8a2005-06-23 21:00:17 -07002922}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002923EXPORT_SYMBOL(skb_find_text);
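
/*
 * A minimal usage sketch (not from this file): look for a byte pattern
 * anywhere in an skb, the way netfilter string matching does.  The "kmp"
 * algorithm name and the TS_AUTOLOAD flag come from lib/textsearch and are
 * assumptions made for illustration.
 */
static bool example_skb_contains(struct sk_buff *skb, const char *pattern)
{
	struct ts_config *conf;
	unsigned int pos;

	conf = textsearch_prepare("kmp", pattern, strlen(pattern),
				  GFP_KERNEL, TS_AUTOLOAD);
	if (IS_ERR(conf))
		return false;

	pos = skb_find_text(skb, 0, skb->len, conf);
	textsearch_destroy(conf);

	return pos != UINT_MAX;
}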
Thomas Graf3fc7e8a2005-06-23 21:00:17 -07002924
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002925/**
Ben Hutchings2c530402012-07-10 10:55:09 +00002926 * skb_append_datato_frags - append the user data to a skb
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002927 * @sk: sock structure
Masanari Iidae793c0f2014-09-04 23:44:36 +09002928 * @skb: skb structure to which the user data is appended.
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002929 * @getfrag: call back function to be used for getting the user data
2930 * @from: pointer to user message iov
2931 * @length: length of the iov message
2932 *
2933 * Description: This procedure appends the user data to the fragment part
2934 * of the skb. If any page allocation fails, this procedure returns -ENOMEM.
2935 */
2936int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
Martin Waitzdab96302005-12-05 13:40:12 -08002937 int (*getfrag)(void *from, char *to, int offset,
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002938 int len, int odd, struct sk_buff *skb),
2939 void *from, int length)
2940{
Eric Dumazetb2111722012-12-28 06:06:37 +00002941 int frg_cnt = skb_shinfo(skb)->nr_frags;
2942 int copy;
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002943 int offset = 0;
2944 int ret;
Eric Dumazetb2111722012-12-28 06:06:37 +00002945 struct page_frag *pfrag = &current->task_frag;
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002946
2947 do {
2948 /* Return error if we don't have space for new frag */
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002949 if (frg_cnt >= MAX_SKB_FRAGS)
Eric Dumazetb2111722012-12-28 06:06:37 +00002950 return -EMSGSIZE;
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002951
Eric Dumazetb2111722012-12-28 06:06:37 +00002952 if (!sk_page_frag_refill(sk, pfrag))
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002953 return -ENOMEM;
2954
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002955 /* copy the user data to page */
Eric Dumazetb2111722012-12-28 06:06:37 +00002956 copy = min_t(int, length, pfrag->size - pfrag->offset);
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002957
Eric Dumazetb2111722012-12-28 06:06:37 +00002958 ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
2959 offset, copy, 0, skb);
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002960 if (ret < 0)
2961 return -EFAULT;
2962
2963 /* copy was successful so update the size parameters */
Eric Dumazetb2111722012-12-28 06:06:37 +00002964 skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
2965 copy);
2966 frg_cnt++;
2967 pfrag->offset += copy;
2968 get_page(pfrag->page);
2969
2970 skb->truesize += copy;
2971 atomic_add(copy, &sk->sk_wmem_alloc);
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002972 skb->len += copy;
2973 skb->data_len += copy;
2974 offset += copy;
2975 length -= copy;
2976
2977 } while (length > 0);
2978
2979 return 0;
2980}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08002981EXPORT_SYMBOL(skb_append_datato_frags);
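
/*
 * A minimal usage sketch (not from this file): append 'len' bytes from a
 * plain kernel buffer to the paged part of an skb.  The trivial getfrag
 * callback below is an assumption for illustration; real callers such as
 * the UDP path pass iovec-aware helpers instead.
 */
static int example_getfrag(void *from, char *to, int offset, int len,
			   int odd, struct sk_buff *skb)
{
	memcpy(to, (char *)from + offset, len);	/* 'from' is a linear buffer */
	return 0;
}

static int example_append_buf(struct sock *sk, struct sk_buff *skb,
			      void *buf, int len)
{
	return skb_append_datato_frags(sk, skb, example_getfrag, buf, len);
}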
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002982
Herbert Xucbb042f2006-03-20 22:43:56 -08002983/**
2984 * skb_pull_rcsum - pull skb and update receive checksum
2985 * @skb: buffer to update
Herbert Xucbb042f2006-03-20 22:43:56 -08002986 * @len: length of data pulled
2987 *
2988 * This function performs an skb_pull on the packet and updates
Urs Thuermannfee54fa2008-02-12 22:03:25 -08002989 * the CHECKSUM_COMPLETE checksum. It should be used on
Patrick McHardy84fa7932006-08-29 16:44:56 -07002990 * receive path processing instead of skb_pull unless you know
2991 * that the checksum difference is zero (e.g., a valid IP header)
2992 * or you are setting ip_summed to CHECKSUM_NONE.
Herbert Xucbb042f2006-03-20 22:43:56 -08002993 */
2994unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
2995{
2996 BUG_ON(len > skb->len);
2997 skb->len -= len;
2998 BUG_ON(skb->len < skb->data_len);
2999 skb_postpull_rcsum(skb, skb->data, len);
3000 return skb->data += len;
3001}
Arnaldo Carvalho de Melof94691a2006-03-20 22:47:55 -08003002EXPORT_SYMBOL_GPL(skb_pull_rcsum);
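
/*
 * A minimal usage sketch (not from this file): pull a VLAN header on the
 * receive path while keeping a CHECKSUM_COMPLETE value consistent.  Moving
 * the MAC addresses and updating skb->protocol, as real untagging must do,
 * is omitted here.
 */
static void example_pull_vlan(struct sk_buff *skb)
{
	if (!pskb_may_pull(skb, VLAN_HLEN))
		return;

	/* Adjusts skb->csum when ip_summed is CHECKSUM_COMPLETE. */
	skb_pull_rcsum(skb, VLAN_HLEN);
}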
3003
Herbert Xuf4c50d92006-06-22 03:02:40 -07003004/**
3005 * skb_segment - Perform protocol segmentation on skb.
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003006 * @head_skb: buffer to segment
Herbert Xu576a30e2006-06-27 13:22:38 -07003007 * @features: features for the output path (see dev->features)
Herbert Xuf4c50d92006-06-22 03:02:40 -07003008 *
3009 * This function performs segmentation on the given skb. It returns
Ben Hutchings4c821d72008-04-13 21:52:48 -07003010 * a pointer to the first in a list of new skbs for the segments.
3011 * In case of error it returns ERR_PTR(err).
Herbert Xuf4c50d92006-06-22 03:02:40 -07003012 */
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003013struct sk_buff *skb_segment(struct sk_buff *head_skb,
3014 netdev_features_t features)
Herbert Xuf4c50d92006-06-22 03:02:40 -07003015{
3016 struct sk_buff *segs = NULL;
3017 struct sk_buff *tail = NULL;
Michael S. Tsirkin1a4ceda2014-03-10 19:27:59 +02003018 struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003019 skb_frag_t *frag = skb_shinfo(head_skb)->frags;
3020 unsigned int mss = skb_shinfo(head_skb)->gso_size;
3021 unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
Michael S. Tsirkin1fd819e2014-03-10 19:28:08 +02003022 struct sk_buff *frag_skb = head_skb;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003023 unsigned int offset = doffset;
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003024 unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
Herbert Xuf4c50d92006-06-22 03:02:40 -07003025 unsigned int headroom;
3026 unsigned int len;
Pravin B Shelarec5f0612013-03-07 09:28:01 +00003027 __be16 proto;
3028 bool csum;
Michał Mirosław04ed3e72011-01-24 15:32:47 -08003029 int sg = !!(features & NETIF_F_SG);
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003030 int nfrags = skb_shinfo(head_skb)->nr_frags;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003031 int err = -ENOMEM;
3032 int i = 0;
3033 int pos;
Vlad Yasevich53d64712014-03-27 17:26:18 -04003034 int dummy;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003035
Wei-Chun Chao5882a072014-06-08 23:48:54 -07003036 __skb_push(head_skb, doffset);
Vlad Yasevich53d64712014-03-27 17:26:18 -04003037 proto = skb_network_protocol(head_skb, &dummy);
Pravin B Shelarec5f0612013-03-07 09:28:01 +00003038 if (unlikely(!proto))
3039 return ERR_PTR(-EINVAL);
3040
Tom Herbert7e2b10c2014-06-04 17:20:02 -07003041 csum = !head_skb->encap_hdr_csum &&
3042 !!can_checksum_protocol(features, proto);
3043
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003044 headroom = skb_headroom(head_skb);
3045 pos = skb_headlen(head_skb);
Herbert Xuf4c50d92006-06-22 03:02:40 -07003046
3047 do {
3048 struct sk_buff *nskb;
Michael S. Tsirkin8cb19902014-03-10 18:29:04 +02003049 skb_frag_t *nskb_frag;
Herbert Xuc8884ed2006-10-29 15:59:41 -08003050 int hsize;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003051 int size;
3052
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003053 len = head_skb->len - offset;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003054 if (len > mss)
3055 len = mss;
3056
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003057 hsize = skb_headlen(head_skb) - offset;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003058 if (hsize < 0)
3059 hsize = 0;
Herbert Xuc8884ed2006-10-29 15:59:41 -08003060 if (hsize > len || !sg)
3061 hsize = len;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003062
Michael S. Tsirkin1a4ceda2014-03-10 19:27:59 +02003063 if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
3064 (skb_headlen(list_skb) == len || sg)) {
3065 BUG_ON(skb_headlen(list_skb) > len);
Herbert Xu89319d382008-12-15 23:26:06 -08003066
Herbert Xu9d8506c2013-11-21 11:10:04 -08003067 i = 0;
Michael S. Tsirkin1a4ceda2014-03-10 19:27:59 +02003068 nfrags = skb_shinfo(list_skb)->nr_frags;
3069 frag = skb_shinfo(list_skb)->frags;
Michael S. Tsirkin1fd819e2014-03-10 19:28:08 +02003070 frag_skb = list_skb;
Michael S. Tsirkin1a4ceda2014-03-10 19:27:59 +02003071 pos += skb_headlen(list_skb);
Herbert Xu9d8506c2013-11-21 11:10:04 -08003072
3073 while (pos < offset + len) {
3074 BUG_ON(i >= nfrags);
3075
Michael S. Tsirkin4e1beba2014-03-10 18:29:14 +02003076 size = skb_frag_size(frag);
Herbert Xu9d8506c2013-11-21 11:10:04 -08003077 if (pos + size > offset + len)
3078 break;
3079
3080 i++;
3081 pos += size;
Michael S. Tsirkin4e1beba2014-03-10 18:29:14 +02003082 frag++;
Herbert Xu9d8506c2013-11-21 11:10:04 -08003083 }
3084
Michael S. Tsirkin1a4ceda2014-03-10 19:27:59 +02003085 nskb = skb_clone(list_skb, GFP_ATOMIC);
3086 list_skb = list_skb->next;
Herbert Xu89319d382008-12-15 23:26:06 -08003087
3088 if (unlikely(!nskb))
3089 goto err;
3090
Herbert Xu9d8506c2013-11-21 11:10:04 -08003091 if (unlikely(pskb_trim(nskb, len))) {
3092 kfree_skb(nskb);
3093 goto err;
3094 }
3095
Alexander Duyckec47ea82012-05-04 14:26:56 +00003096 hsize = skb_end_offset(nskb);
Herbert Xu89319d382008-12-15 23:26:06 -08003097 if (skb_cow_head(nskb, doffset + headroom)) {
3098 kfree_skb(nskb);
3099 goto err;
3100 }
3101
Alexander Duyckec47ea82012-05-04 14:26:56 +00003102 nskb->truesize += skb_end_offset(nskb) - hsize;
Herbert Xu89319d382008-12-15 23:26:06 -08003103 skb_release_head_state(nskb);
3104 __skb_push(nskb, doffset);
3105 } else {
Mel Gormanc93bdd02012-07-31 16:44:19 -07003106 nskb = __alloc_skb(hsize + doffset + headroom,
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003107 GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
Mel Gormanc93bdd02012-07-31 16:44:19 -07003108 NUMA_NO_NODE);
Herbert Xu89319d382008-12-15 23:26:06 -08003109
3110 if (unlikely(!nskb))
3111 goto err;
3112
3113 skb_reserve(nskb, headroom);
3114 __skb_put(nskb, doffset);
3115 }
Herbert Xuf4c50d92006-06-22 03:02:40 -07003116
3117 if (segs)
3118 tail->next = nskb;
3119 else
3120 segs = nskb;
3121 tail = nskb;
3122
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003123 __copy_skb_header(nskb, head_skb);
Herbert Xuf4c50d92006-06-22 03:02:40 -07003124
Eric Dumazet030737b2013-10-19 11:42:54 -07003125 skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
Vlad Yasevichfcdfe3a2014-07-31 10:33:06 -04003126 skb_reset_mac_len(nskb);
Pravin B Shelar68c33162013-02-14 14:02:41 +00003127
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003128 skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
Pravin B Shelar68c33162013-02-14 14:02:41 +00003129 nskb->data - tnl_hlen,
3130 doffset + tnl_hlen);
Herbert Xu89319d382008-12-15 23:26:06 -08003131
Herbert Xu9d8506c2013-11-21 11:10:04 -08003132 if (nskb->len == len + doffset)
Simon Horman1cdbcb72013-05-19 15:46:49 +00003133 goto perform_csum_check;
Herbert Xu89319d382008-12-15 23:26:06 -08003134
Tom Herberte585f232014-11-04 09:06:54 -08003135 if (!sg && !nskb->remcsum_offload) {
Herbert Xu6f85a122008-08-15 14:55:02 -07003136 nskb->ip_summed = CHECKSUM_NONE;
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003137 nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
Herbert Xuf4c50d92006-06-22 03:02:40 -07003138 skb_put(nskb, len),
3139 len, 0);
Tom Herbert7e2b10c2014-06-04 17:20:02 -07003140 SKB_GSO_CB(nskb)->csum_start =
Tom Herbertde843722014-06-25 12:51:01 -07003141 skb_headroom(nskb) + doffset;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003142 continue;
3143 }
3144
Michael S. Tsirkin8cb19902014-03-10 18:29:04 +02003145 nskb_frag = skb_shinfo(nskb)->frags;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003146
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003147 skb_copy_from_linear_data_offset(head_skb, offset,
Arnaldo Carvalho de Melod626f622007-03-27 18:55:52 -03003148 skb_put(nskb, hsize), hsize);
Herbert Xuf4c50d92006-06-22 03:02:40 -07003149
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003150 skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
3151 SKBTX_SHARED_FRAG;
Eric Dumazetcef401d2013-01-25 20:34:37 +00003152
Herbert Xu9d8506c2013-11-21 11:10:04 -08003153 while (pos < offset + len) {
3154 if (i >= nfrags) {
Michael S. Tsirkin1a4ceda2014-03-10 19:27:59 +02003155 BUG_ON(skb_headlen(list_skb));
Herbert Xu9d8506c2013-11-21 11:10:04 -08003156
3157 i = 0;
Michael S. Tsirkin1a4ceda2014-03-10 19:27:59 +02003158 nfrags = skb_shinfo(list_skb)->nr_frags;
3159 frag = skb_shinfo(list_skb)->frags;
Michael S. Tsirkin1fd819e2014-03-10 19:28:08 +02003160 frag_skb = list_skb;
Herbert Xu9d8506c2013-11-21 11:10:04 -08003161
3162 BUG_ON(!nfrags);
3163
Michael S. Tsirkin1a4ceda2014-03-10 19:27:59 +02003164 list_skb = list_skb->next;
Herbert Xu9d8506c2013-11-21 11:10:04 -08003165 }
3166
3167 if (unlikely(skb_shinfo(nskb)->nr_frags >=
3168 MAX_SKB_FRAGS)) {
3169 net_warn_ratelimited(
3170 "skb_segment: too many frags: %u %u\n",
3171 pos, mss);
3172 goto err;
3173 }
3174
Michael S. Tsirkin1fd819e2014-03-10 19:28:08 +02003175 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
3176 goto err;
3177
Michael S. Tsirkin4e1beba2014-03-10 18:29:14 +02003178 *nskb_frag = *frag;
Michael S. Tsirkin8cb19902014-03-10 18:29:04 +02003179 __skb_frag_ref(nskb_frag);
3180 size = skb_frag_size(nskb_frag);
Herbert Xuf4c50d92006-06-22 03:02:40 -07003181
3182 if (pos < offset) {
Michael S. Tsirkin8cb19902014-03-10 18:29:04 +02003183 nskb_frag->page_offset += offset - pos;
3184 skb_frag_size_sub(nskb_frag, offset - pos);
Herbert Xuf4c50d92006-06-22 03:02:40 -07003185 }
3186
Herbert Xu89319d382008-12-15 23:26:06 -08003187 skb_shinfo(nskb)->nr_frags++;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003188
3189 if (pos + size <= offset + len) {
3190 i++;
Michael S. Tsirkin4e1beba2014-03-10 18:29:14 +02003191 frag++;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003192 pos += size;
3193 } else {
Michael S. Tsirkin8cb19902014-03-10 18:29:04 +02003194 skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
Herbert Xu89319d382008-12-15 23:26:06 -08003195 goto skip_fraglist;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003196 }
3197
Michael S. Tsirkin8cb19902014-03-10 18:29:04 +02003198 nskb_frag++;
Herbert Xuf4c50d92006-06-22 03:02:40 -07003199 }
3200
Herbert Xu89319d382008-12-15 23:26:06 -08003201skip_fraglist:
Herbert Xuf4c50d92006-06-22 03:02:40 -07003202 nskb->data_len = len - hsize;
3203 nskb->len += nskb->data_len;
3204 nskb->truesize += nskb->data_len;
Pravin B Shelarec5f0612013-03-07 09:28:01 +00003205
Simon Horman1cdbcb72013-05-19 15:46:49 +00003206perform_csum_check:
Tom Herberte585f232014-11-04 09:06:54 -08003207 if (!csum && !nskb->remcsum_offload) {
Pravin B Shelarec5f0612013-03-07 09:28:01 +00003208 nskb->csum = skb_checksum(nskb, doffset,
3209 nskb->len - doffset, 0);
3210 nskb->ip_summed = CHECKSUM_NONE;
Tom Herbert7e2b10c2014-06-04 17:20:02 -07003211 SKB_GSO_CB(nskb)->csum_start =
3212 skb_headroom(nskb) + doffset;
Pravin B Shelarec5f0612013-03-07 09:28:01 +00003213 }
Michael S. Tsirkindf5771f2014-03-10 18:29:19 +02003214 } while ((offset += len) < head_skb->len);
Herbert Xuf4c50d92006-06-22 03:02:40 -07003215
Eric Dumazetbec3cfd2014-10-03 20:59:19 -07003216 /* Some callers want to get the end of the list.
3217 * Put it in segs->prev to avoid walking the list.
3218 * (see validate_xmit_skb_list() for example)
3219 */
3220 segs->prev = tail;
Toshiaki Makita432c8562014-10-27 10:30:51 -07003221
3222 /* The following permits correct backpressure for protocols
3223 * using skb_set_owner_w().
3224 * The idea is to transfer ownership from head_skb to the last segment.
3225 */
3226 if (head_skb->destructor == sock_wfree) {
3227 swap(tail->truesize, head_skb->truesize);
3228 swap(tail->destructor, head_skb->destructor);
3229 swap(tail->sk, head_skb->sk);
3230 }
Herbert Xuf4c50d92006-06-22 03:02:40 -07003231 return segs;
3232
3233err:
Eric Dumazet289dccb2013-12-20 14:29:08 -08003234 kfree_skb_list(segs);
Herbert Xuf4c50d92006-06-22 03:02:40 -07003235 return ERR_PTR(err);
3236}
Herbert Xuf4c50d92006-06-22 03:02:40 -07003237EXPORT_SYMBOL_GPL(skb_segment);
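
/*
 * A minimal usage sketch (not from this file): segment a GSO skb and
 * transmit the resulting list one buffer at a time.  'features' would
 * normally come from the output device (e.g. via netif_skb_features());
 * the error handling here is deliberately minimal.
 */
static int example_segment_and_xmit(struct sk_buff *skb,
				    netdev_features_t features)
{
	struct sk_buff *segs, *nskb;

	segs = skb_segment(skb, features);
	if (IS_ERR(segs))
		return PTR_ERR(segs);

	consume_skb(skb);	/* the payload now lives in the segments */

	while (segs) {
		nskb = segs;
		segs = segs->next;
		nskb->next = NULL;
		dev_queue_xmit(nskb);	/* per-segment status ignored here */
	}
	return 0;
}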
3238
Herbert Xu71d93b32008-12-15 23:42:33 -08003239int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
3240{
Eric Dumazet8a291112013-10-08 09:02:23 -07003241 struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
Herbert Xu67147ba2009-05-26 18:50:22 +00003242 unsigned int offset = skb_gro_offset(skb);
3243 unsigned int headlen = skb_headlen(skb);
Eric Dumazet8a291112013-10-08 09:02:23 -07003244 unsigned int len = skb_gro_len(skb);
Eric Dumazet58025e42015-03-05 13:47:48 -08003245 struct sk_buff *lp, *p = *head;
Eric Dumazet715dc1f2012-05-02 23:33:21 +00003246 unsigned int delta_truesize;
Herbert Xu71d93b32008-12-15 23:42:33 -08003247
Eric Dumazet8a291112013-10-08 09:02:23 -07003248 if (unlikely(p->len + len >= 65536))
Herbert Xu71d93b32008-12-15 23:42:33 -08003249 return -E2BIG;
3250
Eric Dumazet29e98242014-05-16 11:34:37 -07003251 lp = NAPI_GRO_CB(p)->last;
Eric Dumazet8a291112013-10-08 09:02:23 -07003252 pinfo = skb_shinfo(lp);
3253
3254 if (headlen <= offset) {
Herbert Xu42da6992009-05-26 18:50:19 +00003255 skb_frag_t *frag;
Herbert Xu66e92fc2009-05-26 18:50:32 +00003256 skb_frag_t *frag2;
Herbert Xu9aaa1562009-05-26 18:50:33 +00003257 int i = skbinfo->nr_frags;
3258 int nr_frags = pinfo->nr_frags + i;
Herbert Xu42da6992009-05-26 18:50:19 +00003259
Herbert Xu66e92fc2009-05-26 18:50:32 +00003260 if (nr_frags > MAX_SKB_FRAGS)
Eric Dumazet8a291112013-10-08 09:02:23 -07003261 goto merge;
Herbert Xu81705ad2009-01-29 14:19:51 +00003262
Eric Dumazet8a291112013-10-08 09:02:23 -07003263 offset -= headlen;
Herbert Xu9aaa1562009-05-26 18:50:33 +00003264 pinfo->nr_frags = nr_frags;
3265 skbinfo->nr_frags = 0;
Herbert Xuf5572062009-01-14 20:40:03 -08003266
Herbert Xu9aaa1562009-05-26 18:50:33 +00003267 frag = pinfo->frags + nr_frags;
3268 frag2 = skbinfo->frags + i;
Herbert Xu66e92fc2009-05-26 18:50:32 +00003269 do {
3270 *--frag = *--frag2;
3271 } while (--i);
3272
3273 frag->page_offset += offset;
Eric Dumazet9e903e02011-10-18 21:00:24 +00003274 skb_frag_size_sub(frag, offset);
Herbert Xu66e92fc2009-05-26 18:50:32 +00003275
Eric Dumazet715dc1f2012-05-02 23:33:21 +00003276 /* all fragments truesize : remove (head size + sk_buff) */
Alexander Duyckec47ea82012-05-04 14:26:56 +00003277 delta_truesize = skb->truesize -
3278 SKB_TRUESIZE(skb_end_offset(skb));
Eric Dumazet715dc1f2012-05-02 23:33:21 +00003279
Herbert Xuf5572062009-01-14 20:40:03 -08003280 skb->truesize -= skb->data_len;
3281 skb->len -= skb->data_len;
3282 skb->data_len = 0;
3283
Eric Dumazet715dc1f2012-05-02 23:33:21 +00003284 NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
Herbert Xu5d38a072009-01-04 16:13:40 -08003285 goto done;
Eric Dumazetd7e88832012-04-30 08:10:34 +00003286 } else if (skb->head_frag) {
3287 int nr_frags = pinfo->nr_frags;
3288 skb_frag_t *frag = pinfo->frags + nr_frags;
3289 struct page *page = virt_to_head_page(skb->head);
3290 unsigned int first_size = headlen - offset;
3291 unsigned int first_offset;
3292
3293 if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
Eric Dumazet8a291112013-10-08 09:02:23 -07003294 goto merge;
Eric Dumazetd7e88832012-04-30 08:10:34 +00003295
3296 first_offset = skb->data -
3297 (unsigned char *)page_address(page) +
3298 offset;
3299
3300 pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
3301
3302 frag->page.p = page;
3303 frag->page_offset = first_offset;
3304 skb_frag_size_set(frag, first_size);
3305
3306 memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
3307 /* We don't need to clear skbinfo->nr_frags here */
3308
Eric Dumazet715dc1f2012-05-02 23:33:21 +00003309 delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
Eric Dumazetd7e88832012-04-30 08:10:34 +00003310 NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
3311 goto done;
Eric Dumazet8a291112013-10-08 09:02:23 -07003312 }
Herbert Xu71d93b32008-12-15 23:42:33 -08003313
3314merge:
Eric Dumazet715dc1f2012-05-02 23:33:21 +00003315 delta_truesize = skb->truesize;
Herbert Xu67147ba2009-05-26 18:50:22 +00003316 if (offset > headlen) {
Michal Schmidtd1dc7ab2011-01-24 12:08:48 +00003317 unsigned int eat = offset - headlen;
3318
3319 skbinfo->frags[0].page_offset += eat;
Eric Dumazet9e903e02011-10-18 21:00:24 +00003320 skb_frag_size_sub(&skbinfo->frags[0], eat);
Michal Schmidtd1dc7ab2011-01-24 12:08:48 +00003321 skb->data_len -= eat;
3322 skb->len -= eat;
Herbert Xu67147ba2009-05-26 18:50:22 +00003323 offset = headlen;
Herbert Xu56035022009-02-05 21:26:52 -08003324 }
3325
Herbert Xu67147ba2009-05-26 18:50:22 +00003326 __skb_pull(skb, offset);
Herbert Xu56035022009-02-05 21:26:52 -08003327
Eric Dumazet29e98242014-05-16 11:34:37 -07003328 if (NAPI_GRO_CB(p)->last == p)
Eric Dumazet8a291112013-10-08 09:02:23 -07003329 skb_shinfo(p)->frag_list = skb;
3330 else
3331 NAPI_GRO_CB(p)->last->next = skb;
Eric Dumazetc3c7c252012-12-06 13:54:59 +00003332 NAPI_GRO_CB(p)->last = skb;
Eric Dumazetf4a775d2014-09-22 16:29:32 -07003333 __skb_header_release(skb);
Eric Dumazet8a291112013-10-08 09:02:23 -07003334 lp = p;
Herbert Xu71d93b32008-12-15 23:42:33 -08003335
Herbert Xu5d38a072009-01-04 16:13:40 -08003336done:
3337 NAPI_GRO_CB(p)->count++;
Herbert Xu37fe4732009-01-17 19:48:13 +00003338 p->data_len += len;
Eric Dumazet715dc1f2012-05-02 23:33:21 +00003339 p->truesize += delta_truesize;
Herbert Xu37fe4732009-01-17 19:48:13 +00003340 p->len += len;
Eric Dumazet8a291112013-10-08 09:02:23 -07003341 if (lp != p) {
3342 lp->data_len += len;
3343 lp->truesize += delta_truesize;
3344 lp->len += len;
3345 }
Herbert Xu71d93b32008-12-15 23:42:33 -08003346 NAPI_GRO_CB(skb)->same_flow = 1;
3347 return 0;
3348}
Herbert Xu71d93b32008-12-15 23:42:33 -08003349
Linus Torvalds1da177e2005-04-16 15:20:36 -07003350void __init skb_init(void)
3351{
3352 skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
3353 sizeof(struct sk_buff),
3354 0,
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07003355 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09003356 NULL);
David S. Millerd179cd12005-08-17 14:57:30 -07003357 skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
Eric Dumazetd0bf4a92014-09-29 13:29:15 -07003358 sizeof(struct sk_buff_fclones),
David S. Millerd179cd12005-08-17 14:57:30 -07003359 0,
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07003360 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09003361 NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003362}
3363
David Howells716ea3a2007-04-02 20:19:53 -07003364/**
3365 * skb_to_sgvec - Fill a scatter-gather list from a socket buffer
3366 * @skb: Socket buffer containing the buffers to be mapped
3367 * @sg: The scatter-gather list to map into
3368 * @offset: The offset into the buffer's contents to start mapping
3369 * @len: Length of buffer space to be mapped
3370 *
3371 * Fill the specified scatter-gather list with mappings/pointers into a
3372 * region of the buffer space attached to a socket buffer.
3373 */
David S. Miller51c739d2007-10-30 21:29:29 -07003374static int
3375__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
David Howells716ea3a2007-04-02 20:19:53 -07003376{
David S. Miller1a028e52007-04-27 15:21:23 -07003377 int start = skb_headlen(skb);
3378 int i, copy = start - offset;
David S. Millerfbb398a2009-06-09 00:18:59 -07003379 struct sk_buff *frag_iter;
David Howells716ea3a2007-04-02 20:19:53 -07003380 int elt = 0;
3381
3382 if (copy > 0) {
3383 if (copy > len)
3384 copy = len;
Jens Axboe642f1492007-10-24 11:20:47 +02003385 sg_set_buf(sg, skb->data + offset, copy);
David Howells716ea3a2007-04-02 20:19:53 -07003386 elt++;
3387 if ((len -= copy) == 0)
3388 return elt;
3389 offset += copy;
3390 }
3391
3392 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
David S. Miller1a028e52007-04-27 15:21:23 -07003393 int end;
David Howells716ea3a2007-04-02 20:19:53 -07003394
Ilpo Järvinen547b7922008-07-25 21:43:18 -07003395 WARN_ON(start > offset + len);
David S. Miller1a028e52007-04-27 15:21:23 -07003396
Eric Dumazet9e903e02011-10-18 21:00:24 +00003397 end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
David Howells716ea3a2007-04-02 20:19:53 -07003398 if ((copy = end - offset) > 0) {
3399 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3400
3401 if (copy > len)
3402 copy = len;
Ian Campbellea2ab692011-08-22 23:44:58 +00003403 sg_set_page(&sg[elt], skb_frag_page(frag), copy,
Jens Axboe642f1492007-10-24 11:20:47 +02003404 frag->page_offset+offset-start);
David Howells716ea3a2007-04-02 20:19:53 -07003405 elt++;
3406 if (!(len -= copy))
3407 return elt;
3408 offset += copy;
3409 }
David S. Miller1a028e52007-04-27 15:21:23 -07003410 start = end;
David Howells716ea3a2007-04-02 20:19:53 -07003411 }
3412
David S. Millerfbb398a2009-06-09 00:18:59 -07003413 skb_walk_frags(skb, frag_iter) {
3414 int end;
David Howells716ea3a2007-04-02 20:19:53 -07003415
David S. Millerfbb398a2009-06-09 00:18:59 -07003416 WARN_ON(start > offset + len);
David Howells716ea3a2007-04-02 20:19:53 -07003417
David S. Millerfbb398a2009-06-09 00:18:59 -07003418 end = start + frag_iter->len;
3419 if ((copy = end - offset) > 0) {
3420 if (copy > len)
3421 copy = len;
3422 elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start,
3423 copy);
3424 if ((len -= copy) == 0)
3425 return elt;
3426 offset += copy;
David Howells716ea3a2007-04-02 20:19:53 -07003427 }
David S. Millerfbb398a2009-06-09 00:18:59 -07003428 start = end;
David Howells716ea3a2007-04-02 20:19:53 -07003429 }
3430 BUG_ON(len);
3431 return elt;
3432}
3433
Fan Du25a91d82014-01-18 09:54:23 +08003434/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only maps the skb to the
3435 * given sglist without marking the sg entry holding the last skb data as the end.
3436 * The caller can therefore manipulate the sg list at will when padding new data
3437 * after the first call, without calling sg_unmark_end to expand the sg list.
3438 *
3439 * Scenario to use skb_to_sgvec_nomark:
3440 * 1. sg_init_table
3441 * 2. skb_to_sgvec_nomark(payload1)
3442 * 3. skb_to_sgvec_nomark(payload2)
3443 *
3444 * This is equivalent to:
3445 * 1. sg_init_table
3446 * 2. skb_to_sgvec(payload1)
3447 * 3. sg_unmark_end
3448 * 4. skb_to_sgvec(payload2)
3449 *
3450 * When mapping multiple payloads conditionally, skb_to_sgvec_nomark
3451 * is preferable.
3452 */
3453int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
3454 int offset, int len)
3455{
3456 return __skb_to_sgvec(skb, sg, offset, len);
3457}
3458EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
3459
David S. Miller51c739d2007-10-30 21:29:29 -07003460int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
3461{
3462 int nsg = __skb_to_sgvec(skb, sg, offset, len);
3463
Jens Axboec46f2332007-10-31 12:06:37 +01003464 sg_mark_end(&sg[nsg - 1]);
David S. Miller51c739d2007-10-30 21:29:29 -07003465
3466 return nsg;
3467}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08003468EXPORT_SYMBOL_GPL(skb_to_sgvec);
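
/*
 * A minimal usage sketch (not from this file): map an skb without a frag
 * list into a scatterlist, e.g. before handing it to a crypto or DMA
 * engine.  The MAX_SKB_FRAGS + 1 sizing (linear head plus every page
 * fragment) is an assumption the caller is expected to provide room for.
 */
static int example_map_skb(struct sk_buff *skb, struct scatterlist *sg)
{
	sg_init_table(sg, MAX_SKB_FRAGS + 1);
	return skb_to_sgvec(skb, sg, 0, skb->len);	/* entries actually used */
}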
David S. Miller51c739d2007-10-30 21:29:29 -07003469
David Howells716ea3a2007-04-02 20:19:53 -07003470/**
3471 * skb_cow_data - Check that a socket buffer's data buffers are writable
3472 * @skb: The socket buffer to check.
3473 * @tailbits: Amount of trailing space to be added
3474 * @trailer: Returned pointer to the skb where the @tailbits space begins
3475 *
3476 * Make sure that the data buffers attached to a socket buffer are
3477 * writable. If they are not, private copies are made of the data buffers
3478 * and the socket buffer is set to use these instead.
3479 *
3480 * If @tailbits is given, make sure that there is space to write @tailbits
3481 * bytes of data beyond current end of socket buffer. @trailer will be
3482 * set to point to the skb in which this space begins.
3483 *
3484 * The number of scatterlist elements required to completely map the
3485 * COW'd and extended socket buffer will be returned.
3486 */
3487int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
3488{
3489 int copyflag;
3490 int elt;
3491 struct sk_buff *skb1, **skb_p;
3492
3493 /* If skb is cloned or its head is paged, reallocate
3494 * head pulling out all the pages (pages are considered not writable
3495 * at the moment even if they are anonymous).
3496 */
3497 if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
3498 __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
3499 return -ENOMEM;
3500
3501 /* Easy case. Most of packets will go this way. */
David S. Miller21dc3302010-08-23 00:13:46 -07003502 if (!skb_has_frag_list(skb)) {
David Howells716ea3a2007-04-02 20:19:53 -07003503 /* A little trouble: not enough space for the trailer.
3504 * This should not happen when the stack is tuned to generate
3505 * good frames. OK, on a miss we reallocate and reserve even more
3506 * space; 128 bytes is fair. */
3507
3508 if (skb_tailroom(skb) < tailbits &&
3509 pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
3510 return -ENOMEM;
3511
3512 /* Voila! */
3513 *trailer = skb;
3514 return 1;
3515 }
3516
3517 /* Misery. We are in troubles, going to mincer fragments... */
3518
3519 elt = 1;
3520 skb_p = &skb_shinfo(skb)->frag_list;
3521 copyflag = 0;
3522
3523 while ((skb1 = *skb_p) != NULL) {
3524 int ntail = 0;
3525
3526 /* The fragment is partially pulled by someone,
3527 * this can happen on input. Copy it and everything
3528 * after it. */
3529
3530 if (skb_shared(skb1))
3531 copyflag = 1;
3532
3533 /* If the skb is the last, worry about trailer. */
3534
3535 if (skb1->next == NULL && tailbits) {
3536 if (skb_shinfo(skb1)->nr_frags ||
David S. Miller21dc3302010-08-23 00:13:46 -07003537 skb_has_frag_list(skb1) ||
David Howells716ea3a2007-04-02 20:19:53 -07003538 skb_tailroom(skb1) < tailbits)
3539 ntail = tailbits + 128;
3540 }
3541
3542 if (copyflag ||
3543 skb_cloned(skb1) ||
3544 ntail ||
3545 skb_shinfo(skb1)->nr_frags ||
David S. Miller21dc3302010-08-23 00:13:46 -07003546 skb_has_frag_list(skb1)) {
David Howells716ea3a2007-04-02 20:19:53 -07003547 struct sk_buff *skb2;
3548
3549 /* Fuck, we are miserable poor guys... */
3550 if (ntail == 0)
3551 skb2 = skb_copy(skb1, GFP_ATOMIC);
3552 else
3553 skb2 = skb_copy_expand(skb1,
3554 skb_headroom(skb1),
3555 ntail,
3556 GFP_ATOMIC);
3557 if (unlikely(skb2 == NULL))
3558 return -ENOMEM;
3559
3560 if (skb1->sk)
3561 skb_set_owner_w(skb2, skb1->sk);
3562
3563 /* Looking around. Are we still alive?
3564 * OK, link new skb, drop old one */
3565
3566 skb2->next = skb1->next;
3567 *skb_p = skb2;
3568 kfree_skb(skb1);
3569 skb1 = skb2;
3570 }
3571 elt++;
3572 *trailer = skb1;
3573 skb_p = &skb1->next;
3574 }
3575
3576 return elt;
3577}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08003578EXPORT_SYMBOL_GPL(skb_cow_data);
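
/*
 * A minimal sketch (not from this file) of the IPsec-style pairing of
 * skb_cow_data() and skb_to_sgvec(): make the buffers writable, then map
 * them for in-place processing.  The scatterlist allocation strategy and
 * the omitted transform call are assumptions of this sketch.
 */
static int example_cow_and_map(struct sk_buff *skb, int trailer_len)
{
	struct sk_buff *trailer;
	struct scatterlist *sg;
	int nsg;

	nsg = skb_cow_data(skb, trailer_len, &trailer);
	if (nsg < 0)
		return nsg;

	sg = kmalloc(sizeof(*sg) * nsg, GFP_ATOMIC);
	if (!sg)
		return -ENOMEM;

	sg_init_table(sg, nsg);
	skb_to_sgvec(skb, sg, 0, skb->len);
	/* ... hand 'sg' to a crypto transform here ... */
	kfree(sg);
	return 0;
}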
David Howells716ea3a2007-04-02 20:19:53 -07003579
Eric Dumazetb1faf562010-05-31 23:44:05 -07003580static void sock_rmem_free(struct sk_buff *skb)
3581{
3582 struct sock *sk = skb->sk;
3583
3584 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
3585}
3586
3587/*
3588 * Note: We don't mem charge error packets (no sk_forward_alloc changes)
3589 */
3590int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
3591{
3592 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
Eric Dumazet95c96172012-04-15 05:58:06 +00003593 (unsigned int)sk->sk_rcvbuf)
Eric Dumazetb1faf562010-05-31 23:44:05 -07003594 return -ENOMEM;
3595
3596 skb_orphan(skb);
3597 skb->sk = sk;
3598 skb->destructor = sock_rmem_free;
3599 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
3600
Eric Dumazetabb57ea2011-05-18 02:21:31 -04003601 /* before exiting rcu section, make sure dst is refcounted */
3602 skb_dst_force(skb);
3603
Eric Dumazetb1faf562010-05-31 23:44:05 -07003604 skb_queue_tail(&sk->sk_error_queue, skb);
3605 if (!sock_flag(sk, SOCK_DEAD))
David S. Miller676d2362014-04-11 16:15:36 -04003606 sk->sk_data_ready(sk);
Eric Dumazetb1faf562010-05-31 23:44:05 -07003607 return 0;
3608}
3609EXPORT_SYMBOL(sock_queue_err_skb);
3610
Willem de Bruijn364a9e92014-08-31 21:30:27 -04003611struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
3612{
3613 struct sk_buff_head *q = &sk->sk_error_queue;
3614 struct sk_buff *skb, *skb_next;
Eric Dumazet997d5c32015-02-18 05:47:55 -08003615 unsigned long flags;
Willem de Bruijn364a9e92014-08-31 21:30:27 -04003616 int err = 0;
3617
Eric Dumazet997d5c32015-02-18 05:47:55 -08003618 spin_lock_irqsave(&q->lock, flags);
Willem de Bruijn364a9e92014-08-31 21:30:27 -04003619 skb = __skb_dequeue(q);
3620 if (skb && (skb_next = skb_peek(q)))
3621 err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
Eric Dumazet997d5c32015-02-18 05:47:55 -08003622 spin_unlock_irqrestore(&q->lock, flags);
Willem de Bruijn364a9e92014-08-31 21:30:27 -04003623
3624 sk->sk_err = err;
3625 if (err)
3626 sk->sk_error_report(sk);
3627
3628 return skb;
3629}
3630EXPORT_SYMBOL(sock_dequeue_err_skb);
3631
Alexander Duyckcab41c42014-09-10 18:05:26 -04003632/**
3633 * skb_clone_sk - create clone of skb, and take reference to socket
3634 * @skb: the skb to clone
3635 *
3636 * This function creates a clone of a buffer that holds a reference on
3637 * sk_refcnt. Buffers created via this function are meant to be
3638 * returned using sock_queue_err_skb, or free via kfree_skb.
3639 *
3640 * When passing buffers allocated with this function to sock_queue_err_skb
3641 * it is necessary to wrap the call with sock_hold/sock_put in order to
3642 * prevent the socket from being released prior to being enqueued on
3643 * the sk_error_queue.
3644 */
Alexander Duyck62bccb82014-09-04 13:31:35 -04003645struct sk_buff *skb_clone_sk(struct sk_buff *skb)
3646{
3647 struct sock *sk = skb->sk;
3648 struct sk_buff *clone;
3649
3650 if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt))
3651 return NULL;
3652
3653 clone = skb_clone(skb, GFP_ATOMIC);
3654 if (!clone) {
3655 sock_put(sk);
3656 return NULL;
3657 }
3658
3659 clone->sk = sk;
3660 clone->destructor = sock_efree;
3661
3662 return clone;
3663}
3664EXPORT_SYMBOL(skb_clone_sk);
3665
Alexander Duyck37846ef2014-09-04 13:31:10 -04003666static void __skb_complete_tx_timestamp(struct sk_buff *skb,
3667 struct sock *sk,
3668 int tstype)
Patrick Ohlyac45f602009-02-12 05:03:37 +00003669{
Patrick Ohlyac45f602009-02-12 05:03:37 +00003670 struct sock_exterr_skb *serr;
Patrick Ohlyac45f602009-02-12 05:03:37 +00003671 int err;
3672
Patrick Ohlyac45f602009-02-12 05:03:37 +00003673 serr = SKB_EXT_ERR(skb);
3674 memset(serr, 0, sizeof(*serr));
3675 serr->ee.ee_errno = ENOMSG;
3676 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
Willem de Bruijne7fd2882014-08-04 22:11:48 -04003677 serr->ee.ee_info = tstype;
Willem de Bruijn4ed2d762014-08-04 22:11:49 -04003678 if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
Willem de Bruijn09c2d252014-08-04 22:11:47 -04003679 serr->ee.ee_data = skb_shinfo(skb)->tskey;
Willem de Bruijn4ed2d762014-08-04 22:11:49 -04003680 if (sk->sk_protocol == IPPROTO_TCP)
3681 serr->ee.ee_data -= sk->sk_tskey;
3682 }
Eric Dumazet29030372010-05-29 00:20:48 -07003683
Patrick Ohlyac45f602009-02-12 05:03:37 +00003684 err = sock_queue_err_skb(sk, skb);
Eric Dumazet29030372010-05-29 00:20:48 -07003685
Patrick Ohlyac45f602009-02-12 05:03:37 +00003686 if (err)
3687 kfree_skb(skb);
3688}
Alexander Duyck37846ef2014-09-04 13:31:10 -04003689
Willem de Bruijnb245be12015-01-30 13:29:32 -05003690static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
3691{
3692 bool ret;
3693
3694 if (likely(sysctl_tstamp_allow_data || tsonly))
3695 return true;
3696
3697 read_lock_bh(&sk->sk_callback_lock);
3698 ret = sk->sk_socket && sk->sk_socket->file &&
3699 file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW);
3700 read_unlock_bh(&sk->sk_callback_lock);
3701 return ret;
3702}
3703
Alexander Duyck37846ef2014-09-04 13:31:10 -04003704void skb_complete_tx_timestamp(struct sk_buff *skb,
3705 struct skb_shared_hwtstamps *hwtstamps)
3706{
3707 struct sock *sk = skb->sk;
3708
Willem de Bruijnb245be12015-01-30 13:29:32 -05003709 if (!skb_may_tx_timestamp(sk, false))
3710 return;
3711
Alexander Duyck62bccb82014-09-04 13:31:35 -04003712 /* take a reference to prevent skb_orphan() from freeing the socket */
3713 sock_hold(sk);
Alexander Duyck37846ef2014-09-04 13:31:10 -04003714
Alexander Duyck62bccb82014-09-04 13:31:35 -04003715 *skb_hwtstamps(skb) = *hwtstamps;
3716 __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
Alexander Duyck37846ef2014-09-04 13:31:10 -04003717
3718 sock_put(sk);
3719}
3720EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
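
/*
 * A minimal usage sketch (not from this file): a driver's TX completion
 * path reporting a hardware timestamp.  'clone' is assumed to have been
 * obtained with skb_clone_sk() when the frame was queued to the hardware,
 * and 'hw_ns' is the raw hardware timestamp in nanoseconds.
 */
static void example_tx_tstamp_complete(struct sk_buff *clone, u64 hw_ns)
{
	struct skb_shared_hwtstamps hwts = {};

	if (!clone)	/* skb_clone_sk() may have returned NULL */
		return;

	hwts.hwtstamp = ns_to_ktime(hw_ns);
	skb_complete_tx_timestamp(clone, &hwts);
}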
3721
3722void __skb_tstamp_tx(struct sk_buff *orig_skb,
3723 struct skb_shared_hwtstamps *hwtstamps,
3724 struct sock *sk, int tstype)
3725{
3726 struct sk_buff *skb;
Willem de Bruijn3a8dd972015-03-11 15:43:55 -04003727 bool tsonly;
Alexander Duyck37846ef2014-09-04 13:31:10 -04003728
Willem de Bruijn3a8dd972015-03-11 15:43:55 -04003729 if (!sk)
3730 return;
3731
3732 tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
3733 if (!skb_may_tx_timestamp(sk, tsonly))
Alexander Duyck37846ef2014-09-04 13:31:10 -04003734 return;
3735
Willem de Bruijn49ca0d82015-01-30 13:29:31 -05003736 if (tsonly)
3737 skb = alloc_skb(0, GFP_ATOMIC);
Alexander Duyck37846ef2014-09-04 13:31:10 -04003738 else
Willem de Bruijn49ca0d82015-01-30 13:29:31 -05003739 skb = skb_clone(orig_skb, GFP_ATOMIC);
Alexander Duyck37846ef2014-09-04 13:31:10 -04003740 if (!skb)
3741 return;
3742
Willem de Bruijn49ca0d82015-01-30 13:29:31 -05003743 if (tsonly) {
3744 skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
3745 skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
3746 }
3747
3748 if (hwtstamps)
3749 *skb_hwtstamps(skb) = *hwtstamps;
3750 else
3751 skb->tstamp = ktime_get_real();
3752
Alexander Duyck37846ef2014-09-04 13:31:10 -04003753 __skb_complete_tx_timestamp(skb, sk, tstype);
3754}
Willem de Bruijne7fd2882014-08-04 22:11:48 -04003755EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
3756
3757void skb_tstamp_tx(struct sk_buff *orig_skb,
3758 struct skb_shared_hwtstamps *hwtstamps)
3759{
3760 return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
3761 SCM_TSTAMP_SND);
3762}
Patrick Ohlyac45f602009-02-12 05:03:37 +00003763EXPORT_SYMBOL_GPL(skb_tstamp_tx);
3764
Johannes Berg6e3e9392011-11-09 10:15:42 +01003765void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
3766{
3767 struct sock *sk = skb->sk;
3768 struct sock_exterr_skb *serr;
3769 int err;
3770
3771 skb->wifi_acked_valid = 1;
3772 skb->wifi_acked = acked;
3773
3774 serr = SKB_EXT_ERR(skb);
3775 memset(serr, 0, sizeof(*serr));
3776 serr->ee.ee_errno = ENOMSG;
3777 serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
3778
Alexander Duyckbf7fa552014-09-10 18:05:42 -04003779 /* take a reference to prevent skb_orphan() from freeing the socket */
3780 sock_hold(sk);
3781
Johannes Berg6e3e9392011-11-09 10:15:42 +01003782 err = sock_queue_err_skb(sk, skb);
3783 if (err)
3784 kfree_skb(skb);
Alexander Duyckbf7fa552014-09-10 18:05:42 -04003785
3786 sock_put(sk);
Johannes Berg6e3e9392011-11-09 10:15:42 +01003787}
3788EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
3789
Rusty Russellf35d9d82008-02-04 23:49:54 -05003790/**
3791 * skb_partial_csum_set - set up and verify partial csum values for packet
3792 * @skb: the skb to set
3793 * @start: the number of bytes after skb->data to start checksumming.
3794 * @off: the offset from start to place the checksum.
3795 *
3796 * For untrusted partially-checksummed packets, we need to make sure the values
3797 * for skb->csum_start and skb->csum_offset are valid so we don't oops.
3798 *
3799 * This function checks and sets those values and skb->ip_summed: if this
3800 * returns false you should drop the packet.
3801 */
3802bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
3803{
Herbert Xu5ff8dda2009-06-04 01:22:01 +00003804 if (unlikely(start > skb_headlen(skb)) ||
3805 unlikely((int)start + off > skb_headlen(skb) - 2)) {
Joe Perchese87cc472012-05-13 21:56:26 +00003806 net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n",
3807 start, off, skb_headlen(skb));
Rusty Russellf35d9d82008-02-04 23:49:54 -05003808 return false;
3809 }
3810 skb->ip_summed = CHECKSUM_PARTIAL;
3811 skb->csum_start = skb_headroom(skb) + start;
3812 skb->csum_offset = off;
Jason Wange5d5dec2013-03-26 23:11:20 +00003813 skb_set_transport_header(skb, start);
Rusty Russellf35d9d82008-02-04 23:49:54 -05003814 return true;
3815}
David S. Millerb4ac530fc2009-02-10 02:09:24 -08003816EXPORT_SYMBOL_GPL(skb_partial_csum_set);
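
/*
 * A minimal usage sketch (not from this file): validate checksum metadata
 * taken from an untrusted source (e.g. a virtio-style descriptor) before
 * letting the packet continue.  The -EINVAL "drop it" convention is an
 * assumption of this sketch.
 */
static int example_set_partial_csum(struct sk_buff *skb,
				    u16 csum_start, u16 csum_offset)
{
	if (!skb_partial_csum_set(skb, csum_start, csum_offset))
		return -EINVAL;	/* caller should drop the packet */

	return 0;
}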
Rusty Russellf35d9d82008-02-04 23:49:54 -05003817
Paul Durranted1f50c2014-01-09 10:02:46 +00003818static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
3819 unsigned int max)
3820{
3821 if (skb_headlen(skb) >= len)
3822 return 0;
3823
3824 /* If we need to pullup then pullup to the max, so we
3825 * won't need to do it again.
3826 */
3827 if (max > skb->len)
3828 max = skb->len;
3829
3830 if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
3831 return -ENOMEM;
3832
3833 if (skb_headlen(skb) < len)
3834 return -EPROTO;
3835
3836 return 0;
3837}
3838
Jan Beulichf9708b42014-03-11 13:56:05 +00003839#define MAX_TCP_HDR_LEN (15 * 4)
3840
3841static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
3842 typeof(IPPROTO_IP) proto,
3843 unsigned int off)
3844{
3845 switch (proto) {
3846 int err;
3847
3848 case IPPROTO_TCP:
3849 err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
3850 off + MAX_TCP_HDR_LEN);
3851 if (!err && !skb_partial_csum_set(skb, off,
3852 offsetof(struct tcphdr,
3853 check)))
3854 err = -EPROTO;
3855 return err ? ERR_PTR(err) : &tcp_hdr(skb)->check;
3856
3857 case IPPROTO_UDP:
3858 err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr),
3859 off + sizeof(struct udphdr));
3860 if (!err && !skb_partial_csum_set(skb, off,
3861 offsetof(struct udphdr,
3862 check)))
3863 err = -EPROTO;
3864 return err ? ERR_PTR(err) : &udp_hdr(skb)->check;
3865 }
3866
3867 return ERR_PTR(-EPROTO);
3868}
3869
Paul Durranted1f50c2014-01-09 10:02:46 +00003870/* This value should be large enough to cover a tagged ethernet header plus
3871 * maximally sized IP and TCP or UDP headers.
3872 */
3873#define MAX_IP_HDR_LEN 128
3874
Jan Beulichf9708b42014-03-11 13:56:05 +00003875static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
Paul Durranted1f50c2014-01-09 10:02:46 +00003876{
3877 unsigned int off;
3878 bool fragment;
Jan Beulichf9708b42014-03-11 13:56:05 +00003879 __sum16 *csum;
Paul Durranted1f50c2014-01-09 10:02:46 +00003880 int err;
3881
3882 fragment = false;
3883
3884 err = skb_maybe_pull_tail(skb,
3885 sizeof(struct iphdr),
3886 MAX_IP_HDR_LEN);
3887 if (err < 0)
3888 goto out;
3889
3890 if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
3891 fragment = true;
3892
3893 off = ip_hdrlen(skb);
3894
3895 err = -EPROTO;
3896
3897 if (fragment)
3898 goto out;
3899
Jan Beulichf9708b42014-03-11 13:56:05 +00003900 csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off);
3901 if (IS_ERR(csum))
3902 return PTR_ERR(csum);
Paul Durranted1f50c2014-01-09 10:02:46 +00003903
Jan Beulichf9708b42014-03-11 13:56:05 +00003904 if (recalculate)
3905 *csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
3906 ip_hdr(skb)->daddr,
3907 skb->len - off,
3908 ip_hdr(skb)->protocol, 0);
Paul Durranted1f50c2014-01-09 10:02:46 +00003909 err = 0;
3910
3911out:
3912 return err;
3913}
3914
3915/* This value should be large enough to cover a tagged ethernet header plus
3916 * an IPv6 header, all options, and a maximal TCP or UDP header.
3917 */
3918#define MAX_IPV6_HDR_LEN 256
3919
3920#define OPT_HDR(type, skb, off) \
3921 (type *)(skb_network_header(skb) + (off))
3922
3923static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
3924{
3925 int err;
3926 u8 nexthdr;
3927 unsigned int off;
3928 unsigned int len;
3929 bool fragment;
3930 bool done;
Jan Beulichf9708b42014-03-11 13:56:05 +00003931 __sum16 *csum;
Paul Durranted1f50c2014-01-09 10:02:46 +00003932
3933 fragment = false;
3934 done = false;
3935
3936 off = sizeof(struct ipv6hdr);
3937
3938 err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
3939 if (err < 0)
3940 goto out;
3941
3942 nexthdr = ipv6_hdr(skb)->nexthdr;
3943
3944 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
3945 while (off <= len && !done) {
3946 switch (nexthdr) {
3947 case IPPROTO_DSTOPTS:
3948 case IPPROTO_HOPOPTS:
3949 case IPPROTO_ROUTING: {
3950 struct ipv6_opt_hdr *hp;
3951
3952 err = skb_maybe_pull_tail(skb,
3953 off +
3954 sizeof(struct ipv6_opt_hdr),
3955 MAX_IPV6_HDR_LEN);
3956 if (err < 0)
3957 goto out;
3958
3959 hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
3960 nexthdr = hp->nexthdr;
3961 off += ipv6_optlen(hp);
3962 break;
3963 }
3964 case IPPROTO_AH: {
3965 struct ip_auth_hdr *hp;
3966
3967 err = skb_maybe_pull_tail(skb,
3968 off +
3969 sizeof(struct ip_auth_hdr),
3970 MAX_IPV6_HDR_LEN);
3971 if (err < 0)
3972 goto out;
3973
3974 hp = OPT_HDR(struct ip_auth_hdr, skb, off);
3975 nexthdr = hp->nexthdr;
3976 off += ipv6_authlen(hp);
3977 break;
3978 }
3979 case IPPROTO_FRAGMENT: {
3980 struct frag_hdr *hp;
3981
3982 err = skb_maybe_pull_tail(skb,
3983 off +
3984 sizeof(struct frag_hdr),
3985 MAX_IPV6_HDR_LEN);
3986 if (err < 0)
3987 goto out;
3988
3989 hp = OPT_HDR(struct frag_hdr, skb, off);
3990
3991 if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
3992 fragment = true;
3993
3994 nexthdr = hp->nexthdr;
3995 off += sizeof(struct frag_hdr);
3996 break;
3997 }
3998 default:
3999 done = true;
4000 break;
4001 }
4002 }
4003
4004 err = -EPROTO;
4005
4006 if (!done || fragment)
4007 goto out;
4008
Jan Beulichf9708b42014-03-11 13:56:05 +00004009 csum = skb_checksum_setup_ip(skb, nexthdr, off);
4010 if (IS_ERR(csum))
4011 return PTR_ERR(csum);
Paul Durranted1f50c2014-01-09 10:02:46 +00004012
Jan Beulichf9708b42014-03-11 13:56:05 +00004013 if (recalculate)
4014 *csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4015 &ipv6_hdr(skb)->daddr,
4016 skb->len - off, nexthdr, 0);
Paul Durranted1f50c2014-01-09 10:02:46 +00004017 err = 0;
4018
4019out:
4020 return err;
4021}
4022
4023/**
4024 * skb_checksum_setup - set up partial checksum offset
4025 * @skb: the skb to set up
4026 * @recalculate: if true the pseudo-header checksum will be recalculated
4027 */
4028int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
4029{
4030 int err;
4031
4032 switch (skb->protocol) {
4033 case htons(ETH_P_IP):
Jan Beulichf9708b42014-03-11 13:56:05 +00004034 err = skb_checksum_setup_ipv4(skb, recalculate);
Paul Durranted1f50c2014-01-09 10:02:46 +00004035 break;
4036
4037 case htons(ETH_P_IPV6):
4038 err = skb_checksum_setup_ipv6(skb, recalculate);
4039 break;
4040
4041 default:
4042 err = -EPROTO;
4043 break;
4044 }
4045
4046 return err;
4047}
4048EXPORT_SYMBOL(skb_checksum_setup);
4049
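/*
 * Minimal sketch of an assumed caller: a backend driver that takes packets
 * from an untrusted guest can use skb_checksum_setup() to derive and verify
 * csum_start/csum_offset from the packet headers before passing the skb up
 * the stack.  "csum_prefilled" is a hypothetical flag meaning the guest has
 * already filled in the pseudo-header checksum.
 */
static int __maybe_unused example_setup_guest_csum(struct sk_buff *skb,
						   bool csum_prefilled)
{
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	/* A negative return (-EPROTO) means the headers could not be
	 * parsed and the caller should drop the packet.
	 */
	return skb_checksum_setup(skb, !csum_prefilled);
}
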
Linus Lüssing9afd85c2015-05-02 14:01:07 +02004050/**
4051 * skb_checksum_maybe_trim - maybe trims the given skb
4052 * @skb: the skb to check
4053 * @transport_len: the data length beyond the network header
4054 *
4055 * Checks whether the given skb has data beyond the given transport length.
4056 * If so, returns a cloned skb trimmed to this transport length.
4057 * Otherwise returns the provided skb. Returns NULL in error cases
4058 * (e.g. transport_len exceeds skb length or out-of-memory).
4059 *
4060 * Caller needs to set the skb transport header and release the returned skb.
4061 * Provided skb is consumed.
4062 */
4063static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
4064 unsigned int transport_len)
4065{
4066 struct sk_buff *skb_chk;
4067 unsigned int len = skb_transport_offset(skb) + transport_len;
4068 int ret;
4069
4070 if (skb->len < len) {
4071 kfree_skb(skb);
4072 return NULL;
4073 } else if (skb->len == len) {
4074 return skb;
4075 }
4076
4077 skb_chk = skb_clone(skb, GFP_ATOMIC);
4078 kfree_skb(skb);
4079
4080 if (!skb_chk)
4081 return NULL;
4082
4083 ret = pskb_trim_rcsum(skb_chk, len);
4084 if (ret) {
4085 kfree_skb(skb_chk);
4086 return NULL;
4087 }
4088
4089 return skb_chk;
4090}
4091
4092/**
4093 * skb_checksum_trimmed - validate checksum of an skb
4094 * @skb: the skb to check
4095 * @transport_len: the data length beyond the network header
4096 * @skb_chkf: checksum function to use
4097 *
4098 * Applies the given checksum function skb_chkf to the provided skb.
4099 * Returns a checked and maybe trimmed skb. Returns NULL on error.
4100 *
4101 * If the skb has data beyond the given transport length, then a
4102 * trimmed & cloned skb is checked and returned.
4103 *
4104 * Caller needs to set the skb transport header and release the returned skb.
4105 * Provided skb is consumed.
4106 */
4107struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
4108 unsigned int transport_len,
4109 __sum16(*skb_chkf)(struct sk_buff *skb))
4110{
4111 struct sk_buff *skb_chk;
4112 unsigned int offset = skb_transport_offset(skb);
Linus Lüssingfcba67c2015-05-05 00:19:35 +02004113 __sum16 ret;
Linus Lüssing9afd85c2015-05-02 14:01:07 +02004114
4115 skb_chk = skb_checksum_maybe_trim(skb, transport_len);
4116 if (!skb_chk)
4117 return NULL;
4118
4119 if (!pskb_may_pull(skb_chk, offset)) {
4120 kfree_skb(skb_chk);
4121 return NULL;
4122 }
4123
4124 __skb_pull(skb_chk, offset);
4125 ret = skb_chkf(skb_chk);
4126 __skb_push(skb_chk, offset);
4127
4128 if (ret) {
4129 kfree_skb(skb_chk);
4130 return NULL;
4131 }
4132
4133 return skb_chk;
4134}
4135EXPORT_SYMBOL(skb_checksum_trimmed);
4136
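/*
 * Minimal sketch of the intended calling pattern, assuming an IGMP-style
 * message whose checksum covers the whole transport payload: the transport
 * header must already be set by the caller, and the original skb is
 * consumed either way.  Both functions here are hypothetical.
 */
static __sum16 example_full_coverage_csum(struct sk_buff *skb)
{
	/* Returns 0 when the Internet checksum over the trimmed skb is ok. */
	return skb_checksum_simple_validate(skb);
}

static __maybe_unused struct sk_buff *
example_check_trimmed(struct sk_buff *skb, unsigned int transport_len)
{
	struct sk_buff *skb_chk;

	skb_chk = skb_checksum_trimmed(skb, transport_len,
				       example_full_coverage_csum);
	if (!skb_chk)
		return NULL;	/* bad length, allocation failure or bad csum */

	/* skb_chk may be a trimmed clone; the caller now owns and frees it. */
	return skb_chk;
}
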
Ben Hutchings4497b072008-06-19 16:22:28 -07004137void __skb_warn_lro_forwarding(const struct sk_buff *skb)
4138{
Joe Perchese87cc472012-05-13 21:56:26 +00004139 net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
4140 skb->dev->name);
Ben Hutchings4497b072008-06-19 16:22:28 -07004141}
Ben Hutchings4497b072008-06-19 16:22:28 -07004142EXPORT_SYMBOL(__skb_warn_lro_forwarding);
Eric Dumazetbad43ca2012-05-19 03:02:02 +00004143
4144void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
4145{
Eric Dumazet3d861f62012-10-22 09:03:40 +00004146 if (head_stolen) {
4147 skb_release_head_state(skb);
Eric Dumazetbad43ca2012-05-19 03:02:02 +00004148 kmem_cache_free(skbuff_head_cache, skb);
Eric Dumazet3d861f62012-10-22 09:03:40 +00004149 } else {
Eric Dumazetbad43ca2012-05-19 03:02:02 +00004150 __kfree_skb(skb);
Eric Dumazet3d861f62012-10-22 09:03:40 +00004151 }
Eric Dumazetbad43ca2012-05-19 03:02:02 +00004152}
4153EXPORT_SYMBOL(kfree_skb_partial);
4154
4155/**
4156 * skb_try_coalesce - try to merge skb to prior one
4157 * @to: prior buffer
4158 * @from: buffer to add
4159 * @fragstolen: pointer to boolean
Randy Dunlapc6c4b972012-06-08 14:01:44 +00004160 * @delta_truesize: how much more was allocated than was requested
Eric Dumazetbad43ca2012-05-19 03:02:02 +00004161 */
4162bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
4163 bool *fragstolen, int *delta_truesize)
4164{
4165 int i, delta, len = from->len;
4166
4167 *fragstolen = false;
4168
4169 if (skb_cloned(to))
4170 return false;
4171
4172 if (len <= skb_tailroom(to)) {
Eric Dumazete93a0432014-09-15 04:19:52 -07004173 if (len)
4174 BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
Eric Dumazetbad43ca2012-05-19 03:02:02 +00004175 *delta_truesize = 0;
4176 return true;
4177 }
4178
4179 if (skb_has_frag_list(to) || skb_has_frag_list(from))
4180 return false;
4181
4182 if (skb_headlen(from) != 0) {
4183 struct page *page;
4184 unsigned int offset;
4185
4186 if (skb_shinfo(to)->nr_frags +
4187 skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
4188 return false;
4189
4190 if (skb_head_is_locked(from))
4191 return false;
4192
4193 delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
4194
4195 page = virt_to_head_page(from->head);
4196 offset = from->data - (unsigned char *)page_address(page);
4197
4198 skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
4199 page, offset, skb_headlen(from));
4200 *fragstolen = true;
4201 } else {
4202 if (skb_shinfo(to)->nr_frags +
4203 skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
4204 return false;
4205
Weiping Panf4b549a2012-09-28 20:15:30 +00004206 delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
Eric Dumazetbad43ca2012-05-19 03:02:02 +00004207 }
4208
4209 WARN_ON_ONCE(delta < len);
4210
4211 memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
4212 skb_shinfo(from)->frags,
4213 skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
4214 skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
4215
4216 if (!skb_cloned(from))
4217 skb_shinfo(from)->nr_frags = 0;
4218
Li RongQing8ea853f2012-09-18 16:53:21 +00004219 /* if the skb is not cloned this does nothing
4220 * since we set nr_frags to 0.
4221 */
Eric Dumazetbad43ca2012-05-19 03:02:02 +00004222 for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
4223 skb_frag_ref(from, i);
4224
4225 to->truesize += delta;
4226 to->len += len;
4227 to->data_len += len;
4228
4229 *delta_truesize = delta;
4230 return true;
4231}
4232EXPORT_SYMBOL(skb_try_coalesce);
Nicolas Dichtel621e84d2013-06-26 16:11:27 +02004233
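/*
 * Sketch of the intended calling pattern, as used when collapsing receive
 * queues (hypothetical wrapper): on success the source skb's head may have
 * been stolen, so it must be released with kfree_skb_partial().
 */
static bool __maybe_unused example_coalesce(struct sk_buff *to,
					    struct sk_buff *from)
{
	bool fragstolen;
	int delta_truesize;

	if (!skb_try_coalesce(to, from, &fragstolen, &delta_truesize))
		return false;

	/* @to now references @from's data; delta_truesize is the extra
	 * memory the owner of @to should be charged for.
	 */
	kfree_skb_partial(from, fragstolen);
	return true;
}
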
4234/**
Nicolas Dichtel8b27f272013-09-02 15:34:56 +02004235 * skb_scrub_packet - scrub an skb
Nicolas Dichtel621e84d2013-06-26 16:11:27 +02004236 *
4237 * @skb: buffer to clean
Nicolas Dichtel8b27f272013-09-02 15:34:56 +02004238 * @xnet: packet is crossing netns
Nicolas Dichtel621e84d2013-06-26 16:11:27 +02004239 *
Nicolas Dichtel8b27f272013-09-02 15:34:56 +02004240 * skb_scrub_packet can be used after encapsulating or decapsulating a packet
4241 * into/from a tunnel. Some information has to be cleared during these
4242 * operations.
4243 * skb_scrub_packet can also be used to clean a skb before injecting it into
4244 * another namespace (@xnet == true). We have to clear all information in the
4245 * skb that could impact namespace isolation.
Nicolas Dichtel621e84d2013-06-26 16:11:27 +02004246 */
Nicolas Dichtel8b27f272013-09-02 15:34:56 +02004247void skb_scrub_packet(struct sk_buff *skb, bool xnet)
Nicolas Dichtel621e84d2013-06-26 16:11:27 +02004248{
Nicolas Dichtel621e84d2013-06-26 16:11:27 +02004249 skb->tstamp.tv64 = 0;
4250 skb->pkt_type = PACKET_HOST;
4251 skb->skb_iif = 0;
WANG Cong60ff7462014-05-04 16:39:18 -07004252 skb->ignore_df = 0;
Nicolas Dichtel621e84d2013-06-26 16:11:27 +02004253 skb_dst_drop(skb);
Eric Dumazetc29390c2015-03-11 18:42:02 -07004254 skb_sender_cpu_clear(skb);
Nicolas Dichtel621e84d2013-06-26 16:11:27 +02004255 secpath_reset(skb);
4256 nf_reset(skb);
4257 nf_reset_trace(skb);
Herbert Xu213dd742015-04-16 09:03:27 +08004258
4259 if (!xnet)
4260 return;
4261
4262 skb_orphan(skb);
4263 skb->mark = 0;
Nicolas Dichtel621e84d2013-06-26 16:11:27 +02004264}
4265EXPORT_SYMBOL_GPL(skb_scrub_packet);
Florian Westphalde960aa2014-01-26 10:58:16 +01004266
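/*
 * Minimal sketch of an assumed tunnel receive path: after decapsulation the
 * inner packet must not carry state from the outer one, and must be fully
 * orphaned if it is about to cross network namespaces.
 */
static void __maybe_unused example_decap_scrub(struct sk_buff *skb,
					       struct net *rx_net,
					       struct net *tunnel_net)
{
	skb_scrub_packet(skb, !net_eq(rx_net, tunnel_net));
}
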
4267/**
4268 * skb_gso_transport_seglen - Return length of individual segments of a gso packet
4269 *
4270 * @skb: GSO skb
4271 *
4272 * skb_gso_transport_seglen is used to determine the real size of the
4273 * individual segments, including Layer4 headers (TCP/UDP).
4274 *
4275 * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
4276 */
4277unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
4278{
4279 const struct skb_shared_info *shinfo = skb_shinfo(skb);
Florian Westphalf993bc22014-10-20 13:49:18 +02004280 unsigned int thlen = 0;
Florian Westphalde960aa2014-01-26 10:58:16 +01004281
Florian Westphalf993bc22014-10-20 13:49:18 +02004282 if (skb->encapsulation) {
4283 thlen = skb_inner_transport_header(skb) -
4284 skb_transport_header(skb);
Florian Westphal6d39d582014-04-09 10:28:50 +02004285
Florian Westphalf993bc22014-10-20 13:49:18 +02004286 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
4287 thlen += inner_tcp_hdrlen(skb);
4288 } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
4289 thlen = tcp_hdrlen(skb);
4290 }
Florian Westphal6d39d582014-04-09 10:28:50 +02004291 /* UFO sets gso_size to the size of the fragmentation
4292 * payload, i.e. the size of the L4 (UDP) header is already
4293 * accounted for.
4294 */
Florian Westphalf993bc22014-10-20 13:49:18 +02004295 return thlen + shinfo->gso_size;
Florian Westphalde960aa2014-01-26 10:58:16 +01004296}
4297EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
Vlad Yasevich0d5501c2014-08-08 14:42:13 -04004298
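/*
 * Sketch of a typical use (assumed caller): check whether the segments that
 * a GSO packet will be split into still fit a path MTU, by adding the
 * network header length to the per-segment transport length computed above.
 */
static bool __maybe_unused example_gso_fits_mtu(const struct sk_buff *skb,
						unsigned int mtu)
{
	unsigned int hdr_len = skb_transport_header(skb) -
			       skb_network_header(skb);

	return hdr_len + skb_gso_transport_seglen(skb) <= mtu;
}
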
4299static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
4300{
4301 if (skb_cow(skb, skb_headroom(skb)) < 0) {
4302 kfree_skb(skb);
4303 return NULL;
4304 }
4305
4306 memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
4307 skb->mac_header += VLAN_HLEN;
4308 return skb;
4309}
4310
4311struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
4312{
4313 struct vlan_hdr *vhdr;
4314 u16 vlan_tci;
4315
Jiri Pirkodf8a39d2015-01-13 17:13:44 +01004316 if (unlikely(skb_vlan_tag_present(skb))) {
Vlad Yasevich0d5501c2014-08-08 14:42:13 -04004317 /* vlan_tci is already set up so leave this for another time */
4318 return skb;
4319 }
4320
4321 skb = skb_share_check(skb, GFP_ATOMIC);
4322 if (unlikely(!skb))
4323 goto err_free;
4324
4325 if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
4326 goto err_free;
4327
4328 vhdr = (struct vlan_hdr *)skb->data;
4329 vlan_tci = ntohs(vhdr->h_vlan_TCI);
4330 __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
4331
4332 skb_pull_rcsum(skb, VLAN_HLEN);
4333 vlan_set_encap_proto(skb, vhdr);
4334
4335 skb = skb_reorder_vlan_header(skb);
4336 if (unlikely(!skb))
4337 goto err_free;
4338
4339 skb_reset_network_header(skb);
4340 skb_reset_transport_header(skb);
4341 skb_reset_mac_len(skb);
4342
4343 return skb;
4344
4345err_free:
4346 kfree_skb(skb);
4347 return NULL;
4348}
4349EXPORT_SYMBOL(skb_vlan_untag);
Eric Dumazet2e4e4412014-09-17 04:49:49 -07004350
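/*
 * Minimal sketch of an assumed receive path on hardware without VLAN tag
 * extraction: move an in-band 802.1Q/802.1ad tag into skb->vlan_tci so the
 * rest of the stack sees the frame as if the tag had been offloaded.
 */
static __maybe_unused struct sk_buff *example_rx_untag(struct sk_buff *skb)
{
	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
	    skb->protocol == cpu_to_be16(ETH_P_8021AD))
		skb = skb_vlan_untag(skb);	/* may consume skb, return NULL */

	return skb;
}
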
Jiri Pirkoe2195122014-11-19 14:05:01 +01004351int skb_ensure_writable(struct sk_buff *skb, int write_len)
4352{
4353 if (!pskb_may_pull(skb, write_len))
4354 return -ENOMEM;
4355
4356 if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
4357 return 0;
4358
4359 return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4360}
4361EXPORT_SYMBOL(skb_ensure_writable);
4362
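/*
 * Minimal sketch (assumed caller): before rewriting the IPv4 TTL in place,
 * make sure the header bytes are linear and not shared with a clone, then
 * patch the header checksum incrementally.
 */
static int __maybe_unused example_decrement_ttl(struct sk_buff *skb)
{
	struct iphdr *iph;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				       sizeof(struct iphdr));
	if (err)
		return err;

	iph = ip_hdr(skb);
	csum_replace2(&iph->check, htons(iph->ttl << 8),
		      htons((iph->ttl - 1) << 8));
	iph->ttl--;
	return 0;
}
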
Jiri Pirko93515d52014-11-19 14:05:02 +01004363/* remove VLAN header from packet and update csum accordingly. */
4364static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
4365{
4366 struct vlan_hdr *vhdr;
4367 unsigned int offset = skb->data - skb_mac_header(skb);
4368 int err;
4369
4370 __skb_push(skb, offset);
4371 err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
4372 if (unlikely(err))
4373 goto pull;
4374
4375 skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
4376
4377 vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
4378 *vlan_tci = ntohs(vhdr->h_vlan_TCI);
4379
4380 memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
4381 __skb_pull(skb, VLAN_HLEN);
4382
4383 vlan_set_encap_proto(skb, vhdr);
4384 skb->mac_header += VLAN_HLEN;
4385
4386 if (skb_network_offset(skb) < ETH_HLEN)
4387 skb_set_network_header(skb, ETH_HLEN);
4388
4389 skb_reset_mac_len(skb);
4390pull:
4391 __skb_pull(skb, offset);
4392
4393 return err;
4394}
4395
4396int skb_vlan_pop(struct sk_buff *skb)
4397{
4398 u16 vlan_tci;
4399 __be16 vlan_proto;
4400 int err;
4401
Jiri Pirkodf8a39d2015-01-13 17:13:44 +01004402 if (likely(skb_vlan_tag_present(skb))) {
Jiri Pirko93515d52014-11-19 14:05:02 +01004403 skb->vlan_tci = 0;
4404 } else {
4405 if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
4406 skb->protocol != htons(ETH_P_8021AD)) ||
4407 skb->len < VLAN_ETH_HLEN))
4408 return 0;
4409
4410 err = __skb_vlan_pop(skb, &vlan_tci);
4411 if (err)
4412 return err;
4413 }
4414 /* move next vlan tag to hw accel tag */
4415 if (likely((skb->protocol != htons(ETH_P_8021Q) &&
4416 skb->protocol != htons(ETH_P_8021AD)) ||
4417 skb->len < VLAN_ETH_HLEN))
4418 return 0;
4419
4420 vlan_proto = skb->protocol;
4421 err = __skb_vlan_pop(skb, &vlan_tci);
4422 if (unlikely(err))
4423 return err;
4424
4425 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
4426 return 0;
4427}
4428EXPORT_SYMBOL(skb_vlan_pop);
4429
4430int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
4431{
Jiri Pirkodf8a39d2015-01-13 17:13:44 +01004432 if (skb_vlan_tag_present(skb)) {
Jiri Pirko93515d52014-11-19 14:05:02 +01004433 unsigned int offset = skb->data - skb_mac_header(skb);
4434 int err;
4435
4436 /* __vlan_insert_tag expects skb->data to point to the mac header.
4437 * So change skb->data before calling it and change it back to the
4438 * original position later.
4439 */
4440 __skb_push(skb, offset);
4441 err = __vlan_insert_tag(skb, skb->vlan_proto,
Jiri Pirkodf8a39d2015-01-13 17:13:44 +01004442 skb_vlan_tag_get(skb));
Jiri Pirko93515d52014-11-19 14:05:02 +01004443 if (err)
4444 return err;
4445 skb->protocol = skb->vlan_proto;
4446 skb->mac_len += VLAN_HLEN;
4447 __skb_pull(skb, offset);
4448
4449 if (skb->ip_summed == CHECKSUM_COMPLETE)
4450 skb->csum = csum_add(skb->csum, csum_partial(skb->data
4451 + (2 * ETH_ALEN), VLAN_HLEN, 0));
4452 }
4453 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
4454 return 0;
4455}
4456EXPORT_SYMBOL(skb_vlan_push);
4457
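/*
 * Minimal sketch of an assumed tag-rewrite action: pop whatever outer VLAN
 * tag is present (offloaded or in-band), then push a fresh 802.1Q tag as an
 * offloaded hint in skb->vlan_tci.
 */
static int __maybe_unused example_rewrite_vlan(struct sk_buff *skb, u16 vid)
{
	int err;

	err = skb_vlan_pop(skb);
	if (err)
		return err;

	return skb_vlan_push(skb, htons(ETH_P_8021Q), vid);
}
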
Eric Dumazet2e4e4412014-09-17 04:49:49 -07004458/**
4459 * alloc_skb_with_frags - allocate skb with page frags
4460 *
Masanari Iidade3f0d02014-10-09 12:58:08 +09004461 * @header_len: size of linear part
4462 * @data_len: needed length in frags
4463 * @max_page_order: max page order desired.
4464 * @errcode: pointer to error code if any
4465 * @gfp_mask: allocation mask
Eric Dumazet2e4e4412014-09-17 04:49:49 -07004466 *
4467 * This can be used to allocate a paged skb, given a maximal order for frags.
4468 */
4469struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
4470 unsigned long data_len,
4471 int max_page_order,
4472 int *errcode,
4473 gfp_t gfp_mask)
4474{
4475 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
4476 unsigned long chunk;
4477 struct sk_buff *skb;
4478 struct page *page;
4479 gfp_t gfp_head;
4480 int i;
4481
4482 *errcode = -EMSGSIZE;
4483 /* Note this test could be relaxed, if we succeed in allocating
4484 * high order pages...
4485 */
4486 if (npages > MAX_SKB_FRAGS)
4487 return NULL;
4488
4489 gfp_head = gfp_mask;
4490 if (gfp_head & __GFP_WAIT)
4491 gfp_head |= __GFP_REPEAT;
4492
4493 *errcode = -ENOBUFS;
4494 skb = alloc_skb(header_len, gfp_head);
4495 if (!skb)
4496 return NULL;
4497
4498 skb->truesize += npages << PAGE_SHIFT;
4499
4500 for (i = 0; npages > 0; i++) {
4501 int order = max_page_order;
4502
4503 while (order) {
4504 if (npages >= 1 << order) {
4505 page = alloc_pages(gfp_mask |
4506 __GFP_COMP |
4507 __GFP_NOWARN |
4508 __GFP_NORETRY,
4509 order);
4510 if (page)
4511 goto fill_page;
4512 /* Do not retry other high order allocations */
4513 order = 1;
4514 max_page_order = 0;
4515 }
4516 order--;
4517 }
4518 page = alloc_page(gfp_mask);
4519 if (!page)
4520 goto failure;
4521fill_page:
4522 chunk = min_t(unsigned long, data_len,
4523 PAGE_SIZE << order);
4524 skb_fill_page_desc(skb, i, page, 0, chunk);
4525 data_len -= chunk;
4526 npages -= 1 << order;
4527 }
4528 return skb;
4529
4530failure:
4531 kfree_skb(skb);
4532 return NULL;
4533}
4534EXPORT_SYMBOL(alloc_skb_with_frags);
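
/*
 * Minimal sketch (assumed caller): build a large datagram whose first
 * @linear bytes live in the skb head and the remainder in page fragments,
 * in the style of sock_alloc_send_pskb().  Note the helper attaches the
 * pages and charges truesize, but skb->len and skb->data_len are left for
 * the caller to account when it copies the payload in.
 */
static __maybe_unused struct sk_buff *example_big_skb(unsigned long linear,
						      unsigned long total)
{
	struct sk_buff *skb;
	int err;

	if (total < linear)
		return NULL;

	skb = alloc_skb_with_frags(linear, total - linear,
				   PAGE_ALLOC_COSTLY_ORDER, &err, GFP_KERNEL);
	if (!skb)
		return NULL;	/* err holds -EMSGSIZE or -ENOBUFS */

	return skb;
}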