blob: a11c731d2ee4d577e5438f5c846083ea8efbeaed [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 *
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090012 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 * Alan Cox : verify_area() now used correctly
14 * Alan Cox : new skbuff lists, look ma no backlogs!
15 * Alan Cox : tidied skbuff lists.
16 * Alan Cox : Now uses generic datagram routines I
17 * added. Also fixed the peek/read crash
18 * from all old Linux datagram code.
19 * Alan Cox : Uses the improved datagram code.
20 * Alan Cox : Added NULL's for socket options.
21 * Alan Cox : Re-commented the code.
22 * Alan Cox : Use new kernel side addressing
23 * Rob Janssen : Correct MTU usage.
24 * Dave Platt : Counter leaks caused by incorrect
25 * interrupt locking and some slightly
26 * dubious gcc output. Can you read
27 * compiler: it said _VOLATILE_
28 * Richard Kooijman : Timestamp fixes.
29 * Alan Cox : New buffers. Use sk->mac.raw.
30 * Alan Cox : sendmsg/recvmsg support.
31 * Alan Cox : Protocol setting support
32 * Alexey Kuznetsov : Untied from IPv4 stack.
33 * Cyrus Durgin : Fixed kerneld for kmod.
34 * Michal Ostrowski : Module initialization cleanup.
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090035 * Ulises Alonso : Frame number limit removal and
Linus Torvalds1da177e2005-04-16 15:20:36 -070036 * packet_set_ring memory leak.
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070037 * Eric Biederman : Allow for > 8 byte hardware addresses.
38 * The convention is that longer addresses
39 * will simply extend the hardware address
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090040 * byte arrays at the end of sockaddr_ll
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070041 * and packet_mreq.
Johann Baudy69e3c752009-05-18 22:11:22 -070042 * Johann Baudy : Added TX RING.
Linus Torvalds1da177e2005-04-16 15:20:36 -070043 *
44 * This program is free software; you can redistribute it and/or
45 * modify it under the terms of the GNU General Public License
46 * as published by the Free Software Foundation; either version
47 * 2 of the License, or (at your option) any later version.
48 *
49 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090050
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <linux/mm.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080053#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070054#include <linux/fcntl.h>
55#include <linux/socket.h>
56#include <linux/in.h>
57#include <linux/inet.h>
58#include <linux/netdevice.h>
59#include <linux/if_packet.h>
60#include <linux/wireless.h>
Herbert Xuffbc6112007-02-04 23:33:10 -080061#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070062#include <linux/kmod.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090063#include <linux/slab.h>
Neil Horman0e3125c2010-11-16 10:26:47 -080064#include <linux/vmalloc.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020065#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070066#include <net/ip.h>
67#include <net/protocol.h>
68#include <linux/skbuff.h>
69#include <net/sock.h>
70#include <linux/errno.h>
71#include <linux/timer.h>
72#include <asm/system.h>
73#include <asm/uaccess.h>
74#include <asm/ioctls.h>
75#include <asm/page.h>
Al Viroa1f8e7f72006-10-19 16:08:53 -040076#include <asm/cacheflush.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070077#include <asm/io.h>
78#include <linux/proc_fs.h>
79#include <linux/seq_file.h>
80#include <linux/poll.h>
81#include <linux/module.h>
82#include <linux/init.h>
Herbert Xu905db442009-01-30 14:12:06 -080083#include <linux/mutex.h>
Eric Dumazet05423b22009-10-26 18:40:35 -070084#include <linux/if_vlan.h>
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -080085#include <linux/virtio_net.h>
Richard Cochraned85b562010-04-07 22:41:28 +000086#include <linux/errqueue.h>
Scott McMillan614f60f2010-06-02 05:53:56 -070087#include <linux/net_tstamp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
89#ifdef CONFIG_INET
90#include <net/inet_common.h>
91#endif
92
Linus Torvalds1da177e2005-04-16 15:20:36 -070093/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 Assumptions:
95 - if device has no dev->hard_header routine, it adds and removes ll header
96 inside itself. In this case ll header is invisible outside of device,
97 but higher levels still should reserve dev->hard_header_len.
 Some devices are clever enough to reallocate the skb when the header
 will not fit in the reserved space (tunnels); other ones are silly
 (PPP).
101 - packet socket receives packets with pulled ll header,
102 so that SOCK_RAW should push it back.
103
104On receive:
105-----------
106
107Incoming, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700108 mac_header -> ll header
109 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110
111Outgoing, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700112 mac_header -> ll header
113 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114
115Incoming, dev->hard_header==NULL
 mac_header -> UNKNOWN position. It is very likely that it points to the ll
 header. PPP does this, which is wrong, because it introduces
 asymmetry between the rx and tx paths.
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700119 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120
121Outgoing, dev->hard_header==NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700122 mac_header -> data. ll header is still not built!
123 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124
125Resume
126 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
127
128
129On transmit:
130------------
131
132dev->hard_header != NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700133 mac_header -> ll header
134 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135
136dev->hard_header == NULL (ll header is added by device, we cannot control it)
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700137 mac_header -> data
138 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
 We should set nh.raw on output to the correct position,
 the packet classifier depends on it.
142 */
143
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144/* Private packet socket structures. */
145
/* One entry in a packet socket's multicast/promiscuous membership list
 * (po->mclist).  Singly linked; flushed by packet_flush_mclist(). */
struct packet_mclist {
	struct packet_mclist	*next;			/* next list entry */
	int			ifindex;		/* device this membership applies to */
	int			count;			/* presumably a refcount for duplicate
							 * joins — confirm at the add/drop sites */
	unsigned short		type;			/* membership type */
	unsigned short		alen;			/* valid length of addr[] */
	unsigned char		addr[MAX_ADDR_LEN];	/* hardware address */
};
/* identical to struct packet_mreq except it has
 * a longer address field (MAX_ADDR_LEN instead of 8 bytes).
 * Used internally so that hardware addresses longer than 8 bytes fit;
 * see the "Allow for > 8 byte hardware addresses" note in the header.
 */
struct packet_mreq_max {
	int		mr_ifindex;		/* interface index */
	unsigned short	mr_type;		/* membership type */
	unsigned short	mr_alen;		/* address length */
	unsigned char	mr_address[MAX_ADDR_LEN];	/* hardware address */
};
David S. Millera2efcfa2007-05-29 13:12:50 -0700163
Johann Baudy69e3c752009-05-18 22:11:22 -0700164static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
165 int closing, int tx_ring);
166
#define PGV_FROM_VMALLOC 1
/* One chunk of ring-buffer memory.  The chunk may come either from the
 * page allocator or from vmalloc(); pgv_to_page() below handles both. */
struct pgv {
	char *buffer;	/* kernel virtual address of the chunk */
};
171
/* A memory-mapped TPACKET ring (used for both rx_ring and tx_ring).
 * The ring is an array of fixed-size frames laid out across the
 * pg_vec[] chunks; see packet_lookup_frame() for the addressing. */
struct packet_ring_buffer {
	struct pgv		*pg_vec;	/* array of memory chunks backing the ring */
	unsigned int		head;		/* current frame index (next to use) */
	unsigned int		frames_per_block;	/* frames in each pg_vec chunk */
	unsigned int		frame_size;	/* bytes per frame */
	unsigned int		frame_max;	/* highest valid frame index */

	unsigned int		pg_vec_order;	/* page allocation order per chunk */
	unsigned int		pg_vec_pages;	/* pages per chunk */
	unsigned int		pg_vec_len;	/* number of chunks in pg_vec[] */

	atomic_t		pending;	/* presumably in-flight tx frames —
						 * confirm against tpacket_snd() */
};
185
186struct packet_sock;
187static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188
189static void packet_flush_mclist(struct sock *sk);
190
/* Per-socket state for AF_PACKET sockets. */
struct packet_sock {
	/* struct sock has to be the first member of packet_sock
	 * so that pkt_sk() can simply cast a struct sock pointer. */
	struct sock		sk;
	struct tpacket_stats	stats;		/* tp_packets / tp_drops counters */
	struct packet_ring_buffer	rx_ring;	/* mmap'ed receive ring */
	struct packet_ring_buffer	tx_ring;	/* mmap'ed transmit ring */
	int			copy_thresh;	/* above this, rx ring keeps a cloned skb */
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;	/* serializes ring (re)configuration */
	unsigned int		running:1,	/* prot_hook is attached*/
				auxdata:1,	/* deliver PACKET_AUXDATA cmsg */
				origdev:1,	/* report orig_dev ifindex, not dev's */
				has_vnet_hdr:1;	/* virtio_net header prepended */
	int			ifindex;	/* bound device */
	__be16			num;		/* bound protocol (network byte order) */
	struct packet_mclist	*mclist;	/* multicast memberships */
	atomic_t		mapped;		/* presumably count of active mmaps —
						 * confirm at the mmap handlers */
	enum tpacket_versions	tp_version;	/* TPACKET_V1 or TPACKET_V2 */
	unsigned int		tp_hdrlen;	/* size of the per-frame header */
	unsigned int		tp_reserve;	/* user-requested headroom in frames */
	unsigned int		tp_loss:1;
	unsigned int		tp_tstamp;	/* SOF_TIMESTAMPING_* flags */
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};
215
/* Per-skb private state, stored in skb->cb while an skb is queued on a
 * packet socket.  Holds the original (pre-trim) length and the address
 * returned to recvmsg().  A BUILD_BUG_ON in packet_rcv() checks that
 * this (plus the variable address tail) fits in skb->cb. */
struct packet_skb_cb {
	unsigned int origlen;		/* skb->len before pskb_trim to snaplen */
	union {
		struct sockaddr_pkt pkt;	/* SOCK_PACKET sockets */
		struct sockaddr_ll ll;		/* AF_PACKET sockets */
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
Herbert Xu8dc41942007-02-04 23:31:32 -0800225
/*
 * Translate an address inside a ring-buffer chunk to its struct page.
 * Chunk memory may come either from vmalloc() or straight from the
 * page allocator, so choose the matching translation.
 */
static inline struct page *pgv_to_page(void *addr)
{
	return is_vmalloc_addr(addr) ? vmalloc_to_page(addr)
				     : virt_to_page(addr);
}
232
/*
 * Publish the status word (TP_STATUS_*) of a ring frame, dispatching on
 * the configured TPACKET header version.  The dcache flush makes the
 * store visible through user space's mapping of the same page on
 * architectures with aliasing caches.  The trailing smp_wmb() pairs
 * with the smp_rmb() in __packet_get_status().
 */
static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	default:
		/* Any other version reaching here is a kernel bug. */
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}
258
/*
 * Read the status word (TP_STATUS_*) of a ring frame, dispatching on
 * the configured TPACKET header version.  The smp_rmb() pairs with the
 * smp_wmb() in __packet_set_status(); the dcache flush discards any
 * stale cache line before reading a word user space may have written
 * through its own mapping of the page.
 */
static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		/* Any other version reaching here is a kernel bug. */
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;	/* not reached; keeps the compiler happy */
	}
}
Johann Baudy69e3c752009-05-18 22:11:22 -0700283
284static void *packet_lookup_frame(struct packet_sock *po,
285 struct packet_ring_buffer *rb,
286 unsigned int position,
287 int status)
288{
289 unsigned int pg_vec_pos, frame_offset;
290 union {
291 struct tpacket_hdr *h1;
292 struct tpacket2_hdr *h2;
293 void *raw;
294 } h;
295
296 pg_vec_pos = position / rb->frames_per_block;
297 frame_offset = position % rb->frames_per_block;
298
Neil Horman0e3125c2010-11-16 10:26:47 -0800299 h.raw = rb->pg_vec[pg_vec_pos].buffer +
300 (frame_offset * rb->frame_size);
Johann Baudy69e3c752009-05-18 22:11:22 -0700301
302 if (status != __packet_get_status(po, h.raw))
303 return NULL;
304
305 return h.raw;
306}
307
308static inline void *packet_current_frame(struct packet_sock *po,
309 struct packet_ring_buffer *rb,
310 int status)
311{
312 return packet_lookup_frame(po, rb, rb->head, status);
313}
314
315static inline void *packet_previous_frame(struct packet_sock *po,
316 struct packet_ring_buffer *rb,
317 int status)
318{
319 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
320 return packet_lookup_frame(po, rb, previous, status);
321}
322
323static inline void packet_increment_head(struct packet_ring_buffer *buff)
324{
325 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
326}
327
/* Convert a generic struct sock to its containing packet_sock.  A plain
 * cast is valid because struct sock is the first member of
 * struct packet_sock (noted at the struct definition). */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}
332
/*
 * Final teardown for a packet socket.  Purges the error queue and
 * sanity-checks that no receive/transmit memory is still charged to the
 * socket and that the socket was properly marked dead before release.
 */
static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	/* All queued skbs should already be gone at this point. */
	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}
347
348
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800349static const struct proto_ops packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800351static const struct proto_ops packet_ops_spkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352
/*
 * Receive handler for SOCK_PACKET (legacy) sockets.  Called from the
 * protocol dispatch with the socket stashed in pt->af_packet_priv.
 * Fills in a sockaddr_pkt in the skb's cb area and queues the skb on
 * the socket.  Always returns 0: the skb is either queued or freed.
 */
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	/* Only deliver within the socket's network namespace. */
	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	/* We may mangle the skb below; get a private copy if shared. */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}
418
419
420/*
421 * Output a raw packet to a device layer. This bypasses all the other
422 * protocol layers and you must therefore supply it with a complete frame
423 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900424
/*
 * sendmsg() for SOCK_PACKET sockets: transmit one raw frame on the
 * device named in the sockaddr_pkt address, bypassing all protocol
 * layers.  The caller must supply a complete frame and an address.
 *
 * Allocation dance: the device is looked up under RCU, but the skb
 * allocation may sleep, so on the first pass we drop the RCU lock,
 * allocate and fill the skb, then jump back to "retry" to re-validate
 * the device before actually transmitting.
 *
 * Returns the number of bytes sent, or a negative errno.
 */
static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	/* Force NUL termination of the device name; index 13 is
	 * presumably the last byte of spkt_device — confirm its size. */
	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	if (!skb) {
		/* First pass: no skb yet.  Drop RCU (sock_wmalloc may
		 * sleep with GFP_KERNEL), build the skb, then retry the
		 * device lookup from scratch. */
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}


	/* Second pass: device re-validated, skb filled — transmit. */
	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_unlock;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);	/* kfree_skb(NULL) is a no-op on the first pass */
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519
/*
 * Run the socket's attached BPF filter (if any) over @skb under the
 * RCU BH read lock.  @res is the default snap length used when no
 * filter is attached; the filter's verdict replaces it (callers treat
 * 0 as "drop", any other value as the number of bytes to keep).
 */
static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference_bh(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns);
	rcu_read_unlock_bh();

	return res;
}
533
534/*
 This function performs lazy skb cloning in the hope that most packets
 are discarded by BPF.
537
538 Note tricky part: we DO mangle shared skb! skb->data, skb->len
539 and skb->cb are mangled. It works because (and until) packets
540 falling here are owned by current CPU. Output packets are cloned
541 by dev_queue_xmit_nit(), input packets are processed by net_bh
 sequentially, so that if we return skb to original state on exit,
543 we will not harm anyone.
544 */
545
/*
 * Receive handler for AF_PACKET sockets without an rx ring.  Runs the
 * BPF filter, builds a sockaddr_ll in the skb's cb area, trims to the
 * snap length and queues the skb on the socket.
 *
 * Per the "lazy skb cloning" note above: a shared skb IS mangled here
 * (data/len/cb), and restored on the drop paths via skb_head/skb_len.
 * Always returns 0: the skb is either queued or freed.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;	/* saved for restore on drop paths */
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	/* Only deliver within the socket's network namespace. */
	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	/* Filter verdict: 0 = drop, otherwise bytes to keep. */
	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	/* Receive buffer accounting: drop (and count) when full. */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		/* Queueing needs a private skb; clone, restore the
		 * original's data/len, and continue with the clone. */
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	/* The address we build below must fit in skb->cb. */
	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	/* Remember the pre-trim length for PACKET_AUXDATA. */
	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	/* Undo our mangling if others still hold a reference. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}
655
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000656static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
657 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658{
659 struct sock *sk;
660 struct packet_sock *po;
661 struct sockaddr_ll *sll;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700662 union {
663 struct tpacket_hdr *h1;
664 struct tpacket2_hdr *h2;
665 void *raw;
666 } h;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000667 u8 *skb_head = skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800669 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700671 unsigned short macoff, netoff, hdrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672 struct sk_buff *copy_skb = NULL;
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -0700673 struct timeval tv;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700674 struct timespec ts;
Scott McMillan614f60f2010-06-02 05:53:56 -0700675 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676
677 if (skb->pkt_type == PACKET_LOOPBACK)
678 goto drop;
679
680 sk = pt->af_packet_priv;
681 po = pkt_sk(sk);
682
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800683 if (!net_eq(dev_net(dev), sock_net(sk)))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800684 goto drop;
685
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700686 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700688 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 else if (skb->pkt_type == PACKET_OUTGOING) {
690 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300691 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 }
693 }
694
Herbert Xu8dc41942007-02-04 23:31:32 -0800695 if (skb->ip_summed == CHECKSUM_PARTIAL)
696 status |= TP_STATUS_CSUMNOTREADY;
697
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 snaplen = skb->len;
699
David S. Millerdbcb5852007-01-24 15:21:02 -0800700 res = run_filter(skb, sk, snaplen);
701 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700702 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800703 if (snaplen > res)
704 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705
706 if (sk->sk_type == SOCK_DGRAM) {
Patrick McHardy8913336a2008-07-18 18:05:19 -0700707 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
708 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 } else {
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300710 unsigned maclen = skb_network_offset(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700711 netoff = TPACKET_ALIGN(po->tp_hdrlen +
Patrick McHardy8913336a2008-07-18 18:05:19 -0700712 (maclen < 16 ? 16 : maclen)) +
713 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 macoff = netoff - maclen;
715 }
716
Johann Baudy69e3c752009-05-18 22:11:22 -0700717 if (macoff + snaplen > po->rx_ring.frame_size) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 if (po->copy_thresh &&
719 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
720 (unsigned)sk->sk_rcvbuf) {
721 if (skb_shared(skb)) {
722 copy_skb = skb_clone(skb, GFP_ATOMIC);
723 } else {
724 copy_skb = skb_get(skb);
725 skb_head = skb->data;
726 }
727 if (copy_skb)
728 skb_set_owner_r(copy_skb, sk);
729 }
Johann Baudy69e3c752009-05-18 22:11:22 -0700730 snaplen = po->rx_ring.frame_size - macoff;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 if ((int)snaplen < 0)
732 snaplen = 0;
733 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734
735 spin_lock(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -0700736 h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700737 if (!h.raw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738 goto ring_is_full;
Johann Baudy69e3c752009-05-18 22:11:22 -0700739 packet_increment_head(&po->rx_ring);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 po->stats.tp_packets++;
741 if (copy_skb) {
742 status |= TP_STATUS_COPY;
743 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
744 }
745 if (!po->stats.tp_drops)
746 status &= ~TP_STATUS_LOSING;
747 spin_unlock(&sk->sk_receive_queue.lock);
748
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700749 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700751 switch (po->tp_version) {
752 case TPACKET_V1:
753 h.h1->tp_len = skb->len;
754 h.h1->tp_snaplen = snaplen;
755 h.h1->tp_mac = macoff;
756 h.h1->tp_net = netoff;
Scott McMillan614f60f2010-06-02 05:53:56 -0700757 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
758 && shhwtstamps->syststamp.tv64)
759 tv = ktime_to_timeval(shhwtstamps->syststamp);
760 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
761 && shhwtstamps->hwtstamp.tv64)
762 tv = ktime_to_timeval(shhwtstamps->hwtstamp);
763 else if (skb->tstamp.tv64)
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700764 tv = ktime_to_timeval(skb->tstamp);
765 else
766 do_gettimeofday(&tv);
767 h.h1->tp_sec = tv.tv_sec;
768 h.h1->tp_usec = tv.tv_usec;
769 hdrlen = sizeof(*h.h1);
770 break;
771 case TPACKET_V2:
772 h.h2->tp_len = skb->len;
773 h.h2->tp_snaplen = snaplen;
774 h.h2->tp_mac = macoff;
775 h.h2->tp_net = netoff;
Scott McMillan614f60f2010-06-02 05:53:56 -0700776 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
777 && shhwtstamps->syststamp.tv64)
778 ts = ktime_to_timespec(shhwtstamps->syststamp);
779 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
780 && shhwtstamps->hwtstamp.tv64)
781 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
782 else if (skb->tstamp.tv64)
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700783 ts = ktime_to_timespec(skb->tstamp);
784 else
785 getnstimeofday(&ts);
786 h.h2->tp_sec = ts.tv_sec;
787 h.h2->tp_nsec = ts.tv_nsec;
Eric Dumazet05423b22009-10-26 18:40:35 -0700788 h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700789 hdrlen = sizeof(*h.h2);
790 break;
791 default:
792 BUG();
793 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700795 sll = h.raw + TPACKET_ALIGN(hdrlen);
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700796 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 sll->sll_family = AF_PACKET;
798 sll->sll_hatype = dev->type;
799 sll->sll_protocol = skb->protocol;
800 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800801 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700802 sll->sll_ifindex = orig_dev->ifindex;
803 else
804 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700806 __packet_set_status(po, h.raw, status);
Ralf Baechlee16aa202006-12-07 00:11:33 -0800807 smp_mb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 {
Changli Gao0af55bb2010-12-01 02:52:20 +0000809 u8 *start, *end;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810
Changli Gao0af55bb2010-12-01 02:52:20 +0000811 end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
812 for (start = h.raw; start < end; start += PAGE_SIZE)
813 flush_dcache_page(pgv_to_page(start));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814 }
815
816 sk->sk_data_ready(sk, 0);
817
818drop_n_restore:
819 if (skb_head != skb->data && skb_shared(skb)) {
820 skb->data = skb_head;
821 skb->len = skb_len;
822 }
823drop:
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900824 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 return 0;
826
827ring_is_full:
828 po->stats.tp_drops++;
829 spin_unlock(&sk->sk_receive_queue.lock);
830
831 sk->sk_data_ready(sk, 0);
Wei Yongjunacb5d752009-02-25 00:36:42 +0000832 kfree_skb(copy_skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 goto drop_n_restore;
834}
835
/*
 * skb destructor for packets built from the mmap'ed TX ring.
 *
 * Releases the originating ring frame back to user space: the frame was
 * stashed in destructor_arg by tpacket_fill_skb() and must still be in
 * TP_STATUS_SENDING here.  Also drops the ring's in-flight counter and
 * finally releases the write-memory accounting via sock_wfree().
 */
static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	BUG_ON(skb == NULL);

	if (likely(po->tx_ring.pg_vec)) {
		/* Frame pointer saved by tpacket_fill_skb(). */
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		/* Hand the frame back to user space for reuse. */
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}
853
/*
 * Build one skb from a single TX-ring frame.
 *
 * @frame points at the frame header inside the mmap'ed TX ring; the
 * payload follows the header/sockaddr area.  Payload pages are attached
 * to the skb as page fragments (zero-copy), so the frame must remain in
 * TP_STATUS_SENDING until tpacket_destruct_skb() runs.
 *
 * Returns the number of payload bytes queued (tp_len) on success, or a
 * negative errno (-EMSGSIZE, -EINVAL, -EFAULT, or the skb_store_bits()
 * error) on failure.
 */
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	/* Remember the ring frame so the destructor can release it. */
	skb_shinfo(skb)->destructor_arg = ph.raw;

	/* tp_len sits at a version-dependent offset in the frame header. */
	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	/* Payload starts right after the header + sockaddr_ll area. */
	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		/* SOCK_DGRAM: the kernel builds the link-layer header. */
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* SOCK_RAW: the user supplied the header; copy it into the
		 * linear area because the...
		 */
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		/* Remaining payload goes in as page fragments. */
		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	/* Attach the ring pages as skb fragments, one page per iteration. */
	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		/* Hold a page reference for the lifetime of the skb frag. */
		get_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}
951
/*
 * Transmit path for a mapped TX ring (PACKET_TX_RING).
 *
 * Walks the ring, turning every frame in TP_STATUS_SEND_REQUEST state
 * into an skb (tpacket_fill_skb()) and handing it to dev_queue_xmit().
 * Frames are marked TP_STATUS_SENDING while in flight; the skb
 * destructor flips them back to TP_STATUS_AVAILABLE.  Without
 * MSG_DONTWAIT the loop keeps going until all in-flight frames drain.
 *
 * Returns the total number of payload bytes queued, or a negative errno.
 */
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	/* Serialize against ring reconfiguration (packet_set_ring). */
	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		/* No explicit address: use the socket's bound device/proto. */
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	/* Largest payload a frame can carry, capped by the device MTU. */
	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			/* No frame ready; yield and re-check the ring. */
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				/* PACKET_LOSS: silently discard the bad
				 * frame and keep the ring moving.
				 */
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	/* Report the failure on the frame we were working on. */
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001080static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
1081 size_t reserve, size_t len,
1082 size_t linear, int noblock,
1083 int *err)
1084{
1085 struct sk_buff *skb;
1086
1087 /* Under a page? Don't bother with paged skb. */
1088 if (prepad + len < PAGE_SIZE || !linear)
1089 linear = len;
1090
1091 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1092 err);
1093 if (!skb)
1094 return NULL;
1095
1096 skb_reserve(skb, reserve);
1097 skb_put(skb, linear);
1098 skb->data_len = len - linear;
1099 skb->len += len - linear;
1100
1101 return skb;
1102}
1103
/*
 * Regular (non-ring) transmit path for PF_PACKET sockets.
 *
 * Resolves the destination device (from msg_name or the socket's bound
 * ifindex), optionally parses a leading virtio_net_hdr when
 * PACKET_VNET_HDR is enabled, copies the user payload into a fresh skb
 * and queues it with dev_queue_xmit().
 *
 * Returns the number of bytes sent (including the vnet header when
 * present) or a negative errno.
 */
static int packet_snd(struct socket *sock,
			  struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		/* No explicit address: use the socket's bound device/proto. */
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	/* SOCK_RAW: the user supplies the link-layer header too. */
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		/* A struct virtio_net_hdr precedes the packet data. */
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		/* hdr_len must cover at least the checksum fields. */
		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		     vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			/* Translate virtio GSO types to skb GSO types. */
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	/* Non-GSO packets must fit into a single MTU-sized frame. */
	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu+reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		/* Apply the offload hints carried in the vnet header. */
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		/* Report the vnet header as part of the bytes sent. */
		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	/* dev_queue_xmit() consumes the skb even on failure. */
	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
1269
Johann Baudy69e3c752009-05-18 22:11:22 -07001270static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1271 struct msghdr *msg, size_t len)
1272{
Johann Baudy69e3c752009-05-18 22:11:22 -07001273 struct sock *sk = sock->sk;
1274 struct packet_sock *po = pkt_sk(sk);
1275 if (po->tx_ring.pg_vec)
1276 return tpacket_snd(po, msg);
1277 else
Johann Baudy69e3c752009-05-18 22:11:22 -07001278 return packet_snd(sock, msg, len);
1279}
1280
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281/*
1282 * Close a PACKET socket. This is fairly simple. We immediately go
1283 * to 'closed' state and remove our protocol entry in the device list.
1284 */
1285
static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	/* Unhash the socket so no new lookups can find it. */
	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/*
		 *	Remove from protocol table
		 */
		po->running = 0;
		po->num = 0;
		__dev_remove_pack(&po->prot_hook);
		/* Drop the reference the prot_hook held on the socket. */
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	/* A zeroed request tells packet_set_ring() to tear the ring down. */
	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	/* Wait for in-flight RCU readers of the socket list. */
	synchronize_net();
	/*
	 *	Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}
1341
1342/*
1343 * Attach a packet hook.
1344 */
1345
/*
 * Attach a packet hook: (re)register the socket's prot_hook for
 * @protocol on @dev (or all devices when @dev is NULL).
 *
 * Any existing hook is detached first; the bind_lock is dropped around
 * dev_remove_pack() because that function can sleep.  A protocol of 0
 * leaves the socket unbound.  If the target device is down the bind
 * still records device/protocol but reports ENETDOWN on the socket.
 */
static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/* Drop the hook's socket reference before removal. */
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		/* dev_remove_pack() may sleep; must not hold bind_lock. */
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		/* The registered hook holds a reference on the socket. */
		sock_hold(sk);
		po->running = 1;
	} else {
		/* Device is down: report it, binding stays recorded. */
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}
1389
1390/*
1391 * Bind a packet socket to a device
1392 */
1393
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001394static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1395 int addr_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001397 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 char name[15];
1399 struct net_device *dev;
1400 int err = -ENODEV;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001401
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 /*
1403 * Check legality
1404 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001405
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001406 if (addr_len != sizeof(struct sockaddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407 return -EINVAL;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001408 strlcpy(name, uaddr->sa_data, sizeof(name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001410 dev = dev_get_by_name(sock_net(sk), name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411 if (dev) {
1412 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1413 dev_put(dev);
1414 }
1415 return err;
1416}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417
1418static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1419{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001420 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1421 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 struct net_device *dev = NULL;
1423 int err;
1424
1425
1426 /*
1427 * Check legality
1428 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001429
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 if (addr_len < sizeof(struct sockaddr_ll))
1431 return -EINVAL;
1432 if (sll->sll_family != AF_PACKET)
1433 return -EINVAL;
1434
1435 if (sll->sll_ifindex) {
1436 err = -ENODEV;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001437 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 if (dev == NULL)
1439 goto out;
1440 }
1441 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1442 if (dev)
1443 dev_put(dev);
1444
1445out:
1446 return err;
1447}
1448
/* Protocol description handed to sk_alloc(); obj_size makes the core
 * reserve room for the whole struct packet_sock. */
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};
1454
1455/*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001456 * Create a packet of type SOCK_PACKET.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457 */
1458
static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	/* Raw packet access is privileged. */
	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	/* Legacy SOCK_PACKET sockets use their own ops table. */
	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		/* Non-zero protocol: start receiving immediately; the
		 * registered hook holds a reference on the socket.
		 */
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	/* Make the socket visible on the per-namespace list. */
	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
out:
	return err;
}
1522
/*
 * MSG_ERRQUEUE receive path: deliver one queued error skb (typically a
 * TX timestamp) to the user along with its sock_extended_err ancillary
 * data, then regenerate sk_err from whatever remains on the queue.
 *
 * Returns the number of bytes copied, or a negative errno (-EAGAIN when
 * the error queue is empty).
 */
static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	/* Truncate to the caller's buffer, flagging the truncation. */
	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	/* Attach the extended-error record as a control message. */
	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		/* More errors pending: re-arm sk_err and notify. */
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
1567
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568/*
1569 * Pull a packet from our receive queue and hand it to the user.
1570 * If necessary we block.
1571 */
1572
1573static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1574 struct msghdr *msg, size_t len, int flags)
1575{
1576 struct sock *sk = sock->sk;
1577 struct sk_buff *skb;
1578 int copied, err;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001579 struct sockaddr_ll *sll;
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001580 int vnet_hdr_len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581
1582 err = -EINVAL;
Richard Cochraned85b562010-04-07 22:41:28 +00001583 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584 goto out;
1585
1586#if 0
1587 /* What error should we return now? EUNATTACH? */
1588 if (pkt_sk(sk)->ifindex < 0)
1589 return -ENODEV;
1590#endif
1591
Richard Cochraned85b562010-04-07 22:41:28 +00001592 if (flags & MSG_ERRQUEUE) {
1593 err = packet_recv_error(sk, msg, len);
1594 goto out;
1595 }
1596
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598 * Call the generic datagram receiver. This handles all sorts
1599 * of horrible races and re-entrancy so we can forget about it
1600 * in the protocol layers.
1601 *
1602 * Now it will return ENETDOWN, if device have just gone down,
1603 * but then it will block.
1604 */
1605
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001606 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607
1608 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001609 * An error occurred so return it. Because skb_recv_datagram()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610 * handles the blocking we don't see and worry about blocking
1611 * retries.
1612 */
1613
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001614 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615 goto out;
1616
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001617 if (pkt_sk(sk)->has_vnet_hdr) {
1618 struct virtio_net_hdr vnet_hdr = { 0 };
1619
1620 err = -EINVAL;
1621 vnet_hdr_len = sizeof(vnet_hdr);
Mariusz Kozlowski1f18b712010-11-08 11:58:45 +00001622 if (len < vnet_hdr_len)
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001623 goto out_free;
1624
Mariusz Kozlowski1f18b712010-11-08 11:58:45 +00001625 len -= vnet_hdr_len;
1626
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001627 if (skb_is_gso(skb)) {
1628 struct skb_shared_info *sinfo = skb_shinfo(skb);
1629
1630 /* This is a hint as to how much should be linear. */
1631 vnet_hdr.hdr_len = skb_headlen(skb);
1632 vnet_hdr.gso_size = sinfo->gso_size;
1633 if (sinfo->gso_type & SKB_GSO_TCPV4)
1634 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1635 else if (sinfo->gso_type & SKB_GSO_TCPV6)
1636 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1637 else if (sinfo->gso_type & SKB_GSO_UDP)
1638 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
1639 else if (sinfo->gso_type & SKB_GSO_FCOE)
1640 goto out_free;
1641 else
1642 BUG();
1643 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
1644 vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1645 } else
1646 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
1647
1648 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1649 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1650 vnet_hdr.csum_start = skb->csum_start -
1651 skb_headroom(skb);
1652 vnet_hdr.csum_offset = skb->csum_offset;
1653 } /* else everything is zero */
1654
1655 err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
1656 vnet_hdr_len);
1657 if (err < 0)
1658 goto out_free;
1659 }
1660
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661 /*
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001662 * If the address length field is there to be filled in, we fill
1663 * it in now.
1664 */
1665
Herbert Xuffbc6112007-02-04 23:33:10 -08001666 sll = &PACKET_SKB_CB(skb)->sa.ll;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001667 if (sock->type == SOCK_PACKET)
1668 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1669 else
1670 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1671
1672 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673 * You lose any data beyond the buffer you gave. If it worries a
1674 * user program they can ask the device for its MTU anyway.
1675 */
1676
1677 copied = skb->len;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001678 if (copied > len) {
1679 copied = len;
1680 msg->msg_flags |= MSG_TRUNC;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 }
1682
1683 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1684 if (err)
1685 goto out_free;
1686
Neil Horman3b885782009-10-12 13:26:31 -07001687 sock_recv_ts_and_drops(msg, sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688
1689 if (msg->msg_name)
Herbert Xuffbc6112007-02-04 23:33:10 -08001690 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1691 msg->msg_namelen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692
Herbert Xu8dc41942007-02-04 23:31:32 -08001693 if (pkt_sk(sk)->auxdata) {
Herbert Xuffbc6112007-02-04 23:33:10 -08001694 struct tpacket_auxdata aux;
1695
1696 aux.tp_status = TP_STATUS_USER;
1697 if (skb->ip_summed == CHECKSUM_PARTIAL)
1698 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1699 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1700 aux.tp_snaplen = skb->len;
1701 aux.tp_mac = 0;
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001702 aux.tp_net = skb_network_offset(skb);
Eric Dumazet05423b22009-10-26 18:40:35 -07001703 aux.tp_vlan_tci = vlan_tx_tag_get(skb);
Herbert Xuffbc6112007-02-04 23:33:10 -08001704
1705 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
Herbert Xu8dc41942007-02-04 23:31:32 -08001706 }
1707
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 /*
1709 * Free or return the buffer as appropriate. Again this
1710 * hides all the races and re-entrancy issues from us.
1711 */
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001712 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713
1714out_free:
1715 skb_free_datagram(sk, skb);
1716out:
1717 return err;
1718}
1719
/*
 *	getname() for SOCK_PACKET sockets: report the bound interface name
 *	in the generic sockaddr sa_data field (legacy ABI).
 *	@peer: peer addresses are meaningless for packet sockets -> EOPNOTSUPP.
 */
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	/* RCU is enough: we only read dev->name, we take no reference. */
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		/* sa_data is only 14 bytes; strncpy does NOT guarantee a
		 * NUL terminator if the name fills all 14 (legacy ABI). */
		strncpy(uaddr->sa_data, dev->name, 14);
	else
		/* Not bound to a live device: return an all-zero name. */
		memset(uaddr->sa_data, 0, 14);
	rcu_read_unlock();
	*uaddr_len = sizeof(*uaddr);

	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741
/*
 *	getname() for AF_PACKET sockets: fill a sockaddr_ll with the bound
 *	ifindex/protocol and the device's current hardware address.
 *	@peer: peer addresses are meaningless for packet sockets -> EOPNOTSUPP.
 */
static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	/* sll_pkttype is output-only on receive; zero it here so we do not
	 * leak stack garbage to user space. */
	sll->sll_pkttype = 0;
	/* RCU read side: we only copy the address, no device reference. */
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	rcu_read_unlock();
	/* Report only the bytes actually filled in (variable halen). */
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}
1772
Wang Chen2aeb0b82008-07-14 20:49:46 -07001773static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1774 int what)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775{
1776 switch (i->type) {
1777 case PACKET_MR_MULTICAST:
Jiri Pirko11625632010-03-02 20:40:01 +00001778 if (i->alen != dev->addr_len)
1779 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780 if (what > 0)
Jiri Pirko22bedad32010-04-01 21:22:57 +00001781 return dev_mc_add(dev, i->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782 else
Jiri Pirko22bedad32010-04-01 21:22:57 +00001783 return dev_mc_del(dev, i->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784 break;
1785 case PACKET_MR_PROMISC:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001786 return dev_set_promiscuity(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 break;
1788 case PACKET_MR_ALLMULTI:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001789 return dev_set_allmulti(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790 break;
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001791 case PACKET_MR_UNICAST:
Jiri Pirko11625632010-03-02 20:40:01 +00001792 if (i->alen != dev->addr_len)
1793 return -EINVAL;
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001794 if (what > 0)
Jiri Pirkoa748ee22010-04-01 21:22:09 +00001795 return dev_uc_add(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001796 else
Jiri Pirkoa748ee22010-04-01 21:22:09 +00001797 return dev_uc_del(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001798 break;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001799 default:
1800 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801 }
Wang Chen2aeb0b82008-07-14 20:49:46 -07001802 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803}
1804
1805static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1806{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001807 for ( ; i; i = i->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808 if (i->ifindex == dev->ifindex)
1809 packet_dev_mc(dev, i, what);
1810 }
1811}
1812
/*
 *	Add a membership (multicast/unicast filter, promisc or allmulti
 *	mode) to the socket and program it into the target device.
 *	Entries are reference counted: adding an identical entry again
 *	only bumps ->count.  Runs entirely under RTNL so the device
 *	cannot go away while we program it.
 *	Returns 0 on success or a negative errno.
 */
static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	/* The request may not carry a longer address than the device has. */
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	/* Allocate up front so the list walk below cannot fail midway. */
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	/* Identical entry already present?  Just take another reference. */
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	/* Link first, then program the device; unlink again on failure. */
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}
1866
/*
 *	Drop one reference on a membership entry; when the count hits
 *	zero, unlink it, deprogram the device (if it still exists) and
 *	free the entry.  Runs under RTNL.
 *	Returns 0 if a matching entry was found, -EADDRNOTAVAIL otherwise.
 */
static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	/* Walk with a pointer-to-pointer so we can unlink in place. */
	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				/* Device may already be gone; that is fine,
				 * its filters died with it. */
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}
1893
1894static void packet_flush_mclist(struct sock *sk)
1895{
1896 struct packet_sock *po = pkt_sk(sk);
1897 struct packet_mclist *ml;
1898
1899 if (!po->mclist)
1900 return;
1901
1902 rtnl_lock();
1903 while ((ml = po->mclist) != NULL) {
1904 struct net_device *dev;
1905
1906 po->mclist = ml->next;
Eric Dumazetad959e72009-10-16 06:38:46 +00001907 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1908 if (dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001909 packet_dev_mc(dev, ml, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910 kfree(ml);
1911 }
1912 rtnl_unlock();
1913}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914
/*
 *	setsockopt() handler for SOL_PACKET.
 *	Copies the option value in from user space and updates the
 *	packet_sock.  Ring-geometry-dependent options (version, reserve,
 *	loss, vnet_hdr) are refused with -EBUSY once a ring is mapped.
 *	Returns 0 or a negative errno.
 */
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		/* Zero-fill so a short (legacy packet_mreq sized) copy
		 * leaves the tail of the larger struct well defined. */
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		/* The declared address length must fit in what was copied. */
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		/* mmap'ed rings and the virtio_net header prefix are
		 * mutually exclusive. */
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		/* The header layout cannot change under a live ring. */
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		/* Headroom is baked into the frame layout at ring setup. */
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		/* virtio_net headers only make sense for SOCK_RAW and
		 * are incompatible with mmap'ed rings (see above). */
		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
	case PACKET_TIMESTAMP:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->tp_tstamp = val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
2073
/*
 *	getsockopt() handler for SOL_PACKET.
 *	Builds the requested value in a local (`data` points at it),
 *	then copies min(len, value size) bytes back to user space and
 *	reports the copied length through *optlen.
 *	PACKET_STATISTICS additionally resets the counters atomically
 *	under the receive-queue lock (read-and-clear semantics).
 */
static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		/* Snapshot and clear under the same lock the hot path
		 * uses to bump the counters. */
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		/* ABI: tp_packets reported to user space includes drops. */
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	case PACKET_VNET_HDR:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->has_vnet_hdr;

		data = &val;
		break;
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		/* Unusual for getsockopt: the caller passes the TPACKET
		 * version IN via optval and gets the header length back.
		 * NOTE(review): if user space passes len < sizeof(int),
		 * this reads fewer bytes and `val` is partly
		 * uninitialized before the switch — confirm whether a
		 * strict length check is wanted here. */
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;
		data = &val;
		break;
	case PACKET_LOSS:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_loss;
		data = &val;
		break;
	case PACKET_TIMESTAMP:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_tstamp;
		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
2177
2178
/*
 *	Netdevice notifier: keep every packet socket in the device's netns
 *	consistent with device state changes.
 *	- NETDEV_UNREGISTER: drop the socket's memberships on the device,
 *	  then fall through to the DOWN handling and also forget the
 *	  binding (ifindex = -1, prot_hook.dev = NULL).
 *	- NETDEV_DOWN: detach the protocol hook and flag ENETDOWN.
 *	- NETDEV_UP: re-attach the hook for sockets still bound by number.
 *	The socket list is walked under RCU; per-socket state changes are
 *	serialized with bind via po->bind_lock.
 */
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	sk_for_each_rcu(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					/* Drop the ref the hook held. */
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					/* Device is going away for good:
					 * forget the binding entirely. */
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				/* Re-arm only sockets that still have a
				 * protocol bound and are not yet running. */
				if (po->num && !po->running) {
					dev_add_pack(&po->prot_hook);
					sock_hold(sk);
					po->running = 1;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		}
	}
	rcu_read_unlock();
	return NOTIFY_DONE;
}
2230
2231
/*
 *	ioctl() handler for packet sockets.
 *	Handles queue-length queries (SIOCOUTQ/SIOCINQ) and timestamp
 *	retrieval locally; forwards the classic interface/routing/ARP
 *	ioctls to the inet datagram ops when IPv4 is built in.
 */
static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		/* Bytes queued for transmit, not yet freed. */
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		/* Report the size of the next pending packet (0 if none),
		 * not the total queue occupancy. */
		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		/* Legacy convenience: let AF_PACKET sockets drive the
		 * usual interface-configuration ioctls. */
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
2284
/*
 *	poll() for packet sockets.  Starts from the generic datagram mask,
 *	then adds readiness derived from the mmap rings:
 *	- RX ring: readable when the frame before the head is no longer
 *	  owned by the kernel (i.e. at least one frame awaits user space).
 *	- TX ring: writable when the current frame is AVAILABLE.
 *	Each ring is inspected under its queue lock to synchronize with
 *	the fast paths that flip frame status.
 */
static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}
2306
2307
2308/* Dirty? Well, I still did not learn better way to account
2309 * for user mmaps.
2310 */
2311
2312static void packet_mm_open(struct vm_area_struct *vma)
2313{
2314 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002315 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002317
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318 if (sk)
2319 atomic_inc(&pkt_sk(sk)->mapped);
2320}
2321
2322static void packet_mm_close(struct vm_area_struct *vma)
2323{
2324 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002325 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002327
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 if (sk)
2329 atomic_dec(&pkt_sk(sk)->mapped);
2330}
2331
/* VM callbacks for ring mappings: track how many user mappings exist
 * (pkt_sk(sk)->mapped) via the open/close hooks above. */
static const struct vm_operations_struct packet_mmap_ops = {
	.open	=	packet_mm_open,
	.close	=	packet_mm_close,
};
2336
/*
 *	Free every block buffer in @pg_vec and then the vector itself.
 *	Buffers come either from __get_free_pages() or from vmalloc
 *	(see alloc_one_pg_vec_page()); is_vmalloc_addr() selects the
 *	matching release function.
 *	NOTE: this kfree()s @pg_vec as well — callers must not free the
 *	vector again after calling this.
 */
static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
			unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i].buffer)) {
			if (is_vmalloc_addr(pg_vec[i].buffer))
				vfree(pg_vec[i].buffer);
			else
				free_pages((unsigned long)pg_vec[i].buffer,
					   order);
			pg_vec[i].buffer = NULL;
		}
	}
	kfree(pg_vec);
}
2354
Changli Gaoc56b4d92010-12-01 02:52:57 +00002355static inline char *alloc_one_pg_vec_page(unsigned long order)
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002356{
Neil Horman0e3125c2010-11-16 10:26:47 -08002357 char *buffer = NULL;
2358 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
2359 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
Eric Dumazet719bfea2009-04-15 03:39:52 -07002360
Neil Horman0e3125c2010-11-16 10:26:47 -08002361 buffer = (char *) __get_free_pages(gfp_flags, order);
2362
2363 if (buffer)
2364 return buffer;
2365
2366 /*
2367 * __get_free_pages failed, fall back to vmalloc
2368 */
Eric Dumazetbbce5a52010-11-20 07:31:54 +00002369 buffer = vzalloc((1 << order) * PAGE_SIZE);
Neil Horman0e3125c2010-11-16 10:26:47 -08002370
2371 if (buffer)
2372 return buffer;
2373
2374 /*
2375 * vmalloc failed, lets dig into swap here
2376 */
Neil Horman0e3125c2010-11-16 10:26:47 -08002377 gfp_flags &= ~__GFP_NORETRY;
2378 buffer = (char *)__get_free_pages(gfp_flags, order);
2379 if (buffer)
2380 return buffer;
2381
2382 /*
2383 * complete and utter failure
2384 */
2385 return NULL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002386}
2387
Neil Horman0e3125c2010-11-16 10:26:47 -08002388static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002389{
2390 unsigned int block_nr = req->tp_block_nr;
Neil Horman0e3125c2010-11-16 10:26:47 -08002391 struct pgv *pg_vec;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002392 int i;
2393
Neil Horman0e3125c2010-11-16 10:26:47 -08002394 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002395 if (unlikely(!pg_vec))
2396 goto out;
2397
2398 for (i = 0; i < block_nr; i++) {
Changli Gaoc56b4d92010-12-01 02:52:57 +00002399 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
Neil Horman0e3125c2010-11-16 10:26:47 -08002400 if (unlikely(!pg_vec[i].buffer))
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002401 goto out_free_pgvec;
2402 }
2403
2404out:
2405 return pg_vec;
2406
2407out_free_pgvec:
2408 free_pg_vec(pg_vec, order, block_nr);
Neil Horman0e3125c2010-11-16 10:26:47 -08002409 kfree(pg_vec);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002410 pg_vec = NULL;
2411 goto out;
2412}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413
Johann Baudy69e3c752009-05-18 22:11:22 -07002414static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2415 int closing, int tx_ring)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416{
Neil Horman0e3125c2010-11-16 10:26:47 -08002417 struct pgv *pg_vec = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002418 struct packet_sock *po = pkt_sk(sk);
Al Viro0e11c912006-11-08 00:26:29 -08002419 int was_running, order = 0;
Johann Baudy69e3c752009-05-18 22:11:22 -07002420 struct packet_ring_buffer *rb;
2421 struct sk_buff_head *rb_queue;
Al Viro0e11c912006-11-08 00:26:29 -08002422 __be16 num;
Johann Baudy69e3c752009-05-18 22:11:22 -07002423 int err;
2424
2425 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2426 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
2427
2428 err = -EBUSY;
2429 if (!closing) {
2430 if (atomic_read(&po->mapped))
2431 goto out;
2432 if (atomic_read(&rb->pending))
2433 goto out;
2434 }
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002435
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436 if (req->tp_block_nr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437 /* Sanity tests and some calculations */
Johann Baudy69e3c752009-05-18 22:11:22 -07002438 err = -EBUSY;
2439 if (unlikely(rb->pg_vec))
2440 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441
Patrick McHardybbd6ef82008-07-14 22:50:15 -07002442 switch (po->tp_version) {
2443 case TPACKET_V1:
2444 po->tp_hdrlen = TPACKET_HDRLEN;
2445 break;
2446 case TPACKET_V2:
2447 po->tp_hdrlen = TPACKET2_HDRLEN;
2448 break;
2449 }
2450
Johann Baudy69e3c752009-05-18 22:11:22 -07002451 err = -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002452 if (unlikely((int)req->tp_block_size <= 0))
Johann Baudy69e3c752009-05-18 22:11:22 -07002453 goto out;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002454 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
Johann Baudy69e3c752009-05-18 22:11:22 -07002455 goto out;
Patrick McHardy8913336a2008-07-18 18:05:19 -07002456 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
Johann Baudy69e3c752009-05-18 22:11:22 -07002457 po->tp_reserve))
2458 goto out;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002459 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
Johann Baudy69e3c752009-05-18 22:11:22 -07002460 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461
Johann Baudy69e3c752009-05-18 22:11:22 -07002462 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2463 if (unlikely(rb->frames_per_block <= 0))
2464 goto out;
2465 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2466 req->tp_frame_nr))
2467 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468
2469 err = -ENOMEM;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002470 order = get_order(req->tp_block_size);
2471 pg_vec = alloc_pg_vec(req, order);
2472 if (unlikely(!pg_vec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473 goto out;
Johann Baudy69e3c752009-05-18 22:11:22 -07002474 }
2475 /* Done */
2476 else {
2477 err = -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002478 if (unlikely(req->tp_frame_nr))
Johann Baudy69e3c752009-05-18 22:11:22 -07002479 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480 }
2481
2482 lock_sock(sk);
2483
2484 /* Detach socket from network */
2485 spin_lock(&po->bind_lock);
2486 was_running = po->running;
2487 num = po->num;
2488 if (was_running) {
2489 __dev_remove_pack(&po->prot_hook);
2490 po->num = 0;
2491 po->running = 0;
2492 __sock_put(sk);
2493 }
2494 spin_unlock(&po->bind_lock);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002495
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496 synchronize_net();
2497
2498 err = -EBUSY;
Herbert Xu905db442009-01-30 14:12:06 -08002499 mutex_lock(&po->pg_vec_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500 if (closing || atomic_read(&po->mapped) == 0) {
2501 err = 0;
2502#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
Johann Baudy69e3c752009-05-18 22:11:22 -07002503 spin_lock_bh(&rb_queue->lock);
2504 pg_vec = XC(rb->pg_vec, pg_vec);
2505 rb->frame_max = (req->tp_frame_nr - 1);
2506 rb->head = 0;
2507 rb->frame_size = req->tp_frame_size;
2508 spin_unlock_bh(&rb_queue->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002509
Johann Baudy69e3c752009-05-18 22:11:22 -07002510 order = XC(rb->pg_vec_order, order);
2511 req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002512
Johann Baudy69e3c752009-05-18 22:11:22 -07002513 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2514 po->prot_hook.func = (po->rx_ring.pg_vec) ?
2515 tpacket_rcv : packet_rcv;
2516 skb_queue_purge(rb_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002517#undef XC
2518 if (atomic_read(&po->mapped))
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002519 pr_err("packet_mmap: vma is busy: %d\n",
2520 atomic_read(&po->mapped));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002521 }
Herbert Xu905db442009-01-30 14:12:06 -08002522 mutex_unlock(&po->pg_vec_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002523
2524 spin_lock(&po->bind_lock);
2525 if (was_running && !po->running) {
2526 sock_hold(sk);
2527 po->running = 1;
2528 po->num = num;
2529 dev_add_pack(&po->prot_hook);
2530 }
2531 spin_unlock(&po->bind_lock);
2532
2533 release_sock(sk);
2534
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535 if (pg_vec)
2536 free_pg_vec(pg_vec, order, req->tp_block_nr);
2537out:
2538 return err;
2539}
2540
/*
 * mmap() handler for packet sockets: map the rx and/or tx ring buffers
 * into the caller's address space.  The vma must start at offset 0 and
 * exactly cover the combined size of all allocated rings (rx pages
 * first, then tx pages); anything else is rejected with -EINVAL.
 */
static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	/* Partial/offset mappings of the ring area are not supported. */
	if (vma->vm_pgoff)
		return -EINVAL;

	/* Serialize against packet_set_ring() swapping/freeing pg_vec. */
	mutex_lock(&po->pg_vec_lock);

	/*
	 * Sum the size of every allocated ring.  The loop walks rx_ring
	 * then tx_ring by pointer increment, which relies on the two
	 * being adjacent members of struct packet_sock.
	 */
	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	/* No ring configured yet: nothing to map. */
	if (expected_size == 0)
		goto out;

	/* The vma must match the total ring size exactly. */
	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	/*
	 * Insert every ring page into the vma, in order: all rx-ring
	 * blocks first, then all tx-ring blocks, each block page by page.
	 */
	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = rb->pg_vec[i].buffer;
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	/* A nonzero 'mapped' count blocks packet_set_ring() changes. */
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602
/*
 * proto_ops for legacy SOCK_PACKET sockets: only bind, getname,
 * sendmsg/recvmsg, poll and ioctl are implemented; connection
 * semantics, socket options and ring mmap are all stubbed out
 * with the generic sock_no_* handlers.
 */
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002623
/*
 * proto_ops for modern PF_PACKET sockets: unlike the SOCK_PACKET
 * variant above, these support setsockopt/getsockopt and mmap of
 * the rx/tx rings (packet_mmap), plus a packet-specific poll.
 */
static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};
2644
/*
 * Address-family descriptor registered via sock_register() in
 * packet_init(); routes socket(PF_PACKET, ...) to packet_create().
 */
static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};
2650
/* Notifier for network device events; dispatched to packet_notifier(). */
static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};
2654
2655#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07002656
2657static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
stephen hemminger808f5112010-02-22 07:57:18 +00002658 __acquires(RCU)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002659{
Denis V. Luneve372c412007-11-19 22:31:54 -08002660 struct net *net = seq_file_net(seq);
stephen hemminger808f5112010-02-22 07:57:18 +00002661
2662 rcu_read_lock();
2663 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002664}
2665
2666static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2667{
Herbert Xu1bf40952007-12-16 14:04:02 -08002668 struct net *net = seq_file_net(seq);
stephen hemminger808f5112010-02-22 07:57:18 +00002669 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002670}
2671
/* End iteration: drop the RCU read lock taken in packet_seq_start(). */
static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
2677
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002678static int packet_seq_show(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002679{
2680 if (v == SEQ_START_TOKEN)
2681 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2682 else {
Li Zefanb7ceabd2010-02-08 23:19:29 +00002683 struct sock *s = sk_entry(v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002684 const struct packet_sock *po = pkt_sk(s);
2685
2686 seq_printf(seq,
2687 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2688 s,
2689 atomic_read(&s->sk_refcnt),
2690 s->sk_type,
2691 ntohs(po->num),
2692 po->ifindex,
2693 po->running,
2694 atomic_read(&s->sk_rmem_alloc),
2695 sock_i_uid(s),
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002696 sock_i_ino(s));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002697 }
2698
2699 return 0;
2700}
2701
/* seq_file iteration callbacks backing /proc/net/packet. */
static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};
2708
/* Open /proc/net/packet as a per-namespace seq_file. */
static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}
2714
/* file_operations for /proc/net/packet (standard seq_file plumbing). */
static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2722
2723#endif
2724
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002725static int __net_init packet_net_init(struct net *net)
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002726{
stephen hemminger808f5112010-02-22 07:57:18 +00002727 spin_lock_init(&net->packet.sklist_lock);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002728 INIT_HLIST_HEAD(&net->packet.sklist);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002729
2730 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2731 return -ENOMEM;
2732
2733 return 0;
2734}
2735
/* Per-namespace teardown: remove this namespace's /proc/net/packet entry. */
static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}
2740
/* Network-namespace hooks, registered in packet_init(). */
static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};
2745
2746
/* Module unload: tear everything down in reverse order of packet_init(). */
static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}
2754
2755static int __init packet_init(void)
2756{
2757 int rc = proto_register(&packet_proto, 0);
2758
2759 if (rc != 0)
2760 goto out;
2761
2762 sock_register(&packet_family_ops);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002763 register_pernet_subsys(&packet_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002764 register_netdevice_notifier(&packet_netdev_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002765out:
2766 return rc;
2767}
2768
/* Module entry/exit points and metadata. */
module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
/* Allows the kernel to demand-load this module for PF_PACKET sockets. */
MODULE_ALIAS_NETPROTO(PF_PACKET);