blob: 4f059775d48f3387d9060bbef7d19383c503d994 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 *
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090012 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 * Alan Cox : verify_area() now used correctly
14 * Alan Cox : new skbuff lists, look ma no backlogs!
15 * Alan Cox : tidied skbuff lists.
16 * Alan Cox : Now uses generic datagram routines I
17 * added. Also fixed the peek/read crash
18 * from all old Linux datagram code.
19 * Alan Cox : Uses the improved datagram code.
20 * Alan Cox : Added NULL's for socket options.
21 * Alan Cox : Re-commented the code.
22 * Alan Cox : Use new kernel side addressing
23 * Rob Janssen : Correct MTU usage.
24 * Dave Platt : Counter leaks caused by incorrect
25 * interrupt locking and some slightly
26 * dubious gcc output. Can you read
27 * compiler: it said _VOLATILE_
28 * Richard Kooijman : Timestamp fixes.
29 * Alan Cox : New buffers. Use sk->mac.raw.
30 * Alan Cox : sendmsg/recvmsg support.
31 * Alan Cox : Protocol setting support
32 * Alexey Kuznetsov : Untied from IPv4 stack.
33 * Cyrus Durgin : Fixed kerneld for kmod.
34 * Michal Ostrowski : Module initialization cleanup.
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090035 * Ulises Alonso : Frame number limit removal and
Linus Torvalds1da177e2005-04-16 15:20:36 -070036 * packet_set_ring memory leak.
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070037 * Eric Biederman : Allow for > 8 byte hardware addresses.
38 * The convention is that longer addresses
39 * will simply extend the hardware address
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090040 * byte arrays at the end of sockaddr_ll
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070041 * and packet_mreq.
Linus Torvalds1da177e2005-04-16 15:20:36 -070042 *
43 * This program is free software; you can redistribute it and/or
44 * modify it under the terms of the GNU General Public License
45 * as published by the Free Software Foundation; either version
46 * 2 of the License, or (at your option) any later version.
47 *
48 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090049
Linus Torvalds1da177e2005-04-16 15:20:36 -070050#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <linux/mm.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080052#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070053#include <linux/fcntl.h>
54#include <linux/socket.h>
55#include <linux/in.h>
56#include <linux/inet.h>
57#include <linux/netdevice.h>
58#include <linux/if_packet.h>
59#include <linux/wireless.h>
Herbert Xuffbc6112007-02-04 23:33:10 -080060#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070061#include <linux/kmod.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020062#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070063#include <net/ip.h>
64#include <net/protocol.h>
65#include <linux/skbuff.h>
66#include <net/sock.h>
67#include <linux/errno.h>
68#include <linux/timer.h>
69#include <asm/system.h>
70#include <asm/uaccess.h>
71#include <asm/ioctls.h>
72#include <asm/page.h>
Al Viroa1f8e7f2006-10-19 16:08:53 -040073#include <asm/cacheflush.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070074#include <asm/io.h>
75#include <linux/proc_fs.h>
76#include <linux/seq_file.h>
77#include <linux/poll.h>
78#include <linux/module.h>
79#include <linux/init.h>
80
81#ifdef CONFIG_INET
82#include <net/inet_common.h>
83#endif
84
Linus Torvalds1da177e2005-04-16 15:20:36 -070085/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnel), others are silly
     (PPP).
93 - packet socket receives packets with pulled ll header,
94 so that SOCK_RAW should push it back.
95
96On receive:
97-----------
98
99Incoming, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700100 mac_header -> ll header
101 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102
103Outgoing, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700104 mac_header -> ll header
105 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106
107Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the ll
		 header.  PPP does this, which is wrong, because it introduces
		 asymmetry between rx and tx paths.
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700111 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112
113Outgoing, dev->hard_header==NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700114 mac_header -> data. ll header is still not built!
115 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
117Resume
118 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
119
120
121On transmit:
122------------
123
124dev->hard_header != NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700125 mac_header -> ll header
126 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127
128dev->hard_header == NULL (ll header is added by device, we cannot control it)
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700129 mac_header -> data
130 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131
   We should set nh.raw on output to the correct position,
   packet classifier depends on it.
134 */
135
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136/* Private packet socket structures. */
137
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138struct packet_mclist
139{
140 struct packet_mclist *next;
141 int ifindex;
142 int count;
143 unsigned short type;
144 unsigned short alen;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700145 unsigned char addr[MAX_ADDR_LEN];
146};
147/* identical to struct packet_mreq except it has
148 * a longer address field.
149 */
150struct packet_mreq_max
151{
152 int mr_ifindex;
153 unsigned short mr_type;
154 unsigned short mr_alen;
155 unsigned char mr_address[MAX_ADDR_LEN];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156};
David S. Millera2efcfa2007-05-29 13:12:50 -0700157
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158#ifdef CONFIG_PACKET_MMAP
159static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
160#endif
161
162static void packet_flush_mclist(struct sock *sk);
163
164struct packet_sock {
165 /* struct sock has to be the first member of packet_sock */
166 struct sock sk;
167 struct tpacket_stats stats;
168#ifdef CONFIG_PACKET_MMAP
169 char * *pg_vec;
170 unsigned int head;
171 unsigned int frames_per_block;
172 unsigned int frame_size;
173 unsigned int frame_max;
174 int copy_thresh;
175#endif
176 struct packet_type prot_hook;
177 spinlock_t bind_lock;
Herbert Xu8dc41942007-02-04 23:31:32 -0800178 unsigned int running:1, /* prot_hook is attached*/
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700179 auxdata:1,
180 origdev:1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 int ifindex; /* bound device */
Al Viro0e11c912006-11-08 00:26:29 -0800182 __be16 num;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 struct packet_mclist *mclist;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700184#ifdef CONFIG_PACKET_MMAP
185 atomic_t mapped;
186 unsigned int pg_vec_order;
187 unsigned int pg_vec_pages;
188 unsigned int pg_vec_len;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700189 enum tpacket_versions tp_version;
190 unsigned int tp_hdrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191#endif
192};
193
Herbert Xuffbc6112007-02-04 23:33:10 -0800194struct packet_skb_cb {
195 unsigned int origlen;
196 union {
197 struct sockaddr_pkt pkt;
198 struct sockaddr_ll ll;
199 } sa;
200};
201
202#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
Herbert Xu8dc41942007-02-04 23:31:32 -0800203
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204#ifdef CONFIG_PACKET_MMAP
205
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700206static void *packet_lookup_frame(struct packet_sock *po, unsigned int position,
207 int status)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208{
209 unsigned int pg_vec_pos, frame_offset;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700210 union {
211 struct tpacket_hdr *h1;
212 struct tpacket2_hdr *h2;
213 void *raw;
214 } h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215
216 pg_vec_pos = position / po->frames_per_block;
217 frame_offset = position % po->frames_per_block;
218
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700219 h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
220 switch (po->tp_version) {
221 case TPACKET_V1:
222 if (status != h.h1->tp_status ? TP_STATUS_USER :
223 TP_STATUS_KERNEL)
224 return NULL;
225 break;
226 case TPACKET_V2:
227 if (status != h.h2->tp_status ? TP_STATUS_USER :
228 TP_STATUS_KERNEL)
229 return NULL;
230 break;
231 }
232 return h.raw;
233}
234
235static void __packet_set_status(struct packet_sock *po, void *frame, int status)
236{
237 union {
238 struct tpacket_hdr *h1;
239 struct tpacket2_hdr *h2;
240 void *raw;
241 } h;
242
243 h.raw = frame;
244 switch (po->tp_version) {
245 case TPACKET_V1:
246 h.h1->tp_status = status;
247 break;
248 case TPACKET_V2:
249 h.h2->tp_status = status;
250 break;
251 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252}
253#endif
254
/*
 * Convert a generic socket pointer into its packet socket.  This is a
 * plain pointer cast, so it relies on struct sock being the first member
 * of struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	struct packet_sock *po = (struct packet_sock *)sk;

	return po;
}
259
260static void packet_sock_destruct(struct sock *sk)
261{
262 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
263 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
264
265 if (!sock_flag(sk, SOCK_DEAD)) {
266 printk("Attempt to release alive packet socket: %p\n", sk);
267 return;
268 }
269
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -0800270 sk_refcnt_debug_dec(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271}
272
273
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800274static const struct proto_ops packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800276static const struct proto_ops packet_ops_spkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277
David S. Millerf2ccd8f2005-08-09 19:34:12 -0700278static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279{
280 struct sock *sk;
281 struct sockaddr_pkt *spkt;
282
283 /*
284 * When we registered the protocol we saved the socket in the data
285 * field for just this event.
286 */
287
288 sk = pt->af_packet_priv;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900289
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290 /*
291 * Yank back the headers [hope the device set this
292 * right or kerboom...]
293 *
294 * Incoming packets have ll header pulled,
295 * push it back.
296 *
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700297 * For outgoing ones skb->data == skb_mac_header(skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 * so that this procedure is noop.
299 */
300
301 if (skb->pkt_type == PACKET_LOOPBACK)
302 goto out;
303
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900304 if (dev_net(dev) != sock_net(sk))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800305 goto out;
306
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
308 goto oom;
309
310 /* drop any routing info */
311 dst_release(skb->dst);
312 skb->dst = NULL;
313
Phil Oester84531c22005-07-12 11:57:52 -0700314 /* drop conntrack reference */
315 nf_reset(skb);
316
Herbert Xuffbc6112007-02-04 23:33:10 -0800317 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700319 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320
321 /*
322 * The SOCK_PACKET socket receives _all_ frames.
323 */
324
325 spkt->spkt_family = dev->type;
326 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
327 spkt->spkt_protocol = skb->protocol;
328
329 /*
330 * Charge the memory to the socket. This is done specifically
331 * to prevent sockets using all the memory up.
332 */
333
334 if (sock_queue_rcv_skb(sk,skb) == 0)
335 return 0;
336
337out:
338 kfree_skb(skb);
339oom:
340 return 0;
341}
342
343
344/*
345 * Output a raw packet to a device layer. This bypasses all the other
346 * protocol layers and you must therefore supply it with a complete frame
347 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900348
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
350 struct msghdr *msg, size_t len)
351{
352 struct sock *sk = sock->sk;
353 struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
354 struct sk_buff *skb;
355 struct net_device *dev;
Al Viro0e11c912006-11-08 00:26:29 -0800356 __be16 proto=0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357 int err;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900358
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900360 * Get and verify the address.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 */
362
363 if (saddr)
364 {
365 if (msg->msg_namelen < sizeof(struct sockaddr))
366 return(-EINVAL);
367 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
368 proto=saddr->spkt_protocol;
369 }
370 else
371 return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */
372
373 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900374 * Find the device first to size check it
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375 */
376
377 saddr->spkt_device[13] = 0;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900378 dev = dev_get_by_name(sock_net(sk), saddr->spkt_device);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 err = -ENODEV;
380 if (dev == NULL)
381 goto out_unlock;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900382
David S. Millerd5e76b02007-01-25 19:30:36 -0800383 err = -ENETDOWN;
384 if (!(dev->flags & IFF_UP))
385 goto out_unlock;
386
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 /*
388 * You may not queue a frame bigger than the mtu. This is the lowest level
389 * raw protocol and you must do your own fragmentation at this level.
390 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900391
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 err = -EMSGSIZE;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -0800393 if (len > dev->mtu + dev->hard_header_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 goto out_unlock;
395
396 err = -ENOBUFS;
397 skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
398
399 /*
400 * If the write buffer is full, then tough. At this level the user gets to
401 * deal with the problem - do your own algorithmic backoffs. That's far
402 * more flexible.
403 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900404
405 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406 goto out_unlock;
407
408 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900409 * Fill it in
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900411
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 /* FIXME: Save some space for broken drivers that write a
413 * hard header at transmission time by themselves. PPP is the
414 * notable one here. This should really be fixed at the driver level.
415 */
416 skb_reserve(skb, LL_RESERVED_SPACE(dev));
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700417 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418
419 /* Try to align data part correctly */
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700420 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 skb->data -= dev->hard_header_len;
422 skb->tail -= dev->hard_header_len;
423 if (len < dev->hard_header_len)
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700424 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 }
426
427 /* Returns -EFAULT on error */
428 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
429 skb->protocol = proto;
430 skb->dev = dev;
431 skb->priority = sk->sk_priority;
432 if (err)
433 goto out_free;
434
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 /*
436 * Now send it
437 */
438
439 dev_queue_xmit(skb);
440 dev_put(dev);
441 return(len);
442
443out_free:
444 kfree_skb(skb);
445out_unlock:
446 if (dev)
447 dev_put(dev);
448 return err;
449}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450
David S. Millerdbcb5852007-01-24 15:21:02 -0800451static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
452 unsigned int res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453{
454 struct sk_filter *filter;
455
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700456 rcu_read_lock_bh();
457 filter = rcu_dereference(sk->sk_filter);
David S. Millerdbcb5852007-01-24 15:21:02 -0800458 if (filter != NULL)
459 res = sk_run_filter(skb, filter->insns, filter->len);
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700460 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461
David S. Millerdbcb5852007-01-24 15:21:02 -0800462 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463}
464
465/*
466 This function makes lazy skb cloning in hope that most of packets
467 are discarded by BPF.
468
469 Note tricky part: we DO mangle shared skb! skb->data, skb->len
470 and skb->cb are mangled. It works because (and until) packets
471 falling here are owned by current CPU. Output packets are cloned
472 by dev_queue_xmit_nit(), input packets are processed by net_bh
473 sequencially, so that if we return skb to original state on exit,
474 we will not harm anyone.
475 */
476
David S. Millerf2ccd8f2005-08-09 19:34:12 -0700477static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478{
479 struct sock *sk;
480 struct sockaddr_ll *sll;
481 struct packet_sock *po;
482 u8 * skb_head = skb->data;
483 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800484 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485
486 if (skb->pkt_type == PACKET_LOOPBACK)
487 goto drop;
488
489 sk = pt->af_packet_priv;
490 po = pkt_sk(sk);
491
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900492 if (dev_net(dev) != sock_net(sk))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800493 goto drop;
494
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495 skb->dev = dev;
496
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700497 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 /* The device has an explicit notion of ll header,
499 exported to higher levels.
500
501 Otherwise, the device hides datails of it frame
502 structure, so that corresponding packet head
503 never delivered to user.
504 */
505 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700506 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507 else if (skb->pkt_type == PACKET_OUTGOING) {
508 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300509 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 }
511 }
512
513 snaplen = skb->len;
514
David S. Millerdbcb5852007-01-24 15:21:02 -0800515 res = run_filter(skb, sk, snaplen);
516 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700517 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800518 if (snaplen > res)
519 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520
521 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
522 (unsigned)sk->sk_rcvbuf)
523 goto drop_n_acct;
524
525 if (skb_shared(skb)) {
526 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
527 if (nskb == NULL)
528 goto drop_n_acct;
529
530 if (skb_head != skb->data) {
531 skb->data = skb_head;
532 skb->len = skb_len;
533 }
534 kfree_skb(skb);
535 skb = nskb;
536 }
537
Herbert Xuffbc6112007-02-04 23:33:10 -0800538 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
539 sizeof(skb->cb));
540
541 sll = &PACKET_SKB_CB(skb)->sa.ll;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542 sll->sll_family = AF_PACKET;
543 sll->sll_hatype = dev->type;
544 sll->sll_protocol = skb->protocol;
545 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800546 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700547 sll->sll_ifindex = orig_dev->ifindex;
548 else
549 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700551 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552
Herbert Xuffbc6112007-02-04 23:33:10 -0800553 PACKET_SKB_CB(skb)->origlen = skb->len;
Herbert Xu8dc41942007-02-04 23:31:32 -0800554
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 if (pskb_trim(skb, snaplen))
556 goto drop_n_acct;
557
558 skb_set_owner_r(skb, sk);
559 skb->dev = NULL;
560 dst_release(skb->dst);
561 skb->dst = NULL;
562
Phil Oester84531c22005-07-12 11:57:52 -0700563 /* drop conntrack reference */
564 nf_reset(skb);
565
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 spin_lock(&sk->sk_receive_queue.lock);
567 po->stats.tp_packets++;
568 __skb_queue_tail(&sk->sk_receive_queue, skb);
569 spin_unlock(&sk->sk_receive_queue.lock);
570 sk->sk_data_ready(sk, skb->len);
571 return 0;
572
573drop_n_acct:
574 spin_lock(&sk->sk_receive_queue.lock);
575 po->stats.tp_drops++;
576 spin_unlock(&sk->sk_receive_queue.lock);
577
578drop_n_restore:
579 if (skb_head != skb->data && skb_shared(skb)) {
580 skb->data = skb_head;
581 skb->len = skb_len;
582 }
583drop:
584 kfree_skb(skb);
585 return 0;
586}
587
588#ifdef CONFIG_PACKET_MMAP
David S. Millerf2ccd8f2005-08-09 19:34:12 -0700589static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590{
591 struct sock *sk;
592 struct packet_sock *po;
593 struct sockaddr_ll *sll;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700594 union {
595 struct tpacket_hdr *h1;
596 struct tpacket2_hdr *h2;
597 void *raw;
598 } h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 u8 * skb_head = skb->data;
600 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800601 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700603 unsigned short macoff, netoff, hdrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 struct sk_buff *copy_skb = NULL;
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -0700605 struct timeval tv;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700606 struct timespec ts;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607
608 if (skb->pkt_type == PACKET_LOOPBACK)
609 goto drop;
610
611 sk = pt->af_packet_priv;
612 po = pkt_sk(sk);
613
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900614 if (dev_net(dev) != sock_net(sk))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800615 goto drop;
616
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700617 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700619 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 else if (skb->pkt_type == PACKET_OUTGOING) {
621 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300622 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 }
624 }
625
Herbert Xu8dc41942007-02-04 23:31:32 -0800626 if (skb->ip_summed == CHECKSUM_PARTIAL)
627 status |= TP_STATUS_CSUMNOTREADY;
628
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 snaplen = skb->len;
630
David S. Millerdbcb5852007-01-24 15:21:02 -0800631 res = run_filter(skb, sk, snaplen);
632 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700633 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800634 if (snaplen > res)
635 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636
637 if (sk->sk_type == SOCK_DGRAM) {
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700638 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639 } else {
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300640 unsigned maclen = skb_network_offset(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700641 netoff = TPACKET_ALIGN(po->tp_hdrlen +
642 (maclen < 16 ? 16 : maclen));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643 macoff = netoff - maclen;
644 }
645
646 if (macoff + snaplen > po->frame_size) {
647 if (po->copy_thresh &&
648 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
649 (unsigned)sk->sk_rcvbuf) {
650 if (skb_shared(skb)) {
651 copy_skb = skb_clone(skb, GFP_ATOMIC);
652 } else {
653 copy_skb = skb_get(skb);
654 skb_head = skb->data;
655 }
656 if (copy_skb)
657 skb_set_owner_r(copy_skb, sk);
658 }
659 snaplen = po->frame_size - macoff;
660 if ((int)snaplen < 0)
661 snaplen = 0;
662 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663
664 spin_lock(&sk->sk_receive_queue.lock);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700665 h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL);
666 if (!h.raw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 goto ring_is_full;
668 po->head = po->head != po->frame_max ? po->head+1 : 0;
669 po->stats.tp_packets++;
670 if (copy_skb) {
671 status |= TP_STATUS_COPY;
672 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
673 }
674 if (!po->stats.tp_drops)
675 status &= ~TP_STATUS_LOSING;
676 spin_unlock(&sk->sk_receive_queue.lock);
677
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700678 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700680 switch (po->tp_version) {
681 case TPACKET_V1:
682 h.h1->tp_len = skb->len;
683 h.h1->tp_snaplen = snaplen;
684 h.h1->tp_mac = macoff;
685 h.h1->tp_net = netoff;
686 if (skb->tstamp.tv64)
687 tv = ktime_to_timeval(skb->tstamp);
688 else
689 do_gettimeofday(&tv);
690 h.h1->tp_sec = tv.tv_sec;
691 h.h1->tp_usec = tv.tv_usec;
692 hdrlen = sizeof(*h.h1);
693 break;
694 case TPACKET_V2:
695 h.h2->tp_len = skb->len;
696 h.h2->tp_snaplen = snaplen;
697 h.h2->tp_mac = macoff;
698 h.h2->tp_net = netoff;
699 if (skb->tstamp.tv64)
700 ts = ktime_to_timespec(skb->tstamp);
701 else
702 getnstimeofday(&ts);
703 h.h2->tp_sec = ts.tv_sec;
704 h.h2->tp_nsec = ts.tv_nsec;
705 hdrlen = sizeof(*h.h2);
706 break;
707 default:
708 BUG();
709 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700711 sll = h.raw + TPACKET_ALIGN(hdrlen);
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700712 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 sll->sll_family = AF_PACKET;
714 sll->sll_hatype = dev->type;
715 sll->sll_protocol = skb->protocol;
716 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800717 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700718 sll->sll_ifindex = orig_dev->ifindex;
719 else
720 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700722 __packet_set_status(po, h.raw, status);
Ralf Baechlee16aa202006-12-07 00:11:33 -0800723 smp_mb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724
725 {
726 struct page *p_start, *p_end;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700727 u8 *h_end = h.raw + macoff + snaplen - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700729 p_start = virt_to_page(h.raw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 p_end = virt_to_page(h_end);
731 while (p_start <= p_end) {
732 flush_dcache_page(p_start);
733 p_start++;
734 }
735 }
736
737 sk->sk_data_ready(sk, 0);
738
739drop_n_restore:
740 if (skb_head != skb->data && skb_shared(skb)) {
741 skb->data = skb_head;
742 skb->len = skb_len;
743 }
744drop:
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900745 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 return 0;
747
748ring_is_full:
749 po->stats.tp_drops++;
750 spin_unlock(&sk->sk_receive_queue.lock);
751
752 sk->sk_data_ready(sk, 0);
753 if (copy_skb)
754 kfree_skb(copy_skb);
755 goto drop_n_restore;
756}
757
758#endif
759
760
761static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
762 struct msghdr *msg, size_t len)
763{
764 struct sock *sk = sock->sk;
765 struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
766 struct sk_buff *skb;
767 struct net_device *dev;
Al Viro0e11c912006-11-08 00:26:29 -0800768 __be16 proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769 unsigned char *addr;
770 int ifindex, err, reserve = 0;
771
772 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900773 * Get and verify the address.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900775
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 if (saddr == NULL) {
777 struct packet_sock *po = pkt_sk(sk);
778
779 ifindex = po->ifindex;
780 proto = po->num;
781 addr = NULL;
782 } else {
783 err = -EINVAL;
784 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
785 goto out;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700786 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
787 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 ifindex = saddr->sll_ifindex;
789 proto = saddr->sll_protocol;
790 addr = saddr->sll_addr;
791 }
792
793
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900794 dev = dev_get_by_index(sock_net(sk), ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795 err = -ENXIO;
796 if (dev == NULL)
797 goto out_unlock;
798 if (sock->type == SOCK_RAW)
799 reserve = dev->hard_header_len;
800
David S. Millerd5e76b02007-01-25 19:30:36 -0800801 err = -ENETDOWN;
802 if (!(dev->flags & IFF_UP))
803 goto out_unlock;
804
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805 err = -EMSGSIZE;
806 if (len > dev->mtu+reserve)
807 goto out_unlock;
808
Johannes Bergf5184d22008-05-12 20:48:31 -0700809 skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 msg->msg_flags & MSG_DONTWAIT, &err);
811 if (skb==NULL)
812 goto out_unlock;
813
814 skb_reserve(skb, LL_RESERVED_SPACE(dev));
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700815 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816
Stephen Hemminger0c4e8582007-10-09 01:36:32 -0700817 err = -EINVAL;
818 if (sock->type == SOCK_DGRAM &&
819 dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
820 goto out_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821
822 /* Returns -EFAULT on error */
823 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
824 if (err)
825 goto out_free;
826
827 skb->protocol = proto;
828 skb->dev = dev;
829 skb->priority = sk->sk_priority;
830
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831 /*
832 * Now send it
833 */
834
835 err = dev_queue_xmit(skb);
836 if (err > 0 && (err = net_xmit_errno(err)) != 0)
837 goto out_unlock;
838
839 dev_put(dev);
840
841 return(len);
842
843out_free:
844 kfree_skb(skb);
845out_unlock:
846 if (dev)
847 dev_put(dev);
848out:
849 return err;
850}
851
852/*
853 * Close a PACKET socket. This is fairly simple. We immediately go
854 * to 'closed' state and remove our protocol entry in the device list.
855 */
856
857static int packet_release(struct socket *sock)
858{
859 struct sock *sk = sock->sk;
860 struct packet_sock *po;
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800861 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862
863 if (!sk)
864 return 0;
865
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900866 net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 po = pkt_sk(sk);
868
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -0800869 write_lock_bh(&net->packet.sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870 sk_del_node_init(sk);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -0800871 write_unlock_bh(&net->packet.sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872
873 /*
874 * Unhook packet receive handler.
875 */
876
877 if (po->running) {
878 /*
879 * Remove the protocol hook
880 */
881 dev_remove_pack(&po->prot_hook);
882 po->running = 0;
883 po->num = 0;
884 __sock_put(sk);
885 }
886
Linus Torvalds1da177e2005-04-16 15:20:36 -0700887 packet_flush_mclist(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888
889#ifdef CONFIG_PACKET_MMAP
890 if (po->pg_vec) {
891 struct tpacket_req req;
892 memset(&req, 0, sizeof(req));
893 packet_set_ring(sk, &req, 1);
894 }
895#endif
896
897 /*
898 * Now the socket is dead. No more input will appear.
899 */
900
901 sock_orphan(sk);
902 sock->sk = NULL;
903
904 /* Purge queues */
905
906 skb_queue_purge(&sk->sk_receive_queue);
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -0800907 sk_refcnt_debug_release(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908
909 sock_put(sk);
910 return 0;
911}
912
913/*
914 * Attach a packet hook.
915 */
916
Al Viro0e11c912006-11-08 00:26:29 -0800917static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918{
919 struct packet_sock *po = pkt_sk(sk);
920 /*
921 * Detach an existing hook if present.
922 */
923
924 lock_sock(sk);
925
926 spin_lock(&po->bind_lock);
927 if (po->running) {
928 __sock_put(sk);
929 po->running = 0;
930 po->num = 0;
931 spin_unlock(&po->bind_lock);
932 dev_remove_pack(&po->prot_hook);
933 spin_lock(&po->bind_lock);
934 }
935
936 po->num = protocol;
937 po->prot_hook.type = protocol;
938 po->prot_hook.dev = dev;
939
940 po->ifindex = dev ? dev->ifindex : 0;
941
942 if (protocol == 0)
943 goto out_unlock;
944
Urs Thuermannbe85d4a2007-11-12 21:05:20 -0800945 if (!dev || (dev->flags & IFF_UP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 dev_add_pack(&po->prot_hook);
947 sock_hold(sk);
948 po->running = 1;
Urs Thuermannbe85d4a2007-11-12 21:05:20 -0800949 } else {
950 sk->sk_err = ENETDOWN;
951 if (!sock_flag(sk, SOCK_DEAD))
952 sk->sk_error_report(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 }
954
955out_unlock:
956 spin_unlock(&po->bind_lock);
957 release_sock(sk);
958 return 0;
959}
960
961/*
962 * Bind a packet socket to a device
963 */
964
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
966{
967 struct sock *sk=sock->sk;
968 char name[15];
969 struct net_device *dev;
970 int err = -ENODEV;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900971
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 /*
973 * Check legality
974 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900975
Kris Katterjohn8ae55f02006-01-23 16:28:02 -0800976 if (addr_len != sizeof(struct sockaddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977 return -EINVAL;
978 strlcpy(name,uaddr->sa_data,sizeof(name));
979
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900980 dev = dev_get_by_name(sock_net(sk), name);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 if (dev) {
982 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
983 dev_put(dev);
984 }
985 return err;
986}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987
988static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
989{
990 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
991 struct sock *sk=sock->sk;
992 struct net_device *dev = NULL;
993 int err;
994
995
996 /*
997 * Check legality
998 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900999
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 if (addr_len < sizeof(struct sockaddr_ll))
1001 return -EINVAL;
1002 if (sll->sll_family != AF_PACKET)
1003 return -EINVAL;
1004
1005 if (sll->sll_ifindex) {
1006 err = -ENODEV;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001007 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 if (dev == NULL)
1009 goto out;
1010 }
1011 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1012 if (dev)
1013 dev_put(dev);
1014
1015out:
1016 return err;
1017}
1018
1019static struct proto packet_proto = {
1020 .name = "PACKET",
1021 .owner = THIS_MODULE,
1022 .obj_size = sizeof(struct packet_sock),
1023};
1024
1025/*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001026 * Create a packet of type SOCK_PACKET.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027 */
1028
Eric W. Biederman1b8d7ae2007-10-08 23:24:22 -07001029static int packet_create(struct net *net, struct socket *sock, int protocol)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030{
1031 struct sock *sk;
1032 struct packet_sock *po;
Al Viro0e11c912006-11-08 00:26:29 -08001033 __be16 proto = (__force __be16)protocol; /* weird, but documented */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 int err;
1035
1036 if (!capable(CAP_NET_RAW))
1037 return -EPERM;
David S. Millerbe020972007-05-29 13:16:31 -07001038 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
1039 sock->type != SOCK_PACKET)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040 return -ESOCKTNOSUPPORT;
1041
1042 sock->state = SS_UNCONNECTED;
1043
1044 err = -ENOBUFS;
Pavel Emelyanov6257ff22007-11-01 00:39:31 -07001045 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046 if (sk == NULL)
1047 goto out;
1048
1049 sock->ops = &packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050 if (sock->type == SOCK_PACKET)
1051 sock->ops = &packet_ops_spkt;
David S. Millerbe020972007-05-29 13:16:31 -07001052
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 sock_init_data(sock, sk);
1054
1055 po = pkt_sk(sk);
1056 sk->sk_family = PF_PACKET;
Al Viro0e11c912006-11-08 00:26:29 -08001057 po->num = proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058
1059 sk->sk_destruct = packet_sock_destruct;
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -08001060 sk_refcnt_debug_inc(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061
1062 /*
1063 * Attach a protocol block
1064 */
1065
1066 spin_lock_init(&po->bind_lock);
1067 po->prot_hook.func = packet_rcv;
David S. Millerbe020972007-05-29 13:16:31 -07001068
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069 if (sock->type == SOCK_PACKET)
1070 po->prot_hook.func = packet_rcv_spkt;
David S. Millerbe020972007-05-29 13:16:31 -07001071
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 po->prot_hook.af_packet_priv = sk;
1073
Al Viro0e11c912006-11-08 00:26:29 -08001074 if (proto) {
1075 po->prot_hook.type = proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076 dev_add_pack(&po->prot_hook);
1077 sock_hold(sk);
1078 po->running = 1;
1079 }
1080
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08001081 write_lock_bh(&net->packet.sklist_lock);
1082 sk_add_node(sk, &net->packet.sklist);
1083 write_unlock_bh(&net->packet.sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084 return(0);
1085out:
1086 return err;
1087}
1088
1089/*
1090 * Pull a packet from our receive queue and hand it to the user.
1091 * If necessary we block.
1092 */
1093
1094static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1095 struct msghdr *msg, size_t len, int flags)
1096{
1097 struct sock *sk = sock->sk;
1098 struct sk_buff *skb;
1099 int copied, err;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001100 struct sockaddr_ll *sll;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101
1102 err = -EINVAL;
1103 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1104 goto out;
1105
1106#if 0
1107 /* What error should we return now? EUNATTACH? */
1108 if (pkt_sk(sk)->ifindex < 0)
1109 return -ENODEV;
1110#endif
1111
1112 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113 * Call the generic datagram receiver. This handles all sorts
1114 * of horrible races and re-entrancy so we can forget about it
1115 * in the protocol layers.
1116 *
1117 * Now it will return ENETDOWN, if device have just gone down,
1118 * but then it will block.
1119 */
1120
1121 skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1122
1123 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001124 * An error occurred so return it. Because skb_recv_datagram()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125 * handles the blocking we don't see and worry about blocking
1126 * retries.
1127 */
1128
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001129 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 goto out;
1131
1132 /*
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001133 * If the address length field is there to be filled in, we fill
1134 * it in now.
1135 */
1136
Herbert Xuffbc6112007-02-04 23:33:10 -08001137 sll = &PACKET_SKB_CB(skb)->sa.ll;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001138 if (sock->type == SOCK_PACKET)
1139 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1140 else
1141 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1142
1143 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 * You lose any data beyond the buffer you gave. If it worries a
1145 * user program they can ask the device for its MTU anyway.
1146 */
1147
1148 copied = skb->len;
1149 if (copied > len)
1150 {
1151 copied=len;
1152 msg->msg_flags|=MSG_TRUNC;
1153 }
1154
1155 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1156 if (err)
1157 goto out_free;
1158
1159 sock_recv_timestamp(msg, sk, skb);
1160
1161 if (msg->msg_name)
Herbert Xuffbc6112007-02-04 23:33:10 -08001162 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1163 msg->msg_namelen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164
Herbert Xu8dc41942007-02-04 23:31:32 -08001165 if (pkt_sk(sk)->auxdata) {
Herbert Xuffbc6112007-02-04 23:33:10 -08001166 struct tpacket_auxdata aux;
1167
1168 aux.tp_status = TP_STATUS_USER;
1169 if (skb->ip_summed == CHECKSUM_PARTIAL)
1170 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1171 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1172 aux.tp_snaplen = skb->len;
1173 aux.tp_mac = 0;
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001174 aux.tp_net = skb_network_offset(skb);
Herbert Xuffbc6112007-02-04 23:33:10 -08001175
1176 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
Herbert Xu8dc41942007-02-04 23:31:32 -08001177 }
1178
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 /*
1180 * Free or return the buffer as appropriate. Again this
1181 * hides all the races and re-entrancy issues from us.
1182 */
1183 err = (flags&MSG_TRUNC) ? skb->len : copied;
1184
1185out_free:
1186 skb_free_datagram(sk, skb);
1187out:
1188 return err;
1189}
1190
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1192 int *uaddr_len, int peer)
1193{
1194 struct net_device *dev;
1195 struct sock *sk = sock->sk;
1196
1197 if (peer)
1198 return -EOPNOTSUPP;
1199
1200 uaddr->sa_family = AF_PACKET;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001201 dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202 if (dev) {
1203 strlcpy(uaddr->sa_data, dev->name, 15);
1204 dev_put(dev);
1205 } else
1206 memset(uaddr->sa_data, 0, 14);
1207 *uaddr_len = sizeof(*uaddr);
1208
1209 return 0;
1210}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211
1212static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1213 int *uaddr_len, int peer)
1214{
1215 struct net_device *dev;
1216 struct sock *sk = sock->sk;
1217 struct packet_sock *po = pkt_sk(sk);
1218 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1219
1220 if (peer)
1221 return -EOPNOTSUPP;
1222
1223 sll->sll_family = AF_PACKET;
1224 sll->sll_ifindex = po->ifindex;
1225 sll->sll_protocol = po->num;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001226 dev = dev_get_by_index(sock_net(sk), po->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 if (dev) {
1228 sll->sll_hatype = dev->type;
1229 sll->sll_halen = dev->addr_len;
1230 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1231 dev_put(dev);
1232 } else {
1233 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1234 sll->sll_halen = 0;
1235 }
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001236 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237
1238 return 0;
1239}
1240
Wang Chen2aeb0b82008-07-14 20:49:46 -07001241static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1242 int what)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243{
1244 switch (i->type) {
1245 case PACKET_MR_MULTICAST:
1246 if (what > 0)
1247 dev_mc_add(dev, i->addr, i->alen, 0);
1248 else
1249 dev_mc_delete(dev, i->addr, i->alen, 0);
1250 break;
1251 case PACKET_MR_PROMISC:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001252 return dev_set_promiscuity(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 break;
1254 case PACKET_MR_ALLMULTI:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001255 return dev_set_allmulti(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256 break;
1257 default:;
1258 }
Wang Chen2aeb0b82008-07-14 20:49:46 -07001259 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260}
1261
1262static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1263{
1264 for ( ; i; i=i->next) {
1265 if (i->ifindex == dev->ifindex)
1266 packet_dev_mc(dev, i, what);
1267 }
1268}
1269
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001270static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271{
1272 struct packet_sock *po = pkt_sk(sk);
1273 struct packet_mclist *ml, *i;
1274 struct net_device *dev;
1275 int err;
1276
1277 rtnl_lock();
1278
1279 err = -ENODEV;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001280 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281 if (!dev)
1282 goto done;
1283
1284 err = -EINVAL;
1285 if (mreq->mr_alen > dev->addr_len)
1286 goto done;
1287
1288 err = -ENOBUFS;
Kris Katterjohn8b3a7002006-01-11 15:56:43 -08001289 i = kmalloc(sizeof(*i), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 if (i == NULL)
1291 goto done;
1292
1293 err = 0;
1294 for (ml = po->mclist; ml; ml = ml->next) {
1295 if (ml->ifindex == mreq->mr_ifindex &&
1296 ml->type == mreq->mr_type &&
1297 ml->alen == mreq->mr_alen &&
1298 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1299 ml->count++;
1300 /* Free the new element ... */
1301 kfree(i);
1302 goto done;
1303 }
1304 }
1305
1306 i->type = mreq->mr_type;
1307 i->ifindex = mreq->mr_ifindex;
1308 i->alen = mreq->mr_alen;
1309 memcpy(i->addr, mreq->mr_address, i->alen);
1310 i->count = 1;
1311 i->next = po->mclist;
1312 po->mclist = i;
Wang Chen2aeb0b82008-07-14 20:49:46 -07001313 err = packet_dev_mc(dev, i, 1);
1314 if (err) {
1315 po->mclist = i->next;
1316 kfree(i);
1317 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318
1319done:
1320 rtnl_unlock();
1321 return err;
1322}
1323
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001324static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325{
1326 struct packet_mclist *ml, **mlp;
1327
1328 rtnl_lock();
1329
1330 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1331 if (ml->ifindex == mreq->mr_ifindex &&
1332 ml->type == mreq->mr_type &&
1333 ml->alen == mreq->mr_alen &&
1334 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1335 if (--ml->count == 0) {
1336 struct net_device *dev;
1337 *mlp = ml->next;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001338 dev = dev_get_by_index(sock_net(sk), ml->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339 if (dev) {
1340 packet_dev_mc(dev, ml, -1);
1341 dev_put(dev);
1342 }
1343 kfree(ml);
1344 }
1345 rtnl_unlock();
1346 return 0;
1347 }
1348 }
1349 rtnl_unlock();
1350 return -EADDRNOTAVAIL;
1351}
1352
1353static void packet_flush_mclist(struct sock *sk)
1354{
1355 struct packet_sock *po = pkt_sk(sk);
1356 struct packet_mclist *ml;
1357
1358 if (!po->mclist)
1359 return;
1360
1361 rtnl_lock();
1362 while ((ml = po->mclist) != NULL) {
1363 struct net_device *dev;
1364
1365 po->mclist = ml->next;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001366 if ((dev = dev_get_by_index(sock_net(sk), ml->ifindex)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367 packet_dev_mc(dev, ml, -1);
1368 dev_put(dev);
1369 }
1370 kfree(ml);
1371 }
1372 rtnl_unlock();
1373}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374
1375static int
1376packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1377{
1378 struct sock *sk = sock->sk;
Herbert Xu8dc41942007-02-04 23:31:32 -08001379 struct packet_sock *po = pkt_sk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 int ret;
1381
1382 if (level != SOL_PACKET)
1383 return -ENOPROTOOPT;
1384
1385 switch(optname) {
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001386 case PACKET_ADD_MEMBERSHIP:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387 case PACKET_DROP_MEMBERSHIP:
1388 {
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001389 struct packet_mreq_max mreq;
1390 int len = optlen;
1391 memset(&mreq, 0, sizeof(mreq));
1392 if (len < sizeof(struct packet_mreq))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393 return -EINVAL;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001394 if (len > sizeof(mreq))
1395 len = sizeof(mreq);
1396 if (copy_from_user(&mreq,optval,len))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397 return -EFAULT;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001398 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1399 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400 if (optname == PACKET_ADD_MEMBERSHIP)
1401 ret = packet_mc_add(sk, &mreq);
1402 else
1403 ret = packet_mc_drop(sk, &mreq);
1404 return ret;
1405 }
David S. Millera2efcfa2007-05-29 13:12:50 -07001406
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407#ifdef CONFIG_PACKET_MMAP
1408 case PACKET_RX_RING:
1409 {
1410 struct tpacket_req req;
1411
1412 if (optlen<sizeof(req))
1413 return -EINVAL;
1414 if (copy_from_user(&req,optval,sizeof(req)))
1415 return -EFAULT;
1416 return packet_set_ring(sk, &req, 0);
1417 }
1418 case PACKET_COPY_THRESH:
1419 {
1420 int val;
1421
1422 if (optlen!=sizeof(val))
1423 return -EINVAL;
1424 if (copy_from_user(&val,optval,sizeof(val)))
1425 return -EFAULT;
1426
1427 pkt_sk(sk)->copy_thresh = val;
1428 return 0;
1429 }
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001430 case PACKET_VERSION:
1431 {
1432 int val;
1433
1434 if (optlen != sizeof(val))
1435 return -EINVAL;
1436 if (po->pg_vec)
1437 return -EBUSY;
1438 if (copy_from_user(&val, optval, sizeof(val)))
1439 return -EFAULT;
1440 switch (val) {
1441 case TPACKET_V1:
1442 case TPACKET_V2:
1443 po->tp_version = val;
1444 return 0;
1445 default:
1446 return -EINVAL;
1447 }
1448 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449#endif
Herbert Xu8dc41942007-02-04 23:31:32 -08001450 case PACKET_AUXDATA:
1451 {
1452 int val;
1453
1454 if (optlen < sizeof(val))
1455 return -EINVAL;
1456 if (copy_from_user(&val, optval, sizeof(val)))
1457 return -EFAULT;
1458
1459 po->auxdata = !!val;
1460 return 0;
1461 }
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -07001462 case PACKET_ORIGDEV:
1463 {
1464 int val;
1465
1466 if (optlen < sizeof(val))
1467 return -EINVAL;
1468 if (copy_from_user(&val, optval, sizeof(val)))
1469 return -EFAULT;
1470
1471 po->origdev = !!val;
1472 return 0;
1473 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474 default:
1475 return -ENOPROTOOPT;
1476 }
1477}
1478
1479static int packet_getsockopt(struct socket *sock, int level, int optname,
1480 char __user *optval, int __user *optlen)
1481{
1482 int len;
Herbert Xu8dc41942007-02-04 23:31:32 -08001483 int val;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 struct sock *sk = sock->sk;
1485 struct packet_sock *po = pkt_sk(sk);
Herbert Xu8dc41942007-02-04 23:31:32 -08001486 void *data;
1487 struct tpacket_stats st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488
1489 if (level != SOL_PACKET)
1490 return -ENOPROTOOPT;
1491
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001492 if (get_user(len, optlen))
1493 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494
1495 if (len < 0)
1496 return -EINVAL;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001497
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 switch(optname) {
1499 case PACKET_STATISTICS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 if (len > sizeof(struct tpacket_stats))
1501 len = sizeof(struct tpacket_stats);
1502 spin_lock_bh(&sk->sk_receive_queue.lock);
1503 st = po->stats;
1504 memset(&po->stats, 0, sizeof(st));
1505 spin_unlock_bh(&sk->sk_receive_queue.lock);
1506 st.tp_packets += st.tp_drops;
1507
Herbert Xu8dc41942007-02-04 23:31:32 -08001508 data = &st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 break;
Herbert Xu8dc41942007-02-04 23:31:32 -08001510 case PACKET_AUXDATA:
1511 if (len > sizeof(int))
1512 len = sizeof(int);
1513 val = po->auxdata;
1514
1515 data = &val;
1516 break;
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -07001517 case PACKET_ORIGDEV:
1518 if (len > sizeof(int))
1519 len = sizeof(int);
1520 val = po->origdev;
1521
1522 data = &val;
1523 break;
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001524#ifdef CONFIG_PACKET_MMAP
1525 case PACKET_VERSION:
1526 if (len > sizeof(int))
1527 len = sizeof(int);
1528 val = po->tp_version;
1529 data = &val;
1530 break;
1531 case PACKET_HDRLEN:
1532 if (len > sizeof(int))
1533 len = sizeof(int);
1534 if (copy_from_user(&val, optval, len))
1535 return -EFAULT;
1536 switch (val) {
1537 case TPACKET_V1:
1538 val = sizeof(struct tpacket_hdr);
1539 break;
1540 case TPACKET_V2:
1541 val = sizeof(struct tpacket2_hdr);
1542 break;
1543 default:
1544 return -EINVAL;
1545 }
1546 data = &val;
1547 break;
1548#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 default:
1550 return -ENOPROTOOPT;
1551 }
1552
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001553 if (put_user(len, optlen))
1554 return -EFAULT;
Herbert Xu8dc41942007-02-04 23:31:32 -08001555 if (copy_to_user(optval, data, len))
1556 return -EFAULT;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001557 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558}
1559
1560
1561static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1562{
1563 struct sock *sk;
1564 struct hlist_node *node;
Jason Lunzad930652007-02-20 23:19:54 -08001565 struct net_device *dev = data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001566 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08001568 read_lock(&net->packet.sklist_lock);
1569 sk_for_each(sk, node, &net->packet.sklist) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 struct packet_sock *po = pkt_sk(sk);
1571
1572 switch (msg) {
1573 case NETDEV_UNREGISTER:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 if (po->mclist)
1575 packet_dev_mclist(dev, po->mclist, -1);
David S. Millera2efcfa2007-05-29 13:12:50 -07001576 /* fallthrough */
1577
Linus Torvalds1da177e2005-04-16 15:20:36 -07001578 case NETDEV_DOWN:
1579 if (dev->ifindex == po->ifindex) {
1580 spin_lock(&po->bind_lock);
1581 if (po->running) {
1582 __dev_remove_pack(&po->prot_hook);
1583 __sock_put(sk);
1584 po->running = 0;
1585 sk->sk_err = ENETDOWN;
1586 if (!sock_flag(sk, SOCK_DEAD))
1587 sk->sk_error_report(sk);
1588 }
1589 if (msg == NETDEV_UNREGISTER) {
1590 po->ifindex = -1;
1591 po->prot_hook.dev = NULL;
1592 }
1593 spin_unlock(&po->bind_lock);
1594 }
1595 break;
1596 case NETDEV_UP:
1597 spin_lock(&po->bind_lock);
1598 if (dev->ifindex == po->ifindex && po->num &&
1599 !po->running) {
1600 dev_add_pack(&po->prot_hook);
1601 sock_hold(sk);
1602 po->running = 1;
1603 }
1604 spin_unlock(&po->bind_lock);
1605 break;
1606 }
1607 }
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08001608 read_unlock(&net->packet.sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 return NOTIFY_DONE;
1610}
1611
1612
1613static int packet_ioctl(struct socket *sock, unsigned int cmd,
1614 unsigned long arg)
1615{
1616 struct sock *sk = sock->sk;
1617
1618 switch(cmd) {
1619 case SIOCOUTQ:
1620 {
1621 int amount = atomic_read(&sk->sk_wmem_alloc);
1622 return put_user(amount, (int __user *)arg);
1623 }
1624 case SIOCINQ:
1625 {
1626 struct sk_buff *skb;
1627 int amount = 0;
1628
1629 spin_lock_bh(&sk->sk_receive_queue.lock);
1630 skb = skb_peek(&sk->sk_receive_queue);
1631 if (skb)
1632 amount = skb->len;
1633 spin_unlock_bh(&sk->sk_receive_queue.lock);
1634 return put_user(amount, (int __user *)arg);
1635 }
1636 case SIOCGSTAMP:
1637 return sock_get_timestamp(sk, (struct timeval __user *)arg);
Eric Dumazetae40eb12007-03-18 17:33:16 -07001638 case SIOCGSTAMPNS:
1639 return sock_get_timestampns(sk, (struct timespec __user *)arg);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001640
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641#ifdef CONFIG_INET
1642 case SIOCADDRT:
1643 case SIOCDELRT:
1644 case SIOCDARP:
1645 case SIOCGARP:
1646 case SIOCSARP:
1647 case SIOCGIFADDR:
1648 case SIOCSIFADDR:
1649 case SIOCGIFBRDADDR:
1650 case SIOCSIFBRDADDR:
1651 case SIOCGIFNETMASK:
1652 case SIOCSIFNETMASK:
1653 case SIOCGIFDSTADDR:
1654 case SIOCSIFDSTADDR:
1655 case SIOCSIFFLAGS:
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001656 if (sock_net(sk) != &init_net)
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08001657 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658 return inet_dgram_ops.ioctl(sock, cmd, arg);
1659#endif
1660
1661 default:
Christoph Hellwigb5e5fa52006-01-03 14:18:33 -08001662 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663 }
1664 return 0;
1665}
1666
1667#ifndef CONFIG_PACKET_MMAP
1668#define packet_mmap sock_no_mmap
1669#define packet_poll datagram_poll
1670#else
1671
1672static unsigned int packet_poll(struct file * file, struct socket *sock,
1673 poll_table *wait)
1674{
1675 struct sock *sk = sock->sk;
1676 struct packet_sock *po = pkt_sk(sk);
1677 unsigned int mask = datagram_poll(file, sock, wait);
1678
1679 spin_lock_bh(&sk->sk_receive_queue.lock);
1680 if (po->pg_vec) {
1681 unsigned last = po->head ? po->head-1 : po->frame_max;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001683 if (packet_lookup_frame(po, last, TP_STATUS_USER))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 mask |= POLLIN | POLLRDNORM;
1685 }
1686 spin_unlock_bh(&sk->sk_receive_queue.lock);
1687 return mask;
1688}
1689
1690
1691/* Dirty? Well, I still did not learn better way to account
1692 * for user mmaps.
1693 */
1694
1695static void packet_mm_open(struct vm_area_struct *vma)
1696{
1697 struct file *file = vma->vm_file;
Eric Dumazetb69aee02005-09-06 14:42:45 -07001698 struct socket * sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001700
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701 if (sk)
1702 atomic_inc(&pkt_sk(sk)->mapped);
1703}
1704
1705static void packet_mm_close(struct vm_area_struct *vma)
1706{
1707 struct file *file = vma->vm_file;
Eric Dumazetb69aee02005-09-06 14:42:45 -07001708 struct socket * sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001710
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 if (sk)
1712 atomic_dec(&pkt_sk(sk)->mapped);
1713}
1714
1715static struct vm_operations_struct packet_mmap_ops = {
1716 .open = packet_mm_open,
1717 .close =packet_mm_close,
1718};
1719
/*
 * Free a block vector: every non-NULL block of the given page order is
 * released, then the vector itself.  'len' is the number of slots.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	unsigned int i;	/* unsigned to match 'len'; avoids a mixed-sign compare */

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i]))
			free_pages((unsigned long) pg_vec[i], order);
	}
	kfree(pg_vec);
}
1730
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001731static inline char *alloc_one_pg_vec_page(unsigned long order)
1732{
1733 return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1734 order);
1735}
1736
1737static char **alloc_pg_vec(struct tpacket_req *req, int order)
1738{
1739 unsigned int block_nr = req->tp_block_nr;
1740 char **pg_vec;
1741 int i;
1742
1743 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1744 if (unlikely(!pg_vec))
1745 goto out;
1746
1747 for (i = 0; i < block_nr; i++) {
1748 pg_vec[i] = alloc_one_pg_vec_page(order);
1749 if (unlikely(!pg_vec[i]))
1750 goto out_free_pgvec;
1751 }
1752
1753out:
1754 return pg_vec;
1755
1756out_free_pgvec:
1757 free_pg_vec(pg_vec, order, block_nr);
1758 pg_vec = NULL;
1759 goto out;
1760}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761
1762static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1763{
1764 char **pg_vec = NULL;
1765 struct packet_sock *po = pkt_sk(sk);
Al Viro0e11c912006-11-08 00:26:29 -08001766 int was_running, order = 0;
1767 __be16 num;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 int err = 0;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001769
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770 if (req->tp_block_nr) {
Jiri Olsa2a706ec2008-03-23 22:42:34 -07001771 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772
1773 /* Sanity tests and some calculations */
1774
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001775 if (unlikely(po->pg_vec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776 return -EBUSY;
1777
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001778 switch (po->tp_version) {
1779 case TPACKET_V1:
1780 po->tp_hdrlen = TPACKET_HDRLEN;
1781 break;
1782 case TPACKET_V2:
1783 po->tp_hdrlen = TPACKET2_HDRLEN;
1784 break;
1785 }
1786
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001787 if (unlikely((int)req->tp_block_size <= 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001789 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790 return -EINVAL;
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001791 if (unlikely(req->tp_frame_size < po->tp_hdrlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001793 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794 return -EINVAL;
1795
1796 po->frames_per_block = req->tp_block_size/req->tp_frame_size;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001797 if (unlikely(po->frames_per_block <= 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001799 if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1800 req->tp_frame_nr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802
1803 err = -ENOMEM;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001804 order = get_order(req->tp_block_size);
1805 pg_vec = alloc_pg_vec(req, order);
1806 if (unlikely(!pg_vec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001809 for (i = 0; i < req->tp_block_nr; i++) {
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001810 void *ptr = pg_vec[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811 int k;
1812
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001813 for (k = 0; k < po->frames_per_block; k++) {
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001814 __packet_set_status(po, ptr, TP_STATUS_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815 ptr += req->tp_frame_size;
1816 }
1817 }
1818 /* Done */
1819 } else {
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001820 if (unlikely(req->tp_frame_nr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821 return -EINVAL;
1822 }
1823
1824 lock_sock(sk);
1825
1826 /* Detach socket from network */
1827 spin_lock(&po->bind_lock);
1828 was_running = po->running;
1829 num = po->num;
1830 if (was_running) {
1831 __dev_remove_pack(&po->prot_hook);
1832 po->num = 0;
1833 po->running = 0;
1834 __sock_put(sk);
1835 }
1836 spin_unlock(&po->bind_lock);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001837
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838 synchronize_net();
1839
1840 err = -EBUSY;
1841 if (closing || atomic_read(&po->mapped) == 0) {
1842 err = 0;
1843#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1844
1845 spin_lock_bh(&sk->sk_receive_queue.lock);
1846 pg_vec = XC(po->pg_vec, pg_vec);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001847 po->frame_max = (req->tp_frame_nr - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848 po->head = 0;
1849 po->frame_size = req->tp_frame_size;
1850 spin_unlock_bh(&sk->sk_receive_queue.lock);
1851
1852 order = XC(po->pg_vec_order, order);
1853 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1854
1855 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1856 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1857 skb_queue_purge(&sk->sk_receive_queue);
1858#undef XC
1859 if (atomic_read(&po->mapped))
1860 printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1861 }
1862
1863 spin_lock(&po->bind_lock);
1864 if (was_running && !po->running) {
1865 sock_hold(sk);
1866 po->running = 1;
1867 po->num = num;
1868 dev_add_pack(&po->prot_hook);
1869 }
1870 spin_unlock(&po->bind_lock);
1871
1872 release_sock(sk);
1873
Linus Torvalds1da177e2005-04-16 15:20:36 -07001874 if (pg_vec)
1875 free_pg_vec(pg_vec, order, req->tp_block_nr);
1876out:
1877 return err;
1878}
1879
1880static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1881{
1882 struct sock *sk = sock->sk;
1883 struct packet_sock *po = pkt_sk(sk);
1884 unsigned long size;
1885 unsigned long start;
1886 int err = -EINVAL;
1887 int i;
1888
1889 if (vma->vm_pgoff)
1890 return -EINVAL;
1891
1892 size = vma->vm_end - vma->vm_start;
1893
1894 lock_sock(sk);
1895 if (po->pg_vec == NULL)
1896 goto out;
1897 if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1898 goto out;
1899
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900 start = vma->vm_start;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001901 for (i = 0; i < po->pg_vec_len; i++) {
1902 struct page *page = virt_to_page(po->pg_vec[i]);
1903 int pg_num;
1904
1905 for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1906 err = vm_insert_page(vma, start, page);
1907 if (unlikely(err))
1908 goto out;
1909 start += PAGE_SIZE;
1910 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911 }
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001912 atomic_inc(&po->mapped);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 vma->vm_ops = &packet_mmap_ops;
1914 err = 0;
1915
1916out:
1917 release_sock(sk);
1918 return err;
1919}
1920#endif
1921
1922
Eric Dumazet90ddc4f2005-12-22 12:49:22 -08001923static const struct proto_ops packet_ops_spkt = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924 .family = PF_PACKET,
1925 .owner = THIS_MODULE,
1926 .release = packet_release,
1927 .bind = packet_bind_spkt,
1928 .connect = sock_no_connect,
1929 .socketpair = sock_no_socketpair,
1930 .accept = sock_no_accept,
1931 .getname = packet_getname_spkt,
1932 .poll = datagram_poll,
1933 .ioctl = packet_ioctl,
1934 .listen = sock_no_listen,
1935 .shutdown = sock_no_shutdown,
1936 .setsockopt = sock_no_setsockopt,
1937 .getsockopt = sock_no_getsockopt,
1938 .sendmsg = packet_sendmsg_spkt,
1939 .recvmsg = packet_recvmsg,
1940 .mmap = sock_no_mmap,
1941 .sendpage = sock_no_sendpage,
1942};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943
Eric Dumazet90ddc4f2005-12-22 12:49:22 -08001944static const struct proto_ops packet_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945 .family = PF_PACKET,
1946 .owner = THIS_MODULE,
1947 .release = packet_release,
1948 .bind = packet_bind,
1949 .connect = sock_no_connect,
1950 .socketpair = sock_no_socketpair,
1951 .accept = sock_no_accept,
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001952 .getname = packet_getname,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953 .poll = packet_poll,
1954 .ioctl = packet_ioctl,
1955 .listen = sock_no_listen,
1956 .shutdown = sock_no_shutdown,
1957 .setsockopt = packet_setsockopt,
1958 .getsockopt = packet_getsockopt,
1959 .sendmsg = packet_sendmsg,
1960 .recvmsg = packet_recvmsg,
1961 .mmap = packet_mmap,
1962 .sendpage = sock_no_sendpage,
1963};
1964
1965static struct net_proto_family packet_family_ops = {
1966 .family = PF_PACKET,
1967 .create = packet_create,
1968 .owner = THIS_MODULE,
1969};
1970
1971static struct notifier_block packet_netdev_notifier = {
1972 .notifier_call =packet_notifier,
1973};
1974
1975#ifdef CONFIG_PROC_FS
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08001976static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977{
1978 struct sock *s;
1979 struct hlist_node *node;
1980
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08001981 sk_for_each(s, node, &net->packet.sklist) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001982 if (!off--)
1983 return s;
1984 }
1985 return NULL;
1986}
1987
1988static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet40ccbf52008-01-07 22:39:57 -08001989 __acquires(seq_file_net(seq)->packet.sklist_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990{
Denis V. Luneve372c412007-11-19 22:31:54 -08001991 struct net *net = seq_file_net(seq);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08001992 read_lock(&net->packet.sklist_lock);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08001993 return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994}
1995
1996static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1997{
Herbert Xu1bf40952007-12-16 14:04:02 -08001998 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999 ++*pos;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002000 return (v == SEQ_START_TOKEN)
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002001 ? sk_head(&net->packet.sklist)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002 : sk_next((struct sock*)v) ;
2003}
2004
2005static void packet_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet40ccbf52008-01-07 22:39:57 -08002006 __releases(seq_file_net(seq)->packet.sklist_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002007{
Herbert Xu1bf40952007-12-16 14:04:02 -08002008 struct net *net = seq_file_net(seq);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002009 read_unlock(&net->packet.sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010}
2011
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002012static int packet_seq_show(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013{
2014 if (v == SEQ_START_TOKEN)
2015 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2016 else {
2017 struct sock *s = v;
2018 const struct packet_sock *po = pkt_sk(s);
2019
2020 seq_printf(seq,
2021 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2022 s,
2023 atomic_read(&s->sk_refcnt),
2024 s->sk_type,
2025 ntohs(po->num),
2026 po->ifindex,
2027 po->running,
2028 atomic_read(&s->sk_rmem_alloc),
2029 sock_i_uid(s),
2030 sock_i_ino(s) );
2031 }
2032
2033 return 0;
2034}
2035
Philippe De Muyter56b3d972007-07-10 23:07:31 -07002036static const struct seq_operations packet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037 .start = packet_seq_start,
2038 .next = packet_seq_next,
2039 .stop = packet_seq_stop,
2040 .show = packet_seq_show,
2041};
2042
2043static int packet_seq_open(struct inode *inode, struct file *file)
2044{
Denis V. Luneve372c412007-11-19 22:31:54 -08002045 return seq_open_net(inode, file, &packet_seq_ops,
2046 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002047}
2048
Arjan van de Venda7071d2007-02-12 00:55:36 -08002049static const struct file_operations packet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002050 .owner = THIS_MODULE,
2051 .open = packet_seq_open,
2052 .read = seq_read,
2053 .llseek = seq_lseek,
Denis V. Luneve372c412007-11-19 22:31:54 -08002054 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055};
2056
2057#endif
2058
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002059static int packet_net_init(struct net *net)
2060{
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002061 rwlock_init(&net->packet.sklist_lock);
2062 INIT_HLIST_HEAD(&net->packet.sklist);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002063
2064 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2065 return -ENOMEM;
2066
2067 return 0;
2068}
2069
/* Per-namespace teardown: remove the "packet" proc entry. */
static void packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}
2074
2075static struct pernet_operations packet_net_ops = {
2076 .init = packet_net_init,
2077 .exit = packet_net_exit,
2078};
2079
2080
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081static void __exit packet_exit(void)
2082{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083 unregister_netdevice_notifier(&packet_netdev_notifier);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002084 unregister_pernet_subsys(&packet_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085 sock_unregister(PF_PACKET);
2086 proto_unregister(&packet_proto);
2087}
2088
2089static int __init packet_init(void)
2090{
2091 int rc = proto_register(&packet_proto, 0);
2092
2093 if (rc != 0)
2094 goto out;
2095
2096 sock_register(&packet_family_ops);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002097 register_pernet_subsys(&packet_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098 register_netdevice_notifier(&packet_netdev_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099out:
2100 return rc;
2101}
2102
2103module_init(packet_init);
2104module_exit(packet_exit);
2105MODULE_LICENSE("GPL");
2106MODULE_ALIAS_NETPROTO(PF_PACKET);